Mock external HTTP calls in the backend tests to unblock CI for now; vcrpy to be added soon

mahmoud · mahmoud · commit 8a00969b09c7 · 2026-04-09T07:53:18.000-07:00
diff --git a/.github/workflows/test-backend.yml b/.github/workflows/test-backend.yml
@@ -21,7 +21,7 @@ jobs:
             -v "${{ github.workspace }}":/app \
             -e PYTHONPATH=/app \
             montage-ci \
-            python -m pytest montage/tests/test_web_basic.py \
+            python -m pytest montage/tests/ \
               -v --tb=short -p no:cacheprovider
 
       - name: Verify backend imports
diff --git a/dockerfile b/dockerfile
@@ -8,6 +8,6 @@ COPY requirements.txt .
 
 RUN pip install --upgrade pip
 RUN pip install -r requirements.txt
-RUN pip install pytest
+RUN pip install "pytest>=7,<9" "responses>=0.25.0"
 
 EXPOSE 5000
diff --git a/montage/tests/conftest.py b/montage/tests/conftest.py
@@ -0,0 +1,193 @@
+# -*- coding: utf-8 -*-
+"""
+Shared test fixtures.
+
+Mocks all external HTTP calls (Toolforge API, Google Sheets, Wikimedia
+API) so tests run without network access.  The ``responses`` library
+intercepts at the ``requests`` adapter level; any unmocked call raises
+``ConnectionError`` (passthrough=False, the default).
+"""
+
+from __future__ import absolute_import
+
+import json
+import re
+
+import pytest
+import responses as responses_lib
+
+from urllib.parse import parse_qs, urlparse
+
+# ---------------------------------------------------------------------------
+# URLs exactly as constructed by montage code
+# ---------------------------------------------------------------------------
+TOOLFORGE_CATEGORY_URL = 'https://montage.toolforge.org/v1/utils//category'
+TOOLFORGE_FILE_URL = 'https://montage.toolforge.org/v1/utils//file'
+
+# Matches any Google Sheets CSV-export URL regardless of doc ID.
+GSHEET_CSV_URL_RE = re.compile(
+    r'https://docs\.google\.com/spreadsheets/d/.+/gviz/tq\?tqx=out:csv'
+)
+
+# Matches any Commons API request with a query string (used for user lookups).
+MW_API_URL_RE = re.compile(
+    r'https://commons\.wikimedia\.org/w/api\.php\?.*'
+)
+
+# ---------------------------------------------------------------------------
+# Fixture data -- 20 synthetic entries with resolution > 2 megapixels.
+# Enough entries to survive disqualification AND give every juror >=2
+# tasks regardless of random.shuffle ordering during task allocation.
+# ---------------------------------------------------------------------------
+
+
+def _generate_file_infos(n):
+    """Build *n* unique file-info dicts with high resolution."""
+    # Uploads start at 2015-09-06 02:00:00 (after the campaign open_date,
+    # 2015-09-01), one second apart; carrying into minutes/hours keeps the
+    # HHMMSS part valid for n > 60 (a bare counter would emit ss >= 60).
+    return [{
+        'img_name': 'Test_WLM_2015_image_%03d.jpg' % (i + 1),
+        'img_major_mime': 'image',
+        'img_minor_mime': 'jpeg',
+        'img_width': '3264',
+        'img_height': '2448',  # 3264*2448 = 7,990,272 > 2M
+        'img_user': '5193613',
+        'img_user_text': 'Khoshamadgou',
+        'img_timestamp': '20150906%02d%02d%02d' % (
+            2 + i // 3600, i // 60 % 60, i % 60),
+    } for i in range(n)]
+
+
+FIXTURE_FILE_INFOS = _generate_file_infos(20)
+
+# Entry returned for the single-filename import in test_web_basic.py
+SELECTED_FILE_INFO = {
+    'img_name': u'Reynisfjara, Su\u00f0urland, Islandia, 2014-08-17, DD 164.JPG',
+    'img_major_mime': 'image',
+    'img_minor_mime': 'jpeg',
+    'img_width': '4928',
+    'img_height': '3280',
+    'img_user': '12345',
+    'img_user_text': 'TestUploader',
+    'img_timestamp': '20140817120000',
+}
+
+CSV_FULL_COLS = [
+    'img_name', 'img_major_mime', 'img_minor_mime',
+    'img_width', 'img_height', 'img_user',
+    'img_user_text', 'img_timestamp',
+]
+
+
+def build_full_csv(file_infos=None):
+    """Build a CSV string of CSV_FULL_COLS rows, csv-quoting where needed."""
+    import csv, io  # function-scope: only the CSV builders use them
+    infos = FIXTURE_FILE_INFOS if file_infos is None else file_infos
+    rows = [CSV_FULL_COLS] + [[fi[c] for c in CSV_FULL_COLS] for fi in infos]
+    buf = io.StringIO()
+    csv.writer(buf, lineterminator='\n').writerows(rows)
+    return buf.getvalue()
+
+
+def build_filename_csv(file_infos=None):
+    """Build a one-column ('filename') CSV string, csv-quoting each name."""
+    import csv, io  # function-scope: only the CSV builders use them
+    infos = FIXTURE_FILE_INFOS if file_infos is None else file_infos
+    rows = [['filename']] + [[fi['img_name']] for fi in infos]
+    buf = io.StringIO()
+    csv.writer(buf, lineterminator='\n').writerows(rows)
+    return buf.getvalue()
+
+
+FIXTURE_FULL_CSV = build_full_csv()
+FIXTURE_FILENAME_CSV = build_filename_csv()
+
+
+# ---------------------------------------------------------------------------
+# Disable pdb in error handler (autouse, so it covers every test)
+# ---------------------------------------------------------------------------
+@pytest.fixture(autouse=True)
+def _disable_pdb(monkeypatch):
+    """Replace pdb's interactive entry points with no-ops."""
+    # devtest sets debug_errors=True, which calls pdb.post_mortem() on
+    # unhandled exceptions; under pytest's output capture that hits OSError.
+    for target in ('pdb.set_trace', 'pdb.post_mortem'):
+        monkeypatch.setattr(target, lambda *a, **kw: None)
+
+# ---------------------------------------------------------------------------
+# Wikimedia API callback -- returns a plausible user record for any username
+# ---------------------------------------------------------------------------
+def _wikimedia_user_callback(request):
+    """Return a mock globalallusers response matching the requested username.
+
+    The name is read from the ``agufrom`` query parameter ('Unknown' when
+    absent); the result is a ``(status, headers, body)`` tuple.
+    """
+    import zlib  # function-scope: only this callback needs it
+    params = parse_qs(urlparse(request.url).query)
+    username = params.get('agufrom', ['Unknown'])[0]
+    # CRC32 yields the same fake ID in every process; built-in hash() of a
+    # str is salted per interpreter run (PYTHONHASHSEED), so it would not.
+    user_id = zlib.crc32(username.encode('utf-8')) % 10**8
+    user = {'name': username, 'id': str(user_id)}
+    body = json.dumps({'query': {'globalallusers': [user]}})
+    return (200, {}, body)
+
+
+# ---------------------------------------------------------------------------
+# Fixture: mock_external_apis
+# ---------------------------------------------------------------------------
+@pytest.fixture
+def mock_external_apis():
+    """Yield an active ``responses`` mock covering every external endpoint.
+
+    Registered endpoints:
+    - POST Toolforge category lookup  (/v1/utils//category)
+    - POST Toolforge file lookup      (/v1/utils//file)
+    - GET  Google Sheets CSV export   (docs.google.com/spreadsheets/...)
+    - GET  Wikimedia user lookup      (commons.wikimedia.org/w/api.php)
+
+    A request to any other URL raises ``ConnectionError``, so no live
+    HTTP traffic can leak from a test.  A test need not exercise every
+    mock: ``assert_all_requests_are_fired`` is switched off.
+    """
+    category_payload = {'file_infos': FIXTURE_FILE_INFOS, 'no_info': []}
+
+    # The file lookup answers with the fixture entries plus the single
+    # "selected" entry, so bulk- and single-filename imports both succeed.
+    file_payload = {
+        'file_infos': FIXTURE_FILE_INFOS + [SELECTED_FILE_INFO],
+        'no_info': [],
+    }
+
+    mock = responses_lib.RequestsMock(assert_all_requests_are_fired=False)
+    with mock as rsps:
+        # Toolforge category endpoint
+        rsps.add(
+            responses_lib.POST, TOOLFORGE_CATEGORY_URL,
+            json=category_payload, status=200,
+        )
+
+        # Toolforge file-lookup endpoint
+        rsps.add(
+            responses_lib.POST, TOOLFORGE_FILE_URL,
+            json=file_payload, status=200,
+        )
+
+        # Google Sheets CSV export, whatever the doc ID
+        rsps.add(
+            responses_lib.GET, GSHEET_CSV_URL_RE,
+            body=FIXTURE_FULL_CSV,
+            status=200,
+            content_type='text/csv',
+        )
+
+        # Wikimedia user-lookup API; get_mw_userid() calls it when
+        # creating new users, and the callback echoes the requested name.
+        rsps.add_callback(
+            responses_lib.GET, MW_API_URL_RE,
+            callback=_wikimedia_user_callback,
+        )
+
+        yield rsps
diff --git a/montage/tests/test_loaders.py b/montage/tests/test_loaders.py
@@ -1,29 +1,92 @@
 # -*- coding: utf-8 -*-
 
 from __future__ import print_function
-
 from __future__ import absolute_import
+
+import responses
 from pytest import raises
 
 from montage.loaders import get_entries_from_gsheet
 
+from .conftest import (
+    FIXTURE_FILE_INFOS,
+    FIXTURE_FULL_CSV,
+    FIXTURE_FILENAME_CSV,
+    TOOLFORGE_FILE_URL,
+)
+
 RESULTS = 'https://docs.google.com/spreadsheets/d/1RDlpT23SV_JB1mIz0OA-iuc3MNdNVLbaK_LtWAC7vzg/edit?usp=sharing'
 FILENAME_LIST = 'https://docs.google.com/spreadsheets/d/1Nqj-JsX3L5qLp5ITTAcAFYouglbs5OpnFwP6zSFpa0M/edit?usp=sharing'
 GENERIC_CSV = 'https://docs.google.com/spreadsheets/d/1WzHFg_bhvNthRMwNmxnk010KJ8fwuyCrby29MvHUzH8/edit#gid=550467819'
 FORBIDDEN_SHEET = 'https://docs.google.com/spreadsheets/d/1tza92brMKkZBTykw3iS6X9ij1D4_kvIYAiUlq1Yi7Fs/edit'
 
+# Pre-compute the actual fetch URLs that loaders.py constructs from the
+# doc IDs embedded in the spreadsheet URLs above.
+_RESULTS_CSV_URL = 'https://docs.google.com/spreadsheets/d/1RDlpT23SV_JB1mIz0OA-iuc3MNdNVLbaK_LtWAC7vzg/gviz/tq?tqx=out:csv'
+_FILENAME_CSV_URL = 'https://docs.google.com/spreadsheets/d/1Nqj-JsX3L5qLp5ITTAcAFYouglbs5OpnFwP6zSFpa0M/gviz/tq?tqx=out:csv'
+_GENERIC_CSV_URL = 'https://docs.google.com/spreadsheets/d/1WzHFg_bhvNthRMwNmxnk010KJ8fwuyCrby29MvHUzH8/gviz/tq?tqx=out:csv'
+_FORBIDDEN_CSV_URL = 'https://docs.google.com/spreadsheets/d/1tza92brMKkZBTykw3iS6X9ij1D4_kvIYAiUlq1Yi7Fs/gviz/tq?tqx=out:csv'
+
+
+@responses.activate
 def test_load_results():
+    """Full-column CSV (only the sheet GET is mocked): one entry per row."""
+    responses.add(
+        responses.GET,
+        _RESULTS_CSV_URL,
+        body=FIXTURE_FULL_CSV,
+        status=200,
+        content_type='text/csv',
+    )
     imgs, warnings = get_entries_from_gsheet(RESULTS, source='remote')
-    assert len(imgs) == 331
+    assert len(imgs) == len(FIXTURE_FILE_INFOS)
+
 
+@responses.activate
 def test_load_filenames():
-    imgs, warnings = get_entries_from_gsheet(FILENAME_LIST, source='remote') 
-    assert len(imgs) == 89
+    """Filename-only CSV plus mocked Toolforge /file POST: one entry per name."""
+    responses.add(
+        responses.GET,
+        _FILENAME_CSV_URL,
+        body=FIXTURE_FILENAME_CSV,
+        status=200,
+        content_type='text/csv',
+    )
+    # The filename-only CSV triggers load_partial_csv -> load_name_list
+    # -> get_by_filename_remote -> POST to Toolforge /file endpoint.
+    responses.add(
+        responses.POST,
+        TOOLFORGE_FILE_URL,
+        json={'file_infos': FIXTURE_FILE_INFOS, 'no_info': []},
+        status=200,
+    )
+    imgs, warnings = get_entries_from_gsheet(FILENAME_LIST, source='remote')
+    assert len(imgs) == len(FIXTURE_FILE_INFOS)
 
+
+@responses.activate
 def test_load_csv():
-    imgs, warnings = get_entries_from_gsheet(GENERIC_CSV, source='remote') 
-    assert len(imgs) == 93
+    """Full-column sheet addressed via a '#gid=' URL: one entry per row."""
+    responses.add(
+        responses.GET,
+        _GENERIC_CSV_URL,
+        body=FIXTURE_FULL_CSV,
+        status=200,
+        content_type='text/csv',
+    )
+    imgs, warnings = get_entries_from_gsheet(GENERIC_CSV, source='remote')
+    assert len(imgs) == len(FIXTURE_FILE_INFOS)
+
 
+@responses.activate
 def test_no_persmission():
+    """A 200 text/html reply (e.g. a sign-in page) must raise ValueError."""
+    responses.add(
+        responses.GET,
+        _FORBIDDEN_CSV_URL,
+        body='<html><body>Sign in</body></html>',
+        status=200,
+        content_type='text/html',
+    )
     with raises(ValueError):
-        imgs, warnings = get_entries_from_gsheet(FORBIDDEN_SHEET, source='remote')
+        get_entries_from_gsheet(FORBIDDEN_SHEET, source='remote')
diff --git a/montage/tests/test_web_basic.py b/montage/tests/test_web_basic.py
@@ -165,7 +165,7 @@ def api_client(montage_app):
     return api_client
 
 
-def test_home_client(base_client, api_client):
+def test_home_client(base_client, api_client, mock_external_apis):
 
     resp = base_client.fetch('organizer: home', '/')
     #resp = base_client.fetch('public: login', '/login')
@@ -821,7 +821,7 @@ def test_home_client(base_client, api_client):
     #resp = base_client.fetch('public: logout', '/logout')
 
 
-def test_multiple_jurors(api_client):
+def test_multiple_jurors(api_client, mock_external_apis):
     # This is copied from above. What's the best way to break up the tests into
     # various stages? Should I use a pytest.fixture?
     fetch = api_client.fetch
diff --git a/requirements-dev.txt b/requirements-dev.txt
@@ -3,4 +3,5 @@
 tox<3.15.0
 Fabric3
 coverage==5.0.2
-pytest==4.6.9
+pytest>=7,<9
+responses>=0.25.0