diff --git a/.env.example b/.env.example index f3827e44..3b1a8fce 100644 --- a/.env.example +++ b/.env.example @@ -220,6 +220,12 @@ +# -- Substack -- +# Optional: publication URL used by Substack live integration tests. +# SUBSTACK_TEST_PUBLICATION_URL= +# Optional: post slug used by Substack get_post live integration tests. +# SUBSTACK_TEST_POST_SLUG= + # -- Supadata -- # SUPADATA_API_KEY= diff --git a/substack/config.json b/substack/config.json index 7d1f0285..1ad41a1c 100644 --- a/substack/config.json +++ b/substack/config.json @@ -1,7 +1,7 @@ { "name": "Substack", "display_name": "Substack", - "version": "1.0.0", + "version": "2.0.0", "description": "Search Substack publications, read posts and comments. No authentication required.", "entry_point": "substack.py", "actions": { @@ -246,12 +246,12 @@ "items": { "type": "object", "properties": { - "id": {"type": "integer"}, - "body": {"type": "string"}, - "date": {"type": "string"}, - "author_name": {"type": "string"}, - "author_id": {"type": "integer"}, - "like_count": {"type": "integer"}, + "id": {"type": ["integer", "null"]}, + "body": {"type": ["string", "null"]}, + "date": {"type": ["string", "null"]}, + "author_name": {"type": ["string", "null"]}, + "author_id": {"type": ["integer", "null"]}, + "like_count": {"type": ["integer", "null"]}, "children": {"type": "array"} } } diff --git a/substack/requirements.txt b/substack/requirements.txt index 13c7a363..32884a05 100644 --- a/substack/requirements.txt +++ b/substack/requirements.txt @@ -1,2 +1,2 @@ -autohive-integrations-sdk~=1.0.2 +autohive-integrations-sdk~=2.0.0 aiohttp>=3.9.0 diff --git a/substack/substack.py b/substack/substack.py index ec72656b..7f86ba71 100644 --- a/substack/substack.py +++ b/substack/substack.py @@ -82,13 +82,13 @@ async def execute(self, inputs: Dict[str, Any], context: ExecutionContext) -> Ac if inputs.get("search"): params["search"] = inputs["search"] - posts_raw = await context.fetch( + response = await context.fetch( 
f"{base_url}/api/v1/archive", method="GET", params=params, headers=headers, ) - posts = [_format_post(p) for p in (posts_raw or [])] + posts = [_format_post(p) for p in (response.data or [])] return ActionResult(data={"posts": posts, "count": len(posts)}, cost_usd=0.0) @@ -99,11 +99,12 @@ async def execute(self, inputs: Dict[str, Any], context: ExecutionContext) -> Ac slug = inputs["slug"] headers = _build_headers() - post = await context.fetch( + response = await context.fetch( f"{base_url}/api/v1/posts/{slug}", method="GET", headers=headers, ) + post = response.data result = _drop_none( { "id": post.get("id"), @@ -144,7 +145,8 @@ async def execute(self, inputs: Dict[str, Any], context: ExecutionContext) -> Ac params=params, headers=headers, ) - pubs_raw = response.get("publications", []) if isinstance(response, dict) else response + body = response.data + pubs_raw = body.get("publications", []) if isinstance(body, dict) else body pubs = [ _drop_none( { @@ -159,7 +161,7 @@ async def execute(self, inputs: Dict[str, Any], context: ExecutionContext) -> Ac ) for p in pubs_raw ] - more = response.get("more", False) if isinstance(response, dict) else False + more = body.get("more", False) if isinstance(body, dict) else False return ActionResult(data={"publications": pubs, "more": more}, cost_usd=0.0) @@ -178,13 +180,13 @@ async def execute(self, inputs: Dict[str, Any], context: ExecutionContext) -> Ac "limit": min(inputs.get("limit", 10), 50), } - posts_raw = await context.fetch( + response = await context.fetch( f"{base_url}/api/v1/archive", method="GET", params=params, headers=headers, ) - posts = [_format_post(p) for p in (posts_raw or [])] + posts = [_format_post(p) for p in (response.data or [])] return ActionResult(data={"posts": posts, "count": len(posts)}, cost_usd=0.0) @@ -207,5 +209,6 @@ async def execute(self, inputs: Dict[str, Any], context: ExecutionContext) -> Ac params=params, headers=headers, ) - comments = response.get("comments", []) if 
isinstance(response, dict) else [] + body = response.data + comments = body.get("comments", []) if isinstance(body, dict) else [] return ActionResult(data={"comments": comments, "count": len(comments)}, cost_usd=0.0) diff --git a/substack/tests/conftest.py b/substack/tests/conftest.py new file mode 100644 index 00000000..05b1a7a4 --- /dev/null +++ b/substack/tests/conftest.py @@ -0,0 +1,11 @@ +"""Test configuration for the Substack integration. + +Shared fixtures (``mock_context``, etc.) come from the repository-root +``conftest.py``. Tests import the integration via the package path +(``from substack.substack import ...``), which resolves from the repo root +that pytest puts on ``sys.path``. + +Do NOT insert the integration directory onto ``sys.path`` here: that makes +``substack.py`` importable as a top-level module and shadows the ``substack`` +package, breaking the package-style imports during collection. +""" diff --git a/substack/tests/context.py b/substack/tests/context.py deleted file mode 100644 index 38dfdee9..00000000 --- a/substack/tests/context.py +++ /dev/null @@ -1,8 +0,0 @@ -# -*- coding: utf-8 -*- -import sys -import os - -sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__), ".."))) -sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__), "../dependencies"))) - -from substack import substack # noqa: F401 diff --git a/substack/tests/test_substack.py b/substack/tests/test_substack.py deleted file mode 100644 index 77ef6e3a..00000000 --- a/substack/tests/test_substack.py +++ /dev/null @@ -1,365 +0,0 @@ -import asyncio -import sys -import os -import unittest -from unittest.mock import AsyncMock, MagicMock - -sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__), ".."))) -sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__), "../dependencies"))) - -from substack import substack # noqa: E402 - - -def make_context(fetch_side_effect=None, fetch_return_value=None): - """Create 
a mock ExecutionContext.""" - context = MagicMock() - context.auth = {} - if fetch_side_effect is not None: - context.fetch = AsyncMock(side_effect=fetch_side_effect) - else: - context.fetch = AsyncMock(return_value=fetch_return_value) - return context - - -def run(coro): - return asyncio.run(coro) - - -# ── Helpers ────────────────────────────────────────────────────────────────── - - -class TestNormaliseUrl(unittest.TestCase): - def _normalise(self, url): - from substack.substack import _normalise_url - - return _normalise_url(url) - - def test_strips_trailing_slash(self): - assert self._normalise("https://example.substack.com/") == "https://example.substack.com" - - def test_upgrades_http_to_https(self): - assert self._normalise("http://example.substack.com") == "https://example.substack.com" - - def test_strips_path(self): - assert self._normalise("https://example.substack.com/p/some-post") == "https://example.substack.com" - - def test_custom_domain_unchanged(self): - assert self._normalise("https://newsletter.example.com") == "https://newsletter.example.com" - - def test_no_change_needed(self): - assert self._normalise("https://example.substack.com") == "https://example.substack.com" - - def test_bare_hostname_no_scheme(self): - assert self._normalise("example.substack.com") == "https://example.substack.com" - - -# ── get_publication_posts ───────────────────────────────────────────────────── - - -class TestGetPublicationPosts(unittest.TestCase): - MOCK_RESPONSE = [ - { - "id": 123, - "slug": "hello-world", - "title": "Hello World", - "subtitle": "A subtitle", - "post_date": "2024-01-01T00:00:00.000Z", - "canonical_url": "https://example.substack.com/p/hello-world", - "audience": "everyone", - "paywall": False, - "reading_time_minutes": 3, - "cover_image": None, - "like_count": 10, - "comment_count": 2, - "type": "newsletter", - } - ] - - def test_success(self): - context = make_context(fetch_return_value=self.MOCK_RESPONSE) - result = run( - 
substack.execute_action( - "get_publication_posts", - {"publication_url": "https://example.substack.com"}, - context, - ) - ) - data = result.result.data - assert len(data["posts"]) == 1 - assert data["posts"][0]["slug"] == "hello-world" - assert data["count"] == 1 - - def test_passes_pagination_params(self): - context = make_context(fetch_return_value=[]) - run( - substack.execute_action( - "get_publication_posts", - { - "publication_url": "https://example.substack.com", - "offset": 12, - "limit": 6, - }, - context, - ) - ) - params = context.fetch.call_args[1].get("params", {}) - assert params.get("offset") == 12 - assert params.get("limit") == 6 - - def test_url_normalisation(self): - context = make_context(fetch_return_value=[]) - run( - substack.execute_action( - "get_publication_posts", - {"publication_url": "http://example.substack.com/"}, - context, - ) - ) - url_called = context.fetch.call_args[0][0] - assert url_called.startswith("https://example.substack.com") - - def test_no_cookie_header(self): - context = make_context(fetch_return_value=[]) - run( - substack.execute_action( - "get_publication_posts", - {"publication_url": "https://example.substack.com"}, - context, - ) - ) - headers = context.fetch.call_args[1].get("headers", {}) - assert "Cookie" not in headers - - -# ── get_post ────────────────────────────────────────────────────────────────── - - -class TestGetPost(unittest.TestCase): - MOCK_RESPONSE = { - "id": 123, - "slug": "hello-world", - "title": "Hello World", - "subtitle": "A subtitle", - "body_html": "

Content here

", - "post_date": "2024-01-01T00:00:00.000Z", - "canonical_url": "https://example.substack.com/p/hello-world", - "audience": "everyone", - "paywall": False, - "reading_time_minutes": 3, - "cover_image": None, - "like_count": 10, - "comment_count": 2, - "type": "newsletter", - "audio_url": None, - } - - def test_success(self): - context = make_context(fetch_return_value=self.MOCK_RESPONSE) - result = run( - substack.execute_action( - "get_post", - { - "publication_url": "https://example.substack.com", - "slug": "hello-world", - }, - context, - ) - ) - data = result.result.data - assert data["slug"] == "hello-world" - assert data["body_html"] == "

Content here

" - - def test_url_contains_slug(self): - context = make_context(fetch_return_value=self.MOCK_RESPONSE) - run( - substack.execute_action( - "get_post", - { - "publication_url": "https://example.substack.com", - "slug": "hello-world", - }, - context, - ) - ) - url_called = context.fetch.call_args[0][0] - assert "hello-world" in url_called - - -# ── search_publications ─────────────────────────────────────────────────────── - - -class TestSearchPublications(unittest.TestCase): - MOCK_RESPONSE = { - "publications": [ - { - "id": 1, - "name": "Example Newsletter", - "subdomain": "example", - "custom_domain": None, - "logo_url": None, - "description": "A newsletter about things", - "subscriber_count": 500, - } - ], - "more": False, - } - - def test_success(self): - context = make_context(fetch_return_value=self.MOCK_RESPONSE) - result = run( - substack.execute_action( - "search_publications", - {"query": "tech"}, - context, - ) - ) - data = result.result.data - assert len(data["publications"]) == 1 - assert data["more"] is False - - def test_passes_query_param(self): - context = make_context(fetch_return_value=self.MOCK_RESPONSE) - run( - substack.execute_action( - "search_publications", - {"query": "finance"}, - context, - ) - ) - call_kwargs = context.fetch.call_args - params = call_kwargs[1].get("params", {}) - assert params.get("query") == "finance" - - -# ── search_posts ────────────────────────────────────────────────────────────── - - -class TestSearchPosts(unittest.TestCase): - MOCK_RESPONSE = [ - { - "id": 99, - "slug": "matching-post", - "title": "Matching Post", - "subtitle": "", - "post_date": "2024-06-01T00:00:00.000Z", - "canonical_url": "https://example.substack.com/p/matching-post", - "audience": "everyone", - "paywall": False, - "reading_time_minutes": 2, - "cover_image": None, - "like_count": 5, - "comment_count": 1, - "type": "newsletter", - } - ] - - def test_success(self): - context = make_context(fetch_return_value=self.MOCK_RESPONSE) - result = 
run( - substack.execute_action( - "search_posts", - {"publication_url": "https://example.substack.com", "query": "keyword"}, - context, - ) - ) - data = result.result.data - assert len(data["posts"]) == 1 - assert data["posts"][0]["slug"] == "matching-post" - - def test_uses_archive_endpoint_with_search_param(self): - """search_posts must use /api/v1/archive?search= not /api/v1/posts/search (404).""" - context = make_context(fetch_return_value=[]) - run( - substack.execute_action( - "search_posts", - {"publication_url": "https://example.substack.com", "query": "keyword"}, - context, - ) - ) - url_called = context.fetch.call_args[0][0] - params = context.fetch.call_args[1].get("params", {}) - assert "/api/v1/archive" in url_called - assert params.get("search") == "keyword" - - def test_limit_maximum_passed(self): - context = make_context(fetch_return_value=[]) - run( - substack.execute_action( - "search_posts", - { - "publication_url": "https://example.substack.com", - "query": "x", - "limit": 50, - }, - context, - ) - ) - params = context.fetch.call_args[1].get("params", {}) - assert params.get("limit") == 50 - - -# ── get_post_comments ───────────────────────────────────────────────────────── - - -class TestGetPostComments(unittest.TestCase): - MOCK_RESPONSE = { - "comments": [ - { - "id": 1001, - "body": "Great post!", - "date": "2024-01-02T00:00:00.000Z", - "author_name": "Alice", - "author_id": 55, - "like_count": 3, - "children": [], - } - ] - } - - def test_success(self): - context = make_context(fetch_return_value=self.MOCK_RESPONSE) - result = run( - substack.execute_action( - "get_post_comments", - {"publication_url": "https://example.substack.com", "post_id": 123}, - context, - ) - ) - data = result.result.data - assert len(data["comments"]) == 1 - assert data["comments"][0]["author_name"] == "Alice" - - def test_url_uses_singular_post_path(self): - """URL must use /api/v1/post/ (singular), not /api/v1/posts/.""" - context = 
make_context(fetch_return_value=self.MOCK_RESPONSE) - run( - substack.execute_action( - "get_post_comments", - {"publication_url": "https://example.substack.com", "post_id": 123}, - context, - ) - ) - url_called = context.fetch.call_args[0][0] - assert "/api/v1/post/123/comments" in url_called - assert "/api/v1/posts/123/comments" not in url_called - - def test_all_comments_sent_as_string(self): - """Substack expects 'true'/'false' strings for the all_comments param.""" - context = make_context(fetch_return_value=self.MOCK_RESPONSE) - run( - substack.execute_action( - "get_post_comments", - { - "publication_url": "https://example.substack.com", - "post_id": 123, - "all_comments": False, - }, - context, - ) - ) - params = context.fetch.call_args[1].get("params", {}) - assert params.get("all_comments") == "false" - - -if __name__ == "__main__": - unittest.main() diff --git a/substack/tests/test_substack_integration.py b/substack/tests/test_substack_integration.py new file mode 100644 index 00000000..7281e08b --- /dev/null +++ b/substack/tests/test_substack_integration.py @@ -0,0 +1,304 @@ +""" +End-to-end integration tests for the Substack integration. + +Substack's public API requires no authentication — tests run against the live +API using a configurable public newsletter. + +Environment variables (all optional): + SUBSTACK_TEST_PUBLICATION_URL — base URL of a Substack publication to test + (default: https://www.astralcodexten.com) + SUBSTACK_TEST_POST_SLUG — slug of a specific post to use in get_post + tests; falls back to the first archive post + +Run (safe — all tests are read-only): + pytest substack/tests/test_substack_integration.py -m integration -v + +Never runs in CI — the default pytest marker filter (-m unit) excludes these, +and the file naming (test_*_integration.py) is not matched by python_files. 
+""" + +import os + +import aiohttp +import pytest +from autohive_integrations_sdk import FetchResponse, HTTPError, RateLimitError, ResultType +from unittest.mock import MagicMock, AsyncMock + +from substack import substack as substack_integration + +pytestmark = pytest.mark.integration + +TEST_PUBLICATION_URL = os.environ.get("SUBSTACK_TEST_PUBLICATION_URL", "https://www.astralcodexten.com") +TEST_POST_SLUG = os.environ.get("SUBSTACK_TEST_POST_SLUG", "") + + +@pytest.fixture +def live_context(): + async def real_fetch(url, *, method="GET", params=None, headers=None, json=None, data=None, **kwargs): + async with aiohttp.ClientSession() as session: + async with session.request(method, url, params=params, headers=headers or {}) as resp: + try: + resp_data = await resp.json(content_type=None) + except Exception: + resp_data = await resp.text() + if resp.status == 429: + retry_after = int(resp.headers.get("Retry-After", 60)) + raise RateLimitError(retry_after, resp.status, str(resp_data), resp_data) + if resp.status < 200 or resp.status >= 300: + raise HTTPError(resp.status, str(resp_data), resp_data) + return FetchResponse(status=resp.status, headers=dict(resp.headers), data=resp_data) + + ctx = MagicMock(name="ExecutionContext") + ctx.fetch = AsyncMock(side_effect=real_fetch) + ctx.auth = {} + return ctx + + +# ============================================================ +# GET PUBLICATION POSTS +# ============================================================ + + +class TestGetPublicationPosts: + async def test_returns_posts_list(self, live_context): + result = await substack_integration.execute_action( + "get_publication_posts", + {"publication_url": TEST_PUBLICATION_URL}, + live_context, + ) + assert result.type == ResultType.ACTION + assert "posts" in result.result.data + assert isinstance(result.result.data["posts"], list) + assert "count" in result.result.data + + async def test_limit_respected(self, live_context): + result = await 
substack_integration.execute_action( + "get_publication_posts", + {"publication_url": TEST_PUBLICATION_URL, "limit": 3}, + live_context, + ) + assert len(result.result.data["posts"]) <= 3 + + async def test_post_item_has_expected_fields(self, live_context): + result = await substack_integration.execute_action( + "get_publication_posts", + {"publication_url": TEST_PUBLICATION_URL, "limit": 1}, + live_context, + ) + posts = result.result.data["posts"] + if not posts: + pytest.skip("No posts returned from this publication") + post = posts[0] + assert "slug" in post + assert "title" in post + assert "post_date" in post + + async def test_sort_top(self, live_context): + result = await substack_integration.execute_action( + "get_publication_posts", + {"publication_url": TEST_PUBLICATION_URL, "sort": "top", "limit": 5}, + live_context, + ) + assert result.type == ResultType.ACTION + assert isinstance(result.result.data["posts"], list) + + async def test_pagination_returns_different_posts(self, live_context): + first = await substack_integration.execute_action( + "get_publication_posts", + {"publication_url": TEST_PUBLICATION_URL, "limit": 2, "offset": 0}, + live_context, + ) + second = await substack_integration.execute_action( + "get_publication_posts", + {"publication_url": TEST_PUBLICATION_URL, "limit": 2, "offset": 2}, + live_context, + ) + posts_first = first.result.data["posts"] + posts_second = second.result.data["posts"] + if posts_first and posts_second: + assert posts_first[0]["slug"] != posts_second[0]["slug"] + + +# ============================================================ +# GET POST +# ============================================================ + + +class TestGetPost: + async def test_returns_post_content(self, live_context): + list_result = await substack_integration.execute_action( + "get_publication_posts", + {"publication_url": TEST_PUBLICATION_URL, "limit": 1}, + live_context, + ) + posts = list_result.result.data["posts"] + if not posts: + 
pytest.skip("No posts available to fetch") + slug = TEST_POST_SLUG or posts[0]["slug"] + + result = await substack_integration.execute_action( + "get_post", + {"publication_url": TEST_PUBLICATION_URL, "slug": slug}, + live_context, + ) + assert result.type == ResultType.ACTION + data = result.result.data + assert "slug" in data + assert "title" in data + assert "body_html" in data + + async def test_slug_matches(self, live_context): + list_result = await substack_integration.execute_action( + "get_publication_posts", + {"publication_url": TEST_PUBLICATION_URL, "limit": 1}, + live_context, + ) + posts = list_result.result.data["posts"] + if not posts: + pytest.skip("No posts available") + slug = posts[0]["slug"] + + result = await substack_integration.execute_action( + "get_post", + {"publication_url": TEST_PUBLICATION_URL, "slug": slug}, + live_context, + ) + assert result.result.data["slug"] == slug + + async def test_nonexistent_slug_raises_http_error(self, live_context): + """A bad slug 404s — context.fetch raises HTTPError (SDK v2 raises on + non-ok), which execute_action does not catch, so it propagates.""" + with pytest.raises(HTTPError): + await substack_integration.execute_action( + "get_post", + { + "publication_url": TEST_PUBLICATION_URL, + "slug": "this-slug-definitely-does-not-exist-xyz123", + }, + live_context, + ) + + +# ============================================================ +# SEARCH PUBLICATIONS +# ============================================================ + + +class TestSearchPublications: + async def test_returns_publications(self, live_context): + result = await substack_integration.execute_action( + "search_publications", + {"query": "technology"}, + live_context, + ) + assert result.type == ResultType.ACTION + assert "publications" in result.result.data + assert isinstance(result.result.data["publications"], list) + assert "more" in result.result.data + + async def test_publication_has_expected_fields(self, live_context): + result = 
await substack_integration.execute_action( + "search_publications", + {"query": "newsletter"}, + live_context, + ) + pubs = result.result.data["publications"] + if not pubs: + pytest.skip("No publications returned for query") + pub = pubs[0] + assert "name" in pub + assert "subdomain" in pub + + async def test_limit_respected(self, live_context): + result = await substack_integration.execute_action( + "search_publications", + {"query": "technology", "limit": 3}, + live_context, + ) + assert len(result.result.data["publications"]) <= 3 + + +# ============================================================ +# SEARCH POSTS +# ============================================================ + + +class TestSearchPosts: + async def test_returns_posts(self, live_context): + result = await substack_integration.execute_action( + "search_posts", + {"publication_url": TEST_PUBLICATION_URL, "query": "a"}, + live_context, + ) + assert result.type == ResultType.ACTION + assert "posts" in result.result.data + assert isinstance(result.result.data["posts"], list) + assert "count" in result.result.data + + async def test_posts_have_expected_fields(self, live_context): + result = await substack_integration.execute_action( + "search_posts", + {"publication_url": TEST_PUBLICATION_URL, "query": "a", "limit": 1}, + live_context, + ) + posts = result.result.data["posts"] + if not posts: + pytest.skip("No posts matched search query") + assert "slug" in posts[0] + assert "title" in posts[0] + + +# ============================================================ +# GET POST COMMENTS +# ============================================================ + + +class TestGetPostComments: + async def test_returns_comments_structure(self, live_context): + list_result = await substack_integration.execute_action( + "get_publication_posts", + {"publication_url": TEST_PUBLICATION_URL, "limit": 5}, + live_context, + ) + posts = list_result.result.data["posts"] + if not posts: + pytest.skip("No posts available") + + 
post_id = posts[0].get("id") + if not post_id: + pytest.skip("Post has no numeric id") + + result = await substack_integration.execute_action( + "get_post_comments", + {"publication_url": TEST_PUBLICATION_URL, "post_id": post_id}, + live_context, + ) + assert result.type == ResultType.ACTION + assert "comments" in result.result.data + assert isinstance(result.result.data["comments"], list) + assert "count" in result.result.data + + async def test_count_matches_comments_length(self, live_context): + list_result = await substack_integration.execute_action( + "get_publication_posts", + {"publication_url": TEST_PUBLICATION_URL, "limit": 3}, + live_context, + ) + posts = list_result.result.data["posts"] + if not posts: + pytest.skip("No posts available") + + for post in posts: + post_id = post.get("id") + if not post_id: + continue + result = await substack_integration.execute_action( + "get_post_comments", + {"publication_url": TEST_PUBLICATION_URL, "post_id": post_id}, + live_context, + ) + data = result.result.data + assert data["count"] == len(data["comments"]) + return + + pytest.skip("No posts with numeric id found") diff --git a/substack/tests/test_substack_unit.py b/substack/tests/test_substack_unit.py new file mode 100644 index 00000000..e88fcffd --- /dev/null +++ b/substack/tests/test_substack_unit.py @@ -0,0 +1,552 @@ +""" +Unit tests for the Substack integration using mocked fetch. 
+""" + +import pytest +from autohive_integrations_sdk import FetchResponse, ResultType + +from substack import substack as substack_integration +from substack.substack import _normalise_url + +pytestmark = pytest.mark.unit + +# ============================================================ +# Sample data +# ============================================================ + +MOCK_POST = { + "id": 123, + "slug": "hello-world", + "title": "Hello World", + "subtitle": "A subtitle", + "post_date": "2024-01-01T00:00:00.000Z", + "canonical_url": "https://example.substack.com/p/hello-world", + "audience": "everyone", + "paywall": False, + "reading_time_minutes": 3, + "cover_image": None, + "like_count": 10, + "comment_count": 2, + "type": "newsletter", +} + +MOCK_FULL_POST = { + **MOCK_POST, + "body_html": "

Content here

", + "audio_url": None, +} + +MOCK_SEARCH_PUBS_RESPONSE = { + "publications": [ + { + "id": 1, + "name": "Example Newsletter", + "subdomain": "example", + "custom_domain": None, + "logo_url": None, + "description": "A newsletter about things", + "subscriber_count": 500, + } + ], + "more": False, +} + +MOCK_COMMENTS_RESPONSE = { + "comments": [ + { + "id": 1001, + "body": "Great post!", + "date": "2024-01-02T00:00:00.000Z", + "author_name": "Alice", + "author_id": 55, + "like_count": 3, + "children": [], + } + ] +} + + +# ============================================================ +# HELPERS — _normalise_url (pure function, tested directly) +# ============================================================ + + +def test_normalise_strips_trailing_slash(): + assert _normalise_url("https://example.substack.com/") == "https://example.substack.com" + + +def test_normalise_upgrades_http(): + assert _normalise_url("http://example.substack.com") == "https://example.substack.com" + + +def test_normalise_strips_path(): + assert _normalise_url("https://example.substack.com/p/some-post") == "https://example.substack.com" + + +def test_normalise_custom_domain_unchanged(): + assert _normalise_url("https://newsletter.example.com") == "https://newsletter.example.com" + + +def test_normalise_bare_hostname_no_scheme(): + assert _normalise_url("example.substack.com") == "https://example.substack.com" + + +# ============================================================ +# GET PUBLICATION POSTS +# ============================================================ + + +class TestGetPublicationPosts: + @pytest.mark.asyncio + async def test_success(self, mock_context): + mock_context.fetch.return_value = FetchResponse(status=200, headers={}, data=[MOCK_POST]) + result = await substack_integration.execute_action( + "get_publication_posts", + {"publication_url": "https://example.substack.com"}, + mock_context, + ) + assert result.type == ResultType.ACTION + assert result.result.data["count"] == 1 + 
assert result.result.data["posts"][0]["slug"] == "hello-world" + + @pytest.mark.asyncio + async def test_empty_response(self, mock_context): + mock_context.fetch.return_value = FetchResponse(status=200, headers={}, data=[]) + result = await substack_integration.execute_action( + "get_publication_posts", + {"publication_url": "https://example.substack.com"}, + mock_context, + ) + assert result.result.data["posts"] == [] + assert result.result.data["count"] == 0 + + @pytest.mark.asyncio + async def test_pagination_params_forwarded(self, mock_context): + mock_context.fetch.return_value = FetchResponse(status=200, headers={}, data=[]) + await substack_integration.execute_action( + "get_publication_posts", + {"publication_url": "https://example.substack.com", "offset": 12, "limit": 6}, + mock_context, + ) + params = mock_context.fetch.call_args.kwargs.get("params", {}) + assert params["offset"] == 12 + assert params["limit"] == 6 + + @pytest.mark.asyncio + async def test_sort_param_forwarded(self, mock_context): + mock_context.fetch.return_value = FetchResponse(status=200, headers={}, data=[]) + await substack_integration.execute_action( + "get_publication_posts", + {"publication_url": "https://example.substack.com", "sort": "top"}, + mock_context, + ) + params = mock_context.fetch.call_args.kwargs.get("params", {}) + assert params["sort"] == "top" + + @pytest.mark.asyncio + async def test_search_param_forwarded(self, mock_context): + mock_context.fetch.return_value = FetchResponse(status=200, headers={}, data=[]) + await substack_integration.execute_action( + "get_publication_posts", + {"publication_url": "https://example.substack.com", "search": "python"}, + mock_context, + ) + params = mock_context.fetch.call_args.kwargs.get("params", {}) + assert params.get("search") == "python" + + @pytest.mark.asyncio + async def test_url_normalised(self, mock_context): + mock_context.fetch.return_value = FetchResponse(status=200, headers={}, data=[]) + await 
substack_integration.execute_action( + "get_publication_posts", + {"publication_url": "http://example.substack.com/"}, + mock_context, + ) + url = mock_context.fetch.call_args.args[0] + assert url.startswith("https://example.substack.com") + + @pytest.mark.asyncio + async def test_no_cookie_header(self, mock_context): + mock_context.fetch.return_value = FetchResponse(status=200, headers={}, data=[]) + await substack_integration.execute_action( + "get_publication_posts", + {"publication_url": "https://example.substack.com"}, + mock_context, + ) + headers = mock_context.fetch.call_args.kwargs.get("headers", {}) + assert "Cookie" not in headers + + @pytest.mark.asyncio + async def test_null_cover_image_dropped(self, mock_context): + """cover_image=None is excluded from each post by _drop_none.""" + mock_context.fetch.return_value = FetchResponse(status=200, headers={}, data=[MOCK_POST]) + result = await substack_integration.execute_action( + "get_publication_posts", + {"publication_url": "https://example.substack.com"}, + mock_context, + ) + assert "cover_image" not in result.result.data["posts"][0] + + +# ============================================================ +# GET POST +# ============================================================ + + +class TestGetPost: + @pytest.mark.asyncio + async def test_success(self, mock_context): + mock_context.fetch.return_value = FetchResponse(status=200, headers={}, data=MOCK_FULL_POST) + result = await substack_integration.execute_action( + "get_post", + {"publication_url": "https://example.substack.com", "slug": "hello-world"}, + mock_context, + ) + assert result.type == ResultType.ACTION + assert result.result.data["slug"] == "hello-world" + assert result.result.data["body_html"] == "

Content here

" + assert result.result.data["like_count"] == 10 + + @pytest.mark.asyncio + async def test_url_contains_slug(self, mock_context): + mock_context.fetch.return_value = FetchResponse(status=200, headers={}, data=MOCK_FULL_POST) + await substack_integration.execute_action( + "get_post", + {"publication_url": "https://example.substack.com", "slug": "hello-world"}, + mock_context, + ) + url = mock_context.fetch.call_args.args[0] + assert "/api/v1/posts/hello-world" in url + + @pytest.mark.asyncio + async def test_none_fields_dropped(self, mock_context): + """audio_url and cover_image are None — _drop_none must exclude them.""" + mock_context.fetch.return_value = FetchResponse(status=200, headers={}, data=MOCK_FULL_POST) + result = await substack_integration.execute_action( + "get_post", + {"publication_url": "https://example.substack.com", "slug": "hello-world"}, + mock_context, + ) + assert "audio_url" not in result.result.data + assert "cover_image" not in result.result.data + + @pytest.mark.asyncio + async def test_audio_url_present_when_set(self, mock_context): + post_with_audio = {**MOCK_FULL_POST, "audio_url": "https://example.com/audio.mp3"} + mock_context.fetch.return_value = FetchResponse(status=200, headers={}, data=post_with_audio) + result = await substack_integration.execute_action( + "get_post", + {"publication_url": "https://example.substack.com", "slug": "hello-world"}, + mock_context, + ) + assert result.result.data["audio_url"] == "https://example.com/audio.mp3" + + @pytest.mark.asyncio + async def test_url_normalised(self, mock_context): + mock_context.fetch.return_value = FetchResponse(status=200, headers={}, data=MOCK_FULL_POST) + await substack_integration.execute_action( + "get_post", + {"publication_url": "http://example.substack.com/", "slug": "hello-world"}, + mock_context, + ) + url = mock_context.fetch.call_args.args[0] + assert url.startswith("https://example.substack.com") + + +# ============================================================ 
# SEARCH PUBLICATIONS
# ============================================================


class TestSearchPublications:
    """Tests for the ``search_publications`` action with ``context.fetch`` stubbed."""

    @pytest.mark.asyncio
    async def test_success(self, mock_context):
        """A stubbed 200 response produces an ACTION result with one publication and ``more`` False."""
        stub = FetchResponse(status=200, headers={}, data=MOCK_SEARCH_PUBS_RESPONSE)
        mock_context.fetch.return_value = stub
        action_inputs = {"query": "tech"}
        outcome = await substack_integration.execute_action("search_publications", action_inputs, mock_context)
        assert outcome.type == ResultType.ACTION
        payload = outcome.result.data
        assert len(payload["publications"]) == 1
        assert payload["more"] is False

    @pytest.mark.asyncio
    async def test_uses_global_base_url(self, mock_context):
        """The action is expected to call the global SUBSTACK_BASE search endpoint, not a per-publication URL."""
        stub = FetchResponse(status=200, headers={}, data=MOCK_SEARCH_PUBS_RESPONSE)
        mock_context.fetch.return_value = stub
        action_inputs = {"query": "tech"}
        await substack_integration.execute_action("search_publications", action_inputs, mock_context)
        # The URL is the first positional argument handed to context.fetch.
        requested_url = mock_context.fetch.call_args.args[0]
        assert "substack.com/api/v1/publication/search" in requested_url

    @pytest.mark.asyncio
    async def test_query_param_forwarded(self, mock_context):
        """The ``query`` input is forwarded unchanged as the ``query`` request parameter."""
        stub = FetchResponse(status=200, headers={}, data=MOCK_SEARCH_PUBS_RESPONSE)
        mock_context.fetch.return_value = stub
        action_inputs = {"query": "finance"}
        await substack_integration.execute_action("search_publications", action_inputs, mock_context)
        sent_params = mock_context.fetch.call_args.kwargs.get("params", {})
        assert sent_params["query"] == "finance"

    @pytest.mark.asyncio
    async def test_page_and_limit_forwarded(self, mock_context):
        """The ``page`` and ``limit`` inputs both reach the request parameters."""
        stub = FetchResponse(status=200, headers={}, data=MOCK_SEARCH_PUBS_RESPONSE)
        mock_context.fetch.return_value = stub
        action_inputs = {"query": "tech", "page": 2, "limit": 20}
        await substack_integration.execute_action("search_publications", action_inputs, mock_context)
        sent_params = mock_context.fetch.call_args.kwargs.get("params", {})
        assert sent_params["page"] == 2
        assert sent_params["limit"] == 20

    @pytest.mark.asyncio
    async def test_none_fields_dropped(self, mock_context):
        """custom_domain and logo_url are None in the mock — _drop_none must leave them out."""
        stub = FetchResponse(status=200, headers={}, data=MOCK_SEARCH_PUBS_RESPONSE)
        mock_context.fetch.return_value = stub
        action_inputs = {"query": "tech"}
        outcome = await substack_integration.execute_action("search_publications", action_inputs, mock_context)
        first_publication = outcome.result.data["publications"][0]
        assert "custom_domain" not in first_publication
        assert "logo_url" not in first_publication

    @pytest.mark.asyncio
    async def test_more_flag_true(self, mock_context):
        """A ``more: True`` flag in the API body is surfaced in the action output."""
        body_with_more = {**MOCK_SEARCH_PUBS_RESPONSE, "more": True}
        mock_context.fetch.return_value = FetchResponse(status=200, headers={}, data=body_with_more)
        action_inputs = {"query": "tech"}
        outcome = await substack_integration.execute_action("search_publications", action_inputs, mock_context)
        assert outcome.result.data["more"] is True


# ============================================================
# SEARCH POSTS
# ============================================================


class TestSearchPosts:
    """Tests for the ``search_posts`` action with ``context.fetch`` stubbed."""

    @pytest.mark.asyncio
    async def test_success(self, mock_context):
        """A single archive entry comes back as one formatted post with ``count`` 1."""
        matching_post = {
            "id": 99,
            "slug": "matching-post",
            "title": "Matching Post",
            "subtitle": "",
            "post_date": "2024-06-01T00:00:00.000Z",
            "canonical_url": "https://example.substack.com/p/matching-post",
            "audience": "everyone",
            "paywall": False,
            "reading_time_minutes": 2,
            "cover_image": None,
            "like_count": 5,
            "comment_count": 1,
            "type": "newsletter",
        }
        mock_context.fetch.return_value = FetchResponse(status=200, headers={}, data=[matching_post])
        action_inputs = {"publication_url": "https://example.substack.com", "query": "keyword"}
        outcome = await substack_integration.execute_action("search_posts", action_inputs, mock_context)
        assert outcome.type == ResultType.ACTION
        payload = outcome.result.data
        assert payload["posts"][0]["slug"] == "matching-post"
        assert payload["count"] == 1

    @pytest.mark.asyncio
    async def test_uses_archive_endpoint_with_search_param(self, mock_context):
        """/api/v1/posts/search 404s — the action must call /api/v1/archive with a ``search`` param instead."""
        mock_context.fetch.return_value = FetchResponse(status=200, headers={}, data=[])
        action_inputs = {"publication_url": "https://example.substack.com", "query": "keyword"}
        await substack_integration.execute_action("search_posts", action_inputs, mock_context)
        recorded_call = mock_context.fetch.call_args
        requested_url = recorded_call.args[0]
        sent_params = recorded_call.kwargs.get("params", {})
        assert "/api/v1/archive" in requested_url
        assert sent_params.get("search") == "keyword"

    @pytest.mark.asyncio
    async def test_offset_param_forwarded(self, mock_context):
        """The ``offset`` input is forwarded as the ``offset`` request parameter."""
        mock_context.fetch.return_value = FetchResponse(status=200, headers={}, data=[])
        action_inputs = {"publication_url": "https://example.substack.com", "query": "x", "offset": 10}
        await substack_integration.execute_action("search_posts", action_inputs, mock_context)
        sent_params = mock_context.fetch.call_args.kwargs.get("params", {})
        assert sent_params["offset"] == 10

    @pytest.mark.asyncio
    async def test_empty_results(self, mock_context):
        """An empty archive list yields an empty ``posts`` list and ``count`` 0."""
        mock_context.fetch.return_value = FetchResponse(status=200, headers={}, data=[])
        action_inputs = {"publication_url": "https://example.substack.com", "query": "noresults"}
        outcome = await substack_integration.execute_action("search_posts", action_inputs, mock_context)
        payload = outcome.result.data
        assert payload["posts"] == []
        assert payload["count"] == 0

    @pytest.mark.asyncio
    async def test_max_limit_forwarded(self, mock_context):
        """limit=50 (the schema maximum) is passed through to the API unchanged."""
        mock_context.fetch.return_value = FetchResponse(status=200, headers={}, data=[])
        action_inputs = {"publication_url": "https://example.substack.com", "query": "x", "limit": 50}
        await substack_integration.execute_action("search_posts", action_inputs, mock_context)
        sent_params = mock_context.fetch.call_args.kwargs.get("params", {})
        assert sent_params["limit"] == 50


# ============================================================
# GET POST COMMENTS
# ============================================================


class TestGetPostComments:
    """Tests for the ``get_post_comments`` action with ``context.fetch`` stubbed."""

    @pytest.mark.asyncio
    async def test_success(self, mock_context):
        """The mocked comments body yields an ACTION result whose first author is Alice and ``count`` is 1."""
        stub = FetchResponse(status=200, headers={}, data=MOCK_COMMENTS_RESPONSE)
        mock_context.fetch.return_value = stub
        action_inputs = {"publication_url": "https://example.substack.com", "post_id": 123}
        outcome = await substack_integration.execute_action("get_post_comments", action_inputs, mock_context)
        assert outcome.type == ResultType.ACTION
        payload = outcome.result.data
        assert payload["comments"][0]["author_name"] == "Alice"
        assert payload["count"] == 1

    @pytest.mark.asyncio
    async def test_url_uses_singular_post_path(self, mock_context):
        """The request URL must use /api/v1/post/ (singular), not /api/v1/posts/."""
        stub = FetchResponse(status=200, headers={}, data=MOCK_COMMENTS_RESPONSE)
        mock_context.fetch.return_value = stub
        action_inputs = {"publication_url": "https://example.substack.com", "post_id": 123}
        await substack_integration.execute_action("get_post_comments", action_inputs, mock_context)
        requested_url = mock_context.fetch.call_args.args[0]
        assert "/api/v1/post/123/comments" in requested_url
        assert "/api/v1/posts/" not in requested_url

    @pytest.mark.asyncio
    async def test_all_comments_true_sent_as_string(self, mock_context):
        """Substack expects the string 'true', not a boolean.

        ``all_comments`` is omitted from the inputs here, so this checks the
        value forwarded when the caller does not supply it.
        """
        stub = FetchResponse(status=200, headers={}, data=MOCK_COMMENTS_RESPONSE)
        mock_context.fetch.return_value = stub
        action_inputs = {"publication_url": "https://example.substack.com", "post_id": 123}
        await substack_integration.execute_action("get_post_comments", action_inputs, mock_context)
        sent_params = mock_context.fetch.call_args.kwargs.get("params", {})
        assert sent_params["all_comments"] == "true"

    @pytest.mark.asyncio
    async def test_all_comments_false_sent_as_string(self, mock_context):
        """An explicit ``all_comments=False`` input is forwarded as the string 'false'."""
        stub = FetchResponse(status=200, headers={}, data=MOCK_COMMENTS_RESPONSE)
        mock_context.fetch.return_value = stub
        action_inputs = {"publication_url": "https://example.substack.com", "post_id": 123, "all_comments": False}
        await substack_integration.execute_action("get_post_comments", action_inputs, mock_context)
        sent_params = mock_context.fetch.call_args.kwargs.get("params", {})
        assert sent_params["all_comments"] == "false"

    @pytest.mark.asyncio
    async def test_sort_param_forwarded(self, mock_context):
        """The ``sort`` input is forwarded as the ``sort`` request parameter."""
        stub = FetchResponse(status=200, headers={}, data=MOCK_COMMENTS_RESPONSE)
        mock_context.fetch.return_value = stub
        action_inputs = {"publication_url": "https://example.substack.com", "post_id": 123, "sort": "newest"}
        await substack_integration.execute_action("get_post_comments", action_inputs, mock_context)
        sent_params = mock_context.fetch.call_args.kwargs.get("params", {})
        assert sent_params["sort"] == "newest"

    @pytest.mark.asyncio
    async def test_empty_response(self, mock_context):
        """An empty response body yields an empty ``comments`` list and ``count`` 0."""
        mock_context.fetch.return_value = FetchResponse(status=200, headers={}, data={})
        action_inputs = {"publication_url": "https://example.substack.com", "post_id": 123}
        outcome = await substack_integration.execute_action("get_post_comments", action_inputs, mock_context)
        payload = outcome.result.data
        assert payload["comments"] == []
        assert payload["count"] == 0

    @pytest.mark.asyncio
    async def test_null_comment_fields_pass_output_validation(self, mock_context):
        """Deleted/anonymous comments return null body, author_name, etc.

        Comments are passed through raw (not via _drop_none), so the output
        schema must allow null for these fields — otherwise SDK v2 output
        validation raises VALIDATION_ERROR on a successful 200. Confirmed
        against the live API: deleted comments have ``"body": null``.
        """
        deleted_comment = {
            "id": 2002,
            "body": None,
            "date": None,
            "author_name": None,
            "author_id": None,
            "like_count": None,
            "children": [],
        }
        mock_context.fetch.return_value = FetchResponse(
            status=200,
            headers={},
            data={"comments": [deleted_comment]},
        )
        action_inputs = {"publication_url": "https://example.substack.com", "post_id": 123}
        outcome = await substack_integration.execute_action("get_post_comments", action_inputs, mock_context)
        assert outcome.type == ResultType.ACTION
        payload = outcome.result.data
        assert payload["count"] == 1
        assert payload["comments"][0]["body"] is None


# ============================================================
# VALIDATION
# ============================================================


class TestValidation:
    """Tests that invalid calls are reported as VALIDATION_ERROR results."""

    @pytest.mark.asyncio
    async def test_missing_required_input(self, mock_context):
        """Calling ``get_publication_posts`` with no inputs yields a VALIDATION_ERROR result."""
        no_inputs = {}  # publication_url is required
        outcome = await substack_integration.execute_action("get_publication_posts", no_inputs, mock_context)
        assert outcome.type == ResultType.VALIDATION_ERROR

    @pytest.mark.asyncio
    async def test_unknown_action(self, mock_context):
        """An action name the integration does not define yields a VALIDATION_ERROR result."""
        outcome = await substack_integration.execute_action("nonexistent_action", {}, mock_context)
        assert outcome.type == ResultType.VALIDATION_ERROR