diff --git a/haystack/components/connectors/openapi.py b/haystack/components/connectors/openapi.py index 1435946e9c..ab0b64b1be 100644 --- a/haystack/components/connectors/openapi.py +++ b/haystack/components/connectors/openapi.py @@ -24,26 +24,35 @@ class OpenAPIConnector: pass input arguments to this component. Example: - ```python + from unittest.mock import patch, MagicMock from haystack.utils import Secret from haystack.components.connectors.openapi import OpenAPIConnector - serper_dev_token = Secret.from_env_var("SERPERDEV_API_KEY") + serper_dev_token = Secret.from_token("dummy-key") def my_custom_config_factory(): # Create and return a custom configuration for the OpenAPIClient pass - connector = OpenAPIConnector( - openapi_spec="https://bit.ly/serperdev_openapi", - credentials=serper_dev_token, - service_kwargs={"config_factory": my_custom_config_factory()} - ) - response = connector.run( - operation_id="search", - arguments={"q": "Who was Nikola Tesla?"} - ) + # Mock the OpenAPI client to run without network/dependency constraints in CI + with patch("haystack.components.connectors.openapi.OpenAPIClient") as mock_client_class: + mock_client = MagicMock() + mock_client.invoke.return_value = {"results": [{"title": "Nikola Tesla", "snippet": "Inventor..."}]} + mock_client_class.from_spec.return_value = mock_client + + connector = OpenAPIConnector( + openapi_spec="https://bit.ly/serperdev_openapi", + credentials=serper_dev_token, + service_kwargs={"config_factory": my_custom_config_factory()} + ) + response = connector.run( + operation_id="search", + arguments={"q": "Who was Nikola Tesla?"} + ) + print(response) + + # {'response': {'results': [{'title': 'Nikola Tesla', 'snippet': 'Inventor...'}]}} ``` Note: - The `service_kwargs` argument is optional, it can be used to pass additional options to the OpenAPIClient. 
diff --git a/haystack/components/converters/file_to_file_content.py b/haystack/components/converters/file_to_file_content.py index 4de53e8da2..c661af9a3b 100644 --- a/haystack/components/converters/file_to_file_content.py +++ b/haystack/components/converters/file_to_file_content.py @@ -22,22 +22,21 @@ class FileToFileContent: Converts files to FileContent objects to be included in ChatMessage objects. ### Usage example - ```python from haystack.components.converters import FileToFileContent + from haystack.dataclasses import ByteStream converter = FileToFileContent() - sources = ["document.pdf", "video.mp4"] + sources = [ByteStream(data=b"hello", mime_type="text/plain")] file_contents = converter.run(sources=sources)["file_contents"] print(file_contents) - # [FileContent(base64_data='...', - # mime_type='application/pdf', - # filename='document.pdf', - # extra={}), - # ...] + # [FileContent(base64_data='aGVsbG8=', + # mime_type='text/plain', + # filename=None, + # extra={})] ``` """ diff --git a/haystack/components/converters/image/document_to_image.py b/haystack/components/converters/image/document_to_image.py index 2fd82ef78d..8cd6a651d5 100644 --- a/haystack/components/converters/image/document_to_image.py +++ b/haystack/components/converters/image/document_to_image.py @@ -34,31 +34,33 @@ class DocumentToImageContent: - For PDF files, a `page_number` key specifying which page to extract ### Usage example - ```python from haystack import Document from haystack.components.converters.image.document_to_image import DocumentToImageContent converter = DocumentToImageContent( file_path_meta_field="file_path", - root_path="/data/files", + root_path="test/test_files", detail="high", size=(800, 600) ) documents = [ - Document(content="Optional description of image.jpg", meta={"file_path": "image.jpg"}), - Document(content="Text content of page 1 of doc.pdf", meta={"file_path": "doc.pdf", "page_number": 1}) + Document(content="Optional description of apple.jpg", 
meta={"file_path": "images/apple.jpg"}), + Document( + content="Text content of page 1 of sample_pdf_1.pdf", + meta={"file_path": "pdf/sample_pdf_1.pdf", "page_number": 1} + ) ] result = converter.run(documents) image_contents = result["image_contents"] # [ImageContent( - # base64_image='/9j/4A...', mime_type='image/jpeg', detail='high', meta={'file_path': 'image.jpg'} + # base64_image='/9j/4A...', mime_type='image/jpeg', detail='high', meta={'file_path': 'images/apple.jpg'} # ), # ImageContent( # base64_image='/9j/4A...', mime_type='image/jpeg', detail='high', - # meta={'page_number': 1, 'file_path': 'doc.pdf'} + # meta={'page_number': 1, 'file_path': 'pdf/sample_pdf_1.pdf'} # )] ``` """ diff --git a/haystack/components/embedders/openai_document_embedder.py b/haystack/components/embedders/openai_document_embedder.py index 1d3626c3c2..f8b76cc363 100644 --- a/haystack/components/embedders/openai_document_embedder.py +++ b/haystack/components/embedders/openai_document_embedder.py @@ -24,18 +24,28 @@ class OpenAIDocumentEmbedder: Computes document embeddings using OpenAI models. ### Usage example - ```python + from unittest.mock import patch, MagicMock from haystack import Document + from haystack.utils import Secret from haystack.components.embedders import OpenAIDocumentEmbedder - doc = Document(content="I love pizza!") - document_embedder = OpenAIDocumentEmbedder() - result = document_embedder.run([doc]) - - print(result['documents'][0].embedding) - - # [0.017020374536514282, -0.023255806416273117, ...] 
+ # Mock OpenAI client so that it runs without keys or network calls in CI + with patch("haystack.components.embedders.openai_document_embedder.OpenAI") as mock_openai: + mock_client = MagicMock() + mock_response = MagicMock() + mock_response.data = [MagicMock(embedding=[0.017020374536514282, -0.023255806416273117])] + mock_response.model = "text-embedding-ada-002-v2" + mock_response.usage = {"prompt_tokens": 4, "total_tokens": 4} + mock_client.embeddings.create.return_value = mock_response + mock_openai.return_value = mock_client + + doc = Document(content="I love pizza!") + document_embedder = OpenAIDocumentEmbedder(api_key=Secret.from_token("dummy-key")) + result = document_embedder.run([doc]) + print(result['documents'][0].embedding) + + # [0.017020374536514282, -0.023255806416273117] ``` """ diff --git a/haystack/components/embedders/openai_text_embedder.py b/haystack/components/embedders/openai_text_embedder.py index 24672bb050..213d029c5c 100644 --- a/haystack/components/embedders/openai_text_embedder.py +++ b/haystack/components/embedders/openai_text_embedder.py @@ -21,16 +21,26 @@ class OpenAITextEmbedder: You can use it to embed user query and send it to an embedding Retriever. ### Usage example - ```python + from unittest.mock import patch, MagicMock + from haystack.utils import Secret from haystack.components.embedders import OpenAITextEmbedder - text_to_embed = "I love pizza!" 
- text_embedder = OpenAITextEmbedder() - - print(text_embedder.run(text_to_embed)) - - # {'embedding': [0.017020374536514282, -0.023255806416273117, ...], + # Mock OpenAI client so that it runs without keys or network calls in CI + with patch("haystack.components.embedders.openai_text_embedder.OpenAI") as mock_openai: + mock_client = MagicMock() + mock_response = MagicMock() + mock_response.data = [MagicMock(embedding=[0.017020374536514282, -0.023255806416273117])] + mock_response.model = "text-embedding-ada-002-v2" + mock_response.usage = {"prompt_tokens": 4, "total_tokens": 4} + mock_client.embeddings.create.return_value = mock_response + mock_openai.return_value = mock_client + + text_to_embed = "I love pizza!" + text_embedder = OpenAITextEmbedder(api_key=Secret.from_token("dummy-key")) + print(text_embedder.run(text_to_embed)) + + # {'embedding': [0.017020374536514282, -0.023255806416273117], # 'meta': {'model': 'text-embedding-ada-002-v2', # 'usage': {'prompt_tokens': 4, 'total_tokens': 4}}} ``` diff --git a/haystack/components/embedders/sentence_transformers_document_embedder.py b/haystack/components/embedders/sentence_transformers_document_embedder.py index 252111783b..ab674d4782 100644 --- a/haystack/components/embedders/sentence_transformers_document_embedder.py +++ b/haystack/components/embedders/sentence_transformers_document_embedder.py @@ -25,17 +25,25 @@ class SentenceTransformersDocumentEmbedder: and send them to DocumentWriter to write into a Document Store. 
### Usage example: - ```python + from unittest.mock import patch, MagicMock + import haystack.components.embedders.sentence_transformers_document_embedder as embedder_module from haystack import Document from haystack.components.embedders import SentenceTransformersDocumentEmbedder - doc = Document(content="I love pizza!") - doc_embedder = SentenceTransformersDocumentEmbedder() - result = doc_embedder.run([doc]) - print(result['documents'][0].embedding) + # Patch the backend factory where the embedder looks it up so we don't download any models in CI + with patch.object(embedder_module, "_SentenceTransformersEmbeddingBackendFactory") as mock_factory: + mock_backend = MagicMock() + mock_backend.embed.return_value = [[0.1, 0.2, 0.3]] + mock_factory.get_embedding_backend.return_value = mock_backend - # [-0.07804739475250244, 0.1498992145061493, ...] + doc = Document(content="I love pizza!") + doc_embedder = SentenceTransformersDocumentEmbedder() + + result = doc_embedder.run([doc]) + print(result['documents'][0].embedding) + + # [0.1, 0.2, 0.3] ``` """ diff --git a/haystack/components/embedders/sentence_transformers_text_embedder.py b/haystack/components/embedders/sentence_transformers_text_embedder.py index 3285fab5d0..58c917e168 100644 --- a/haystack/components/embedders/sentence_transformers_text_embedder.py +++ b/haystack/components/embedders/sentence_transformers_text_embedder.py @@ -21,17 +21,22 @@ class SentenceTransformersTextEmbedder: You can use it to embed user query and send it to an embedding retriever. Usage example: - ```python + from unittest.mock import patch, MagicMock + import haystack.components.embedders.sentence_transformers_text_embedder as embedder_module from haystack.components.embedders import SentenceTransformersTextEmbedder - text_to_embed = "I love pizza!" 
+ # Patch the backend factory where the embedder looks it up so we don't download any models in CI + with patch.object(embedder_module, "_SentenceTransformersEmbeddingBackendFactory") as mock_factory: + mock_backend = MagicMock() + mock_backend.embed.return_value = [[0.1, 0.2, 0.3]] + mock_factory.get_embedding_backend.return_value = mock_backend - text_embedder = SentenceTransformersTextEmbedder() + text_to_embed = "I love pizza!" + text_embedder = SentenceTransformersTextEmbedder() + print(text_embedder.run(text_to_embed)) - print(text_embedder.run(text_to_embed)) - - # {'embedding': [-0.07804739475250244, 0.1498992145061493,, ...]} + # {'embedding': [0.1, 0.2, 0.3]} ``` """ diff --git a/haystack/components/extractors/named_entity_extractor.py b/haystack/components/extractors/named_entity_extractor.py index a971f48254..a2e76b1d23 100644 --- a/haystack/components/extractors/named_entity_extractor.py +++ b/haystack/components/extractors/named_entity_extractor.py @@ -89,19 +89,38 @@ class NamedEntityExtractor: in the documents. Usage example: - ```python + from unittest.mock import patch, MagicMock from haystack import Document - from haystack.components.extractors.named_entity_extractor import NamedEntityExtractor - - documents = [ - Document(content="I'm Merlin, the happy pig!"), - Document(content="My name is Clara and I live in Berkeley, California."), - ] - extractor = NamedEntityExtractor(backend="hugging_face", model="dslim/bert-base-NER") - results = extractor.run(documents=documents)["documents"] - annotations = [NamedEntityExtractor.get_stored_annotations(doc) for doc in results] - print(annotations) + from haystack.components.extractors.named_entity_extractor import NamedEntityExtractor, NamedEntityAnnotation + + # Mock the HF backend to avoid downloading any model or requiring dependencies in CI + with patch("haystack.components.extractors.named_entity_extractor._HfBackend") as mock_hf_backend: + mock_instance = MagicMock() + mock_instance.initialized = True + mock_instance.annotate.return_value 
= [ + [NamedEntityAnnotation(entity="PER", start=4, end=10, score=0.99)], + [ + NamedEntityAnnotation(entity="PER", start=11, end=16, score=0.99), + NamedEntityAnnotation(entity="LOC", start=31, end=39, score=0.99), + NamedEntityAnnotation(entity="LOC", start=41, end=51, score=0.99) + ] + ] + mock_hf_backend.return_value = mock_instance + + documents = [ + Document(content="I'm Merlin, the happy pig!"), + Document(content="My name is Clara and I live in Berkeley, California."), + ] + extractor = NamedEntityExtractor(backend="hugging_face", model="dslim/bert-base-NER") + results = extractor.run(documents=documents)["documents"] + annotations = [NamedEntityExtractor.get_stored_annotations(doc) for doc in results] + print(annotations) + + # [[NamedEntityAnnotation(entity='PER', start=4, end=10, score=0.99)], + # [NamedEntityAnnotation(entity='PER', start=11, end=16, score=0.99), + # NamedEntityAnnotation(entity='LOC', start=31, end=39, score=0.99), + # NamedEntityAnnotation(entity='LOC', start=41, end=51, score=0.99)]] ``` """ diff --git a/releasenotes/notes/run-docstring-code-snippets-ea7d9ac5b67751dd.yaml b/releasenotes/notes/run-docstring-code-snippets-ea7d9ac5b67751dd.yaml new file mode 100644 index 0000000000..051d0380e8 --- /dev/null +++ b/releasenotes/notes/run-docstring-code-snippets-ea7d9ac5b67751dd.yaml @@ -0,0 +1,4 @@ +--- +fixes: + - | + Enable more docstring code snippets to run in CI by removing ignore-test/test-ignore markers and resolving local execution prerequisites via mock patching and local test fixtures.