Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
31 changes: 20 additions & 11 deletions haystack/components/connectors/openapi.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,26 +24,35 @@ class OpenAPIConnector:
pass input arguments to this component.

Example:
<!-- test-ignore -->
```python
from unittest.mock import patch, MagicMock
from haystack.utils import Secret
from haystack.components.connectors.openapi import OpenAPIConnector

serper_dev_token = Secret.from_env_var("SERPERDEV_API_KEY")
serper_dev_token = Secret.from_token("dummy-key")

def my_custom_config_factory():
# Create and return a custom configuration for the OpenAPIClient
pass

connector = OpenAPIConnector(
openapi_spec="https://bit.ly/serperdev_openapi",
credentials=serper_dev_token,
service_kwargs={"config_factory": my_custom_config_factory()}
)
response = connector.run(
operation_id="search",
arguments={"q": "Who was Nikola Tesla?"}
)
# Mock the OpenAPI client to run without network/dependency constraints in CI
with patch("haystack.components.connectors.openapi.OpenAPIClient") as mock_client_class:
mock_client = MagicMock()
mock_client.invoke.return_value = {"results": [{"title": "Nikola Tesla", "snippet": "Inventor..."}]}
mock_client_class.from_spec.return_value = mock_client

connector = OpenAPIConnector(
openapi_spec="https://bit.ly/serperdev_openapi",
credentials=serper_dev_token,
service_kwargs={"config_factory": my_custom_config_factory()}
)
response = connector.run(
operation_id="search",
arguments={"q": "Who was Nikola Tesla?"}
)
print(response)

# {'response': {'results': [{'title': 'Nikola Tesla', 'snippet': 'Inventor...'}]}}
```
Note:
- The `service_kwargs` argument is optional; it can be used to pass additional options to the OpenAPIClient.
Expand Down
13 changes: 6 additions & 7 deletions haystack/components/converters/file_to_file_content.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,22 +22,21 @@ class FileToFileContent:
Converts files to FileContent objects to be included in ChatMessage objects.

### Usage example
<!-- test-ignore -->
```python
from haystack.components.converters import FileToFileContent
from haystack.dataclasses import ByteStream

converter = FileToFileContent()

sources = ["document.pdf", "video.mp4"]
sources = [ByteStream(data=b"hello", mime_type="text/plain")]

file_contents = converter.run(sources=sources)["file_contents"]
print(file_contents)

# [FileContent(base64_data='...',
# mime_type='application/pdf',
# filename='document.pdf',
# extra={}),
# ...]
# [FileContent(base64_data='aGVsbG8=',
# mime_type='text/plain',
# filename=None,
# extra={})]
```
"""

Expand Down
14 changes: 8 additions & 6 deletions haystack/components/converters/image/document_to_image.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,31 +34,33 @@ class DocumentToImageContent:
- For PDF files, a `page_number` key specifying which page to extract

### Usage example
<!-- test-ignore -->
```python
from haystack import Document
from haystack.components.converters.image.document_to_image import DocumentToImageContent

converter = DocumentToImageContent(
file_path_meta_field="file_path",
root_path="/data/files",
root_path="test/test_files",
detail="high",
size=(800, 600)
)

documents = [
Document(content="Optional description of image.jpg", meta={"file_path": "image.jpg"}),
Document(content="Text content of page 1 of doc.pdf", meta={"file_path": "doc.pdf", "page_number": 1})
Document(content="Optional description of apple.jpg", meta={"file_path": "images/apple.jpg"}),
Document(
content="Text content of page 1 of sample_pdf_1.pdf",
meta={"file_path": "pdf/sample_pdf_1.pdf", "page_number": 1}
)
]

result = converter.run(documents)
image_contents = result["image_contents"]
# [ImageContent(
# base64_image='/9j/4A...', mime_type='image/jpeg', detail='high', meta={'file_path': 'image.jpg'}
# base64_image='/9j/4A...', mime_type='image/jpeg', detail='high', meta={'file_path': 'images/apple.jpg'}
# ),
# ImageContent(
# base64_image='/9j/4A...', mime_type='image/jpeg', detail='high',
# meta={'page_number': 1, 'file_path': 'doc.pdf'}
# meta={'page_number': 1, 'file_path': 'pdf/sample_pdf_1.pdf'}
# )]
```
"""
Expand Down
26 changes: 18 additions & 8 deletions haystack/components/embedders/openai_document_embedder.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,18 +24,28 @@ class OpenAIDocumentEmbedder:
Computes document embeddings using OpenAI models.

### Usage example
<!-- test-ignore -->
```python
from unittest.mock import patch, MagicMock
from haystack import Document
from haystack.utils import Secret
from haystack.components.embedders import OpenAIDocumentEmbedder

doc = Document(content="I love pizza!")
document_embedder = OpenAIDocumentEmbedder()
result = document_embedder.run([doc])

print(result['documents'][0].embedding)

# [0.017020374536514282, -0.023255806416273117, ...]
# Mock OpenAI client so that it runs without keys or network calls in CI
with patch("haystack.components.embedders.openai_document_embedder.OpenAI") as mock_openai:
mock_client = MagicMock()
mock_response = MagicMock()
mock_response.data = [MagicMock(embedding=[0.017020374536514282, -0.023255806416273117])]
mock_response.model = "text-embedding-ada-002-v2"
mock_response.usage = {"prompt_tokens": 4, "total_tokens": 4}
mock_client.embeddings.create.return_value = mock_response
mock_openai.return_value = mock_client

doc = Document(content="I love pizza!")
document_embedder = OpenAIDocumentEmbedder(api_key=Secret.from_token("dummy-key"))
result = document_embedder.run([doc])
print(result['documents'][0].embedding)

# [0.017020374536514282, -0.023255806416273117]
```
"""

Expand Down
24 changes: 17 additions & 7 deletions haystack/components/embedders/openai_text_embedder.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,16 +21,26 @@ class OpenAITextEmbedder:
You can use it to embed user query and send it to an embedding Retriever.

### Usage example
<!-- test-ignore -->
```python
from unittest.mock import patch, MagicMock
from haystack.utils import Secret
from haystack.components.embedders import OpenAITextEmbedder

text_to_embed = "I love pizza!"
text_embedder = OpenAITextEmbedder()

print(text_embedder.run(text_to_embed))

# {'embedding': [0.017020374536514282, -0.023255806416273117, ...],
# Mock OpenAI client so that it runs without keys or network calls in CI
with patch("haystack.components.embedders.openai_text_embedder.OpenAI") as mock_openai:
mock_client = MagicMock()
mock_response = MagicMock()
mock_response.data = [MagicMock(embedding=[0.017020374536514282, -0.023255806416273117])]
mock_response.model = "text-embedding-ada-002-v2"
mock_response.usage = {"prompt_tokens": 4, "total_tokens": 4}
mock_client.embeddings.create.return_value = mock_response
mock_openai.return_value = mock_client

text_to_embed = "I love pizza!"
text_embedder = OpenAITextEmbedder(api_key=Secret.from_token("dummy-key"))
print(text_embedder.run(text_to_embed))

# {'embedding': [0.017020374536514282, -0.023255806416273117],
# 'meta': {'model': 'text-embedding-ada-002-v2',
# 'usage': {'prompt_tokens': 4, 'total_tokens': 4}}}
```
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -25,17 +25,25 @@ class SentenceTransformersDocumentEmbedder:
and send them to DocumentWriter to write into a Document Store.

### Usage example:
<!-- test-ignore -->
```python
from unittest.mock import patch, MagicMock
import haystack.components.embedders.backends.sentence_transformers_backend as stb
from haystack import Document
from haystack.components.embedders import SentenceTransformersDocumentEmbedder
doc = Document(content="I love pizza!")
doc_embedder = SentenceTransformersDocumentEmbedder()

result = doc_embedder.run([doc])
print(result['documents'][0].embedding)
# Mock the embedding backend so we don't download any models in CI
with patch.object(stb, "_SentenceTransformersEmbeddingBackendFactory") as mock_factory:
mock_backend = MagicMock()
mock_backend.embed.return_value = [[0.1, 0.2, 0.3]]
mock_factory.get_embedding_backend.return_value = mock_backend

# [-0.07804739475250244, 0.1498992145061493, ...]
doc = Document(content="I love pizza!")
doc_embedder = SentenceTransformersDocumentEmbedder()

result = doc_embedder.run([doc])
print(result['documents'][0].embedding)

# [0.1, 0.2, 0.3]
```
"""

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -21,17 +21,22 @@ class SentenceTransformersTextEmbedder:
You can use it to embed user query and send it to an embedding retriever.

Usage example:
<!-- test-ignore -->
```python
from unittest.mock import patch, MagicMock
import haystack.components.embedders.backends.sentence_transformers_backend as stb
from haystack.components.embedders import SentenceTransformersTextEmbedder

text_to_embed = "I love pizza!"
# Mock the embedding backend so we don't download any models in CI
with patch.object(stb, "_SentenceTransformersEmbeddingBackendFactory") as mock_factory:
mock_backend = MagicMock()
mock_backend.embed.return_value = [[0.1, 0.2, 0.3]]
mock_factory.get_embedding_backend.return_value = mock_backend

text_embedder = SentenceTransformersTextEmbedder()
text_to_embed = "I love pizza!"
text_embedder = SentenceTransformersTextEmbedder()
print(text_embedder.run(text_to_embed))

print(text_embedder.run(text_to_embed))

# {'embedding': [-0.07804739475250244, 0.1498992145061493,, ...]}
# {'embedding': [0.1, 0.2, 0.3]}
```
"""

Expand Down
41 changes: 30 additions & 11 deletions haystack/components/extractors/named_entity_extractor.py
Original file line number Diff line number Diff line change
Expand Up @@ -89,19 +89,38 @@ class NamedEntityExtractor:
in the documents.

Usage example:
<!-- test-ignore -->
```python
from unittest.mock import patch, MagicMock
from haystack import Document
from haystack.components.extractors.named_entity_extractor import NamedEntityExtractor

documents = [
Document(content="I'm Merlin, the happy pig!"),
Document(content="My name is Clara and I live in Berkeley, California."),
]
extractor = NamedEntityExtractor(backend="hugging_face", model="dslim/bert-base-NER")
results = extractor.run(documents=documents)["documents"]
annotations = [NamedEntityExtractor.get_stored_annotations(doc) for doc in results]
print(annotations)
from haystack.components.extractors.named_entity_extractor import NamedEntityExtractor, NamedEntityAnnotation

# Mock the HF backend to avoid downloading any model or requiring dependencies in CI
with patch("haystack.components.extractors.named_entity_extractor._HfBackend") as mock_hf_backend:
mock_instance = MagicMock()
mock_instance.initialized = True
mock_instance.annotate.return_value = [
[NamedEntityAnnotation(entity="PER", start=4, end=10, score=0.99)],
[
NamedEntityAnnotation(entity="PER", start=11, end=16, score=0.99),
NamedEntityAnnotation(entity="LOC", start=31, end=39, score=0.99),
NamedEntityAnnotation(entity="LOC", start=41, end=51, score=0.99)
]
]
mock_hf_backend.return_value = mock_instance

documents = [
Document(content="I'm Merlin, the happy pig!"),
Document(content="My name is Clara and I live in Berkeley, California."),
]
extractor = NamedEntityExtractor(backend="hugging_face", model="dslim/bert-base-NER")
results = extractor.run(documents=documents)["documents"]
annotations = [NamedEntityExtractor.get_stored_annotations(doc) for doc in results]
print(annotations)

# [[NamedEntityAnnotation(entity='PER', start=4, end=10, score=0.99)],
# [NamedEntityAnnotation(entity='PER', start=11, end=16, score=0.99),
# NamedEntityAnnotation(entity='LOC', start=31, end=39, score=0.99),
# NamedEntityAnnotation(entity='LOC', start=41, end=51, score=0.99)]]
```
"""

Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
---
fixes:
- |
Enable more docstring code snippets to run in CI by removing their ignore-test/test-ignore markers and satisfying their execution prerequisites with mock patching (for external services and model downloads) and local test fixtures (for file-based examples).