diff --git a/packages/bigframes/bigframes/bigquery/_operations/ai.py b/packages/bigframes/bigframes/bigquery/_operations/ai.py index 1070671f5a1f..2e35c4e49ef0 100644 --- a/packages/bigframes/bigframes/bigquery/_operations/ai.py +++ b/packages/bigframes/bigframes/bigquery/_operations/ai.py @@ -57,9 +57,9 @@ def generate( **Examples:** - >>> import bigframes.pandas as bpd - >>> import bigframes.bigquery as bbq - >>> country = bpd.Series(["Japan", "Canada"]) + >>> import bigframes.pandas as bpd # doctest: +SKIP + >>> import bigframes.bigquery as bbq # doctest: +SKIP + >>> country = bpd.Series(["Japan", "Canada"]) # doctest: +SKIP >>> bbq.ai.generate(("What's the capital city of ", country, " one word only")) # doctest: +SKIP 0 {'result': 'Tokyo', 'full_response': '{"cand... 1 {'result': 'Ottawa', 'full_response': '{"can... @@ -70,30 +70,23 @@ def generate( 1 Ottawa Name: result, dtype: string - You get structured output when the `output_schema` parameter is set: + You get structured output when the ``output_schema`` parameter is set: - >>> animals = bpd.Series(["Rabbit", "Spider"]) - >>> bbq.ai.generate(animals, output_schema={"number_of_legs": "INT64", "is_herbivore": "BOOL"}) + >>> animals = bpd.Series(["Rabbit", "Spider"]) # doctest: +SKIP + >>> bbq.ai.generate(animals, output_schema={"number_of_legs": "INT64", "is_herbivore": "BOOL"}) # doctest: +SKIP 0 {'is_herbivore': True, 'number_of_legs': 4, 'f... 1 {'is_herbivore': False, 'number_of_legs': 8, '... dtype: struct>, status: string>[pyarrow] - .. note:: - - This product or feature is subject to the "Pre-GA Offerings Terms" in the General Service Terms section of the - Service Specific Terms(https://cloud.google.com/terms/service-terms#1). Pre-GA products and features are available "as is" - and might have limited support. For more information, see the launch stage descriptions - (https://cloud.google.com/products#product-launch-stages). - Args: prompt (str | Series | List[str|Series] | Tuple[str|Series, ...]): A mixture of Series and string literals that specifies the prompt to send to the model. The Series can be BigFrames Series or pandas Series. connection_id (str, optional): - Specifies the connection to use to communicate with the model. For example, `myproject.us.myconnection`. + Specifies the connection to use to communicate with the model. For example, ``myproject.us.myconnection``. If not provided, the query uses your end-user credential. endpoint (str, optional): - Specifies the Vertex AI endpoint to use for the model. For example `"gemini-2.5-flash"`. You can specify any + Specifies the Vertex AI endpoint to use for the model. For example ``"gemini-2.5-flash"``. You can specify any generally available or preview Gemini model. If you specify the model name, BigQuery ML automatically identifies and uses the full endpoint of the model. If you don't specify an ENDPOINT value, BigQuery ML selects a recent stable version of Gemini to use. @@ -109,7 +102,7 @@ def generate( Provides additional parameters to the model. The MODEL_PARAMS value must conform to the generateContent request body format. output_schema (Mapping[str, str]): A mapping value that specifies the schema of the output, in the form {field_name: data_type}. Supported data types include - `STRING`, `INT64`, `FLOAT64`, `BOOL`, `ARRAY`, and `STRUCT`. + ``STRING``, ``INT64``, ``FLOAT64``, ``BOOL``, ``ARRAY``, and ``STRUCT``. Returns: bigframes.series.Series: A new struct Series with the result data. The struct contains these fields: @@ -158,40 +151,33 @@ def generate_bool( **Examples:** - >>> import bigframes.pandas as bpd - >>> import bigframes.bigquery as bbq + >>> import bigframes.pandas as bpd # doctest: +SKIP + >>> import bigframes.bigquery as bbq # doctest: +SKIP >>> df = bpd.DataFrame({ ... "col_1": ["apple", "bear", "pear"], ... "col_2": ["fruit", "animal", "animal"] - ... }) - >>> bbq.ai.generate_bool((df["col_1"], " is a ", df["col_2"])) + ... }) # doctest: +SKIP + >>> bbq.ai.generate_bool((df["col_1"], " is a ", df["col_2"])) # doctest: +SKIP 0 {'result': True, 'full_response': '{"candidate... 1 {'result': True, 'full_response': '{"candidate... 2 {'result': False, 'full_response': '{"candidat... dtype: struct>, status: string>[pyarrow] - >>> bbq.ai.generate_bool((df["col_1"], " is a ", df["col_2"])).struct.field("result") + >>> bbq.ai.generate_bool((df["col_1"], " is a ", df["col_2"])).struct.field("result") # doctest: +SKIP 0 True 1 True 2 False Name: result, dtype: boolean - .. note:: - - This product or feature is subject to the "Pre-GA Offerings Terms" in the General Service Terms section of the - Service Specific Terms(https://cloud.google.com/terms/service-terms#1). Pre-GA products and features are available "as is" - and might have limited support. For more information, see the launch stage descriptions - (https://cloud.google.com/products#product-launch-stages). - Args: prompt (str | Series | List[str|Series] | Tuple[str|Series, ...]): A mixture of Series and string literals that specifies the prompt to send to the model. The Series can be BigFrames Series or pandas Series. connection_id (str, optional): - Specifies the connection to use to communicate with the model. For example, `myproject.us.myconnection`. + Specifies the connection to use to communicate with the model. For example, ``myproject.us.myconnection``. If not provided, the query uses your end-user credential. endpoint (str, optional): - Specifies the Vertex AI endpoint to use for the model. For example `"gemini-2.5-flash"`. You can specify any + Specifies the Vertex AI endpoint to use for the model. For example ``"gemini-2.5-flash"``. You can specify any generally available or preview Gemini model. If you specify the model name, BigQuery ML automatically identifies and uses the full endpoint of the model. If you don't specify an ENDPOINT value, BigQuery ML selects a recent stable version of Gemini to use. @@ -242,37 +228,30 @@ def generate_int( **Examples:** - >>> import bigframes.pandas as bpd - >>> import bigframes.bigquery as bbq - >>> animal = bpd.Series(["Kangaroo", "Rabbit", "Spider"]) - >>> bbq.ai.generate_int(("How many legs does a ", animal, " have?")) + >>> import bigframes.pandas as bpd # doctest: +SKIP + >>> import bigframes.bigquery as bbq # doctest: +SKIP + >>> animal = bpd.Series(["Kangaroo", "Rabbit", "Spider"]) # doctest: +SKIP + >>> bbq.ai.generate_int(("How many legs does a ", animal, " have?")) # doctest: +SKIP 0 {'result': 2, 'full_response': '{"candidates":... 1 {'result': 4, 'full_response': '{"candidates":... 2 {'result': 8, 'full_response': '{"candidates":... dtype: struct>, status: string>[pyarrow] - >>> bbq.ai.generate_int(("How many legs does a ", animal, " have?")).struct.field("result") + >>> bbq.ai.generate_int(("How many legs does a ", animal, " have?")).struct.field("result") # doctest: +SKIP 0 2 1 4 2 8 Name: result, dtype: Int64 - .. note:: - - This product or feature is subject to the "Pre-GA Offerings Terms" in the General Service Terms section of the - Service Specific Terms(https://cloud.google.com/terms/service-terms#1). Pre-GA products and features are available "as is" - and might have limited support. For more information, see the launch stage descriptions - (https://cloud.google.com/products#product-launch-stages). - Args: prompt (str | Series | List[str|Series] | Tuple[str|Series, ...]): A mixture of Series and string literals that specifies the prompt to send to the model. The Series can be BigFrames Series or pandas Series. connection_id (str, optional): - Specifies the connection to use to communicate with the model. For example, `myproject.us.myconnection`. + Specifies the connection to use to communicate with the model. For example, ``myproject.us.myconnection``. If not provided, the query uses your end-user credential. endpoint (str, optional): - Specifies the Vertex AI endpoint to use for the model. For example `"gemini-2.5-flash"`. You can specify any + Specifies the Vertex AI endpoint to use for the model. For example ``"gemini-2.5-flash"``. You can specify any generally available or preview Gemini model. If you specify the model name, BigQuery ML automatically identifies and uses the full endpoint of the model. If you don't specify an ENDPOINT value, BigQuery ML selects a recent stable version of Gemini to use. @@ -323,37 +302,30 @@ def generate_double( **Examples:** - >>> import bigframes.pandas as bpd - >>> import bigframes.bigquery as bbq - >>> animal = bpd.Series(["Kangaroo", "Rabbit", "Spider"]) - >>> bbq.ai.generate_double(("How many legs does a ", animal, " have?")) + >>> import bigframes.pandas as bpd # doctest: +SKIP + >>> import bigframes.bigquery as bbq # doctest: +SKIP + >>> animal = bpd.Series(["Kangaroo", "Rabbit", "Spider"]) # doctest: +SKIP + >>> bbq.ai.generate_double(("How many legs does a ", animal, " have?")) # doctest: +SKIP 0 {'result': 2.0, 'full_response': '{"candidates... 1 {'result': 4.0, 'full_response': '{"candidates... 2 {'result': 8.0, 'full_response': '{"candidates... dtype: struct>, status: string>[pyarrow] - >>> bbq.ai.generate_double(("How many legs does a ", animal, " have?")).struct.field("result") + >>> bbq.ai.generate_double(("How many legs does a ", animal, " have?")).struct.field("result") # doctest: +SKIP 0 2.0 1 4.0 2 8.0 Name: result, dtype: Float64 - .. note:: - - This product or feature is subject to the "Pre-GA Offerings Terms" in the General Service Terms section of the - Service Specific Terms(https://cloud.google.com/terms/service-terms#1). Pre-GA products and features are available "as is" - and might have limited support. For more information, see the launch stage descriptions - (https://cloud.google.com/products#product-launch-stages). - Args: prompt (str | Series | List[str|Series] | Tuple[str|Series, ...]): A mixture of Series and string literals that specifies the prompt to send to the model. The Series can be BigFrames Series or pandas Series. connection_id (str, optional): - Specifies the connection to use to communicate with the model. For example, `myproject.us.myconnection`. + Specifies the connection to use to communicate with the model. For example, ``myproject.us.myconnection``. If not provided, the query uses your end-user credential. endpoint (str, optional): - Specifies the Vertex AI endpoint to use for the model. For example `"gemini-2.5-flash"`. You can specify any + Specifies the Vertex AI endpoint to use for the model. For example ``"gemini-2.5-flash"``. You can specify any generally available or preview Gemini model. If you specify the model name, BigQuery ML automatically identifies and uses the full endpoint of the model. If you don't specify an ENDPOINT value, BigQuery ML selects a recent stable version of Gemini to use. @@ -407,9 +379,9 @@ def generate_embedding( **Examples:** - >>> import bigframes.pandas as bpd - >>> import bigframes.bigquery as bbq - >>> df = bpd.DataFrame({"content": ["apple", "bear", "pear"]}) + >>> import bigframes.pandas as bpd # doctest: +SKIP + >>> import bigframes.bigquery as bbq # doctest: +SKIP + >>> df = bpd.DataFrame({"content": ["apple", "bear", "pear"]}) # doctest: +SKIP >>> bbq.ai.generate_embedding( ... "project.dataset.model_name", ... df @@ -510,9 +482,9 @@ def generate_text( **Examples:** - >>> import bigframes.pandas as bpd - >>> import bigframes.bigquery as bbq - >>> df = bpd.DataFrame({"prompt": ["write a poem about apples"]}) + >>> import bigframes.pandas as bpd # doctest: +SKIP + >>> import bigframes.bigquery as bbq # doctest: +SKIP + >>> df = bpd.DataFrame({"prompt": ["write a poem about apples"]}) # doctest: +SKIP >>> bbq.ai.generate_text( ... "project.dataset.model_name", ... df @@ -622,12 +594,12 @@ def generate_table( **Examples:** - >>> import bigframes.pandas as bpd - >>> import bigframes.bigquery as bbq + >>> import bigframes.pandas as bpd # doctest: +SKIP + >>> import bigframes.bigquery as bbq # doctest: +SKIP >>> # The user is responsible for constructing a DataFrame that contains >>> # the necessary columns for the model's prompt. For example, a >>> # DataFrame with a 'prompt' column for text classification. - >>> df = bpd.DataFrame({'prompt': ["some text to classify"]}) + >>> df = bpd.DataFrame({'prompt': ["some text to classify"]}) # doctest: +SKIP >>> result = bbq.ai.generate_table( ... "project.dataset.model_name", ... data=df, @@ -645,7 +617,7 @@ def generate_table( output_schema (str | Mapping[str, str]): A string defining the output schema (e.g., "col1 STRING, col2 INT64"), or a mapping value that specifies the schema of the output, in the form {field_name: data_type}. - Supported data types include `STRING`, `INT64`, `FLOAT64`, `BOOL`, `ARRAY`, and `STRUCT`. + Supported data types include ``STRING``, ``INT64``, ``FLOAT64``, ``BOOL``, ``ARRAY``, and ``STRUCT``. temperature (float, optional): A FLOAT64 value that is used for sampling promiscuity. The value must be in the range ``[0.0, 1.0]``. @@ -733,8 +705,8 @@ def embed( **Examples:** - >>> import bigframes.pandas as bpd - >>> import bigframes.bigquery as bbq + >>> import bigframes.pandas as bpd # doctest: +SKIP + >>> import bigframes.bigquery as bbq # doctest: +SKIP >>> bbq.ai.embed("dog", endpoint="text-embedding-005") # doctest: +SKIP 0 {'result': array([ 1.78243860e-03, -1.10658340... @@ -748,28 +720,28 @@ def embed( endpoint (str, optional): A string value that specifies a supported Vertex AI embedding model endpoint to use. The endpoint value that you specify must include the model version, for example, - `"text-embedding-005"`. If you specify this parameter, you can't specify the - `model` parameter. + ``"text-embedding-005"``. If you specify this parameter, you can't specify the + ``model`` parameter. model (str, optional): A string value that specifies a built-in embedding model. The only supported value is - `"embeddinggemma-300m"`. If you specify this parameter, you can't specify the `endpoint`, - `title`, `model_params`, or `connection_id` parameters. + ``"embeddinggemma-300m"``. If you specify this parameter, you can't specify the ``endpoint``, + ``title``, ``model_params``, or ``connection_id`` parameters. task_type (str, optional): A string literal that specifies the intended downstream application to help the model - produce better quality embeddings. Accepts `"retrieval_query"`, `"retrieval_document"`, - `"semantic_similarity"`, `"classification"`, `"clustering"`, `"question_answering"`, - `"fact_verification"`, `"code_retrieval_query"`. + produce better quality embeddings. Accepts ``"retrieval_query"``, ``"retrieval_document"``, + ``"semantic_similarity"``, ``"classification"``, ``"clustering"``, ``"question_answering"``, + ``"fact_verification"``, ``"code_retrieval_query"``. title (str, optional): A string value that specifies the document title, which the model uses to improve - embedding quality. You can only use this parameter if you specify `"retrieval_document"` - for the `task_type` value. + embedding quality. You can only use this parameter if you specify ``"retrieval_document"`` + for the ``task_type`` value. model_params (Mapping[Any, Any], optional): A JSON literal that provides additional parameters to the model. For example, - `{"outputDimensionality": 768}` lets you specify the number of dimensions to use when + ``{"outputDimensionality": 768}`` lets you specify the number of dimensions to use when generating embeddings. connection_id (str, optional): A STRING value specifying the connection to use to communicate with the model, in the - format `PROJECT_ID.LOCATION.CONNECTION_ID`. For example, `myproject.us.myconnection`. + format ``PROJECT_ID.LOCATION.CONNECTION_ID``. For example, ``myproject.us.myconnection``. If not provided, the query uses your end-user credential. Returns: @@ -778,21 +750,6 @@ def embed( * "status": a STRING value that contains the API response status for the corresponding row. This value is empty if the operation was successful. """ - if model is not None: - if any(x is not None for x in [endpoint, title, model_params, connection_id]): - raise ValueError( - "You cannot specify endpoint, title, model_params, or connection_id when the model is set." - ) - elif endpoint is None: - raise ValueError( - "You must specify exactly one of 'endpoint' or 'model' argument." - ) - - if title is not None and task_type != "retrieval_document": - raise ValueError( - "You can only use 'title' parameter if you specify retrieval_document for the task_type value." - ) - operator = ai_ops.AIEmbed( endpoint=endpoint, model=model, @@ -822,21 +779,21 @@ def if_( max_error_ratio: float | None = None, ) -> series.Series: """ - Evaluates the prompt to True or False. Compared to `ai.generate_bool()`, this function + Evaluates the prompt to True or False. Compared to ``ai.generate_bool()``, this function provides optimization such that not all rows are evaluated with the LLM. **Examples:** - >>> import bigframes.pandas as bpd - >>> import bigframes.bigquery as bbq - >>> us_state = bpd.Series(["Massachusetts", "Illinois", "Hawaii"]) - >>> bbq.ai.if_((us_state, " has a city called Springfield")) + >>> import bigframes.pandas as bpd # doctest: +SKIP + >>> import bigframes.bigquery as bbq # doctest: +SKIP + >>> us_state = bpd.Series(["Massachusetts", "Illinois", "Hawaii"]) # doctest: +SKIP + >>> bbq.ai.if_((us_state, " has a city called Springfield")) # doctest: +SKIP 0 True 1 True 2 False dtype: boolean - >>> us_state[bbq.ai.if_((us_state, " has a city called Springfield"))] + >>> us_state[bbq.ai.if_((us_state, " has a city called Springfield"))] # doctest: +SKIP 0 Massachusetts 1 Illinois dtype: string @@ -846,10 +803,10 @@ def if_( A mixture of Series and string literals that specifies the prompt to send to the model. The Series can be BigFrames Series or pandas Series. connection_id (str, optional): - Specifies the connection to use to communicate with the model. For example, `myproject.us.myconnection`. + Specifies the connection to use to communicate with the model. For example, ``myproject.us.myconnection``. If not provided, the query uses your end-user credential. endpoint (str, optional): - Specifies the Vertex AI endpoint to use for the model. For example `"gemini-2.5-flash"`. You can specify any + Specifies the Vertex AI endpoint to use for the model. For example ``"gemini-2.5-flash"``. You can specify any generally available or preview Gemini model. If you specify the model name, BigQuery ML automatically identifies and uses the full endpoint of the model. If you don't specify an ENDPOINT value, BigQuery ML dynamically chooses a model based on your query to have the best cost to quality tradeoff for the task. @@ -860,7 +817,7 @@ def if_( max_error_ratio (float): A float value between 0.0 and 1.0 that contains the maximum acceptable ratio of row-level inference failures to rows processed on this function. If this value is exceeded, then the query fails. The default value is 1.0. - This argument isn't supported when `optimization_mode` is set to "minimize_cost". + This argument isn't supported when ``optimization_mode`` is set to "minimize_cost". Returns: bigframes.series.Series: A new series of bools. @@ -896,11 +853,11 @@ def classify( **Examples:** - >>> import bigframes.pandas as bpd - >>> import bigframes.bigquery as bbq - >>> df = bpd.DataFrame({'creature': ['Cat', 'Salmon']}) - >>> df['type'] = bbq.ai.classify(df['creature'], ['Mammal', 'Fish']) - >>> df + >>> import bigframes.pandas as bpd # doctest: +SKIP + >>> import bigframes.bigquery as bbq # doctest: +SKIP + >>> df = bpd.DataFrame({'creature': ['Cat', 'Salmon']}) # doctest: +SKIP + >>> df['type'] = bbq.ai.classify(df['creature'], ['Mammal', 'Fish']) # doctest: +SKIP + >>> df # doctest: +SKIP creature type 0 Cat Mammal 1 Salmon Fish @@ -918,7 +875,7 @@ def classify( that you expect. You can provide examples to help the model understand your intended threshold for a condition with nuanced or subjective logic. We recommend providing at most 5 examples. connection_id (str, optional): - Specifies the connection to use to communicate with the model. For example, `myproject.us.myconnection`. + Specifies the connection to use to communicate with the model. For example, ``myproject.us.myconnection``. If not provided, the query uses your end-user credential. endpoint (str, optional): A STRING value that specifies the Vertex AI endpoint to use for the model. You can specify any @@ -969,9 +926,9 @@ def score( **Examples:** - >>> import bigframes.pandas as bpd - >>> import bigframes.bigquery as bbq - >>> animal = bpd.Series(["Tiger", "Rabbit", "Blue Whale"]) + >>> import bigframes.pandas as bpd # doctest: +SKIP + >>> import bigframes.bigquery as bbq # doctest: +SKIP + >>> animal = bpd.Series(["Tiger", "Rabbit", "Blue Whale"]) # doctest: +SKIP >>> bbq.ai.score(("Rank the relative weights of ", animal, " on the scale from 1 to 3")) # doctest: +SKIP 0 2.0 1 1.0 @@ -983,15 +940,15 @@ def score( A mixture of Series and string literals that specifies the prompt to send to the model. The Series can be BigFrames Series or pandas Series. connection_id (str, optional): - Specifies the connection to use to communicate with the model. For example, `myproject.us.myconnection`. + Specifies the connection to use to communicate with the model. For example, ``myproject.us.myconnection``. If not provided, the query uses your end-user credential. endpoint (str, optional): - Specifies the Vertex AI endpoint to use for the model. For example `"gemini-2.5-flash"`. You can specify any + Specifies the Vertex AI endpoint to use for the model. For example ``"gemini-2.5-flash"``. You can specify any generally available or preview Gemini model. If you specify the model name, BigQuery ML automatically identifies and uses the full endpoint of the model. If you don't specify an endpoint value, BigQuery ML dynamically chooses a model based on your query to have the best cost to quality tradeoff for the task. max_error_ratio (float, optional): - A value between `0.0` and `1.0` that contains the maximum acceptable ratio of row-level inference failures to + A value between ``0.0`` and ``1.0`` that contains the maximum acceptable ratio of row-level inference failures to rows processed on this function. If this value is exceeded, then the query fails. Returns: @@ -1026,9 +983,9 @@ def similarity( **Examples:** - >>> import bigframes.pandas as bpd - >>> import bigframes.bigquery as bbq - >>> df = bpd.DataFrame({'word': ['happy', 'sad']}) + >>> import bigframes.pandas as bpd # doctest: +SKIP + >>> import bigframes.bigquery as bbq # doctest: +SKIP + >>> df = bpd.DataFrame({'word': ['happy', 'sad']}) # doctest: +SKIP >>> bbq.ai.similarity(df['word'], 'glad', endpoint='text-embedding-005') # doctest: +SKIP 0 0.916601 1 0.660579 @@ -1040,26 +997,19 @@ def similarity( A string or series that provides the second value to compare. Both a BigFrames Series or a pandas Series are allowed. endpoint (str, optional): Specifies the Vertex AI endpoint to use for the text embedding model. - If you specify the model name, such as `'text-embedding-005'`, rather than a URL, then BigQuery ML automatically identifies the model and uses the model's full endpoint. + If you specify the model name, such as ``'text-embedding-005'``, rather than a URL, then BigQuery ML automatically identifies the model and uses the model's full endpoint. model (str, optional): Specifies a built-in text embedding model. The only supported value is the embeddinggemma-300m model. - If you specify this parameter, you can't specify the `endpoint`, `model_params`, or `connection_id` parameters. + If you specify this parameter, you can't specify the ``endpoint``, ``model_params``, or ``connection_id`` parameters. model_params (Mapping[Any, Any], optional): Provides additional parameters to the model. You can use any of the parameters object fields. - One of these fields, `outputDimensionality`, lets you specify the number of dimensions to use when generating embeddings. + One of these fields, ``outputDimensionality``, lets you specify the number of dimensions to use when generating embeddings. connection_id (str, optional): - Specifies the connection to use to communicate with the model. For example, `myproject.us.myconnection`. + Specifies the connection to use to communicate with the model. For example, ``myproject.us.myconnection``. Returns: bigframes.series.Series: A new series of FLOAT64 values representing the cosine similarity. """ - if model is not None: - if any(x is not None for x in [endpoint, model_params, connection_id]): - raise ValueError( - "If 'model' is specified, you cannot specify 'endpoint', 'model_params', or 'connection_id'." - ) - elif endpoint is None: - raise ValueError("You must specify either 'model' or 'endpoint'.") operator = ai_ops.AISimilarity( endpoint=endpoint, @@ -1108,20 +1058,13 @@ def forecast( """ Forecast time series at future horizon. Using Google Research's open source TimesFM(https://github.com/google-research/timesfm) model. - .. note:: - - This product or feature is subject to the "Pre-GA Offerings Terms" in the General Service Terms section of the - Service Specific Terms(https://cloud.google.com/terms/service-terms#1). Pre-GA products and features are available "as is" - and might have limited support. For more information, see the launch stage descriptions - (https://cloud.google.com/products#product-launch-stages). - **Examples:** Forecast using a pandas DataFrame: - >>> import pandas as pd - >>> import bigframes.pandas as bpd - >>> df = pd.DataFrame({"value": [1, 2, 3], "time": pd.to_datetime(["2020-01-01", "2020-01-02", "2020-01-03"])}) + >>> import pandas as pd # doctest: +SKIP + >>> import bigframes.pandas as bpd # doctest: +SKIP + >>> df = pd.DataFrame({"value": [1, 2, 3], "time": pd.to_datetime(["2020-01-01", "2020-01-02", "2020-01-03"])}) # doctest: +SKIP >>> bpd.options.display.progress_bar = None # doctest: +SKIP >>> forecasted_pandas_df = df.bigquery.ai.forecast(data_col="value", timestamp_col="time", horizon=2) # doctest: +SKIP >>> type(forecasted_pandas_df) # doctest: +SKIP @@ -1129,7 +1072,7 @@ def forecast( Forecast using a BigFrames DataFrame: - >>> bf_df = bpd.DataFrame({"value": [1, 2, 3], "time": pd.to_datetime(["2020-01-01", "2020-01-02", "2020-01-03"])}) + >>> bf_df = bpd.DataFrame({"value": [1, 2, 3], "time": pd.to_datetime(["2020-01-01", "2020-01-02", "2020-01-03"])}) # doctest: +SKIP >>> forecasted_bf_df = bf_df.bigquery.ai.forecast(data_col="value", timestamp_col="time", horizon=2) # doctest: +SKIP >>> type(forecasted_bf_df) # doctest: +SKIP diff --git a/packages/bigframes/bigframes/functions/_function_session.py b/packages/bigframes/bigframes/functions/_function_session.py index d715c650486d..b4773b93e2fa 100644 --- a/packages/bigframes/bigframes/functions/_function_session.py +++ b/packages/bigframes/bigframes/functions/_function_session.py @@ -20,18 +20,18 @@ import inspect import sys import threading +import warnings from typing import ( + TYPE_CHECKING, Any, - cast, Dict, Literal, Mapping, Optional, Sequence, - TYPE_CHECKING, Union, + cast, ) -import warnings import google.api_core.exceptions from google.cloud import ( @@ -41,9 +41,9 @@ resourcemanager_v3, ) -from bigframes import clients import bigframes.exceptions as bfe import bigframes.formatting_helpers as bf_formatting +from bigframes import clients from bigframes.functions import function as bq_functions from bigframes.functions import udf_def diff --git a/packages/bigframes/bigframes/functions/function.py b/packages/bigframes/bigframes/functions/function.py index f197579113ae..4dee14674042 100644 --- a/packages/bigframes/bigframes/functions/function.py +++ b/packages/bigframes/bigframes/functions/function.py @@ -14,10 +14,9 @@ from __future__ import annotations -import logging -from typing import Callable, Optional, Protocol, runtime_checkable, TYPE_CHECKING - import dataclasses +import logging +from typing import TYPE_CHECKING, Callable, Optional, Protocol, runtime_checkable import google.api_core.exceptions from google.cloud import bigquery @@ -28,8 +27,8 @@ if TYPE_CHECKING: import bigframes.core.col - from bigframes.session import Session import bigframes.series + from bigframes.session import Session logger = logging.getLogger(__name__) @@ -192,8 +191,8 @@ def __call__(self, *args, **kwargs): if self._local_fun: return self._local_fun(*args, **kwargs) # avoid circular imports - from bigframes.core.compile.sqlglot import sql as sg_sql import bigframes.session._io.bigquery as bf_io_bigquery + from bigframes.core.compile.sqlglot import sql as sg_sql args_string = ", ".join([sg_sql.to_sql(sg_sql.literal(v)) for v in args]) sql = f"SELECT `{str(self._udf_def.routine_ref)}`({args_string})" diff --git a/packages/bigframes/tests/system/small/bigquery/test_ai.py b/packages/bigframes/tests/system/small/bigquery/test_ai.py index 421d83db8e08..7fc01bca8eeb 100644 --- a/packages/bigframes/tests/system/small/bigquery/test_ai.py +++ b/packages/bigframes/tests/system/small/bigquery/test_ai.py @@ -290,34 +290,6 @@ def test_ai_embed_string_content(session): ) -def test_ai_embed_no_endpoint_or_model_raises_error(session): - content = bpd.Series(["dog"], session=session) - - with pytest.raises(ValueError): - bbq.ai.embed(content) - - -def test_ai_embed_both_model_and_endpoint_are_set_raises_error(session): - content = bpd.Series(["dog"], session=session) - - with pytest.raises(ValueError): - bbq.ai.embed( - content, endpoint="text-embedding-005", model="embeddinggemma-300m model" - ) - - -def test_ai_embed_title_and_task_type_mismatch_raises_error(session): - content = bpd.Series(["dog"], session=session) - - with pytest.raises(ValueError): - bbq.ai.embed( - content, - endpoint="text-embedding-005", - title="my title", - task_type="text_similarity", - ) - - def test_ai_if(session): s1 = bpd.Series(["apple", "bear"], session=session) s2 = bpd.Series(["fruit", "tree"], session=session) @@ -476,23 +448,5 @@ def test_ai_similarity_both_contents_are_string_literals(session): assert result.dtype == dtypes.FLOAT_DTYPE -def test_ai_similarity_no_endpoint_or_model__raises_error(session): - s1 = bpd.Series(["happy", "sad"], session=session) - s2 = bpd.Series(["glad", "angry"], session=session) - - with pytest.raises(ValueError): - bbq.ai.similarity(s1, s2) - - -def test_ai_similarity_both_endpoint_and_model__raises_error(session): - s1 = "happy" - s2 = "glad" - - with pytest.raises(ValueError): - bbq.ai.similarity( - s1, s2, endpoint="text-embedding-005", model="embeddinggemma-300m" - ) - - def _contains_no_nulls(s: series.Series) -> bool: return len(s) == s.count()