diff --git a/semantic_router/encoders/openai.py b/semantic_router/encoders/openai.py index e4acc5a4..4bc86ac2 100644 --- a/semantic_router/encoders/openai.py +++ b/semantic_router/encoders/openai.py @@ -42,6 +42,7 @@ class OpenAIEncoder(BaseEncoder): token_limit: int = 8192 # default value, should be replaced by config _token_encoder: Any = PrivateAttr() type: str = "openai" + max_retries: int = 3 def __init__( self, @@ -51,6 +52,7 @@ def __init__( openai_org_id: Optional[str] = None, score_threshold: Optional[float] = None, dimensions: Union[int, NotGiven] = NotGiven(), + max_retries: int = 3, ): if name is None: name = EncoderDefault.OPENAI.value["embedding_model"] @@ -72,6 +74,8 @@ def __init__( openai_org_id = openai_org_id or os.getenv("OPENAI_ORG_ID") if api_key is None: raise ValueError("OpenAI API key cannot be 'None'.") + if max_retries is not None: + self.max_retries = max_retries try: self.client = openai.Client( base_url=base_url, api_key=api_key, organization=openai_org_id @@ -102,14 +106,13 @@ def __call__(self, docs: List[str], truncate: bool = True) -> List[List[float]]: if self.client is None: raise ValueError("OpenAI client is not initialized.") embeds = None - error_message = "" if truncate: # check if any document exceeds token limit and truncate if so docs = [self._truncate(doc) for doc in docs] # Exponential backoff - for j in range(1, 7): + for j in range(self.max_retries + 1): try: embeds = self.client.embeddings.create( input=docs, @@ -119,12 +122,18 @@ def __call__(self, docs: List[str], truncate: bool = True) -> List[List[float]]: if embeds.data: break except OpenAIError as e: - sleep(2**j) - error_message = str(e) - logger.warning(f"Retrying in {2**j} seconds...") + logger.error("Exception occurred", exc_info=True) + if self.max_retries != 0 and j < self.max_retries: + sleep(2**j) + logger.warning( + f"Retrying in {2**j} seconds due to OpenAIError: {e}" + ) + else: + raise + except Exception as e: - logger.error(f"OpenAI API call failed. Error: {error_message}") - raise ValueError(f"OpenAI API call failed. Error: {e}") from e + logger.error(f"OpenAI API call failed. Error: {e}") + raise ValueError(f"OpenAI API call failed. Error: {str(e)}") from e if ( not embeds @@ -132,7 +141,7 @@ def __call__(self, docs: List[str], truncate: bool = True) -> List[List[float]]: or not embeds.data ): logger.info(f"Returned embeddings: {embeds}") - raise ValueError(f"No embeddings returned. Error: {error_message}") + raise ValueError("No embeddings returned.") embeddings = [embeds_obj.embedding for embeds_obj in embeds.data] return embeddings @@ -154,14 +163,13 @@ async def acall(self, docs: List[str], truncate: bool = True) -> List[List[float if self.async_client is None: raise ValueError("OpenAI async client is not initialized.") embeds = None - error_message = "" if truncate: # check if any document exceeds token limit and truncate if so docs = [self._truncate(doc) for doc in docs] # Exponential backoff - for j in range(1, 7): + for j in range(self.max_retries + 1): try: embeds = await self.async_client.embeddings.create( input=docs, @@ -171,11 +179,17 @@ async def acall(self, docs: List[str], truncate: bool = True) -> List[List[float if embeds.data: break except OpenAIError as e: - await asleep(2**j) - error_message = str(e) - logger.warning(f"Retrying in {2**j} seconds...") + logger.error("Exception occurred", exc_info=True) + if self.max_retries != 0 and j < self.max_retries: + await asleep(2**j) + logger.warning( + f"Retrying in {2**j} seconds due to OpenAIError: {e}" + ) + else: + raise + except Exception as e: - logger.error(f"OpenAI API call failed. Error: {error_message}") + logger.error(f"OpenAI API call failed. Error: {e}") raise ValueError(f"OpenAI API call failed. Error: {e}") from e if ( @@ -184,7 +198,7 @@ async def acall(self, docs: List[str], truncate: bool = True) -> List[List[float or not embeds.data ): logger.info(f"Returned embeddings: {embeds}") - raise ValueError(f"No embeddings returned. Error: {error_message}") + raise ValueError("No embeddings returned.") embeddings = [embeds_obj.embedding for embeds_obj in embeds.data] return embeddings diff --git a/semantic_router/encoders/zure.py b/semantic_router/encoders/zure.py index dba93600..d6f65660 100644 --- a/semantic_router/encoders/zure.py +++ b/semantic_router/encoders/zure.py @@ -23,6 +23,7 @@ class AzureOpenAIEncoder(BaseEncoder): azure_endpoint: Optional[str] = None api_version: Optional[str] = None model: Optional[str] = None + max_retries: int = 3 def __init__( self, @@ -33,6 +34,7 @@ def __init__( model: Optional[str] = None, # TODO we should change to `name` JB score_threshold: float = 0.82, dimensions: Union[int, NotGiven] = NotGiven(), + max_retries: int = 3, ): name = deployment_name if name is None: @@ -49,6 +51,8 @@ def __init__( self.api_key = os.getenv("AZURE_OPENAI_API_KEY") if self.api_key is None: raise ValueError("No Azure OpenAI API key provided.") + if max_retries is not None: + self.max_retries = max_retries if self.deployment_name is None: self.deployment_name = EncoderDefault.AZURE.value["deployment_name"] # deployment_name may still be None, but it is optional in the API @@ -97,10 +101,9 @@ def __call__(self, docs: List[str]) -> List[List[float]]: if self.client is None: raise ValueError("Azure OpenAI client is not initialized.") embeds = None - error_message = "" # Exponential backoff - for j in range(3): + for j in range(self.max_retries + 1): try: embeds = self.client.embeddings.create( input=docs, @@ -110,15 +113,16 @@ def __call__(self, docs: List[str]) -> List[List[float]]: if embeds.data: break except OpenAIError as e: - # print full traceback - import traceback - - traceback.print_exc() - sleep(2**j) - error_message = str(e) - logger.warning(f"Retrying in {2**j} seconds...") + logger.error("Exception occurred", exc_info=True) + if self.max_retries != 0 and j < self.max_retries: + sleep(2**j) + logger.warning( + f"Retrying in {2**j} seconds due to OpenAIError: {e}" + ) + else: + raise except Exception as e: - logger.error(f"Azure OpenAI API call failed. Error: {error_message}") + logger.error(f"Azure OpenAI API call failed. Error: {e}") raise ValueError(f"Azure OpenAI API call failed. Error: {e}") from e if ( @@ -126,7 +130,7 @@ def __call__(self, docs: List[str]) -> List[List[float]]: or not isinstance(embeds, CreateEmbeddingResponse) or not embeds.data ): - raise ValueError(f"No embeddings returned. Error: {error_message}") + raise ValueError("No embeddings returned.") embeddings = [embeds_obj.embedding for embeds_obj in embeds.data] return embeddings @@ -135,10 +139,9 @@ async def acall(self, docs: List[str]) -> List[List[float]]: if self.async_client is None: raise ValueError("Azure OpenAI async client is not initialized.") embeds = None - error_message = "" # Exponential backoff - for j in range(3): + for j in range(self.max_retries + 1): try: embeds = await self.async_client.embeddings.create( input=docs, @@ -147,16 +150,18 @@ async def acall(self, docs: List[str]) -> List[List[float]]: ) if embeds.data: break - except OpenAIError as e: - # print full traceback - import traceback - traceback.print_exc() - await asleep(2**j) - error_message = str(e) - logger.warning(f"Retrying in {2**j} seconds...") + except OpenAIError as e: + logger.error("Exception occurred", exc_info=True) + if self.max_retries != 0 and j < self.max_retries: + await asleep(2**j) + logger.warning( + f"Retrying in {2**j} seconds due to OpenAIError: {e}" + ) + else: + raise except Exception as e: - logger.error(f"Azure OpenAI API call failed. Error: {error_message}") + logger.error(f"Azure OpenAI API call failed. Error: {e}") raise ValueError(f"Azure OpenAI API call failed. Error: {e}") from e if ( @@ -164,7 +169,7 @@ async def acall(self, docs: List[str]) -> List[List[float]]: or not isinstance(embeds, CreateEmbeddingResponse) or not embeds.data ): - raise ValueError(f"No embeddings returned. Error: {error_message}") + raise ValueError("No embeddings returned.") embeddings = [embeds_obj.embedding for embeds_obj in embeds.data] return embeddings diff --git a/semantic_router/utils/function_call.py b/semantic_router/utils/function_call.py index 3ae14043..d03c0798 100644 --- a/semantic_router/utils/function_call.py +++ b/semantic_router/utils/function_call.py @@ -77,7 +77,11 @@ def to_ollama(self): "type": "object", "properties": { param.name: { - "description": param.description, + "description": ( + param.description + if isinstance(param.description, str) + else None + ), "type": self._ollama_type_mapping(param.type), } for param in self.parameters diff --git a/tests/integration/encoders/test_openai_integration.py b/tests/integration/encoders/test_openai_integration.py index a8b59281..47e617a5 100644 --- a/tests/integration/encoders/test_openai_integration.py +++ b/tests/integration/encoders/test_openai_integration.py @@ -1,5 +1,6 @@ import os import pytest +from openai import OpenAIError from semantic_router.encoders.base import BaseEncoder from semantic_router.encoders.openai import OpenAIEncoder @@ -40,7 +41,7 @@ def test_openai_encoder_call_truncation(self, openai_encoder): os.environ.get("OPENAI_API_KEY") is None, reason="OpenAI API key required" ) def test_openai_encoder_call_no_truncation(self, openai_encoder): - with pytest.raises(ValueError) as _: + with pytest.raises(OpenAIError) as _: # default truncation is True openai_encoder([long_doc], truncate=False) diff --git a/tests/unit/encoders/test_azure.py b/tests/unit/encoders/test_azure.py index 93dffb89..5a6841d7 100644 --- a/tests/unit/encoders/test_azure.py +++ b/tests/unit/encoders/test_azure.py @@ -1,4 +1,5 @@ import pytest +from unittest.mock import AsyncMock, Mock, patch from openai import OpenAIError from openai.types import CreateEmbeddingResponse, Embedding from openai.types.create_embedding_response import Usage @@ -7,14 +8,26 @@ @pytest.fixture -def openai_encoder(mocker): - mocker.patch("openai.Client") +def mock_openai_client(): + with patch("openai.AzureOpenAI") as mock_client: + yield mock_client + + +@pytest.fixture +def mock_openai_async_client(): + with patch("openai.AsyncAzureOpenAI") as mock_async_client: + yield mock_async_client + + +@pytest.fixture +def openai_encoder(mock_openai_client, mock_openai_async_client): return AzureOpenAIEncoder( api_key="test_api_key", deployment_name="test-deployment", azure_endpoint="test_endpoint", api_version="test_version", model="test_model", + max_retries=2, ) @@ -70,21 +83,10 @@ def test_openai_encoder_call_success(self, openai_encoder, mocker): mocker.patch.object( openai_encoder.client.embeddings, "create", side_effect=responses ) - embeddings = openai_encoder(["test document"]) + with patch("semantic_router.encoders.zure.sleep", return_value=None): + embeddings = openai_encoder(["test document"]) assert embeddings == [[0.1, 0.2]] - def test_openai_encoder_call_with_retries(self, openai_encoder, mocker): - mocker.patch("os.getenv", return_value="fake-api-key") - mocker.patch("time.sleep", return_value=None) # To speed up the test - mocker.patch.object( - openai_encoder.client.embeddings, - "create", - side_effect=OpenAIError("Test error"), - ) - with pytest.raises(ValueError) as e: - openai_encoder(["test document"]) - assert "No embeddings returned. Error" in str(e.value) - def test_openai_encoder_call_failure_non_openai_error(self, openai_encoder, mocker): mocker.patch("os.getenv", return_value="fake-api-key") mocker.patch("time.sleep", return_value=None) # To speed up the test @@ -93,8 +95,9 @@ def test_openai_encoder_call_failure_non_openai_error(self, openai_encoder, mock "create", side_effect=Exception("Non-OpenAIError"), ) - with pytest.raises(ValueError) as e: - openai_encoder(["test document"]) + with patch("semantic_router.encoders.zure.sleep", return_value=None): + with pytest.raises(ValueError) as e: + openai_encoder(["test document"]) assert "OpenAI API call failed. Error: Non-OpenAIError" in str(e.value) @@ -120,5 +123,128 @@ def test_openai_encoder_call_successful_retry(self, openai_encoder, mocker): mocker.patch.object( openai_encoder.client.embeddings, "create", side_effect=responses ) - embeddings = openai_encoder(["test document"]) + with patch("semantic_router.encoders.zure.sleep", return_value=None): + embeddings = openai_encoder(["test document"]) assert embeddings == [[0.1, 0.2]] + + def test_retry_logic_sync(self, openai_encoder, mock_openai_client, mocker): + # Mock the embeddings.create method to raise an error twice, then succeed + mock_create = Mock( + side_effect=[ + OpenAIError("API error"), + OpenAIError("API error"), + CreateEmbeddingResponse( + data=[ + Embedding( + embedding=[0.1, 0.2, 0.3], index=0, object="embedding" + ) + ], + model="text-embedding-3-small", + object="list", + usage={"prompt_tokens": 5, "total_tokens": 5}, + ), + ] + ) + mock_openai_client.return_value.embeddings.create = mock_create + mocker.patch("time.sleep", return_value=None) # To speed up the test + + # Patch the sleep function in the encoder module to avoid actual sleep + with patch("semantic_router.encoders.zure.sleep", return_value=None): + result = openai_encoder(["test document"]) + + assert result == [[0.1, 0.2, 0.3]] + assert mock_create.call_count == 3 + + def test_no_retry_on_max_retries_zero(self, openai_encoder, mock_openai_client): + openai_encoder.max_retries = 0 + # Mock the embeddings.create method to always raise an error + mock_create = Mock(side_effect=OpenAIError("API error")) + mock_openai_client.return_value.embeddings.create = mock_create + + with pytest.raises(OpenAIError): + openai_encoder(["test document"]) + + assert mock_create.call_count == 1 # Only the initial attempt, no retries + + def test_retry_logic_sync_max_retries_exceeded( + self, openai_encoder, mock_openai_client, mocker + ): + # Mock the embeddings.create method to always raise an error + mock_create = Mock(side_effect=OpenAIError("API error")) + mock_openai_client.return_value.embeddings.create = mock_create + mocker.patch("time.sleep", return_value=None) # To speed up the test + + # Patch the sleep function in the encoder module to avoid actual sleep + with patch("semantic_router.encoders.zure.sleep", return_value=None): + with pytest.raises(OpenAIError): + openai_encoder(["test document"]) + + assert mock_create.call_count == 3 # Initial attempt + 2 retries + + @pytest.mark.asyncio + async def test_retry_logic_async( + self, openai_encoder, mock_openai_async_client, mocker + ): + # Set up the mock to fail twice, then succeed + mock_create = AsyncMock( + side_effect=[ + OpenAIError("API error"), + OpenAIError("API error"), + CreateEmbeddingResponse( + data=[ + Embedding( + embedding=[0.1, 0.2, 0.3], index=0, object="embedding" + ) + ], + model="text-embedding-3-small", + object="list", + usage={"prompt_tokens": 5, "total_tokens": 5}, + ), + ] + ) + mock_openai_async_client.return_value.embeddings.create = mock_create + mocker.patch("asyncio.sleep", return_value=None) # To speed up the test + + # Patch the asleep function in the encoder module to avoid actual sleep + with patch("semantic_router.encoders.zure.asleep", return_value=None): + result = await openai_encoder.acall(["test document"]) + + assert result == [[0.1, 0.2, 0.3]] + assert mock_create.call_count == 3 + + @pytest.mark.asyncio + async def test_retry_logic_async_max_retries_exceeded( + self, openai_encoder, mock_openai_async_client, mocker + ): + # Mock the embeddings.create method to always raise an error + async def raise_error(*args, **kwargs): + raise OpenAIError("API error") + + mock_create = Mock(side_effect=raise_error) + mock_openai_async_client.return_value.embeddings.create = mock_create + mocker.patch("asyncio.sleep", return_value=None) # To speed up the test + + # Patch the asleep function in the encoder module to avoid actual sleep + with patch("semantic_router.encoders.zure.asleep", return_value=None): + with pytest.raises(OpenAIError): + await openai_encoder.acall(["test document"]) + + assert mock_create.call_count == 3 # Initial attempt + 2 retries + + @pytest.mark.asyncio + async def test_no_retry_on_max_retries_zero_async( + self, openai_encoder, mock_openai_async_client + ): + openai_encoder.max_retries = 0 + + # Mock the embeddings.create method to always raise an error + async def raise_error(*args, **kwargs): + raise OpenAIError("API error") + + mock_create = AsyncMock(side_effect=raise_error) + mock_openai_async_client.return_value.embeddings.create = mock_create + + with pytest.raises(OpenAIError): + await openai_encoder.acall(["test document"]) + + assert mock_create.call_count == 1 # Only the initial attempt, no retries diff --git a/tests/unit/encoders/test_openai.py b/tests/unit/encoders/test_openai.py index 508e9e9e..538e9692 100644 --- a/tests/unit/encoders/test_openai.py +++ b/tests/unit/encoders/test_openai.py @@ -1,4 +1,5 @@ import pytest +from unittest.mock import AsyncMock, Mock, patch from openai import OpenAIError from openai.types import CreateEmbeddingResponse, Embedding from openai.types.create_embedding_response import Usage @@ -7,9 +8,20 @@ @pytest.fixture -def openai_encoder(mocker): - mocker.patch("openai.Client") - return OpenAIEncoder(openai_api_key="test_api_key") +def mock_openai_client(): + with patch("openai.Client") as mock_client: + yield mock_client + + +@pytest.fixture +def mock_openai_async_client(): + with patch("openai.AsyncClient") as mock_async_client: + yield mock_async_client + + +@pytest.fixture +def openai_encoder(mock_openai_client, mock_openai_async_client): + return OpenAIEncoder(openai_api_key="fake_key", max_retries=2) class TestOpenAIEncoder: @@ -64,21 +76,10 @@ def test_openai_encoder_call_success(self, openai_encoder, mocker): mocker.patch.object( openai_encoder.client.embeddings, "create", side_effect=responses ) - embeddings = openai_encoder(["test document"]) + with patch("semantic_router.encoders.openai.sleep", return_value=None): + embeddings = openai_encoder(["test document"]) assert embeddings == [[0.1, 0.2]] - def test_openai_encoder_call_with_retries(self, openai_encoder, mocker): - mocker.patch("os.getenv", return_value="fake-api-key") - mocker.patch("time.sleep", return_value=None) # To speed up the test - mocker.patch.object( - openai_encoder.client.embeddings, - "create", - side_effect=OpenAIError("Test error"), - ) - with pytest.raises(ValueError) as e: - openai_encoder(["test document"]) - assert "No embeddings returned. Error" in str(e.value) - def test_openai_encoder_call_failure_non_openai_error(self, openai_encoder, mocker): mocker.patch("os.getenv", return_value="fake-api-key") mocker.patch("time.sleep", return_value=None) # To speed up the test @@ -87,8 +88,9 @@ def test_openai_encoder_call_failure_non_openai_error(self, openai_encoder, mock "create", side_effect=Exception("Non-OpenAIError"), ) - with pytest.raises(ValueError) as e: - openai_encoder(["test document"]) + with patch("semantic_router.encoders.openai.sleep", return_value=None): + with pytest.raises(ValueError) as e: + openai_encoder(["test document"]) assert "OpenAI API call failed. Error: Non-OpenAIError" in str(e.value) @@ -114,5 +116,128 @@ def test_openai_encoder_call_successful_retry(self, openai_encoder, mocker): mocker.patch.object( openai_encoder.client.embeddings, "create", side_effect=responses ) - embeddings = openai_encoder(["test document"]) + with patch("semantic_router.encoders.openai.sleep", return_value=None): + embeddings = openai_encoder(["test document"]) assert embeddings == [[0.1, 0.2]] + + def test_retry_logic_sync(self, openai_encoder, mock_openai_client, mocker): + # Mock the embeddings.create method to raise an error twice, then succeed + mock_create = Mock( + side_effect=[ + OpenAIError("API error"), + OpenAIError("API error"), + CreateEmbeddingResponse( + data=[ + Embedding( + embedding=[0.1, 0.2, 0.3], index=0, object="embedding" + ) + ], + model="text-embedding-3-small", + object="list", + usage={"prompt_tokens": 5, "total_tokens": 5}, + ), + ] + ) + mock_openai_client.return_value.embeddings.create = mock_create + mocker.patch("time.sleep", return_value=None) # To speed up the test + + # Patch the sleep function in the encoder module to avoid actual sleep + with patch("semantic_router.encoders.openai.sleep", return_value=None): + result = openai_encoder(["test document"]) + + assert result == [[0.1, 0.2, 0.3]] + assert mock_create.call_count == 3 + + def test_no_retry_on_max_retries_zero(self, openai_encoder, mock_openai_client): + openai_encoder.max_retries = 0 + # Mock the embeddings.create method to always raise an error + mock_create = Mock(side_effect=OpenAIError("API error")) + mock_openai_client.return_value.embeddings.create = mock_create + + with pytest.raises(OpenAIError): + openai_encoder(["test document"]) + + assert mock_create.call_count == 1 # Only the initial attempt, no retries + + def test_retry_logic_sync_max_retries_exceeded( + self, openai_encoder, mock_openai_client, mocker + ): + # Mock the embeddings.create method to always raise an error + mock_create = Mock(side_effect=OpenAIError("API error")) + mock_openai_client.return_value.embeddings.create = mock_create + mocker.patch("time.sleep", return_value=None) # To speed up the test + + # Patch the sleep function in the encoder module to avoid actual sleep + with patch("semantic_router.encoders.openai.sleep", return_value=None): + with pytest.raises(OpenAIError): + openai_encoder(["test document"]) + + assert mock_create.call_count == 3 # Initial attempt + 2 retries + + @pytest.mark.asyncio + async def test_retry_logic_async( + self, openai_encoder, mock_openai_async_client, mocker + ): + # Set up the mock to fail twice, then succeed + mock_create = AsyncMock( + side_effect=[ + OpenAIError("API error"), + OpenAIError("API error"), + CreateEmbeddingResponse( + data=[ + Embedding( + embedding=[0.1, 0.2, 0.3], index=0, object="embedding" + ) + ], + model="text-embedding-3-small", + object="list", + usage={"prompt_tokens": 5, "total_tokens": 5}, + ), + ] + ) + mock_openai_async_client.return_value.embeddings.create = mock_create + mocker.patch("asyncio.sleep", return_value=None) # To speed up the test + + # Patch the asleep function in the encoder module to avoid actual sleep + with patch("semantic_router.encoders.openai.asleep", return_value=None): + result = await openai_encoder.acall(["test document"]) + + assert result == [[0.1, 0.2, 0.3]] + assert mock_create.call_count == 3 + + @pytest.mark.asyncio + async def test_retry_logic_async_max_retries_exceeded( + self, openai_encoder, mock_openai_async_client, mocker + ): + # Mock the embeddings.create method to always raise an error + async def raise_error(*args, **kwargs): + raise OpenAIError("API error") + + mock_create = Mock(side_effect=raise_error) + mock_openai_async_client.return_value.embeddings.create = mock_create + mocker.patch("asyncio.sleep", return_value=None) # To speed up the test + + # Patch the asleep function in the encoder module to avoid actual sleep + with patch("semantic_router.encoders.openai.asleep", return_value=None): + with pytest.raises(OpenAIError): + await openai_encoder.acall(["test document"]) + + assert mock_create.call_count == 3 # Initial attempt + 2 retries + + @pytest.mark.asyncio + async def test_no_retry_on_max_retries_zero_async( + self, openai_encoder, mock_openai_async_client + ): + openai_encoder.max_retries = 0 + + # Mock the embeddings.create method to always raise an error + async def raise_error(*args, **kwargs): + raise OpenAIError("API error") + + mock_create = AsyncMock(side_effect=raise_error) + mock_openai_async_client.return_value.embeddings.create = mock_create + + with pytest.raises(OpenAIError): + await openai_encoder.acall(["test document"]) + + assert mock_create.call_count == 1 # Only the initial attempt, no retries