From 2ea1f19944812909cc3c34dbd45bf3e9972f501b Mon Sep 17 00:00:00 2001 From: Siraj R Aizlewood Date: Mon, 11 Mar 2024 17:03:41 +0400 Subject: [PATCH 1/7] Fit and vec_evaluate can now handle dynamic routes by treating them as static. Introduced _simulate_static_route_selection which simulates static routes, even when dynamic routes are being evaluated in _vec_evaluate(). This was necessary as dynamic routes use text inputs, but we use vector inputs when evaluating for increased performance. Also refactored a little to avoid code duplication between _simulate_static_route_selection() and __call__(). --- semantic_router/layer.py | 61 ++++++++++++++++++++++++++++------------ 1 file changed, 43 insertions(+), 18 deletions(-) diff --git a/semantic_router/layer.py b/semantic_router/layer.py index 3ef16206..0741756c 100644 --- a/semantic_router/layer.py +++ b/semantic_router/layer.py @@ -225,23 +225,11 @@ def __call__( if vector is None: if text is None: raise ValueError("Either text or vector must be provided") - vector_arr = self._encode(text=text) - else: - vector_arr = np.array(vector) - # get relevant results (scores and routes) - results = self._retrieve(xq=vector_arr) - # decide most relevant routes - top_class, top_class_scores = self._semantic_classify(results) - # TODO do we need this check? 
- route = self.check_for_matching_routes(top_class) - if route is None: - return RouteChoice() - threshold = ( - route.score_threshold - if route.score_threshold is not None - else self.score_threshold - ) - passed = self._pass_threshold(top_class_scores, threshold) + vector = self._encode(text=text) + + route, top_class_scores = self._retrieve_top_route(vector) + passed = self._check_threshold(top_class_scores, route) + if passed: if route.function_schema and text is None: raise ValueError( @@ -263,6 +251,29 @@ def __call__( else: # if no route passes threshold, return empty route choice return RouteChoice() + + def _retrieve_top_route(self, vector: List[float]) -> Tuple[Optional[Route], List[float]]: + """ + Retrieve the top matching route based on the given vector. + Returns a tuple of the route (if any) and the scores of the top class. + """ + # get relevant results (scores and routes) + results = self._retrieve(xq=np.array(vector)) + # decide most relevant routes + top_class, top_class_scores = self._semantic_classify(results) + # TODO do we need this check? + route = self.check_for_matching_routes(top_class) + return route, top_class_scores + + def _check_threshold(self, scores: List[float], route: Optional[Route]) -> bool: + """ + Check if the route's score passes the specified threshold. + """ + if route is None: + return False + threshold = route.score_threshold if route.score_threshold is not None else self.score_threshold + return self._pass_threshold(scores, threshold) + def __str__(self): return ( @@ -481,11 +492,25 @@ def _vec_evaluate(self, Xq: Union[List[float], Any], y: List[str]) -> float: """ correct = 0 for xq, target_route in zip(Xq, y): - route_choice = self(vector=xq) + # We can't do route_choice = self(vector=xq) here as it won't work for dynamic routes. 
+ route_choice = self._simulate_static_route_selection(vector=xq) if route_choice.name == target_route: correct += 1 accuracy = correct / len(Xq) return accuracy + + def _simulate_static_route_selection(self, vector: List[float]) -> RouteChoice: + """ + Simulate the route selection process treating all routes as static, including threshold checking. + Dynamic routes require a query string to be passed to the __call__ method, but here we work with vectors to boost performance. + Hence, we simulate the route selection process treating all routes as static. + """ + route, scores = self._retrieve_top_route(vector) + passed = self._check_threshold(scores, route) + if passed: + return RouteChoice(name=route.name, function_call=None, similarity_score=None, trigger=None) + else: + return RouteChoice() def _get_route_names(self) -> List[str]: return [route.name for route in self.routes] From 25eae2433c9e4ba7dc1d63b705a61ea2858def5b Mon Sep 17 00:00:00 2001 From: Siraj R Aizlewood Date: Mon, 11 Mar 2024 17:10:37 +0400 Subject: [PATCH 2/7] Linting. --- semantic_router/layer.py | 23 +++++++++++++++-------- 1 file changed, 15 insertions(+), 8 deletions(-) diff --git a/semantic_router/layer.py b/semantic_router/layer.py index 0741756c..5acf92e0 100644 --- a/semantic_router/layer.py +++ b/semantic_router/layer.py @@ -226,7 +226,7 @@ def __call__( if text is None: raise ValueError("Either text or vector must be provided") vector = self._encode(text=text) - + route, top_class_scores = self._retrieve_top_route(vector) passed = self._check_threshold(top_class_scores, route) @@ -251,8 +251,10 @@ def __call__( else: # if no route passes threshold, return empty route choice return RouteChoice() - - def _retrieve_top_route(self, vector: List[float]) -> Tuple[Optional[Route], List[float]]: + + def _retrieve_top_route( + self, vector: List[float] + ) -> Tuple[Optional[Route], List[float]]: """ Retrieve the top matching route based on the given vector. 
Returns a tuple of the route (if any) and the scores of the top class. @@ -264,17 +266,20 @@ def _retrieve_top_route(self, vector: List[float]) -> Tuple[Optional[Route], Lis # TODO do we need this check? route = self.check_for_matching_routes(top_class) return route, top_class_scores - + def _check_threshold(self, scores: List[float], route: Optional[Route]) -> bool: """ Check if the route's score passes the specified threshold. """ if route is None: return False - threshold = route.score_threshold if route.score_threshold is not None else self.score_threshold + threshold = ( + route.score_threshold + if route.score_threshold is not None + else self.score_threshold + ) return self._pass_threshold(scores, threshold) - def __str__(self): return ( f"RouteLayer(encoder={self.encoder}, " @@ -498,7 +503,7 @@ def _vec_evaluate(self, Xq: Union[List[float], Any], y: List[str]) -> float: correct += 1 accuracy = correct / len(Xq) return accuracy - + def _simulate_static_route_selection(self, vector: List[float]) -> RouteChoice: """ Simulate the route selection process treating all routes as static, including threshold checking. @@ -508,7 +513,9 @@ def _simulate_static_route_selection(self, vector: List[float]) -> RouteChoice: route, scores = self._retrieve_top_route(vector) passed = self._check_threshold(scores, route) if passed: - return RouteChoice(name=route.name, function_call=None, similarity_score=None, trigger=None) + return RouteChoice( + name=route.name, function_call=None, similarity_score=None, trigger=None + ) else: return RouteChoice() From 0262f97b2bc6579937ac384a7c5a6f472cce9fe9 Mon Sep 17 00:00:00 2001 From: Siraj R Aizlewood Date: Mon, 11 Mar 2024 17:17:03 +0400 Subject: [PATCH 3/7] Linting. 
--- semantic_router/layer.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/semantic_router/layer.py b/semantic_router/layer.py index 5acf92e0..1ab1db73 100644 --- a/semantic_router/layer.py +++ b/semantic_router/layer.py @@ -230,7 +230,7 @@ def __call__( route, top_class_scores = self._retrieve_top_route(vector) passed = self._check_threshold(top_class_scores, route) - if passed: + if passed and route is not None: if route.function_schema and text is None: raise ValueError( "Route has a function schema, but no text was provided." @@ -512,7 +512,7 @@ def _simulate_static_route_selection(self, vector: List[float]) -> RouteChoice: """ route, scores = self._retrieve_top_route(vector) passed = self._check_threshold(scores, route) - if passed: + if passed and route is not None: return RouteChoice( name=route.name, function_call=None, similarity_score=None, trigger=None ) From 5eca3c02d5d7dc4dcfff531bbfb56f2cceb67fad Mon Sep 17 00:00:00 2001 From: Siraj R Aizlewood Date: Mon, 11 Mar 2024 17:42:35 +0400 Subject: [PATCH 4/7] New PyTests. 
--- tests/unit/test_layer.py | 36 ++++++++++++++++++++++++++++++++++++ 1 file changed, 36 insertions(+) diff --git a/tests/unit/test_layer.py b/tests/unit/test_layer.py index 415150a5..d7945450 100644 --- a/tests/unit/test_layer.py +++ b/tests/unit/test_layer.py @@ -8,6 +8,7 @@ from semantic_router.layer import LayerConfig, RouteLayer from semantic_router.llms.base import BaseLLM from semantic_router.route import Route +from semantic_router.index.local import LocalIndex def mock_encoder_call(utterances): @@ -522,3 +523,38 @@ def test_remove(self): layer_config = LayerConfig(routes=[route]) layer_config.remove("test") assert layer_config.routes == [] + + def test_check_threshold_with_none_route(self, openai_encoder): + route_layer = RouteLayer(encoder=openai_encoder) + result = route_layer._check_threshold(scores=[0.5, 0.6], route=None) + assert ( + not result + ), "Expected _check_threshold to return False when route is None." + + def test_simulate_static_route_selection_returns_route_choice( + self, openai_encoder, routes + ): + route_layer = RouteLayer(encoder=openai_encoder, routes=routes) + # Manually set the index to simulate a scenario where a route passes the threshold + route_layer.index = LocalIndex() + # Assuming routes[0] is the route we want to simulate as the top route + route_layer.index.add( + embeddings=[[0.1, 0.2, 0.3]], + routes=[routes[0].name], + utterances=[routes[0].utterances[0]], + ) + # Adjust the score_threshold to ensure the route passes the threshold + route_layer.score_threshold = 0 + + # Simulate a vector that would match the route we added to the index + vector = [0.1, 0.2, 0.3] + route_choice = route_layer._simulate_static_route_selection(vector=vector) + + assert ( + route_choice.name == routes[0].name + ), "Expected the RouteChoice name to match the simulated top route name." + assert route_choice.function_call is None, "Expected function_call to be None." 
+ assert ( + route_choice.similarity_score is None + ), "Expected similarity_score to be None." + assert route_choice.trigger is None, "Expected trigger to be None." From eb56039a89009874abe45e15fd631e23c75d0346 Mon Sep 17 00:00:00 2001 From: Siraj R Aizlewood Date: Tue, 12 Mar 2024 01:07:18 +0400 Subject: [PATCH 5/7] Removed _simulate_static_route_selection Simulation of static routes from dynamic routes now handled in __call__ as this avoids a situation where we have subtly different logic in __call__ compared to _simulate_static_route_selection. --- semantic_router/layer.py | 29 +++++++++++------------------ 1 file changed, 11 insertions(+), 18 deletions(-) diff --git a/semantic_router/layer.py b/semantic_router/layer.py index 1ab1db73..432f0af9 100644 --- a/semantic_router/layer.py +++ b/semantic_router/layer.py @@ -220,6 +220,7 @@ def __call__( self, text: Optional[str] = None, vector: Optional[List[float]] = None, + simulate_static: bool = False, ) -> RouteChoice: # if no vector provided, encode text to get vector if vector is None: @@ -230,7 +231,7 @@ def __call__( route, top_class_scores = self._retrieve_top_route(vector) passed = self._check_threshold(top_class_scores, route) - if passed and route is not None: + if passed and route is not None and not simulate_static: if route.function_schema and text is None: raise ValueError( "Route has a function schema, but no text was provided." @@ -248,6 +249,13 @@ def __call__( else: route.llm = self.llm return route(text) + elif passed and route is not None and simulate_static: + return RouteChoice( + name=route.name, + function_call=None, + similarity_score=None, + trigger=None, + ) else: # if no route passes threshold, return empty route choice return RouteChoice() @@ -497,28 +505,13 @@ def _vec_evaluate(self, Xq: Union[List[float], Any], y: List[str]) -> float: """ correct = 0 for xq, target_route in zip(Xq, y): - # We can't do route_choice = self(vector=xq) here as it won't work for dynamic routes. 
- route_choice = self._simulate_static_route_selection(vector=xq) + # We treat dynamic routes as static here, because when evaluating we use only vectors, and dynamic routes expect strings by default. + route_choice = self(vector=xq, simulate_static=True) if route_choice.name == target_route: correct += 1 accuracy = correct / len(Xq) return accuracy - def _simulate_static_route_selection(self, vector: List[float]) -> RouteChoice: - """ - Simulate the route selection process treating all routes as static, including threshold checking. - Dynamic routes require a query string to be passed to the __call__ method, but here we work with vectors to boost performance. - Hence, we simulate the route selection process treating all routes as static. - """ - route, scores = self._retrieve_top_route(vector) - passed = self._check_threshold(scores, route) - if passed and route is not None: - return RouteChoice( - name=route.name, function_call=None, similarity_score=None, trigger=None - ) - else: - return RouteChoice() - def _get_route_names(self) -> List[str]: return [route.name for route in self.routes] From 4ff71a1b19f5f05b703ae6e2fa2a96c9d47d31f9 Mon Sep 17 00:00:00 2001 From: Siraj R Aizlewood Date: Tue, 12 Mar 2024 01:10:50 +0400 Subject: [PATCH 6/7] Removed defunct PyTests. --- tests/unit/test_layer.py | 35 ----------------------------------- 1 file changed, 35 deletions(-) diff --git a/tests/unit/test_layer.py b/tests/unit/test_layer.py index d7945450..80725108 100644 --- a/tests/unit/test_layer.py +++ b/tests/unit/test_layer.py @@ -523,38 +523,3 @@ def test_remove(self): layer_config = LayerConfig(routes=[route]) layer_config.remove("test") assert layer_config.routes == [] - - def test_check_threshold_with_none_route(self, openai_encoder): - route_layer = RouteLayer(encoder=openai_encoder) - result = route_layer._check_threshold(scores=[0.5, 0.6], route=None) - assert ( - not result - ), "Expected _check_threshold to return False when route is None." 
- - def test_simulate_static_route_selection_returns_route_choice( - self, openai_encoder, routes - ): - route_layer = RouteLayer(encoder=openai_encoder, routes=routes) - # Manually set the index to simulate a scenario where a route passes the threshold - route_layer.index = LocalIndex() - # Assuming routes[0] is the route we want to simulate as the top route - route_layer.index.add( - embeddings=[[0.1, 0.2, 0.3]], - routes=[routes[0].name], - utterances=[routes[0].utterances[0]], - ) - # Adjust the score_threshold to ensure the route passes the threshold - route_layer.score_threshold = 0 - - # Simulate a vector that would match the route we added to the index - vector = [0.1, 0.2, 0.3] - route_choice = route_layer._simulate_static_route_selection(vector=vector) - - assert ( - route_choice.name == routes[0].name - ), "Expected the RouteChoice name to match the simulated top route name." - assert route_choice.function_call is None, "Expected function_call to be None." - assert ( - route_choice.similarity_score is None - ), "Expected similarity_score to be None." - assert route_choice.trigger is None, "Expected trigger to be None." From ac024f08c95c325d3bb23c814bd4bf1841f79ad3 Mon Sep 17 00:00:00 2001 From: Siraj R Aizlewood Date: Tue, 12 Mar 2024 01:12:12 +0400 Subject: [PATCH 7/7] Linting. --- tests/unit/test_layer.py | 1 - 1 file changed, 1 deletion(-) diff --git a/tests/unit/test_layer.py b/tests/unit/test_layer.py index 80725108..415150a5 100644 --- a/tests/unit/test_layer.py +++ b/tests/unit/test_layer.py @@ -8,7 +8,6 @@ from semantic_router.layer import LayerConfig, RouteLayer from semantic_router.llms.base import BaseLLM from semantic_router.route import Route -from semantic_router.index.local import LocalIndex def mock_encoder_call(utterances):