From b8fcc2979a55dfce699c139f708830acf4563a86 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Andr=C3=A9=20Pedersen?= Date: Thu, 14 Mar 2024 15:31:00 +0100 Subject: [PATCH 01/10] Enable to set top_k for RouteLayer --- semantic_router/layer.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/semantic_router/layer.py b/semantic_router/layer.py index d0d3e33a..beb784b0 100644 --- a/semantic_router/layer.py +++ b/semantic_router/layer.py @@ -182,6 +182,7 @@ def __init__( llm: Optional[BaseLLM] = None, routes: Optional[List[Route]] = None, index: Optional[BaseIndex] = None, # type: ignore + top_k: int = 5, ): logger.info("local") self.index: BaseIndex = index if index is not None else LocalIndex() @@ -196,6 +197,10 @@ def __init__( self.llm = llm self.routes: list[Route] = routes if routes is not None else [] self.score_threshold = self.encoder.score_threshold + self.top_k = top_k + if self.top_k < 1: + raise ValueError(f"top_k needs to be >= 1, but was: {self.top_k}.") + # set route score thresholds if not already set for route in self.routes: if route.score_threshold is None: @@ -266,7 +271,7 @@ def _retrieve_top_route( Returns a tuple of the route (if any) and the scores of the top class. """ # get relevant results (scores and routes) - results = self._retrieve(xq=np.array(vector)) + results = self._retrieve(xq=np.array(vector), top_k=self.top_k) # decide most relevant routes top_class, top_class_scores = self._semantic_classify(results) # TODO do we need this check? From af52afb6a99da59eeda8f73b7c7529fcf0dc4029 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Andr=C3=A9=20Pedersen?= Date: Thu, 14 Mar 2024 15:33:18 +0100 Subject: [PATCH 02/10] Add top_k unit test for RouteLayer --- tests/unit/test_layer.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tests/unit/test_layer.py b/tests/unit/test_layer.py index 415150a5..0eb43442 100644 --- a/tests/unit/test_layer.py +++ b/tests/unit/test_layer.py @@ -120,9 +120,10 @@ def test_data(): class TestRouteLayer: def test_initialization(self, openai_encoder, routes): - route_layer = RouteLayer(encoder=openai_encoder, routes=routes) + route_layer = RouteLayer(encoder=openai_encoder, routes=routes, top_k=10) assert openai_encoder.score_threshold == 0.82 assert route_layer.score_threshold == 0.82 + assert route_layer.top_k == 10 assert len(route_layer.index) if route_layer.index is not None else 0 == 5 assert ( len(set(route_layer._get_route_names())) From 29a99b5742ef0962f4b90c2d1913246192006218 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Andr=C3=A9=20Pedersen?= Date: Thu, 14 Mar 2024 15:43:21 +0100 Subject: [PATCH 03/10] Added support for setting different aggregation method for HybridRL --- semantic_router/hybrid_layer.py | 17 ++++++++++++++++- 1 file changed, 16 insertions(+), 1 deletion(-) diff --git a/semantic_router/hybrid_layer.py b/semantic_router/hybrid_layer.py index 9791786f..1796c118 100644 --- a/semantic_router/hybrid_layer.py +++ b/semantic_router/hybrid_layer.py @@ -25,6 +25,7 @@ def __init__( routes: List[Route] = [], alpha: float = 0.3, top_k: int = 5, + aggregation: str = "SUM", ): self.encoder = encoder self.score_threshold = self.encoder.score_threshold @@ -39,6 +40,10 @@ def __init__( self.top_k = top_k if self.top_k < 1: raise ValueError(f"top_k needs to be >= 1, but was: {self.top_k}.") + self.aggregation = aggregation + if not isinstance(self.aggregation, str) or self.aggregation not in ["SUM", "MEAN", "MAX"]: + raise ValueError(f"Unsupported aggregation method chosen: {aggregation}. Choose either 'SUM', 'MEAN', or 'MAX'.") + self.aggregation_method = self._set_aggregation_method(self.aggregation) self.routes = routes if isinstance(self.sparse_encoder, TfidfEncoder) and hasattr( self.sparse_encoder, "fit" @@ -165,6 +170,16 @@ def _convex_scaling(self, dense: np.ndarray, sparse: np.ndarray): sparse = np.array(sparse) * (1 - self.alpha) return dense, sparse + def _set_aggregation_method(self, aggregation: str = "SUM"): + if aggregation == "SUM": + return lambda x: sum(x) + elif aggregation == "MEAN": + return lambda x: np.mean(x) + elif aggregation == "MAX": + return lambda x: np.max(x) + else: + raise ValueError(f"Unsupported aggregation method chosen: {aggregation}. Choose either 'SUM', 'MEAN', or 'MAX'.") + def _semantic_classify(self, query_results: List[Dict]) -> Tuple[str, List[float]]: scores_by_class: Dict[str, List[float]] = {} for result in query_results: @@ -176,7 +191,7 @@ def _semantic_classify(self, query_results: List[Dict]) -> Tuple[str, List[float scores_by_class[route] = [score] # Calculate total score for each class - total_scores = {route: sum(scores) for route, scores in scores_by_class.items()} + total_scores = {route: self.aggregation_method(scores) for route, scores in scores_by_class.items()} top_class = max(total_scores, key=lambda x: total_scores[x], default=None) # Return the top class and its associated scores From d6977ea1aa8bd26786132318bbd8a8aab305bfe2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Andr=C3=A9=20Pedersen?= Date: Thu, 14 Mar 2024 16:08:41 +0100 Subject: [PATCH 04/10] Add unit test to verify that aggregation works --- tests/unit/test_hybrid_layer.py | 42 ++++++++++++++++++++++++++++++++- 1 file changed, 41 insertions(+), 1 deletion(-) diff --git a/tests/unit/test_hybrid_layer.py b/tests/unit/test_hybrid_layer.py index d4896509..60b218c8 100644 --- a/tests/unit/test_hybrid_layer.py +++ b/tests/unit/test_hybrid_layer.py @@ -192,6 +192,46 @@ def test_add_route_tfidf(self, cohere_encoder, tfidf_encoder, routes): ] assert hybrid_route_layer.sparse_index is not None assert len(hybrid_route_layer.sparse_index) == len(all_utterances) - + + def test_setting_aggregation_methods(self, openai_encoder, routes): + for agg in ["SUM", "MEAN", "MAX"]: + route_layer = HybridRouteLayer( + encoder=openai_encoder, + sparse_encoder=sparse_encoder, + routes=routes, + aggregation=agg, + ) + assert route_layer.aggregation == agg + + def test_semantic_classify_multiple_routes_with_different_aggregation(self, openai_encoder, routes): + route_scores = [ + {"route": "Route 1", "score": 0.5}, + {"route": "Route 1", "score": 0.5}, + {"route": "Route 1", "score": 0.5}, + {"route": "Route 1", "score": 0.5}, + {"route": "Route 2", "score": 0.4}, + {"route": "Route 2", "score": 0.6}, + {"route": "Route 2", "score": 0.8}, + {"route": "Route 3", "score": 0.1}, + {"route": "Route 3", "score": 1.0}, + ] + for agg in ["SUM", "MEAN", "MAX"]: + route_layer = HybridRouteLayer( + encoder=openai_encoder, + sparse_encoder=sparse_encoder, + routes=routes, + aggregation=agg, + ) + classification, score = route_layer._semantic_classify(route_scores) + + if agg == "SUM": + assert classification == "Route 1" + assert score == [0.5,] * 4 + elif agg == "MEAN": + assert classification == "Route 2" + assert score == [0.4, 0.6, 0.8] + elif agg == "MAX": + assert classification == "Route 3" + assert score == [0.1, 1.0] # Add more tests for edge cases and error handling as needed. From 3e2cc98a888672436c3706bdcc56052b7a153967 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Andr=C3=A9=20Pedersen?= Date: Thu, 14 Mar 2024 16:11:50 +0100 Subject: [PATCH 05/10] Add support for setting aggregation in RouteLayer --- semantic_router/hybrid_layer.py | 2 +- semantic_router/layer.py | 15 +++++++++++++++ 2 files changed, 16 insertions(+), 1 deletion(-) diff --git a/semantic_router/hybrid_layer.py b/semantic_router/hybrid_layer.py index 1796c118..692ed897 100644 --- a/semantic_router/hybrid_layer.py +++ b/semantic_router/hybrid_layer.py @@ -176,7 +176,7 @@ def _set_aggregation_method(self, aggregation: str = "SUM"): elif aggregation == "MEAN": return lambda x: np.mean(x) elif aggregation == "MAX": - return lambda x: np.max(x) + return lambda x: max(x) else: raise ValueError(f"Unsupported aggregation method chosen: {aggregation}. Choose either 'SUM', 'MEAN', or 'MAX'.") diff --git a/semantic_router/layer.py b/semantic_router/layer.py index beb784b0..68c84e39 100644 --- a/semantic_router/layer.py +++ b/semantic_router/layer.py @@ -183,6 +183,7 @@ def __init__( routes: Optional[List[Route]] = None, index: Optional[BaseIndex] = None, # type: ignore top_k: int = 5, + aggregation: str = "SUM", ): logger.info("local") self.index: BaseIndex = index if index is not None else LocalIndex() @@ -200,6 +201,10 @@ def __init__( self.top_k = top_k if self.top_k < 1: raise ValueError(f"top_k needs to be >= 1, but was: {self.top_k}.") + self.aggregation = aggregation + if not isinstance(self.aggregation, str) or self.aggregation not in ["SUM", "MEAN", "MAX"]: + raise ValueError(f"Unsupported aggregation method chosen: {aggregation}. Choose either 'SUM', 'MEAN', or 'MAX'.") + self.aggregation_method = self._set_aggregation_method(self.aggregation) # set route score thresholds if not already set for route in self.routes: @@ -395,6 +400,16 @@ def _retrieve(self, xq: Any, top_k: int = 5) -> List[dict]: # get scores and routes scores, routes = self.index.query(vector=xq, top_k=top_k) return [{"route": d, "score": s.item()} for d, s in zip(routes, scores)] + + def _set_aggregation_method(self, aggregation: str = "SUM"): + if aggregation == "SUM": + return lambda x: sum(x) + elif aggregation == "MEAN": + return lambda x: np.mean(x) + elif aggregation == "MAX": + return lambda x: max(x) + else: + raise ValueError(f"Unsupported aggregation method chosen: {aggregation}. Choose either 'SUM', 'MEAN', or 'MAX'.") def _semantic_classify(self, query_results: List[dict]) -> Tuple[str, List[float]]: scores_by_class: Dict[str, List[float]] = {} From 55b742041cec57bbe373f14fea4f43cb5dd3ec38 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Andr=C3=A9=20Pedersen?= Date: Thu, 14 Mar 2024 16:14:36 +0100 Subject: [PATCH 06/10] Fix bug where agg method not used in RL --- semantic_router/layer.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/semantic_router/layer.py b/semantic_router/layer.py index 68c84e39..309bf60d 100644 --- a/semantic_router/layer.py +++ b/semantic_router/layer.py @@ -422,7 +422,7 @@ def _semantic_classify(self, query_results: List[dict]) -> Tuple[str, List[float scores_by_class[route] = [score] # Calculate total score for each class - total_scores = {route: sum(scores) for route, scores in scores_by_class.items()} + total_scores = {route: self.aggregation_method(scores) for route, scores in scores_by_class.items()} top_class = max(total_scores, key=lambda x: total_scores[x], default=None) # Return the top class and its associated scores From 738e7cbe36f783c8c37f02a8df02af5a993713dd Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Andr=C3=A9=20Pedersen?= Date: Thu, 14 Mar 2024 16:14:41 +0100 Subject: [PATCH 07/10] Add tests for agg --- tests/unit/test_layer.py | 39 +++++++++++++++++++++++++++++++++++++++ 1 file changed, 39 insertions(+) diff --git a/tests/unit/test_layer.py b/tests/unit/test_layer.py index 0eb43442..9eb1acaf 100644 --- a/tests/unit/test_layer.py +++ b/tests/unit/test_layer.py @@ -523,3 +523,42 @@ def test_remove(self): layer_config = LayerConfig(routes=[route]) layer_config.remove("test") assert layer_config.routes == [] + + def test_setting_aggregation_methods(self, openai_encoder, routes): + for agg in ["SUM", "MEAN", "MAX"]: + route_layer = RouteLayer( + encoder=openai_encoder, + routes=routes, + aggregation=agg, + ) + assert route_layer.aggregation == agg + + def test_semantic_classify_multiple_routes_with_different_aggregation(self, openai_encoder, routes): + route_scores = [ + {"route": "Route 1", "score": 0.5}, + {"route": "Route 1", "score": 0.5}, + {"route": "Route 1", "score": 0.5}, + {"route": "Route 1", "score": 0.5}, + {"route": "Route 2", "score": 0.4}, + {"route": "Route 2", "score": 0.6}, + {"route": "Route 2", "score": 0.8}, + {"route": "Route 3", "score": 0.1}, + {"route": "Route 3", "score": 1.0}, + ] + for agg in ["SUM", "MEAN", "MAX"]: + route_layer = RouteLayer( + encoder=openai_encoder, + routes=routes, + aggregation=agg, + ) + classification, score = route_layer._semantic_classify(route_scores) + + if agg == "SUM": + assert classification == "Route 1" + assert score == [0.5,] * 4 + elif agg == "MEAN": + assert classification == "Route 2" + assert score == [0.4, 0.6, 0.8] + elif agg == "MAX": + assert classification == "Route 3" + assert score == [0.1, 1.0] From d65559d20b74c077b3689c85be1ed5af27fb7121 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Andr=C3=A9=20Pedersen?= Date: Thu, 14 Mar 2024 16:22:21 +0100 Subject: [PATCH 08/10] Linted code --- semantic_router/hybrid_layer.py | 15 +++++++++++---- semantic_router/layer.py | 17 ++++++++++++----- tests/unit/test_hybrid_layer.py | 11 +++++++---- tests/unit/test_layer.py | 10 ++++++---- 4 files changed, 36 insertions(+), 17 deletions(-) diff --git a/semantic_router/hybrid_layer.py b/semantic_router/hybrid_layer.py index 692ed897..f293891a 100644 --- a/semantic_router/hybrid_layer.py +++ b/semantic_router/hybrid_layer.py @@ -41,8 +41,10 @@ def __init__( if self.top_k < 1: raise ValueError(f"top_k needs to be >= 1, but was: {self.top_k}.") self.aggregation = aggregation - if not isinstance(self.aggregation, str) or self.aggregation not in ["SUM", "MEAN", "MAX"]: - raise ValueError(f"Unsupported aggregation method chosen: {aggregation}. Choose either 'SUM', 'MEAN', or 'MAX'.") + if self.aggregation not in ["SUM", "MEAN", "MAX"]: + raise ValueError( + f"Unsupported aggregation method chosen: {aggregation}. Choose either 'SUM', 'MEAN', or 'MAX'." + ) self.aggregation_method = self._set_aggregation_method(self.aggregation) self.routes = routes if isinstance(self.sparse_encoder, TfidfEncoder) and hasattr( @@ -178,7 +180,9 @@ def _set_aggregation_method(self, aggregation: str = "SUM"): elif aggregation == "MAX": return lambda x: max(x) else: - raise ValueError(f"Unsupported aggregation method chosen: {aggregation}. Choose either 'SUM', 'MEAN', or 'MAX'.") + raise ValueError( + f"Unsupported aggregation method chosen: {aggregation}. Choose either 'SUM', 'MEAN', or 'MAX'." + ) def _semantic_classify(self, query_results: List[Dict]) -> Tuple[str, List[float]]: scores_by_class: Dict[str, List[float]] = {} @@ -191,7 +195,10 @@ def _semantic_classify(self, query_results: List[Dict]) -> Tuple[str, List[float scores_by_class[route] = [score] # Calculate total score for each class - total_scores = {route: self.aggregation_method(scores) for route, scores in scores_by_class.items()} + total_scores = { + route: self.aggregation_method(scores) + for route, scores in scores_by_class.items() + } top_class = max(total_scores, key=lambda x: total_scores[x], default=None) # Return the top class and its associated scores diff --git a/semantic_router/layer.py b/semantic_router/layer.py index 309bf60d..9a856a86 100644 --- a/semantic_router/layer.py +++ b/semantic_router/layer.py @@ -202,8 +202,10 @@ def __init__( if self.top_k < 1: raise ValueError(f"top_k needs to be >= 1, but was: {self.top_k}.") self.aggregation = aggregation - if not isinstance(self.aggregation, str) or self.aggregation not in ["SUM", "MEAN", "MAX"]: - raise ValueError(f"Unsupported aggregation method chosen: {aggregation}. Choose either 'SUM', 'MEAN', or 'MAX'.") + if self.aggregation not in ["SUM", "MEAN", "MAX"]: + raise ValueError( + f"Unsupported aggregation method chosen: {aggregation}. Choose either 'SUM', 'MEAN', or 'MAX'." + ) self.aggregation_method = self._set_aggregation_method(self.aggregation) # set route score thresholds if not already set @@ -400,7 +402,7 @@ def _retrieve(self, xq: Any, top_k: int = 5) -> List[dict]: # get scores and routes scores, routes = self.index.query(vector=xq, top_k=top_k) return [{"route": d, "score": s.item()} for d, s in zip(routes, scores)] - + def _set_aggregation_method(self, aggregation: str = "SUM"): if aggregation == "SUM": return lambda x: sum(x) @@ -409,7 +411,9 @@ def _set_aggregation_method(self, aggregation: str = "SUM"): elif aggregation == "MAX": return lambda x: max(x) else: - raise ValueError(f"Unsupported aggregation method chosen: {aggregation}. Choose either 'SUM', 'MEAN', or 'MAX'.") + raise ValueError( + f"Unsupported aggregation method chosen: {aggregation}. Choose either 'SUM', 'MEAN', or 'MAX'." + ) def _semantic_classify(self, query_results: List[dict]) -> Tuple[str, List[float]]: scores_by_class: Dict[str, List[float]] = {} @@ -422,7 +426,10 @@ def _semantic_classify(self, query_results: List[dict]) -> Tuple[str, List[float scores_by_class[route] = [score] # Calculate total score for each class - total_scores = {route: self.aggregation_method(scores) for route, scores in scores_by_class.items()} + total_scores = { + route: self.aggregation_method(scores) + for route, scores in scores_by_class.items() + } top_class = max(total_scores, key=lambda x: total_scores[x], default=None) # Return the top class and its associated scores diff --git a/tests/unit/test_hybrid_layer.py b/tests/unit/test_hybrid_layer.py index 60b218c8..4b65b4cf 100644 --- a/tests/unit/test_hybrid_layer.py +++ b/tests/unit/test_hybrid_layer.py @@ -192,7 +192,7 @@ def test_add_route_tfidf(self, cohere_encoder, tfidf_encoder, routes): ] assert hybrid_route_layer.sparse_index is not None assert len(hybrid_route_layer.sparse_index) == len(all_utterances) - + def test_setting_aggregation_methods(self, openai_encoder, routes): for agg in ["SUM", "MEAN", "MAX"]: route_layer = HybridRouteLayer( @@ -202,8 +202,10 @@ def test_setting_aggregation_methods(self, openai_encoder, routes): aggregation=agg, ) assert route_layer.aggregation == agg - - def test_semantic_classify_multiple_routes_with_different_aggregation(self, openai_encoder, routes): + + def test_semantic_classify_multiple_routes_with_different_aggregation( + self, openai_encoder, routes + ): route_scores = [ {"route": "Route 1", "score": 0.5}, {"route": "Route 1", "score": 0.5}, @@ -226,7 +228,7 @@ def test_semantic_classify_multiple_routes_with_different_aggregation(self, open if agg == "SUM": assert classification == "Route 1" - assert score == [0.5,] * 4 + assert score == [0.5, 0.5, 0.5, 0.5] elif agg == "MEAN": assert classification == "Route 2" assert score == [0.4, 0.6, 0.8] @@ -234,4 +236,5 @@ def test_semantic_classify_multiple_routes_with_different_aggregation(self, open assert classification == "Route 3" assert score == [0.1, 1.0] + # Add more tests for edge cases and error handling as needed. diff --git a/tests/unit/test_layer.py b/tests/unit/test_layer.py index 9eb1acaf..06f24896 100644 --- a/tests/unit/test_layer.py +++ b/tests/unit/test_layer.py @@ -523,7 +523,7 @@ def test_remove(self): layer_config = LayerConfig(routes=[route]) layer_config.remove("test") assert layer_config.routes == [] - + def test_setting_aggregation_methods(self, openai_encoder, routes): for agg in ["SUM", "MEAN", "MAX"]: route_layer = RouteLayer( @@ -532,8 +532,10 @@ def test_setting_aggregation_methods(self, openai_encoder, routes): aggregation=agg, ) assert route_layer.aggregation == agg - - def test_semantic_classify_multiple_routes_with_different_aggregation(self, openai_encoder, routes): + + def test_semantic_classify_multiple_routes_with_different_aggregation( + self, openai_encoder, routes + ): route_scores = [ {"route": "Route 1", "score": 0.5}, {"route": "Route 1", "score": 0.5}, @@ -555,7 +557,7 @@ def test_semantic_classify_multiple_routes_with_different_aggregation(self, open if agg == "SUM": assert classification == "Route 1" - assert score == [0.5,] * 4 + assert score == [0.5, 0.5, 0.5, 0.5] elif agg == "MEAN": assert classification == "Route 2" assert score == [0.4, 0.6, 0.8] From fcb2d1af167f98bab5aa6196fce668c715a1365b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Andr=C3=A9=20Pedersen?= Date: Thu, 14 Mar 2024 16:30:23 +0100 Subject: [PATCH 09/10] Updated codecov file --- coverage.xml | 385 +++++++++++++++++++++++++++------------------------ 1 file changed, 206 insertions(+), 179 deletions(-) diff --git a/coverage.xml b/coverage.xml index ef0c214f..321f6c5c 100644 --- a/coverage.xml +++ b/coverage.xml @@ -1,12 +1,12 @@ - + /Users/andreped/workspace/semantic-router/semantic_router - + @@ -18,7 +18,7 @@ - + @@ -33,104 +33,116 @@ - - - + + - - + + - - + + - + + - - + + + - - - + + - - + + + - - - - - - + + + + + - - + - + + + - - + + + - - + + - - - - - - - + + + + + - - + - + - + - + + - - + - - + + + - - - + + + + + + + + + + + + + + + - + @@ -204,217 +216,232 @@ - + + - - + + - - + + - + - - - - + + + + - + - + - + - - + + - + - - - + - - + + + - + + - + - - - - - - - - + + + + + + + + + + - - - - - - + + + - - - - - - - - - + + + + + + + + + + + + - - - - - + + + + - + + - - - - + + + - - - - - - + + + - + + + + + - - - + + + - - + - - - - - - - - - + + + + + + + + + + + - - - - - - + + + - + - - + + + + - - - + - - - + + + + + - - - + + + - - - + + + - + + - - + + - - - - - + + + - + - - + - - + - - - - + + + - + + + + - - - - + + + + + + + - + + + + + + + + + + + @@ -671,7 +698,7 @@ - + @@ -878,7 +905,7 @@ - + @@ -903,12 +930,12 @@ - + - - - - + + + + From 7cd375590a7511304699e06a9189c214fcf0b01d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Andr=C3=A9=20Pedersen?= Date: Fri, 15 Mar 2024 07:58:49 +0100 Subject: [PATCH 10/10] Change aggregation method names to lower case --- semantic_router/hybrid_layer.py | 12 ++++++------ semantic_router/layer.py | 12 ++++++------ tests/unit/test_hybrid_layer.py | 10 +++++----- tests/unit/test_layer.py | 10 +++++----- 4 files changed, 22 insertions(+), 22 deletions(-) diff --git a/semantic_router/hybrid_layer.py b/semantic_router/hybrid_layer.py index f293891a..5f223384 100644 --- a/semantic_router/hybrid_layer.py +++ b/semantic_router/hybrid_layer.py @@ -25,7 +25,7 @@ def __init__( routes: List[Route] = [], alpha: float = 0.3, top_k: int = 5, - aggregation: str = "SUM", + aggregation: str = "sum", ): self.encoder = encoder self.score_threshold = self.encoder.score_threshold @@ -41,7 +41,7 @@ def __init__( if self.top_k < 1: raise ValueError(f"top_k needs to be >= 1, but was: {self.top_k}.") self.aggregation = aggregation - if self.aggregation not in ["SUM", "MEAN", "MAX"]: + if self.aggregation not in ["sum", "mean", "max"]: raise ValueError( f"Unsupported aggregation method chosen: {aggregation}. Choose either 'SUM', 'MEAN', or 'MAX'." ) @@ -172,12 +172,12 @@ def _convex_scaling(self, dense: np.ndarray, sparse: np.ndarray): sparse = np.array(sparse) * (1 - self.alpha) return dense, sparse - def _set_aggregation_method(self, aggregation: str = "SUM"): - if aggregation == "SUM": + def _set_aggregation_method(self, aggregation: str = "sum"): + if aggregation == "sum": return lambda x: sum(x) - elif aggregation == "MEAN": + elif aggregation == "mean": return lambda x: np.mean(x) - elif aggregation == "MAX": + elif aggregation == "max": return lambda x: max(x) else: raise ValueError( diff --git a/semantic_router/layer.py b/semantic_router/layer.py index 9a856a86..221de2be 100644 --- a/semantic_router/layer.py +++ b/semantic_router/layer.py @@ -183,7 +183,7 @@ def __init__( routes: Optional[List[Route]] = None, index: Optional[BaseIndex] = None, # type: ignore top_k: int = 5, - aggregation: str = "SUM", + aggregation: str = "sum", ): logger.info("local") self.index: BaseIndex = index if index is not None else LocalIndex() @@ -202,7 +202,7 @@ def __init__( if self.top_k < 1: raise ValueError(f"top_k needs to be >= 1, but was: {self.top_k}.") self.aggregation = aggregation - if self.aggregation not in ["SUM", "MEAN", "MAX"]: + if self.aggregation not in ["sum", "mean", "max"]: raise ValueError( f"Unsupported aggregation method chosen: {aggregation}. Choose either 'SUM', 'MEAN', or 'MAX'." ) @@ -403,12 +403,12 @@ def _retrieve(self, xq: Any, top_k: int = 5) -> List[dict]: scores, routes = self.index.query(vector=xq, top_k=top_k) return [{"route": d, "score": s.item()} for d, s in zip(routes, scores)] - def _set_aggregation_method(self, aggregation: str = "SUM"): - if aggregation == "SUM": + def _set_aggregation_method(self, aggregation: str = "sum"): + if aggregation == "sum": return lambda x: sum(x) - elif aggregation == "MEAN": + elif aggregation == "mean": return lambda x: np.mean(x) - elif aggregation == "MAX": + elif aggregation == "max": return lambda x: max(x) else: raise ValueError( diff --git a/tests/unit/test_hybrid_layer.py b/tests/unit/test_hybrid_layer.py index 4b65b4cf..bf0c2ad2 100644 --- a/tests/unit/test_hybrid_layer.py +++ b/tests/unit/test_hybrid_layer.py @@ -194,7 +194,7 @@ def test_add_route_tfidf(self, cohere_encoder, tfidf_encoder, routes): assert len(hybrid_route_layer.sparse_index) == len(all_utterances) def test_setting_aggregation_methods(self, openai_encoder, routes): - for agg in ["SUM", "MEAN", "MAX"]: + for agg in ["sum", "mean", "max"]: route_layer = HybridRouteLayer( encoder=openai_encoder, sparse_encoder=sparse_encoder, @@ -217,7 +217,7 @@ def test_semantic_classify_multiple_routes_with_different_aggregation( {"route": "Route 3", "score": 0.1}, {"route": "Route 3", "score": 1.0}, ] - for agg in ["SUM", "MEAN", "MAX"]: + for agg in ["sum", "mean", "max"]: route_layer = HybridRouteLayer( encoder=openai_encoder, sparse_encoder=sparse_encoder, @@ -226,13 +226,13 @@ def test_semantic_classify_multiple_routes_with_different_aggregation( ) classification, score = route_layer._semantic_classify(route_scores) - if agg == "SUM": + if agg == "sum": assert classification == "Route 1" assert score == [0.5, 0.5, 0.5, 0.5] - elif agg == "MEAN": + elif agg == "mean": assert classification == "Route 2" assert score == [0.4, 0.6, 0.8] - elif agg == "MAX": + elif agg == "max": assert classification == "Route 3" assert score == [0.1, 1.0] diff --git a/tests/unit/test_layer.py b/tests/unit/test_layer.py index 06f24896..4a55777b 100644 --- a/tests/unit/test_layer.py +++ b/tests/unit/test_layer.py @@ -525,7 +525,7 @@ def test_remove(self): assert layer_config.routes == [] def test_setting_aggregation_methods(self, openai_encoder, routes): - for agg in ["SUM", "MEAN", "MAX"]: + for agg in ["sum", "mean", "max"]: route_layer = RouteLayer( encoder=openai_encoder, routes=routes, @@ -547,7 +547,7 @@ def test_semantic_classify_multiple_routes_with_different_aggregation( {"route": "Route 3", "score": 0.1}, {"route": "Route 3", "score": 1.0}, ] - for agg in ["SUM", "MEAN", "MAX"]: + for agg in ["sum", "mean", "max"]: route_layer = RouteLayer( encoder=openai_encoder, routes=routes, @@ -555,12 +555,12 @@ def test_semantic_classify_multiple_routes_with_different_aggregation( ) classification, score = route_layer._semantic_classify(route_scores) - if agg == "SUM": + if agg == "sum": assert classification == "Route 1" assert score == [0.5, 0.5, 0.5, 0.5] - elif agg == "MEAN": + elif agg == "mean": assert classification == "Route 2" assert score == [0.4, 0.6, 0.8] - elif agg == "MAX": + elif agg == "max": assert classification == "Route 3" assert score == [0.1, 1.0]