Skip to content

Commit

Permalink
Functional tests: capture algo performance
Browse files Browse the repository at this point in the history
  • Loading branch information
bruvduroiu committed Nov 28, 2023
1 parent a4750ae commit b4fce30
Show file tree
Hide file tree
Showing 8 changed files with 105 additions and 13 deletions.
19 changes: 6 additions & 13 deletions semantic_router/layer.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
from numpy.linalg import norm

from semantic_router.encoders import BaseEncoder, CohereEncoder, OpenAIEncoder
from semantic_router.linear import similarity_matrix, top_scores
from semantic_router.schema import Decision


Expand Down Expand Up @@ -63,18 +64,12 @@ def _query(self, text: str, top_k: int = 5):
xq = np.squeeze(xq) # Reduce to 1d array.

if self.index is not None:
index_norm = norm(self.index, axis=1)
xq_norm = norm(xq.T)
sim = np.dot(self.index, xq.T) / (index_norm * xq_norm)
# get indices of top_k records
top_k = min(top_k, sim.shape[0])
idx = np.argpartition(sim, -top_k)[-top_k:]
scores = sim[idx]
# calculate similarity matrix
sim = similarity_matrix(xq, self.index)
scores, idx = top_scores(sim, top_k)
# get the utterance categories (decision names)
decisions = self.categories[idx] if self.categories is not None else []
return [
{"decision": d, "score": s.item()} for d, s in zip(decisions, scores)
]
return [{"decision": d, "score": s.item()} for d, s in zip(decisions, scores)]
else:
return []

Expand All @@ -89,9 +84,7 @@ def _semantic_classify(self, query_results: list[dict]) -> tuple[str, list[float
scores_by_class[decision] = [score]

# Calculate total score for each class
total_scores = {
decision: sum(scores) for decision, scores in scores_by_class.items()
}
total_scores = {decision: sum(scores) for decision, scores in scores_by_class.items()}
top_class = max(total_scores, key=lambda x: total_scores[x], default=None)

# Return the top class and its associated scores
Expand Down
30 changes: 30 additions & 0 deletions semantic_router/linear.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
from typing import Tuple

import numpy as np
from numpy.linalg import norm


def similarity_matrix(xq: np.ndarray, index: np.ndarray) -> np.ndarray:
    """Compute the cosine similarity between a query vector and a set of vectors.

    Args:
        xq: A query vector (1d ndarray).
        index: A set of vectors, one per row (2d ndarray).

    Returns:
        A 1d ndarray with the cosine similarity between ``xq`` and each row
        of ``index``. Entries whose denominator is zero (a zero-norm row or a
        zero-norm query) are reported as 0.0 instead of NaN.
    """
    index_norm = norm(index, axis=1)
    xq_norm = norm(xq.T)
    denom = index_norm * xq_norm

    # A zero vector has no direction, so its similarity is 0/0. NumPy orders
    # NaN after every finite value when sorting/partitioning, which would make
    # an undefined score look like the best match downstream. Silence the
    # expected warning here and replace those entries with 0.0 below.
    with np.errstate(divide="ignore", invalid="ignore"):
        sim = np.dot(index, xq.T) / denom
    return np.where(denom == 0, 0.0, sim)


def top_scores(sim: np.ndarray, top_k: int = 5) -> Tuple[np.ndarray, np.ndarray]:
    """Select the ``top_k`` highest similarity scores and their positions.

    Args:
        sim: 1d ndarray of similarity scores.
        top_k: Maximum number of entries to return. It is clamped to the
            number of available scores; values <= 0 yield empty results.

    Returns:
        A ``(scores, idx)`` tuple where ``idx`` holds the positions in ``sim``
        of the selected entries and ``scores`` equals ``sim[idx]``. Both are
        ordered by ascending score, so the best match comes last.
    """
    top_k = min(top_k, sim.shape[0])
    if top_k <= 0:
        # Guard needed because ``arr[-0:]`` is the whole array, not an empty
        # one, and argpartition rejects an empty input.
        return np.empty(0, dtype=sim.dtype), np.empty(0, dtype=np.intp)

    # argpartition only guarantees that the last top_k slots hold the largest
    # values; their relative order is unspecified, so sort that small slice.
    idx = np.argpartition(sim, -top_k)[-top_k:]
    idx = idx[np.argsort(sim[idx], kind="stable")]
    scores = sim[idx]

    return scores, idx
69 changes: 69 additions & 0 deletions tests/functional/test_linear.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,69 @@
import pytest
import numpy as np

from semantic_router.linear import similarity_matrix, top_scores


@pytest.fixture
def ident_vector():
    """Return the first row of the 10x10 identity matrix: (1, 0, ..., 0)."""
    basis = np.identity(10)
    return basis[0]


@pytest.fixture
def test_index():
    """Return a small 3x3 integer index used by the top_scores tests."""
    rows = [
        [3, 0, 0],
        [2, 1, 0],
        [0, 1, 0],
    ]
    return np.array(rows)


def test_similarity_matrix__dimensionality():
    """Check that a 1d query against a (100, 10) index yields 100 scores."""
    n_vectors, dim = 100, 10
    query = np.random.random((dim,))  # 10-dimensional embedding vector
    vectors = np.random.random((n_vectors, dim))
    result = similarity_matrix(query, vectors)
    assert result.shape == (n_vectors,)


def test_similarity_matrix__is_norm_max(ident_vector):
    """Comparing a vector against copies of itself should peak at 1."""
    # Stack three copies of the query as the rows of the index.
    row = np.atleast_2d(ident_vector)
    stacked = np.repeat(row, 3, axis=0)
    result = similarity_matrix(ident_vector, stacked)
    assert result.max() == 1.0


def test_similarity_matrix__is_norm_min(ident_vector):
    """Comparing a vector against orthogonal vectors should bottom out at 0."""
    # Shifting the single 1 by one position gives a vector orthogonal to the
    # query; the index is three copies of it.
    shifted = np.roll(np.atleast_2d(ident_vector), 1)
    stacked = np.repeat(shifted, 3, axis=0)
    result = similarity_matrix(ident_vector, stacked)
    assert result.min() == 0.0


def test_top_scores__is_sorted(test_index):
    """Check that top_scores returns indices ordered by ascending similarity.

    The query is row 0 of the index, so row 0 has the highest similarity and
    is expected last; row 2 is orthogonal to it and is expected first.
    """
    query = test_index[0]  # should have max similarity
    similarities = similarity_matrix(query, test_index)
    indices = top_scores(similarities, 3)[1]

    # Scores and indexes should be sorted ascending
    expected = np.array([2, 1, 0])
    assert np.array_equal(indices, expected)


def test_top_scores__scores(test_index):
    """Check the exact scores top_scores returns for a known query and index.

    The query is row 0 of the index, so the expected similarities are 1.0 for
    row 0, about 0.894 for row 1 and 0.0 for the orthogonal row 2.
    """
    query = test_index[0]  # should have max similarity
    similarities = similarity_matrix(query, test_index)
    values = top_scores(similarities, 3)[0]

    # Scores and indexes should be sorted ascending
    expected = np.array([0.0, 0.89442719, 1.0])
    assert np.allclose(values, expected)
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.

0 comments on commit b4fce30

Please sign in to comment.