From c65ffa27dd4f0c7eb6ce99acb79b09a87e27cbf3 Mon Sep 17 00:00:00 2001 From: "Andrew X. Shah" Date: Fri, 26 Jan 2024 03:00:30 -0700 Subject: [PATCH] feat(metrics/similarity): create tanimoto coefficient --- src/metrics/similarity.rs | 26 +++++++++++++++++++++++++- 1 file changed, 25 insertions(+), 1 deletion(-) diff --git a/src/metrics/similarity.rs b/src/metrics/similarity.rs index 393dea3..7107522 100644 --- a/src/metrics/similarity.rs +++ b/src/metrics/similarity.rs @@ -1,5 +1,5 @@ use crate::{ - linalg::dot, + linalg::{dot, magnitude}, utils::{intersection, union}, }; @@ -44,3 +44,27 @@ pub fn jaccard_index(a: &[bool], b: &[bool]) -> f64 { intersection as f64 / union as f64 } } + +/// Compute the Tanimoto Coefficient for continuous vectors. +/// A measure of similarity for real-valued vectors that generalizes the Jaccard index. +/// Formula: dot(a, b) / (|a|^2 + |b|^2 - dot(a, b)) +/// +/// # Examples +/// +/// ``` +/// # use engram::metrics::tanimoto_coefficient; +/// let a = [1.0, 2.0, 3.0]; +/// let b = [1.3, 2.1, 3.0]; +/// let c = [1.5, 2.4, 4.0]; +/// assert_eq!(tanimoto_coefficient(&a, &b), 0.9931506849315067); +/// assert_eq!(tanimoto_coefficient(&a, &c), 0.9284627092846273); +/// ``` +pub fn tanimoto_coefficient(a: &[f64], b: &[f64]) -> f64 { + let dot = dot(a, b); + let denominator = magnitude(a) + magnitude(b) - dot; + if denominator == 0.0 { + 0.0 + } else { + dot / denominator + } +}