Skip to content

Commit

Permalink
Merge pull request #32 from EveWCheng/main
Browse files Browse the repository at this point in the history
WWD
  • Loading branch information
woodthom2 authored Apr 10, 2024
2 parents 554fac4 + 1b02305 commit b22aee0
Show file tree
Hide file tree
Showing 2 changed files with 110 additions and 0 deletions.
44 changes: 44 additions & 0 deletions src/harmony/matching/wmd_matcher.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,44 @@
from wmd import WMD
import numpy as np
import math
import libwmdrelax

def euclidean_dist(point1, point2):
    """Return the Euclidean (L2) distance between two equal-length points.

    Args:
        point1: Sized iterable of numeric coordinates.
        point2: Sized iterable of numeric coordinates with the same length
            as ``point1``.

    Returns:
        The square root of the summed squared coordinate differences.

    Raises:
        ValueError: If the two points do not have the same length.
    """
    if len(point1) == len(point2):
        total = 0
        for coord1, coord2 in zip(point1, point2):
            total += (coord1 - coord2) ** 2
        return math.sqrt(total)
    raise ValueError("Points must have the same number of dimensions")

def par_to_vecs(par, vectorisation_function):
    """Apply ``vectorisation_function`` to every item of ``par``.

    Args:
        par: Iterable of items (the callers in this file pass lists of
            sentence strings).
        vectorisation_function: Callable invoked once per item.

    Returns:
        A list holding the callable's result for each item, in input order.
    """
    return list(map(vectorisation_function, par))

def dist(vecs1, vecs2):
    """Build the joint distance matrix and membership weights of two vector sets.

    The two collections are concatenated (``vecs1`` first) and the Euclidean
    distance between every pair of vectors in the union is computed.

    Args:
        vecs1: Iterable of vectors belonging to the first set.
        vecs2: Iterable of vectors belonging to the second set.

    Returns:
        A tuple ``(dist_, nw1, nw2)`` of ``float32`` NumPy arrays where

        * ``dist_`` is the symmetric ``(n, n)`` distance matrix over the
          union, with ``n = len(vecs1) + len(vecs2)`` and a zero diagonal;
        * ``nw1`` has length ``n`` and is 1 at the positions of ``vecs1``
          and 0 elsewhere;
        * ``nw2`` has length ``n`` and is 1 at the positions of ``vecs2``
          and 0 elsewhere.

    Raises:
        ValueError: Propagated from ``euclidean_dist`` when two vectors have
            different lengths.
    """
    # Materialise each input as a list *before* joining them.  ``+`` only
    # concatenates plain lists; on NumPy arrays it adds/broadcasts
    # element-wise and on mixed list/tuple inputs it raises TypeError.
    first, second = list(vecs1), list(vecs2)
    vec_union = first + second
    n1 = len(first)
    n = len(vec_union)

    dist_ = np.zeros((n, n), dtype=np.float32)
    for i in range(n):
        # Only the lower triangle is computed; the matrix is symmetric.
        for j in range(i):
            dist_[i, j] = dist_[j, i] = euclidean_dist(vec_union[i], vec_union[j])

    # NOTE(review): each weight vector sums to the size of its set rather
    # than to 1 -- confirm the downstream EMD solver accepts unnormalised
    # weights, especially when the two sets differ in size.
    nw1 = np.zeros(n, dtype=np.float32)
    nw1[:n1] = 1.0
    nw2 = np.zeros(n, dtype=np.float32)
    nw2[n1:] = 1.0
    return dist_, nw1, nw2


def pars_dist_emd_emdrelaxed(par1, par2, vectorisation_function):
    """Compute the exact and relaxed Earth Mover's Distance between two paragraphs.

    Every item of each paragraph is vectorised, the joint distance matrix
    and membership weights are built with ``dist``, and both
    ``libwmdrelax.emd`` and ``libwmdrelax.emd_relaxed`` are evaluated on
    them.

    Args:
        par1: Iterable of items forming the first paragraph.
        par2: Iterable of items forming the second paragraph.
        vectorisation_function: Callable mapping one item to its vector.

    Returns:
        A tuple ``(emd, emd_relaxed)`` with the results of
        ``libwmdrelax.emd`` and ``libwmdrelax.emd_relaxed`` respectively.
    """
    vecs1 = par_to_vecs(par1, vectorisation_function)
    vecs2 = par_to_vecs(par2, vectorisation_function)
    dist_, nw1, nw2 = dist(vecs1, vecs2)

    # Keep the historical minimum of 100 but grow with the problem size.
    # NOTE(review): presumably the caches must be at least as large as the
    # number of points handed to the solver -- confirm against libwmdrelax.
    cache_size = max(100, len(nw1))

    # The caches are native allocations; release them explicitly so that
    # repeated calls do not leak memory, even when a solver call raises.
    relax_cache = libwmdrelax.emd_relaxed_cache_init(cache_size)
    try:
        cache = libwmdrelax.emd_cache_init(cache_size)
        try:
            emd = libwmdrelax.emd(nw1, nw2, dist_, cache)
            emd_relaxed = libwmdrelax.emd_relaxed(nw1, nw2, dist_, relax_cache)
        finally:
            libwmdrelax.emd_cache_fini(cache)
    finally:
        libwmdrelax.emd_relaxed_cache_fini(relax_cache)
    return emd, emd_relaxed





66 changes: 66 additions & 0 deletions src/harmony_test.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,66 @@
import harmony
import numpy as np
from harmony import match_instruments
import json
import harmony.matching.wmd_matcher
from wmd import WMD

def import_(path="mhc_data/mhc_questions.json"):
    """Load instruments from a JSON Lines file (one JSON document per line).

    Args:
        path: Location of the file to read. Defaults to the MHC questions
            file used by the tests in this module.

    Returns:
        A list with the decoded JSON value of every non-blank line, in file
        order.

    Raises:
        OSError: If the file cannot be opened.
        json.JSONDecodeError: If a non-blank line is not valid JSON.
    """
    with open(path, "r", encoding="utf-8") as f:
        # Blank lines (e.g. a stray trailing newline) carry no record and
        # would otherwise make json.loads raise.
        return [json.loads(line) for line in f if line.strip()]

def texts_similarity_matrix_benchmark(text_vectors):
    """Return the pairwise cosine similarity of the "positive" text vectors.

    ``text_vectors`` is split with
    ``harmony.matching.matcher.vectors_pos_neg``; only the first of the two
    returned arrays is used.

    Args:
        text_vectors: Vectorised texts, passed straight to
            ``vectors_pos_neg``.

    Returns:
        The result of ``cosine_similarity`` applied to the positive vectors
        against themselves, or ``None`` when ``.any()`` on those vectors is
        false.
    """
    positive, _negative = harmony.matching.matcher.vectors_pos_neg(text_vectors)
    if not positive.any():
        return None
    cosine_similarity = harmony.matching.matcher_utils.cosine_similarity
    return cosine_similarity(positive, positive)

def test_similarity():
    """Print the processed questions and their pairwise similarity matrix.

    Two short sample questions are processed, vectorised with the default
    matcher's ``convert_texts_to_vector`` and fed to
    ``texts_similarity_matrix_benchmark``.

    Requires the ``harmonydata`` package (``pip install harmonydata``).
    """
    questions = ["lost my key", "found my car"]
    vectorisation_function = harmony.matching.default_matcher.convert_texts_to_vector
    text_vectors = harmony.matching.matcher.process_questions(questions)
    print(text_vectors)
    text_vectors = harmony.matching.matcher.vectorise_texts(text_vectors, vectorisation_function)
    print(texts_similarity_matrix_benchmark(text_vectors))

def test_match_instruments_with_function():
    """Match a slice of the loaded instruments against a query and dump the result.

    Prints the first loaded instrument, the matched questions and the
    similarity-with-polarity matrix, then writes that matrix to
    ``sim_with_polarity.txt`` (tab-separated) in the current directory.
    The query similarity and the new-vectors dictionary returned by the
    matcher are not used.
    """
    loaded = import_()
    print(loaded[0])

    vectorise = harmony.matching.default_matcher.convert_texts_to_vector
    results = harmony.matching.matcher.match_instruments_with_function(
        loaded[1:10],
        "Lost much sleep over worry?",
        vectorise,
        [],
        [],
        np.zeros((0, 0)),
        {},
    )
    all_questions, similarity_with_polarity, _query_similarity, _new_vectors_dict = results

    print(all_questions)
    print(similarity_with_polarity)
    # NOTE(review): fmt="%d" writes every entry as an integer -- confirm this
    # is intended if the matrix holds fractional similarities.
    np.savetxt("sim_with_polarity.txt", similarity_with_polarity, fmt="%d", delimiter="\t")


def test_wwd():
    """Print the exact and relaxed EMD between two sample paragraphs.

    Four sample paragraphs are defined for experimentation; only ``par4``
    and ``par3`` are compared here.
    """
    par1 = ["I want to go outside", "oh outside is nice"]
    par2 = ["I want to go outside maybe", "oh outside is nice"]
    par3 = ["You are a dog", "I love dogs"]
    par4 = ["I am sad", "are you sad"]

    vectorise = harmony.matching.default_matcher.convert_texts_to_vector
    distances = harmony.matching.wmd_matcher.pars_dist_emd_emdrelaxed(par4, par3, vectorise)
    exact, relaxed = distances
    print(exact)
    print(relaxed)

# Run the WMD demo only when this file is executed as a script, so that
# merely importing the module (e.g. during test collection) has no side
# effects.
if __name__ == "__main__":
    test_wwd()







0 comments on commit b22aee0

Please sign in to comment.