-
Notifications
You must be signed in to change notification settings - Fork 12
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #32 from EveWCheng/main
WWD
- Loading branch information
Showing 2 changed files with 110 additions and 0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,44 @@ | ||
from wmd import WMD | ||
import numpy as np | ||
import math | ||
import libwmdrelax | ||
|
||
def euclidean_dist(point1, point2):
    """Return the Euclidean (L2) distance between two points.

    Args:
        point1: Sized iterable of numeric coordinates.
        point2: Sized iterable of numeric coordinates with the same length
            as ``point1``.

    Returns:
        The square root of the summed squared coordinate differences.

    Raises:
        ValueError: If the two points do not have the same length.
    """
    if len(point1) != len(point2):
        raise ValueError("Points must have the same number of dimensions")

    squared_diffs = [(a - b) ** 2 for a, b in zip(point1, point2)]
    return math.sqrt(sum(squared_diffs))
|
||
def par_to_vecs(par, vectorisation_function):
    """Apply ``vectorisation_function`` to every sentence of a paragraph.

    Args:
        par: Iterable of sentences.
        vectorisation_function: Callable invoked once per sentence.

    Returns:
        A list holding one result per sentence, in input order.
    """
    vectors = []
    for sentence in par:
        vectors.append(vectorisation_function(sentence))
    return vectors
|
||
def dist(vecs1, vecs2):
    """Build a pairwise distance matrix and indicator weights for two vector sets.

    The points of ``vecs1`` and ``vecs2`` are placed one after the other in a
    single list; the returned matrix holds the Euclidean distance between
    every pair of points in that combined list.

    Args:
        vecs1: Sized sequence of vectors (first set).
        vecs2: Sized sequence of vectors (second set).

    Returns:
        A tuple ``(dist_, nw1, nw2)`` of ``float32`` arrays:
        ``dist_`` is the symmetric ``(n, n)`` distance matrix with a zero
        diagonal, where ``n = len(vecs1) + len(vecs2)``; ``nw1`` is 1 for the
        positions coming from ``vecs1`` and 0 elsewhere; ``nw2`` is 1 for the
        positions coming from ``vecs2`` and 0 elsewhere.

    Raises:
        ValueError: Propagated from ``euclidean_dist`` when two vectors differ
            in length.
    """
    # Concatenate explicitly. ``vecs1 + vecs2`` only concatenates for lists;
    # for NumPy arrays ``+`` is element-wise addition, which would silently
    # produce the wrong set of points.
    points = list(vecs1) + list(vecs2)
    n1, n2 = len(vecs1), len(vecs2)
    n = len(points)

    dist_ = np.zeros((n, n))
    for i in range(n):
        for j in range(i):
            # Fill both triangles at once; the diagonal stays zero.
            dist_[i, j] = dist_[j, i] = euclidean_dist(points[i], points[j])

    # NOTE(review): the weights are 0/1 indicators and are not normalised to
    # sum to 1 -- confirm this is what the downstream solver expects.
    nw1 = np.array([1.0] * n1 + [0.0] * n2, dtype=np.float32)
    nw2 = np.array([0.0] * n1 + [1.0] * n2, dtype=np.float32)
    return np.array(dist_, dtype=np.float32), nw1, nw2
|
||
|
||
def pars_dist_emd_emdrelaxed(par1, par2, vectorisation_function):
    """Compute the exact and relaxed earth mover's distance between two paragraphs.

    Each sentence of both paragraphs is vectorised with
    ``vectorisation_function``; the resulting vectors are turned into a
    distance matrix and weight vectors via ``dist`` and handed to
    ``libwmdrelax``.

    Args:
        par1: Iterable of sentences (first paragraph).
        par2: Iterable of sentences (second paragraph).
        vectorisation_function: Callable mapping one sentence to a vector.

    Returns:
        A tuple ``(emd, emd_relaxed)`` with the values returned by
        ``libwmdrelax.emd`` and ``libwmdrelax.emd_relaxed``.
    """
    vecs1 = par_to_vecs(par1, vectorisation_function)
    vecs2 = par_to_vecs(par2, vectorisation_function)
    dist_, nw1, nw2 = dist(vecs1, vecs2)

    # Keep the historical size of 100 as a floor, but grow with the input.
    # NOTE(review): assumes the cache size must be at least the number of
    # points in the problem -- confirm against the wmd-relax documentation.
    cache_size = max(100, len(nw1))

    # Release the native caches even if a solver call raises; previously they
    # were allocated on every call and never freed.
    relax_cache = libwmdrelax.emd_relaxed_cache_init(cache_size)
    try:
        cache = libwmdrelax.emd_cache_init(cache_size)
        try:
            emd = libwmdrelax.emd(nw1, nw2, dist_, cache)
            emd_relaxed = libwmdrelax.emd_relaxed(nw1, nw2, dist_, relax_cache)
        finally:
            libwmdrelax.emd_cache_fini(cache)
    finally:
        libwmdrelax.emd_relaxed_cache_fini(relax_cache)
    return emd, emd_relaxed
|
||
|
||
|
||
|
||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,66 @@ | ||
import harmony | ||
import numpy as np | ||
from harmony import match_instruments | ||
import json | ||
import harmony.matching.wmd_matcher | ||
from wmd import WMD | ||
|
||
def import_(path="mhc_data/mhc_questions.json"):
    """Load instruments from a JSON Lines file (one JSON document per line).

    Args:
        path: Location of the file to read. Defaults to the original
            hard-coded ``mhc_data/mhc_questions.json``.

    Returns:
        A list with one decoded JSON value per non-blank line, in file order.

    Raises:
        OSError: If the file cannot be opened.
        json.JSONDecodeError: If a non-blank line is not valid JSON.
    """
    with open(path, "r", encoding="utf-8") as f:
        # Skip blank lines (e.g. a trailing empty line at end of file) rather
        # than letting json.loads fail on an empty string.
        return [json.loads(line) for line in f if line.strip()]
|
||
def texts_similarity_matrix_benchmark(text_vectors):
    """Return the pairwise cosine similarity of the positive text vectors.

    The input is split with ``harmony.matching.matcher.vectors_pos_neg``;
    only the first element of that result is used here.

    Args:
        text_vectors: Value accepted by
            ``harmony.matching.matcher.vectors_pos_neg``.

    Returns:
        The result of ``cosine_similarity(vectors_pos, vectors_pos)``, or
        ``None`` when ``vectors_pos.any()`` is false (empty or all-zero).
    """
    vectors_pos, _vectors_neg = harmony.matching.matcher.vectors_pos_neg(text_vectors)

    # Make the "nothing to compare" case explicit instead of relying on a
    # fall-through or an unbound local variable.
    if not vectors_pos.any():
        return None
    return harmony.matching.matcher_utils.cosine_similarity(vectors_pos, vectors_pos)
|
||
def test_similarity():
    """Print the processed questions and their benchmark similarity matrix."""
    # Longer sample set; it is immediately superseded by the short pair below.
    questions = [
        "I was bothered by things that usually don’t bother me.",
        "I did not feel like eating; my appetite was poor.",
        "I felt that I could not shake off the blues even with help from my family or friends.",
        "I felt I was just as good as other people.",
    ]
    questions = ["lost my key", "found my car"]

    to_vectors = harmony.matching.default_matcher.convert_texts_to_vector
    processed = harmony.matching.matcher.process_questions(questions)
    print(processed)

    vectorised = harmony.matching.matcher.vectorise_texts(processed, to_vectors)
    similarity = texts_similarity_matrix_benchmark(vectorised)
    print(similarity)
# pip install harmonydata | ||
|
||
def test_match_instruments_with_function():
    """Match a slice of the imported instruments against one query.

    Prints the second imported... first instrument, the matched questions and
    the similarity matrix, then writes the matrix to ``sim_with_polarity.txt``.
    """
    instruments = import_()
    print(instruments[0])

    query = "Lost much sleep over worry?"
    to_vectors = harmony.matching.default_matcher.convert_texts_to_vector
    result = harmony.matching.matcher.match_instruments_with_function(
        instruments[1:10],
        query,
        to_vectors,
        [],
        [],
        np.zeros((0, 0)),
        {},
    )
    # Four values are expected back; the last two are not used here.
    all_questions, similarity_with_polarity, query_similarity, new_vectors_dict = result

    print(all_questions)
    print(similarity_with_polarity)

    # NOTE(review): fmt='%d' writes integers; if the matrix holds fractional
    # similarity scores they are not preserved in the file -- confirm intent.
    np.savetxt("sim_with_polarity.txt", similarity_with_polarity, fmt='%d', delimiter='\t')
|
||
|
||
def test_wwd():
    """Print the exact and relaxed EMD between two sample paragraphs."""
    to_vectors = harmony.matching.default_matcher.convert_texts_to_vector

    # Sample paragraphs; only par4 and par3 are compared below.
    par1 = ["I want to go outside", "oh outside is nice"]
    par2 = ["I want to go outside maybe", "oh outside is nice"]
    par3 = ["You are a dog", "I love dogs"]
    par4 = ["I am sad", "are you sad"]

    distances = harmony.matching.wmd_matcher.pars_dist_emd_emdrelaxed(par4, par3, to_vectors)
    emd, emd_relaxed = distances
    print(emd)
    print(emd_relaxed)
|
||
# Runs the WMD demo as soon as this module is executed or imported.
test_wwd()
|
||
|
||
|
||
|
||
|
||
|
||
|