-
Notifications
You must be signed in to change notification settings - Fork 3
/
semantic_search_notebook_testomgeving.sb
93 lines (78 loc) · 3.6 KB
/
semantic_search_notebook_testomgeving.sb
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
# %% [markdown]
This notebook can be used to eyeball the quality of the generated embeddings.
To compare similarities, set the `uid` variable to the trace that should serve as the query; the chat messages in the project are then sorted by their distance to this query trace. To run against a different case, replace the `project` parameter in the `connect_project` call.
# %% [python]
import sys
from types import SimpleNamespace
from hansken.connect import connect_project
# A loaded `js` module is taken as the sign that this notebook runs in a browser.
in_browser = 'js' in sys.modules

# Connection to the Hansken test environment; change `project` to look at
# another case.
context = connect_project(
    endpoint='https://gatekeeper01.test.hansken.holmes.nl/gatekeeper/',
    project='411cefbf-f995-416f-9eb2-3cd2f378ca74',
    keystore='https://keystore01.test.hansken.holmes.nl/keystore/',
    # In the browser an empty namespace is passed as auth; elsewhere auth is
    # left as None.
    auth=None if not in_browser else SimpleNamespace(),
    interactive=True,
)
# %% [python]
import numpy
from js import window
# uid of the trace whose embedding is used as the query; the comments below
# appear to list alternative example uids, grouped per topic.
uid = "a0260a2a-d576-4b2c-a84c-682ac94bee14:0-1-0-2-0-6-2-5-161-a"
##DREIGEND
#wanneer je met hansken standaard zoekt krijg je alleen exacte matches
#of je moet maar een paar woorden gebruiken en dan krijg je ook alleen de exacte matches van zinnen waar die woorden in voorkomen.
#2686f05d-0e60-4847-968a-61f8dddef801:0-3-9-16
#d6169d2d-6346-4eb5-acc8-ba2c3ddf8460:0-1-0-0-0-2-1-0-0-1-c-1b-8
#d6169d2d-6346-4eb5-acc8-ba2c3ddf8460:0-1-0-0-0-2-1-0-0-1-c-1b-6
#b94ab972-6bcb-44d6-b08b-d081a33320b2:0-1-0-0-0-1-1-1-0-15-b-3-29
#b94ab972-6bcb-44d6-b08b-d081a33320b2:0-1-0-0-0-1-1-1-0-15-b-3-28
#b94ab972-6bcb-44d6-b08b-d081a33320b2:0-1-0-0-0-1-1-1-0-15-b-3-30
#2686f05d-0e60-4847-968a-61f8dddef801:0-3-9-32
#HOE LAAT GAAN WE ETEN VANAVOND?
#wanneer je met hansken standaard zoekt, krijg je alleen exacte matches.
#b94ab972-6bcb-44d6-b08b-d081a33320b2:0-1-0-0-0-1-1-1-0-15-b-2-3
#ON MY WAY TO AIRPORT
#a0260a2a-d576-4b2c-a84c-682ac94bee14:0-1-0-2-0-6-2-5-161-a
#I AM FREEZING HERE
#a0260a2a-d576-4b2c-a84c-682ac94bee14:0-1-0-2-0-6-2-5-161-d
#CHAMPAGNE
#2686f05d-0e60-4847-968a-61f8dddef801:0-3-3-10
#KILLING AND EATING
#2686f05d-0e60-4847-968a-61f8dddef801:0-1-0-1-2-c-1-8-1f-37
#d6169d2d-6346-4eb5-acc8-ba2c3ddf8460:0-6-2c-ce2
#DRUGS PROMO
#a0260a2a-d576-4b2c-a84c-682ac94bee14:0-4-6-6
#MONEY/INVEST
#2686f05d-0e60-4847-968a-61f8dddef801:0-3-3-1c
#SNAPT DE LADING NIET VAN DIT BERICHT
#b94ab972-6bcb-44d6-b08b-d081a33320b2:0-1-0-0-0-1-1-1-0-15-b-3-27
#2686f05d-0e60-4847-968a-61f8dddef801:0-3-9-48
# Look the query trace up once and reuse it for both reads below, instead of
# requesting the same trace twice (presumably one remote call per lookup).
query_trace = context.trace(uid)

# NOTE(review): this takes the first prediction regardless of its model, while
# the search cell filters on prediction.modelName:bert - confirm that the first
# prediction is always the intended one.
query_hash = query_trace.get('prediction')[0]['embedding']

# Show the message text of the query trace next to its embedding.
print(query_trace.get('chatMessage.message'))
# Decode the embedding bytes as big-endian 32-bit floats for eyeballing.
print(numpy.frombuffer(bytes(query_hash), dtype='>f4'))
# %% [python]
from js import window
from hansken.util import Vector
# Rebind query_hash to a hansken Vector built from the embedding; the search
# cell passes this value as the `value` of its Sort.
# NOTE(review): re-running this cell wraps the already-wrapped value again -
# presumably harmless, confirm.
query_hash = Vector.from_sequence(query_hash)
print(query_hash)
# %% [python]
from base64 import b64encode
from hansken.query import Sort
from js import document
import numpy
from scipy.spatial.distance import euclidean
from html import escape

# Search for traces that have a chat message, sorted by the Euclidean distance
# between their 'bert' embedding and the query vector. Only the chatMessage
# properties are selected and at most 50 results are requested.
candidates = context.search(
    'chatMessage.message:*',
    sort=Sort(
        field='prediction.embedding',
        filter='prediction.modelName:bert',
        mode='euclideanDistance',
        value=query_hash,
    ),
    select=['chatMessage'],
    count=50,
)

# Build an HTML table with one row per search result.
output = document.createElement('table')
table_header = document.createElement('tr')
table_header.innerHTML = "<th>Trace uid</th><th>Euclidean distance</th><th>Preview</th>"
output.appendChild(table_header)

# NOTE(review): this value is not used anywhere in this cell, and astype(int)
# truncates the float components of the embedding - confirm whether it can go.
query_hash_as_intarray = numpy.frombuffer(bytes(query_hash), dtype='>f4').astype(int)

for result in candidates:
    # Message text comes straight from evidence and ends up in innerHTML, so it
    # is HTML-escaped to keep markup or scripts inside a chat message from
    # being interpreted by the browser. The uid is escaped as well for safety.
    uid_html = escape(str(result.uid))
    message_html = escape(str(result["chatMessage.message"]))

    table_row = document.createElement('tr')
    # NOTE(review): the "Euclidean distance" cell is not populated; only
    # chatMessage is selected, so the result's embedding is presumably not
    # available here - TODO fill in once it is.
    table_row.innerHTML = (
        f'<td><a href="http://localhost:9090/gui/technisch/#/projects/{context.project_id}/trace/{uid_html}/detail" target="_blank">{uid_html}</a></td>'
        f'<td></td>'
        f'<td>{message_html}</td>'
    )
    output.appendChild(table_row)

# Final expression of the cell; presumably rendered by the notebook as output.
output