Skip to content

Commit

Permalink
First draft of new API for semanticexplorer
Browse files Browse the repository at this point in the history
  • Loading branch information
fbanados committed Jan 15, 2025
1 parent 8ba2d54 commit 15facfc
Show file tree
Hide file tree
Showing 7 changed files with 93 additions and 22 deletions.
6 changes: 6 additions & 0 deletions src/morphodict/api/urls.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,4 +14,10 @@
api_views.click_in_text_embedded_test,
name="dictionary-click-in-text-embedded-test",
),
# API for semantic explorer
path(
"api/rapidwords-index/",
api_views.rapidwords_index,
name="dictionary-rapidwords-index-api",
),
]
25 changes: 24 additions & 1 deletion src/morphodict/api/views.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
from django.http import HttpResponse, HttpResponseBadRequest, JsonResponse, Http404
from django.shortcuts import render

from morphodict.search import api_search
from morphodict.search import api_search, wordnet_index_search


def click_in_text(request) -> HttpResponse:
Expand Down Expand Up @@ -30,3 +30,26 @@ def click_in_text_embedded_test(request):
if not settings.DEBUG:
raise Http404()
return render(request, "API/click-in-text-embedded-test.html")


def rapidwords_index(request) -> HttpResponse:
    """
    Look up search results by RapidWords index and return them as JSON.

    The index is read from the ``rw_index`` GET parameter. A missing or
    empty parameter is answered with HTTP 400. Otherwise the reply is a JSON
    object with a single ``"results"`` key, which holds an empty list when
    the search yields nothing.

    See SerializedSearchResult in schema.py for API specifications.
    """
    index_param = request.GET.get("rw_index")
    if index_param is None:
        return HttpResponseBadRequest("index param rw_index is missing")
    if index_param == "":
        return HttpResponseBadRequest("index param rw_index is an empty string")

    matches = wordnet_index_search(index=index_param)
    serialized = matches.serialized_presentation_results() if matches else []

    reply = JsonResponse({"results": serialized})
    # Wildcard CORS header: the response may be read from any origin.
    reply["Access-Control-Allow-Origin"] = "*"
    return reply
15 changes: 14 additions & 1 deletion src/morphodict/search/__init__.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,9 @@
from .runner import search, wordnet_search as wordnet_runner
from .core import SearchResults
from .core import SearchResults, Result
from .presentation import SerializedPresentationResult
from .query import Query
from .wordnet import WordnetEntry
from morphodict.lexicon.models import RapidWords


def search_with_affixes(
Expand Down Expand Up @@ -44,3 +45,15 @@ def wordnet_search(query: str) -> list[tuple[WordnetEntry, str, SearchResults]]
# If we are doing an english simple phrase
search_query = Query(query)
return wordnet_runner(search_query)


def wordnet_index_search(index: str) -> SearchResults | None:
    """
    Collect the wordforms of one RapidWords category into a SearchResults.

    :param index: RapidWords index to look up; surrounding whitespace is
        stripped before the lookup.
    :return: a SearchResults holding one Result (created with
        ``rapidwords_match=True``) per wordform of the matching category, or
        None when the index does not identify exactly one category.
    """
    try:
        rw_category = RapidWords.objects.get(index=index.strip())
    except (RapidWords.DoesNotExist, RapidWords.MultipleObjectsReturned):
        # Unknown or ambiguous index: report "nothing found" to the caller.
        # Any other failure is a real error and is allowed to propagate
        # instead of being silently discarded.
        return None

    results = SearchResults()
    for word in rw_category.wordforms.all():
        results.add_result(Result(word, rapidwords_match=True))
    return results
5 changes: 2 additions & 3 deletions src/morphodict/search/runner.py
Original file line number Diff line number Diff line change
Expand Up @@ -163,7 +163,7 @@ def wordnet_search(
get_lemma_freq(wn_results)
for result in wn_results.unsorted_results():
result.relevance_score = result.lemma_freq
"""

if wordnet_search.espt:
# Then it is an inflected query that should be Espt-Search based
espt_search = EsptSearch(query, wn_results)
Expand All @@ -175,8 +175,7 @@ def wordnet_search(
wordnet_search.espt.query.old_query_terms
)
definition = wordnet_search.inflect_wordnet_definition(wn_entry)
"""
definition = wn_entry.definition()

results.append((wn_entry, definition, wn_results))
return results

Expand Down
2 changes: 2 additions & 0 deletions src/morphodict/search/types.py
Original file line number Diff line number Diff line change
Expand Up @@ -234,6 +234,8 @@ def create_related_result(self, new_wordform, **kwargs):

target_language_wordnet_match: list[str] = field(default_factory=list)

rapidwords_match: Optional[bool] = None

def features(self):
ret = {}
for field in dataclasses.fields(Result):
Expand Down
35 changes: 18 additions & 17 deletions src/morphodict/search/wordnet.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,17 +17,18 @@
class WordNetSearch:
synsets: list[WordNetSynset]

# espt: EsptSearch | None
espt: EsptSearch | None

def __init__(self, query: Query):
# self.espt = None
self.espt = None
canonical_query: list[str] = query.query_terms
# if 1 < len(query.query_terms):
# self.espt = EsptSearch(query, SearchResults())
# self.espt.convert_search_query_to_espt()
# if not self.espt.query_analyzed_ok:
# self.espt = None
# else:
# canonical_query = self.espt.query.query_terms
if 1 < len(query.query_terms):
self.espt = EsptSearch(query, SearchResults())
self.espt.convert_search_query_to_espt()
if not self.espt.query_analyzed_ok:
self.espt = None
else:
canonical_query = self.espt.query.query_terms
lemmas = wordnet.synsets("_".join(canonical_query))
candidate_infinitive = [x.removesuffix("s") for x in canonical_query]
if canonical_query != candidate_infinitive:
Expand All @@ -43,10 +44,9 @@ def ranking(synset: WordNetSynset) -> int:

self.synsets.sort(key=ranking, reverse=True)

"""
def inflect_wordnet_definition(self, wn_entry: WordnetEntry) -> str:
if self.espt:
results: list[str]= []
results: list[str] = []
orig_tags_starting_with_plus: list[str] = []
tags_ending_with_plus: list[str] = []
if self.espt.tags:
Expand All @@ -59,16 +59,16 @@ def inflect_wordnet_definition(self, wn_entry: WordnetEntry) -> str:
noun_tags = []
if "+N" in self.espt.tags:
noun_tags = [
tag
for tag in self.espt.tags
if tag in source_noun_tags
tag for tag in self.espt.tags if tag in source_noun_tags
]
if "+N" in tags_starting_with_plus:
tags_starting_with_plus.remove("+N")
if "+Der/Dim" in tags_starting_with_plus:
# noun tags need to be repeated in this case
insert_index = tags_starting_with_plus.index("+Der/Dim") + 1
tags_starting_with_plus[insert_index:insert_index] = noun_tags
tags_starting_with_plus[insert_index:insert_index] = (
noun_tags
)

analysis = RichAnalysis(
(
Expand All @@ -81,12 +81,13 @@ def inflect_wordnet_definition(self, wn_entry: WordnetEntry) -> str:
for phrase in wn_entry.definition().split(";"):
clean_phrase = cleanup_target_definition_for_translation(phrase)
tags_starting_with_plus = orig_tags_starting_with_plus[:]
result = inflect_target_language_phrase(analysis,clean_phrase) or inflect_target_language_phrase(analysis,"to "+clean_phrase)
result = inflect_target_language_phrase(
analysis, clean_phrase
) or inflect_target_language_phrase(analysis, "to " + clean_phrase)
if result:
results.append(result)
else:
results.append(phrase)
return ";".join(results)

return wn_entry.definition()
"""
27 changes: 27 additions & 0 deletions src/morphodict/tests/API_tests/test_views.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
import pytest
import json
from django.urls import reverse

ASCII_WAPAMEW = "wapamew"
Expand Down Expand Up @@ -53,3 +54,29 @@ def test_click_in_text_disables_affix_search(client):
reverse("dictionary-word-click-in-text-api") + f"?q={ASCII_WAPAMEW}"
).content.decode("utf-8")
assert EXPECTED_SUFFIX_SEARCH_RESULT not in click_in_text_response


@pytest.mark.django_db
def test_dictionary_rapidword_index_api_fail(client):
    """A request without the rw_index parameter must be answered with 400."""
    endpoint = reverse("dictionary-rapidwords-index-api")
    assert client.get(endpoint).status_code == 400


@pytest.mark.django_db
def test_dictionary_rapidword_index_api_succeed(client):
    """
    A valid rw_index yields a non-empty, well-formed "results" list.

    Each result must expose a "lemma_wordform" carrying "text",
    "linguist_info" and "definitions", and every definition must have "text".
    NOTE(review): presumably index 5.2.2 is populated in the test database;
    confirm against the fixtures.
    """
    response = client.get(
        reverse("dictionary-rapidwords-index-api") + "?rw_index=5.2.2"
    )
    # Check the status first so a failure is reported as an HTTP problem
    # rather than as a confusing JSON or key error further down.
    assert response.status_code == 200

    payload = json.loads(response.content.decode("utf-8"))
    assert "results" in payload
    assert len(payload["results"]) > 0
    for result in payload["results"]:
        assert "lemma_wordform" in result
        lemma_wordform = result["lemma_wordform"]
        assert "text" in lemma_wordform
        assert "linguist_info" in lemma_wordform
        assert "definitions" in lemma_wordform
        for definition in lemma_wordform["definitions"]:
            assert "text" in definition

0 comments on commit 15facfc

Please sign in to comment.