From 15facfc979e1b6232341971ba44aaf80e310cc51 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Felipe=20Ba=C3=B1ados=20Schwerter?=
Date: Wed, 15 Jan 2025 21:44:12 +0000
Subject: [PATCH] First draft of new API for semanticexplorer

Add an api/rapidwords-index/ endpoint that returns the entries of one
RapidWords category as JSON. It is backed by a new wordnet_index_search()
helper and a rapidwords_match flag on Result.

Also re-enable the previously commented-out EsptSearch handling in
WordNetSearch and wordnet_search().
---
 src/morphodict/api/urls.py                   |  6 ++++
 src/morphodict/api/views.py                  | 25 +++++++++++++-
 src/morphodict/search/__init__.py            | 15 ++++++++-
 src/morphodict/search/runner.py              |  5 ++-
 src/morphodict/search/types.py               |  2 ++
 src/morphodict/search/wordnet.py             | 35 ++++++++++----------
 src/morphodict/tests/API_tests/test_views.py | 27 +++++++++++++++
 7 files changed, 93 insertions(+), 22 deletions(-)

diff --git a/src/morphodict/api/urls.py b/src/morphodict/api/urls.py
index c5c2ffeb3..66342f7b1 100644
--- a/src/morphodict/api/urls.py
+++ b/src/morphodict/api/urls.py
@@ -14,4 +14,10 @@
         api_views.click_in_text_embedded_test,
         name="dictionary-click-in-text-embedded-test",
     ),
+    # API for semantic explorer
+    path(
+        "api/rapidwords-index/",
+        api_views.rapidwords_index,
+        name="dictionary-rapidwords-index-api",
+    ),
 ]
diff --git a/src/morphodict/api/views.py b/src/morphodict/api/views.py
index ebc7f6e42..919d1f6b2 100644
--- a/src/morphodict/api/views.py
+++ b/src/morphodict/api/views.py
@@ -2,7 +2,7 @@
 from django.http import HttpResponse, HttpResponseBadRequest, JsonResponse, Http404
 from django.shortcuts import render
 
-from morphodict.search import api_search
+from morphodict.search import api_search, wordnet_index_search
 
 
 def click_in_text(request) -> HttpResponse:
@@ -30,3 +30,26 @@ def click_in_text_embedded_test(request):
     if not settings.DEBUG:
         raise Http404()
     return render(request, "API/click-in-text-embedded-test.html")
+
+
+def rapidwords_index(request) -> HttpResponse:
+    """
+    Return, as JSON, the entries of the RapidWords category given by ?rw_index=
+    see SerializedSearchResult in schema.py for API specifications
+    """
+
+    rw_index = request.GET.get("rw_index")
+    if rw_index is None:
+        return HttpResponseBadRequest("index param rw_index is missing")
+    elif rw_index == "":
+        return HttpResponseBadRequest("index param rw_index is an empty string")
+
+    results = wordnet_index_search(index=rw_index)
+    if results:
+        response = {"results": results.serialized_presentation_results()}
+    else:
+        response = {"results": []}
+
+    json_response = JsonResponse(response)
+    json_response["Access-Control-Allow-Origin"] = "*"
+    return json_response
diff --git a/src/morphodict/search/__init__.py b/src/morphodict/search/__init__.py
index 057a8a460..357a2492a 100644
--- a/src/morphodict/search/__init__.py
+++ b/src/morphodict/search/__init__.py
@@ -1,8 +1,9 @@
 from .runner import search, wordnet_search as wordnet_runner
-from .core import SearchResults
+from .core import SearchResults, Result
 from .presentation import SerializedPresentationResult
 from .query import Query
 from .wordnet import WordnetEntry
+from morphodict.lexicon.models import RapidWords
 
 
 def search_with_affixes(
@@ -44,3 +45,15 @@ def wordnet_search(query: str) -> list[tuple[WordnetEntry, str, SearchResults]]
     # If we are doing an english simple phrase
     search_query = Query(query)
     return wordnet_runner(search_query)
+
+
+def wordnet_index_search(index: str) -> SearchResults | None:
+    """Return the wordforms of the RapidWords category `index`, or None."""
+    try:
+        rw_category = RapidWords.objects.get(index=index.strip())
+    except RapidWords.DoesNotExist:
+        return None
+    results = SearchResults()
+    for word in rw_category.wordforms.all():
+        results.add_result(Result(word, rapidwords_match=True))
+    return results
diff --git a/src/morphodict/search/runner.py b/src/morphodict/search/runner.py
index c7d9fc4d6..78e81477d 100644
--- a/src/morphodict/search/runner.py
+++ b/src/morphodict/search/runner.py
@@ -163,7 +163,7 @@ def wordnet_search(
         get_lemma_freq(wn_results)
         for result in wn_results.unsorted_results():
             result.relevance_score = result.lemma_freq
-        """
+
         if wordnet_search.espt:
             # Then it is an inflected query that should be Espt-Search based
             espt_search = EsptSearch(query, wn_results)
@@ -175,8 +175,7 @@ def wordnet_search(
                 wordnet_search.espt.query.old_query_terms
             )
         definition = wordnet_search.inflect_wordnet_definition(wn_entry)
-        """
-        definition = wn_entry.definition()
+
         results.append((wn_entry, definition, wn_results))
 
     return results
diff --git a/src/morphodict/search/types.py b/src/morphodict/search/types.py
index da1532def..7ec55c5eb 100644
--- a/src/morphodict/search/types.py
+++ b/src/morphodict/search/types.py
@@ -234,6 +234,8 @@ def create_related_result(self, new_wordform, **kwargs):
 
     target_language_wordnet_match: list[str] = field(default_factory=list)
 
+    rapidwords_match: Optional[bool] = None
+
     def features(self):
         ret = {}
         for field in dataclasses.fields(Result):
diff --git a/src/morphodict/search/wordnet.py b/src/morphodict/search/wordnet.py
index 70cf324a3..932cd26fc 100644
--- a/src/morphodict/search/wordnet.py
+++ b/src/morphodict/search/wordnet.py
@@ -17,17 +17,18 @@
 
 class WordNetSearch:
     synsets: list[WordNetSynset]
-    # espt: EsptSearch | None
+    espt: EsptSearch | None
+
     def __init__(self, query: Query):
-        # self.espt = None
+        self.espt = None
         canonical_query: list[str] = query.query_terms
-        # if 1 < len(query.query_terms):
-        #     self.espt = EsptSearch(query, SearchResults())
-        #     self.espt.convert_search_query_to_espt()
-        #     if not self.espt.query_analyzed_ok:
-        #         self.espt = None
-        #     else:
-        #         canonical_query = self.espt.query.query_terms
+        if 1 < len(query.query_terms):
+            self.espt = EsptSearch(query, SearchResults())
+            self.espt.convert_search_query_to_espt()
+            if not self.espt.query_analyzed_ok:
+                self.espt = None
+            else:
+                canonical_query = self.espt.query.query_terms
         lemmas = wordnet.synsets("_".join(canonical_query))
         candidate_infinitive = [x.removesuffix("s") for x in canonical_query]
         if canonical_query != candidate_infinitive:
@@ -43,10 +44,9 @@ def ranking(synset: WordNetSynset) -> int:
 
         self.synsets.sort(key=ranking, reverse=True)
 
-    """
     def inflect_wordnet_definition(self, wn_entry: WordnetEntry) -> str:
         if self.espt:
-            results: list[str]= []
+            results: list[str] = []
             orig_tags_starting_with_plus: list[str] = []
             tags_ending_with_plus: list[str] = []
             if self.espt.tags:
@@ -59,16 +59,16 @@ def inflect_wordnet_definition(self, wn_entry: WordnetEntry) -> str:
                 noun_tags = []
                 if "+N" in self.espt.tags:
                     noun_tags = [
-                        tag
-                        for tag in self.espt.tags
-                        if tag in source_noun_tags
+                        tag for tag in self.espt.tags if tag in source_noun_tags
                     ]
                     if "+N" in tags_starting_with_plus:
                         tags_starting_with_plus.remove("+N")
                     if "+Der/Dim" in tags_starting_with_plus:
                         # noun tags need to be repeated in this case
                         insert_index = tags_starting_with_plus.index("+Der/Dim") + 1
-                        tags_starting_with_plus[insert_index:insert_index] = noun_tags
+                        tags_starting_with_plus[insert_index:insert_index] = (
+                            noun_tags
+                        )
 
                 analysis = RichAnalysis(
                     (
@@ -81,7 +81,9 @@ def inflect_wordnet_definition(self, wn_entry: WordnetEntry) -> str:
                 for phrase in wn_entry.definition().split(";"):
                     clean_phrase = cleanup_target_definition_for_translation(phrase)
                     tags_starting_with_plus = orig_tags_starting_with_plus[:]
-                    result = inflect_target_language_phrase(analysis,clean_phrase) or inflect_target_language_phrase(analysis,"to "+clean_phrase)
+                    result = inflect_target_language_phrase(
+                        analysis, clean_phrase
+                    ) or inflect_target_language_phrase(analysis, "to " + clean_phrase)
                     if result:
                         results.append(result)
                     else:
@@ -89,4 +91,3 @@ def inflect_wordnet_definition(self, wn_entry: WordnetEntry) -> str:
                 return ";".join(results)
 
         return wn_entry.definition()
-    """
diff --git a/src/morphodict/tests/API_tests/test_views.py b/src/morphodict/tests/API_tests/test_views.py
index b02faabfe..f8f2b697d 100644
--- a/src/morphodict/tests/API_tests/test_views.py
+++ b/src/morphodict/tests/API_tests/test_views.py
@@ -1,4 +1,5 @@
 import pytest
+import json
 from django.urls import reverse
 
 ASCII_WAPAMEW = "wapamew"
@@ -53,3 +54,29 @@ def test_click_in_text_disables_affix_search(client):
         reverse("dictionary-word-click-in-text-api") + f"?q={ASCII_WAPAMEW}"
     ).content.decode("utf-8")
     assert EXPECTED_SUFFIX_SEARCH_RESULT not in click_in_text_response
+
+
+@pytest.mark.django_db
+def test_dictionary_rapidword_index_api_fail(client):
+    response = client.get(reverse("dictionary-rapidwords-index-api"))
+
+    assert response.status_code == 400
+
+
+@pytest.mark.django_db
+def test_dictionary_rapidword_index_api_succeed(client):
+    normal_search_response = client.get(
+        reverse("dictionary-rapidwords-index-api") + "?rw_index=5.2.2"
+    ).content.decode("utf-8")
+    json_response = json.loads(normal_search_response)
+    assert "results" in json_response
+    assert len(json_response["results"]) > 0
+    for result in json_response["results"]:
+        keys = result.keys()
+        assert "lemma_wordform" in keys
+        keys = result["lemma_wordform"].keys()
+        assert "text" in keys
+        assert "linguist_info" in keys
+        assert "definitions" in keys
+        for d in result["lemma_wordform"]["definitions"]:
+            assert "text" in d.keys()