First draft of new API for semanticexplorer

UAlbertaALTLab · Jan 15, 2025 · 15facfc · 15facfc
1 parent 8ba2d54
commit 15facfc
Show file tree

Hide file tree

Showing 7 changed files with 93 additions and 22 deletions.
diff --git a/src/morphodict/api/urls.py b/src/morphodict/api/urls.py
@@ -14,4 +14,10 @@
         api_views.click_in_text_embedded_test,
         name="dictionary-click-in-text-embedded-test",
     ),
+    # API for semantic explorer
+    path(
+        "api/rapidwords-index/",
+        api_views.rapidwords_index,
+        name="dictionary-rapidwords-index-api",
+    ),
 ]
diff --git a/src/morphodict/api/views.py b/src/morphodict/api/views.py
@@ -2,7 +2,7 @@
 from django.http import HttpResponse, HttpResponseBadRequest, JsonResponse, Http404
 from django.shortcuts import render
 
-from morphodict.search import api_search
+from morphodict.search import api_search, wordnet_index_search
 
 
 def click_in_text(request) -> HttpResponse:
@@ -30,3 +30,26 @@ def click_in_text_embedded_test(request):
     if not settings.DEBUG:
         raise Http404()
     return render(request, "API/click-in-text-embedded-test.html")
+
+
+def rapidwords_index(request) -> HttpResponse:
+    """
+    Serve the search results for one RapidWords index as JSON.
+
+    The index is read from the ``rw_index`` GET parameter. A missing or
+    empty parameter is answered with a 400 response. Otherwise the body is
+    ``{"results": [...]}``, where the list is empty whenever the search
+    returns a falsy value.
+    See SerializedSearchResult in schema.py for API specifications.
+    """
+    rw_index = request.GET.get("rw_index")
+    if rw_index is None:
+        return HttpResponseBadRequest("index param rw_index is missing")
+    if rw_index == "":
+        return HttpResponseBadRequest("index param rw_index is an empty string")
+
+    found = wordnet_index_search(index=rw_index)
+    serialized = found.serialized_presentation_results() if found else []
+
+    json_response = JsonResponse({"results": serialized})
+    # Let pages served from any origin read this response.
+    json_response["Access-Control-Allow-Origin"] = "*"
+    return json_response
diff --git a/src/morphodict/search/__init__.py b/src/morphodict/search/__init__.py
@@ -1,8 +1,9 @@
 from .runner import search, wordnet_search as wordnet_runner
-from .core import SearchResults
+from .core import SearchResults, Result
 from .presentation import SerializedPresentationResult
 from .query import Query
 from .wordnet import WordnetEntry
+from morphodict.lexicon.models import RapidWords
 
 
 def search_with_affixes(
@@ -44,3 +45,15 @@ def wordnet_search(query: str) -> list[tuple[WordnetEntry, str, SearchResults]]
     # If we are doing an english simple phrase
     search_query = Query(query)
     return wordnet_runner(search_query)
+
+
+def wordnet_index_search(index: str) -> SearchResults | None:
+    """
+    Collect the wordforms filed under a RapidWords category.
+
+    :param index: RapidWords index to look up; surrounding whitespace is
+        ignored.
+    :return: a SearchResults holding one Result (flagged with
+        ``rapidwords_match=True``) per wordform of the category, or None
+        when the index does not identify exactly one category.
+    """
+    try:
+        rw_category = RapidWords.objects.get(index=index.strip())
+    except (RapidWords.DoesNotExist, RapidWords.MultipleObjectsReturned):
+        # Only a failed lookup means "no results"; any other error (for
+        # example a database failure) propagates instead of being hidden.
+        return None
+
+    results = SearchResults()
+    for word in rw_category.wordforms.all():
+        results.add_result(Result(word, rapidwords_match=True))
+    return results
diff --git a/src/morphodict/search/runner.py b/src/morphodict/search/runner.py
@@ -163,7 +163,7 @@ def wordnet_search(
                 get_lemma_freq(wn_results)
                 for result in wn_results.unsorted_results():
                     result.relevance_score = result.lemma_freq
-                """
+
                 if wordnet_search.espt:
                     # Then it is an inflected query that should be Espt-Search based
                     espt_search = EsptSearch(query, wn_results)
@@ -175,8 +175,7 @@ def wordnet_search(
                             wordnet_search.espt.query.old_query_terms
                         )
                     definition = wordnet_search.inflect_wordnet_definition(wn_entry)
-                """
-                definition = wn_entry.definition()
+
                 results.append((wn_entry, definition, wn_results))
         return results
 

diff --git a/src/morphodict/search/types.py b/src/morphodict/search/types.py
@@ -234,6 +234,8 @@ def create_related_result(self, new_wordform, **kwargs):
 
     target_language_wordnet_match: list[str] = field(default_factory=list)
 
+    rapidwords_match: Optional[bool] = None
+
     def features(self):
         ret = {}
         for field in dataclasses.fields(Result):

diff --git a/src/morphodict/search/wordnet.py b/src/morphodict/search/wordnet.py
@@ -17,17 +17,18 @@
 class WordNetSearch:
     synsets: list[WordNetSynset]
 
-    # espt: EsptSearch | None
+    espt: EsptSearch | None
+
     def __init__(self, query: Query):
-        # self.espt = None
+        self.espt = None
         canonical_query: list[str] = query.query_terms
-        # if 1 < len(query.query_terms):
-        #    self.espt = EsptSearch(query, SearchResults())
-        #    self.espt.convert_search_query_to_espt()
-        #    if not self.espt.query_analyzed_ok:
-        #        self.espt = None
-        #    else:
-        #        canonical_query = self.espt.query.query_terms
+        if 1 < len(query.query_terms):
+            self.espt = EsptSearch(query, SearchResults())
+            self.espt.convert_search_query_to_espt()
+            if not self.espt.query_analyzed_ok:
+                self.espt = None
+            else:
+                canonical_query = self.espt.query.query_terms
         lemmas = wordnet.synsets("_".join(canonical_query))
         candidate_infinitive = [x.removesuffix("s") for x in canonical_query]
         if canonical_query != candidate_infinitive:
@@ -43,10 +44,9 @@ def ranking(synset: WordNetSynset) -> int:
 
         self.synsets.sort(key=ranking, reverse=True)
 
-    """
     def inflect_wordnet_definition(self, wn_entry: WordnetEntry) -> str:
         if self.espt:
-            results: list[str]= []
+            results: list[str] = []
             orig_tags_starting_with_plus: list[str] = []
             tags_ending_with_plus: list[str] = []
             if self.espt.tags:
@@ -59,16 +59,16 @@ def inflect_wordnet_definition(self, wn_entry: WordnetEntry) -> str:
                     noun_tags = []
                     if "+N" in self.espt.tags:
                         noun_tags = [
-                            tag
-                            for tag in self.espt.tags
-                            if tag in source_noun_tags
+                            tag for tag in self.espt.tags if tag in source_noun_tags
                         ]
                         if "+N" in tags_starting_with_plus:
                             tags_starting_with_plus.remove("+N")
                         if "+Der/Dim" in tags_starting_with_plus:
                             # noun tags need to be repeated in this case
                             insert_index = tags_starting_with_plus.index("+Der/Dim") + 1
-                            tags_starting_with_plus[insert_index:insert_index] = noun_tags
+                            tags_starting_with_plus[insert_index:insert_index] = (
+                                noun_tags
+                            )
 
                 analysis = RichAnalysis(
                     (
@@ -81,12 +81,13 @@ def inflect_wordnet_definition(self, wn_entry: WordnetEntry) -> str:
                 for phrase in wn_entry.definition().split(";"):
                     clean_phrase = cleanup_target_definition_for_translation(phrase)
                     tags_starting_with_plus = orig_tags_starting_with_plus[:]
-                    result = inflect_target_language_phrase(analysis,clean_phrase) or inflect_target_language_phrase(analysis,"to "+clean_phrase)
+                    result = inflect_target_language_phrase(
+                        analysis, clean_phrase
+                    ) or inflect_target_language_phrase(analysis, "to " + clean_phrase)
                     if result:
                         results.append(result)
                     else:
                         results.append(phrase)
                 return ";".join(results)
 
         return wn_entry.definition()
-        """
diff --git a/src/morphodict/tests/API_tests/test_views.py b/src/morphodict/tests/API_tests/test_views.py
@@ -1,4 +1,5 @@
 import pytest
+import json
 from django.urls import reverse
 
 ASCII_WAPAMEW = "wapamew"
@@ -53,3 +54,29 @@ def test_click_in_text_disables_affix_search(client):
         reverse("dictionary-word-click-in-text-api") + f"?q={ASCII_WAPAMEW}"
     ).content.decode("utf-8")
     assert EXPECTED_SUFFIX_SEARCH_RESULT not in click_in_text_response
+
+
+@pytest.mark.django_db
+def test_dictionary_rapidword_index_api_fail(client):
+    """Requesting the endpoint without ``rw_index`` is rejected with HTTP 400."""
+    endpoint = reverse("dictionary-rapidwords-index-api")
+    status = client.get(endpoint).status_code
+
+    assert status == 400
+
+
+@pytest.mark.django_db
+def test_dictionary_rapidword_index_api_succeed(client):
+    """
+    Querying RapidWords index 5.2.2 returns well-formed, non-empty results.
+
+    Each result must expose a ``lemma_wordform`` carrying ``text``,
+    ``linguist_info`` and ``definitions``, and every definition must have
+    a ``text`` field.
+    """
+    response = client.get(
+        reverse("dictionary-rapidwords-index-api") + "?rw_index=5.2.2"
+    )
+    # Check the status first so an HTTP failure is not reported as a JSON
+    # decoding error further down.
+    assert response.status_code == 200
+
+    json_response = json.loads(response.content.decode("utf-8"))
+    assert "results" in json_response
+    assert len(json_response["results"]) > 0
+    for result in json_response["results"]:
+        assert "lemma_wordform" in result
+        lemma_wordform = result["lemma_wordform"]
+        assert "text" in lemma_wordform
+        assert "linguist_info" in lemma_wordform
+        assert "definitions" in lemma_wordform
+        for definition in lemma_wordform["definitions"]:
+            assert "text" in definition