Added setting to handle Semantic Searches

UAlbertaALTLab · Jan 14, 2025 · 8ba2d54 · 8ba2d54
1 parent 8c85c5d
commit 8ba2d54
Show file tree

Hide file tree

Showing 11 changed files with 187 additions and 30 deletions.
diff --git a/src/crkeng/app/preferences.py b/src/crkeng/app/preferences.py
@@ -174,6 +174,20 @@ class InflectEnglishPhrase(Preference):
     default = "yes"
 
 
+@register_preference
+class AttemptSemanticSearch(Preference):
+    """
+    Should searches also attempt a semantic search (WordNet)? Defaults to "no".
+    """
+
+    cookie_name = "attempt_semantic_search"
+    choices = {
+        "yes": "Attempt semantic search to provide results distinguishing between different English meanings when data is available",
+        "no": "Use only the standard language search",
+    }
+    default = "no"
+
+
 @register_preference
 class AutoTranslateDefs(Preference):
     """

diff --git a/src/morphodict/frontend/templates/morphodict/settings.html b/src/morphodict/frontend/templates/morphodict/settings.html
@@ -313,6 +313,33 @@ <h3 id="paradigm-audio" class="setting__title">Show/Play Synthesized Audio</h3>
                 </div>
             </form>
         </section>
+
+        <section>
+            <h3 id="attempt-semantic-search" class="setting__title">Attempt semantic search for English queries</h3>
+
+            <form method="POST" action="{% url "preference:change" "attempt_semantic_search" %}"
+                  data-save-preference="attempt_semantic_search">
+                <ul class="unbullet">
+                    {% for value, label in preferences.attempt_semantic_search.choices_with_labels %}
+                        <li class="option">
+                            <label class="option__label">
+                                <input type="radio" name="attempt_semantic_search" value="{{ value }}" class="option__control"
+                                       {% if preferences.attempt_semantic_search.current_choice == value %}checked{% endif %}>
+                                <span class="option__label-text">{{ value|capfirst }}</span>
+                            </label>
+                            <p class="option__description">
+                                {{ label }}
+                            </p>
+                        </li>
+                    {% endfor %}
+                </ul>
+
+                <div class="action-bar">
+                    {% csrf_token %}
+                    <button type="submit"> Save settings</button>
+                </div>
+            </form>
+        </section>
     <section>
             <h3 id="inflect-english-phrase" class="setting__title">Automatically translate English phrases into Cree word-forms </h3>
 

diff --git a/src/morphodict/frontend/templates/morphodict/wordnet-results.html b/src/morphodict/frontend/templates/morphodict/wordnet-results.html
@@ -17,7 +17,7 @@ <h3>Messages from search run</h3>
         <section class="prose box box--spaced">
             <h2 class="prose__section-title">{{result.wn_entry.numbering}}. {{result.wn_entry.original_str}} ({% relabel_one result.wn_entry.paren_pos %})</h2>
             <div class="meanings__meaning" data-cy="lemma-meaning">
-              {{result.wn_entry.definition}} 
+              {{result.definition}} 
             {% include "morphodict/components/citations.html" with dictionary_sources=result.wn_entry.sources %}
             </div>
             {% if result.wn_entry.synonyms|length > 1 %}

diff --git a/src/morphodict/frontend/views.py b/src/morphodict/frontend/views.py
@@ -96,10 +96,10 @@ def index(request):  # pragma: no cover
     if user_query:
         include_auto_definitions = should_include_auto_definitions(request)
         inflect_english_phrases = should_inflect_phrases(request)
-
-        wordnet_results = wordnet_search(user_query)
-        if wordnet_results:
-            return wordnet(request, user_query, wordnet_results)
+        if should_attempt_semantic_search(request, user_query):
+            wordnet_results = wordnet_search(user_query)
+            if wordnet_results:
+                return wordnet(request, user_query, wordnet_results)
 
         search_results = search_with_affixes(
             user_query,
@@ -145,7 +145,8 @@ def wordnet(request, user_query, results):
     def process_result(r):
         return {
             "wn_entry": r[0],
-            "results": r[1].serialized_presentation_results(
+            "definition": r[1],
+            "results": r[2].serialized_presentation_results(
                 display_mode=DisplayMode.current_value_from_request(request),
                 animate_emoji=AnimateEmoji.current_value_from_request(request),
                 show_emoji=ShowEmoji.current_value_from_request(request),
@@ -170,6 +171,34 @@ def search_results(request, query_string: str):  # pragma: no cover
     dict_source = get_dict_source(request)  # type: ignore
     include_auto_definitions = should_include_auto_definitions(request)
     inflect_english_phrases = should_inflect_phrases(request)
+    if should_attempt_semantic_search(request, query_string):
+        wordnet_results = wordnet_search(query_string)
+        if wordnet_results:
+
+            def process_result(r):
+                return {
+                    "wn_entry": r[0],
+                    "definition": r[1],
+                    "results": r[2].serialized_presentation_results(
+                        display_mode=DisplayMode.current_value_from_request(request),
+                        animate_emoji=AnimateEmoji.current_value_from_request(request),
+                        show_emoji=ShowEmoji.current_value_from_request(request),
+                        dict_source=get_dict_source(request),
+                    ),
+                }
+
+            return render(
+                request,
+                "morphodict/wordnet-results.html",
+                {
+                    "query_string": query_string,
+                    "search_results": [process_result(r) for r in wordnet_results],
+                    "show_morphemes": request.COOKIES.get("show_morphemes"),
+                    "show_ic": request.COOKIES.get("show_inflectional_category"),
+                    "did_wordnet_search": True,
+                },
+            )
+
     results = search_with_affixes(
         query_string,
         include_auto_definitions=include_auto_definitions,
@@ -318,6 +347,25 @@ def should_inflect_phrases(request):
     return False if request.COOKIES.get("inflect_english_phrase") == "no" else True
 
 
+def should_attempt_semantic_search(request, query):
+    """
+    Decide whether the WordNet semantic search should run for this request.
+
+    A ``wn:<value>`` token in the query overrides the saved preference: the
+    last such token wins, and 0/n/f/false/no (any letter case) disable it.
+    Otherwise only an explicit "yes" cookie enables the search, so a visitor
+    with no cookie gets the AttemptSemanticSearch default of "no".
+    """
+    disabling_values = {"0", "n", "f", "false", "no"}
+    # str.split() with no argument already strips surrounding whitespace.
+    overrides = [t[3:] for t in (query or "").split() if t.startswith("wn:")]
+    if overrides:
+        return overrides[-1].lower() not in disabling_values
+
+    # Opt-in: an absent or unrecognized cookie must not enable the search.
+    return request.COOKIES.get("attempt_semantic_search") == "yes"
+
+
 def get_dict_source(request):
     if dictionary_source := request.COOKIES.get("dictionary_source"):
         if dictionary_source:

diff --git a/src/morphodict/phrase_translate/to_target/__init__.py b/src/morphodict/phrase_translate/to_target/__init__.py
@@ -14,7 +14,9 @@
 logger = logging.getLogger(__name__)
 
 
-def inflect_target_language_phrase(analysis, lemma_definition) -> str | None:
+def inflect_target_language_phrase(
+    analysis: tuple | RichAnalysis, lemma_definition
+) -> str | None:
     if isinstance(analysis, tuple):
         analysis = RichAnalysis(analysis)
     cree_wordform_tag_list = (

diff --git a/src/morphodict/search/__init__.py b/src/morphodict/search/__init__.py
@@ -40,9 +40,7 @@ def api_search(
     ).serialized_presentation_results()
 
 
-def wordnet_search(query: str) -> list[tuple[WordnetEntry, SearchResults]] | None:
+def wordnet_search(query: str) -> list[tuple[WordnetEntry, str, SearchResults]] | None:
     # If we are doing an english simple phrase
     search_query = Query(query)
-    if search_query.wn:
-        return wordnet_runner(search_query)
-    return None
+    return wordnet_runner(search_query)
diff --git a/src/morphodict/search/espt.py b/src/morphodict/search/espt.py
@@ -12,6 +12,8 @@
 )
 
 from morphodict.search.types import Result
+from morphodict.search.core import SearchResults
+from morphodict.search.query import Query
 from morphodict.phrase_translate.types import PhraseAnalyzedQuery
 from morphodict.analysis import RichAnalysis
 from morphodict.analysis.tag_map import UnknownTagError
@@ -39,7 +41,7 @@ class EsptSearch:
         other methods.
     """
 
-    def __init__(self, query, search_results):
+    def __init__(self, query: Query, search_results: SearchResults):
         self.search_results = search_results
         self.query = query
         self.query_analyzed_ok = False

diff --git a/src/morphodict/search/query.py b/src/morphodict/search/query.py
@@ -83,6 +83,7 @@ def replace_query(self, new_query):
         Inflected phrase search discards functional words like "they" using this method
         Does not affect flags
         """
+        self.old_query_terms = self.query_terms
         query_string = treat_query(new_query)
         self.query_terms = query_string.split()
 

diff --git a/src/morphodict/search/runner.py b/src/morphodict/search/runner.py
@@ -18,7 +18,6 @@
 from morphodict.search.types import Result, WordnetEntry
 from morphodict.search.util import first_non_none_value
 from morphodict.search.wordnet import WordNetSearch
-from morphodict.lexicon.models import Wordform
 
 
 def search(
@@ -139,7 +138,9 @@ def is_almost_certainly_cree(query: Query, search_results: SearchResults) -> boo
     return False
 
 
-def wordnet_search(query: Query) -> list[tuple[WordnetEntry, SearchResults]] | None:
+def wordnet_search(
+    query: Query,
+) -> list[tuple[WordnetEntry, str, SearchResults]] | None:
     wordnet_search = WordNetSearch(query)
     if len(wordnet_search.synsets) > 0:
         # Wordnet search was successful _at the wordnet level_
@@ -155,23 +156,28 @@ def wordnet_search(query: Query) -> list[tuple[WordnetEntry, SearchResults]] | N
                     r = Result(wordform, target_language_wordnet_match=[synset.name])
                     wn_results.add_result(r)
                 wn_entry = WordnetEntry(synset.name)
+                definition = wn_entry.definition()
                 wn_entry.original_str = " ".join(query.query_terms)
                 synsets.setdefault(wn_entry.pos(), []).append(wn_entry)
                 wn_entry.numbering = len(synsets[wn_entry.pos()])
                 get_lemma_freq(wn_results)
                 for result in wn_results.unsorted_results():
                     result.relevance_score = result.lemma_freq
-                if wordnet_search.analyzed_query:
+                """
+                if wordnet_search.espt:
                     # Then it is an inflected query that should be Espt-Search based
                     espt_search = EsptSearch(query, wn_results)
                     espt_search.convert_search_query_to_espt()
                     espt_search.inflect_search_results()
                     find_pos_matches(espt_search, wn_results)
-                    if wordnet_search.analyzed_query.filtered_query:
+                    if wordnet_search.espt.query_analyzed_ok:
                         wn_entry.original_str = str(
-                            wordnet_search.analyzed_query.filtered_query
+                            wordnet_search.espt.query.old_query_terms
                         )
-                results.append((wn_entry, wn_results))
+                    definition = wordnet_search.inflect_wordnet_definition(wn_entry)
+                """
+                definition = wn_entry.definition()
+                results.append((wn_entry, definition, wn_results))
         return results
 
     return None
diff --git a/src/morphodict/search/wordnet.py b/src/morphodict/search/wordnet.py
@@ -1,25 +1,36 @@
+from morphodict.phrase_translate.to_target import inflect_target_language_phrase
 from morphodict.search.types import produce_entries, WordnetEntry
 from morphodict.search.query import Query
+from morphodict.search.core import SearchResults
 from nltk.corpus import wordnet
 from morphodict.lexicon.models import WordNetSynset
-from morphodict.phrase_translate.types import PhraseAnalyzedQuery
+from morphodict.analysis import RichAnalysis
+from morphodict.search.espt import EsptSearch
+from morphodict.phrase_translate.tag_maps import (
+    source_noun_tags,
+)
+from morphodict.phrase_translate.definition_cleanup import (
+    cleanup_target_definition_for_translation,
+)
 
 
 class WordNetSearch:
     synsets: list[WordNetSynset]
-    analyzed_query: PhraseAnalyzedQuery | None
 
+    # espt: EsptSearch | None
     def __init__(self, query: Query):
-        self.analyzed_query = None
-        inflected = PhraseAnalyzedQuery(" ".join(query.query_terms))
-        if 1 < len(query.query_terms) and inflected.filtered_query:
-            canonical_query = inflected.filtered_query.split(" ")
-            self.analyzed_query = inflected
-        else:
-            canonical_query = query.query_terms
+        # self.espt = None
+        canonical_query: list[str] = query.query_terms
+        # if 1 < len(query.query_terms):
+        #    self.espt = EsptSearch(query, SearchResults())
+        #    self.espt.convert_search_query_to_espt()
+        #    if not self.espt.query_analyzed_ok:
+        #        self.espt = None
+        #    else:
+        #        canonical_query = self.espt.query.query_terms
         lemmas = wordnet.synsets("_".join(canonical_query))
         candidate_infinitive = [x.removesuffix("s") for x in canonical_query]
-        if lemmas != candidate_infinitive:
+        if canonical_query != candidate_infinitive:
             lemmas.extend(wordnet.synsets("_".join(candidate_infinitive)))
         self.synsets = list(
             WordNetSynset.objects.filter(
@@ -31,3 +42,51 @@ def ranking(synset: WordNetSynset) -> int:
             return WordnetEntry(synset.name).ranking()
 
         self.synsets.sort(key=ranking, reverse=True)
+
+    """
+    def inflect_wordnet_definition(self, wn_entry: WordnetEntry) -> str:
+        if self.espt:
+            results: list[str]= []
+            orig_tags_starting_with_plus: list[str] = []
+            tags_ending_with_plus: list[str] = []
+            if self.espt.tags:
+                for t in self.espt.new_tags:
+                    if t.startswith("+"):
+                        orig_tags_starting_with_plus.append(t)
+                    else:
+                        tags_ending_with_plus.append(t)
+                    tags_starting_with_plus = orig_tags_starting_with_plus[:]
+                    noun_tags = []
+                    if "+N" in self.espt.tags:
+                        noun_tags = [
+                            tag
+                            for tag in self.espt.tags
+                            if tag in source_noun_tags
+                        ]
+                        if "+N" in tags_starting_with_plus:
+                            tags_starting_with_plus.remove("+N")
+                        if "+Der/Dim" in tags_starting_with_plus:
+                            # noun tags need to be repeated in this case
+                            insert_index = tags_starting_with_plus.index("+Der/Dim") + 1
+                            tags_starting_with_plus[insert_index:insert_index] = noun_tags
+
+                analysis = RichAnalysis(
+                    (
+                        tags_ending_with_plus,
+                        "",
+                        noun_tags + tags_starting_with_plus,
+                    )
+                )
+
+                for phrase in wn_entry.definition().split(";"):
+                    clean_phrase = cleanup_target_definition_for_translation(phrase)
+                    tags_starting_with_plus = orig_tags_starting_with_plus[:]
+                    result = inflect_target_language_phrase(analysis,clean_phrase) or inflect_target_language_phrase(analysis,"to "+clean_phrase)
+                    if result:
+                        results.append(result)
+                    else:
+                        results.append(phrase)
+                return ";".join(results)
+
+        return wn_entry.definition()
+        """
diff --git a/src/morphodict/search/wordnet_test.py b/src/morphodict/search/wordnet_test.py
@@ -16,13 +16,13 @@ def test_wordnet_success(db):
     search_results = wordnet_search(query="see wn:1")
 
     assert len(search_results) > 1
-    for wn_entry, results in search_results:
+    for wn_entry, wn_defn, results in search_results:
         assert len(results.sorted_results()) > 0
 
 
 def test_wordnet_space_success(db):
     search_results = wordnet_search(query="Ursa Major wn:1")
 
     assert len(search_results) > 0
-    for wn_entry, results in search_results:
+    for wn_entry, wn_defn, results in search_results:
         assert len(results.sorted_results()) > 0