Skip to content

Commit

Permalink
Added setting to handle Semantic Searches
Browse files Browse the repository at this point in the history
  • Loading branch information
fbanados committed Jan 14, 2025
1 parent 8c85c5d commit 8ba2d54
Show file tree
Hide file tree
Showing 11 changed files with 187 additions and 30 deletions.
14 changes: 14 additions & 0 deletions src/crkeng/app/preferences.py
Original file line number Diff line number Diff line change
Expand Up @@ -174,6 +174,20 @@ class InflectEnglishPhrase(Preference):
default = "yes"


@register_preference
class AttemptSemanticSearch(Preference):
    """
    Opt-in switch for semantic (WordNet) search.

    The choice is stored under the ``attempt_semantic_search`` cookie name
    and is "no" unless the user picks "yes".
    """

    cookie_name = "attempt_semantic_search"
    choices = {
        # Adjacent literals are concatenated at compile time, so the label
        # text is unchanged; it is only wrapped here for readability.
        "yes": (
            "Attempt semantic search to provide results distinguishing between "
            "different English meanings when data is available"
        ),
        "no": "Use only the standard language search",
    }
    default = "no"


@register_preference
class AutoTranslateDefs(Preference):
"""
Expand Down
27 changes: 27 additions & 0 deletions src/morphodict/frontend/templates/morphodict/settings.html
Original file line number Diff line number Diff line change
Expand Up @@ -313,6 +313,33 @@ <h3 id="paradigm-audio" class="setting__title">Show/Play Synthesized Audio</h3>
</div>
</form>
</section>

<section>
<h3 id="attempt-semantic-search" class="setting__title">Attempt semantic search for English queries</h3>

<form method="POST" action="{% url "preference:change" "attempt_semantic_search" %}"
data-save-preference="attempt_semantic_search">
<ul class="unbullet">
{% for value, label in preferences.attempt_semantic_search.choices_with_labels %}
<li class="option">
<label class="option__label">
<input type="radio" name="attempt_semantic_search" value="{{ value }}" class="option__control"
{% if preferences.attempt_semantic_search.current_choice == value %}checked{% endif %}>
<span class="option__label-text">{{ value|capfirst }}</span>
</label>
<p class="option__description">
{{ label }}
</p>
</li>
{% endfor %}
</ul>

<div class="action-bar">
{% csrf_token %}
<button type="submit"> Save settings</button>
</div>
</form>
</section>
<section>
<h3 id="inflect-english-phrase" class="setting__title">Automatically translate English phrases into Cree word-forms </h3>

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ <h3>Messages from search run</h3>
<section class="prose box box--spaced">
<h2 class="prose__section-title">{{result.wn_entry.numbering}}. {{result.wn_entry.original_str}} ({% relabel_one result.wn_entry.paren_pos %})</h2>
<div class="meanings__meaning" data-cy="lemma-meaning">
{{result.wn_entry.definition}}
{{result.definition}}
{% include "morphodict/components/citations.html" with dictionary_sources=result.wn_entry.sources %}
</div>
{% if result.wn_entry.synonyms|length > 1 %}
Expand Down
58 changes: 53 additions & 5 deletions src/morphodict/frontend/views.py
Original file line number Diff line number Diff line change
Expand Up @@ -96,10 +96,10 @@ def index(request): # pragma: no cover
if user_query:
include_auto_definitions = should_include_auto_definitions(request)
inflect_english_phrases = should_inflect_phrases(request)

wordnet_results = wordnet_search(user_query)
if wordnet_results:
return wordnet(request, user_query, wordnet_results)
if should_attempt_semantic_search(request, user_query):
wordnet_results = wordnet_search(user_query)
if wordnet_results:
return wordnet(request, user_query, wordnet_results)

search_results = search_with_affixes(
user_query,
Expand Down Expand Up @@ -145,7 +145,8 @@ def wordnet(request, user_query, results):
def process_result(r):
return {
"wn_entry": r[0],
"results": r[1].serialized_presentation_results(
"definition": r[1],
"results": r[2].serialized_presentation_results(
display_mode=DisplayMode.current_value_from_request(request),
animate_emoji=AnimateEmoji.current_value_from_request(request),
show_emoji=ShowEmoji.current_value_from_request(request),
Expand All @@ -170,6 +171,34 @@ def search_results(request, query_string: str): # pragma: no cover
dict_source = get_dict_source(request) # type: ignore
include_auto_definitions = should_include_auto_definitions(request)
inflect_english_phrases = should_inflect_phrases(request)
if should_attempt_semantic_search(request, query_string):
wordnet_results = wordnet_search(query_string)
if wordnet_results:

def process_result(r):
return {
"wn_entry": r[0],
"definition": r[1],
"results": r[2].serialized_presentation_results(
display_mode=DisplayMode.current_value_from_request(request),
animate_emoji=AnimateEmoji.current_value_from_request(request),
show_emoji=ShowEmoji.current_value_from_request(request),
dict_source=get_dict_source(request),
),
}

return render(
request,
"morphodict/wordnet-results.html",
{
"query_string": query_string,
"search_results": [process_result(r) for r in wordnet_results],
"show_morphemes": request.COOKIES.get("show_morphemes"),
"show_ic": request.COOKIES.get("show_inflectional_category"),
"did_wordnet_search": True,
},
)

results = search_with_affixes(
query_string,
include_auto_definitions=include_auto_definitions,
Expand Down Expand Up @@ -318,6 +347,25 @@ def should_inflect_phrases(request):
return False if request.COOKIES.get("inflect_english_phrase") == "no" else True


def should_attempt_semantic_search(request, query):
    """
    Decide whether a semantic (WordNet) search should be tried for ``query``.

    Precedence:

    1. An explicit ``wn:<value>`` token in the query overrides the saved
       preference. If several are present, the last one wins. The values
       ``0``, ``n``, ``f``, ``false`` and ``no`` (case-insensitive) turn the
       search off; any other value, including an empty one, turns it on.
    2. Otherwise the ``attempt_semantic_search`` cookie decides. Only the
       value ``"yes"`` enables the search; a missing or unrecognised cookie
       falls back to "no", which is the default the preference declares.

    :param request: object exposing a ``COOKIES`` mapping (a Django request).
    :param query: the raw query string; may be empty or ``None``.
    :return: ``True`` if the semantic search should be attempted.
    """
    if query:
        # str.split() with no arguments already discards surrounding
        # whitespace, so the tokens need no further stripping.
        flags = [
            token[len("wn:"):] for token in query.split() if token.startswith("wn:")
        ]
        if flags:
            return flags[-1].lower() not in {"0", "n", "f", "false", "no"}

    # Previously a missing cookie enabled the search, contradicting the
    # preference's default of "no"; now only an explicit "yes" enables it.
    return request.COOKIES.get("attempt_semantic_search", "no") == "yes"


def get_dict_source(request):
if dictionary_source := request.COOKIES.get("dictionary_source"):
if dictionary_source:
Expand Down
4 changes: 3 additions & 1 deletion src/morphodict/phrase_translate/to_target/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,9 @@
logger = logging.getLogger(__name__)


def inflect_target_language_phrase(analysis, lemma_definition) -> str | None:
def inflect_target_language_phrase(
analysis: tuple | RichAnalysis, lemma_definition
) -> str | None:
if isinstance(analysis, tuple):
analysis = RichAnalysis(analysis)
cree_wordform_tag_list = (
Expand Down
6 changes: 2 additions & 4 deletions src/morphodict/search/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,9 +40,7 @@ def api_search(
).serialized_presentation_results()


def wordnet_search(query: str) -> list[tuple[WordnetEntry, SearchResults]] | None:
def wordnet_search(query: str) -> list[tuple[WordnetEntry, str, SearchResults]] | None:
# If we are doing an english simple phrase
search_query = Query(query)
if search_query.wn:
return wordnet_runner(search_query)
return None
return wordnet_runner(search_query)
4 changes: 3 additions & 1 deletion src/morphodict/search/espt.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,8 @@
)

from morphodict.search.types import Result
from morphodict.search.core import SearchResults
from morphodict.search.query import Query
from morphodict.phrase_translate.types import PhraseAnalyzedQuery
from morphodict.analysis import RichAnalysis
from morphodict.analysis.tag_map import UnknownTagError
Expand Down Expand Up @@ -39,7 +41,7 @@ class EsptSearch:
other methods.
"""

def __init__(self, query, search_results):
def __init__(self, query: Query, search_results: SearchResults):
self.search_results = search_results
self.query = query
self.query_analyzed_ok = False
Expand Down
1 change: 1 addition & 0 deletions src/morphodict/search/query.py
Original file line number Diff line number Diff line change
Expand Up @@ -83,6 +83,7 @@ def replace_query(self, new_query):
Inflected phrase search discards functional words like "they" using this method
Does not affect flags
"""
self.old_query_terms = self.query_terms
query_string = treat_query(new_query)
self.query_terms = query_string.split()

Expand Down
18 changes: 12 additions & 6 deletions src/morphodict/search/runner.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,6 @@
from morphodict.search.types import Result, WordnetEntry
from morphodict.search.util import first_non_none_value
from morphodict.search.wordnet import WordNetSearch
from morphodict.lexicon.models import Wordform


def search(
Expand Down Expand Up @@ -139,7 +138,9 @@ def is_almost_certainly_cree(query: Query, search_results: SearchResults) -> boo
return False


def wordnet_search(query: Query) -> list[tuple[WordnetEntry, SearchResults]] | None:
def wordnet_search(
query: Query,
) -> list[tuple[WordnetEntry, str, SearchResults]] | None:
wordnet_search = WordNetSearch(query)
if len(wordnet_search.synsets) > 0:
# Wordnet search was successful _at the wordnet level_
Expand All @@ -155,23 +156,28 @@ def wordnet_search(query: Query) -> list[tuple[WordnetEntry, SearchResults]] | N
r = Result(wordform, target_language_wordnet_match=[synset.name])
wn_results.add_result(r)
wn_entry = WordnetEntry(synset.name)
definition = wn_entry.definition()
wn_entry.original_str = " ".join(query.query_terms)
synsets.setdefault(wn_entry.pos(), []).append(wn_entry)
wn_entry.numbering = len(synsets[wn_entry.pos()])
get_lemma_freq(wn_results)
for result in wn_results.unsorted_results():
result.relevance_score = result.lemma_freq
if wordnet_search.analyzed_query:
"""
if wordnet_search.espt:
# Then it is an inflected query that should be Espt-Search based
espt_search = EsptSearch(query, wn_results)
espt_search.convert_search_query_to_espt()
espt_search.inflect_search_results()
find_pos_matches(espt_search, wn_results)
if wordnet_search.analyzed_query.filtered_query:
if wordnet_search.espt.query_analyzed_ok:
wn_entry.original_str = str(
wordnet_search.analyzed_query.filtered_query
wordnet_search.espt.query.old_query_terms
)
results.append((wn_entry, wn_results))
definition = wordnet_search.inflect_wordnet_definition(wn_entry)
"""
definition = wn_entry.definition()
results.append((wn_entry, definition, wn_results))
return results

return None
79 changes: 69 additions & 10 deletions src/morphodict/search/wordnet.py
Original file line number Diff line number Diff line change
@@ -1,25 +1,36 @@
from morphodict.phrase_translate.to_target import inflect_target_language_phrase
from morphodict.search.types import produce_entries, WordnetEntry
from morphodict.search.query import Query
from morphodict.search.core import SearchResults
from nltk.corpus import wordnet
from morphodict.lexicon.models import WordNetSynset
from morphodict.phrase_translate.types import PhraseAnalyzedQuery
from morphodict.analysis import RichAnalysis
from morphodict.search.espt import EsptSearch
from morphodict.phrase_translate.tag_maps import (
source_noun_tags,
)
from morphodict.phrase_translate.definition_cleanup import (
cleanup_target_definition_for_translation,
)


class WordNetSearch:
synsets: list[WordNetSynset]
analyzed_query: PhraseAnalyzedQuery | None

# espt: EsptSearch | None
def __init__(self, query: Query):
self.analyzed_query = None
inflected = PhraseAnalyzedQuery(" ".join(query.query_terms))
if 1 < len(query.query_terms) and inflected.filtered_query:
canonical_query = inflected.filtered_query.split(" ")
self.analyzed_query = inflected
else:
canonical_query = query.query_terms
# self.espt = None
canonical_query: list[str] = query.query_terms
# if 1 < len(query.query_terms):
# self.espt = EsptSearch(query, SearchResults())
# self.espt.convert_search_query_to_espt()
# if not self.espt.query_analyzed_ok:
# self.espt = None
# else:
# canonical_query = self.espt.query.query_terms
lemmas = wordnet.synsets("_".join(canonical_query))
candidate_infinitive = [x.removesuffix("s") for x in canonical_query]
if lemmas != candidate_infinitive:
if canonical_query != candidate_infinitive:
lemmas.extend(wordnet.synsets("_".join(candidate_infinitive)))
self.synsets = list(
WordNetSynset.objects.filter(
Expand All @@ -31,3 +42,51 @@ def ranking(synset: WordNetSynset) -> int:
return WordnetEntry(synset.name).ranking()

self.synsets.sort(key=ranking, reverse=True)

"""
def inflect_wordnet_definition(self, wn_entry: WordnetEntry) -> str:
if self.espt:
results: list[str]= []
orig_tags_starting_with_plus: list[str] = []
tags_ending_with_plus: list[str] = []
if self.espt.tags:
for t in self.espt.new_tags:
if t.startswith("+"):
orig_tags_starting_with_plus.append(t)
else:
tags_ending_with_plus.append(t)
tags_starting_with_plus = orig_tags_starting_with_plus[:]
noun_tags = []
if "+N" in self.espt.tags:
noun_tags = [
tag
for tag in self.espt.tags
if tag in source_noun_tags
]
if "+N" in tags_starting_with_plus:
tags_starting_with_plus.remove("+N")
if "+Der/Dim" in tags_starting_with_plus:
# noun tags need to be repeated in this case
insert_index = tags_starting_with_plus.index("+Der/Dim") + 1
tags_starting_with_plus[insert_index:insert_index] = noun_tags
analysis = RichAnalysis(
(
tags_ending_with_plus,
"",
noun_tags + tags_starting_with_plus,
)
)
for phrase in wn_entry.definition().split(";"):
clean_phrase = cleanup_target_definition_for_translation(phrase)
tags_starting_with_plus = orig_tags_starting_with_plus[:]
result = inflect_target_language_phrase(analysis,clean_phrase) or inflect_target_language_phrase(analysis,"to "+clean_phrase)
if result:
results.append(result)
else:
results.append(phrase)
return ";".join(results)
return wn_entry.definition()
"""
4 changes: 2 additions & 2 deletions src/morphodict/search/wordnet_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,13 +16,13 @@ def test_wordnet_success(db):
search_results = wordnet_search(query="see wn:1")

assert len(search_results) > 1
for wn_entry, results in search_results:
for wn_entry, wn_defn, results in search_results:
assert len(results.sorted_results()) > 0


def test_wordnet_space_success(db):
search_results = wordnet_search(query="Ursa Major wn:1")

assert len(search_results) > 0
for wn_entry, results in search_results:
for wn_entry, wn_defn, results in search_results:
assert len(results.sorted_results()) > 0

0 comments on commit 8ba2d54

Please sign in to comment.