Skip to content

Commit

Permalink
Refactored PhraseAnalyzedQuery
Browse files Browse the repository at this point in the history
  • Loading branch information
fbanados committed Nov 7, 2024
1 parent 14c9791 commit ebd36b7
Show file tree
Hide file tree
Showing 2 changed files with 54 additions and 51 deletions.
53 changes: 53 additions & 0 deletions src/morphodict/phrase_translate/types.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,53 @@
import re
from morphodict.phrase_translate.fst import source_phrase_analyses

# Shape of a single phrase-analysis string, e.g. " swim +V+AI+Prt+3Pl":
# optional leading whitespace, the query text, one whitespace character,
# then a space-free run of "+"-prefixed tags. Used with ``fullmatch``.
_PHRASE_ANALYSIS_OUTPUT_PATTERN = r"""
\s* # leading blank space(s) from flag diacritics
(?P<query>.*)
\s
(?P<tags>\+[^\ ]+)
"""
PHRASE_ANALYSIS_OUTPUT_RE = re.compile(
    _PHRASE_ANALYSIS_OUTPUT_PATTERN,
    flags=re.VERBOSE,
)

class PhraseAnalyzedQuery:
    """Split a phrase query into its bare query text and analysis tags.

    The query is passed to ``source_phrase_analyses``. Tags are extracted
    only when exactly one analysis comes back, that analysis does not
    contain ``"+?"``, and it fully matches ``PHRASE_ANALYSIS_OUTPUT_RE``.
    In every other case ``has_tags`` stays ``False`` and both
    ``filtered_query`` and ``tags`` stay ``None``.

    >>> PhraseAnalyzedQuery("they swam").filtered_query
    'swim'
    >>> PhraseAnalyzedQuery("they swam").has_tags
    True
    >>> PhraseAnalyzedQuery("they swam").tags
    ['+V', '+AI', '+Prt', '+3Pl']
    >>> PhraseAnalyzedQuery("excellent").has_tags
    False
    """

    def __init__(self, query: str, add_verbose_message=None):
        """Analyze *query* and populate the instance attributes.

        Args:
            query: The raw query text; stored unchanged as ``self.query``.
            add_verbose_message: Optional callable. When truthy it is
                called once with the keyword argument ``phrase_analyses``
                set to the list of analyses obtained for *query*.
        """
        # Defaults describe the "no usable analysis" outcome. The assignment
        # order is deliberate: __repr__ prints __dict__ in insertion order.
        self.query = query
        self.has_tags = False
        self.filtered_query = None
        self.tags = None

        analyses: list[str] = source_phrase_analyses(query)
        if add_verbose_message:
            add_verbose_message(phrase_analyses=analyses)

        # Only a single analysis without "+?" is considered
        # (presumably "+?" marks an unrecognized input -- confirm against
        # the FST output format).
        parsed = None
        if len(analyses) == 1:
            candidate = analyses[0]
            if "+?" not in candidate:
                parsed = PHRASE_ANALYSIS_OUTPUT_RE.fullmatch(candidate)
        if parsed is None:
            return

        self.filtered_query = parsed["query"]
        self.has_tags = True
        # "+V+AI" -> ["+V", "+AI"]; empty pieces from the split are dropped.
        self.tags = [f"+{piece}" for piece in parsed["tags"].split("+") if piece]

    def __repr__(self):
        """Return a debugging representation listing every instance attribute."""
        state = self.__dict__
        return f"<PhraseAnalyzedQuery {state!r}>"
52 changes: 1 addition & 51 deletions src/morphodict/search/espt.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@
)

from morphodict.search.types import Result
from morphodict.phrase_translate.fst import source_phrase_analyses
from morphodict.phrase_translate.types import PhraseAnalyzedQuery
from morphodict.analysis import RichAnalysis
from morphodict.analysis.tag_map import UnknownTagError
from morphodict.lexicon.models import Wordform
Expand Down Expand Up @@ -192,54 +192,4 @@ def _generate_inflected_results(self) -> list[_EipResult]:
return results


# Shape of a single phrase-analysis string, e.g. " swim +V+AI+Prt+3Pl":
# optional leading whitespace, the query text, one whitespace character,
# then a space-free run of "+"-prefixed tags. Used with ``fullmatch``.
_PHRASE_ANALYSIS_OUTPUT_PATTERN = r"""
\s* # leading blank space(s) from flag diacritics
(?P<query>.*)
\s
(?P<tags>\+[^\ ]+)
"""
PHRASE_ANALYSIS_OUTPUT_RE = re.compile(
    _PHRASE_ANALYSIS_OUTPUT_PATTERN,
    flags=re.VERBOSE,
)


class PhraseAnalyzedQuery:
    """Split a phrase query into its bare query text and analysis tags.

    The query is passed to ``source_phrase_analyses``. Tags are extracted
    only when exactly one analysis comes back, that analysis does not
    contain ``"+?"``, and it fully matches ``PHRASE_ANALYSIS_OUTPUT_RE``.
    In every other case ``has_tags`` stays ``False`` and both
    ``filtered_query`` and ``tags`` stay ``None``.

    >>> PhraseAnalyzedQuery("they swam").filtered_query
    'swim'
    >>> PhraseAnalyzedQuery("they swam").has_tags
    True
    >>> PhraseAnalyzedQuery("they swam").tags
    ['+V', '+AI', '+Prt', '+3Pl']
    >>> PhraseAnalyzedQuery("excellent").has_tags
    False
    """

    def __init__(self, query: str, add_verbose_message=None):
        """Analyze *query* and populate the instance attributes.

        Args:
            query: The raw query text; stored unchanged as ``self.query``.
            add_verbose_message: Optional callable. When truthy it is
                called once with the keyword argument ``phrase_analyses``
                set to the list of analyses obtained for *query*.
        """
        # Defaults describe the "no usable analysis" outcome. The assignment
        # order is deliberate: __repr__ prints __dict__ in insertion order.
        self.query = query
        self.has_tags = False
        self.filtered_query = None
        self.tags = None

        analyses: list[str] = source_phrase_analyses(query)
        if add_verbose_message:
            add_verbose_message(phrase_analyses=analyses)

        # Only a single analysis without "+?" is considered
        # (presumably "+?" marks an unrecognized input -- confirm against
        # the FST output format).
        parsed = None
        if len(analyses) == 1:
            candidate = analyses[0]
            if "+?" not in candidate:
                parsed = PHRASE_ANALYSIS_OUTPUT_RE.fullmatch(candidate)
        if parsed is None:
            return

        self.filtered_query = parsed["query"]
        self.has_tags = True
        # "+V+AI" -> ["+V", "+AI"]; empty pieces from the split are dropped.
        self.tags = [f"+{piece}" for piece in parsed["tags"].split("+") if piece]

    def __repr__(self):
        """Return a debugging representation listing every instance attribute."""
        state = self.__dict__
        return f"<PhraseAnalyzedQuery {state!r}>"

0 comments on commit ebd36b7

Please sign in to comment.