Skip to content

Commit

Permalink
more clean-up
Browse files Browse the repository at this point in the history
  • Loading branch information
clovis committed Apr 3, 2024
1 parent 7a13de9 commit a392bb1
Showing 1 changed file with 0 additions and 35 deletions.
35 changes: 0 additions & 35 deletions python/philologic/loadtime/LoadFilters.py
Original file line number Diff line number Diff line change
Expand Up @@ -47,41 +47,6 @@ def generate_words_sorted(loader_obj, text):
os.system(wordcommand)


def _tag_and_write_sentence(nlp, sentence, tmp_file):
    """Annotate one sentence's records with spaCy output and write them out.

    Args:
        nlp: the spaCy pipeline; its ``vocab`` is used to build the Doc.
        sentence: list of records whose ``name`` attributes are the tokens,
            in order.
        tmp_file: open text file that receives one printed record per token.
    """
    # Build the Doc from the existing tokens so spaCy does not re-tokenize
    # and the parsed tokens line up one-to-one with the records.
    spacy_sentence = SpacyDoc(nlp.vocab, [r.name for r in sentence])
    parsed_sentence = nlp(spacy_sentence)
    for saved_record, parsed_word in zip(sentence, parsed_sentence):
        saved_record.attrib["pos"] = parsed_word.pos_
        saved_record.attrib["tag"] = parsed_word.tag_
        saved_record.attrib["ent_type"] = parsed_word.ent_type_
        saved_record.attrib["lemma"] = parsed_word.lemma_
        print(saved_record, file=tmp_file)


def old_spacy_tagger(loader_obj, text):
    """Tag words with Spacy

    Reads the tab-separated raw file at ``text["raw"]``, groups the
    ``word``/``sent``/``punct`` lines into sentences, runs ``loader_obj.nlp``
    over each sentence and writes the annotated records to
    ``text["raw"] + ".tmp"``.

    Args:
        loader_obj: object exposing the spaCy pipeline as ``nlp``.
        text: mapping whose ``"raw"`` entry is the path of the raw file.

    NOTE(review): lines whose type is not ``word``, ``sent`` or ``punct`` are
    not copied to the ``.tmp`` file, and this function does not move the
    ``.tmp`` file over the raw file -- confirm both are intended.
    """
    with open(text["raw"] + ".tmp", "w", encoding="utf8") as tmp_file, open(
        text["raw"], encoding="utf8"
    ) as fh:
        sentence = []
        current_sent_id = None
        for line in fh:
            philo_type, word, philo_id, attrib = line.split("\t")
            if philo_type not in ("word", "sent", "punct"):
                continue
            id_fields = philo_id.split()
            # The first six id fields are used as the sentence key
            # (presumably the sentence-level prefix of the id -- TODO confirm).
            sent_id = " ".join(id_fields[:6])
            record = Record(philo_type, word, id_fields)
            record.attrib = loads(attrib)
            # A change of sentence key means the buffered sentence is complete.
            if current_sent_id is not None and sent_id != current_sent_id:
                _tag_and_write_sentence(loader_obj.nlp, sentence, tmp_file)
                sentence = []
            sentence.append(record)
            current_sent_id = sent_id
        # Flush the final sentence, which no later line closes off.
        if sentence:
            _tag_and_write_sentence(loader_obj.nlp, sentence, tmp_file)


def spacy_tagger(loader_obj, text):
"""Tag words with Spacy"""

Expand Down

0 comments on commit a392bb1

Please sign in to comment.