From a392bb13276d833165c8195d62a19225b8cc8a80 Mon Sep 17 00:00:00 2001 From: Clovis Gladstone Date: Wed, 3 Apr 2024 16:30:15 -0500 Subject: [PATCH] more clean-up --- python/philologic/loadtime/LoadFilters.py | 35 ----------------------- 1 file changed, 35 deletions(-) diff --git a/python/philologic/loadtime/LoadFilters.py b/python/philologic/loadtime/LoadFilters.py index fd87e90c..f16b0a6a 100755 --- a/python/philologic/loadtime/LoadFilters.py +++ b/python/philologic/loadtime/LoadFilters.py @@ -47,41 +47,6 @@ def generate_words_sorted(loader_obj, text): os.system(wordcommand) -def old_spacy_tagger(loader_obj, text): - """Tag words with Spacy""" - with open(text["raw"] + ".tmp", "w", encoding="utf8") as tmp_file: - with open(text["raw"], encoding="utf8") as fh: - sentence = [] - current_sent_id = None - for line in fh: - philo_type, word, philo_id, attrib = line.split("\t") - if philo_type in ("word", "sent", "punct"): - sent_id = " ".join(philo_id.split()[:6]) - record = Record(philo_type, word, philo_id.split()) - record.attrib = loads(attrib) - if current_sent_id is not None and sent_id != current_sent_id: - spacy_sentence = SpacyDoc(loader_obj.nlp.vocab, [r.name for r in sentence]) - parsed_sentence = loader_obj.nlp(spacy_sentence) - for saved_record, parsed_word in zip(sentence, parsed_sentence): - saved_record.attrib["pos"] = parsed_word.pos_ - saved_record.attrib["tag"] = parsed_word.tag_ - saved_record.attrib["ent_type"] = parsed_word.ent_type_ - saved_record.attrib["lemma"] = parsed_word.lemma_ - print(saved_record, file=tmp_file) - sentence = [] - sentence.append(record) - current_sent_id = sent_id - if sentence: - spacy_sentence = SpacyDoc(loader_obj.nlp.vocab, [r.name for r in sentence]) - parsed_sentence = loader_obj.nlp(spacy_sentence) - for saved_record, parsed_word in zip(sentence, parsed_sentence): - saved_record.attrib["pos"] = parsed_word.pos_ - saved_record.attrib["tag"] = parsed_word.tag_ - saved_record.attrib["ent_type"] = parsed_word.ent_type_ - saved_record.attrib["lemma"] = parsed_word.lemma_ - print(saved_record, file=tmp_file) - - def spacy_tagger(loader_obj, text): """Tag words with Spacy"""