Skip to content

Commit

Permalink
Merge pull request #98 from dmmiller612/spacy-fix
Browse files Browse the repository at this point in the history
updated spacy
  • Loading branch information
dmmiller612 authored Feb 13, 2021
2 parents 170eb90 + 8c1d94c commit 9041c53
Show file tree
Hide file tree
Showing 3 changed files with 24 additions and 5 deletions.
4 changes: 2 additions & 2 deletions requirements-service.txt
Original file line number Diff line number Diff line change
Expand Up @@ -7,8 +7,8 @@ tqdm==4.32.2
neuralcoref==4.0
argparse
scikit-learn
bert-extractive-summarizer==0.6.1
bert-extractive-summarizer
Flask
flask-cors
nltk
https://github.com/explosion/spacy-models/releases/download/en_core_web_sm-2.1.0/en_core_web_sm-2.1.0.tar.gz
https://github.com/explosion/spacy-models/releases/download/en_core_web_sm-2.1.0/en_core_web_sm-2.1.0.tar.gz
23 changes: 21 additions & 2 deletions summarizer/sentence_handler.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,26 @@ class SentenceHandler(object):

def __init__(self, language=English):
self.nlp = language()
self.nlp.add_pipe(self.nlp.create_pipe('sentencizer'))

try:
self.nlp.add_pipe(self.nlp.create_pipe('sentencizer'))
self.is_spacy_3 = False
except:
self.nlp.add_pipe("sentencizer")
self.is_spacy_3 = True

def sentence_processor(self, doc, min_length: int = 40, max_length: int = 600) -> List[str]:
    """
    Collect the sentences of a parsed document whose stripped length lies
    strictly between ``min_length`` and ``max_length``.

    Only ``Span.text`` is read. That attribute is available in both spaCy 2
    and spaCy 3 (``Span.string`` was removed in spaCy 3), so no
    version-specific branch is required here.

    :param doc: Parsed document; only its ``sents`` iterable is used.
    :param min_length: Exclusive lower bound on the stripped sentence length.
    :param max_length: Exclusive upper bound on the stripped sentence length.
    :return: Returns a list of stripped sentences, in document order.
    """
    # Strip each sentence once and reuse the result for both the length
    # check and the returned value.
    stripped_sentences = (sentence.text.strip() for sentence in doc.sents)
    return [text for text in stripped_sentences if min_length < len(text) < max_length]

def process(self, body: str, min_length: int = 40, max_length: int = 600) -> List[str]:
"""
Expand All @@ -19,7 +38,7 @@ def process(self, body: str, min_length: int = 40, max_length: int = 600) -> Lis
:return: Returns a list of sentences.
"""
doc = self.nlp(body)
return [c.string.strip() for c in doc.sents if max_length > len(c.string.strip()) > min_length]
return self.sentence_processor(doc, min_length, max_length)

# Makes the handler instance callable: forwards body, min_length and max_length
# unchanged to process() and returns its result.
def __call__(self, body: str, min_length: int = 40, max_length: int = 600) -> List[str]:
return self.process(body, min_length, max_length)
2 changes: 1 addition & 1 deletion tests/test_summary_items.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ def summarizer():

@pytest.fixture()
def summarizer_multi_hidden():
return Summarizer('distilbert-base-uncased', hidden=[-1,-2,-3])
return Summarizer('distilbert-base-uncased', hidden=[-1,-2])


@pytest.fixture()
Expand Down

0 comments on commit 9041c53

Please sign in to comment.