-
Notifications
You must be signed in to change notification settings - Fork 292
/
demo_phrase_machine.py
25 lines (22 loc) · 1.33 KB
/
demo_phrase_machine.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
import spacy
from scattertext import SampleCorpora, PhraseMachinePhrases, dense_rank, RankDifference, AssociationCompactor, produce_scattertext_explorer
from scattertext.CorpusFromPandas import CorpusFromPandas
convention_df = SampleCorpora.ConventionData2012.get_data()
corpus = (CorpusFromPandas(convention_df,
category_col='party',
text_col='text',
feats_from_spacy_doc=PhraseMachinePhrases(),
nlp=spacy.load('en_core_web_sm', parser=False))
.build().compact(AssociationCompactor(4000)))
html = produce_scattertext_explorer(corpus,
category='democrat',
category_name='Democratic',
not_category_name='Republican',
minimum_term_frequency=0,
pmi_threshold_coefficient=0,
transform=dense_rank,
metadata=corpus.get_df()['speaker'],
term_scorer=RankDifference(),
width_in_pixels=1000)
open('./demo_phrase_machine.html', 'wb').write(html.encode('utf-8'))
print('Open ./demo_phrase_machine.html in Chrome or Firefox.')