-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathLDA_simulation.py
50 lines (38 loc) · 1.27 KB
/
LDA_simulation.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Thu Jan 11 17:04:12 2018
@author: LEIHAO
"""
import numpy as np
import lda
# --------------------------
# Create a dataset
# --------------------------
# Vocabulary: column j of every document-term matrix below counts vocab[j].
vocab = ['law', 'finance', 'math', 'physics', 'art']
topic_num = 5
# 10 documents x 5 terms. Each count is a baseline of 200 plus integer noise
# drawn from [0, 10] (randint's upper bound, 11, is exclusive), so all terms
# occur with nearly the same frequency in every document.
dat_matrix = np.ones((10, 5), dtype=int) * 200
rand_mat = np.random.randint(0, 11, (10, 5), dtype=int)
dat_mat = dat_matrix + rand_mat
# -------------------------
# Fit LDA model
# -------------------------
model = lda.LDA(n_topics=topic_num, n_iter=1500, random_state=1)
model.fit(dat_mat)
# Bind the fitted matrix to the name the loop below iterates over; it was
# previously stored under a different name, so the loop hit a NameError.
topic_word = model.topic_word_
n_top_words = 5
# Build the vocabulary array once so it can be fancy-indexed per topic.
vocab_arr = np.array(vocab)
for i, topic_dist in enumerate(topic_word):
    # argsort is ascending; the reversed slice keeps the n_top_words
    # highest-weighted terms, ordered from most to least probable.
    topic_words = vocab_arr[np.argsort(topic_dist)][:-(n_top_words + 1):-1]
    print('Topic {}: {}'.format(i, ' '.join(topic_words)))
# ------------------------------------
# Different Term-frequency simulation
# ------------------------------------
# Two documents in which every term occurs 200 times except the last column
# (index 4, i.e. 'art' in vocab), whose count is drawn from [1, 10]. This
# contrasts one rare term against four equally frequent ones.
dat_mat2 = np.ones((2, 5), dtype=int) * 200
dat_mat2[:, 4] = np.random.randint(1, 11, 2)
model2 = lda.LDA(n_topics=topic_num, n_iter=1500, random_state=1)
model2.fit(dat_mat2)
topic_word2 = model2.topic_word_
# vocab, topic_num and n_top_words are reused from earlier in this script.
vocab_arr2 = np.array(vocab)
for i, topic_dist in enumerate(topic_word2):
    # argsort is ascending; the reversed slice keeps the n_top_words
    # highest-weighted terms, ordered from most to least probable.
    topic_words = vocab_arr2[np.argsort(topic_dist)][:-(n_top_words + 1):-1]
    # Join the per-topic term names, not the numeric topic-word matrix
    # (joining the matrix raised a TypeError).
    print('Topic {}: {}'.format(i, ' '.join(topic_words)))