-
Notifications
You must be signed in to change notification settings - Fork 0
/
Comick-Demo.py
105 lines (92 loc) · 4.33 KB
/
Comick-Demo.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
import pickle
import streamlit as st
from pathlib import Path
from tagger import POSTagger
from utils import text_preprocessing, word_embedding, idxs_to_tags, print_annotated_text
from oov_sents import sents, tokenize_sents
from models import list_model, pretrained_models
# --- Page setup -------------------------------------------------------------
# This is the first Streamlit call the script itself makes.
PAGE_SETTINGS = {
    "page_title": "Comick",
    "page_icon": "💚",
    "layout": "centered",
}
st.set_page_config(**PAGE_SETTINGS)


def _show_html(markup):
    """Write *markup* to the page via ``st.write`` with raw HTML enabled."""
    st.write(markup, unsafe_allow_html=True)


_show_html(
    "<h2 style='text-align: center; margin-bottom: 50px'>Comick Demo Application</h2>"
)

# --- Sentence picker --------------------------------------------------------
# The select box stores a key of `sents`; the mapped value is what is shown
# for each option.
_show_html("<h4 style='text-align: left;'>Choose Text</h4>")
sentence_keys = list(sents.keys())
selected_text = st.selectbox(
    '',
    options=sentence_keys,
    format_func=lambda sentence_key: sents[sentence_key],
    label_visibility="hidden",
)

# Echo the value stored under the chosen key below its own heading.
_show_html("<h4 style='text-align: left; margin-top: 20px;'>Text</h4>")
_show_html(sents[selected_text])

# --- Model picker -----------------------------------------------------------
# Same pattern as above: keys of `list_model` are the stored values, the
# mapped values are the displayed labels.
_show_html("<h4 style='text-align: left; margin-top: 20px;'>Choose Model</h4>")
model_keys = list(list_model.keys())
selected_model = st.selectbox(
    '',
    options=model_keys,
    format_func=lambda model_key: list_model[model_key],
    # Preselects the option at position 16.
    # NOTE(review): assumes list_model has at least 17 entries - confirm.
    index=16,
    label_visibility="hidden",
)
# --- Tagging pipeline -------------------------------------------------------
# Collect the keys of the pickled OOV embedding dictionary. The file is opened
# in a `with` block so the handle is closed as soon as loading finishes, and
# the keys are kept in a set because they are only used for membership tests
# in the comprehension below.
# NOTE(security): pickle can execute arbitrary code while loading; this file
# must come from a trusted source.
with open(Path("word_embeddings/oov_embedding_dict.pkl"), "rb") as oov_file:
    oov_tokens = set(pickle.load(oov_file).keys())

# Preprocess the tokenized form of the selected sentence and embed it.
# NOTE(review): `words` is not used below; the tagged output is zipped against
# tokenize_sents[selected_text] instead - confirm both have the same length.
words, idxs_tokenize = text_preprocessing(tokenize_sents[selected_text])
word_embeddings = word_embedding(idxs_tokenize)

# Build the tagger for the selected pretrained model and run it. The index of
# the largest value along the last dimension is taken for each position and
# converted to tag names (presumably a torch-like tensor - TODO confirm).
comick_pos_tagger = POSTagger(pretrained_models[selected_model])
pred_tags = idxs_to_tags(comick_pos_tagger(word_embeddings).argmax(dim=-1))

# Pair every token with its predicted tag; tokens whose lowercase form is a
# key of the OOV dictionary get an " (OOV)" suffix in the displayed text.
comick_pos_tags = [
    (f"{word} (OOV)", tag) if word.lower() in oov_tokens else (word, tag)
    for word, tag in zip(tokenize_sents[selected_text], pred_tags)
]
# --- Result rendering -------------------------------------------------------
# Markup is kept in named constants so the rendering calls below stay short.
RESULT_HEADING_HTML = (
    "<h4 style='text-align: left; margin-top: 20px;'>Part-of-Speech Tag</h4>"
)

# Ordered list describing every tag abbreviation, with example words.
TAG_LEGEND_HTML = """
<ol>
<li>CC : Coordinating conjunction, also called coordinator. Example: dan, tetapi, atau.</li>
<li>CD : Cardinal number. Example: dua, juta, enam, 7916, sepertiga, 0,025, 0,525, banyak, kedua, ribuan, 2007, 25.</li>
<li>DT : Determiner / article. Example: para, sang, si.</li>
<li>FW : Foreign word.. Example: climate change, terms and conditions.</li>
<li>IN : Preposition. Example: dalam, dengan, di, ke, oleh, pada, untuk.</li>
<li>JJ : Adjective. Example: bersih, panjang, hitam, lama, jauh, marah, suram, nasional, bulat.</li>
<li>MD : Modal and auxiliary verb. Example: boleh, harus, sudah, mesti, perlu.</li>
<li>NEG : Negation. Example: tidak, belum, jangan.</li>
<li>NN : Noun. Example: monyet, bawah, sekarang, rupiah.</li>
<li>NND : Classifier, partitive, and measurement noun. Example: orang, ton, helai, lembar.</li>
<li>NNP : Proper noun. Example: Boediono, Laut Jawa, Indonesia, India, Malaysia, Bank Mandiri, BBKP, Januari, Senin, Idul Fitri, Piala Dunia, Liga Primer, Lord of the Rings: The Return of the King.</li>
<li>OD : Ordinal number. Example: ketiga, ke-4, pertama.</li>
<li>PR : Demonstrative pronoun. Example: ini, itu, sini, situ.</li>
<li>PRP : Personal pronoun. Example: saya, kami, kita, kamu, kalian, dia, mereka.</li>
<li>RB : Adverb. Example: sangat, hanya, justru, niscaya, segera.</li>
<li>RP : Particle. Example: pun, -lah, -kah.</li>
<li>SC : Subordinating conjunction, also called subordinator. Example: sejak, jika, seandainya, supaya, meski, seolah-olah, sebab, maka, tanpa, dengan, bahwa, yang, lebih ... daripada ..., semoga.</li>
<li>SYM : Symbol. Example: IDR, +, %, @.</li>
<li>UH : Interjection. Example: brengsek, oh, ooh, aduh, ayo, mari, hai.</li>
<li>VB : Verbs. Example: merancang, mengatur, pergi, bekerja, tertidur.</li>
<li>WH : Question. Example: siapa, apa, mana, kenapa, kapan, di mana, bagaimana, berapa.</li>
<li>X : Unknown. Example: statemen.</li>
<li>Z : Punctuation. Example: "...", ?, .</li>
<li>UNK : Unknown token, because word embedding does not exist.</li>
</ol>
"""

FOOTER_HTML = "<p style='text-align: left; margin-top: 60px; text-align:center;'>Made with 💚 by <a href='https://haloapping.github.io/' target=_blank>haloapping</a></p>"


def _write_html(target, markup):
    """Call ``target.write`` on *markup* with raw HTML enabled."""
    target.write(markup, unsafe_allow_html=True)


# Heading, then the (token, tag) pairs handed to the project's annotated-text
# helper.
_write_html(st, RESULT_HEADING_HTML)
print_annotated_text(comick_pos_tags)

# The tag legend is written into an expander labelled "See tag information".
expander = st.expander("See tag information")
_write_html(expander, TAG_LEGEND_HTML)

# Credit line at the bottom of the page.
_write_html(st, FOOTER_HTML)