-
Notifications
You must be signed in to change notification settings - Fork 0
/
knetreader.jl
65 lines (61 loc) · 1.52 KB
/
knetreader.jl
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
## KNET MODEL BiLSTM CNN CRF for NER
### author arda akdemir
## Build an array of documents; each document is an array of sentences and each sentence is an array of tokens.
"""
    read_sents(filename)

Read a column-formatted corpus file and return its tokens grouped by document
and sentence.

The file is scanned line by line:

- a line containing `"DOCSTART"` marks the beginning of a new document;
- an empty (or whitespace-only) line marks the end of the current sentence;
- on any other line, the first whitespace-separated field is taken as the token.

# Arguments
- `filename`: path of the file to read.

# Returns
A vector of documents. Each document is a vector of sentences and each sentence
is a vector of tokens (the substrings produced by `split`). The containers are
left untyped (`Any`) so the returned value has the same type as before.
"""
function read_sents(filename)
    docs = []
    sents = []
    sent = []
    # `readlines(filename)` opens and closes the file itself, so no handle is leaked.
    for line in readlines(filename)
        if occursin("DOCSTART", line)
            # A new document begins: first close a sentence that was not terminated
            # by a blank line, then store the document collected so far.
            if !isempty(sent)
                push!(sents, sent)
                sent = []
            end
            if !isempty(sents)
                push!(docs, sents)
                sents = []
            end
        elseif isempty(strip(line))
            # Blank line: sentence boundary. Whitespace-only lines count as blank,
            # otherwise `split(line)[1]` below would fail on them.
            if !isempty(sent)
                push!(sents, sent)
                sent = []
            end
        else
            push!(sent, split(line)[1])
        end
    end
    # The file usually does not end with another DOCSTART marker (and may not end
    # with a blank line), so the trailing sentence and document are flushed here.
    if !isempty(sent)
        push!(sents, sent)
    end
    if !isempty(sents)
        push!(docs, sents)
    end
    return docs
end
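## Read documents together with the tag (second column) of every token, and collect the list of distinct tags.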
"""
    read_with_labels(filename)

Read a column-formatted corpus file and return its tokens and tags grouped by
document and sentence, together with the list of distinct tags.

The file is scanned line by line:

- a line containing `"DOCSTART"` marks the beginning of a new document;
- an empty (or whitespace-only) line marks the end of the current sentence;
- on any other line, the first whitespace-separated field is the token and the
  second field is its tag.

# Arguments
- `filename`: path of the file to read.

# Returns
A tuple `(docs, taglist)`:

- `docs`: a vector of documents; each document is a vector of
  `(tokens, tags)` tuples, one per sentence, where both members are vectors of
  equal length.
- `taglist`: a `Vector{String}` with every distinct tag, in order of first
  appearance.

# Throws
- `BoundsError` if a non-blank, non-DOCSTART line has fewer than two fields.
"""
function read_with_labels(filename)
    docs = []
    sents = []
    sent = []
    tags = []
    taglist = String[]
    # `readlines(filename)` opens and closes the file itself, so no handle is leaked.
    for line in readlines(filename)
        if occursin("DOCSTART", line)
            # A new document begins: first close a sentence that was not terminated
            # by a blank line, then store the document collected so far. `sent` and
            # `tags` are always reset together so they cannot get out of step.
            if !isempty(sent)
                push!(sents, (sent, tags))
                sent = []
                tags = []
            end
            if !isempty(sents)
                push!(docs, sents)
                sents = []
            end
        elseif isempty(strip(line))
            # Blank line: sentence boundary. Whitespace-only lines count as blank,
            # otherwise the field lookup below would fail on them.
            if !isempty(sent)
                push!(sents, (sent, tags))
                sent = []
                tags = []
            end
        else
            # Split once and validate both fields before touching `sent`/`tags`.
            fields = split(line)
            word = fields[1]
            tag = fields[2]
            push!(sent, word)
            push!(tags, tag)
            if !(tag in taglist)
                push!(taglist, tag)
            end
        end
    end
    # The file usually does not end with another DOCSTART marker (and may not end
    # with a blank line), so the trailing sentence and document are flushed here.
    if !isempty(sent)
        push!(sents, (sent, tags))
    end
    if !isempty(sents)
        push!(docs, sents)
    end
    return docs, taglist
end
#docs,taglist = read_with_labels("knetfolder/train.txt");
#taglist