# config.toml (from a fork of meta-toolkit/meta)
# Root-table settings: resource file locations first, then corpus and index
# names. Every path here is relative ("../..."); presumably resolved against
# the directory the program is launched from -- TODO confirm.
stop-words = "../data/lemur-stopwords.txt"
libsvm-modules = "../deps/libsvm-modules/"
# Data directory. The "relative to data prefix" notes in [crf], [sequence]
# and [parser] appear to refer to this key.
prefix = "../data/"
function-words = "../data/function-words.txt"
# Sentence-boundary resources, all located under ../data/sentence-boundaries/.
punctuation = "../data/sentence-boundaries/sentence-punctuation.txt"
start-exceptions = "../data/sentence-boundaries/sentence-start-exceptions.txt"
end-exceptions = "../data/sentence-boundaries/sentence-end-exceptions.txt"
# NOTE(review): this judgements file is named "ceeaus-qrels" while `dataset`
# below is "20newsgroups" -- confirm the two are meant to be used together.
query-judgements = "../data/ceeaus-qrels.txt"
# Corpus selection and the names given to the forward and inverted indexes.
# `dataset` is presumably a directory name under `prefix` -- TODO confirm.
corpus-type = "line-corpus"
dataset = "20newsgroups"
forward-index = "20news-fwd"
inverted-index = "20news-inv"
# One element of the `analyzers` array of tables. Each additional
# [[analyzers]] header appends another element to the array.
[[analyzers]]
method = "ngram-word"
# n-gram length; 1 presumably means single-word (unigram) tokens -- confirm.
ngram = 1
filter = "default-chain"
# Ranking function selection and its numeric parameters.
# k1, b and k3 look like the usual Okapi BM25 constants (term-frequency
# saturation, document-length normalization, query-term-frequency
# saturation) -- confirm against the ranker implementation.
[ranker]
method = "bm25"
k1 = 1.2
b = 0.75
k3 = 500
# Classifier configuration. `method` names the top-level classifier;
# the nested [classifier.base] table configures the learner it wraps
# (presumably one binary model per class for "one-vs-all" -- confirm).
[classifier]
method = "one-vs-all"
[classifier.base]
method = "sgd"
loss = "hinge"
# Presumably a name/path prefix for stored model files -- TODO confirm.
prefix = "sgd-model"
# Settings for the [lda] table (topic modelling).
[lda]
# Inference method name; "gibbs" presumably selects Gibbs sampling -- confirm.
inference = "gibbs"
max-iters = 1000
# alpha / beta are presumably the Dirichlet prior hyperparameters (document-
# topic and topic-word respectively) -- verify against the consumer.
alpha = 1.0
beta = 1.0
topics = 4
# Presumably a name/path prefix for saved model output -- TODO confirm.
model-prefix = "lda-model"
# Settings for the [crf] table: model prefix plus the treebank corpus and
# its train/dev/test split.
[crf]
prefix = "crf"
treebank = "penn-treebank" # relative to data prefix
corpus = "wsj"
# NOTE(review): meaning of section-size is not visible here; presumably the
# highest file number within one section -- confirm.
section-size = 99
# Each *-sections value is a two-element array, presumably an inclusive
# [first, last] range of section numbers -- confirm. As written the three
# ranges (0-18, 19-21, 22-24) do not overlap.
train-sections = [0, 18]
dev-sections = [19, 21]
test-sections = [22, 24]
# Settings for the [sequence] table: model prefix plus the treebank corpus
# and its train/dev/test split. The corpus and split values are identical to
# those in [crf]; only `prefix` differs.
[sequence]
prefix = "perceptron-tagger"
treebank = "penn-treebank" # relative to data prefix
corpus = "wsj"
# NOTE(review): meaning of section-size is not visible here; presumably the
# highest file number within one section -- confirm.
section-size = 99
# Each *-sections value is a two-element array, presumably an inclusive
# [first, last] range of section numbers -- confirm.
train-sections = [0, 18]
dev-sections = [19, 21]
test-sections = [22, 24]
# Settings for the [parser] table: model prefix plus the treebank corpus and
# its train/dev/test split. Note the split differs from [crf] and [sequence].
[parser]
prefix = "parser"
treebank = "penn-treebank" # relative to data prefix
corpus = "wsj"
# NOTE(review): meaning of section-size is not visible here; presumably the
# highest file number within one section -- confirm.
section-size = 99
# Each *-sections value is a two-element array, presumably an inclusive
# [first, last] range of section numbers (a pair like [22, 22] would then
# select exactly one section) -- confirm.
train-sections = [2, 21]
dev-sections = [22, 22]
test-sections = [23, 23]