-
Notifications
You must be signed in to change notification settings - Fork 1
/
params.yaml
90 lines (81 loc) · 2.21 KB
/
params.yaml
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
# List every month you want a model for.
# As written, this list is exactly download_months plus manual_download_months.
# NOTE(review): 2022-04 through 2022-11 are absent from every month list in
# this file - confirm the gap is intentional.
# Months are quoted so every YAML loader reads them as plain strings.
all_months:
  - '2021-04'
  - '2021-05'
  - '2021-06'
  - '2021-07'
  - '2021-08'
  - '2021-09'
  - '2021-10'
  - '2021-11'
  - '2021-12'
  - '2022-01'
  - '2022-02'
  - '2022-03'
  - '2022-12'
  - '2023-01'
  - '2023-02'
  - '2023-03'
  - '2023-04'
  - '2023-05'
  - '2023-06'
  - '2023-07'
  - '2023-08'
  - '2023-09'
  - '2023-10'
  - '2023-11'
  - '2023-12'
# Months whose data can be downloaded from pushshift.
# Months are quoted so every YAML loader reads them as plain strings.
download_months:
  - '2021-04'
  - '2021-05'
  - '2021-06'
  - '2021-07'
  - '2021-08'
  - '2021-09'
  - '2021-10'
  - '2021-11'
  - '2021-12'
  - '2022-01'
  - '2022-02'
  - '2022-03'
  - '2022-12'
  - '2023-01'
  - '2023-02'
# Months whose data had to be downloaded manually from elsewhere.
# The .zst archives for these months must be placed in
# ${manual_downloads_dir}/comments
manual_downloads_dir: data/manual_downloads
# Months are quoted so every YAML loader reads them as plain strings.
manual_download_months:
  - '2023-03'
  - '2023-04'
  - '2023-05'
  - '2023-06'
  - '2023-07'
  - '2023-08'
  - '2023-09'
  - '2023-10'
  - '2023-11'
  - '2023-12'
# Parameters to the ihop.import_data c2v script.
# The settings are nested under this key; left flush at column 0 the key
# itself would parse as null and its settings would become top-level keys.
community2vec_data_prep:
  # NOTE(review): presumably the number of top communities to keep - confirm
  # against ihop.import_data.
  top_n: 10000
  # NOTE(review): presumably the fraction of most active users to drop -
  # confirm against ihop.import_data.
  exclude_top_users: 0.05
# Parameters to the ihop.community2vec script.
# The settings are nested under this key; left flush at column 0 the key
# itself would parse as null and its settings would become top-level keys.
community2vec_params:
  # Hyperparameters to tune against for getting the best analogy accuracy.
  # The value is a JSON object wrapped in literal single quotes inside a
  # double-quoted YAML string. Keep both layers of quoting intact.
  # NOTE(review): presumably so it survives being passed as a single shell
  # argument - confirm against the pipeline definition.
  param_grid: "'{\"alpha\": [0.08, 0.05], \"vector_size\":[100], \"sample\":[0, 0.001, 0.005], \"negative\":[10,20]}'"
  # Number of epochs to train each model for
  epochs: 5
  # Number of workers to use for training the model
  workers: 12
# Kmeans cluster parameter settings for annotation - you can use any options
# that are available in sklearn.cluster.KMeans init.
# model_params is nested under this key so it cannot collide with the
# model_params of another clustering section.
kmeans_cluster_params:
  # JSON object wrapped in literal single quotes inside a double-quoted YAML
  # string. Keep both layers of quoting intact.
  model_params: "'{\"n_clusters\":100}'"
# Agglomerative cluster parameter settings for annotation - you can use any
# options that are available in sklearn.cluster.AgglomerativeClustering to
# override the defaults from ihop.clustering. Note that the default linkage
# option is "average", which may not result in very intuitive clusterings.
# model_params is nested under this key so it cannot collide with the
# model_params of another clustering section.
agglomerative_cluster_params:
  # JSON object wrapped in literal single quotes inside a double-quoted YAML
  # string. Keep both layers of quoting intact.
  model_params: "'{\"n_clusters\":100}'"
# Configure directories to store experiment results in.
# All paths below are relative.
# NOTE(review): presumably resolved against the project root - confirm.
# Raw data locations, one directory for comments and one for submissions.
comments_dir: data/raw_data/comments
submissions_dir: data/raw_data/submissions
# NOTE(review): presumably where the community2vec models are written - confirm.
community2vec_dir: data/community2vec
# NOTE(review): presumably where data prepared for annotation is written - confirm.
annotation_data_dir: data/annotation_data