-
Notifications
You must be signed in to change notification settings - Fork 0
/
dvc.yaml
51 lines (51 loc) · 1.86 KB
/
dvc.yaml
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
# DVC pipeline definition. Stages are linked into a graph by matching one
# stage's `outs` against another stage's `deps`, so every file a command
# reads must be listed under `deps` for ordering and change detection to work.
# Resulting chain: download_data -> prepare_data -> get_tags -> get_final_df.
stages:
  # Runs download_data.py twice (joined with `&&`, so the second run only
  # starts if the first succeeds): once per SQL script, each writing its own
  # parquet file.
  download_data:
    # `>-` folds the lines below into a single command line; keep them all at
    # the same indentation, otherwise the folding breaks.
    cmd: >-
      python src/data/download_data.py
      --script_path="scripts/select_comments.sql"
      --save_path="data/initial_data/comments.parquet"
      &&
      python src/data/download_data.py
      --script_path="scripts/select_tags.sql"
      --save_path="data/initial_data/tags.parquet"
    deps:
      - src/data/download_data.py
      - config.yaml
      - scripts/select_comments.sql
      - scripts/select_tags.sql
    outs:
      - data/initial_data/comments.parquet
      - data/initial_data/tags.parquet

  # Takes the comments and tags files from download_data and writes one
  # combined parquet file.
  prepare_data:
    cmd: >-
      python src/data/prepare_data.py
      --comments_path="data/initial_data/comments.parquet"
      --tags_path="data/initial_data/tags.parquet"
      --save_path="data/intermediate_data/comments_tags.parquet"
    deps:
      - src/data/prepare_data.py
      - config.yaml
      # Inputs produced by download_data; listing them makes this stage run
      # after it and re-run whenever the downloaded data changes.
      - data/initial_data/comments.parquet
      - data/initial_data/tags.parquet
    outs:
      - data/intermediate_data/comments_tags.parquet

  # Runs get_tags.py on the combined file. Also depends on
  # src/models/roberta.py and tracks the model directory under models/roberta
  # as an output of this stage.
  get_tags:
    cmd: >-
      python src/data/get_tags.py
      --df_path="data/intermediate_data/comments_tags.parquet"
      --save_path="data/intermediate_data/df_tagged.parquet"
    deps:
      - src/data/get_tags.py
      - src/models/roberta.py
      - config.yaml
      # Input produced by prepare_data.
      - data/intermediate_data/comments_tags.parquet
    outs:
      - data/intermediate_data/df_tagged.parquet
      - models/roberta/models--AlexKay--xlm-roberta-large-qa-multilingual-finedtuned-ru

  # Builds the final prepared dataset from the tagged file.
  get_final_df:
    cmd: >-
      python src/data/get_final_df.py
      --df_tagged_path="data/intermediate_data/df_tagged.parquet"
      --save_path="data/prepared_data/prepared_df.parquet"
    deps:
      - src/data/get_final_df.py
      - config.yaml
      # Input produced by get_tags.
      - data/intermediate_data/df_tagged.parquet
    outs:
      - data/prepared_data/prepared_df.parquet