-
Notifications
You must be signed in to change notification settings - Fork 3
/
custom_train.py
109 lines (90 loc) · 3.03 KB
/
custom_train.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
"""Train File."""
# Imports
import argparse
import os
from src.datasets import *
from src.models import *
from src.trainers import *
from src.utils.configuration import Config
from src.utils.logger import Logger
from src.utils.mapper import configmapper
from src.utils.misc import generate_grid_search_configs, seed
dirname = os.path.dirname(__file__)  # For paths relative to the current file

# Config
# Command-line interface: where the YAML configs live and which run mode to use.
parser = argparse.ArgumentParser(
    prog="train.py", description="Train a model with Base Trainer."
)
parser.add_argument(
    "--config_dir",
    type=str,
    action="store",
    # Without this flag the os.path.join calls below would receive None and
    # fail with a TypeError, so let argparse reject the invocation up front
    # with a proper usage message instead.
    required=True,
    help="The directory for all config files.",
)
parser.add_argument(
    "--grid_search",
    action="store_true",
    help="Whether to do a grid_search",
    default=False,
)
parser.add_argument(
    "--validation",
    action="store_true",
    help="Whether to use validation data or test data",
    default=False,
)
args = parser.parse_args()

# A run is described by three YAML files that must all sit in --config_dir.
model_config = Config(path=os.path.join(args.config_dir, "model.yaml"))
train_config = Config(path=os.path.join(args.config_dir, "train.yaml"))
data_config = Config(path=os.path.join(args.config_dir, "dataset.yaml"))
grid_search = args.grid_search

# Base directory that is joined with the run name to form the logger's path.
# Alternative location kept for reference:
# log_dir = "/content/drive/MyDrive/SuperPixels/logs/"
log_dir = "./logs/"

# Seed
# Seed once before any dataset is constructed.
seed(train_config.main_config.seed)
# Data
# A top-level "main" key in the dataset config selects the regular path, where
# train and val datasets are built separately from their own sub-configs.
# Without it, a single dataset object is built and its splits are indexed.
if "main" not in data_config.as_dict():  # HF Type Data
    dataset = configmapper.get_object("datasets", data_config.name)(data_config)
    train_data = dataset.train_dataset["train"]
    val_data = dataset.train_dataset["test"]
else:  # Regular Data
    # --validation switches to the train/val pair nested under `train_val`;
    # otherwise the top-level `train` and `val` entries are used.
    split_root = data_config.train_val if args.validation else data_config
    train_data_config = split_root.train
    val_data_config = split_root.val

    # Look up each dataset class by its configured name, then instantiate it
    # with its own sub-config.
    build_train = configmapper.get_object("datasets", train_data_config.name)
    train_data = build_train(train_data_config)
    build_val = configmapper.get_object("datasets", val_data_config.name)
    val_data = build_val(val_data_config)
# Logger
# The run is named after the last "/"-separated component of --config_dir
# (leading/trailing slashes ignored); runs made without --validation get an
# "_orig" suffix.
run_name = args.config_dir.strip("/").rsplit("/", 1)[-1]
if not args.validation:
    run_name += "_orig"
logger = Logger(log_path=os.path.join(log_dir, run_name))
# Build the list of training configs to run: with --grid_search the base train
# config is expanded via generate_grid_search_configs; otherwise the base
# config is run once.
if grid_search:
    train_configs = generate_grid_search_configs(train_config, train_config.grid_search)
    print(f"Total Configurations Generated: {len(train_configs)}")
else:
    train_configs = [train_config]

# One code path for both modes. The loop variable is deliberately distinct
# from the module-level `train_config` so the base config is not rebound.
# NOTE(review): every run shares the same `logger` instance; confirm that
# Logger keeps grid-search runs distinguishable.
for run_config in train_configs:
    if grid_search:
        print(run_config)

    ## Seed
    # Re-seed right before the model is built, once per run.
    seed(run_config.main_config.seed)
    model = configmapper.get_object("models", model_config.name)(model_config)

    ## Trainer
    trainer = configmapper.get_object("trainers", run_config.trainer_name)(run_config)

    ## Train
    trainer.train(model, train_data, val_data, logger)