-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathmain_train_pcmasking_subset.py
170 lines (142 loc) · 9.48 KB
/
main_train_pcmasking_subset.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
import argparse
import datetime
import time
from pathlib import Path
from pcmasking.neural_networks.models_split_over_nodes import generate_models
from pcmasking.neural_networks.training import train_all_models
from pcmasking.utils.main_utils import load_fine_tune_weights, save_masking_vector, save_history, read_txt_to_list, \
parse_str_to_bool, parse_str_to_bool_or_int, set_random_seed
from pcmasking.utils.setup import SetupNeuralNetworks
from pcmasking.utils.tf_gpu_management import manage_gpu, set_gpu
def train_pcmasking_subset(config_file, nn_inputs_file, nn_outputs_file, train_indices, load_weights_from_ckpt,
                           continue_previous_training, config_fine_tune, seed):
    """Build and train one network per selected output variable, then save the results.

    Args:
        config_file: YAML configuration file. It is handed to ``SetupNeuralNetworks``
            as the ``-c`` argument, and its parent directory is where histories
            (and masking vectors) are saved.
        nn_inputs_file: File read with ``read_txt_to_list`` to obtain the network inputs.
        nn_outputs_file: File read with ``read_txt_to_list`` to obtain the full list
            of network outputs.
        train_indices: Iterable of positions into the outputs list; only these
            outputs get a network.
        load_weights_from_ckpt: Forwarded to ``train_all_models`` as ``from_checkpoint``.
        continue_previous_training: Forwarded as ``continue_training`` to both
            ``generate_models`` and ``train_all_models``.
        config_fine_tune: Fine-tuning configuration, or ``None`` to skip loading
            fine-tune weights. Only consulted when ``setup.nn_type`` is ``"MaskNet"``.
        seed: Forwarded to ``generate_models`` and ``load_fine_tune_weights``.
    """
    nn_setup = SetupNeuralNetworks(["-c", config_file])

    input_vars = read_txt_to_list(nn_inputs_file)
    all_output_vars = read_txt_to_list(nn_outputs_file)
    # Restrict the outputs to the requested subset, keeping the order of train_indices.
    target_vars = [all_output_vars[idx] for idx in train_indices]

    models = generate_models(
        nn_setup,
        input_vars,
        target_vars,
        continue_training=continue_previous_training,
        seed=seed,
    )

    # Fine-tuning a MaskNet starts from the weights of a previously trained PreMaskNet.
    if nn_setup.nn_type == "MaskNet":
        if config_fine_tune is not None:
            load_fine_tune_weights(config_fine_tune, models, seed, inputs=input_vars, outputs=target_vars)

    training_history = train_all_models(
        models,
        nn_setup,
        from_checkpoint=load_weights_from_ckpt,
        continue_training=continue_previous_training,
    )

    # All artefacts are written next to the configuration file.
    output_dir = Path(config_file).parent
    save_history(training_history, models, target_vars, output_dir)

    # Only a PreMaskNet has a masking vector to store for each variable.
    if nn_setup.nn_type == "PreMaskNet":
        save_masking_vector(models, target_vars, output_dir)
if __name__ == "__main__":
    """
    Main function to train PCMasking networks for a subset of specified output variables.

    Command-line Arguments:
        -s, --seed (int, optional): Random seed for reproducibility. Defaults to False (no seed requested);
            passing the flag without a value yields True.
        -g, --gpu_index (int, optional): Index of the GPU to use for training. If not provided (or given without
            a value), GPU handling is delegated to `manage_gpu` with the YAML configuration file.
        -f, --fine_tune_config (str, optional): YAML configuration file for fine-tuning from a trained PreMaskNet.
            If not provided, training will start from scratch.
        -c, --config_file (str, required): Path to the YAML configuration file for neural network creation.
        -i, --inputs_file (str, required): Path to the .txt file containing neural network input variables.
        -o, --outputs_file (str, required): Path to the .txt file containing neural network output variables.
        -x, --train_indices (str, required): Range of output variable indices in the format 'start-end'
            (end inclusive) to specify which networks to train.
        -l, --load_ckpt (bool, required): Flag to load weights from a previous checkpoint during training.
        -t, --continue_training (bool, required): Flag to continue training from the previous session,
            resuming model and optimizer states.

    Variables:
        yaml_config_file (Path): Path object for the YAML configuration file.
        inputs_file (Path): Path object for the input variables .txt file.
        outputs_file (Path): Path object for the output variables .txt file.
        train_idx (list[int]): List of indices for the selected output variables to train.
        load_ckpt (bool): Whether to load model weights from a previous checkpoint.
        continue_training (bool): Whether to continue training from the previous session.
        random_seed_parsed (int or bool): Parsed random seed value.
        gpu_index (int or None): Index of the GPU to use for training, if provided.
        fine_tune_cfg (Path or None): Path to the fine-tuning YAML configuration file, if provided.

    Raises:
        SystemExit: Via `parser.error` if the configuration file, inputs file, outputs file or fine-tuning
            configuration file has an incorrect extension.
        ValueError: If the train indices are not of the form 'start-end' or describe an empty range.

    Example:
        $ python main_train_pcmasking_subset.py -c config.yml -i inputs.txt -o outputs.txt -x "1-10" -l False -t False -s 42 -g 0

    Workflow:
        1. Parse command-line arguments and validate file paths and extensions.
        2. Manage GPU settings based on the specified GPU index or multi-GPU configuration.
        3. Parse the range of output indices for training.
        4. Set a random seed for reproducibility if specified.
        5. Load the model setup and input/output variables.
        6. Generate models and optionally load fine-tuned weights.
        7. Train the model(s) based on the specified configurations.
        8. Save training history and masking vector if required.
    """
    parser = argparse.ArgumentParser(
        description="Trains PCMasking networks for only a subset of specified output variables.")
    parser.add_argument("-s", "--seed", help="Integer value for random seed. "
                                             "Use 'False' or leave out this option to not set a random seed.",
                        default=False, type=parse_str_to_bool_or_int, nargs='?', const=True)
    # The default must be None (not False): the GPU dispatch below tests `gpu_index is None`, and a
    # False default would send runs without -g to set_gpu(index=False) instead of manage_gpu.
    parser.add_argument("-g", "--gpu_index",
                        help="GPU index. If given, only the GPU specified by index will be used for training.",
                        required=False, default=None, type=int, nargs='?')
    parser.add_argument("-f", "--fine_tune_config",
                        help="Configuration file for previously trained PreMaskNet to load weights from for fine-tuning.",
                        required=False, default=None, type=str, nargs='?')

    required_args = parser.add_argument_group("required arguments")
    required_args.add_argument("-c", "--config_file", help="YAML configuration file for neural network creation.",
                               required=True)
    required_args.add_argument("-i", "--inputs_file", help=".txt file with NN inputs list.", required=True, type=str)
    required_args.add_argument("-o", "--outputs_file", help=".txt file with NN outputs list.", required=True, type=str)
    required_args.add_argument("-x", "--train_indices", help="Start and end index of outputs in outputs list, "
                                                             "specifying the neural networks that are to be trained. "
                                                             "Must be a string in the form 'start-end'.",
                               required=True, type=str)
    required_args.add_argument("-l", "--load_ckpt",
                               help="Boolean indicating whether to load weights from checkpoint from previous training.",
                               required=True, type=parse_str_to_bool)
    required_args.add_argument("-t", "--continue_training",
                               help="Boolean indicating whether to continue with previous training. The model "
                                    "(including optimizer) is loaded and the learning rate is initialized with the "
                                    "last learning rate from previous training.",
                               required=True, type=parse_str_to_bool)

    args = parser.parse_args()

    yaml_config_file = Path(args.config_file)
    inputs_file = Path(args.inputs_file)
    outputs_file = Path(args.outputs_file)
    train_idx = args.train_indices
    load_ckpt = args.load_ckpt
    continue_training = args.continue_training
    random_seed_parsed = args.seed
    gpu_index = args.gpu_index
    # An empty string is treated the same as "no fine-tuning configuration given".
    fine_tune_cfg = None if args.fine_tune_config == "" else args.fine_tune_config

    # Validate file extensions; parser.error prints the usage message and exits.
    if yaml_config_file.suffix != ".yml":
        parser.error(f"Configuration file must be YAML file (.yml). Got {yaml_config_file}")
    if inputs_file.suffix != ".txt":
        parser.error(f"File with neural network inputs must be .txt file. Got {inputs_file}")
    if outputs_file.suffix != ".txt":
        parser.error(f"File with neural network outputs must be .txt file. Got {outputs_file}")

    if fine_tune_cfg is not None:
        if not fine_tune_cfg.endswith(".yml"):
            parser.error(f"Fine-tuning configuration file must be YAML file (.yml). Got {fine_tune_cfg}")
        else:
            fine_tune_cfg = Path(fine_tune_cfg)

    # GPU management: Allow memory growth if training is done on multiple GPUs, otherwise limit GPUs to single GPU
    if gpu_index is None:
        manage_gpu(yaml_config_file)
    else:
        set_gpu(index=gpu_index)

    # Parse indices of outputs selected for training
    try:
        start, end = (int(part) for part in train_idx.split("-"))
    except ValueError as err:
        # Covers both a wrong number of '-'-separated parts and non-integer parts.
        raise ValueError(
            f"Train indices must be a string in the form 'start-end' with integer bounds. Got {train_idx!r}"
        ) from err
    # The end index is inclusive.
    train_idx = list(range(start, end + 1))
    if not train_idx:
        raise ValueError("Given train indices were incorrect. Start indices must be smaller than end index. ")

    # Set random seed
    random_seed = set_random_seed(random_seed_parsed)

    print(f"\nYAML config file:      {yaml_config_file}")
    print(f"Input list .txt file:  {inputs_file}")
    print(f"Output list .txt file: {outputs_file}")
    print(f"Train indices:         {train_idx}")
    print(f"Fine-tuning config:    {fine_tune_cfg}")
    print(f"Random seed:           {random_seed}\n")

    print(f"\n\n{datetime.datetime.now()} --- Start training PCMasking networks.", flush=True)
    t_init = time.time()

    train_pcmasking_subset(yaml_config_file, inputs_file, outputs_file, train_idx, load_ckpt, continue_training,
                           fine_tune_cfg,
                           random_seed)

    t_total = datetime.timedelta(seconds=time.time() - t_init)
    print(f"\n{datetime.datetime.now()} --- Finished. Elapsed time: {t_total}")