Skip to content

Commit

Permalink
did
Browse files Browse the repository at this point in the history
  • Loading branch information
Pherkel committed Sep 18, 2023
1 parent 8b3a0b4 commit 5be4e20
Show file tree
Hide file tree
Showing 5 changed files with 183 additions and 163 deletions.
18 changes: 10 additions & 8 deletions config.philipp.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -8,27 +8,29 @@ model:

training:
learning_rate: 0.0005
batch_size: 32 # recommended to set to the maximum number that fits on the GPU (batch size of 32 fits on a 12GB GPU)
epochs: 150
eval_every_n: 5 # evaluate every n epochs
batch_size: 4 # recommended to set to the maximum number that fits on the GPU (batch size of 32 fits on a 12GB GPU)
epochs: 4
eval_every_n: 1 # evaluate every n epochs
num_workers: 4 # number of workers for dataloader
device: "cuda" # device to run inference on if gpu is available, else "cpu" will be set automatically
device: "cpu" # device to run on if gpu is available, else "cpu" will be set automatically

dataset:
download: true
dataset_root_path: "data" # files will be downloaded into this dir
dataset_root_path: "/Volumes/pherkel 2/SWR2-ASR" # files will be downloaded into this dir
language_name: "mls_german_opus"
limited_supervision: false # set to True if you want to use limited supervision
dataset_percentage: 1 # percentage of dataset to use (1.0 = 100%)
limited_supervision: True # set to True if you want to use limited supervision
dataset_percentage: 0.01 # percentage of dataset to use (1.0 = 100%)
shuffle: true

tokenizer:
tokenizer_path: "data/tokenizers/char_tokenizer_german.json"

checkpoints:
model_load_path: "data/runs/epoch31" # path to load model from
# use "~" to disable loading/saving
model_load_path: ~ # path to load model from
model_save_path: "data/runs/epoch" # path to save model to

inference:
model_load_path: "data/runs/epoch30" # path to load model from
beam_width: 10 # beam width for beam search
device: "cuda" # device to run inference on if gpu is available, else "cpu" will be set automatically
7 changes: 5 additions & 2 deletions config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -7,11 +7,12 @@ model:
dropout: 0.3 # recommended to be around 0.4 for smaller datasets, 0.1 for really large datasets

training:
learning_rate: 5e-4
learning_rate: 0.0005 # between 0.0001 and 0.0006 seems to work well
batch_size: 8 # recommended to set to the maximum number that fits on the GPU (batch size of 32 fits on a 12GB GPU)
epochs: 3
eval_every_n: 3 # evaluate every n epochs
eval_every_n: 1 # evaluate every n epochs, set to 0 to disable
num_workers: 8 # number of workers for dataloader
device: "cpu" # device to run on if gpu is available, else "cpu" will be set automatically

dataset:
download: True
Expand All @@ -22,9 +23,11 @@ dataset:
shuffle: True

tokenizer:
# use "~" to train a new tokenizer
tokenizer_path: "data/tokenizers/char_tokenizer_german.yaml"

checkpoints:
# use "~" to disable loading/saving
model_load_path: "YOUR/PATH" # path to load model from
model_save_path: "YOUR/PATH" # path to save model to

Expand Down
Loading

0 comments on commit 5be4e20

Please sign in to comment.