Skip to content

Commit

Permalink
did
Browse files Browse the repository at this point in the history
  • Loading branch information
Pherkel committed Sep 18, 2023
1 parent 8b3a0b4 commit 5be4e20
Show file tree
Hide file tree
Showing 5 changed files with 183 additions and 163 deletions.
18 changes: 10 additions & 8 deletions config.philipp.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -8,27 +8,29 @@ model:

training:
learning_rate: 0.0005
batch_size: 32 # recommended to set to the maximum number that fits on the GPU (batch size of 32 fits on a 12GB GPU)
epochs: 150
eval_every_n: 5 # evaluate every n epochs
batch_size: 4 # recommended to set to the maximum number that fits on the GPU (batch size of 32 fits on a 12GB GPU)
epochs: 4
eval_every_n: 1 # evaluate every n epochs
num_workers: 4 # number of workers for dataloader
device: "cuda" # device to run inference on if gpu is available, else "cpu" will be set automatically
device: "cpu" # device to run on if gpu is available, else "cpu" will be set automatically

dataset:
download: true
dataset_root_path: "data" # files will be downloaded into this dir
dataset_root_path: "/Volumes/pherkel 2/SWR2-ASR" # files will be downloaded into this dir
language_name: "mls_german_opus"
limited_supervision: false # set to True if you want to use limited supervision
dataset_percentage: 1 # percentage of dataset to use (1.0 = 100%)
limited_supervision: True # set to True if you want to use limited supervision
dataset_percentage: 0.01 # percentage of dataset to use (1.0 = 100%)
shuffle: true

tokenizer:
tokenizer_path: "data/tokenizers/char_tokenizer_german.json"

checkpoints:
model_load_path: "data/runs/epoch31" # path to load model from
# use "~" to disable loading/saving
model_load_path: ~ # path to load model from
model_save_path: "data/runs/epoch" # path to save model to

inference:
model_load_path: "data/runs/epoch30" # path to load model from
beam_width: 10 # beam width for beam search
device: "cuda" # device to run inference on if gpu is available, else "cpu" will be set automatically
7 changes: 5 additions & 2 deletions config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -7,11 +7,12 @@ model:
dropout: 0.3 # recommended to be around 0.4 for smaller datasets, 0.1 for really large datasets

training:
learning_rate: 5e-4
learning_rate: 0.0005 # between 0.0001 and 0.0006 seems to work well
batch_size: 8 # recommended to set to the maximum number that fits on the GPU (batch size of 32 fits on a 12GB GPU)
epochs: 3
eval_every_n: 3 # evaluate every n epochs
eval_every_n: 1 # evaluate every n epochs, set to 0 to disable
num_workers: 8 # number of workers for dataloader
device: "cpu" # device to run on if gpu is available, else "cpu" will be set automatically

dataset:
download: True
Expand All @@ -22,9 +23,11 @@ dataset:
shuffle: True

tokenizer:
# use "~" to train a new tokenizer
tokenizer_path: "data/tokenizers/char_tokenizer_german.yaml"

checkpoints:
# use "~" to disable loading/saving
model_load_path: "YOUR/PATH" # path to load model from
model_save_path: "YOUR/PATH" # path to save model to

Expand Down
Loading

0 comments on commit 5be4e20

Please sign in to comment.