Merge pull request #38 from Algo-Boys/decoder

Decoder
Algo-Boys · Sep 18, 2023 · f945067 · f945067
2 parents 8b3a0b4 + 21a3b1d
commit f945067
Show file tree

Hide file tree

Showing 18 changed files with 1,535 additions and 155 deletions.
diff --git a/.gitignore b/.gitignore
@@ -1,7 +1,11 @@
+# pictures
+**/*.png
+
 # Training files
 data/*
 !data/tokenizers
 !data/own
+!data/metrics.csv
 
 # Mac
 **/.DS_Store
@@ -67,8 +71,7 @@ cover/
 *.mo
 *.pot
 
-#Model
-YOUR
+
 
 # Django stuff:
 *.log

diff --git a/Dockerfile b/Dockerfile
diff --git a/Makefile b/Makefile
diff --git a/config.cluster.yaml b/config.cluster.yaml
diff --git a/config.philipp.yaml b/config.philipp.yaml
@@ -1,34 +1,45 @@
+dataset:
+  download: True
+  dataset_root_path: "/Volumes/pherkel 2/SWR2-ASR" # files will be downloaded into this dir
+  language_name: "mls_german_opus"
+  limited_supervision: True # set to True if you want to use limited supervision
+  dataset_percentage: 0.01 # percentage of dataset to use (1.0 = 100%)
+  shuffle: True
+
 model: 
   n_cnn_layers: 3
   n_rnn_layers: 5
   rnn_dim: 512
   n_feats: 128 # number of mel features
   stride: 2
-  dropout: 0.2 # recommended to be around 0.4-0.6 for smaller datasets, 0.1 for really large datasets
-
-training:
-  learning_rate: 0.0005
-  batch_size: 32 # recommended to maximum number that fits on the GPU (batch size of 32 fits on a 12GB GPU)
-  epochs: 150 
-  eval_every_n: 5 # evaluate every n epochs
-  num_workers: 4 # number of workers for dataloader
-  device: "cuda" # device to run inference on if gpu is available, else "cpu" will be set automatically
-
-dataset:
-  download: true
-  dataset_root_path: "data" # files will be downloaded into this dir
-  language_name: "mls_german_opus"
-  limited_supervision: false # set to True if you want to use limited supervision
-  dataset_percentage: 1 # percentage of dataset to use (1.0 = 100%)
-  shuffle: true
+  dropout: 0.6 # recommended to be around 0.4 for smaller datasets, 0.1 for really large datasets
 
 tokenizer:
   tokenizer_path: "data/tokenizers/char_tokenizer_german.json"
 
-checkpoints:
-  model_load_path: "data/runs/epoch31" # path to load model from
-  model_save_path: "data/runs/epoch" # path to save model to
+decoder:
+  type: "greedy" # greedy, or lm (beam search)
+
+  lm: # config for lm decoder
+    language_model_path: "data" # path where model and supplementary files are stored
+    language: "german"
+    n_gram: 3 # n-gram size of the language model, 3 or 5
+    beam_size: 50 
+    beam_threshold: 50
+    n_best: 1
+    lm_weight: 2
+    word_score: 0
+
+training:
+  learning_rate: 0.0005
+  batch_size: 8 # recommended to be the maximum number that fits on the GPU (batch size of 32 fits on a 12GB GPU)
+  epochs: 100
+  eval_every_n: 1 # evaluate every n epochs
+  num_workers: 8 # number of workers for dataloader
+
+checkpoints: # use ~ (unquoted, i.e. null) to disable saving/loading
+  model_load_path: "data/epoch67" # path to load model from
+  model_save_path: ~ # path to save model to
 
 inference:
-  model_load_path: "data/runs/epoch30" # path to load model from
-  device: "cuda" # device to run inference on if gpu is available, else "cpu" will be set automatically
+  model_load_path: "data/epoch67" # path to load model from
diff --git a/config.yaml b/config.yaml
@@ -1,3 +1,11 @@
+dataset:
+  download: True
+  dataset_root_path: "YOUR/PATH" # files will be downloaded into this dir
+  language_name: "mls_german_opus"
+  limited_supervision: False # set to True if you want to use limited supervision
+  dataset_percentage: 1.0 # percentage of dataset to use (1.0 = 100%)
+  shuffle: True
+
 model: 
   n_cnn_layers: 3
   n_rnn_layers: 5
@@ -6,29 +14,33 @@ model:
   stride: 2
   dropout: 0.3 # recommended to be around 0.4 for smaller datasets, 0.1 for really large datasets
 
+tokenizer:
+  tokenizer_path: "data/tokenizers/char_tokenizer_german.json"
+
+decoder:
+  type: "greedy" # greedy, or lm (beam search)
+
+  lm: # config for lm decoder
+    language_model_path: "data" # path where model and supplementary files are stored
+    language: "german"
+    n_gram: 3 # n-gram size of the language model, 3 or 5
+    beam_size: 50 
+    beam_threshold: 50
+    n_best: 1
+    lm_weight: 2
+    word_score: 0
+
 training:
-  learning_rate: 5e-4
+  learning_rate: 0.0005
   batch_size: 8 # recommended to be the maximum number that fits on the GPU (batch size of 32 fits on a 12GB GPU)
   epochs: 3 
   eval_every_n: 3 # evaluate every n epochs
   num_workers: 8 # number of workers for dataloader
 
-dataset:
-  download: True
-  dataset_root_path: "YOUR/PATH" # files will be downloaded into this dir
-  language_name: "mls_german_opus"
-  limited_supervision: False # set to True if you want to use limited supervision
-  dataset_percentage: 1.0 # percentage of dataset to use (1.0 = 100%)
-  shuffle: True
-
-tokenizer:
-  tokenizer_path: "data/tokenizers/char_tokenizer_german.yaml"
-
-checkpoints:
+checkpoints: # use ~ (unquoted, i.e. null) to disable saving/loading
   model_load_path: "YOUR/PATH" # path to load model from
   model_save_path: "YOUR/PATH" # path to save model to
 
 inference:
   model_load_path: "YOUR/PATH" # path to load model from
-  beam_width: 10 # beam width for beam search
-  device: "cuda" # device to run inference on if gpu is available, else "cpu" will be set automatically
+
diff --git a/data/own/Philipp_HerrK.flac b/data/own/Philipp_HerrK.flac
diff --git a/data/tokenizers/tokens_german.txt b/data/tokenizers/tokens_german.txt
@@ -0,0 +1,38 @@
+_
+<BLANK>
+<UNK>
+<SPACE>
+a
+b
+c
+d
+e
+f
+g
+h
+i
+j
+k
+l
+m
+n
+o
+p
+q
+r
+s
+t
+u
+v
+w
+x
+y
+z
+é
+à
+ä
+ö
+ß
+ü
+-
+'