Decoder #38

Merged · 14 commits · Sep 18, 2023

7 changes: 5 additions & 2 deletions .gitignore
@@ -1,7 +1,11 @@
# pictures
**/*.png

# Training files
data/*
!data/tokenizers
!data/own
!data/metrics.csv

# Mac
**/.DS_Store
@@ -67,8 +71,7 @@ cover/
*.mo
*.pot

#Model
YOUR


# Django stuff:
*.log
13 changes: 0 additions & 13 deletions Dockerfile

This file was deleted.

9 changes: 0 additions & 9 deletions Makefile

This file was deleted.

34 changes: 0 additions & 34 deletions config.cluster.yaml

This file was deleted.

55 changes: 33 additions & 22 deletions config.philipp.yaml
@@ -1,34 +1,45 @@
dataset:
download: True
dataset_root_path: "/Volumes/pherkel 2/SWR2-ASR" # files will be downloaded into this dir
language_name: "mls_german_opus"
limited_supervision: True # set to True if you want to use limited supervision
dataset_percentage: 0.01 # percentage of dataset to use (1.0 = 100%)
shuffle: True

model:
n_cnn_layers: 3
n_rnn_layers: 5
rnn_dim: 512
n_feats: 128 # number of mel features
stride: 2
dropout: 0.2 # recommended to be around 0.4-0.6 for smaller datasets, 0.1 for really large datasets

training:
learning_rate: 0.0005
batch_size: 32 # recommended to maximum number that fits on the GPU (batch size of 32 fits on a 12GB GPU)
epochs: 150
eval_every_n: 5 # evaluate every n epochs
num_workers: 4 # number of workers for dataloader
device: "cuda" # device to run inference on if gpu is available, else "cpu" will be set automatically

dataset:
download: true
dataset_root_path: "data" # files will be downloaded into this dir
language_name: "mls_german_opus"
limited_supervision: false # set to True if you want to use limited supervision
dataset_percentage: 1 # percentage of dataset to use (1.0 = 100%)
shuffle: true
dropout: 0.6 # recommended to be around 0.4 for smaller datasets, 0.1 for really large datasets

tokenizer:
tokenizer_path: "data/tokenizers/char_tokenizer_german.json"

checkpoints:
model_load_path: "data/runs/epoch31" # path to load model from
model_save_path: "data/runs/epoch" # path to save model to
decoder:
type: "greedy" # greedy, or lm (beam search)

lm: # config for lm decoder
language_model_path: "data" # path where model and supplementary files are stored
language: "german"
n_gram: 3 # n-gram size of the language model, 3 or 5
beam_size: 50
beam_threshold: 50
n_best: 1
lm_weight: 2
word_score: 0

training:
learning_rate: 0.0005
batch_size: 8 # recommended to maximum number that fits on the GPU (batch size of 32 fits on a 12GB GPU)
epochs: 100
eval_every_n: 1 # evaluate every n epochs
num_workers: 8 # number of workers for dataloader

checkpoints: # use "~" to disable saving/loading
model_load_path: "data/epoch67" # path to load model from
model_save_path: ~ # path to save model to

inference:
model_load_path: "data/runs/epoch30" # path to load model from
device: "cuda" # device to run inference on if gpu is available, else "cpu" will be set automatically
model_load_path: "data/epoch67" # path to load model from
42 changes: 27 additions & 15 deletions config.yaml
@@ -1,3 +1,11 @@
dataset:
download: True
dataset_root_path: "YOUR/PATH" # files will be downloaded into this dir
language_name: "mls_german_opus"
limited_supervision: False # set to True if you want to use limited supervision
dataset_percentage: 1.0 # percentage of dataset to use (1.0 = 100%)
shuffle: True

model:
n_cnn_layers: 3
n_rnn_layers: 5
@@ -6,29 +14,33 @@ model:
stride: 2
dropout: 0.3 # recommended to be around 0.4 for smaller datasets, 0.1 for really large datasets

tokenizer:
tokenizer_path: "data/tokenizers/char_tokenizer_german.json"

decoder:
type: "greedy" # greedy, or lm (beam search)

lm: # config for lm decoder
language_model_path: "data" # path where model and supplementary files are stored
language: "german"
n_gram: 3 # n-gram size of the language model, 3 or 5
beam_size: 50
beam_threshold: 50
n_best: 1
lm_weight: 2
word_score: 0

training:
learning_rate: 5e-4
learning_rate: 0.0005
batch_size: 8 # recommended to maximum number that fits on the GPU (batch size of 32 fits on a 12GB GPU)
epochs: 3
eval_every_n: 3 # evaluate every n epochs
num_workers: 8 # number of workers for dataloader

dataset:
download: True
dataset_root_path: "YOUR/PATH" # files will be downloaded into this dir
language_name: "mls_german_opus"
limited_supervision: False # set to True if you want to use limited supervision
dataset_percentage: 1.0 # percentage of dataset to use (1.0 = 100%)
shuffle: True

tokenizer:
tokenizer_path: "data/tokenizers/char_tokenizer_german.yaml"

checkpoints:
checkpoints: # use "~" to disable saving/loading
model_load_path: "YOUR/PATH" # path to load model from
model_save_path: "YOUR/PATH" # path to save model to

inference:
model_load_path: "YOUR/PATH" # path to load model from
beam_width: 10 # beam width for beam search
device: "cuda" # device to run inference on if gpu is available, else "cpu" will be set automatically

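The fields in the `lm` block line up with the parameters of `torchaudio.models.decoder.ctc_decoder`, so one plausible wiring looks like the sketch below; the lexicon and KenLM file names are assumptions, not files added by this PR:

```python
# Sketch: construct a beam-search CTC decoder from the "lm" block above.
# Assumes torchaudio is installed and that a KenLM n-gram file and a lexicon
# already exist under language_model_path; their file names are made up here.
import yaml
from torchaudio.models.decoder import ctc_decoder

with open("config.yaml", "r", encoding="utf-8") as f:
    lm_cfg = yaml.safe_load(f)["decoder"]["lm"]

decoder = ctc_decoder(
    lexicon="data/lexicon.txt",                  # assumed file name
    tokens="data/tokenizers/tokens_german.txt",  # token list added in this PR
    lm=f"{lm_cfg['language_model_path']}/{lm_cfg['n_gram']}gram.bin",  # assumed file name
    nbest=lm_cfg["n_best"],
    beam_size=lm_cfg["beam_size"],
    beam_threshold=lm_cfg["beam_threshold"],
    lm_weight=lm_cfg["lm_weight"],
    word_score=lm_cfg["word_score"],
    blank_token="<BLANK>",
    sil_token="<SPACE>",
    unk_word="<UNK>",
)

# emissions: float32 CPU tensor of shape (batch, frames, num_tokens),
# e.g. the log-softmax output of the acoustic model.
# hypotheses = decoder(emissions)
```
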
Binary file added data/own/Philipp_HerrK.flac
Binary file not shown.
38 changes: 38 additions & 0 deletions data/tokenizers/tokens_german.txt
@@ -0,0 +1,38 @@
_
<BLANK>
<UNK>
<SPACE>
a
b
c
d
e
f
g
h
i
j
k
l
m
n
o
p
q
r
s
t
u
v
w
x
y
z
é
à
ä
ö
ß
ü
-
'
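
The token list is plain text, one token per line, and the line order defines the token ids; a minimal, illustrative way to read it back into an index map (not code from this PR):

```python
# Sketch: read tokens_german.txt into a token -> index mapping.
# Line order defines the indices, so <BLANK>, <UNK> and <SPACE> keep fixed ids.
with open("data/tokenizers/tokens_german.txt", encoding="utf-8") as f:
    tokens = [line.rstrip("\n") for line in f if line.strip()]

token_to_id = {tok: i for i, tok in enumerate(tokens)}
print(token_to_id["<BLANK>"], token_to_id["a"], token_to_id["ü"])
```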