Skip to content

Commit

Permalink
Merge branch 'decoder' of github.com:Algo-Boys/SWR2-cool-projekt into…
Browse files Browse the repository at this point in the history
… decoder
  • Loading branch information
Pherkel committed Sep 18, 2023
2 parents 9475900 + e062272 commit d568904
Show file tree
Hide file tree
Showing 6 changed files with 44 additions and 23 deletions.
12 changes: 6 additions & 6 deletions config.cluster.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -4,18 +4,18 @@ model:
rnn_dim: 512
n_feats: 128 # number of mel features
stride: 2
dropout: 0.25 # recommended to be around 0.4-0.6 for smaller datasets, 0.1 for really large datasets
dropout: 0.2 # recommended to be around 0.4-0.6 for smaller datasets, 0.1 for really large datasets

training:
learning_rate: 0.0005
batch_size: 64 # recommended: the maximum number that fits on the GPU (a batch size of 32 fits on a 12GB GPU)
batch_size: 400 # recommended: the maximum number that fits on the GPU (a batch size of 32 fits on a 12GB GPU)
epochs: 150
eval_every_n: 5 # evaluate every n epochs
num_workers: 8 # number of workers for dataloader
num_workers: 12 # number of workers for dataloader
device: "cuda" # device to run training on if a GPU is available; otherwise "cpu" is set automatically

dataset:
download: True
download: False
dataset_root_path: "/mnt/lustre/mladm/mfa252/data" # files will be downloaded into this dir
language_name: "mls_german_opus"
limited_supervision: False # set to True if you want to use limited supervision
Expand All @@ -26,9 +26,9 @@ tokenizer:
tokenizer_path: "data/tokenizers/char_tokenizer_german.json"

checkpoints:
model_load_path: "data/runs/epoch31" # path to load model from
model_load_path: "data/runs/epoch50" # path to load model from
model_save_path: "data/runs/epoch" # path to save model to

inference:
model_load_path: ~ # path to load model from
device: "cuda" # device to run inference on if gpu is available, else "cpu" will be set automatically
device: "cuda" # device to run inference on if gpu is available, else "cpu" will be set automatically
Binary file added data/own/swabian.flac
Binary file not shown.
10 changes: 5 additions & 5 deletions hpc.sh
Original file line number Diff line number Diff line change
Expand Up @@ -2,15 +2,15 @@

#SBATCH --job-name=swr-teamprojekt
#SBATCH --partition=a100
#SBATCH --time=00:30:00
#SBATCH --time=24:00:00

### Note: --gres=gpu:x should be equal to ntasks-per-node
#SBATCH --nodes=1
#SBATCH --ntasks-per-node=1
#SBATCH --gres=gpu:a100:1
#SBATCH --cpus-per-task=8
#SBATCH --mem=64gb
#SBATCH --chdir=/mnt/lustre/mladm/mfa252/SWR2-cool-projekt-main/
#SBATCH --gres=gpu:a100:4
#SBATCH --cpus-per-task=16
#SBATCH --mem=32gb
#SBATCH --chdir=/mnt/lustre/mladm/mfa252/ref/
#SBATCH --output=/mnt/lustre/mladm/mfa252/%x-%j.out

source venv/bin/activate
Expand Down
2 changes: 1 addition & 1 deletion hpc_train.sh
Original file line number Diff line number Diff line change
@@ -1,3 +1,3 @@
#!/bin/sh

yes no | python -m swr2_asr.train --config_path config.cluster.yaml
python -m swr2_asr.train --config_path config.cluster.yaml
7 changes: 6 additions & 1 deletion swr2_asr/inference.py
Original file line number Diff line number Diff line change
Expand Up @@ -66,7 +66,12 @@ def main(config_path: str, file_path: str) -> None:
).to(device)

checkpoint = torch.load(inference_config["model_load_path"], map_location=device)
model.load_state_dict(checkpoint["model_state_dict"], strict=True)

state_dict = {
k[len("module.") :] if k.startswith("module.") else k: v
for k, v in checkpoint["model_state_dict"].items()
}
model.load_state_dict(state_dict, strict=True)
model.eval()

waveform, sample_rate = torchaudio.load(file_path) # pylint: disable=no-member
Expand Down
36 changes: 26 additions & 10 deletions swr2_asr/utils/visualization.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,19 +4,35 @@
import torch


def plot(epochs, path):
def plot(path):
"""Plots the losses over the epochs"""
losses = []
train_losses = []
test_losses = []
cers = []
wers = []
for epoch in range(1, epochs + 1):
current_state = torch.load(path + str(epoch))
losses.append(current_state["loss"])
test_losses.append(current_state["test_loss"])
cers.append(current_state["avg_cer"])
wers.append(current_state["avg_wer"])

plt.plot(losses)
plt.plot(test_losses)
epoch = 5
while True:
try:
current_state = torch.load(path + str(epoch), map_location=torch.device("cpu"))
except FileNotFoundError:
break
train_losses.append((epoch, current_state["train_loss"].item()))
test_losses.append((epoch, current_state["test_loss"]))
cers.append((epoch, current_state["avg_cer"]))
wers.append((epoch, current_state["avg_wer"]))
epoch += 5

plt.plot(*zip(*train_losses), label="train_loss")
plt.plot(*zip(*test_losses), label="test_loss")
plt.plot(*zip(*cers), label="cer")
plt.plot(*zip(*wers), label="wer")
plt.xlabel("epoch")
plt.ylabel("score")
plt.title("Model performance for 5n epochs")
plt.legend()
plt.savefig("losses.svg")


if __name__ == "__main__":
plot("data/runs/epoch")

0 comments on commit d568904

Please sign in to comment.