
Commit: Test/train
burggraaff committed May 22, 2024
1 parent 79135df commit 76cb2a4
Showing 5 changed files with 72 additions and 31 deletions.
33 changes: 20 additions & 13 deletions fpcup/plotting.py
@@ -10,7 +10,7 @@
 from pandas.api.types import is_datetime64_any_dtype as is_datetime
 from tqdm import tqdm
 
-from matplotlib import pyplot as plt, dates as mdates, patches as mpatches, ticker as mticker
+from matplotlib import pyplot as plt, dates as mdates, patches as mpatches, patheffects as mpe, ticker as mticker
 from matplotlib import colormaps, rcParams
 
 rcParams.update({"axes.grid": True,
@@ -500,51 +500,58 @@ def weighted_mean_loss(loss_per_batch: np.ndarray) -> np.ndarray:
     return loss_per_epoch
 
 
+c_train = "#4477AA"
+c_test = "#EE6677"
+pe_epoch = [mpe.Stroke(linewidth=4, foreground="black"),
+            mpe.Normal()]
 def plot_loss_curve(losses_train: np.ndarray, *, losses_test: Optional[np.ndarray]=None,
                     title: Optional[str]=None, saveto: Optional[PathOrStr]=None) -> None:
     """
     Plot the loss curve per batch and per epoch.
     """
-    # Colours and labels
-    batchcolour = "C2"
-    epochcolour = "black"
-
     # Constants
     n_epochs, n_batches = losses_train.shape
     epochs = np.arange(n_epochs + 1)
     batches = np.arange(losses_train.size) + 1
 
-    # Pull out data
+    # Training data: get loss per batch and per epoch
     loss_initial = [losses_train[0, 0]]
     losses_train_epoch = weighted_mean_loss(losses_train)
     losses_train_epoch = np.concatenate([loss_initial, losses_train_epoch])
 
     losses_train_batch = losses_train.ravel()
 
+    # Testing data: dummy loss at epoch 0
+    losses_test = np.insert(losses_test, 0, np.nan)
+
     # Variables for limits etc.
     try:
-        maxloss = np.nanmax(losses_train.max(), losses_test.max())
+        maxloss = np.nanmax([np.nanmax(losses_train), np.nanmax(losses_test)])
     except AttributeError:  # if no test losses were provided
         maxloss = losses_train.max()
 
-    # Plot loss per batch
+    # Figure setup
     fig, ax = plt.subplots(nrows=1, ncols=1, figsize=(5, 5), layout="constrained")
-    ax.plot(batches, losses_train_batch, color=batchcolour)
+
+    # Plot training loss per batch
+    ax.plot(batches, losses_train_batch, color=c_train, zorder=0)
 
     ax.set_xlim(0, len(batches))
-    ax.set_xlabel("Batch", color=batchcolour)
+    ax.set_xlabel("Batch", color=c_train)
     ax.set_ylabel("Loss")
     ax.grid(True, axis="y", ls="--")
     ax.grid(False, axis="x")
 
-    # Plot loss per epoch
+    # Plot training/testing loss per epoch
     ax2 = ax.twiny()
-    ax2.plot(epochs, losses_train_epoch, color=epochcolour)
+    ax2.plot(epochs, losses_train_epoch, color=c_train, path_effects=pe_epoch, label="Train", zorder=1)
+    ax2.plot(epochs, losses_test, color=c_test, path_effects=pe_epoch, label="Test", zorder=1)
 
     ax2.set_xlim(0, n_epochs)
     ax2.set_ylim(0, maxloss*1.05)
-    ax2.set_xlabel("Epoch", color=epochcolour)
+    ax2.set_xlabel("Epoch")
     ax2.grid(True, ls="--")
+    ax2.legend(loc="best")
 
     # Final settings
     fig.suptitle(title)
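For context, a minimal sketch of how the updated function might be exercised, with synthetic loss values (the shapes follow the (n_epochs, n_batches) convention assumed by weighted_mean_loss):

    import numpy as np
    from fpcup.plotting import plot_loss_curve

    rng = np.random.default_rng(0)
    losses_train = rng.random((10, 32))  # per-batch training losses, one row per epoch
    losses_test = rng.random(10)         # mean test loss per epoch
    plot_loss_curve(losses_train, losses_test=losses_test, title="demo")

The mpe.Stroke/mpe.Normal pair draws a thin black outline behind each per-epoch curve, which keeps the Train and Test lines legible where they cross the noisier per-batch trace.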
4 changes: 2 additions & 2 deletions nn/dataset.py
@@ -16,8 +16,8 @@
 # Temporary: keep it simple
 CROP = "barley"
 VARIETY = "Spring_barley_301"
-SOILTYPE = "ec2"
-pattern = "*_ec2_B*"
+SOILTYPE = "ec3"
+pattern = f"*_{SOILTYPE}_B*"
 pattern_suffix = pattern + ".wsum"
 
 
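A quick illustration of what the rewritten pattern expands to (the directory name here is hypothetical):

    from pathlib import Path

    # SOILTYPE = "ec3"  ->  pattern = "*_ec3_B*", pattern_suffix = "*_ec3_B*.wsum"
    files = sorted(Path("output/barley").glob(pattern_suffix))

Deriving pattern from SOILTYPE via an f-string also prevents the two constants from drifting apart, which the old hard-coded "*_ec2_B*" made possible.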
35 changes: 30 additions & 5 deletions nn/network.py
@@ -9,7 +9,7 @@
 from torch.utils.data import DataLoader, Dataset
 
 from fpcup.tools import RUNNING_IN_IPYTHON
-from fpcup.typing import Callable
+from fpcup.typing import Callable, Optional
 
 ### DEFINE CONSTANTS
 
@@ -63,7 +63,13 @@ def test_batch(model: nn.Module, loss_function: Callable, optimizer: torch.optim
     Test a given neural network `model` on data.
     One batch.
     """
-    pass
+    X, y = X.to(device), y.to(device)
+
+    # Compute prediction error
+    pred = model(X)
+    loss = loss_function(pred, y)
+
+    return loss.item()
 
 
 def train_epoch(model: nn.Module, dataloader: DataLoader, loss_function: Callable, optimizer: torch.optim.Optimizer) -> list[float]:
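Note that test_batch, as implemented above, still records gradients during the forward pass. A common refinement, not part of this commit, is to wrap evaluation in torch.no_grad(); a sketch under the same device and loss_function assumptions:

    import torch

    def test_batch_no_grad(model, loss_function, X, y):
        # Hypothetical variant: gradient tracking is unnecessary during evaluation
        with torch.no_grad():
            X, y = X.to(device), y.to(device)
            pred = model(X)
            loss = loss_function(pred, y)
        return loss.item()

This reduces memory use and runtime on large test sets without changing the reported loss.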
@@ -81,7 +87,23 @@ def train_epoch(model: nn.Module, dataloader: DataLoader, loss_function: Callabl
     return loss_per_batch
 
 
-def train(model: nn.Module, dataloader: DataLoader, loss_function: Callable, optimizer: torch.optim.Optimizer, n_epochs: int=10) -> list[float]:
+def test_epoch(model: nn.Module, dataloader: DataLoader, loss_function: Callable, optimizer: torch.optim.Optimizer) -> float:
+    """
+    Test a given neural network `model` on data.
+    One epoch.
+    """
+    # Setup
+    model.eval()  # Set to evaluation mode
+
+    # Loop over batches
+    loss_per_batch = [test_batch(model, loss_function, optimizer, X, y) for (X, y) in tqdm(dataloader, desc="Testing", unit="data", unit_scale=dataloader.batch_size, disable=RUNNING_IN_IPYTHON, leave=False)]
+    loss = np.mean(loss_per_batch)
+
+    return loss
+
+
+def train(model: nn.Module, training_data: DataLoader, loss_function: Callable, optimizer: torch.optim.Optimizer, *,
+          testing_data: Optional[DataLoader]=None, n_epochs: int=10) -> tuple[np.ndarray, np.ndarray]:
     """
     Train a given neural network `model` on data.
     n_epochs epochs (default: 10).
@@ -91,11 +113,14 @@ def train(model: nn.Module, dataloader: DataLoader, loss_function: Callable, opt
 
     for i in trange(n_epochs, desc="Training", unit="epoch"):
         # Train
-        loss_train = train_epoch(model, dataloader, loss_function, optimizer)
+        loss_train = train_epoch(model, training_data, loss_function, optimizer)
         loss_train_epoch.append(loss_train)
 
         # Test
-        loss_test = np.ones_like(loss_train)
+        if testing_data is not None:
+            loss_test = test_epoch(model, testing_data, loss_function, optimizer)
+        else:
+            loss_test = np.nan
         loss_test_epoch.append(loss_test)
 
     loss_train_epoch = np.array(loss_train_epoch)
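Taken together, train now reports one mean test loss per epoch, or NaN per epoch when no testing_data is supplied, alongside the raw per-batch training losses. A sketch of the caller's view, using the names from nn/testnn.py below:

    losses_train, losses_test = train(model, training_data, lossfunc, optimizer,
                                      testing_data=testing_data, n_epochs=10)
    # losses_train: array of shape (n_epochs, n_batches), raw per-batch losses
    # losses_test: one entry per epoch; np.nan when no test loader was supplied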
31 changes: 20 additions & 11 deletions nn/testnn.py
@@ -9,7 +9,7 @@
 from tqdm import tqdm
 
 from torch import nn, optim, tensor, Tensor
-from torch.utils.data import DataLoader, Dataset
+from torch.utils.data import DataLoader, Dataset, random_split
 
 import fpcup
 from fpcup.typing import PathOrStr
@@ -19,12 +19,13 @@
 
 ### Parse command line arguments
 import argparse
-parser = argparse.ArgumentParser(description="Analyse a PCSE ensemble with one varying parameter, as generated by wofost_ensemble_parameters.py.")
-parser.add_argument("output_dir", help="folder to load PCSE outputs from", type=fpcup.io.Path)
-parser.add_argument("--results_dir", help="folder to save plots into", type=fpcup.io.Path, default=fpcup.DEFAULT_RESULTS/"sensitivity")
+parser = argparse.ArgumentParser(description="Train a neural network on PCSE inputs/outputs.")
+parser.add_argument("output_dir", help="folder to load data (PCSE outputs) from", type=fpcup.io.Path)
+parser.add_argument("-t", "--test_fraction", help="fraction of data (PCSE outputs) to reserve for testing", type=float, default=0.2)
 parser.add_argument("-n", "--number_epochs", help="number of training epochs", type=int, default=10)
 parser.add_argument("-b", "--batch_size", help="batch size", type=int, default=64)
 parser.add_argument("-v", "--verbose", help="increase output verbosity", action="store_true")
+parser.add_argument("--results_dir", help="folder to save plots into", type=fpcup.io.Path, default=fpcup.DEFAULT_RESULTS/"nn")
 args = parser.parse_args()
 
 
@@ -38,21 +39,29 @@
 fpcup.multiprocessing.freeze_support()
 
 ### SETUP
-# Data
-data = PCSEEnsembleDatasetSmall(args.output_dir)
-dataloader = DataLoader(data, batch_size=args.batch_size, shuffle=True)
+# Load data
+dataset = PCSEEnsembleDatasetSmall(args.output_dir)
 if args.verbose:
-    print(data)
-    print(f"Batch size: {dataloader.batch_size}")
+    print("Loaded data set:")
+    print(dataset)
+
+# Training / Testing data split
+fractions = [1-args.test_fraction, args.test_fraction]
+training_dataset, testing_dataset = random_split(dataset, fractions)
+training_data = DataLoader(training_dataset, batch_size=args.batch_size, shuffle=True)
+testing_data = DataLoader(testing_dataset, batch_size=args.batch_size, shuffle=False)
+if args.verbose:
+    print(f"Split data into training ({fractions[0]:.0%}) and testing ({fractions[1]:.0%}).")
+    print(f"Batch size: {args.batch_size}")
 
 # Network
 model = PCSEEmulator().to(device)
 optimizer = optim.Adam(model.parameters(), lr=1e-3)
 
 
 ### TRAINING
-losses_train, losses_test = train(model, dataloader, lossfunc, optimizer, n_epochs=args.number_epochs)
+losses_train, losses_test = train(model, training_data, lossfunc, optimizer, testing_data=testing_data, n_epochs=args.number_epochs)
 
 
 ### PLOT
-fpcup.plotting.plot_loss_curve(losses_train, title=tag, saveto=f"nn_loss_{tag}.pdf")
+fpcup.plotting.plot_loss_curve(losses_train, losses_test=losses_test, title=tag, saveto=f"nn_loss_{tag}.pdf")
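With the new arguments in place, a typical invocation might look like this (the data path is hypothetical):

    python nn/testnn.py output/barley -t 0.2 -n 10 -b 64 -v

One caveat on the split: passing fractions directly to random_split relies on PyTorch's support for fractional lengths (available in recent versions, 1.13 and later); on older versions the split sizes would have to be given as integer counts, e.g. computed from len(dataset).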
Empty file added results/nn/placeholder
