fix: tensor dimension swap instead of view change #240

Merged · 5 commits · Aug 15, 2023
Changes from 4 commits
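The gist of the fix: the models were reshaping (batch, seq_len, features) windows into channels-first layout with Tensor.view, which reinterprets memory in row-major order rather than transposing, so time steps and features were silently interleaved. The diff below replaces those calls with torch.swapdims, a true transpose. A minimal sketch of the difference (the tensor here is illustrative, not taken from the repo):

```python
import torch

x = torch.arange(6).reshape(1, 3, 2)  # (batch=1, seq_len=3, features=2)

viewed = x.view(-1, 2, 3)             # reinterprets memory; NOT a transpose
swapped = torch.swapdims(x, 1, 2)     # swaps dims 1 and 2, a real transpose

print(viewed[0])   # tensor([[0, 1, 2], [3, 4, 5]])  <- mixes time and feature values
print(swapped[0])  # tensor([[0, 2, 4], [1, 3, 5]])  <- each row is one feature's series
```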
4 changes: 2 additions & 2 deletions numalogic/models/autoencoder/base.py
@@ -80,9 +80,9 @@
             return optim.RMSprop(self.parameters(), lr=self.lr, weight_decay=self.weight_decay)
         raise NotImplementedError(f"Unsupported optimizer value provided: {optim_algo}")

-    def configure_shape(self, batch: Tensor) -> Tensor:
+    def configure_shape(self, x: Tensor) -> Tensor:
         """Method to configure the batch shape for each type of model architecture."""
-        return batch
+        return x

Codecov (codecov/patch) warning on numalogic/models/autoencoder/base.py#L85: added line #L85 was not covered by tests.

     def _get_reconstruction_loss(self, batch: Tensor) -> Tensor:
         _, recon = self.forward(batch)
30 changes: 21 additions & 9 deletions numalogic/models/autoencoder/variants/conv.py
@@ -259,27 +259,38 @@ def _init_weights(self) -> None:
         nn.init.xavier_normal_(module.weight, gain=calculate_gain("relu"))

     def forward(self, batch: Tensor) -> tuple[Tensor, Tensor]:
+        """
+        Forward pass for the Conv1dAE model.
+
+        Args:
+        ----
+            batch: Input batch of shape (batch_size, seq_len, in_channels)
+
+        Returns
+        -------
+            A tuple of (encoded, decoded) tensors
+        """
         batch = self.configure_shape(batch)
         encoded = self.encoder(batch)
         decoded = self.decoder(encoded)
-        return encoded, decoded
+        return encoded, self.configure_shape(decoded)

-    def configure_shape(self, batch: Tensor) -> Tensor:
-        return batch.view(-1, self.in_channels, self.seq_len)
+    def configure_shape(self, x: Tensor) -> Tensor:
+        return torch.swapdims(x, 1, 2)

     def encode(self, batch: Tensor) -> Tensor:
         batch = self.configure_shape(batch)
         return self.encoder(batch)

     def _get_reconstruction_loss(self, batch: Tensor) -> Tensor:
         _, recon = self.forward(batch)
-        x = batch.view(-1, self.in_channels, self.seq_len)
-        return self.criterion(x, recon)
+        # x = torch.swapdims(batch, 1, 2)
+        return self.criterion(batch, recon)

     def predict_step(self, batch: Tensor, batch_idx: int, dataloader_idx: int = 0) -> Tensor:
         """Returns reconstruction for streaming input."""
         recon = self.reconstruction(batch)
-        recon = recon.view(-1, self.seq_len, self.in_channels)
+        # recon = torch.swapdims(recon, 1, 2)
         return self.criterion(batch, recon, reduction="none")


@@ -300,7 +311,7 @@ class SparseConv1dAE(Conv1dAE):
         **kwargs: VanillaAE kwargs
     """

-    def __init__(self, beta=1e-3, rho=0.05, *args, **kwargs):
+    def __init__(self, beta: float = 1e-3, rho: float = 0.05, *args, **kwargs):
         super().__init__(*args, **kwargs)
         self.beta = beta
         self.rho = rho
@@ -326,13 +337,14 @@ def kl_divergence(self, activations: Tensor) -> Tensor:

     def _get_reconstruction_loss(self, batch) -> Tensor:
         latent, recon = self.forward(batch)
-        batch = batch.view(-1, self.in_channels, self.seq_len)
+        # batch = torch.swapdims(batch, 1, 2)
         loss = self.criterion(batch, recon)
         penalty = self.kl_divergence(latent)
         return loss + (self.beta * penalty)

     def validation_step(self, batch: Tensor, batch_idx: int) -> Tensor:
         recon = self.reconstruction(batch)
-        loss = self.criterion(batch, recon.view(-1, self.seq_len, self.in_channels))
+        # loss = self.criterion(batch, torch.swapdims(recon, 1, 2))
+        loss = self.criterion(batch, recon)
         self._total_val_loss += loss.detach().item()
         return loss
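For context on why the transpose is needed at all: nn.Conv1d convolves over the last axis and expects input of shape (batch, channels, length), while the dataset yields (batch, seq_len, in_channels) windows. The configure_shape override above flips between the two layouts on the way in and out. A small shape sketch with made-up sizes:

```python
import torch
from torch import nn

conv = nn.Conv1d(in_channels=3, out_channels=8, kernel_size=3, padding=1)

batch = torch.randn(4, 12, 3)            # (batch, seq_len, in_channels)
out = conv(torch.swapdims(batch, 1, 2))  # Conv1d wants (batch, channels, length)

print(out.shape)                          # torch.Size([4, 8, 12])
print(torch.swapdims(out, 1, 2).shape)    # torch.Size([4, 12, 8]), time-major again
```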
20 changes: 10 additions & 10 deletions numalogic/models/autoencoder/variants/vanilla.py
@@ -182,25 +182,25 @@ def __init__(

     @staticmethod
     def init_weights(m: nn.Module) -> None:
-        r"""Initiate parameters in the transformer model."""
+        """Initialize the parameters in the model."""
         if type(m) == nn.Linear:
             nn.init.xavier_normal_(m.weight)

     def forward(self, batch: Tensor) -> tuple[Tensor, Tensor]:
-        batch = batch.view(-1, self.n_features, self.seq_len)
+        batch = torch.swapdims(batch, 1, 2)
         encoded = self.encoder(batch)
         decoded = self.decoder(encoded)
-        return encoded, decoded
+        return encoded, torch.swapdims(decoded, 1, 2)

-    def _get_reconstruction_loss(self, batch):
+    def _get_reconstruction_loss(self, batch: Tensor):
         _, recon = self.forward(batch)
-        x = batch.view(-1, self.n_features, self.seq_len)
-        return self.criterion(x, recon)
+        # x = torch.swapdims(batch, 1, 2)
+        return self.criterion(batch, recon)

     def predict_step(self, batch: Tensor, batch_idx: int, dataloader_idx: int = 0):
         """Returns reconstruction for streaming input."""
         recon = self.reconstruction(batch)
-        recon = recon.view(-1, self.seq_len, self.n_features)
+        # recon = torch.swapdims(recon, 1, 2)
         return self.criterion(batch, recon, reduction="none")


@@ -247,14 +247,14 @@ def kl_divergence(self, activations: Tensor) -> Tensor:

     def _get_reconstruction_loss(self, batch: Tensor) -> Tensor:
         latent, recon = self.forward(batch)
-        x = batch.view(-1, self.n_features, self.seq_len)
-        loss = self.criterion(x, recon)
+        # x = torch.swapdims(batch, 1, 2)
+        loss = self.criterion(batch, recon)
         penalty = self.kl_divergence(latent)
         return loss + (self.beta * penalty)

     def validation_step(self, batch: Tensor, batch_idx: int) -> Tensor:
         recon = self.reconstruction(batch)
-        recon = recon.view(-1, self.seq_len, self.n_features)
+        # recon = torch.swapdims(recon, 1, 2)
         loss = self.criterion(batch, recon)
         self._total_val_loss += loss.detach().item()
         return loss
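The loss-side changes in VanillaAE follow the same logic: forward now transposes the input and transposes the decoder output back, so both arguments to self.criterion are already in the original (batch, seq_len, n_features) layout and the extra view before the loss can be dropped. A quick sanity check of the round trip (sizes are made up):

```python
import torch

batch = torch.randn(8, 12, 3)  # (batch, seq_len, n_features)

# swapping the same two dims twice is the identity, so elements stay aligned
round_trip = torch.swapdims(torch.swapdims(batch, 1, 2), 1, 2)
assert torch.equal(round_trip, batch)

# the old pattern produced matching shapes but misaligned elements
viewed = batch.view(-1, 3, 12)  # memory reinterpretation, not a transpose
assert not torch.equal(viewed, torch.swapdims(batch, 1, 2))
```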
4 changes: 2 additions & 2 deletions numalogic/models/vae/variants/conv.py
@@ -134,7 +134,7 @@ def forward(self, z: Tensor) -> Tensor:
         out = self.unflatten(out)
         out = torch.relu(self.bnorm(self.conv_tr(out)))
         out = torch.relu(self.fc_out(out))
-        out = out.view(-1, self.seq_len, self.n_features)
+        out = torch.swapdims(out, 1, 2)
         return self.td_linear(out)


@@ -213,7 +213,7 @@ def forward(self, x: Tensor) -> tuple[MultivariateNormal, Tensor]:

     def configure_shape(self, x: Tensor) -> Tensor:
         """Method to configure the batch shape for each type of model architecture."""
-        return x.view(-1, self.n_features, self.seq_len)
+        return torch.swapdims(x, 1, 2)

     def kld_loss(self, p: MultivariateNormal) -> Tensor:
         """
27 changes: 27 additions & 0 deletions numalogic/tools/data.py
@@ -90,6 +90,9 @@ def inverse_window_last_only(batched: Tensor) -> Tensor:
 class StreamingDataset(IterableDataset):
     r"""An iterable Dataset designed for streaming time series input.

+    Iterates over the input data and returns a sequence of shape
+    (batch_size, seq_len, num_features)
+
     Args:
     ----
         data: A numpy array containing the input data in the shape of (batch, num_features).
@@ -179,6 +182,30 @@ def __getitem__(self, idx: Union[int, slice]) -> npt.NDArray[float]:
         return self._data[idx : idx + self._seq_len]


+class StreamingDataLoader(DataLoader):
+    """
+    A DataLoader for convenience that uses StreamingDataset for handling time series data.
+
+    Args:
+    ----
+        data: A numpy array containing the input data in the shape of (batch, num_features).
+        seq_len: Length of the sliding window sequences to be generated from the input data
+        kwargs: Additional arguments to be passed to the DataLoader
+
+    Raises
+    ------
+        ValueError: If the sequence length is greater than the data size
+        InvalidDataShapeError: If the input data array does not
+            have a minimum dimension size of 2
+        TypeError: If wrong argument is passed in kwargs
+    """
+
+    def __init__(self, data: npt.NDArray[float], seq_len: int, **kwargs):
+        if "dataset" in kwargs:
+            raise TypeError("dataset argument is not supported for StreamingDataLoader!")
+        super().__init__(StreamingDataset(data, seq_len), **kwargs)
+
+
 class TimeseriesDataModule(pl.LightningDataModule):
     r"""A timeseries data module for use in PyTorch Lightning.
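A usage sketch for the new StreamingDataLoader (array sizes are illustrative; the import path follows the diff above). Note that StreamingDataset itself yields one (seq_len, num_features) window at a time; the (batch_size, seq_len, num_features) shape mentioned in its docstring is what comes out once the DataLoader collates those windows:

```python
import numpy as np
from numalogic.tools.data import StreamingDataset, StreamingDataLoader

data = np.random.randn(100, 3).astype(np.float32)  # (rows, num_features)

window = StreamingDataset(data, seq_len=12)[0]
print(window.shape)  # (12, 3): a single sliding window

loader = StreamingDataLoader(data, seq_len=12, batch_size=16)
for batch in loader:
    print(batch.shape)  # torch.Size([16, 12, 3])
    break
```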