Skip to content

Commit

Permalink
Merge branch 'main' into tmp/fc-model
Browse files Browse the repository at this point in the history
Signed-off-by: Avik Basu <ab93@users.noreply.github.com>
  • Loading branch information
ab93 committed Jun 11, 2024
2 parents 7114f76 + 0e1a928 commit f3252a2
Show file tree
Hide file tree
Showing 23 changed files with 974 additions and 872 deletions.
3 changes: 2 additions & 1 deletion .dockerignore
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,8 @@ var/
*.egg-info/
.installed.cfg
*.egg
site

# Virtual environment
.env
venv/
**/.venv/
2 changes: 1 addition & 1 deletion .github/workflows/changelog.yml
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ jobs:
runs-on: ubuntu-latest
name: Generate changelog
steps:
- uses: actions/checkout@v2
- uses: actions/checkout@v4
with:
ref: main
fetch-depth: 0
Expand Down
6 changes: 3 additions & 3 deletions .github/workflows/ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -13,16 +13,16 @@ jobs:
strategy:
fail-fast: false
matrix:
python-version: ["3.9", "3.10", "3.11"]
python-version: ["3.9", "3.10", "3.11", "3.12"]

steps:
- uses: actions/checkout@v3
- uses: actions/checkout@v4

- name: Install poetry
run: pipx install poetry==1.6.1

- name: Set up Python ${{ matrix.python-version }}
uses: actions/setup-python@v4
uses: actions/setup-python@v5
with:
python-version: ${{ matrix.python-version }}
cache: 'poetry'
Expand Down
4 changes: 2 additions & 2 deletions .github/workflows/coverage.yml
Original file line number Diff line number Diff line change
Expand Up @@ -16,13 +16,13 @@ jobs:
python-version: ["3.9"]

steps:
- uses: actions/checkout@v3
- uses: actions/checkout@v4

- name: Install poetry
run: pipx install poetry==1.6.1

- name: Set up Python ${{ matrix.python-version }}
uses: actions/setup-python@v4
uses: actions/setup-python@v5
with:
python-version: ${{ matrix.python-version }}
cache: 'poetry'
Expand Down
4 changes: 2 additions & 2 deletions .github/workflows/gh-pages.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -15,9 +15,9 @@ jobs:
if: github.repository == 'numaproj/numalogic'
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v3
- uses: actions/checkout@v4
- name: Setup Python
uses: actions/setup-python@v4
uses: actions/setup-python@v5
with:
python-version: 3.9
- name: build
Expand Down
2 changes: 1 addition & 1 deletion .github/workflows/lint.yml
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ jobs:
name: Black format
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v3
- uses: actions/checkout@v4
- uses: psf/black@stable
with:
options: "--check --verbose"
Expand Down
4 changes: 2 additions & 2 deletions .github/workflows/pypi.yml
Original file line number Diff line number Diff line change
Expand Up @@ -16,13 +16,13 @@ jobs:

name: Publish to PyPi
steps:
- uses: actions/checkout@v3
- uses: actions/checkout@v4

- name: Install poetry
run: pipx install poetry==1.6.1

- name: Set up Python ${{ matrix.python-version }}
uses: actions/setup-python@v4
uses: actions/setup-python@v5
with:
python-version: ${{ matrix.python-version }}
cache: 'poetry'
Expand Down
2 changes: 1 addition & 1 deletion .github/workflows/release.yml
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ jobs:
python-version: ["3.11"]

steps:
- uses: actions/checkout@v3
- uses: actions/checkout@v4

- name: Docker Login
uses: docker/login-action@v2.1.0
Expand Down
16 changes: 5 additions & 11 deletions Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -3,9 +3,9 @@
####################################################################################################

ARG PYTHON_VERSION=3.11
FROM python:${PYTHON_VERSION}-slim-bookworm AS builder
FROM python:${PYTHON_VERSION}-bookworm AS builder

ARG POETRY_VERSION=1.6
ARG POETRY_VERSION=1.8
ARG INSTALL_EXTRAS

ENV POETRY_NO_INTERACTION=1 \
Expand All @@ -16,28 +16,22 @@ ENV POETRY_NO_INTERACTION=1 \
POETRY_HOME="/opt/poetry" \
PATH="$POETRY_HOME/bin:$PATH"

RUN apt-get update \
&& apt-get install --no-install-recommends -y build-essential \
&& apt-get clean && rm -rf /var/lib/apt/lists/* \
&& pip install --no-cache-dir poetry==$POETRY_VERSION
RUN pip install --no-cache-dir poetry==$POETRY_VERSION

WORKDIR /app
COPY poetry.lock pyproject.toml ./

RUN poetry install --without dev --no-root --extras "${INSTALL_EXTRAS}" \
&& poetry run pip install --no-cache-dir "torch>=2.0,<3.0" --index-url https://download.pytorch.org/whl/cpu \
&& poetry run pip install --no-cache-dir "lightning[pytorch]" \
&& rm -rf $POETRY_CACHE_DIR \
&& pip cache purge \
&& apt-get purge -y --auto-remove build-essential
&& poetry run pip install --no-cache-dir "lightning[pytorch]<3.0"

####################################################################################################
# runtime: used for running the udf vertices
####################################################################################################
FROM python:${PYTHON_VERSION}-slim-bookworm AS runtime

RUN apt-get update \
&& apt-get install --no-install-recommends -y dumb-init \
&& apt-get install dumb-init \
&& apt-get clean && rm -rf /var/lib/apt/lists/* \
&& apt-get purge -y --auto-remove

Expand Down
445 changes: 155 additions & 290 deletions examples/multichannel_ae.ipynb

Large diffs are not rendered by default.

4 changes: 4 additions & 0 deletions numalogic/config/factory.py
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,7 @@ class PreprocessFactory(_ObjectFactory):
GaussianNoiseAdder,
DifferenceTransform,
FlattenVector,
FlattenVectorWithPadding,
PercentileScaler,
ExpMovingAverage,
)
Expand All @@ -67,6 +68,7 @@ class PreprocessFactory(_ObjectFactory):
"GaussianNoiseAdder": GaussianNoiseAdder,
"DifferenceTransform": DifferenceTransform,
"FlattenVector": FlattenVector,
"FlattenVectorWithPadding": FlattenVectorWithPadding,
"PercentileScaler": PercentileScaler,
"ExpMovingAverage": ExpMovingAverage,
}
Expand Down Expand Up @@ -123,6 +125,7 @@ class ModelFactory(_ObjectFactory):
from numalogic.models.autoencoder.variants import (
VanillaAE,
SparseVanillaAE,
MultichannelAE,
Conv1dAE,
SparseConv1dAE,
LSTMAE,
Expand All @@ -135,6 +138,7 @@ class ModelFactory(_ObjectFactory):
_CLS_MAP: ClassVar[dict] = {
"VanillaAE": VanillaAE,
"SparseVanillaAE": SparseVanillaAE,
"MultichannelAE": MultichannelAE,
"Conv1dAE": Conv1dAE,
"SparseConv1dAE": SparseConv1dAE,
"LSTMAE": LSTMAE,
Expand Down
4 changes: 2 additions & 2 deletions numalogic/connectors/_config.py
Original file line number Diff line number Diff line change
Expand Up @@ -52,7 +52,7 @@ class DruidFetcherConf:
dimensions: list[str] = field(default_factory=list)
aggregations: dict = field(default_factory=dict)
group_by: list[str] = field(default_factory=list)
pivot: Pivot = field(default_factory=lambda: Pivot())
pivot: Optional[Pivot] = field(default_factory=lambda: Pivot())
granularity: str = "minute"

def __post_init__(self):
Expand Down Expand Up @@ -92,7 +92,7 @@ class RDSFetcherConf:
# metric column names
metrics: list[str]
group_by: list[str] = field(default_factory=list)
pivot: Pivot = field(default_factory=lambda: Pivot())
pivot: Optional[Pivot] = field(default_factory=lambda: Pivot())
hash_query_type: bool = True
hash_column_name: str = "model_md5_hash"
datetime_column_name: str = "eventdatetime"
Expand Down
15 changes: 4 additions & 11 deletions numalogic/models/autoencoder/variants/vanilla.py
Original file line number Diff line number Diff line change
Expand Up @@ -210,7 +210,7 @@ def predict_step(self, batch: Tensor, batch_idx: int, dataloader_idx: int = 0):


class MultichannelAE(BaseAE):
r"""Multichannel Autoencoder model based on the vanilla encoder and decoder.
r"""Multichannel Vanilla Autoencoder model based on the vanilla encoder and decoder.
Each channel is an isolated neural network.
Args:
Expand All @@ -234,14 +234,12 @@ def __init__(
decoder_layersizes: Sequence[int] = (8, 16),
dropout_p: float = 0.25,
batchnorm: bool = False,
encoderinfo: bool = False,
**kwargs,
):
super().__init__(**kwargs)
self.seq_len = seq_len
self.dropout_prob = dropout_p
self.n_channels = n_channels
self.encoderinfo = encoderinfo
# The number of features per channel default to 1 in this architecture
self.n_features = 1

Expand Down Expand Up @@ -291,16 +289,11 @@ def forward(self, batch: Tensor) -> tuple[Tensor, Tensor]:
encoded = encoder(batch_channel)
decoded = decoder(encoded)

if self.encoderinfo:
encoded_all.append(encoded)
encoded_all.append(encoded)
decoded_all.append(decoded)

if self.encoderinfo:
# Stack the encoded outputs of all channels when required
encoded_all = torch.stack(encoded_all, dim=-1)
encoded_all = torch.squeeze(encoded_all, 1)
else:
encoded_all = EMPTY_TENSOR
encoded_all = torch.stack(encoded_all, dim=-1)
encoded_all = torch.squeeze(encoded_all, 1)

decoded_all = torch.stack(decoded_all, dim=-1)
decoded_all = torch.squeeze(decoded_all, 1)
Expand Down
2 changes: 2 additions & 0 deletions numalogic/transforms/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@
GaussianNoiseAdder,
DifferenceTransform,
FlattenVector,
FlattenVectorWithPadding,
)
from numalogic.transforms._movavg import ExpMovingAverage, expmov_avg_aggregator
from numalogic.transforms._postprocess import TanhNorm, tanh_norm, SigmoidNorm
Expand All @@ -38,6 +39,7 @@
"GaussianNoiseAdder",
"DifferenceTransform",
"FlattenVector",
"FlattenVectorWithPadding",
"PercentileScaler",
"SigmoidNorm",
]
53 changes: 53 additions & 0 deletions numalogic/transforms/_stateless.py
Original file line number Diff line number Diff line change
Expand Up @@ -156,3 +156,56 @@ def transform(self, X: npt.NDArray[float], **__) -> npt.NDArray[float]:

def inverse_transform(self, X: npt.NDArray[float]) -> npt.NDArray[float]:
return X.reshape(-1, self.n_features)


class FlattenVectorWithPadding(StatelessTransformer):
"""A stateless transformer that flattens some of the columns and does padding on the rest.
Args:
____
features: list of all feature names in the order of columns of the payload matrix
flatten_features: list of feature names to be flattened
padding_with: numerical value to be used for padding, default is 0
"""

__slots__ = (
"features",
"flatten_features",
"padding_with",
"padding_features",
"flatten_indexes",
"padding_indexes",
)

@staticmethod
def _feature_indexes(features_all: list[str], features: list[str]) -> list[int]:
return [features_all.index(f) for f in features]

def __init__(self, features: list[str], flatten_features: list[str], padding_with: float = 0.0):
self.features = features
self.flatten_features = flatten_features
self.padding_with = padding_with

self.padding_features = list(set(features) - set(flatten_features))
if not self.padding_features:
raise ValueError("At least one feature should be left for padding.")
self.flatten_indexes = self._feature_indexes(features, self.flatten_features)
self.padding_indexes = self._feature_indexes(features, self.padding_features)

def transform(self, X: npt.NDArray[float], **__) -> npt.NDArray[float]:
X_flatten = X[:, self.flatten_indexes].flatten().reshape(-1, 1)
padding_len = X_flatten.shape[0] - X.shape[0]
X_padding = np.pad(
X[:, self.padding_indexes],
((0, padding_len), (0, 0)),
mode="constant",
constant_values=self.padding_with,
)
return np.concatenate([X_flatten, X_padding], axis=1)

def inverse_transform(self, X: npt.NDArray[float]) -> npt.NDArray[float]:
X_flatten = X[:, 0].reshape(-1, len(self.flatten_features))
original_len = X.shape[0] - X_flatten.shape[0]
X_padding_removed = X[:original_len, 1:]
return np.concatenate([X_flatten, X_padding_removed], axis=1)
2 changes: 1 addition & 1 deletion numalogic/udfs/__main__.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@ def init_server(step: str, server_type: str):
redis_client = get_redis_client_from_conf(pipeline_conf.redis_conf)
udf = UDFFactory.get_udf_instance(step, r_client=redis_client, pl_conf=pipeline_conf)

return ServerFactory.get_server_instance(server_type, handler=udf)
return ServerFactory.get_server_instance(server_type, mapper_instance=udf)


def start_server() -> None:
Expand Down
8 changes: 4 additions & 4 deletions numalogic/udfs/factory.py
Original file line number Diff line number Diff line change
Expand Up @@ -94,12 +94,12 @@ def get_udf_instance(cls, udf_name: str, **kwargs) -> nl_udf_t:
class ServerFactory:
"""Factory class to fetch the right pynumaflow function server/mapper."""

from pynumaflow.mapper import Mapper, MultiProcMapper, AsyncMapper
from pynumaflow.mapper import MapServer, MapMultiprocServer, MapAsyncServer

_SERVER_MAP: ClassVar[dict] = {
"sync": Mapper,
"async": AsyncMapper,
"multiproc": MultiProcMapper,
"sync": MapServer,
"async": MapAsyncServer,
"multiproc": MapMultiprocServer,
}

@classmethod
Expand Down
Loading

0 comments on commit f3252a2

Please sign in to comment.