Skip to content

Commit

Permalink
Merge branch 'vaqxai-pylint' into suml-docker
Browse files Browse the repository at this point in the history
  • Loading branch information
s19568 committed Jan 23, 2024
2 parents f367e51 + a67076d commit 14cb94a
Show file tree
Hide file tree
Showing 10 changed files with 49 additions and 27 deletions.
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,7 @@ conda activate penguins-env
3. Run the project

```
kedro run
kedro run
```

## PyCharm Setup
Expand Down
9 changes: 4 additions & 5 deletions src/penguins/__main__.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@

from kedro.framework.cli.utils import KedroCliError, load_entry_points
from kedro.framework.project import configure_project

from kedro.framework.cli.project import run as kedro_framework_cli_project_run

def _find_run_command(package_name):
try:
Expand All @@ -21,9 +21,7 @@ def _find_run_command(package_name):
# use run command from installed plugin if it exists
return run
# use run command from `kedro.framework.cli.project`
from kedro.framework.cli.project import run

return run
return kedro_framework_cli_project_run
# fail badly if cli.py exists, but has no `cli` in it
if not hasattr(project_cli, "cli"):
raise KedroCliError(f"Cannot load commands from {package_name}.cli")
Expand All @@ -34,9 +32,10 @@ def _find_run_command_in_plugins(plugins):
for group in plugins:
if "run" in group.commands:
return group.commands["run"]

return None

def main(*args, **kwargs):
"""Program entry point"""
package_name = Path(__file__).parent.name
configure_project(package_name)
run = _find_run_command(package_name)
Expand Down
11 changes: 7 additions & 4 deletions src/penguins/pipelines/modeling/nodes.py
Original file line number Diff line number Diff line change
@@ -1,22 +1,25 @@
from typing import Dict, Tuple
"""Nodes for the modeling pipeline."""
from typing import Tuple

import mlflow
import pandas as pd
from autogluon.tabular import TabularPredictor
from sklearn.model_selection import train_test_split


def split_data(data: pd.DataFrame) -> Tuple:
    """Split data into train and test sets.

    Args:
        data: Table to partition.

    Returns:
        A ``(train, test)`` pair produced by ``train_test_split`` with
        ``test_size=0.2``. No ``random_state`` is passed.
    """
    train, test = train_test_split(data, test_size=0.2)
    # Return the two distinct partitions: handing back ``test`` twice would
    # make the model train on the very rows it is later evaluated on.
    return train, test


def train_model(train: pd.DataFrame, test: pd.DataFrame) -> TabularPredictor:
    """Train a model on the given data.

    Args:
        train: Rows used to fit the predictor; the label column is
            ``species``.
        test: Held-out rows the fitted predictor is evaluated on.

    Returns:
        The fitted ``TabularPredictor``.
    """
    mlflow.set_experiment("penguins")
    classificator = TabularPredictor(
        label="species",
        log_to_file=False,
        problem_type="multiclass",
        eval_metric="accuracy",
    )
    classificator.fit(train, time_limit=120)

    # Keep the held-out scores instead of throwing them away, so the MLflow
    # run records how the model does on data it was not fitted on.
    test_scores = classificator.evaluate(test)
    for metric, value in test_scores.items():
        mlflow.log_metric(f"test_{metric}", value)

    # Per-model scores reported by the fit summary, keyed by model name.
    for key, value in classificator.fit_summary()["model_performance"].items():
        mlflow.log_metric(f"{key}_accuracy", value)
    return classificator
7 changes: 4 additions & 3 deletions src/penguins/pipelines/modeling/pipeline.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,14 +5,15 @@

from kedro.pipeline import Pipeline, node, pipeline

from .nodes import split_data, train_model
from penguins.pipelines.modeling.nodes import split_data, train_model


def create_pipeline(**kwargs) -> Pipeline:
def create_pipeline() -> Pipeline:
"""Create the kedro modeling pipeline."""
return pipeline([
node(
func=split_data,
inputs=["model_input_table", "params:model_options"],
inputs=["model_input_table"],
outputs=["train", "test"],
name="split_data_node",
),
Expand Down
7 changes: 5 additions & 2 deletions src/penguins/pipelines/preprocessing/nodes.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,12 @@
"""Nodes for the preprocessing pipeline."""
import pandas as pd
from sklearn.preprocessing import LabelEncoder


def preprocess_penguins(
penguins: pd.DataFrame
) -> pd.DataFrame:
"""Preprocess the penguins data by encoding categorical columns."""
# to encode
island_encoder = LabelEncoder()
penguins["island"] = island_encoder.fit_transform(penguins["island"])
Expand All @@ -24,7 +26,8 @@ def preprocess_penguins(


def create_model_input_table(
    preprocessed_penguins: pd.DataFrame
) -> pd.DataFrame:
    """Create a model input table by dropping rows with missing values.

    Args:
        preprocessed_penguins: Table to clean. The parameter is deliberately
            not called ``preprocess_penguins`` so it does not shadow the
            function of that name in this module.

    Returns:
        The result of ``dropna()`` on the input; the input itself is not
        modified.
    """
    return preprocessed_penguins.dropna()
5 changes: 3 additions & 2 deletions src/penguins/pipelines/preprocessing/pipeline.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,10 +5,11 @@

from kedro.pipeline import Pipeline, node, pipeline

from .nodes import preprocess_penguins, create_model_input_table
from penguins.pipelines.preprocessing.nodes import preprocess_penguins, create_model_input_table


def create_pipeline(**kwargs) -> Pipeline:
def create_pipeline() -> Pipeline:
"""Create the kedro preprocessing pipeline."""
return pipeline([
node(
func=preprocess_penguins,
Expand Down
8 changes: 7 additions & 1 deletion src/penguins/pipelines/serving/nodes.py
Original file line number Diff line number Diff line change
@@ -1,11 +1,17 @@
"""Serving pipeline nodes"""
import io
import pickle

import pandas as pd
from autogluon.tabular import TabularPredictor


def save_data(data: pd.DataFrame, classificator: TabularPredictor, encoders: pickle.OBJ) -> pd.DataFrame:
def save_data(
data: pd.DataFrame,
classificator: TabularPredictor,
encoders: pickle.OBJ
) -> pd.DataFrame:
"""Saves model data to a file"""
df = pd.read_csv(io.StringIO(data), sep=",")

df["island"] = encoders["island"].transform(df["island"])
Expand Down
5 changes: 3 additions & 2 deletions src/penguins/pipelines/serving/pipeline.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,10 +5,11 @@

from kedro.pipeline import Pipeline, node, pipeline

from .nodes import save_data
from penguins.pipelines.serving.nodes import save_data


def create_pipeline(**kwargs) -> Pipeline:
def create_pipeline() -> Pipeline:
"""Create the kedro serving pipeline."""
return pipeline([
node(
func=save_data,
Expand Down
3 changes: 2 additions & 1 deletion src/penguins/settings.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,8 @@
# Class that manages how configuration is loaded.
from kedro.config import OmegaConfigLoader # noqa: import-outside-toplevel

# Kedro looks this setting up by its exact upper-case name, so it must not be
# renamed to satisfy pylint's class-alias naming rule; silence the check here.
CONFIG_LOADER_CLASS = OmegaConfigLoader  # pylint: disable=invalid-name
# Keyword arguments to pass to the `CONFIG_LOADER_CLASS` constructor.
# CONFIG_LOADER_ARGS = {
# "config_patterns": {
Expand Down
19 changes: 13 additions & 6 deletions src/tests/test_run.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,23 +11,25 @@
from pathlib import Path

import pytest
from kedro.config import ConfigLoader
from kedro.config import OmegaConfigLoader
from kedro.framework.context import KedroContext
from kedro.framework.hooks import _create_hook_manager
from kedro.framework.project import settings


@pytest.fixture
def config_loader():
    """Load the config file for tests.

    Returns:
        An ``OmegaConfigLoader`` whose ``conf_source`` is
        ``settings.CONF_SOURCE`` resolved against the current working
        directory.
    """
    return OmegaConfigLoader(conf_source=str(Path.cwd() / settings.CONF_SOURCE))


@pytest.fixture
def project_context(config_loader):  # pylint: disable=redefined-outer-name
    """Introduce project context so tests behave like in a real project.

    pytest injects fixtures by parameter name, so the argument has to be
    spelled exactly like the ``config_loader`` fixture defined in this module;
    the pylint warning about the shadowed name is suppressed instead of
    renaming the argument.

    Returns:
        A ``KedroContext`` for the ``penguins`` package rooted at the current
        working directory.
    """
    return KedroContext(
        package_name="penguins",
        project_path=Path.cwd(),
        config_loader=config_loader,
        hook_manager=_create_hook_manager(),
    )

Expand All @@ -36,5 +38,10 @@ def project_context(config_loader):
# and should be replaced with the ones testing the project
# functionality
class TestProjectContext:
    """Example test for project context"""

    # The method arguments must carry the fixture's exact name
    # (``project_context``) for pytest to inject it.

    def test_project_path(self, project_context):  # pylint: disable=redefined-outer-name
        """Example test for project path"""
        assert project_context.project_path == Path.cwd()

    def test_project_name(self, project_context):  # pylint: disable=redefined-outer-name
        """Example test for project name"""
        # NOTE(review): confirm that KedroContext exposes ``project_name`` in
        # the Kedro version this project pins before relying on this test.
        assert project_context.project_name == "penguins"

0 comments on commit 14cb94a

Please sign in to comment.