consider pylint's ideas (UNTESTED) #1

Merged: 4 commits, Jan 23, 2024
2 changes: 1 addition & 1 deletion README.md
@@ -17,7 +17,7 @@ conda activate penguins-env
3. Run the project

```
kedro run
kedro run
```

## PyCharm Setup
9 changes: 4 additions & 5 deletions src/penguins/__main__.py
@@ -6,7 +6,7 @@

from kedro.framework.cli.utils import KedroCliError, load_entry_points
from kedro.framework.project import configure_project

from kedro.framework.cli.project import run as kedro_framework_cli_project_run

def _find_run_command(package_name):
try:
@@ -21,9 +21,7 @@ def _find_run_command(package_name):
# use run command from installed plugin if it exists
return run
# use run command from `kedro.framework.cli.project`
from kedro.framework.cli.project import run

return run
return kedro_framework_cli_project_run
# fail badly if cli.py exists, but has no `cli` in it
if not hasattr(project_cli, "cli"):
raise KedroCliError(f"Cannot load commands from {package_name}.cli")
@@ -34,9 +32,10 @@ def _find_run_command_in_plugins(plugins):
for group in plugins:
if "run" in group.commands:
return group.commands["run"]

return None

def main(*args, **kwargs):
"""Program entry point"""
package_name = Path(__file__).parent.name
configure_project(package_name)
run = _find_run_command(package_name)
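The `__main__.py` change above moves the fallback `run` import to module level under an explicit alias, which is what pylint's import-outside-toplevel check asks for. A minimal, self-contained sketch of the same pattern using stdlib names rather than the project's (everything below is illustrative):

```python
# Fallback imported once at module level and aliased so its origin stays obvious,
# instead of `from json import dumps` inside the function body.
from json import dumps as json_dumps_fallback


def find_serializer(plugins: dict):
    """Return a plugin-provided serializer if one exists, else the stdlib fallback."""
    serializer = plugins.get("serializer")
    if serializer is not None:
        # use the serializer from an installed plugin if it exists
        return serializer
    # otherwise use the module-level fallback
    return json_dumps_fallback


if __name__ == "__main__":
    print(find_serializer({})({"status": "ok"}))
```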
11 changes: 7 additions & 4 deletions src/penguins/pipelines/modeling/nodes.py
@@ -1,22 +1,25 @@
from typing import Dict, Tuple
"""Nodes for the modeling pipeline."""
from typing import Tuple

import mlflow
import pandas as pd
from autogluon.tabular import TabularPredictor
from sklearn.model_selection import train_test_split


def split_data(data: pd.DataFrame, parameters: Dict) -> Tuple:
def split_data(data: pd.DataFrame) -> Tuple:
"""Split data into train and test sets."""
train, test = train_test_split(data, test_size=0.2)
return test, test
return train, test


def train_model(train: pd.DataFrame, test: pd.DataFrame) -> TabularPredictor:
"""Train a model on the given data."""
mlflow.set_experiment("penguins")
classificator = TabularPredictor(label="species", log_to_file=False, problem_type="multiclass",
eval_metric="accuracy")
classificator.fit(train, time_limit=120)
y_pred = classificator.evaluate(test)
classificator.evaluate(test)
for key, value in classificator.fit_summary()["model_performance"].items():
mlflow.log_metric(f"{key}_accuracy", value)
return classificator
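For context, the corrected `split_data` now returns the train and test frames in order (the old node returned the test split twice) and no longer takes the unused `parameters` argument. A standalone sketch of the revised node on a toy frame, with the fixed 0.2 test size taken from the diff:

```python
import pandas as pd
from sklearn.model_selection import train_test_split


def split_data(data: pd.DataFrame):
    """Split data into train and test sets, as in the revised node."""
    train, test = train_test_split(data, test_size=0.2)
    return train, test


if __name__ == "__main__":
    toy = pd.DataFrame({"species": ["Adelie", "Gentoo"] * 5, "body_mass_g": range(10)})
    train, test = split_data(toy)
    print(len(train), len(test))  # 8 rows kept for training, 2 held out
```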
7 changes: 4 additions & 3 deletions src/penguins/pipelines/modeling/pipeline.py
@@ -5,14 +5,15 @@

from kedro.pipeline import Pipeline, node, pipeline

from .nodes import split_data, train_model
from penguins.pipelines.modeling.nodes import split_data, train_model


def create_pipeline(**kwargs) -> Pipeline:
def create_pipeline() -> Pipeline:
"""Create the kedro modeling pipeline."""
return pipeline([
node(
func=split_data,
inputs=["model_input_table", "params:model_options"],
inputs=["model_input_table"],
outputs=["train", "test"],
name="split_data_node",
),
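The pipeline change mirrors the node change: with `params:model_options` removed from `inputs`, the single dataset name lines up with `split_data`'s single parameter, because Kedro maps each entry of `inputs` to a function argument by position. A rough standalone sketch of that mapping (the simplified `split_data` body here is illustrative only):

```python
import pandas as pd
from kedro.pipeline import node


def split_data(data: pd.DataFrame):
    """One positional parameter, so the node declares exactly one input name."""
    midpoint = len(data) // 2
    return data.iloc[:midpoint], data.iloc[midpoint:]


# "model_input_table" feeds split_data's only argument; the two returned frames
# are registered under the two output names.
split_node = node(
    func=split_data,
    inputs=["model_input_table"],
    outputs=["train", "test"],
    name="split_data_node",
)

print(split_node)  # prints the node's name together with its inputs and outputs
```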
7 changes: 5 additions & 2 deletions src/penguins/pipelines/preprocessing/nodes.py
@@ -1,10 +1,12 @@
"""Nodes for the preprocessing pipeline."""
import pandas as pd
from sklearn.preprocessing import LabelEncoder


def preprocess_penguins(
penguins: pd.DataFrame
) -> pd.DataFrame:
"""Preprocess the penguins data by encoding categorical columns."""
# to encode
island_encoder = LabelEncoder()
penguins["island"] = island_encoder.fit_transform(penguins["island"])
@@ -24,7 +26,8 @@ def preprocess_penguins(


def create_model_input_table(
preprocess_penguins: pd.DataFrame
preprocessed_penguins: pd.DataFrame
) -> pd.DataFrame:
model_input_table = preprocess_penguins.dropna()
"""Create a model input table by dropping rows with missing values."""
model_input_table = preprocessed_penguins.dropna()
return model_input_table
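As a quick illustration of what these two preprocessing nodes do, here is a standalone sketch on a toy frame (the values are made up; only the LabelEncoder-then-dropna pattern is taken from the diff):

```python
import pandas as pd
from sklearn.preprocessing import LabelEncoder

# Toy stand-in for the raw penguins table.
penguins = pd.DataFrame(
    {
        "island": ["Torgersen", "Biscoe", "Biscoe", "Dream"],
        "body_mass_g": [3750.0, 3800.0, None, 4675.0],
    }
)

# preprocess_penguins: encode the categorical column in place.
island_encoder = LabelEncoder()
penguins["island"] = island_encoder.fit_transform(penguins["island"])
print(list(island_encoder.classes_))  # ['Biscoe', 'Dream', 'Torgersen']

# create_model_input_table: drop rows that still contain missing values.
model_input_table = penguins.dropna()
print(model_input_table)
```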
5 changes: 3 additions & 2 deletions src/penguins/pipelines/preprocessing/pipeline.py
@@ -5,10 +5,11 @@

from kedro.pipeline import Pipeline, node, pipeline

from .nodes import preprocess_penguins, create_model_input_table
from penguins.pipelines.preprocessing.nodes import preprocess_penguins, create_model_input_table


def create_pipeline(**kwargs) -> Pipeline:
def create_pipeline() -> Pipeline:
"""Create the kedro preprocessing pipeline."""
return pipeline([
node(
func=preprocess_penguins,
8 changes: 7 additions & 1 deletion src/penguins/pipelines/serving/nodes.py
@@ -1,11 +1,17 @@
"""Serving pipeline nodes"""
import io
import pickle

import pandas as pd
from autogluon.tabular import TabularPredictor


def save_data(data: pd.DataFrame, classificator: TabularPredictor, encoders: pickle.OBJ) -> pd.DataFrame:
def save_data(
data: pd.DataFrame,
classificator: TabularPredictor,
encoders: pickle.OBJ
) -> pd.DataFrame:
"""Saves model data to a file"""
df = pd.read_csv(io.StringIO(data), sep=",")

df["island"] = encoders["island"].transform(df["island"])
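The `pickle.OBJ` annotation in the new signature is not a typing construct. Assuming the `encoders` dataset is really a dict of fitted LabelEncoders keyed by column name (the `encoders["island"]` lookup suggests as much), a sketch of the same logic with an explicit annotation could look like this; the helper name and toy payload are made up:

```python
import io
from typing import Dict

import pandas as pd
from sklearn.preprocessing import LabelEncoder


def decode_payload(data: str, encoders: Dict[str, LabelEncoder]) -> pd.DataFrame:
    """Parse a CSV payload and re-apply the encoders fitted during preprocessing."""
    df = pd.read_csv(io.StringIO(data), sep=",")
    df["island"] = encoders["island"].transform(df["island"])
    return df


if __name__ == "__main__":
    island_encoder = LabelEncoder().fit(["Biscoe", "Dream", "Torgersen"])
    payload = "island,body_mass_g\nDream,4100\nBiscoe,3800\n"
    print(decode_payload(payload, {"island": island_encoder}))
```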
5 changes: 3 additions & 2 deletions src/penguins/pipelines/serving/pipeline.py
@@ -5,10 +5,11 @@

from kedro.pipeline import Pipeline, node, pipeline

from .nodes import save_data
from penguins.pipelines.serving.nodes import save_data


def create_pipeline(**kwargs) -> Pipeline:
def create_pipeline() -> Pipeline:
"""Create the kedro serving pipeline."""
return pipeline([
node(
func=save_data,
3 changes: 2 additions & 1 deletion src/penguins/settings.py
@@ -24,7 +24,8 @@
# Class that manages how configuration is loaded.
from kedro.config import OmegaConfigLoader # noqa: import-outside-toplevel

CONFIG_LOADER_CLASS = OmegaConfigLoader
# CONFIG_LOADER_CLASS = OmegaConfigLoader
ConfigLoaderClass = OmegaConfigLoader
# Keyword arguments to pass to the `CONFIG_LOADER_CLASS` constructor.
# CONFIG_LOADER_ARGS = {
# "config_patterns": {
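For reference, Kedro picks up the configuration loader from the upper-case `CONFIG_LOADER_CLASS` name in `settings.py`, falling back to its default loader when that name is absent. A short sketch of the conventional declaration together with the optional constructor arguments the surrounding comments refer to; the pattern values below are an assumption, not taken from this project:

```python
from kedro.config import OmegaConfigLoader

# Kedro's settings machinery looks for this exact upper-case name.
CONFIG_LOADER_CLASS = OmegaConfigLoader

# Optional keyword arguments forwarded to the loader's constructor.
CONFIG_LOADER_ARGS = {
    "config_patterns": {
        "catalog": ["catalog*", "catalog*/**", "**/catalog*"],
    }
}
```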
19 changes: 13 additions & 6 deletions src/tests/test_run.py
@@ -11,23 +11,25 @@
from pathlib import Path

import pytest
from kedro.config import ConfigLoader
from kedro.config import OmegaConfigLoader
from kedro.framework.context import KedroContext
from kedro.framework.hooks import _create_hook_manager
from kedro.framework.project import settings


@pytest.fixture
def config_loader():
return ConfigLoader(conf_source=str(Path.cwd() / settings.CONF_SOURCE))
"""Load the config file for tests"""
return OmegaConfigLoader(conf_source=str(Path.cwd() / settings.CONF_SOURCE))


@pytest.fixture
def project_context(config_loader):
def project_context(cfg_loader):
"""Introduce project context so tests behave like in a real project."""
return KedroContext(
package_name="penguins",
project_path=Path.cwd(),
config_loader=config_loader,
config_loader=cfg_loader,
hook_manager=_create_hook_manager(),
)

@@ -36,5 +38,10 @@ def project_context(config_loader):
# and should be replaced with the ones testing the project
# functionality
class TestProjectContext:
def test_project_path(self, project_context):
assert project_context.project_path == Path.cwd()
"""Example test for project context"""
def test_project_path(self, proj_context):
"""Example test for project path"""
assert proj_context.project_path == Path.cwd()
def test_project_name(self, proj_context):
"""Example test for project name"""
assert proj_context.project_name == "penguins"
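pytest injects fixtures by matching a test's parameter name against a fixture function's name, so the parameter names in these tests have to spell out the fixture names exactly. A standalone toy sketch of that contract (the dict-based fixtures are illustrative, not the project's KedroContext):

```python
import pytest


@pytest.fixture
def config_loader():
    """A fixture is addressed by its *function name*."""
    return {"conf_source": "conf"}


@pytest.fixture
def project_context(config_loader):
    """Fixtures may request other fixtures, again strictly by name."""
    return {"package_name": "penguins", **config_loader}


def test_package_name(project_context):
    # Asking for `proj_context` instead would fail at setup with
    # "fixture 'proj_context' not found".
    assert project_context["package_name"] == "penguins"
```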