pirocheto committed Nov 22, 2023
1 parent 33ecf95 commit 3b90ea5
Showing 5 changed files with 42 additions and 24 deletions.
6 changes: 6 additions & 0 deletions dvc.yaml
@@ -1,5 +1,11 @@
params:
  - dvclive/params.yaml
metrics:
  - dvclive/metrics.json
plots:
  - dvclive/plots/metrics:
      x: step
artifacts:
  model:
    path: dvclive/model/model.pkl
    type: model
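
(For context: these entries point at files that DVCLive writes during a run. A minimal sketch of the calls that would produce them, assuming the dvclive 3.x Python API and its default dvclive/ output directory; the parameter and metric values below are purely illustrative.)

from dvclive import Live

with Live() as live:  # writes under the dvclive/ directory by default
    live.log_param("cls", "MultinomialNB")  # -> dvclive/params.yaml
    for step in range(3):
        live.log_metric("accuracy", 0.90 + 0.01 * step)  # -> dvclive/plots/metrics/accuracy.tsv
        live.next_step()  # advances the step used as the plots' x-axis
    # On exit, a summary of the latest values is written to dvclive/metrics.json.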
14 changes: 1 addition & 13 deletions dvclive/model/params.yaml
@@ -1,13 +1 @@
cls__estimator__C: 8.809631367836817e-05
cls__estimator__loss: squared_hinge
cls__estimator__tol: 1.5261822420077893e-05
tfidf__char__lowercase: false
tfidf__char__ngram_range: !!python/tuple
- 1
- 2
tfidf__char__use_idf: false
tfidf__word__lowercase: false
tfidf__word__ngram_range: !!python/tuple
- 1
- 2
tfidf__word__use_idf: false
{}
1 change: 1 addition & 0 deletions dvclive/params.yaml
@@ -0,0 +1 @@
cls: MultinomialNB
39 changes: 31 additions & 8 deletions scripts/compare_classifiers.py
@@ -1,6 +1,10 @@
import pickle
from pathlib import Path

import dvc.api
import numpy as np
import pandas as pd
import yaml
from rich.pretty import pprint
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.linear_model import LogisticRegression
@@ -16,6 +20,13 @@

DATA_PATH = "data/data.csv"

classifiers = [
    ("svm", LinearSVC(dual="auto")),
    ("lr", LogisticRegression()),
    ("knn", KNeighborsClassifier()),
    ("nb", MultinomialNB()),
]


# Function to load data
def load_data(path):
@@ -27,14 +38,6 @@ def load_data(path):
    return X_train, y_train


classifiers = [
    ("svm", LinearSVC(dual="auto")),
    ("lr", LogisticRegression()),
    ("knn", KNeighborsClassifier()),
    ("nb", MultinomialNB()),
]


# Function to print the best trial results
def print_best_exps(n=10):
    pd.set_option("display.max_columns", None)
@@ -60,6 +63,8 @@ def main():
    for exp_name, classifier in classifiers:
        print(f"Experiment '{exp_name}' in progress...")
        with Live(exp_name=exp_name) as live:
            live.log_param("cls", classifier.__class__.__name__)

            tfidf = FeatureUnion(
                [
                    ("word", TfidfVectorizer()),
@@ -86,6 +91,24 @@
                ],
            )

            # Create a directory to save the model
            model_dir = Path(live.dir) / "model"
            model_dir.mkdir(exist_ok=True)

            # Save the model to a pickle file
            model_path = model_dir / "model.pkl"
            model_path.write_bytes(pickle.dumps(model))

            # Log the model as an artifact using dvclive
            live.log_artifact(model_path, type="model", cache=False)

            # Save parameters to a YAML file
            params_path = model_dir / "params.yaml"
            with open(params_path, "w") as fp:
                yaml.dump({}, fp)

            live.log_artifact(params_path, cache=False)

            for name, values in scores.items():
                if name.startswith("test_"):
                    name = name.replace("test_", "")
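
(The script imports dvc.api; a hedged example of how a model artifact logged this way could later be read back from the repository. The call below is illustrative usage, not part of this commit.)

import pickle

import dvc.api

# Illustrative: open the tracked pickle from the repo and deserialize it.
# The path matches the artifact declared in dvc.yaml above.
with dvc.api.open("dvclive/model/model.pkl", mode="rb") as f:
    model = pickle.load(f)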
6 changes: 3 additions & 3 deletions scripts/optimize_model.py
@@ -108,16 +108,16 @@ def __call__(self, trial) -> Any:
            model_path = model_dir / "model.pkl"
            model_path.write_bytes(pickle.dumps(model))

            # Log the model as an artifact using dvclive
            live.log_artifact(model_path, type="model", cache=False)

            # Save parameters to a YAML file
            params_path = model_dir / "params.yaml"
            with open(params_path, "w") as fp:
                yaml.dump(params, fp)

            live.log_artifact(params_path, cache=False)

            # Log the model as an artifact using dvclive
            live.log_artifact(model_path, type="model", cache=False)

            scores = cross_validate(
                model,
                self.X_train,
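
(Once the experiments have run, their params and metrics can be compared programmatically, in the spirit of print_best_exps() in compare_classifiers.py. A hedged sketch, assuming DVC 3.x exposes dvc.api.exp_show(); the resulting column names depend on what was logged.)

import dvc.api
import pandas as pd

# Assumed API: dvc.api.exp_show() returns one dict per experiment,
# suitable for building a DataFrame of params and metrics.
exps = dvc.api.exp_show()
df = pd.DataFrame(exps)
print(df.head(10))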
