-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathsimple_cv_pipeline.py
52 lines (37 loc) · 1.71 KB
/
simple_cv_pipeline.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
import pandas as pd
import numpy as np
import optuna
from optuna.samplers import TPESampler
from sklearn.svm import SVC
from sklearn.metrics import make_scorer, matthews_corrcoef
from sklearn.model_selection import cross_val_score, StratifiedKFold
from sklearn.multiclass import OneVsRestClassifier
from sklearn.decomposition import TruncatedSVD
def objective(trial, X, y) -> float:
    """Score one Optuna trial of an RBF-kernel one-vs-rest SVM.

    The trial samples the SVM's ``C`` and ``gamma``; the features are
    projected onto 2 components with ``TruncatedSVD`` and the classifier is
    evaluated with stratified 5-fold cross-validation using the Matthews
    correlation coefficient (MCC).

    Args:
        trial: Optuna trial used to sample ``c_svm`` and ``gamma_svm``.
        X: Feature matrix accepted by ``TruncatedSVD`` / ``cross_val_score``.
        y: Class labels, used both for stratification and for scoring.

    Returns:
        Mean MCC over the 5 folds (the value the study maximizes).

    Raises:
        optuna.TrialPruned: If the study's pruner asks to stop this trial.
    """
    # Imported locally: the file's top-level imports do not include
    # sklearn.pipeline, and this keeps the function self-contained.
    from sklearn.pipeline import make_pipeline

    seed = 42

    # Search space: both hyperparameters are sampled log-uniformly
    # from [0.001, 1].
    c_svm = trial.suggest_float('c_svm', 0.001, 1, log=True)
    gamma_svm = trial.suggest_float('gamma_svm', 0.001, 1, log=True)

    # NOTE(review): this function never calls trial.report(), so pruners that
    # rely on reported intermediate values presumably never fire here —
    # confirm whether this check is still wanted.
    if trial.should_prune():
        raise optuna.TrialPruned()

    # One-vs-rest wrapper around an RBF-kernel SVC (max_iter caps the solver
    # at 1000 iterations per binary problem).
    svm = OneVsRestClassifier(
        SVC(
            kernel='rbf',
            gamma=gamma_svm,
            C=c_svm,
            random_state=seed,
            max_iter=1000,
        ),
        n_jobs=-1,
    )

    # The SVD lives inside the pipeline so it is re-fitted on the training
    # part of every fold; fitting it once on all of X would let the held-out
    # samples influence the projection. A fixed random_state makes the
    # (randomized) SVD solver repeatable across trials and runs.
    model = make_pipeline(
        TruncatedSVD(n_components=2, random_state=seed),
        svm,
    )

    # Stratified, shuffled 5-fold cross-validation scored with MCC.
    cv = StratifiedKFold(n_splits=5, shuffle=True, random_state=seed)
    mcc_scorer = make_scorer(matthews_corrcoef)
    scores = cross_val_score(model, X, y, cv=cv, scoring=mcc_scorer)

    # Mean MCC across folds is the objective value to be maximized.
    return scores.mean()
def optimize_hyperparameters(X, y, *, n_trials=50, seed=42, n_jobs=-1,
                             show_progress_bar=True):
    """Run an Optuna TPE search that maximizes ``objective`` on ``(X, y)``.

    Args:
        X: Feature matrix forwarded unchanged to ``objective``.
        y: Class labels forwarded unchanged to ``objective``.
        n_trials: Number of trials to run (default 50).
        seed: Seed for the ``TPESampler`` (default 42).
        n_jobs: Number of trials Optuna runs in parallel; ``-1`` is passed
            through to ``study.optimize`` as-is (default -1).
        show_progress_bar: Whether Optuna displays a progress bar.

    Returns:
        A ``(best_params, best_value)`` tuple taken from the finished study.

    Note:
        The sampler is seeded, but with parallel trials (``n_jobs != 1``) the
        order in which trials complete can vary between runs, so results are
        presumably not exactly reproducible; pass ``n_jobs=1`` when
        reproducibility matters.
    """
    study = optuna.create_study(
        direction='maximize',
        sampler=TPESampler(seed=seed),
    )

    # Bind the dataset so Optuna only has to supply the trial.
    study.optimize(
        lambda trial: objective(trial, X, y),
        n_trials=n_trials,
        show_progress_bar=show_progress_bar,
        n_jobs=n_jobs,
    )

    best_params = study.best_params
    best_value = study.best_value

    print("Best Hyperparameters:", best_params)
    print("Best Value:", best_value)

    return best_params, best_value