-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathclassification.py
63 lines (49 loc) · 2.47 KB
/
classification.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
import matplotlib.pyplot as plt
from imblearn.metrics import classification_report_imbalanced
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis, QuadraticDiscriminantAnalysis
from sklearn.ensemble import GradientBoostingClassifier, HistGradientBoostingClassifier, RandomForestClassifier, BaggingClassifier
from sklearn.gaussian_process import GaussianProcessRegressor
from sklearn.linear_model import LogisticRegression,SGDClassifier, Perceptron, PassiveAggressiveClassifier
from sklearn.preprocessing import StandardScaler
from sklearn import svm, tree, linear_model
from sklearn.svm import SVC, NuSVC, LinearSVC
from sklearn.kernel_ridge import KernelRidge
from sklearn.neighbors import KNeighborsClassifier
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import make_pipeline
from sklearn.neural_network import MLPClassifier
from sklearn.metrics import balanced_accuracy_score, classification_report, confusion_matrix, \
ConfusionMatrixDisplay
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier
from sklearn.neighbors import KNeighborsClassifier, RadiusNeighborsClassifier, NearestCentroid, NeighborhoodComponentsAnalysis
from xgboost import XGBClassifier
from util import read_results
from sklearn.naive_bayes import BernoulliNB, CategoricalNB, GaussianNB
from imblearn.over_sampling import RandomOverSampler, SMOTE, ADASYN
from imblearn.under_sampling import RandomUnderSampler
# NOTE: the `imblearn` imports in this file come from the imbalanced-learn package (pip install imbalanced-learn).
import numpy as np
# Fix NumPy's global seed up front. NOTE(review): presumably this is what keeps
# the RandomForestClassifier below (built without random_state) repeatable
# between runs -- confirm that read_results() does not consume random numbers.
np.random.seed(42)

# Columns used as model inputs, and the column that holds the target class.
FEATURE_COLUMNS = ['boolean', 'tf', 'embeddings', 'st', 'wmd']
LABEL_COLUMN = 'label'

# Load both corpora (training first, then testing) through the project helper.
training_corpus = read_results('resultados_training')
testing_corpus = read_results('resultados_testing')

# Split each corpus into its feature matrix and its label vector.
X = training_corpus[FEATURE_COLUMNS]
y = training_corpus[LABEL_COLUMN]
X_test = testing_corpus[FEATURE_COLUMNS]
y_test = testing_corpus[LABEL_COLUMN]

# Resample the training data only; the test split is evaluated as loaded.
under_sampler = RandomUnderSampler(random_state=42)
X_resampled, y_resampled = under_sampler.fit_resample(X, y)

# Train the classifier on the resampled training set.
# Alternative model kept for experimentation:
# clf = XGBClassifier(n_estimators=2, max_depth=100)
clf = RandomForestClassifier()
clf.fit(X_resampled, y_resampled)

# One importance value per entry of FEATURE_COLUMNS, in that order.
print(clf.feature_importances_)

# Evaluate on the held-out test corpus.
y_pred = clf.predict(X_test)
print(classification_report(y_test, y_pred))
print(classification_report_imbalanced(y_test, y_pred))

balanced_accuracy = balanced_accuracy_score(y_test, y_pred)
print(f'Acurácia balanceada: {balanced_accuracy}')

# Confusion matrix with rows/columns ordered like clf.classes_.
cm = confusion_matrix(y_test, y_pred, labels=clf.classes_)
print(cm)

# Optional plot of the confusion matrix (disabled):
# disp = ConfusionMatrixDisplay(confusion_matrix=cm, display_labels=clf.classes_)
# disp.plot()
# plt.show()