-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathSVM_Methods.py
executable file
·64 lines (58 loc) · 2.35 KB
/
SVM_Methods.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
#-*- coding: utf-8 -*-
import scipy
import numpy
from sklearn import svm
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.model_selection import train_test_split
from MachineLearning.Model_metrics import F_score_multiclass_Kfolds, F_score_Kfolds
def SVM_Train(x, y, test_size, shuffle=True, kernel='linear', C=1.0, gamma=0.001):
    """Fit an SVC on a train split of ``(x, y)`` and predict the held-out part.

    Args:
        x: Feature matrix accepted by ``train_test_split`` and ``SVC.fit``.
        y: Labels aligned with the rows of ``x``.
        test_size: Forwarded to ``train_test_split`` when greater than 0.
            Otherwise no split is made and the model is fitted on all of
            ``x`` / ``y``.
        shuffle: Forwarded to ``train_test_split``.
        kernel: Forwarded to ``svm.SVC``.
        C: Forwarded to ``svm.SVC``.
        gamma: Forwarded to ``svm.SVC``.

    Returns:
        A tuple ``(clf, test_y, y_preds)``: the fitted classifier, the true
        labels of the held-out samples, and a list with the predicted label
        of each held-out sample. Both label collections are empty when
        ``test_size`` is not positive.
    """
    if test_size > 0:
        train_x, test_x, train_y, test_y = train_test_split(
            x, y, test_size=test_size, shuffle=shuffle)
    else:
        train_x, train_y = x, y
        test_x, test_y = [], []

    # Define classifier
    clf = svm.SVC(kernel=kernel, C=C, gamma=gamma)
    clf.fit(train_x, train_y)

    # Test data: predict the whole held-out set in one call. Indexing and
    # reshaping row by row only works for array-like containers that expose
    # ``.reshape`` on each row, and it costs one predict call per sample.
    y_preds = []
    if test_size > 0 and len(test_y) > 0:
        y_preds = list(clf.predict(test_x))
    return clf, test_y, y_preds
def SVM_Kfolds(x, y, k, kernel='linear', C=1.0, gamma=0.001, multiclass=False, with_counts=True, with_lists=True, with_confusion_matrix=True):
    """Train and evaluate an SVC ``k`` times and aggregate the scores.

    Each round calls ``SVM_Train`` with a held-out size of ``len(y) // k``
    and ``shuffle=True``, then collects the true and predicted labels.

    NOTE(review): every round draws its own shuffled split, so the held-out
    sets of different rounds are not guaranteed to be disjoint -- confirm
    this is the intended evaluation scheme.

    Args:
        x: Feature matrix passed through to ``SVM_Train``.
        y: Labels passed through to ``SVM_Train``; its length sets the
            held-out size.
        k: Number of rounds.
        kernel: Forwarded to ``SVM_Train``.
        C: Forwarded to ``SVM_Train``.
        gamma: Forwarded to ``SVM_Train``.
        multiclass: When true, score with ``F_score_multiclass_Kfolds``;
            otherwise with ``F_score_Kfolds``.
        with_counts: Forwarded to the scoring function.
        with_lists: Forwarded to the scoring function.
        with_confusion_matrix: Forwarded to the multiclass scoring function
            only.

    Returns:
        Whatever the selected scoring function returns.
    """
    held_out_size = len(y) // k
    predicted_per_round = []
    truth_per_round = []

    for _ in range(k):
        _, round_truth, round_predicted = SVM_Train(
            x, y, held_out_size,
            shuffle=True, kernel=kernel, C=C, gamma=gamma)
        predicted_per_round.append(round_predicted)
        truth_per_round.append(round_truth)

    if not multiclass:
        return F_score_Kfolds(
            truth_per_round, predicted_per_round,
            with_counts=with_counts, with_lists=with_lists)

    return F_score_multiclass_Kfolds(
        truth_per_round, predicted_per_round,
        with_counts=with_counts, with_lists=with_lists,
        with_confusion_matrix=with_confusion_matrix)
def SVM_weights_untrained(x, y, feature_names, kernel='linear', C=1.0, gamma=0.001):
    """Fit an SVC on all of ``(x, y)`` and pair feature names with weights.

    Args:
        x: Feature matrix; a plain list (or list subclass) is converted to a
            numpy array before fitting.
        y: Labels; a plain list (or list subclass) is converted to a numpy
            array before fitting.
        feature_names: Iterable of names, zipped positionally with the
            weights.
        kernel: Forwarded to ``svm.SVC``. Per the scikit-learn docs,
            ``coef_`` is only exposed for ``kernel='linear'``; other kernels
            make the ``coef_`` access raise.
        C: Forwarded to ``svm.SVC``.
        gamma: Forwarded to ``svm.SVC``.

    Returns:
        A list of ``(feature_name, weight)`` tuples built from the first row
        of ``clf.coef_`` only. ``zip`` truncates to the shorter of the two
        sequences.
    """
    if isinstance(x, list):
        x = numpy.array(x)
    if isinstance(y, list):
        y = numpy.array(y)

    clf = svm.SVC(kernel=kernel, C=C, gamma=gamma)
    clf.fit(x, y)

    coef = clf.coef_
    # A model fitted on sparse input exposes a sparse ``coef_``, which has
    # no ``tolist``; densify it first.
    if hasattr(coef, "toarray"):
        coef = coef.toarray()
    weights = coef.tolist()[0]
    return list(zip(feature_names, weights))
def SVM_weights_trained(clf, keyword_list):
    """Pair the vocabulary of ``keyword_list`` with a fitted model's weights.

    The vocabulary is rebuilt by fitting a fresh ``CountVectorizer`` on
    ``keyword_list``; the names are then zipped positionally with the first
    row of ``clf.coef_``.

    NOTE(review): the pairing is only meaningful if ``clf`` was trained on
    features produced by an equivalent vectorizer over the same documents --
    confirm against the caller.

    Args:
        clf: A fitted classifier exposing ``coef_``.
        keyword_list: Iterable of raw text documents used to rebuild the
            vocabulary.

    Returns:
        A list of ``(feature_name, weight)`` tuples. ``zip`` truncates to the
        shorter of the two sequences.
    """
    coef = clf.coef_
    # A model fitted on sparse input (e.g. CountVectorizer output) exposes a
    # sparse ``coef_``, which has no ``tolist``; densify it first.
    if hasattr(coef, "toarray"):
        coef = coef.toarray()
    weights = coef.tolist()[0]

    vectorizer = CountVectorizer(min_df=1, token_pattern='(?u)\\b\\w+\\b')
    # Only the learned vocabulary is needed, not the document-term matrix.
    vectorizer.fit(keyword_list)

    # ``get_feature_names`` was removed in scikit-learn 1.2; prefer its
    # replacement and fall back for older releases.
    if hasattr(vectorizer, "get_feature_names_out"):
        feature_names = vectorizer.get_feature_names_out().tolist()
    else:
        feature_names = vectorizer.get_feature_names()

    return list(zip(feature_names, weights))
# Script entry point: running this module directly does nothing; the
# functions above are meant to be imported.
if __name__ == "__main__":
    pass