-
Notifications
You must be signed in to change notification settings - Fork 0
/
feature_selection.py
76 lines (33 loc) · 1.58 KB
/
feature_selection.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
# -*- coding: utf-8 -*-
# Prerequisites: before running this script, run creating_iGraph_objects.R, general_network_features.R, triad_occurrences_hippocampus_subgraphs.R, clinical_data_processing.R, and subject_data_analysis.R
import pandas as pd
from sklearn.feature_selection import RFE
from sklearn.linear_model import LogisticRegression
# Load the per-subject table from the working directory.
subjectdata = pd.read_csv("subjectdata.csv")

# Column positions 1..203 of the table form the feature matrix and position
# 204 is the label vector; position 0 is skipped (presumably the
# 'Unnamed: 0' identifier column that is copied into the outputs later).
features, labels = (
    subjectdata.values[:, 1:204],
    subjectdata.values[:, 204],
)

# Base estimator handed to every RFE run below.
model = LogisticRegression()
# Recursive feature elimination that keeps 162 features.
# n_features_to_select is passed by keyword because scikit-learn >= 1.0
# rejects it as a positional argument.
rfe162 = RFE(model, n_features_to_select=162)
fit162 = rfe162.fit(features, labels)
selections162 = fit162.support_  # boolean mask, one entry per feature column

# `features` starts at table column 1, so mask position n refers to
# subjectdata column n + 1.
_kept_columns162 = [n + 1 for n, keep in enumerate(selections162) if keep]

# Assemble the output in a single concat (identifier column first, then the
# kept feature columns in their original order) instead of re-copying the
# growing frame once per kept column.
selectedfeatures162 = pd.concat(
    [subjectdata['Unnamed: 0'], subjectdata.iloc[:, _kept_columns162]],
    axis=1,
)
selectedfeatures162.to_csv('selectedfeatures162.csv')
# Recursive feature elimination that keeps 101 features.
# n_features_to_select is passed by keyword because scikit-learn >= 1.0
# rejects it as a positional argument.
rfe101 = RFE(model, n_features_to_select=101)
fit101 = rfe101.fit(features, labels)
selections101 = fit101.support_  # boolean mask, one entry per feature column

# `features` starts at table column 1, so mask position n refers to
# subjectdata column n + 1.
_kept_columns101 = [n + 1 for n, keep in enumerate(selections101) if keep]

# Assemble the output in a single concat (identifier column first, then the
# kept feature columns in their original order) instead of re-copying the
# growing frame once per kept column.
selectedfeatures101 = pd.concat(
    [subjectdata['Unnamed: 0'], subjectdata.iloc[:, _kept_columns101]],
    axis=1,
)
selectedfeatures101.to_csv('selectedfeatures101.csv')
# Recursive feature elimination that keeps 60 features.
# n_features_to_select is passed by keyword because scikit-learn >= 1.0
# rejects it as a positional argument.
rfe60 = RFE(model, n_features_to_select=60)
fit60 = rfe60.fit(features, labels)
selections60 = fit60.support_  # boolean mask, one entry per feature column

# `features` starts at table column 1, so mask position n refers to
# subjectdata column n + 1.
_kept_columns60 = [n + 1 for n, keep in enumerate(selections60) if keep]

# Assemble the output in a single concat (identifier column first, then the
# kept feature columns in their original order) instead of re-copying the
# growing frame once per kept column.
selectedfeatures60 = pd.concat(
    [subjectdata['Unnamed: 0'], subjectdata.iloc[:, _kept_columns60]],
    axis=1,
)
selectedfeatures60.to_csv('selectedfeatures60.csv')