-
Notifications
You must be signed in to change notification settings - Fork 3
/
Copy pathread_csv.py
134 lines (88 loc) · 3.56 KB
/
read_csv.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
#!/usr/bin/env python
# coding: utf-8
# In[2]:
from collections import defaultdict
import csv
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
# ## Read data into NumPy arrays (legacy implementation, not in use)
# In[385]:
# def readCsvToNumpy(file_name, feat_num):
# util_mat = []
# with open(file_name, newline='', encoding='utf-8') as csvfile:
# next(csvfile, None)
# rd = csv.reader(csvfile, delimiter=' ', quotechar='|')
# for idx, row in enumerate(rd):
# row = (' '.join(row))
# row = row.split(',')
# if len(row) == feat_num:
# util_mat.append(row)
# # convert 2d list to 2d numpy array
# for idx, row in enumerate(util_mat):
# util_mat[idx] = np.asarray(row)
# util_mat = np.asarray(util_mat)
# return util_mat
# def getPlayerMatrix(util_mat, left_idx, right_idx):
# player_mat = util_mat[:, left_idx:right_idx]
# player_mat = player_mat.astype(int)
# return player_mat
# def getTeamMatrix(util_mat, player_mat, team_idx):
# hashmap = defaultdict(list)
# for idx, item in enumerate(util_mat):
# hashmap[util_mat[idx, team_idx]].append(player_mat[idx, :])
# team_mat = []
# # print('Team number', len(hashmap))
# for key, value in hashmap.items():
# team_avr = [sum(x)/len(value) for x in zip(*value)]
# team_mat.append(team_avr)
# # team_mat.append((key, temp))
# # for idx, item in enumerate(team_mat):
# # if item[0] == 'Arsenal':
# # print(idx, item)
# # convert team feature matrix to numpy matrix
# for idx, row in enumerate(team_mat):
# team_mat[idx] = np.asarray(row, dtype=int)
# team_mat = np.asarray(team_mat, dtype=int);
# return team_mat
# if __name__ == "__main__":
# util_mat = readCsvToNumpy('data_clean.csv', 74)
# # print(util_mat.shape, util_mat)
# player_mat = getPlayerMatrix(util_mat, 44, 73)
# # print(player_mat.shape, player_mat)
# team_mat = getTeamMatrix(util_mat, player_mat, 6)
# # print(team_mat[0, :])
# res = np.dot(player_mat, np.transpose(team_mat))
# # # print(hashmap['FC Barcelona'])
# # print(res[0,:])
# ## Read data into a pandas DataFrame
# In[3]:
# Load the cleaned dataset. na_filter=False turns off missing-value
# detection, so empty cells stay as empty strings instead of becoming NaN.
util_df = pd.read_csv('data_clean.csv', na_filter=False)

# Per-player feature matrix: positional columns 44..72 of the raw table.
# NOTE(review): presumably these 29 columns are the player attributes
# (Crossing .. SlidingTackle) -- confirm against data_clean.csv.
player_df = util_df.iloc[:, 44:73]

# Per-club mean of every numeric column; sort=False keeps clubs in order of
# first appearance. numeric_only=True is required on pandas >= 2.0, where
# mean() no longer silently drops text columns and raises TypeError instead.
# It reproduces the column set that older pandas versions produced.
team_df = util_df.groupby('Club', sort=False).mean(numeric_only=True)

# NOTE(review): the 37:66 offset presumably selects the same 29 attributes as
# 44:73 above, shifted because the non-numeric columns and the 'Club' key are
# gone from the aggregated frame -- confirm the two slices line up.
team_df = team_df.iloc[:, 37:66]

# Player-by-club score matrix: the dot product of each player's feature
# vector with each club's mean feature vector (rows: players, columns: clubs).
res = np.dot(player_df, np.transpose(team_df))
# In[ ]:
# Notebook cell output: shows the second column; a no-op when run as a script.
util_df.iloc[:,1]
# In[54]:
# util_df.describe()
# Names of the per-player attribute columns used in the correlation analysis.
player_characteristics = [
    'Crossing', 'Finishing', 'HeadingAccuracy', 'ShortPassing', 'Volleys',
    'Dribbling', 'Curve', 'FKAccuracy', 'LongPassing', 'BallControl',
    'Acceleration', 'SprintSpeed', 'Agility', 'Reactions', 'Balance',
    'ShotPower', 'Jumping', 'Stamina', 'Strength', 'LongShots',
    'Aggression', 'Interceptions', 'Positioning', 'Vision', 'Penalties',
    'Composure', 'Marking', 'StandingTackle', 'SlidingTackle',
]

# Pairwise correlations between every attribute and the 'Overall' column.
heatmap_columns = player_characteristics + ['Overall']
attribute_corr = util_df[heatmap_columns].corr()

# Draw the annotated correlation heatmap on a large canvas.
plt.figure(figsize=(25, 16))
hm = sns.heatmap(
    attribute_corr,
    annot=True,
    linewidths=0.5,
    cmap='Reds',
)
hm.set_title(label='Heatmap of dataset', fontsize=20)
# Notebook cell output: a bare reference to the axes; a no-op in a script.
hm
# corr_matrix = util_df.corr()
# corr_matrix.loc[player_characteristics, 'LB'].sort_values(ascending=False).head()