This repository has been archived by the owner on Apr 23, 2021. It is now read-only.
-
Notifications
You must be signed in to change notification settings - Fork 0
/
7. scatter plot.py
74 lines (61 loc) · 1.54 KB
/
7. scatter plot.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
# importing libraries
import csv
import matplotlib.pyplot as plt
import numpy as np
from sklearn import preprocessing
# Script: read two integer-coded columns from the cleaned CSV, count how often
# each (type, gender) pair occurs, print the collected data, and save/show a
# scatter plot of type against gender.

# Positions of the two columns read from every data row. The names mirror the
# lists the values are stored in below.
# NOTE(review): confirm these indices against the CSV header row.
TYPE_COLUMN = 13
GENDER_COLUMN = 16

features = None  # header row of the CSV; stays None if the file is empty
genders = []
types = []
combines = {}  # (type, gender) -> number of data rows holding that pair

with open('data/data_clean_v2.csv', newline='') as csvfile:
    rows = csv.reader(csvfile)
    # The first row carries the column names, not data.
    features = next(rows, None)
    for row in rows:
        if not row:
            # csv.reader yields [] for blank lines (e.g. a trailing newline);
            # indexing such a row would raise IndexError.
            continue
        # Convert each field once and reuse it for the lists and the tally.
        type_code = int(row[TYPE_COLUMN])
        gender_code = int(row[GENDER_COLUMN])
        types.append(type_code)
        genders.append(gender_code)
        pair = (type_code, gender_code)
        combines[pair] = combines.get(pair, 0) + 1

genders = np.array(genders)
types = np.array(types)
print(genders)
print(types)
print(combines)
print(len(combines))

# One black dot per data row: type on the x axis, gender on the y axis.
plt.plot(types, genders, 'o', color='black')
# Write the image to disk first, then open the interactive window.
plt.savefig('scatter_chart_national.png')
plt.show()