-
Notifications
You must be signed in to change notification settings - Fork 0
/
enron_outliers.py
37 lines (28 loc) · 956 Bytes
/
enron_outliers.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
#!/usr/bin/python
import pickle
import sys
import matplotlib.pyplot
sys.path.append("../tools/")
from feature_format import featureFormat, targetFeatureSplit
### read in data dictionary, convert to numpy array
# A context manager closes the dataset file as soon as it has been loaded.
# NOTE: pickle executes arbitrary code on load; only use it on trusted files.
with open("../final_project/final_project_dataset.pkl", "rb") as dataset_file:
    data_dict = pickle.load(dataset_file)

# Remove the 'TOTAL' entry before building the feature array. The default
# argument makes this a no-op (instead of a KeyError) if the key is absent.
data_dict.pop('TOTAL', 0)

features = ["salary", "bonus"]
data = featureFormat(data_dict, features)

# find the biggest outlier: the largest value in column 0 (salary, per the
# order of `features`). Raises ValueError if `data` is empty.
max_sal = max(point[0] for point in data)

# Print the name of every entry whose bonus and salary are both known and
# both below the thresholds.
# NOTE(review): this lists the entries *under* the thresholds; if the goal
# was to list the outliers themselves the comparisons may need inverting --
# confirm the intent.
for name, record in data_dict.items():
    bonus, salary = record['bonus'], record['salary']
    # Missing values are stored as the string 'NaN'. Rule those out BEFORE
    # the numeric comparison: on Python 3, `'NaN' < 5000000` raises TypeError.
    if bonus == 'NaN' or salary == 'NaN':
        continue
    if bonus < 5000000 and salary < 1000000:
        print(name)

### your code below
# One scatter call per row, matching the original plotting behaviour.
for point in data:
    salary, bonus = point[0], point[1]
    matplotlib.pyplot.scatter(salary, bonus)

matplotlib.pyplot.xlabel("salary")
matplotlib.pyplot.ylabel("bonus")
matplotlib.pyplot.show()