This repository has been archived by the owner on May 25, 2022. It is now read-only.
-
Notifications
You must be signed in to change notification settings - Fork 19
/
Copy pathstats.py
80 lines (68 loc) · 2.95 KB
/
stats.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
import pandas as pd
import numpy as np
import datetime
import plotly.express as px
import database
def create_perc(data):
    """Count how many scores fall into each decile band of their own distribution.

    The band edges are the 0th, 10th, ..., 100th percentiles of the scores
    themselves. Every band is half-open ``[lower, upper)`` except the last,
    which also includes its upper edge so the maximum score is counted.

    Args:
        data: Mapping or DataFrame whose ``'overall_score'`` entry is a sized
            iterable of numeric scores.

    Returns:
        pandas.DataFrame indexed by band label (``'0-10'`` ... ``'90-100'``)
        with a single ``'count'`` column.
    """
    labels = ['{}-{}'.format(low, low + 10) for low in range(0, 100, 10)]
    perc = dict.fromkeys(labels, 0)
    score = data['overall_score']

    # Nothing to bin: percentiles of an empty sample are undefined.
    if len(score) == 0:
        return pd.DataFrame.from_dict(perc, orient='index', columns=['count'])

    # Compute all eleven band edges once instead of once per comparison.
    edges = np.percentile(score, np.arange(0, 101, 10))
    last = len(labels) - 1

    for value in score:
        for band, label in enumerate(labels):
            lower, upper = edges[band], edges[band + 1]
            # The top band is closed on the right; otherwise the maximum
            # score (== 100th percentile) would match no band at all.
            under_upper = value < upper or (band == last and value == upper)
            if value >= lower and under_upper:
                perc[label] += 1
                break

    return pd.DataFrame.from_dict(perc, orient='index', columns=['count'])
def create_doj(data):
    """Bucket upcoming joining dates by how far they are from today.

    Buckets (``diff`` is the number of whole days from today to the date):

    * ``this_week``  -- ``0 <= diff < 7``
    * ``next_week``  -- ``7 <= diff < 14``
    * ``this_month`` -- ``diff >= 14`` and same calendar month *and year* as today
    * ``next_month`` -- every other date with ``diff >= 14``

    Dates in the past are not counted.

    Args:
        data: Mapping or DataFrame whose ``'Date_Of_Joining'`` entry is an
            iterable of ``datetime.date`` (or ``datetime.datetime`` /
            pandas ``Timestamp``) values.

    Returns:
        pandas.DataFrame indexed by bucket name with a single ``'num_days'``
        column. The column holds counts of dates; the name is kept as-is
        because callers may reference it.
    """
    doj = {'this_week': 0, 'next_week': 0, 'this_month': 0, 'next_month': 0}

    # Take one reference date so every row is compared against the same day.
    today = datetime.datetime.now().date()

    for d in data['Date_Of_Joining']:
        # A datetime (including pandas Timestamp) cannot be subtracted from a
        # plain date, so reduce it to its calendar date first.
        d_obj = d.date() if isinstance(d, datetime.datetime) else d
        diff = (d_obj - today).days

        if diff < 0:
            continue
        if diff < 7:
            doj['this_week'] += 1
        elif diff < 14:
            doj['next_week'] += 1
        elif (d_obj.year, d_obj.month) == (today.year, today.month):
            # Compare year as well: the same month number a year or more
            # ahead is not "this month".
            doj['this_month'] += 1
        else:
            doj['next_month'] += 1

    return pd.DataFrame.from_dict(doj, orient='index', columns=['num_days'])
def create_yoe(df):
    """Average every numeric column per distinct ``Year_of_Experience`` value.

    Args:
        df: pandas.DataFrame containing a ``'Year_of_Experience'`` column.

    Returns:
        pandas.DataFrame with one row per distinct ``Year_of_Experience``
        (ascending), that key restored as a regular column, and the mean of
        each remaining numeric column.
    """
    # groupby already orders the group keys ascending, so no pre-sort is
    # needed. numeric_only=True keeps text columns (names, skills, ...) from
    # raising TypeError on pandas >= 2.0, where they are no longer dropped
    # silently.
    averaged = df.groupby("Year_of_Experience", sort=True).mean(numeric_only=True)
    return averaged.reset_index()
def create_ski(df):
    """Count, for each distinct skill, how many rows list it.

    Each ``'Skill'`` cell is a string of skills separated by ``", "``. A skill
    repeated within one cell is counted once for that row.

    The input frame is left unmodified, so the function can be called
    repeatedly on the same data.

    Args:
        df: pandas.DataFrame with a ``'Skill'`` column of ``", "``-separated
            strings.

    Returns:
        tuple ``(num_skills, skills)`` where ``skills`` is the sorted list of
        distinct skill names and ``num_skills[i]`` is the number of rows that
        mention ``skills[i]``.
    """
    from collections import Counter

    # Split into a local list rather than assigning back to df['Skill'];
    # overwriting the column would break any later call (lists have no
    # .split) and surprise the caller.
    skills_per_row = [cell.split(", ") for cell in df['Skill'].values]

    # set() collapses duplicates inside a row so each row contributes at most
    # one to a skill's tally.
    tally = Counter(
        skill for row_skills in skills_per_row for skill in set(row_skills)
    )

    skills = sorted(tally)
    num_skills = [tally[skill] for skill in skills]
    return num_skills, skills