forked from urbhagat/NightFour
-
Notifications
You must be signed in to change notification settings - Fork 0
/
HealthData 09-28-2015 Urvish
121 lines (85 loc) · 2.94 KB
/
HealthData 09-28-2015 Urvish
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
# Urvish Bhagat
# Import section
import os
from collections import OrderedDict

import matplotlib as mpl
import numpy as np
import pandas as pd
from bokeh.charts import Bar, Dot, output_file, show
# Read in the health data.
# A quoted string cannot span physical lines, so the long file name is split
# into adjacent literals, which Python joins (with no separator) at compile
# time into a single path.
CSV_PATH = (
    '/users/khaji123/desktop/'
    'Inpatient_Prospective_Payment_System__'
    'IPPS__Provider_Summary_for_the_Top_100_'
    'Diagnosis-Related_Groups__DRG__-_'
    'FY2011 (1).csv'
)
govdata = pd.read_csv(CSV_PATH)

# Column headers exactly as they are looked up in the CSV; the first two carry
# leading/trailing spaces, so they must be reproduced verbatim.
ACC_COL = ' Average Covered Charges '
ATP_COL = ' Average Total Payments '
AMP_COL = 'Average Medicare Payments'

# Assign each column to a new data column without the leading $
acc = govdata[ACC_COL].str.lstrip('$')
atp = govdata[ATP_COL].str.lstrip('$')
amp = govdata[AMP_COL].str.lstrip('$')

# Change the data type from object to float so that it is numeric.
# astype() returns a new Series, so the result is bound to a new variable.
accnum = acc.astype(float)
atpnum = atp.astype(float)
ampnum = amp.astype(float)

# Update the govdata table. Direct column assignment replaces the column with
# the float Series; DataFrame.update() only overwrites values in place and is
# not guaranteed to change the column's dtype.
govdata[ACC_COL] = accnum
govdata[ATP_COL] = atpnum
govdata[AMP_COL] = ampnum

# Mean, mode and standard deviation for the three payment columns.
# Series.mode() returns a Series (there may be several modal values), while
# mean() and std() return scalars.
accmean = accnum.mean()
accmode = accnum.mode()
accstd = accnum.std()
atpmean = atpnum.mean()
atpmode = atpnum.mode()
atpstd = atpnum.std()
ampmean = ampnum.mean()
ampmode = ampnum.mode()
ampstd = ampnum.std()

# Print the table and the statistics.
# A bare expression statement displays nothing when run as a script, so the
# table is printed explicitly. Single-argument print(...) behaves the same
# under Python 2 and Python 3.
print(govdata)
summary = [
    ("Average Covered Charges", accmean, accmode, accstd),
    ("Average Total Payments", atpmean, atpmode, atpstd),
    # Label matches the source column ('Average Medicare Payments').
    ("Average Medicare Payments", ampmean, ampmode, ampstd),
]
for label, mean_value, mode_value, std_value in summary:
    print("Mean of " + label)
    print(mean_value)
    print("Mode of " + label)
    print(mode_value)
    print("STD of " + label)
    print(std_value)
# Once the data was organized, summary statistics were produced for each
# individual column and graphed. Below is the Python code that was run,
# followed by the graphs it produced with their numerical representations.
# Build the input for the dot chart: one series per statistic, with one value
# per payment column. (dict, OrderedDict, lists, arrays and DataFrames are all
# valid chart inputs; an OrderedDict keeps the series in insertion order.)
# The 'Mode' series ([accmode, atpmode, ampmode]) stays disabled, as before.
# NOTE(review): presumably because Series.mode() yields a Series rather than
# a single number -- confirm before re-enabling.
category_labels = ['Covered Charges', 'Total Payment', 'Medical Payment']
xyvalues = OrderedDict([
    ('mean', [accmean, atpmean, ampmean]),
    ('Std', [accstd, atpstd, ampstd]),
])
dot = Dot(xyvalues, category_labels, title='dots')
# Write the chart to a static HTML file and display it.
output_file('dot.html')
show(dot)
# Bar charts of mean / mode / standard deviation, one chart per payment column.
# NOTE(review): the y-values are hard-coded literals -- presumably copied from
# the printed output of an earlier run; confirm they still match the data.
#
# Each chart is written to its own static HTML file. Previously both charts
# targeted "bar.html", so the second silently overwrote the first. The last
# chart keeps the name "bar.html" so that file ends up with the same content
# as before.
bar_charts = [
    ("bar_covered_charges.html", "Average Coverage Charge",
     [36133.9542244, 31155, 35065.3659313]),
    ("bar.html", "Average Total Payments",
     [9707.47380419, 4370, 7664.64259815]),
]
for html_name, chart_title, stats in bar_charts:
    # prepare some data
    data = {"y": stats}
    # output to static HTML file
    output_file(html_name)
    # create a new bar chart with a title and axis labels
    p = Bar(data, cat=['Mean', 'Mode', 'STD'], title=chart_title,
            xlabel='categories', ylabel='values', width=400, height=400)
    # show the results
    show(p)