-
Notifications
You must be signed in to change notification settings - Fork 0
/
incomechange.py
117 lines (101 loc) · 4.48 KB
/
incomechange.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
"""
This script will track changes in earned income.
Currently it only works for the Supportive Services Employment Team but the base
report should be able to be modified to work for other providers as well.
Future versions will of the
class will be able to be used to track changes in any specific form of monthly
income or non-chash benefits. Additional features will include the ability to
track changes in any group of income types.
"""
__author__ = "David Marienburg"
__maintainer__ = "David Marienburg"
__version__ = "1.0"
import pandas as pd
from tkinter.filedialog import askopenfilename
from tkinter.filedialog import asksaveasfilename
class Report:
def __init__(self, file):
"""
Initiate the data frame
"""
self.entry_df = pd.read_excel(file, sheet_name="EntryData")
self.exit_df = pd.read_excel(file, sheet_name="ExitInterimData")
def return_most_recent_employment(self, df):
"""
Return a data frame that only contains the most recent version of the
Source of Income(141) column where the value of said column is equal to
Earned Income(HUD)
"""
# create a local copy of the dataframe with the values sorted by Client
# Unique ID and Entry Exit Entry Date dropping duplicates by Client
# Unique ID
o_df = df[df["Source of Income(141)"] == "Earned Income (HUD)"].sort_values(
by=["Client Unique Id", "Start Date(841)"],
ascending=False
).drop_duplicates(subset="Client Unique Id", keep="first")
# return the data frame
return o_df
def merge_dfs(self, e_df, ex_df):
"""
Create a new dataframe by merging entry and exit dataframes then return
three dataframes, one showing income gain, one showing income decrease,
and one showing no income change.
"""
# merge the entry dataframe and the exit dataframe on the client unique
# id column filling nan values in the monthly amount fields with 0
merged = e_df[[
"Client Unique Id",
"Client Uid",
"Monthly Amount(142)"
]].merge(
ex_df[["Client Unique Id", "Client Uid", "Monthly Amount(142)"]],
on=["Client Unique Id", "Client Uid"],
how="left",
suffixes=("_entry", "_exit")
).fillna(0)
# create the gain, loss, no_change dataframes that do not contain the
# Client Unique IDs column
gain = merged[
merged["Monthly Amount(142)_entry"] < merged["Monthly Amount(142)_exit"]
][["Client Uid", "Monthly Amount(142)_entry", "Monthly Amount(142)_exit"]]
loss = merged[
merged["Monthly Amount(142)_entry"] > merged["Monthly Amount(142)_exit"]
][["Client Uid", "Monthly Amount(142)_entry", "Monthly Amount(142)_exit"]]
no_change = merged[
merged["Monthly Amount(142)_entry"] == merged["Monthly Amount(142)_exit"]
][["Client Uid", "Monthly Amount(142)_entry", "Monthly Amount(142)_exit"]]
# return the dataframes
return gain, loss, no_change
def process(self):
"""
Process the raw report and save the processed sheets to a new excel
workbook
"""
# create the three processed sheets using the merge_dfs and
# return_most_recent_employment methods
gain, loss, no_change = self.merge_dfs(
self.return_most_recent_employment(self.entry_df),
self.return_most_recent_employment(self.exit_df)
)
# create the writer object
writer = pd.ExcelWriter(
asksaveasfilename(
title="Save the Income Change report",
initialfile="Income Change Report(Processed)",
defaultextension=".xlsx"
),
engine="xlsxwriter"
)
# create the individual sheets
gain.to_excel(writer, sheet_name="PTs with Income Growth", index=False)
loss.to_excel(writer, sheet_name="PTs with Income Loss", index=False)
no_change.to_excel(writer, sheet_name="PTs with No Income Change", index=False)
self.entry_df.to_excel(writer, sheet_name="Raw Entry Data", index=False)
self.exit_df.to_excel(writer, sheet_name="Raw Exit and Interim Data", index=False)
# save the excel workbook and return True
writer.save()
return True
if __name__ == "__main__":
file = askopenfilename(title="Open the Income Change report")
a = Report(file)
a.process()