-
Notifications
You must be signed in to change notification settings - Fork 0
/
longborrow_overrides.py
76 lines (66 loc) · 2.82 KB
/
longborrow_overrides.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
import argparse
import pandas as pd
def long_borrow_overrides(
    corrected_events,
    events_path="source_data/SCoData_events_v1.2_2022-01.csv",
    output_path="long_borrow_overrides.csv",
):
    """Write a CSV of corrected rows for borrows longer than a year.

    Borrows in the baseline events CSV with ``borrow_duration_days > 365``
    are looked up in the corrected events CSV. Because the data has no
    event ids, a borrow is matched on member and item, then on start date,
    falling back to end date. When the matched row's duration differs from
    the baseline, the corrected row(s) are recorded together with a
    ``match_date`` column naming the date field that matched.

    Args:
        corrected_events: path to the newer events CSV with corrections.
        events_path: path to the baseline events CSV.
        output_path: where the override CSV is written.

    Returns:
        The DataFrame of overrides that was written (possibly empty).
    """
    # columns needed for identifying & overriding a borrow
    override_columns = [
        "event_type",
        "member_uris",
        "item_uri",
        "start_date",
        "end_date",
        "borrow_duration_days",
        "match_date",
    ]

    # load the baseline events and the newer version with corrections
    events_df = pd.read_csv(events_path)
    corrected_events_df = pd.read_csv(corrected_events)
    # limit just to borrows
    corrected_borrows = corrected_events_df[
        corrected_events_df.event_type == "Borrow"
    ]

    # identify the long borrows that were flagged for correction
    yearplus_borrows = events_df[events_df.borrow_duration_days > 365]
    print("Found %d borrows longer than a year" % yearplus_borrows.shape[0])

    overrides = []
    unmatched = 0
    for borrow in yearplus_borrows.itertuples():
        # no event ids, so narrow by member/item first
        member_item_events = corrected_borrows[
            (corrected_borrows.member_uris == borrow.member_uris)
            & (corrected_borrows.item_uri == borrow.item_uri)
        ]
        # prefer a start date match; fall back to end date
        match_on = "start_date"
        corrected_borrow = member_item_events[
            member_item_events.start_date == borrow.start_date
        ]
        if corrected_borrow.empty:
            match_on = "end_date"
            corrected_borrow = member_item_events[
                member_item_events.end_date == borrow.end_date
            ]
        if corrected_borrow.empty:
            # nothing in the corrected data matches this borrow; skip it
            # rather than crash on iloc[0]
            unmatched += 1
            continue

        # compare against the first match (expected to be the only one)
        correction = corrected_borrow.iloc[0]
        if borrow.borrow_duration_days != correction.borrow_duration_days:
            # assign() returns a new frame, so the filtered slice of
            # corrected_borrows is never written to
            overrides.append(corrected_borrow.assign(match_date=match_on))

    if unmatched:
        print(
            "Skipped %d long borrows with no matching corrected borrow"
            % unmatched
        )

    if overrides:
        override_df = pd.concat(overrides)[override_columns]
    else:
        # pd.concat raises on an empty list; emit a header-only file instead
        override_df = pd.DataFrame(columns=override_columns)

    print("Saving %d long borrow overrides" % override_df.shape[0])
    override_df.to_csv(output_path, index=False)
    return override_df
if __name__ == "__main__":
    # command-line entry point: takes the path to the corrected events CSV
    # and passes it to long_borrow_overrides
    parser = argparse.ArgumentParser(
        description="Generate long borrow overrides from a corrected events export."
    )
    parser.add_argument(
        "events",
        metavar="CSVFILE",
        help="path to more recent events export with updated borrows",
    )
    args = parser.parse_args()
    long_borrow_overrides(args.events)