-
Notifications
You must be signed in to change notification settings - Fork 0
/
stats.py
114 lines (92 loc) · 3.06 KB
/
stats.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
import datetime
import functools
import operator
from collections.abc import Iterable
import numpy as np
import pandas as pd
import matplotlib
import matplotlib.pyplot as plt
import math
missing_vehicles = [x.split("\t") for x in open("stats_missing_vehicle.tsv").read().split("\n")]
missing_vehicles = [{
"date": datetime.datetime.fromisoformat(x[0]),
"possible_vehicles": str(x[1])
} for x in missing_vehicles if len(x[0]) > 0]
max_days_ago = 1
hour_detail = True
def filter_missing(mv):
mv_new = []
today = datetime.datetime.today()
for x in mv:
days_ago = (today - x["date"]).days
if days_ago <= max_days_ago:
mv_new.append(x)
return mv_new
repl = {
"LF": "HLF 20",
"LF 10": "HLF 20",
"LF 20": "HLF 20",
"TLF 4000": "HLF 20",
}
all_missing_combined = []
all_missing_combined_dated = {}
for x in filter_missing(missing_vehicles):
v = x["possible_vehicles"]
if "|" in v:
v = v.split("|")[0]
if v in repl:
v = repl[v]
all_missing_combined.append(v)
if hour_detail:
datekey = f'{x["date"].day}.{x["date"].month}.{x["date"].year} {x["date"].hour}'
else:
datekey = f'{x["date"].day}.{x["date"].month}.{x["date"].year}'
if not datekey in all_missing_combined_dated:
all_missing_combined_dated[datekey] = []
all_missing_combined_dated[datekey].append(v)
all_missing_counts = dict(pd.value_counts(all_missing_combined))
all_missing_counts_dated = {}
for dk in all_missing_combined_dated:
all_missing_counts_dated[dk] = dict(pd.value_counts(all_missing_combined_dated[dk]))
# print("Missing Vehicles")
# for v in all_missing_counts:
# print(" ", v, "->", all_missing_counts[v])
fig, ax = plt.subplots()
## data
# detail data
plotdata = pd.DataFrame(all_missing_counts_dated)
# normalized average
x = [x for x in plotdata.index]
y = [all_missing_counts[x] / 24 for x in plotdata.index]
# normalized average error
dy = []
for i in plotdata.index:
vals = []
for dk in list(all_missing_counts_dated.keys()):
if not i in all_missing_counts_dated[dk]:
continue
v = all_missing_counts_dated[dk][i]
vals.append(v)
vals = sorted(vals)
cut_count = max([math.floor(len(vals) / 4), 1])
top_cut = len(vals)-cut_count
bottom_cut = cut_count
v_top = vals[top_cut:]
v_bottom = vals[:bottom_cut]
v_top_avg = round(sum(v_top) / len(v_top), 2)
v_bottom_avg = round(sum(v_bottom) / len(v_bottom), 2)
diff = abs(v_top_avg - v_bottom_avg) / 2.0
dy.append(diff)
# print(i)
# print(" >", len(vals), "=>", f"T: {top_cut}: | B: :{bottom_cut}")
# print(" >", "Cuts:", f"T: {v_top} | B: {v_bottom}")
# print(" >", "Avgs:", f"T: {v_top_avg} | B: {v_bottom_avg}")
# print(" > >", "Diff:", diff)
## plot
total_bar_width = .9
b = ax.bar(x, y, color="black", width=total_bar_width)
p = plotdata.plot(kind="bar", ax=ax, width=total_bar_width)
e = ax.errorbar(x, y, yerr=dy, fmt="none", ecolor="gray", capsize=6, elinewidth=1, markeredgewidth=1)
## show
plt.show()
plt.savefig("stats.png")