# list_missing_dates.py
from datetime import date, timedelta, datetime
import json
import pytz
from itertools import tee
# Timezone that tweet timestamps are converted to before their calendar date
# is taken (see the astimezone() call further down).
io_timezone = pytz.timezone("America/Toronto")
def pairwise(iterable):  # Modified from https://stackoverflow.com/a/2315049
    """Return an iterator over overlapping consecutive pairs.

    s -> (s0, s1), (s1, s2), (s2, s3), ...

    Args:
        iterable: Any iterable, including one-shot iterators and generators.

    Returns:
        A ``zip`` iterator of 2-tuples. It is empty when ``iterable`` has
        fewer than two items.
    """
    # tee() gives two independent cursors over one underlying iterator, so a
    # one-shot iterator is not consumed twice through a shared cursor.
    a, b = tee(iterable)
    # A default keeps an empty input from raising StopIteration, which would
    # surface as RuntimeError when called from inside a generator (PEP 479).
    next(b, None)
    return zip(a, b)
def missing_dates(dates):  # https://stackoverflow.com/a/2315049
    """Yield every day that lies strictly between neighbouring dates.

    The input is sorted before it is scanned, so it may be given in any
    order. Repeated dates produce no output because there is no gap between
    them.

    Args:
        dates: Iterable of sortable values that support adding a
            ``timedelta`` (the script passes ``datetime.date`` objects).

    Yields:
        The absent values in ascending order, stepping one day at a time
        from the earlier neighbour up to, but not including, the later one.
    """
    one_day = timedelta(1)
    for earlier, later in pairwise(sorted(dates)):
        candidate = earlier + one_day
        while candidate < later:
            yield candidate
            candidate += one_day
file_path = "yes.txt"

# JSON text is UTF-8 by specification, so decode it explicitly instead of
# relying on the platform's default encoding.
with open(file_path, encoding="utf-8") as file:
    tweets = json.load(file)

# Convert each tweet's timestamp to io_timezone before taking its calendar
# date, so the date reflects the day in that timezone rather than in the
# offset carried by the raw timestamp.
dates = [
    datetime.strptime(tweet["date"], '%Y-%m-%dT%H:%M:%S%z')
    .astimezone(tz=io_timezone)
    .date()
    for tweet in tweets
]

# Print each day without a tweet, then how many such days there were.
# m keeps its initial 0 when nothing is missing.
m = 0
for m, missing in enumerate(missing_dates(dates), start=1):
    print(missing)
print("Missing {} tweets!".format(m))