-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathutilDEDI2021.py
124 lines (105 loc) · 4.55 KB
/
utilDEDI2021.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
"""
utilDEDI2021.py
jsonl read/write for the DEDI programs and other utilities
NOTE 19.10.22: The customized writeedit()/writesrc() functions -- originally there to provide a more readable
JSONL format for manual editing, which wasn't needed after a while -- were a bad idea,
or at least should have used a more systematic (and compatible) way of dealing with None.
Programmer: Philip A. Schrodt <schrodt735@gmail.com>
This code is covered under the MIT license: http://opensource.org/licenses/MIT
REVISION HISTORY:
31-Jul-2019: Initial version
07-Aug-2019: Added timestamp()
=========================================================================================================
"""
import datetime
import json
# File-name fragments; not referenced elsewhere in this module
# (presumably consumed by the DEDI programs that import it -- confirm against callers).
WEEK_SUFFIX = "Wk5"
MONTH_INFIX = "202101"
MONTH_SUFFIX = "-{}.jsonl".format(MONTH_INFIX)

# Field order for writeedit(). The one-character prefix controls layout only and is
# stripped before the field is looked up in the record:
#   "-"  field name on its own line, followed by the json.dumps() of the value
#   "+"  start a new output line, then write the field as "name": "value"
#   none continue the current output line
# The last entry is written without a trailing comma and closes the record.
recOrder = [
    "ccode", "status",
    "+date", "comment", "country",
    "+id", "icewsid",
    "-headline",
    "-text",
    "+size", "sizeCategory",
    "+protesterdemand", "stateresponse",
    "+protest", "protesterviolence", "protesteridentity",
    "+event", "eventText",
    "-location",
    "+region", "version", "language", "publication", "year", "enddate", "citation",
    "codedDate", "coder",
]

# Field order for writesrc(); same prefix convention as recOrder.
# NOTE: "region" is listed twice (once plain, once as "+region"), so writesrc()
# emits that key twice in every record.
srcOrder = [
    "ccode", "status", "+id",
    "+date", "comment", "country", "region", "event", "eventText",
    "-headline",
    "-text",
    "+size", "sizeCategory",
    "+protesterdemand", "stateresponse",
    "+protesterviolence", "protesteridentity",
    "-location",
    "+region", "version", "language", "publication", "year", "enddate", "citation",
]
def read_file(filename):
    """Yield each record of a multi-line JSON file as a dict.

    Lines are accumulated (with surrounding whitespace removed) until a line
    that begins with "}" is seen; the accumulated text plus a closing "}" is
    then parsed with json.loads() and the result is yielded. Anything on the
    "}" line after the brace is ignored, and text left over after the last
    "}" line is silently discarded.

    Args:
        filename: path of the file to read.

    Yields:
        The parsed object for each record, in file order.

    Raises:
        json.JSONDecodeError: if an accumulated record is not valid JSON.
    """
    pieces = []
    # The context manager closes the file once the generator is exhausted or closed.
    with open(filename, "r") as fin:
        for line in fin:
            if line.startswith("}"):
                # debug: print("".join(pieces)) here to find badly formed cases
                yield json.loads("".join(pieces) + "}")
                pieces = []
            else:
                # A raw tab is not legal inside a JSON string, so escape it before
                # parsing. strip() then removes the newline and any padding; unlike
                # slicing off the last character, it cannot eat real data when the
                # line has no trailing newline.
                pieces.append(line.replace("\t", "\\t").strip())
def writeedit(rec, fout):
    """Write a combined record to fout in the multi-line layout read by read_file().

    Fields are written in recOrder order. A "-" prefix in recOrder puts the field
    name on its own line followed by the json.dumps() of the value; a "+" prefix
    starts a new output line; all other fields continue the current line. Apart
    from the "-" fields and "eventText", every value is converted with str() and
    written as a JSON string, so None is written as "None".

    Fixes relative to the earlier version:
      * "eventText" is now written with its key (as writesrc() does); previously
        only the bare value was written, which made the record invalid JSON.
      * str()-converted values are JSON-escaped, so embedded quotes, backslashes
        or newlines no longer corrupt the record. Output for values without such
        characters is unchanged.

    Args:
        rec: mapping that contains every field named in recOrder.
        fout: object with a write(str) method (e.g. an open text file).

    Raises:
        KeyError: if rec lacks a field named in recOrder.
    """
    def scalar(name):
        # "name": "value" with the value stringified first, then quoted/escaped by json.
        # ensure_ascii=False keeps non-ASCII text as-is, matching the previous raw output.
        return '"' + name + '": ' + json.dumps(str(rec[name]), ensure_ascii=False)

    fout.write('{\n')
    for entry in recOrder[:-1]:
        if entry.startswith("-"):
            fl = entry[1:]
            fout.write('\n"' + fl + '":\n')
            if fl == "location":
                fout.write(json.dumps(rec[fl]) + ",")
            else:
                fout.write(json.dumps(rec[fl], indent=2, sort_keys=True) + ",")
        elif entry.startswith("+"):
            fout.write("\n")
            fout.write(scalar(entry[1:]) + ", ")
        elif entry == "eventText":
            # Keeps its native JSON type rather than being stringified.
            fout.write('"eventText": ' + json.dumps(rec[entry]) + ",")
        else:
            fout.write(scalar(entry) + ", ")
    # The final field carries no trailing comma and closes the record.
    fout.write(scalar(recOrder[-1]) + '\n}\n')
def writesrc(rec, fout):
    """Write an original (source) record to fout in the multi-line layout read by read_file().

    Fields are written in srcOrder order. A "-" prefix in srcOrder puts the field
    name on its own line followed by the json.dumps() of the value; a "+" prefix
    starts a new output line; all other fields continue the current line. Apart
    from the "-" fields and "eventText", every value is converted with str() and
    written as a JSON string, so None is written as "None".

    Fix relative to the earlier version: str()-converted values are JSON-escaped,
    so embedded quotes, backslashes or newlines no longer corrupt the record.
    Output for values without such characters is unchanged.

    Args:
        rec: mapping that contains every field named in srcOrder.
        fout: object with a write(str) method (e.g. an open text file).

    Raises:
        KeyError: if rec lacks a field named in srcOrder.
    """
    def scalar(name):
        # "name": "value" with the value stringified first, then quoted/escaped by json.
        # ensure_ascii=False keeps non-ASCII text as-is, matching the previous raw output.
        return '"' + name + '": ' + json.dumps(str(rec[name]), ensure_ascii=False)

    fout.write('{\n')
    for entry in srcOrder[:-1]:
        if entry.startswith("-"):
            fl = entry[1:]
            fout.write('\n"' + fl + '":\n')
            if fl == "location":
                fout.write(json.dumps(rec[fl]) + ",")
            else:
                fout.write(json.dumps(rec[fl], indent=2, sort_keys=True) + ",")
        elif entry.startswith("+"):
            fout.write("\n")
            fout.write(scalar(entry[1:]) + ", ")
        elif entry == "eventText":
            # Keeps its native JSON type rather than being stringified.
            fout.write('"eventText": ' + json.dumps(rec[entry]) + ",")
        else:
            fout.write(scalar(entry) + ", ")
    # The final field carries no trailing comma and closes the record.
    fout.write(scalar(srcOrder[-1]) + '\n}\n')
def timestamp():
    """Return a file-name suffix "-YYMMDD-HHMMSS.jsonl" for the current local time."""
    # A single now() call keeps the date and time parts from the same instant;
    # two separate calls could straddle midnight and pair the old date with the new time.
    return datetime.datetime.now().strftime("-%y%m%d-%H%M%S.jsonl")
def newdate(isodate, forward = False):
    """Shift a date string by one day.

    The year is taken from the first four characters of isodate, the month from
    characters 5-6 and the day from the last two, i.e. the "YYYY-MM-DD" layout.
    The date moves back one day by default, or forward one day when forward is true.

    Returns a tuple (ISO-format string, datetime.date) for the shifted date.

    Note: Python 3.7 has a "datetime.fromisoformat()" function that could replace
    the manual slicing used here.
    """
    year = int(isodate[:4])
    month = int(isodate[5:7])
    day = int(isodate[-2:])
    step = datetime.timedelta(days = 1 if forward else -1)
    shifted = datetime.date(year, month, day) + step
    return shifted.isoformat(), shifted