-
Notifications
You must be signed in to change notification settings - Fork 0
/
data_scraper.py
186 lines (161 loc) · 6.49 KB
/
data_scraper.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
import os
import json
import numpy as np
import pickle
from matplotlib.image import imread
from tqdm import tqdm
def scrape_motion_data(directory, filename):
    """
    Scrapes motion data for all vehicles in a UDP packet
    Used as a helper function for scrape_udp_data (not meant to be called independently)

    Parameters:
    - directory: the directory containing the udp packets
                 (with or without a trailing path separator)
    - filename: the filename of the packet (a JSON file)

    Returns: timestamp (system time) associated with packet, array of motion data
             the array has one row per entry of "mCarMotionData" and 12 columns
    Data Format: [x-pos, y-pos, z-pos, x-vel, y-vel, z-vel, x-forwarddir, y-forwarddir, z-forwarddir, x-rightdir, y-rightdir, z-rightdir]

    Raises:
    - OSError: if the packet file cannot be opened
    - KeyError: if the packet lacks "timestamp", "udpPacket", "mCarMotionData"
                or any of the per-car motion fields
    """
    # Column order of the returned array; must match "Data Format" above.
    motion_fields = (
        "mWorldPositionX", "mWorldPositionY", "mWorldPositionZ",
        "mWorldVelocityX", "mWorldVelocityY", "mWorldVelocityZ",
        "mWorldForwardDirX", "mWorldForwardDirY", "mWorldForwardDirZ",
        "mWorldRightDirX", "mWorldRightDirY", "mWorldRightDirZ",
    )

    # os.path.join gives the same path as plain concatenation when the
    # directory already ends with a separator, and a correct one when it
    # does not.
    with open(os.path.join(directory, filename)) as motion_file:
        data = json.load(motion_file)

    motion_data = data["udpPacket"]["mCarMotionData"]
    saved_motion_data = np.zeros((len(motion_data), len(motion_fields)))
    for row, car in enumerate(motion_data):
        saved_motion_data[row] = [car[field] for field in motion_fields]

    return data["timestamp"], saved_motion_data
def scrape_udp_data(motion_directory, filename="udp_data.pkl"):
    """
    Collects all udp motion data within the specified directory

    Every entry of motion_directory is parsed with scrape_motion_data and the
    results are stored in a dictionary mapping timestamp -> motion data array.
    If the pickle file already exists nothing is scraped and a message is
    printed instead.

    Parameters:
    - motion_directory: directory containing the motion udp data
                        (with or without a trailing "/")
    - filename: name of pickle file to store scraped data (must be of format *.pkl)
                defaults to "udp_data.pkl"

    Saves: saves dictionary file as a pickle serializable object
    Data Format: [x-pos, y-pos, z-pos, x-vel, y-vel, z-vel, x-forwarddir, y-forwarddir, z-forwarddir, x-rightdir, y-rightdir, z-rightdir]

    Raises:
    - ValueError: if motion_directory is empty and data has to be scraped
    """
    if os.path.exists(filename):
        print("Pickle file already exists")
        return

    if not motion_directory:
        raise ValueError("motion_directory must be a non-empty path")
    # scrape_motion_data receives directory and file name separately, so make
    # sure the directory part ends with a separator.
    if not motion_directory.endswith("/"):
        motion_directory += "/"

    udp_data = {}
    for packet_name in os.listdir(motion_directory):
        timestamp, motion_data = scrape_motion_data(motion_directory, packet_name)
        udp_data[timestamp] = motion_data

    with open(filename, "wb") as pickle_file:
        pickle.dump(udp_data, pickle_file)
def fetch_timestamp_data(timestamp, filename="udp_data.pkl"):
    """
    Opens and reads data from pickle file, returns data at specified timestamp
    If timestamp is not found in data, returns data with closest timestamp to desired time

    Parameters:
    - timestamp: the desired time at which the data was collected
    - filename: name of pickle object file, defaults to "udp_data.pkl"

    Returns: data stored at the specified timestamp
             if timestamp not in data, the entry closest to the desired time
             is returned and a message is printed

    Raises:
    - ValueError: if the pickle file holds no timestamps at all
    """
    # NOTE: pickle.load can execute arbitrary code; only read files that were
    # produced by this module.
    with open(filename, "rb") as pickle_file:
        udp_data = pickle.load(pickle_file)

    try:
        return udp_data[timestamp]
    except KeyError:
        if not udp_data:
            raise ValueError("No udp data stored in " + str(filename)) from None
        # min() keeps the first of equally distant timestamps.
        closest = min(udp_data, key=lambda stored: abs(stored - timestamp))
        print("Timestamp not found. Closest value to desired time was used: ", closest)
        return udp_data[closest]
def fetch_data_range(start_time, end_time, filename="udp_data.pkl"):
    """
    Fetch udp data within specified time range

    Parameters:
    - start_time: start time of desired range (exclusive)
    - end_time: end time of desired range (exclusive)
    - filename: name of udp pickle file, defaults to "udp_data.pkl"

    Returns: dictionary of udp_data with timestamps in desired range
             if there is no data in specified range, an error message is
             printed and None is returned
    """
    # NOTE: pickle.load can execute arbitrary code; only read files that were
    # produced by this module.
    with open(filename, "rb") as pickle_file:
        udp_data = pickle.load(pickle_file)

    data_in_range = {
        stamp: packet
        for stamp, packet in udp_data.items()
        if start_time < stamp < end_time
    }
    if not data_in_range:
        print("No timestamps exist within specified range")
        return None
    return data_in_range
def read_scraped_data(filename="udp_data.pkl"):
    """
    Reads udp data stored from scrape_udp_data

    Parameters:
    - filename: name of udp pickle file, defaults to "udp_data.pkl"

    Returns: 3d numpy float array of data sorted by timestamp, with one entry
             along the first axis per stored packet
             an empty pickle yields an array of shape (0, 20, 12)

    Raises:
    - ValueError: if the stored packets do not all have the same shape
    """
    # NOTE: pickle.load can execute arbitrary code; only read files that were
    # produced by this module.
    with open(filename, "rb") as pickle_file:
        udp_data = pickle.load(pickle_file)

    if not udp_data:
        # 20 cars per packet, 12 variables per car
        return np.empty((0, 20, 12))

    # Stack once instead of growing the array packet by packet, which copied
    # the whole array on every iteration.
    packets = [np.asarray(udp_data[stamp], dtype=float) for stamp in sorted(udp_data)]
    return np.stack(packets, axis=0)
def read_processed_data():
    """
    Reads udp data cached in "processed_data.pkl", if available
    If the cache is missing or unreadable, the data is rebuilt with
    read_scraped_data and written to "processed_data.pkl" for the next call

    Parameters: none

    Returns: the cached object, or the array returned by read_scraped_data
             when the cache had to be rebuilt
    """
    try:
        # NOTE: pickle.load can execute arbitrary code; only read files that
        # were produced by this module.
        with open("processed_data.pkl", "rb") as cache_file:
            return pickle.load(cache_file)
    except (FileNotFoundError, EOFError, pickle.UnpicklingError):
        # Missing, truncated or corrupt cache: fall through and rebuild it.
        # Anything else (e.g. KeyboardInterrupt) is deliberately not caught.
        pass

    udp_data = read_scraped_data()
    with open("processed_data.pkl", "wb") as cache_file:
        pickle.dump(udp_data, cache_file)
    return udp_data
def read_rnn_data(filename):
    """
    Reads udp data stored from preprocessing

    Parameters:
    - filename: name of udp pickle file

    Returns: the object stored in the pickle file
             (per the original documentation, a 3d numpy array of data
             formatted for input to a RNN)
    """
    # NOTE: pickle.load can execute arbitrary code; only read trusted files.
    # The context manager closes the file even if unpickling fails.
    with open(filename, "rb") as pickle_file:
        return pickle.load(pickle_file)
def fetch_image_from_timestamp(timestamp, image_dir):
    """
    Loads the image whose recorded timestamp is closest to the desired time

    The directory is read as pairs of files "image_<n>.json" / "image_<n>.jpg"
    numbered from 1, so the number of images is taken to be half the number
    of directory entries. The json files are scanned in numeric order and the
    scan stops as soon as the distance to the desired time stops shrinking.

    Parameters:
    - timestamp: the desired time
    - image_dir: directory containing the image and json files
                 (with or without a trailing path separator)

    Returns: image data as returned by matplotlib's imread

    Raises:
    - FileNotFoundError: if the directory contains no image pairs, or an
                         expected file is missing
    """
    image_count = len(os.listdir(image_dir)) // 2

    best_id = None
    best_gap = np.inf
    for image_id in range(1, image_count + 1):
        json_path = os.path.join(image_dir, "image_{0}.json".format(image_id))
        with open(json_path) as image_json:
            image_time = json.load(image_json)["timestamp"]

        gap = np.abs(timestamp - image_time)
        if not gap < best_gap:
            # Distance stopped decreasing; the previous image was the closest.
            break
        # Remember the winner explicitly so the last image is chosen correctly
        # when the loop runs to completion without hitting the break.
        best_id, best_gap = image_id, gap

    if best_id is None:
        raise FileNotFoundError("No images found in " + str(image_dir))

    return imread(os.path.join(image_dir, "image_{0}.jpg".format(best_id)))
def fetch_image_from_packet_num(packet_num, image_dir):
    """
    Loads the image closest in time to the given udp packet

    Packets are numbered from 1 in order of increasing timestamp, using the
    timestamps stored in "udp_data.pkl".

    Parameters:
    - packet_num: 1-based position of the packet among the sorted timestamps
    - image_dir: directory containing the image and json files

    Returns: image data as returned by fetch_image_from_timestamp

    Raises:
    - IndexError: if packet_num is smaller than 1 or larger than the number
                  of stored packets
    """
    # NOTE: pickle.load can execute arbitrary code; only read files that were
    # produced by this module.
    with open("udp_data.pkl", "rb") as pickle_file:
        udp_data = pickle.load(pickle_file)

    timestamps = sorted(udp_data)
    # Reject 0 and negative numbers explicitly; they would otherwise wrap
    # around and silently select a packet counted from the end.
    if not 1 <= packet_num <= len(timestamps):
        raise IndexError(
            "packet_num must be between 1 and {0}, got {1}".format(len(timestamps), packet_num)
        )
    return fetch_image_from_timestamp(timestamps[packet_num - 1], image_dir)