-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathfind_earthquakes.py
138 lines (108 loc) · 3.5 KB
/
find_earthquakes.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
import os
import requests
import logging
import json
import pandas as pd
# import pandas_gbq
from dotenv import load_dotenv
from datetime import datetime
# Load environment variables via python-dotenv (by default from a .env file)
# before any of the settings below are read.
load_dotenv()

# Log ERROR-level records (and above) to errors.log, prefixed with a timestamp
logging.basicConfig(
    level=logging.ERROR,
    format="%(asctime)s - %(message)s",
    filename="errors.log",
)

# Endpoint queried for earthquake data and path of the locations JSON file;
# both are None when the corresponding environment variable is not set.
USGS_API_URL = os.environ.get("USGS_API_URL")
LOCATIONS_FILE_URL = os.environ.get("LOCATIONS_FILE_URL")
# BQ_TABLE_ID = os.getenv("BQ_TABLE_ID")
# GOOGLE_PROJECT_ID = os.getenv("GOOGLE_PROJECT_ID")
def fetch_data(location=None, radius_km=None):
    """
    Fetch earthquake data from the USGS API for the current calendar year.

    Args:
        location: Optional dict with "latitude", "longitude" and "name"
            keys. When given, the query is centred on that point.
        radius_km: Optional search radius, sent as "maxradiuskm".

    Returns:
        A DataFrame with one row per GeoJSON feature (nested keys joined
        with "_") plus a "location_name" column, which is None when no
        location was given. Returns None when the request fails, the
        response is not usable JSON, or no features are returned.
    """
    year = datetime.today().year
    params = {
        "format": "geojson",
        "starttime": datetime(year, 1, 1).strftime("%Y-%m-%d"),
    }
    if location:
        params["latitude"] = f"{location['latitude']}"
        params["longitude"] = f"{location['longitude']}"
    if radius_km:
        params["maxradiuskm"] = radius_km

    try:
        # A timeout keeps a stalled connection from hanging the script
        response = requests.get(USGS_API_URL, params=params, timeout=60)
        # Surface HTTP 4xx/5xx as errors instead of parsing an error page
        response.raise_for_status()
        data = response.json()
    except (requests.exceptions.RequestException, ValueError) as err:
        # ValueError covers a non-JSON body on requests versions whose
        # JSON decode error is not a RequestException
        print("Error fetching data")
        logging.error(err)
        return None

    # Only build a frame when the payload actually carries features
    # (i.e. there is data available for this query)
    features = data.get("features") if isinstance(data, dict) else None
    if not features:
        return None

    df = pd.json_normalize(features, sep="_")
    # Calls without a location (whole-world query) must not dereference None
    df["location_name"] = location["name"] if location else None
    return df
def get_earthquakes_by_location(radius_km="1000"):
    """
    Load the locations from a json file and get earthquake data
    for a radius around each of them.

    Args:
        radius_km: Search radius passed to fetch_data for every location.
            Defaults to "1000".

    Returns:
        A DataFrame with the columns id, datetime, magnitude, latitude,
        longitude, place and location_name. The frame is empty (same
        columns) when no location returned any data. Returns None when
        the locations file is not configured, cannot be read, or does not
        contain valid JSON.
    """
    output_columns = [
        "id", "datetime", "magnitude", "latitude",
        "longitude", "place", "location_name",
    ]

    # Load locations from json file
    if not LOCATIONS_FILE_URL:
        message = "LOCATIONS_FILE_URL is not set"
        logging.error(message)
        print(message)
        return None
    try:
        # The context manager closes the file even if parsing fails
        with open(LOCATIONS_FILE_URL, encoding="utf-8") as f:
            locations = json.load(f)
    except (OSError, json.JSONDecodeError) as err:
        logging.error(err)
        print(err)
        return None

    # Fetch earthquake data for each location; fetch_data returns None
    # for locations without data, so those are skipped.
    frames = []
    for location in locations["locations"]:
        frame = fetch_data(location, radius_km)
        if frame is not None:
            frames.append(frame)
    if not frames:
        return pd.DataFrame(columns=output_columns)
    # Concatenate once instead of growing a frame inside the loop
    df = pd.concat(frames, ignore_index=True)

    # Fix data columns and keep only relevant ones.
    # The coordinate cells are read by index below: position 0 is the
    # longitude and position 1 the latitude. They are expected to already
    # be sequences (as produced by json_normalize), so no pd.eval parsing
    # of the column is needed.
    coordinates = df["geometry_coordinates"]
    df["longitude"] = coordinates.apply(lambda point: point[0])
    df["latitude"] = coordinates.apply(lambda point: point[1])
    # properties_time is converted as milliseconds since the epoch
    df["properties_time"] = pd.to_datetime(
        df["properties_time"], unit="ms")
    df = df.rename(columns={
        "properties_time": "datetime",
        "properties_mag": "magnitude",
        "properties_place": "place",
    })
    return df[output_columns]
def get_all_earthquakes():
    """
    Fetch the earthquakes of the current calendar year without
    restricting the query to a location or radius, print the result
    and return it.
    """
    current_year_quakes = fetch_data()
    print(current_year_quakes)
    return current_year_quakes
# def insert_to_bigquery(df):
# """
# Insert data into a BigQuery table
# """
# try:
# pandas_gbq.to_gbq(df, BQ_TABLE_ID, project_id=GOOGLE_PROJECT_ID,
# if_exists="replace", api_method="load_csv")
# except Exception as e:
# logging.error(traceback.print_exc())
if __name__ == "__main__":
    # Collect the earthquakes around every configured location and export
    # them to CSV; None means the locations could not be loaded.
    quakes_by_location = get_earthquakes_by_location()
    if quakes_by_location is not None:
        quakes_by_location.to_csv("earthquakes.csv")
        # insert_to_bigquery(quakes_by_location)

    # All earthquakes in the current year
    # eq_list_current_year = get_all_earthquakes()