Skip to content

Commit

Permalink
Adapted the API functions and removed the unnecessary ones.
Browse files Browse the repository at this point in the history
  • Loading branch information
tomsail committed Aug 7, 2023
1 parent 903f3be commit e0e97de
Showing 1 changed file with 11 additions and 76 deletions.
87 changes: 11 additions & 76 deletions searvey/ioc.py
Original file line number Diff line number Diff line change
Expand Up @@ -102,43 +102,6 @@
"view",
],
}
IOC_STATIONS_COLUMN_NAMES_API = {
"general": [
"ioc_code",
"GlossID",
"country",
"Location",
"connect",
"DCP_ID",
"last_observation_level",
"last_observation_time",
"delay",
"interval",
"view",
],
"contacts": [
"ioc_code",
"GlossID",
"lat",
"lon",
"country",
"Location",
"connect",
"contacts",
],
"performance": [
"ioc_code",
"GlossID",
"country",
"Location",
"connect",
"added_to_system",
"observations",
"sample_interval",
"average_delay_per_day",
"transmit_interval",
],
}
IOC_STATION_DATA_COLUMNS_TO_DROP = [
"bat",
"sw1",
Expand Down Expand Up @@ -196,7 +159,9 @@ def get_ioc_stations_api_request() -> pd.DataFrame:
assert response.ok, f"failed to download: {url}"
logger.debug("Downloaded: %s", url)
di = json.loads(response.content)
df = pd.DataFrame(di).rename(columns={"Code": "ioc_code"})
df = pd.DataFrame(di).rename(columns={"Code": "ioc_code", "GlossID": "gloss_id"})
df = df.drop(columns=["lon", "lat"])
df = df.rename(columns={"Lon": "lon", "Lat": "lat"})

This comment has been minimized.

Copy link
@pmav99

pmav99 Aug 7, 2023

Member
  1. I haven't tested this, but you can probably use the response.json() method instead of json.loads(). This would eliminate the need for di. Try something like pd.DataFrame(response.json()), or maybe pd.read_json(io.StringIO(response.text)) (note that pd.read_json expects JSON text rather than an already-parsed object). One of them should work.
  2. No need to call .rename() twice. Just .drop() the columns first and call .rename() afterwards.
return df


Expand All @@ -217,12 +182,6 @@ def normalize_ioc_stations(df: pd.DataFrame) -> gpd.GeoDataFrame:


def normalize_ioc_stations_api(df: pd.DataFrame) -> gpd.GeoDataFrame:
df = df.assign(
# fmt: off
GlossID=df.GlossID.astype(pd.Int64Dtype()),
observations_ratio_per_month=df.observations,
# fmt: on
)
gdf = gpd.GeoDataFrame(
data=df,
geometry=gpd.points_from_xy(df.lon, df.lat, crs="EPSG:4326"),
Expand Down Expand Up @@ -343,37 +302,12 @@ def get_ioc_stations_api(
def normalize_ioc_station_data(ioc_code: str, df: pd.DataFrame, truncate_seconds: bool) -> pd.DataFrame:
# Each station may have more than one sensors.
# Some of the sensors have nothing to do with sea level height. We drop these sensors
df = df.rename(columns=IOC_STATION_DATA_COLUMNS)
logger.debug("%s: df contains the following columns: %s", ioc_code, df.columns)
df = df.drop(columns=IOC_STATION_DATA_COLUMNS_TO_DROP, errors="ignore")
if len(df.columns) == 1:
# all the data columns have been dropped!
msg = f"{ioc_code}: The table does not contain any sensor data!"
logger.info(msg)
raise ValueError(msg)
df = df.assign(
ioc_code=ioc_code,
time=pd.to_datetime(df.time),
df = df.rename(columns=IOC_STATION_DATA_COLUMNS).rename(
columns={"stime": "time", "Code": "ioc_code", "Lon": "lon", "Lat": "lat"}
)
if truncate_seconds:
# Truncate seconds from timestamps: https://stackoverflow.com/a/28783971/592289
# WARNING: This can potentially lead to duplicates!
df = df.assign(time=df.time.dt.floor("min"))
if df.time.duplicated().any():
# There are duplicates. Keep the first datapoint per minute.
msg = f"{ioc_code}: Duplicate timestamps have been detected after the truncation of seconds. Keeping the first datapoint per minute"
warnings.warn(msg)
df = df.iloc[df.time.drop_duplicates().index].reset_index(drop=True)
return df


def normalize_ioc_station_data_api(ioc_code: str, df: pd.DataFrame, truncate_seconds: bool) -> pd.DataFrame:
# Each station may have more than one sensors.
# Some of the sensors have nothing to do with sea level height. We drop these sensors
df = df.rename(columns=IOC_STATION_DATA_COLUMNS)
logger.debug("%s: df contains the following columns: %s", ioc_code, df.columns)
df = df.drop(columns=IOC_STATION_DATA_COLUMNS_TO_DROP, errors="ignore")
if len(df.columns) == 1:
if len(df.columns) <= 1:

This comment has been minimized.

Copy link
@pmav99

pmav99 Aug 7, 2023

Member

When are we going to have 0 columns and when 1? If possible add a comment here that explains what is going on.

This comment has been minimized.

Copy link
@tomsail

tomsail Aug 9, 2023

Author Contributor

This is to also handle the case where df.columns == [], i.e. the dataframe is empty.

# all the data columns have been dropped!
msg = f"{ioc_code}: The table does not contain any sensor data!"
logger.info(msg)
Expand Down Expand Up @@ -441,8 +375,8 @@ def get_ioc_station_data_api(
while rate_limit.reached(identifier="IOC"):
wait()

endtime = resolve_timestamp(endtime)
starttime = resolve_start_date(endtime, period)
endtime = resolve_timestamp(endtime, timezone="UTC", timezone_aware=False).tz_localize(tz=None)
starttime = resolve_start_date(endtime, period).tz_localize(tz=None)
url = IOC_BASE_URL_API.format(
ioc_code=ioc_code, starttime=starttime.isoformat(), endtime=endtime.isoformat()
)
Expand All @@ -452,14 +386,15 @@ def get_ioc_station_data_api(
assert response.ok, f"failed to download: {url}"
logger.debug("Downloaded: %s", url)
di = json.loads(response.content)
df = pd.DataFrame(di).rename(columns={"stime": "time", "Code": "ioc_code"})
df = pd.DataFrame(di)
if len(df) > 0:
df = normalize_ioc_station_data(ioc_code=ioc_code, df=df, truncate_seconds=truncate_seconds)
except ValueError as exc:
if str(exc) == "No tables found":
logger.info("%s: No data", ioc_code)
else:
logger.exception("%s: Something went wrong", ioc_code)
raise
df = normalize_ioc_station_data_api(ioc_code=ioc_code, df=df, truncate_seconds=truncate_seconds)
return df


Expand Down

0 comments on commit e0e97de

Please sign in to comment.