Skip to content

Commit

Permalink
Merge pull request #185 from ONEcampaign/184-integrate-new-imf-reader
Browse files Browse the repository at this point in the history
184 integrate new imf reader
  • Loading branch information
jm-rivera authored Jul 23, 2024
2 parents 2767cef + eae80d2 commit aa8899e
Show file tree
Hide file tree
Showing 5 changed files with 103 additions and 71 deletions.
117 changes: 67 additions & 50 deletions bblocks/import_tools/imf.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,46 +4,38 @@
from typing import Optional

import pandas as pd
import numpy as np
from imf_reader import weo
import os

from imf_reader.weo.reader import gen_latest_version

from bblocks import config
from bblocks.cleaning_tools.clean import clean_numeric_series, convert_to_datetime
from bblocks.cleaning_tools.clean import convert_to_datetime, convert_id
from bblocks.import_tools.common import ImportData
from bblocks.import_tools.imf_weo import WEO
from bblocks.logger import logger


def _check_parameters(latest_y: int | None, latest_r: int | None) -> str | tuple:
if latest_y is None and latest_r is None:
release = "latest"

else:
release = (latest_y, latest_r)

return release


def _update_weo(latest_y: int = None, latest_r: int = None) -> None:
"""Update _data from the World Economic Outlook, using WEO package"""

release = _check_parameters(latest_y, latest_r)

# Download the file from the IMF website and store in directory
WEO(release).update_data()


@dataclass
class WorldEconomicOutlook(ImportData):
"""World Economic Outlook _data"""

year: Optional[int] = None
release: Optional[int] = None

# if year and release are not both None or both not None raise error
def __post_init__(self) -> None:
if (self.year is None and self.release is not None) or (
self.year is not None and self.release is None
):
raise ValueError(
"Both year and release must be specified or must both be `None`"
)

def __repr__(self) -> str:
return f"IMF WEO(year={self.year}, release={self.release})"

def __load_data(
self, latest_y: int | None = None, latest_r: int | None = None
) -> None:
def __load_data(self) -> None:
"""loading WEO as a clean dataframe
Args:
Expand All @@ -54,36 +46,58 @@ def __load_data(
"""

names = {
"ISO": "iso_code",
"WEO Subject Code": "indicator",
"Subject Descriptor": "indicator_name",
# "Country/Series-specific Notes": "indicator_description",
"Units": "units",
"Scale": "scale",
"Estimates Start After": "estimates_start_after",
"CONCEPT_CODE": "indicator",
"CONCEPT_LABEL": "indicator_name",
"UNIT_LABEL": "units",
"SCALE_LABEL": "scale",
"LASTACTUALDATE": "estimates_start_after",
"OBS_VALUE": "value",
"TIME_PERIOD": "year",
"REF_AREA_LABEL": "entity_name",
}

to_drop = [
"WEO Country Code",
"Country",
"Country/Series-specific Notes",
]
# If year and release are not specified, get the latest version
if self.year is None and self.release is None:
version = gen_latest_version()
self.release, self.year = version

release = _check_parameters(latest_y, latest_r)
df = WEO(version=release).load_data().get_old_format_data()
# For compatibility, if the version is provided as int, convert
if self.release == 1:
version = ("April", self.year)
elif self.release == 2:
version = ("October", self.year)

# Define the path where the data will be stored (or should be stored)
path = f"{config.BBPaths.raw_data}/weo_{self.year}_{self.release}.feather"

# try read from disk
if os.path.exists(path):
self._raw_data = pd.read_feather(path)
return

# If not found, fetch the data
df = weo.fetch_data(version=version)

# Check if the fetched version is the same as the requested version
fetched_version = weo.fetch_data.last_version_fetched
if fetched_version != version:
self.release, self.year = version
path = f"{config.BBPaths.raw_data}/weo_{self.year}_{self.release}.feather"

# Load _data into _data object
self._raw_data = (
df.drop(
columns=to_drop,
)
df.loc[:, names.keys()]
.rename(columns=names)
.melt(id_vars=names.values(), var_name="year", value_name="value")
.assign(year=lambda d: convert_to_datetime(d.year))
.assign(iso_code=lambda d: convert_id(d.entity_name, not_found=np.NaN))
.dropna(subset=["iso_code"])
.dropna(subset=["value"])
.reset_index(drop=True)
)

# save data to disk`
self._raw_data.to_feather(path)

def _check_indicators(self, indicators: str | list | None = None) -> None | dict:
if self._raw_data is None:
self.__load_data()
Expand Down Expand Up @@ -140,23 +154,26 @@ def __load_indicator(ind_: str) -> None:

return self

def update_data(
self, year: int | None, release: int | None, reload_data: bool = True
) -> None:
def update_data(self, reload_data: bool = True) -> None:
"""Update the stored WEO _data, using WEO package.
Args:
"""
_update_weo(latest_y=year, latest_r=release)
# clear cache
weo.clear_cache()

# Reset the _data
self._raw_data = None
self._data = {}

logger.info("WEO data updated.")

if reload_data:
self.load_data(indicator=list(self._data.keys()))
indicators_to_load = list(self._data.keys())
self._data = {}
self.load_data(indicator=indicators_to_load)

else:
self._data = {}

logger.info("WEO data updated.")

def available_indicators(self) -> None:
"""Print the available indicators in the dataset"""
Expand Down
4 changes: 2 additions & 2 deletions bblocks/import_tools/imf_weo.py
Original file line number Diff line number Diff line change
Expand Up @@ -316,7 +316,7 @@ def load_data(self, indicators: str | list = "all") -> ImportData:
else:
self._data[indicator] = self._raw_data[
self._raw_data["concept_code"] == indicator
].reset_index(drop=True)
].reset_index(drop=True)

logger.info("Data loaded to object")
return self
Expand Down Expand Up @@ -417,4 +417,4 @@ def get_old_format_data(self) -> pd.DataFrame:
.assign(ISO=lambda d: clean.convert_id(d.Country, not_found=np.nan))
.dropna(subset="ISO")
.reset_index(drop=True)
)
)
42 changes: 28 additions & 14 deletions poetry.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 1 addition & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@ pyarrow = ">=14, <17"
wbgapi = "<1.1"
camelot-py = "^0.11"
pypdf = "^3.17"
imf-reader = "^1.0.0"

[tool.poetry.dev-dependencies]
bump2version = "^1.0.1"
Expand Down
10 changes: 5 additions & 5 deletions tests/test_import_tools/test_weo.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,14 +14,14 @@ def test_smdx_query_url():

# test April version
assert (
imf_weo._smdx_query_url((2023, 1))
== f"{imf_weo.BASE_URL}/en/Publications/WEO/weo-database/2023/April/download-entire-database"
imf_weo._smdx_query_url((2023, 1))
== f"{imf_weo.BASE_URL}/en/Publications/WEO/weo-database/2023/April/download-entire-database"
)

# test October version
assert (
imf_weo._smdx_query_url((2023, 2))
== f"{imf_weo.BASE_URL}/en/Publications/WEO/weo-database/2023/October/download-entire-database"
imf_weo._smdx_query_url((2023, 2))
== f"{imf_weo.BASE_URL}/en/Publications/WEO/weo-database/2023/October/download-entire-database"
)

# test invalid version
Expand Down Expand Up @@ -170,4 +170,4 @@ def test_init_latest_version(self):
def test_weo_class_invalid_version(self):
invalid_version = "2022"
with pytest.raises(ValueError):
imf_weo.WEO(version=invalid_version)
imf_weo.WEO(version=invalid_version)

0 comments on commit aa8899e

Please sign in to comment.