Skip to content

Commit

Permalink
Merge pull request #15 from lukaszgajewski/main
Browse files (browse the repository at this point in the history)
environment + preprocessing clean up
  • Loading branch information
florianjehn authored Jan 24, 2024
2 parents cf7d4ba + 98df76c commit 806135a
Show file tree
Hide file tree
Showing 3 changed files with 15 additions and 9 deletions.
4 changes: 4 additions & 0 deletions environment.yml
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,10 @@ dependencies:
- matplotlib
- pandas
- pytest
- country_converter
- networkx
- geopandas
- seaborn



4 changes: 4 additions & 0 deletions requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -7,3 +7,7 @@ seaborn==0.13.0
pandas==2.1.3
pytest==7.4.3
jupyter==1.0.0
country_converter==1.2
networkx==3.2.1
geopandas==0.14.2
seaborn==0.13.1
16 changes: 7 additions & 9 deletions src/preprocessing.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@
"""


def rename_item(item):
def rename_item(item: str) -> str:
"""
Renames specific item entries for readability.
Expand Down Expand Up @@ -234,19 +234,19 @@ def rename_countries(
region: str,
filename: str,
code_type: str = "M49 Code",
) -> pd.DataFrame:
) -> pd.Series | pd.DataFrame:
"""
Rename country codes with country names in either production or trade data.
Arguments:
data (pd.DataFrame): The data to be renamed.
data (pd.Series | pd.DataFrame): The data to be renamed.
region (str): The region of the data.
filename (str): The filename for the country codes CSV file.
code_type (str): The type of country code to be used.
after_union (bool): Whether the index/columns of the data are already unified.
Returns:
pd.DataFrame: The data with country codes replaced by country names.
pd.Series | pd.DataFrame: The data with country codes replaced by country names.
"""
# Read in the country codes from the zip file
faostat_zip = f"data{os.sep}data_raw{os.sep}{filename}_{region}.zip"
Expand All @@ -269,12 +269,10 @@ def rename_countries(
codes_dict = dict(zip(codes[code_type], codes_area_short))

print(f"Replacing country codes with country names in {filename.split('_')[0]} data")
for code in data.index:
data.rename(index={code: codes_dict[code]}, inplace=True)
data.rename(index=codes_dict, inplace=True)

if isinstance(data, pd.DataFrame):
for code in data.columns:
data.rename(columns={code: codes_dict[code]}, inplace=True)
data.rename(columns=codes_dict, inplace=True)

return data

Expand Down Expand Up @@ -344,7 +342,7 @@ def main(
trade_unit="tonnes",
element="Export Quantity",
year="Y2018",
) -> pd.DataFrame:
) -> None:
try:
print(f"Reading in data for {item} in {region}...")
production_pkl = f"data{os.sep}temp_files{os.sep}Production_Crops_Livestock_E_{region}.pkl"
Expand Down

0 comments on commit 806135a

Please sign in to comment.