Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

WDI legacy import scripts #1109

Open
wants to merge 1 commit into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
20 changes: 20 additions & 0 deletions scripts/world_bank/wdi_legacy/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
# Importing World Bank World Development Indicators(WDI) DATA

The primary World Bank collection of development indicators, compiled from
officially-recognized international sources. It presents the most current and
accurate global development data available, and includes national estimates.
At the moment, we only include a small subset of variables from this.
See wdi_csv2mcf for detailed data we include.

The data is from:

https://datacatalog.worldbank.org/dataset/world-development-indicators

**To generate MCFs from these files, provide the input/output paths and run:**
Download and unzip the CSV file from
https://datacatalog.worldbank.org/search/dataset/0037712 into
input_files/WDICSV.csv

Then run the command:

`python wdi_csv2mcf.py`
299 changes: 299 additions & 0 deletions scripts/world_bank/wdi_legacy/constants.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,299 @@
"""Constants used across wdi."""

from string import Template

POPS_FILE = 'pops_file'

START_YEAR_COLUMN = 4
YEAR_RANGE_START = 1960
YEAR_RANGE_END = 2023

INDICATOR_NAME = 'Indicator Name'
COUNTRY_CODE = 'Country Code'

CO2_EMISSIONS = 'CO2 emissions (metric tons per capita)'

POP_CO2_EMISSIONS_MCF_TEMPL = Template("""
Node: Pop_CO2_Emissions_$location_abbr
typeOf: schema:StatisticalPopulation
populationType: dcs:Emissions
emittedThing: dcs:CarbonDioxide
location: dcid:$location
""")

OBS_CO2_EMISSIONS_MCF_TEMPL = Template("""
Node: Obs_CO2_Emissions_Amount_${location_abbr}_$observation_date
typeOf: schema:Observation
observedNode: l:Pop_CO2_Emissions_$location_abbr
observationDate: "$observation_date"
observationPeriod: "P1Y"
measuredProperty: dcs:amount
measuredValue: $measured_value
measurementDenominator: dcs:PerCapita
unit: dcs:MetricTon
""")

ELEC_CONSUMPTION = 'Electric power consumption (kWh per capita)'

POP_ELEC_CONSUMPTION_MCF_TEMPL = Template("""
Node: Pop_Consumption_Electricity_$location_abbr
typeOf: schema:StatisticalPopulation
populationType: dcs:Consumption
consumedThing: dcs:Electricity
location: dcid:$location
""")

OBS_ELEC_CONSUMPTION_MCF_TEMPL = Template("""
Node: Obs_Consumption_Electricity_Amount_${location_abbr}_$observation_date
typeOf: schema:Observation
observedNode: l:Pop_Consumption_Electricity_$location_abbr
observationDate: "$observation_date"
observationPeriod: "P1Y"
measuredProperty: dcs:amount
measuredValue: $measured_value
measurementDenominator: dcs:PerCapita
unit: dcs:KilowattHour
""")

ENERGY_USE = 'Energy use (kg of oil equivalent per capita)'

POP_ENERGY_USE_MCF_TEMPL = Template("""
Node: Pop_Energy_Use_$location_abbr
typeOf: schema:StatisticalPopulation
populationType: dcs:Consumption
consumedThing: dcs:Energy
location: dcid:$location
""")

OBS_ENERGY_USE_MCF_TEMPL = Template("""
Node: Obs_Energy_Use_Amount_${location_abbr}_$observation_date
typeOf: schema:Observation
observedNode: l:Pop_Energy_Use_$location_abbr
observationDate: "$observation_date"
observationPeriod: "P1Y"
measuredProperty: dcs:amount
measuredValue: $measured_value
measurementDenominator: dcs:PerCapita
unit: dcs:KilogramOfOilEquivalent
""")

GDP_NOMINAL = 'GDP (current US$)'

POP_GDP_NOMINAL_MCF_TEMPL = Template("""
Node: Pop_GDP_Nominal_$location_abbr
typeOf: schema:StatisticalPopulation
populationType: dcs:EconomicActivity
activitySource: dcs:GrossDomesticProduction
location: dcid:$location
""")

OBS_GDP_NOMINAL_MCF_TEMPL = Template("""
Node: Obs_GDP_Nominal_Amount_${location_abbr}_$observation_date
typeOf: schema:Observation
observedNode: l:Pop_GDP_Nominal_$location_abbr
observationDate: "$observation_date"
observationPeriod: "P1Y"
measuredProperty: dcs:amount
measuredValue: $measured_value
measurementQualifier: dcs:Nominal
unit: dcs:USDollar
""")

GDP_GROWTH_RATE = 'GDP growth (annual %)'

POP_GDP_GROWTH_RATE_MCF_TEMPL = Template("""
Node: Pop_GDP_Growth_Rate_$location_abbr
typeOf: schema:StatisticalPopulation
populationType: dcs:EconomicActivity
activitySource: dcs:GrossDomesticProduction
location: dcid:$location
""")

OBS_GDP_GROWTH_RATE_TEMPL = Template("""
Node: Obs_GDP_Growth_Rate_Amount_${location_abbr}_$observation_date
typeOf: schema:Observation
observedNode: l:Pop_GDP_Growth_Rate_$location_abbr
observationDate: "$observation_date"
observationPeriod: "P1Y"
measuredProperty: dcs:amount
growthRate: $measured_value
""")

GDP_NOM_PER_CAPITA = 'GDP per capita (current US$)'

# Use POP_GDP_NOMINAL_MCF_TEMPL as POP template so that only one pop node
# will be generated.

OBS_GDP_NOM_PER_CAPITA_TEMPL = Template("""
Node: Obs_GDP_Nom_Per_Capita_Amount_${location_abbr}_$observation_date
typeOf: schema:Observation
observedNode: l:Pop_GDP_Nominal_$location_abbr
observationDate: "$observation_date"
observationPeriod: "P1Y"
measuredProperty: dcs:amount
measuredValue: $measured_value
measurementQualifier: dcs:Nominal
measurementDenominator: dcs:PerCapita
unit: dcs:USDollar
""")

GNI_IN_PPP = 'GNI, PPP (current international $)'

POP_GNI_IN_PPP_MCF_TEMPL = Template("""
Node: Pop_GNI_In_PPP_$location_abbr
typeOf: schema:StatisticalPopulation
populationType: dcs:EconomicActivity
activitySource: dcs:GrossNationalIncome
location: dcid:$location
""")

OBS_GNI_IN_PPP_MCF_TEMPL = Template("""
Node: Obs_GNI_In_PPP_Amount_${location_abbr}_$observation_date
typeOf: schema:Observation
observedNode: l:Pop_GNI_In_PPP_$location_abbr
observationDate: "$observation_date"
observationPeriod: "P1Y"
measuredProperty: dcs:amount
measuredValue: $measured_value
measurementQualifier: dcs:PurchasingPowerParity
unit: dcs:InternationalDollar
""")

GNI_PPP_PER_CAPITA = 'GNI per capita, PPP (current international $)'

# Use POP_GNI_IN_PPP_MCF_TEMPL as POP template so that only one pop node will
# be generated.

OBS_GNI_PPP_PER_CAPITA_MCF_TEMPL = Template("""
Node: Obs_GNI_PPP_Per_Capita_Amount_${location_abbr}_$observation_date
typeOf: schema:Observation
observedNode: l:Pop_GNI_In_PPP_$location_abbr
observationDate: "$observation_date"
observationPeriod: "P1Y"
measuredProperty: dcs:amount
measuredValue: $measured_value
measurementQualifier: dcs:PurchasingPowerParity
measurementDenominator: dcs:PerCapita
unit: dcs:InternationalDollar
""")

INTER_USER_PERC = 'Individuals using the Internet (% of population)'

POP_INTER_USER_PERC_MCF_TEMPL = Template("""
Node: Pop_Inter_User_Perc_$location_abbr
typeOf: schema:StatisticalPopulation
populationType: schema:Person
isInternetUser: schema:True
location: dcid:$location
""")

OBS_INTER_USER_PERC_MCF_TEMPL = Template("""
Node: Obs_Inter_User_Perc_Count_${location_abbr}_$observation_date
typeOf: schema:Observation
observedNode: l:Pop_Inter_User_Perc_$location_abbr
observationDate: "$observation_date"
observationPeriod: "P1Y"
measuredProperty: dcs:count
measuredValue: $measured_value
measurementDenominator: dcs:PerCapita
scalingFactor: 100
""")

LIFE_EXPECTANCY = 'Life expectancy at birth, total (years)'

POP_LIFE_EXPECTANCY_MCF_TEMPL = Template("""
Node: Pop_Life_Expectancy_$location_abbr
typeOf: schema:StatisticalPopulation
populationType: schema:Person
location: dcid:$location
""")

OBS_LIFE_EXPECTANCY_MCF_TEMPL = Template("""
Node: Obs_Life_Expectancy_${location_abbr}_$observation_date
typeOf: schema:Observation
observedNode: l:Pop_Life_Expectancy_$location_abbr
observationDate: "$observation_date"
measuredProperty: dcs:lifeExpectancy
measuredValue: $measured_value
unit: dcs:Year
""")

FERTILITY_RATE = 'Fertility rate, total (births per woman)'

POP_FERTILITY_RATE_MCF_TEMPL = Template("""
Node: Pop_Fertility_Rate_$location_abbr
typeOf: schema:StatisticalPopulation
populationType: schema:Person
gender: schema:Female
location: dcid:$location
""")

OBS_FERTILITY_RATE_MCF_TEMPL = Template("""
Node: Obs_Fertility_Rate_${location_abbr}_$observation_date
typeOf: schema:Observation
observedNode: l:Pop_Fertility_Rate_$location_abbr
observationDate: "$observation_date"
measuredProperty: dcs:fertilityRate
measuredValue: $measured_value
""")

POPULATION = 'Population, total'

POP_POPULATION_MCF_TEMPL = Template("""
Node: Pop_Population_$location_abbr
typeOf: schema:StatisticalPopulation
populationType: schema:Person
location: dcid:$location
""")

OBS_POPULATION_MCF_TEMPL = Template("""
Node: Obs_Population_Count_${location_abbr}_$observation_date
typeOf: schema:Observation
observedNode: l:Pop_Population_$location_abbr
observationDate: "$observation_date"
observationPeriod: "P1Y"
measuredProperty: dcs:count
measuredValue: $measured_value
""")

POPU_GROWTH_RATE = 'Population growth (annual %)'

POP_POPU_GROWTH_RATE_MCF_TEMPL = Template("""
Node: Pop_Popu_Growth_Rate_$location_abbr
typeOf: schema:StatisticalPopulation
populationType: schema:Person
location: dcid:$location
""")

OBS_POPU_GROWTH_RATE_MCF_TEMPL = Template("""
Node: Obs_Popu_Growth_Rate_Count_${location_abbr}_$observation_date
typeOf: schema:Observation
observedNode: l:Pop_Popu_Growth_Rate_$location_abbr
observationDate: "$observation_date"
observationPeriod: "P1Y"
measuredProperty: dcs:count
growthRate: $measured_value
""")

INDICATOR_TEMP_MAP = {
CO2_EMISSIONS: (POP_CO2_EMISSIONS_MCF_TEMPL, OBS_CO2_EMISSIONS_MCF_TEMPL),
ELEC_CONSUMPTION:
(POP_ELEC_CONSUMPTION_MCF_TEMPL, OBS_ELEC_CONSUMPTION_MCF_TEMPL),
ENERGY_USE: (POP_ENERGY_USE_MCF_TEMPL, OBS_ENERGY_USE_MCF_TEMPL),
GDP_NOMINAL: (POP_GDP_NOMINAL_MCF_TEMPL, OBS_GDP_NOMINAL_MCF_TEMPL),
GDP_GROWTH_RATE: (POP_GDP_GROWTH_RATE_MCF_TEMPL, OBS_GDP_GROWTH_RATE_TEMPL),
GDP_NOM_PER_CAPITA:
(POP_GDP_NOMINAL_MCF_TEMPL, OBS_GDP_NOM_PER_CAPITA_TEMPL),
GNI_IN_PPP: (POP_GNI_IN_PPP_MCF_TEMPL, OBS_GNI_IN_PPP_MCF_TEMPL),
GNI_PPP_PER_CAPITA:
(POP_GNI_IN_PPP_MCF_TEMPL, OBS_GNI_PPP_PER_CAPITA_MCF_TEMPL),
INTER_USER_PERC:
(POP_INTER_USER_PERC_MCF_TEMPL, OBS_INTER_USER_PERC_MCF_TEMPL),
LIFE_EXPECTANCY:
(POP_LIFE_EXPECTANCY_MCF_TEMPL, OBS_LIFE_EXPECTANCY_MCF_TEMPL),
FERTILITY_RATE:
(POP_FERTILITY_RATE_MCF_TEMPL, OBS_FERTILITY_RATE_MCF_TEMPL),
POPULATION: (POP_POPULATION_MCF_TEMPL, OBS_POPULATION_MCF_TEMPL),
POPU_GROWTH_RATE:
(POP_POPU_GROWTH_RATE_MCF_TEMPL, OBS_POPU_GROWTH_RATE_MCF_TEMPL),
}
Loading
Loading