From 94212ed3187b665c2fb316538520bbff7b0522a1 Mon Sep 17 00:00:00 2001 From: Anderson Banihirwe <13301940+andersy005@users.noreply.github.com> Date: Thu, 16 Nov 2023 00:40:46 -0800 Subject: [PATCH] update arb to align with the standardized processing and validation flow (#16) --- offsets_db_data/arb.py | 11 ++++++++++- offsets_db_data/verra.py | 2 +- requirements-dev.txt | 1 + tests/test_integration.py | 17 +++++++++++++---- tests/test_verra.py | 12 ++++++------ 5 files changed, 31 insertions(+), 12 deletions(-) diff --git a/offsets_db_data/arb.py b/offsets_db_data/arb.py index 8ea1003..dd1d198 100644 --- a/offsets_db_data/arb.py +++ b/offsets_db_data/arb.py @@ -3,6 +3,9 @@ import pandas as pd import pandas_flavor as pf +from offsets_db_data.common import convert_to_datetime # noqa: F401 +from offsets_db_data.models import credit_without_id_schema + def _get_registry(item): registry_map = { @@ -16,7 +19,7 @@ def _get_registry(item): @pf.register_dataframe_method -def process_arb_data(df: pd.DataFrame) -> pd.DataFrame: +def process_arb(df: pd.DataFrame) -> pd.DataFrame: """ Parameters @@ -31,6 +34,8 @@ def process_arb_data(df: pd.DataFrame) -> pd.DataFrame: """ + df = df.copy() + rename_d = { 'OPR Project ID': 'opr_id', 'ARB Offset Credits Issued': 'issuance', @@ -112,4 +117,8 @@ def process_arb_data(df: pd.DataFrame) -> pd.DataFrame: data['registry'] = data.project_id.apply(_get_registry) data['vintage'] = data['vintage'].astype(int) + data = data.convert_to_datetime(columns=['transaction_date']).validate( + schema=credit_without_id_schema + ) + return data diff --git a/offsets_db_data/verra.py b/offsets_db_data/verra.py index e18b53d..d9f0096 100644 --- a/offsets_db_data/verra.py +++ b/offsets_db_data/verra.py @@ -71,7 +71,7 @@ def calculate_verra_retirements(df: pd.DataFrame) -> pd.DataFrame: @pf.register_dataframe_method -def process_verra_transactions( +def process_verra_credits( df: pd.DataFrame, *, download_type: str = 'transactions', diff --git a/requirements-dev.txt b/requirements-dev.txt index ef68a30..2e335d0 100644 --- a/requirements-dev.txt +++ b/requirements-dev.txt @@ -4,3 +4,4 @@ pytest-xdist==3.3.* pytest-mock==3.10.* requests-mock==1.11.* hypothesis==6.86.* +openpyxl diff --git a/tests/test_integration.py b/tests/test_integration.py index b16f2b9..d113e1a 100644 --- a/tests/test_integration.py +++ b/tests/test_integration.py @@ -2,6 +2,7 @@ import pytest from offsets_db_data.apx import * # noqa: F403 +from offsets_db_data.arb import * # noqa: F403 from offsets_db_data.gcc import * # noqa: F403 from offsets_db_data.gs import * # noqa: F403 from offsets_db_data.models import credit_without_id_schema, project_schema @@ -18,10 +19,18 @@ def bucket() -> str: return 's3://carbonplan-offsets-db/raw' -def test_verra(date, bucket): +@pytest.fixture +def arb() -> pd.DataFrame: + data = pd.read_excel( + 's3://carbonplan-offsets-db/raw/2023-11-10/arb/nc-arboc_issuance.xlsx', sheet_name=3 + ) + return data.process_arb() + + +def test_verra(date, bucket, arb): projects = pd.read_csv(f'{bucket}/{date}/verra/projects.csv.gz') credits = pd.read_csv(f'{bucket}/{date}/verra/transactions.csv.gz') - df_credits = credits.process_verra_transactions() + df_credits = credits.process_verra_credits(arb=arb) df_projects = projects.process_verra_projects(credits=df_credits) project_schema.validate(df_projects) credit_without_id_schema.validate(df_credits) @@ -35,14 +44,14 @@ def test_verra(date, bucket): ('climate-action-reserve', ['issuances', 'retirements', 'cancellations']), ], ) -def test_apx(date, bucket, registry, download_types): +def test_apx(date, bucket, arb, registry, download_types): dfs = [] for key in download_types: credits = pd.read_csv(f'{bucket}/{date}/{registry}/{key}.csv.gz') p = credits.process_apx_credits(download_type=key, registry_name=registry) dfs.append(p) - df_credits = pd.concat(dfs) + df_credits = pd.concat(dfs).merge_with_arb(arb=arb) credit_without_id_schema.validate(df_credits) projects = pd.read_csv(f'{bucket}/{date}/{registry}/projects.csv.gz') diff --git a/tests/test_verra.py b/tests/test_verra.py index 8277d15..de86886 100644 --- a/tests/test_verra.py +++ b/tests/test_verra.py @@ -8,8 +8,8 @@ calculate_verra_retirements, determine_verra_transaction_type, generate_verra_project_ids, + process_verra_credits, process_verra_projects, - process_verra_transactions, set_verra_transaction_dates, set_verra_vintage_year, ) @@ -356,7 +356,7 @@ def test_set_verra_vintage_year(verra_transactions): def test_calculate_verra_issuances(verra_transactions): - # Process the verra_transactions similar to process_verra_transactions + # Process the verra_transactions similar to process_verra_credits processed_data = ( verra_transactions.set_registry(registry_name='verra') .generate_verra_project_ids(prefix='VCS') @@ -384,7 +384,7 @@ def test_calculate_verra_issuances(verra_transactions): def test_calculate_verra_retirements(verra_transactions): - # Process the verra_transactions similar to process_verra_transactions + # Process the verra_transactions similar to process_verra_credits processed_data = ( verra_transactions.set_registry(registry_name='verra') .generate_verra_project_ids(prefix='VCS') @@ -434,7 +434,7 @@ def test_add_vcs_compliance_projects(verra_projects): def test_process_verra_projects(verra_projects, verra_transactions): - verra_credits = process_verra_transactions(verra_transactions) + verra_credits = process_verra_credits(verra_transactions) df = process_verra_projects( verra_projects, credits=verra_credits, registry_name='verra', download_type='projects' ) @@ -464,8 +464,8 @@ def test_process_verra_projects(verra_projects, verra_transactions): def test_process_verra_projects_with_totals_and_dates(verra_projects, verra_transactions): # Process the verra_transactions as per your existing pipeline - # Assuming process_verra_transactions or similar functions are in place - verra_credits = process_verra_transactions(verra_transactions) + # Assuming process_verra_credits or similar functions are in place + verra_credits = process_verra_credits(verra_transactions) # Process the verra_projects processed_projects = process_verra_projects(