Skip to content

Commit

Permalink
update arb to align with the standardized processing and validation flow (#16)
Browse files Browse the repository at this point in the history
  • Loading branch information
andersy005 authored Nov 16, 2023
1 parent f6f6b47 commit 94212ed
Show file tree
Hide file tree
Showing 5 changed files with 31 additions and 12 deletions.
11 changes: 10 additions & 1 deletion offsets_db_data/arb.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,9 @@
import pandas as pd
import pandas_flavor as pf

from offsets_db_data.common import convert_to_datetime # noqa: F401
from offsets_db_data.models import credit_without_id_schema


def _get_registry(item):
registry_map = {
Expand All @@ -16,7 +19,7 @@ def _get_registry(item):


@pf.register_dataframe_method
def process_arb_data(df: pd.DataFrame) -> pd.DataFrame:
def process_arb(df: pd.DataFrame) -> pd.DataFrame:
"""
Parameters
Expand All @@ -31,6 +34,8 @@ def process_arb_data(df: pd.DataFrame) -> pd.DataFrame:
"""

df = df.copy()

rename_d = {
'OPR Project ID': 'opr_id',
'ARB Offset Credits Issued': 'issuance',
Expand Down Expand Up @@ -112,4 +117,8 @@ def process_arb_data(df: pd.DataFrame) -> pd.DataFrame:
data['registry'] = data.project_id.apply(_get_registry)
data['vintage'] = data['vintage'].astype(int)

data = data.convert_to_datetime(columns=['transaction_date']).validate(
schema=credit_without_id_schema
)

return data
2 changes: 1 addition & 1 deletion offsets_db_data/verra.py
Original file line number Diff line number Diff line change
Expand Up @@ -71,7 +71,7 @@ def calculate_verra_retirements(df: pd.DataFrame) -> pd.DataFrame:


@pf.register_dataframe_method
def process_verra_transactions(
def process_verra_credits(
df: pd.DataFrame,
*,
download_type: str = 'transactions',
Expand Down
1 change: 1 addition & 0 deletions requirements-dev.txt
Original file line number Diff line number Diff line change
Expand Up @@ -4,3 +4,4 @@ pytest-xdist==3.3.*
pytest-mock==3.10.*
requests-mock==1.11.*
hypothesis==6.86.*
openpyxl
17 changes: 13 additions & 4 deletions tests/test_integration.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
import pytest

from offsets_db_data.apx import * # noqa: F403
from offsets_db_data.arb import * # noqa: F403
from offsets_db_data.gcc import * # noqa: F403
from offsets_db_data.gs import * # noqa: F403
from offsets_db_data.models import credit_without_id_schema, project_schema
Expand All @@ -18,10 +19,18 @@ def bucket() -> str:
return 's3://carbonplan-offsets-db/raw'


def test_verra(date, bucket):
# Fixture: load the raw ARB issuance workbook and return it after running the
# `process_arb` DataFrame method on it.
@pytest.fixture
def arb() -> pd.DataFrame:
# `sheet_name=3` is an integer, which pandas treats as a zero-indexed sheet
# position (i.e. the fourth sheet of the workbook).
# NOTE(review): the snapshot date in this path is hard-coded, whereas the tests
# in this module build their paths from the `bucket` and `date` fixtures --
# confirm this is intentional.
# NOTE(review): reading .xlsx presumably relies on the `openpyxl` entry in
# requirements-dev.txt -- confirm.
data = pd.read_excel(
's3://carbonplan-offsets-db/raw/2023-11-10/arb/nc-arboc_issuance.xlsx', sheet_name=3
)
# `process_arb` is called as a DataFrame method; presumably made available by
# the star-import of `offsets_db_data.arb` in this module -- verify.
return data.process_arb()


def test_verra(date, bucket, arb):
projects = pd.read_csv(f'{bucket}/{date}/verra/projects.csv.gz')
credits = pd.read_csv(f'{bucket}/{date}/verra/transactions.csv.gz')
df_credits = credits.process_verra_transactions()
df_credits = credits.process_verra_credits(arb=arb)
df_projects = projects.process_verra_projects(credits=df_credits)
project_schema.validate(df_projects)
credit_without_id_schema.validate(df_credits)
Expand All @@ -35,14 +44,14 @@ def test_verra(date, bucket):
('climate-action-reserve', ['issuances', 'retirements', 'cancellations']),
],
)
def test_apx(date, bucket, registry, download_types):
def test_apx(date, bucket, arb, registry, download_types):
dfs = []
for key in download_types:
credits = pd.read_csv(f'{bucket}/{date}/{registry}/{key}.csv.gz')
p = credits.process_apx_credits(download_type=key, registry_name=registry)
dfs.append(p)

df_credits = pd.concat(dfs)
df_credits = pd.concat(dfs).merge_with_arb(arb=arb)
credit_without_id_schema.validate(df_credits)

projects = pd.read_csv(f'{bucket}/{date}/{registry}/projects.csv.gz')
Expand Down
12 changes: 6 additions & 6 deletions tests/test_verra.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,8 +8,8 @@
calculate_verra_retirements,
determine_verra_transaction_type,
generate_verra_project_ids,
process_verra_credits,
process_verra_projects,
process_verra_transactions,
set_verra_transaction_dates,
set_verra_vintage_year,
)
Expand Down Expand Up @@ -356,7 +356,7 @@ def test_set_verra_vintage_year(verra_transactions):


def test_calculate_verra_issuances(verra_transactions):
# Process the verra_transactions similar to process_verra_transactions
# Process the verra_transactions similar to process_verra_credits
processed_data = (
verra_transactions.set_registry(registry_name='verra')
.generate_verra_project_ids(prefix='VCS')
Expand Down Expand Up @@ -384,7 +384,7 @@ def test_calculate_verra_issuances(verra_transactions):


def test_calculate_verra_retirements(verra_transactions):
# Process the verra_transactions similar to process_verra_transactions
# Process the verra_transactions similar to process_verra_credits
processed_data = (
verra_transactions.set_registry(registry_name='verra')
.generate_verra_project_ids(prefix='VCS')
Expand Down Expand Up @@ -434,7 +434,7 @@ def test_add_vcs_compliance_projects(verra_projects):


def test_process_verra_projects(verra_projects, verra_transactions):
verra_credits = process_verra_transactions(verra_transactions)
verra_credits = process_verra_credits(verra_transactions)
df = process_verra_projects(
verra_projects, credits=verra_credits, registry_name='verra', download_type='projects'
)
Expand Down Expand Up @@ -464,8 +464,8 @@ def test_process_verra_projects(verra_projects, verra_transactions):

def test_process_verra_projects_with_totals_and_dates(verra_projects, verra_transactions):
# Process the verra_transactions as per your existing pipeline
# Assuming process_verra_transactions or similar functions are in place
verra_credits = process_verra_transactions(verra_transactions)
# Assuming process_verra_credits or similar functions are in place
verra_credits = process_verra_credits(verra_transactions)

# Process the verra_projects
processed_projects = process_verra_projects(
Expand Down

0 comments on commit 94212ed

Please sign in to comment.