Skip to content

Commit

Permalink
Merge branch 'master' into release/1.4.5
Browse files Browse the repository at this point in the history
  • Loading branch information
ralphrass authored Oct 14, 2024
2 parents 9ed9af3 + c26d20b commit 9e710cb
Show file tree
Hide file tree
Showing 8 changed files with 74 additions and 6 deletions.
30 changes: 30 additions & 0 deletions .checklist.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
apiVersion: quintoandar.com.br/checklist/v2
kind: ServiceChecklist
metadata:
name: butterfree
spec:
description: >-
A solution for Feature Stores.
costCenter: C055
department: engineering
lifecycle: production
docs: true

ownership:
team: data_products_mlops
line: tech_platform
owner: otavio.cals@quintoandar.com.br

libraries:
- name: butterfree
type: common-usage
path: https://quintoandar.github.io/python-package-server/
description: A lib to build Feature Stores.
registries:
- github-packages
tier: T0

channels:
squad: 'mlops'
alerts: 'data-products-reports'
17 changes: 17 additions & 0 deletions .github/workflows/skip_lint.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
# This workflow exists only because the github-actions/runner-linter check must be
# required for PRs to develop, but not for the merge queue when it merges into develop.
# GitHub does not support that distinction yet, so this empty job reports the check instead.

name: 'Skip github-actions/runner-linter check at merge queue'

on:
merge_group:

jobs:
empty_job:
name: 'github-actions/runner-linter'
runs-on: github-actions-developers-runner
steps:
- name: Skip github-actions/runner-linter check at merge queue
run: |
echo "Done"
2 changes: 1 addition & 1 deletion butterfree/clients/cassandra_client.py
Original file line number Diff line number Diff line change
Expand Up @@ -211,4 +211,4 @@ def create_table(self, columns: List[CassandraColumn], table: str) -> None:
"""
query = self._get_create_table_query(columns, table)

self.sql(query)
self.sql(query)
9 changes: 6 additions & 3 deletions butterfree/pipelines/feature_set_pipeline.py
Original file line number Diff line number Diff line change
Expand Up @@ -221,9 +221,12 @@ def run(
# Step 2: Repartition and sort if required, avoid if not necessary.
if partition_by:
order_by = order_by or partition_by
dataframe = repartition_sort_df(
dataframe, partition_by, order_by, num_processors
)
current_partitions = dataframe.rdd.getNumPartitions()
optimal_partitions = num_processors or current_partitions
if current_partitions != optimal_partitions:
dataframe = repartition_sort_df(
dataframe, partition_by, order_by, num_processors
)

# Step 3: Construct the feature set dataframe using defined transformations.
transformed_dataframe = self.feature_set.construct(
Expand Down
4 changes: 4 additions & 0 deletions butterfree/transform/aggregated_feature_set.py
Original file line number Diff line number Diff line change
Expand Up @@ -636,6 +636,10 @@ def construct(
else:
output_df = self._aggregate(output_df, features=self.features)

output_df = self.incremental_strategy.filter_with_incremental_strategy(
dataframe=output_df, start_date=start_date, end_date=end_date
)

output_df = output_df.select(*self.columns).replace( # type: ignore
float("nan"), None
)
Expand Down
16 changes: 16 additions & 0 deletions docs/source/butterfree.dataframe_service.rst
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,22 @@ butterfree.dataframe\_service.partitioning module
butterfree.dataframe\_service.incremental\_strategy module
----------------------------------------------------------

.. automodule:: butterfree.dataframe_service.incremental_strategy
:members:
:undoc-members:
:show-inheritance:

butterfree.dataframe\_service.partitioning module
-------------------------------------------------

.. automodule:: butterfree.dataframe_service.partitioning
:members:
:undoc-members:
:show-inheritance:

butterfree.dataframe\_service.repartition module
------------------------------------------------

.. automodule:: butterfree.dataframe_service.repartition
:members:
:undoc-members:
Expand Down
Empty file added logging.json
Empty file.
2 changes: 0 additions & 2 deletions tests/unit/butterfree/clients/test_cassandra_client.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,6 @@
from typing import Any, Dict, List
from unittest.mock import MagicMock

import pytest

from butterfree.clients import CassandraClient
from butterfree.clients.cassandra_client import (
EMPTY_STRING_HOST_ERROR,
Expand Down

0 comments on commit 9e710cb

Please sign in to comment.