diff --git a/.github/workflows/publish.yml b/.github/workflows/publish.yml
index 0957a958a..d33e4aa03 100644
--- a/.github/workflows/publish.yml
+++ b/.github/workflows/publish.yml
@@ -9,8 +9,7 @@ jobs:
   Pipeline:
     if: github.ref == 'refs/heads/master'
-    runs-on: ubuntu-22.04
-    container: quintoandar/python-3-7-java
+    runs-on: ubuntu-latest

     steps:
     - uses: actions/checkout@v2
diff --git a/Makefile b/Makefile
index 3164f5039..bf9ccd647 100644
--- a/Makefile
+++ b/Makefile
@@ -9,8 +9,8 @@ VERSION := $(shell grep __version__ setup.py | head -1 | cut -d \" -f2 | cut -d
 .PHONY: environment
 ## create virtual environment for butterfree
 environment:
-	@pyenv install -s 3.7.13
-	@pyenv virtualenv 3.7.13 butterfree
+	@pyenv install -s 3.9.19
+	@pyenv virtualenv 3.9.19 butterfree
 	@pyenv local butterfree
 	@PYTHONPATH=. python -m pip install --upgrade pip
diff --git a/butterfree/_cli/main.py b/butterfree/_cli/main.py
index 636fdb25e..b8b12f14d 100644
--- a/butterfree/_cli/main.py
+++ b/butterfree/_cli/main.py
@@ -2,7 +2,7 @@

 from butterfree._cli import migrate

-app = typer.Typer()
+app = typer.Typer(no_args_is_help=True)
 app.add_typer(migrate.app, name="migrate")

 if __name__ == "__main__":
diff --git a/butterfree/_cli/migrate.py b/butterfree/_cli/migrate.py
index ed62f1a24..f51615097 100644
--- a/butterfree/_cli/migrate.py
+++ b/butterfree/_cli/migrate.py
@@ -16,7 +16,9 @@
 from butterfree.migrations.database_migration import ALLOWED_DATABASE
 from butterfree.pipelines import FeatureSetPipeline

-app = typer.Typer(help="Apply the automatic migrations in a database.")
+app = typer.Typer(
+    help="Apply the automatic migrations in a database.", no_args_is_help=True
+)

 logger = __logger("migrate", True)

@@ -89,7 +91,7 @@ def __fs_objects(path: str) -> Set[FeatureSetPipeline]:
             instances.add(value)

     logger.info("Creating instances...")
-    return set(value() for value in instances)
+    return set(value() for value in instances)  # type: ignore

 PATH = typer.Argument(
diff --git a/butterfree/clients/__init__.py b/butterfree/clients/__init__.py
index 5f6f0ffa6..7e8d1a95b 100644
--- a/butterfree/clients/__init__.py
+++ b/butterfree/clients/__init__.py
@@ -1,4 +1,5 @@
 """Holds connection clients."""
+
 from butterfree.clients.abstract_client import AbstractClient
 from butterfree.clients.cassandra_client import CassandraClient
 from butterfree.clients.spark_client import SparkClient
diff --git a/butterfree/clients/abstract_client.py b/butterfree/clients/abstract_client.py
index ce5d33b64..b9027bd88 100644
--- a/butterfree/clients/abstract_client.py
+++ b/butterfree/clients/abstract_client.py
@@ -1,6 +1,7 @@
 """Abstract class for database clients."""
+
 from abc import ABC, abstractmethod
-from typing import Any
+from typing import Any, Optional

 class AbstractClient(ABC):
@@ -25,7 +26,7 @@ def sql(self, query: str) -> Any:
         pass

     @abstractmethod
-    def get_schema(self, table: str, database: str = None) -> Any:
+    def get_schema(self, table: str, database: Optional[str] = None) -> Any:
         """Returns desired table schema.
         Attributes:
diff --git a/butterfree/clients/cassandra_client.py b/butterfree/clients/cassandra_client.py
index 5a7231555..714e82483 100644
--- a/butterfree/clients/cassandra_client.py
+++ b/butterfree/clients/cassandra_client.py
@@ -1,4 +1,5 @@
 """CassandraClient entity."""
+
 from ssl import CERT_REQUIRED, PROTOCOL_TLSv1
 from typing import Dict, List, Optional

@@ -102,7 +103,9 @@ def sql(self, query: str) -> ResponseFuture:
         """
         return self.conn.execute(query)

-    def get_schema(self, table: str, database: str = None) -> List[Dict[str, str]]:
+    def get_schema(
+        self, table: str, database: Optional[str] = None
+    ) -> List[Dict[str, str]]:
         """Returns desired table schema.

         Attributes:
diff --git a/butterfree/clients/spark_client.py b/butterfree/clients/spark_client.py
index e2b868caf..933c21651 100644
--- a/butterfree/clients/spark_client.py
+++ b/butterfree/clients/spark_client.py
@@ -69,7 +69,7 @@ def read(

         return df_reader.format(format).load(path=path, **options)  # type: ignore

-    def read_table(self, table: str, database: str = None) -> DataFrame:
+    def read_table(self, table: str, database: Optional[str] = None) -> DataFrame:
         """Use the SparkSession.read interface to read a metastore table.

         Args:
@@ -179,9 +179,9 @@ def write_table(
         database: Optional[str],
         table_name: str,
         path: str,
-        format_: str = None,
-        mode: str = None,
-        partition_by: List[str] = None,
+        format_: Optional[str] = None,
+        mode: Optional[str] = None,
+        partition_by: Optional[List[str]] = None,
         **options: Any,
     ) -> None:
         """Receive a spark DataFrame and write it as a table in metastore.
@@ -231,7 +231,10 @@ def create_temporary_view(dataframe: DataFrame, name: str) -> Any:
         return dataframe.writeStream.format("memory").queryName(name).start()

     def add_table_partitions(
-        self, partitions: List[Dict[str, Any]], table: str, database: str = None
+        self,
+        partitions: List[Dict[str, Any]],
+        table: str,
+        database: Optional[str] = None,
     ) -> None:
         """Add partitions to an existing table.

@@ -259,9 +262,11 @@ def add_table_partitions(
         key_values_expr = [
             ", ".join(
                 [
-                    "{} = {}".format(k, v)
-                    if not isinstance(v, str)
-                    else "{} = '{}'".format(k, v)
+                    (
+                        "{} = {}".format(k, v)
+                        if not isinstance(v, str)
+                        else "{} = '{}'".format(k, v)
+                    )
                     for k, v in partition.items()
                 ]
             )
@@ -314,7 +319,9 @@ def _convert_schema(self, schema: DataFrame) -> List[Dict[str, str]]:

         return converted_schema

-    def get_schema(self, table: str, database: str = None) -> List[Dict[str, str]]:
+    def get_schema(
+        self, table: str, database: Optional[str] = None
+    ) -> List[Dict[str, str]]:
         """Returns desired table schema.
         Attributes:
diff --git a/butterfree/configs/db/cassandra_config.py b/butterfree/configs/db/cassandra_config.py
index a038cb177..d60bb6977 100644
--- a/butterfree/configs/db/cassandra_config.py
+++ b/butterfree/configs/db/cassandra_config.py
@@ -1,4 +1,5 @@
 """Holds configurations to read and write with Spark to Cassandra DB."""
+
 from typing import Any, Dict, List, Optional

 from butterfree.configs import environment
@@ -32,18 +33,18 @@ class CassandraConfig(AbstractWriteConfig):

     def __init__(
         self,
-        username: str = None,
-        password: str = None,
-        host: str = None,
-        keyspace: str = None,
-        mode: str = None,
-        format_: str = None,
-        stream_processing_time: str = None,
-        stream_output_mode: str = None,
-        stream_checkpoint_path: str = None,
-        read_consistency_level: str = None,
-        write_consistency_level: str = None,
-        local_dc: str = None,
+        username: Optional[str] = None,
+        password: Optional[str] = None,
+        host: Optional[str] = None,
+        keyspace: Optional[str] = None,
+        mode: Optional[str] = None,
+        format_: Optional[str] = None,
+        stream_processing_time: Optional[str] = None,
+        stream_output_mode: Optional[str] = None,
+        stream_checkpoint_path: Optional[str] = None,
+        read_consistency_level: Optional[str] = None,
+        write_consistency_level: Optional[str] = None,
+        local_dc: Optional[str] = None,
     ):
         self.username = username
         self.password = password
diff --git a/butterfree/configs/db/kafka_config.py b/butterfree/configs/db/kafka_config.py
index 79cad15b2..e0c14baf3 100644
--- a/butterfree/configs/db/kafka_config.py
+++ b/butterfree/configs/db/kafka_config.py
@@ -1,4 +1,5 @@
 """Holds configurations to read and write with Spark to Kafka."""
+
 from typing import Any, Dict, List, Optional

 from butterfree.configs import environment
@@ -25,13 +26,13 @@ class KafkaConfig(AbstractWriteConfig):

     def __init__(
         self,
-        kafka_topic: str = None,
-        kafka_connection_string: str = None,
-        mode: str = None,
-        format_: str = None,
-        stream_processing_time: str = None,
-        stream_output_mode: str = None,
-        stream_checkpoint_path: str = None,
+        kafka_topic: Optional[str] = None,
+        kafka_connection_string: Optional[str] = None,
+        mode: Optional[str] = None,
+        format_: Optional[str] = None,
+        stream_processing_time: Optional[str] = None,
+        stream_output_mode: Optional[str] = None,
+        stream_checkpoint_path: Optional[str] = None,
     ):
         self.kafka_topic = kafka_topic
         self.kafka_connection_string = kafka_connection_string
@@ -147,4 +148,4 @@ def translate(self, schema: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
             Kafka schema.
""" - pass + return [{}] diff --git a/butterfree/configs/db/metastore_config.py b/butterfree/configs/db/metastore_config.py index ff7ed01df..323aded0c 100644 --- a/butterfree/configs/db/metastore_config.py +++ b/butterfree/configs/db/metastore_config.py @@ -25,10 +25,10 @@ class MetastoreConfig(AbstractWriteConfig): def __init__( self, - path: str = None, - mode: str = None, - format_: str = None, - file_system: str = None, + path: Optional[str] = None, + mode: Optional[str] = None, + format_: Optional[str] = None, + file_system: Optional[str] = None, ): self.path = path self.mode = mode diff --git a/butterfree/configs/environment.py b/butterfree/configs/environment.py index f56efc5d5..f6ba18a5b 100644 --- a/butterfree/configs/environment.py +++ b/butterfree/configs/environment.py @@ -1,4 +1,5 @@ """Holds functions for managing the running environment.""" + import os from typing import Optional @@ -34,7 +35,9 @@ def __init__(self, variable_name: str): ) -def get_variable(variable_name: str, default_value: str = None) -> Optional[str]: +def get_variable( + variable_name: str, default_value: Optional[str] = None +) -> Optional[str]: """Gets an environment variable. The variable comes from it's explicitly declared value in the running diff --git a/butterfree/constants/__init__.py b/butterfree/constants/__init__.py index ec70d41b5..aa0c76e65 100644 --- a/butterfree/constants/__init__.py +++ b/butterfree/constants/__init__.py @@ -1,4 +1,5 @@ """Holds constant attributes that are common for Butterfree.""" + from butterfree.constants.data_type import DataType __all__ = ["DataType"] diff --git a/butterfree/constants/migrations.py b/butterfree/constants/migrations.py index b1c0947db..f31d08418 100644 --- a/butterfree/constants/migrations.py +++ b/butterfree/constants/migrations.py @@ -1,4 +1,5 @@ """Migrations' Constants.""" + from butterfree.constants import columns PARTITION_BY = [ diff --git a/butterfree/dataframe_service/__init__.py b/butterfree/dataframe_service/__init__.py index c227dae24..5fd02d453 100644 --- a/butterfree/dataframe_service/__init__.py +++ b/butterfree/dataframe_service/__init__.py @@ -1,4 +1,5 @@ """Dataframe optimization components regarding Butterfree.""" + from butterfree.dataframe_service.incremental_strategy import IncrementalStrategy from butterfree.dataframe_service.partitioning import extract_partition_values from butterfree.dataframe_service.repartition import repartition_df, repartition_sort_df diff --git a/butterfree/dataframe_service/incremental_strategy.py b/butterfree/dataframe_service/incremental_strategy.py index 6554d3b77..957064f15 100644 --- a/butterfree/dataframe_service/incremental_strategy.py +++ b/butterfree/dataframe_service/incremental_strategy.py @@ -2,6 +2,8 @@ from __future__ import annotations +from typing import Optional + from pyspark.sql import DataFrame @@ -18,7 +20,7 @@ class IncrementalStrategy: filter can properly work with the defined upper and lower bounds. 
""" - def __init__(self, column: str = None): + def __init__(self, column: Optional[str] = None): self.column = column def from_milliseconds(self, column_name: str) -> IncrementalStrategy: @@ -32,7 +34,9 @@ def from_milliseconds(self, column_name: str) -> IncrementalStrategy: """ return IncrementalStrategy(column=f"from_unixtime({column_name}/ 1000.0)") - def from_string(self, column_name: str, mask: str = None) -> IncrementalStrategy: + def from_string( + self, column_name: str, mask: Optional[str] = None + ) -> IncrementalStrategy: """Create a column expression from ts column defined as a simple string. Args: @@ -66,7 +70,9 @@ def from_year_month_day_partitions( f"'-', string({day_column}))" ) - def get_expression(self, start_date: str = None, end_date: str = None) -> str: + def get_expression( + self, start_date: Optional[str] = None, end_date: Optional[str] = None + ) -> str: """Get the incremental filter expression using the defined dates. Both arguments can be set to defined a specific date interval, but it's @@ -95,7 +101,10 @@ def get_expression(self, start_date: str = None, end_date: str = None) -> str: return f"date({self.column}) <= date('{end_date}')" def filter_with_incremental_strategy( - self, dataframe: DataFrame, start_date: str = None, end_date: str = None + self, + dataframe: DataFrame, + start_date: Optional[str] = None, + end_date: Optional[str] = None, ) -> DataFrame: """Filters the dataframe according to the date boundaries. diff --git a/butterfree/dataframe_service/repartition.py b/butterfree/dataframe_service/repartition.py index 8635557f9..e84202ba7 100644 --- a/butterfree/dataframe_service/repartition.py +++ b/butterfree/dataframe_service/repartition.py @@ -1,5 +1,6 @@ """Module where there are repartition methods.""" -from typing import List + +from typing import List, Optional from pyspark.sql.dataframe import DataFrame @@ -10,7 +11,7 @@ def _num_partitions_definition( - num_processors: int = None, num_partitions: int = None + num_processors: Optional[int] = None, num_partitions: Optional[int] = None ) -> int: num_partitions = ( num_processors * PARTITION_PROCESSOR_RATIO @@ -24,8 +25,8 @@ def _num_partitions_definition( def repartition_df( dataframe: DataFrame, partition_by: List[str], - num_partitions: int = None, - num_processors: int = None, + num_partitions: Optional[int] = None, + num_processors: Optional[int] = None, ) -> DataFrame: """Partition the DataFrame. @@ -47,8 +48,8 @@ def repartition_sort_df( dataframe: DataFrame, partition_by: List[str], order_by: List[str], - num_processors: int = None, - num_partitions: int = None, + num_processors: Optional[int] = None, + num_partitions: Optional[int] = None, ) -> DataFrame: """Partition and Sort the DataFrame. 
diff --git a/butterfree/extract/__init__.py b/butterfree/extract/__init__.py
index bb056255b..64c8ae4a1 100644
--- a/butterfree/extract/__init__.py
+++ b/butterfree/extract/__init__.py
@@ -1,4 +1,5 @@
 """The Source Component of a Feature Set."""
+
 from butterfree.extract.source import Source

 __all__ = ["Source"]
diff --git a/butterfree/extract/pre_processing/__init__.py b/butterfree/extract/pre_processing/__init__.py
index 72b37c4db..e142de6d1 100644
--- a/butterfree/extract/pre_processing/__init__.py
+++ b/butterfree/extract/pre_processing/__init__.py
@@ -1,4 +1,5 @@
 """Pre Processing Components regarding Readers."""
+
 from butterfree.extract.pre_processing.explode_json_column_transform import (
     explode_json_column,
 )
diff --git a/butterfree/extract/pre_processing/explode_json_column_transform.py b/butterfree/extract/pre_processing/explode_json_column_transform.py
index db79b5ce0..76c90f739 100644
--- a/butterfree/extract/pre_processing/explode_json_column_transform.py
+++ b/butterfree/extract/pre_processing/explode_json_column_transform.py
@@ -1,4 +1,5 @@
 """Explode json column for dataframes."""
+
 from pyspark.sql.dataframe import DataFrame, StructType
 from pyspark.sql.functions import from_json, get_json_object
diff --git a/butterfree/extract/pre_processing/filter_transform.py b/butterfree/extract/pre_processing/filter_transform.py
index 78e5df78f..a7e4fff81 100644
--- a/butterfree/extract/pre_processing/filter_transform.py
+++ b/butterfree/extract/pre_processing/filter_transform.py
@@ -1,4 +1,5 @@
 """Module where filter DataFrames coming from readers."""
+
 from pyspark.sql.dataframe import DataFrame
diff --git a/butterfree/extract/pre_processing/forward_fill_transform.py b/butterfree/extract/pre_processing/forward_fill_transform.py
index 96d9bcdda..2d3a232d6 100644
--- a/butterfree/extract/pre_processing/forward_fill_transform.py
+++ b/butterfree/extract/pre_processing/forward_fill_transform.py
@@ -1,6 +1,7 @@
 """Forward Fill Transform for dataframes."""
+
 import sys
-from typing import List, Union
+from typing import List, Optional, Union

 from pyspark.sql import DataFrame, Window, functions

@@ -10,7 +11,7 @@ def forward_fill(
     partition_by: Union[str, List[str]],
     order_by: Union[str, List[str]],
     fill_column: str,
-    filled_column: str = None,
+    filled_column: Optional[str] = None,
 ) -> DataFrame:
     """Applies a forward fill to a single column.
diff --git a/butterfree/extract/pre_processing/pivot_transform.py b/butterfree/extract/pre_processing/pivot_transform.py
index 078b47464..f255f4577 100644
--- a/butterfree/extract/pre_processing/pivot_transform.py
+++ b/butterfree/extract/pre_processing/pivot_transform.py
@@ -1,5 +1,6 @@
 """Pivot Transform for dataframes."""
-from typing import Callable, List, Union
+
+from typing import Callable, List, Optional, Union

 from pyspark.sql import DataFrame, functions
 from pyspark.sql.types import DataType
@@ -13,8 +14,8 @@ def pivot(
     pivot_column: str,
     agg_column: str,
     aggregation: Callable,
-    mock_value: Union[float, str] = None,
-    mock_type: Union[DataType, str] = None,
+    mock_value: Optional[Union[float, str]] = None,
+    mock_type: Optional[Union[DataType, str]] = None,
     with_forward_fill: bool = False,
 ) -> DataFrame:
     """Defines a pivot transformation.
diff --git a/butterfree/extract/pre_processing/replace_transform.py b/butterfree/extract/pre_processing/replace_transform.py
index a7dd1d67a..3127c6d9d 100644
--- a/butterfree/extract/pre_processing/replace_transform.py
+++ b/butterfree/extract/pre_processing/replace_transform.py
@@ -1,4 +1,5 @@
 """Replace transformer for dataframes."""
+
 from itertools import chain
 from typing import Dict
diff --git a/butterfree/extract/readers/__init__.py b/butterfree/extract/readers/__init__.py
index 37da63a6c..8c7bd74e0 100644
--- a/butterfree/extract/readers/__init__.py
+++ b/butterfree/extract/readers/__init__.py
@@ -1,4 +1,5 @@
 """The Reader Component of a Source."""
+
 from butterfree.extract.readers.file_reader import FileReader
 from butterfree.extract.readers.kafka_reader import KafkaReader
 from butterfree.extract.readers.table_reader import TableReader
diff --git a/butterfree/extract/readers/file_reader.py b/butterfree/extract/readers/file_reader.py
index 8cf155998..da046f083 100644
--- a/butterfree/extract/readers/file_reader.py
+++ b/butterfree/extract/readers/file_reader.py
@@ -1,5 +1,6 @@
 """FileReader entity."""
-from typing import Any, Dict
+
+from typing import Any, Dict, Optional

 from pyspark.sql import DataFrame
 from pyspark.sql.types import StructType
@@ -75,8 +76,8 @@ def __init__(
         id: str,
         path: str,
         format: str,
-        schema: StructType = None,
-        format_options: Dict[Any, Any] = None,
+        schema: Optional[StructType] = None,
+        format_options: Optional[Dict[Any, Any]] = None,
         stream: bool = False,
     ):
         super().__init__(id)
diff --git a/butterfree/extract/readers/kafka_reader.py b/butterfree/extract/readers/kafka_reader.py
index 1b8042bce..44731d207 100644
--- a/butterfree/extract/readers/kafka_reader.py
+++ b/butterfree/extract/readers/kafka_reader.py
@@ -1,5 +1,6 @@
 """KafkaSource entity."""
-from typing import Any, Dict
+
+from typing import Any, Dict, Optional

 from pyspark.sql.dataframe import DataFrame, StructType
 from pyspark.sql.functions import col, struct
@@ -107,8 +108,8 @@ def __init__(
         id: str,
         topic: str,
         value_schema: StructType,
-        connection_string: str = None,
-        topic_options: Dict[Any, Any] = None,
+        connection_string: Optional[str] = None,
+        topic_options: Optional[Dict[Any, Any]] = None,
         stream: bool = True,
     ):
         super().__init__(id)
diff --git a/butterfree/extract/readers/reader.py b/butterfree/extract/readers/reader.py
index 597c870ff..5053d82c4 100644
--- a/butterfree/extract/readers/reader.py
+++ b/butterfree/extract/readers/reader.py
@@ -21,7 +21,9 @@ class Reader(ABC, HookableComponent):
     """

-    def __init__(self, id: str, incremental_strategy: IncrementalStrategy = None):
+    def __init__(
+        self, id: str, incremental_strategy: Optional[IncrementalStrategy] = None
+    ):
         super().__init__()
         self.id = id
         self.transformations: List[Dict[str, Any]] = []
@@ -82,9 +84,9 @@ def consume(self, client: SparkClient) -> DataFrame:
     def build(
         self,
         client: SparkClient,
-        columns: List[Any] = None,
-        start_date: str = None,
-        end_date: str = None,
+        columns: Optional[List[Any]] = None,
+        start_date: Optional[str] = None,
+        end_date: Optional[str] = None,
     ) -> None:
         """Register the data got from the reader in the Spark metastore.
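For context on the reader changes above, a reader is wired to an incremental strategy in the same way the integration-test conftest further down in this diff does it. A rough usage sketch (the id, table, and column names here are illustrative, taken from that test fixture):

```python
from butterfree.dataframe_service import IncrementalStrategy
from butterfree.extract.readers import TableReader

# Reader whose build() call can then filter rows by the "timestamp" column
# when start_date/end_date are passed down the pipeline.
reader = TableReader(id="b_source", table="b_table").with_incremental_strategy(
    incremental_strategy=IncrementalStrategy(column="timestamp")
)
```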
diff --git a/butterfree/extract/readers/table_reader.py b/butterfree/extract/readers/table_reader.py
index 343f25f38..b5decfc1f 100644
--- a/butterfree/extract/readers/table_reader.py
+++ b/butterfree/extract/readers/table_reader.py
@@ -1,5 +1,7 @@
 """TableSource entity."""

+from typing import Optional
+
 from pyspark.sql import DataFrame

 from butterfree.clients import SparkClient
@@ -44,7 +46,7 @@ class TableReader(Reader):

     __name__ = "Table Reader"

-    def __init__(self, id: str, table: str, database: str = None):
+    def __init__(self, id: str, table: str, database: Optional[str] = None):
         super().__init__(id)
         if not isinstance(table, str):
             raise ValueError(
diff --git a/butterfree/extract/source.py b/butterfree/extract/source.py
index 281ed15ad..bfc15271f 100644
--- a/butterfree/extract/source.py
+++ b/butterfree/extract/source.py
@@ -1,6 +1,6 @@
 """Holds the SourceSelector class."""

-from typing import List
+from typing import List, Optional

 from pyspark.sql import DataFrame
@@ -70,7 +70,10 @@ def __init__(
         self.eager_evaluation = eager_evaluation

     def construct(
-        self, client: SparkClient, start_date: str = None, end_date: str = None
+        self,
+        client: SparkClient,
+        start_date: Optional[str] = None,
+        end_date: Optional[str] = None,
     ) -> DataFrame:
         """Construct an entry point dataframe for a feature set.
diff --git a/butterfree/hooks/__init__.py b/butterfree/hooks/__init__.py
index 90bedeb26..e4a32170c 100644
--- a/butterfree/hooks/__init__.py
+++ b/butterfree/hooks/__init__.py
@@ -1,4 +1,5 @@
 """Holds Hooks definitions."""
+
 from butterfree.hooks.hook import Hook
 from butterfree.hooks.hookable_component import HookableComponent
diff --git a/butterfree/hooks/schema_compatibility/__init__.py b/butterfree/hooks/schema_compatibility/__init__.py
index edf748bf8..a00adef8d 100644
--- a/butterfree/hooks/schema_compatibility/__init__.py
+++ b/butterfree/hooks/schema_compatibility/__init__.py
@@ -1,4 +1,5 @@
 """Holds Schema Compatibility Hooks definitions."""
+
 from butterfree.hooks.schema_compatibility.cassandra_table_schema_compatibility_hook import (  # noqa
     CassandraTableSchemaCompatibilityHook,
 )
diff --git a/butterfree/hooks/schema_compatibility/spark_table_schema_compatibility_hook.py b/butterfree/hooks/schema_compatibility/spark_table_schema_compatibility_hook.py
index b08dd56aa..eea50c06d 100644
--- a/butterfree/hooks/schema_compatibility/spark_table_schema_compatibility_hook.py
+++ b/butterfree/hooks/schema_compatibility/spark_table_schema_compatibility_hook.py
@@ -1,5 +1,7 @@
 """Spark table schema compatibility Hook definition."""

+from typing import Optional
+
 from pyspark.sql import DataFrame

 from butterfree.clients import SparkClient
@@ -18,7 +20,9 @@ class SparkTableSchemaCompatibilityHook(Hook):
         database: database name.
     """

-    def __init__(self, spark_client: SparkClient, table: str, database: str = None):
+    def __init__(
+        self, spark_client: SparkClient, table: str, database: Optional[str] = None
+    ):
         self.spark_client = spark_client
         self.table_expression = (f"`{database}`." if database else "") + f"`{table}`"
diff --git a/butterfree/load/processing/__init__.py b/butterfree/load/processing/__init__.py
index e2ad51578..06c5cb450 100644
--- a/butterfree/load/processing/__init__.py
+++ b/butterfree/load/processing/__init__.py
@@ -1,4 +1,5 @@
 """Pre Processing Components regarding Readers."""
+
 from butterfree.load.processing.json_transform import json_transform

 __all__ = ["json_transform"]
diff --git a/butterfree/load/processing/json_transform.py b/butterfree/load/processing/json_transform.py
index 19ddecae2..598064dba 100644
--- a/butterfree/load/processing/json_transform.py
+++ b/butterfree/load/processing/json_transform.py
@@ -1,4 +1,5 @@
 """Json conversion for writers."""
+
 from pyspark.sql.dataframe import DataFrame
 from pyspark.sql.functions import struct, to_json
diff --git a/butterfree/load/sink.py b/butterfree/load/sink.py
index 7c0328d6f..59b001a53 100644
--- a/butterfree/load/sink.py
+++ b/butterfree/load/sink.py
@@ -1,4 +1,5 @@
 """Holds the Sink class."""
+
 from typing import List, Optional

 from pyspark.sql.dataframe import DataFrame
diff --git a/butterfree/load/writers/historical_feature_store_writer.py b/butterfree/load/writers/historical_feature_store_writer.py
index 1a64afdf3..c01fee1d8 100644
--- a/butterfree/load/writers/historical_feature_store_writer.py
+++ b/butterfree/load/writers/historical_feature_store_writer.py
@@ -1,7 +1,7 @@
 """Holds the Historical Feature Store writer class."""

 import os
-from typing import Any
+from typing import Any, Optional

 from pyspark.sql.dataframe import DataFrame
 from pyspark.sql.functions import dayofmonth, month, year
@@ -106,13 +106,13 @@ class HistoricalFeatureStoreWriter(Writer):

     def __init__(
         self,
-        db_config: AbstractWriteConfig = None,
-        database: str = None,
-        num_partitions: int = None,
+        db_config: Optional[AbstractWriteConfig] = None,
+        database: Optional[str] = None,
+        num_partitions: Optional[int] = None,
         validation_threshold: float = DEFAULT_VALIDATION_THRESHOLD,
         debug_mode: bool = False,
         interval_mode: bool = False,
-        check_schema_hook: Hook = None,
+        check_schema_hook: Optional[Hook] = None,
         row_count_validation: bool = True,
     ):
         super(HistoricalFeatureStoreWriter, self).__init__(
@@ -152,7 +152,8 @@ def write(
         dataframe = self._apply_transformations(dataframe)

         if self.interval_mode:
-            partition_overwrite_mode = spark_client.conn.conf.get(
+
+            partition_overwrite_mode = spark_client.conn.conf.get(  # type: ignore
                 "spark.sql.sources.partitionOverwriteMode"
             ).lower()
@@ -249,7 +250,11 @@ def _create_partitions(self, dataframe: DataFrame) -> DataFrame:
         return repartition_df(dataframe, self.PARTITION_BY, self.num_partitions)

     def check_schema(
-        self, client: Any, dataframe: DataFrame, table_name: str, database: str = None
+        self,
+        client: Any,
+        dataframe: DataFrame,
+        table_name: str,
+        database: Optional[str] = None,
     ) -> DataFrame:
         """Instantiate the schema check hook to check schema between dataframe and
         database.
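The interval-mode branch above reads `spark.sql.sources.partitionOverwriteMode` from the active session before writing. For that check to be useful, the session is expected to be configured for dynamic partition overwrite; a sketch of that setup (not part of this diff, and the exact validation the writer performs on the value is not shown here):

```python
from pyspark.sql import SparkSession

spark = SparkSession.builder.getOrCreate()
# Only the partitions present in the written DataFrame are overwritten,
# instead of truncating the whole table on insert-overwrite.
spark.conf.set("spark.sql.sources.partitionOverwriteMode", "dynamic")
```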
diff --git a/butterfree/load/writers/online_feature_store_writer.py b/butterfree/load/writers/online_feature_store_writer.py
index d0bcde948..bce5a3751 100644
--- a/butterfree/load/writers/online_feature_store_writer.py
+++ b/butterfree/load/writers/online_feature_store_writer.py
@@ -1,7 +1,7 @@
 """Holds the Online Feature Store writer class."""

 import os
-from typing import Any, Dict, List, Union
+from typing import Any, Dict, List, Optional, Union

 from pyspark.sql import DataFrame, Window
 from pyspark.sql.functions import col, row_number
@@ -80,12 +80,12 @@ class OnlineFeatureStoreWriter(Writer):

     def __init__(
         self,
-        db_config: AbstractWriteConfig = None,
-        database: str = None,
-        debug_mode: bool = False,
-        write_to_entity: bool = False,
-        interval_mode: bool = False,
-        check_schema_hook: Hook = None,
+        db_config: Optional[AbstractWriteConfig] = None,
+        database: Optional[str] = None,
+        debug_mode: Optional[bool] = False,
+        write_to_entity: Optional[bool] = False,
+        interval_mode: Optional[bool] = False,
+        check_schema_hook: Optional[Hook] = None,
     ):
         super(OnlineFeatureStoreWriter, self).__init__(
             db_config or CassandraConfig(), debug_mode, interval_mode, write_to_entity
@@ -256,7 +256,11 @@ def get_db_schema(self, feature_set: FeatureSet) -> List[Dict[Any, Any]]:
         return db_schema

     def check_schema(
-        self, client: Any, dataframe: DataFrame, table_name: str, database: str = None
+        self,
+        client: Any,
+        dataframe: DataFrame,
+        table_name: str,
+        database: Optional[str] = None,
     ) -> DataFrame:
         """Instantiate the schema check hook to check schema between dataframe and
         database.
diff --git a/butterfree/load/writers/writer.py b/butterfree/load/writers/writer.py
index 1dae795c6..780b9ec2d 100644
--- a/butterfree/load/writers/writer.py
+++ b/butterfree/load/writers/writer.py
@@ -2,7 +2,7 @@

 from abc import ABC, abstractmethod
 from functools import reduce
-from typing import Any, Callable, Dict, List
+from typing import Any, Callable, Dict, List, Optional

 from pyspark.sql.dataframe import DataFrame
@@ -23,10 +23,10 @@ class Writer(ABC, HookableComponent):
     def __init__(
         self,
         db_config: AbstractWriteConfig,
-        debug_mode: bool = False,
-        interval_mode: bool = False,
-        write_to_entity: bool = False,
-        row_count_validation: bool = True,
+        debug_mode: Optional[bool] = False,
+        interval_mode: Optional[bool] = False,
+        write_to_entity: Optional[bool] = False,
+        row_count_validation: Optional[bool] = True,
     ) -> None:
         super().__init__()
         self.db_config = db_config
@@ -90,7 +90,11 @@ def write(

     @abstractmethod
     def check_schema(
-        self, client: Any, dataframe: DataFrame, table_name: str, database: str = None
+        self,
+        client: Any,
+        dataframe: DataFrame,
+        table_name: str,
+        database: Optional[str] = None,
     ) -> DataFrame:
         """Instantiate the schema check hook to check schema between dataframe and
         database.
diff --git a/butterfree/migrations/database_migration/database_migration.py b/butterfree/migrations/database_migration/database_migration.py
index 468c028ec..351a47243 100644
--- a/butterfree/migrations/database_migration/database_migration.py
+++ b/butterfree/migrations/database_migration/database_migration.py
@@ -1,8 +1,9 @@
 """Migration entity."""
+
 from abc import ABC, abstractmethod
 from dataclasses import dataclass
 from enum import Enum, auto
-from typing import Any, Dict, List, Set
+from typing import Any, Dict, List, Optional, Set

 from butterfree.clients import AbstractClient
 from butterfree.configs.logger import __logger
@@ -106,7 +107,10 @@ def _get_alter_column_type_query(self, column: Diff, table_name: str) -> str:
         pass

     def _get_queries(
-        self, schema_diff: Set[Diff], table_name: str, write_on_entity: bool = None
+        self,
+        schema_diff: Set[Diff],
+        table_name: str,
+        write_on_entity: Optional[bool] = None,
     ) -> Any:
         """Create the desired queries for migration.

@@ -162,8 +166,8 @@ def create_query(
         self,
         fs_schema: List[Dict[str, Any]],
         table_name: str,
-        db_schema: List[Dict[str, Any]] = None,
-        write_on_entity: bool = None,
+        db_schema: Optional[List[Dict[str, Any]]] = None,
+        write_on_entity: Optional[bool] = None,
     ) -> Any:
         """Create a query regarding a data source.

@@ -246,7 +250,7 @@ def _get_diff(
         return schema_diff

     def _get_schema(
-        self, table_name: str, database: str = None
+        self, table_name: str, database: Optional[str] = None
     ) -> List[Dict[str, Any]]:
         """Get a table schema in the respective database.
diff --git a/butterfree/migrations/database_migration/metastore_migration.py b/butterfree/migrations/database_migration/metastore_migration.py
index 8c6c211ae..07e2bd89f 100644
--- a/butterfree/migrations/database_migration/metastore_migration.py
+++ b/butterfree/migrations/database_migration/metastore_migration.py
@@ -1,6 +1,6 @@
 """Metastore Migration entity."""

-from typing import Any, Dict, List
+from typing import Any, Dict, List, Optional

 from butterfree.clients import SparkClient
 from butterfree.configs import environment
@@ -32,7 +32,7 @@ class MetastoreMigration(DatabaseMigration):

     def __init__(
         self,
-        database: str = None,
+        database: Optional[str] = None,
     ) -> None:
         self._db_config = MetastoreConfig()
         self.database = database or environment.get_variable(
diff --git a/butterfree/pipelines/__init__.py b/butterfree/pipelines/__init__.py
index a868e48f2..8bbc5c39e 100644
--- a/butterfree/pipelines/__init__.py
+++ b/butterfree/pipelines/__init__.py
@@ -1,4 +1,5 @@
 """ETL Pipelines."""
+
 from butterfree.pipelines.feature_set_pipeline import FeatureSetPipeline

 __all__ = ["FeatureSetPipeline"]
diff --git a/butterfree/pipelines/feature_set_pipeline.py b/butterfree/pipelines/feature_set_pipeline.py
index 8aec54ec2..8ba1a636c 100644
--- a/butterfree/pipelines/feature_set_pipeline.py
+++ b/butterfree/pipelines/feature_set_pipeline.py
@@ -1,5 +1,6 @@
 """FeatureSetPipeline entity."""
-from typing import List
+
+from typing import List, Optional

 from butterfree.clients import SparkClient
 from butterfree.dataframe_service import repartition_sort_df
@@ -135,7 +136,7 @@ def __init__(
         source: Source,
         feature_set: FeatureSet,
         sink: Sink,
-        spark_client: SparkClient = None,
+        spark_client: Optional[SparkClient] = None,
     ):
         self.source = source
         self.feature_set = feature_set
@@ -190,11 +191,11 @@ def spark_client(self, spark_client: SparkClient) -> None:

     def run(
         self,
-        end_date: str = None,
-        partition_by: List[str] = None,
-        order_by: List[str] = None,
-        num_processors: int = None,
-        start_date: str = None,
+        end_date: Optional[str] = None,
+        partition_by: Optional[List[str]] = None,
+        order_by: Optional[List[str]] = None,
+        num_processors: Optional[int] = None,
+        start_date: Optional[str] = None,
     ) -> None:
         """Runs the defined feature set pipeline.

@@ -243,10 +244,10 @@ def run(

     def run_for_date(
         self,
-        execution_date: str = None,
-        partition_by: List[str] = None,
-        order_by: List[str] = None,
-        num_processors: int = None,
+        execution_date: Optional[str] = None,
+        partition_by: Optional[List[str]] = None,
+        order_by: Optional[List[str]] = None,
+        num_processors: Optional[int] = None,
     ) -> None:
         """Runs the defined feature set pipeline for a specific date.
diff --git a/butterfree/reports/__init__.py b/butterfree/reports/__init__.py
index 4b57dafc2..d272943d9 100644
--- a/butterfree/reports/__init__.py
+++ b/butterfree/reports/__init__.py
@@ -1,4 +1,5 @@
 """Reports module."""
+
 from butterfree.reports.metadata import Metadata

 __all__ = ["Metadata"]
diff --git a/butterfree/testing/dataframe/__init__.py b/butterfree/testing/dataframe/__init__.py
index 15481a54a..5b465bc64 100644
--- a/butterfree/testing/dataframe/__init__.py
+++ b/butterfree/testing/dataframe/__init__.py
@@ -1,6 +1,7 @@
 """Methods to assert properties regarding Apache Spark Dataframes."""
+
 from json import dumps
-from typing import Any, Dict, List
+from typing import Any, Dict, List, Optional

 from pyspark import SparkContext
 from pyspark.sql import Column, DataFrame, SparkSession
@@ -72,7 +73,7 @@ def create_df_from_collection(
     data: List[Dict[Any, Any]],
     spark_context: SparkContext,
     spark_session: SparkSession,
-    schema: StructType = None,
+    schema: Optional[StructType] = None,
 ) -> DataFrame:
     """Creates a dataframe from a list of dicts."""
     return spark_session.read.json(
diff --git a/butterfree/transform/aggregated_feature_set.py b/butterfree/transform/aggregated_feature_set.py
index c86a95c3d..6706bf8cf 100644
--- a/butterfree/transform/aggregated_feature_set.py
+++ b/butterfree/transform/aggregated_feature_set.py
@@ -1,4 +1,5 @@
 """AggregatedFeatureSet entity."""
+
 import itertools
 from datetime import datetime, timedelta
 from functools import reduce
@@ -261,8 +262,8 @@ def _has_aggregated_transform_only(features: List[Feature]) -> bool:
     @staticmethod
     def _build_feature_column_name(
         feature_column: str,
-        pivot_value: Union[float, str] = None,
-        window: Window = None,
+        pivot_value: Optional[Union[float, str]] = None,
+        window: Optional[Window] = None,
     ) -> str:
         base_name = feature_column
         if pivot_value is not None:
@@ -311,7 +312,7 @@ def with_distinct(self, subset: List, keep: str = "last") -> "AggregatedFeatureS
         return self

     def with_windows(
-        self, definitions: List[str], slide: str = None
+        self, definitions: List[str], slide: Optional[str] = None
     ) -> "AggregatedFeatureSet":
         """Create a list with windows defined."""
         self._windows = [
@@ -367,7 +368,7 @@ def _dataframe_join(
         right: DataFrame,
         on: List[str],
         how: str,
-        num_processors: int = None,
+        num_processors: Optional[int] = None,
     ) -> DataFrame:
         # make both tables co-partitioned to improve join performance
         left = repartition_df(left, partition_by=on, num_processors=num_processors)
@@ -379,7 +380,7 @@ def _aggregate(
         dataframe: DataFrame,
         features: List[Feature],
         window: Optional[Window] = None,
-        num_processors: int = None,
+        num_processors: Optional[int] = None,
     ) -> DataFrame:
         aggregations = [
             c.function for f in features for c in f.transformation.aggregations
@@ -512,7 +513,7 @@ def _get_biggest_window_in_days(definitions: List[str]) -> float:
         )
         return max(windows_list) / (60 * 60 * 24)

-    def define_start_date(self, start_date: str = None) -> Optional[str]:
+    def define_start_date(self, start_date: Optional[str] = None) -> Optional[str]:
         """Get aggregated feature set start date.

         Args:
@@ -539,9 +540,9 @@ def construct(
         self,
         dataframe: DataFrame,
         client: SparkClient,
-        end_date: str = None,
-        num_processors: int = None,
-        start_date: str = None,
+        end_date: Optional[str] = None,
+        num_processors: Optional[int] = None,
+        start_date: Optional[str] = None,
     ) -> DataFrame:
         """Use all the features to build the feature set dataframe.
diff --git a/butterfree/transform/feature_set.py b/butterfree/transform/feature_set.py
index 469a353a8..369eaf290 100644
--- a/butterfree/transform/feature_set.py
+++ b/butterfree/transform/feature_set.py
@@ -1,4 +1,5 @@
 """FeatureSet entity."""
+
 import itertools
 from functools import reduce
 from typing import Any, Dict, List, Optional
@@ -389,7 +390,7 @@ def _filter_duplicated_rows(self, df: DataFrame) -> DataFrame:

         return df.select([column for column in self.columns])

-    def define_start_date(self, start_date: str = None) -> Optional[str]:
+    def define_start_date(self, start_date: Optional[str] = None) -> Optional[str]:
         """Get feature set start date.

         Args:
@@ -404,9 +405,9 @@ def construct(
         self,
         dataframe: DataFrame,
         client: SparkClient,
-        end_date: str = None,
-        num_processors: int = None,
-        start_date: str = None,
+        end_date: Optional[str] = None,
+        num_processors: Optional[int] = None,
+        start_date: Optional[str] = None,
     ) -> DataFrame:
         """Use all the features to build the feature set dataframe.
diff --git a/butterfree/transform/features/feature.py b/butterfree/transform/features/feature.py
index 612fc4a2f..cfd8a2f61 100644
--- a/butterfree/transform/features/feature.py
+++ b/butterfree/transform/features/feature.py
@@ -1,6 +1,7 @@
 """Feature entity."""
+
 import warnings
-from typing import Any, List
+from typing import Any, List, Optional

 from pyspark.sql import DataFrame
 from pyspark.sql.functions import col
@@ -41,9 +42,9 @@ def __init__(
         self,
         name: str,
         description: str,
-        dtype: DataType = None,
-        from_column: str = None,
-        transformation: TransformComponent = None,
+        dtype: Optional[DataType] = None,
+        from_column: Optional[str] = None,
+        transformation: Optional[TransformComponent] = None,
     ) -> None:
         self.name = name
         self.description = description
diff --git a/butterfree/transform/features/key_feature.py b/butterfree/transform/features/key_feature.py
index a7ad350cb..74626d6fa 100644
--- a/butterfree/transform/features/key_feature.py
+++ b/butterfree/transform/features/key_feature.py
@@ -1,5 +1,7 @@
 """KeyFeature entity."""

+from typing import Optional
+
 from butterfree.constants.data_type import DataType
 from butterfree.transform.features.feature import Feature
 from butterfree.transform.transformations import TransformComponent
@@ -31,8 +33,8 @@ def __init__(
         name: str,
         description: str,
         dtype: DataType,
-        from_column: str = None,
-        transformation: TransformComponent = None,
+        from_column: Optional[str] = None,
+        transformation: Optional[TransformComponent] = None,
     ) -> None:
         super(KeyFeature, self).__init__(
             name=name,
diff --git a/butterfree/transform/features/timestamp_feature.py b/butterfree/transform/features/timestamp_feature.py
index b131eaeee..aa30dfc4a 100644
--- a/butterfree/transform/features/timestamp_feature.py
+++ b/butterfree/transform/features/timestamp_feature.py
@@ -1,4 +1,7 @@
 """TimestampFeature entity."""
+
+from typing import Optional
+
 from pyspark.sql import DataFrame
 from pyspark.sql.functions import to_timestamp

@@ -38,10 +41,10 @@ class TimestampFeature(Feature):

     def __init__(
         self,
-        from_column: str = None,
-        transformation: TransformComponent = None,
+        from_column: Optional[str] = None,
+        transformation: Optional[TransformComponent] = None,
         from_ms: bool = False,
-        mask: str = None,
+        mask: Optional[str] = None,
     ) -> None:
         description = "Time tag for the state of all features."
         super(TimestampFeature, self).__init__(
@@ -70,7 +73,7 @@ def transform(self, dataframe: DataFrame) -> DataFrame:
             ts_column = ts_column / 1000

         dataframe = dataframe.withColumn(
-            column_name, to_timestamp(ts_column, self.mask)
+            column_name, to_timestamp(ts_column, self.mask)  # type: ignore
         )
         return super().transform(dataframe)
diff --git a/butterfree/transform/transformations/aggregated_transform.py b/butterfree/transform/transformations/aggregated_transform.py
index a9581ef00..406ca72a9 100644
--- a/butterfree/transform/transformations/aggregated_transform.py
+++ b/butterfree/transform/transformations/aggregated_transform.py
@@ -1,6 +1,7 @@
 """Aggregated Transform entity."""
+
 from collections import namedtuple
-from typing import List, Tuple
+from typing import List, Optional, Tuple

 from pyspark.sql import DataFrame
 from pyspark.sql.functions import col, expr, when
@@ -56,7 +57,9 @@ class AggregatedTransform(TransformComponent):
         NotImplementedError: ...
     """

-    def __init__(self, functions: List[Function], filter_expression: str = None):
+    def __init__(
+        self, functions: List[Function], filter_expression: Optional[str] = None
+    ):
         super(AggregatedTransform, self).__init__()
         self.functions = functions
         self.filter_expression = filter_expression
diff --git a/butterfree/transform/transformations/custom_transform.py b/butterfree/transform/transformations/custom_transform.py
index 7860fdc20..a12310127 100644
--- a/butterfree/transform/transformations/custom_transform.py
+++ b/butterfree/transform/transformations/custom_transform.py
@@ -69,7 +69,7 @@ def transformer(self) -> Callable[..., Any]:

     @transformer.setter
     def transformer(self, method: Callable[..., Any]) -> None:
-        if not method:
+        if method is None:
             raise ValueError("A method must be provided to CustomTransform")
         self._transformer = method
diff --git a/butterfree/transform/transformations/spark_function_transform.py b/butterfree/transform/transformations/spark_function_transform.py
index 8fb24dd79..34384518d 100644
--- a/butterfree/transform/transformations/spark_function_transform.py
+++ b/butterfree/transform/transformations/spark_function_transform.py
@@ -1,5 +1,6 @@
 """Spark Function Transform entity."""
-from typing import Any, List
+
+from typing import Any, List, Optional

 from pyspark.sql import DataFrame

@@ -87,8 +88,8 @@ def with_window(
         self,
         partition_by: str,
         window_definition: List[str],
-        order_by: str = None,
-        mode: str = None,
+        order_by: Optional[str] = None,
+        mode: Optional[str] = None,
     ) -> "SparkFunctionTransform":
         """Create a list with windows defined."""
         if mode is not None:
@@ -103,7 +104,9 @@ def with_window(
         ]
         return self

-    def _get_output_name(self, function: object, window: Window = None) -> str:
+    def _get_output_name(
+        self, function: object, window: Optional[Window] = None
+    ) -> str:
         base_name = (
             "__".join([self._parent.name, function.__name__])
             if hasattr(function, "__name__")
diff --git a/butterfree/transform/transformations/transform_component.py b/butterfree/transform/transformations/transform_component.py
index 7ecec332a..94bc19f8c 100644
--- a/butterfree/transform/transformations/transform_component.py
+++ b/butterfree/transform/transformations/transform_component.py
@@ -1,4 +1,5 @@
 """Transform Abstract Class."""
+
 from abc import ABC, abstractmethod
 from typing import Any, List
diff --git a/butterfree/transform/transformations/user_defined_functions/mode.py b/butterfree/transform/transformations/user_defined_functions/mode.py
index 65790b939..5b6c7f17d 100644
--- a/butterfree/transform/transformations/user_defined_functions/mode.py
+++ b/butterfree/transform/transformations/user_defined_functions/mode.py
@@ -1,4 +1,5 @@
 """Method to compute mode aggregation."""
+
 import pandas as pd
 from pyspark.sql.functions import pandas_udf
 from pyspark.sql.types import StringType
diff --git a/butterfree/transform/transformations/user_defined_functions/most_frequent_set.py b/butterfree/transform/transformations/user_defined_functions/most_frequent_set.py
index 20ccd3ba3..6dd6779f1 100644
--- a/butterfree/transform/transformations/user_defined_functions/most_frequent_set.py
+++ b/butterfree/transform/transformations/user_defined_functions/most_frequent_set.py
@@ -1,4 +1,5 @@
 """Method to compute most frequent set aggregation."""
+
 from typing import Any

 import pandas as pd
diff --git a/butterfree/transform/utils/__init__.py b/butterfree/transform/utils/__init__.py
index abf7ed3fb..66004a374 100644
--- a/butterfree/transform/utils/__init__.py
+++ b/butterfree/transform/utils/__init__.py
@@ -1,4 +1,5 @@
 """This module holds utils to be used by transformations."""
+
 from butterfree.transform.utils.function import Function
 from butterfree.transform.utils.window_spec import Window
diff --git a/butterfree/transform/utils/date_range.py b/butterfree/transform/utils/date_range.py
index 78e0e6e3c..4bdd29772 100644
--- a/butterfree/transform/utils/date_range.py
+++ b/butterfree/transform/utils/date_range.py
@@ -1,7 +1,7 @@
 """Utils for date range generation."""

 from datetime import datetime
-from typing import Union
+from typing import Optional, Union

 from pyspark.sql import DataFrame, functions

@@ -14,7 +14,7 @@ def get_date_range(
     client: SparkClient,
     start_date: Union[str, datetime],
     end_date: Union[str, datetime],
-    step: int = None,
+    step: Optional[int] = None,
 ) -> DataFrame:
     """Create a date range dataframe.
@@ -44,7 +44,7 @@ def get_date_range(
             for c in ("start_date", "end_date")
         ]
     )
-    start_date, end_date = date_df.first()
+    start_date, end_date = date_df.first()  # type: ignore

     return client.conn.range(
         start_date, end_date + day_in_seconds, step  # type: ignore
     ).select(functions.col("id").cast(DataType.TIMESTAMP.spark).alias(TIMESTAMP_COLUMN))
diff --git a/butterfree/transform/utils/function.py b/butterfree/transform/utils/function.py
index fcf6679fb..951a232ca 100644
--- a/butterfree/transform/utils/function.py
+++ b/butterfree/transform/utils/function.py
@@ -32,9 +32,9 @@ def func(self) -> Callable:

     @func.setter
     def func(self, value: Callable) -> None:
         """Definitions to be used in the transformation."""
-        if not value:
+        if value is None:
             raise ValueError("Function must not be empty.")
-        if not callable(value):
+        if callable(value) is False:
             raise TypeError("Function must be callable.")

         self._func = value
diff --git a/butterfree/transform/utils/window_spec.py b/butterfree/transform/utils/window_spec.py
index 53ecd2fd3..b95dd73a6 100644
--- a/butterfree/transform/utils/window_spec.py
+++ b/butterfree/transform/utils/window_spec.py
@@ -1,4 +1,5 @@
 """Holds function for defining window in DataFrames."""
+
 from typing import Any, List, Optional, Union

 from pyspark import sql
@@ -69,8 +70,8 @@ def __init__(
         window_definition: str,
         partition_by: Optional[Union[Column, str, List[str]]] = None,
         order_by: Optional[Union[Column, str]] = None,
-        mode: str = None,
-        slide: str = None,
+        mode: Optional[str] = None,
+        slide: Optional[str] = None,
     ):
         self.partition_by = partition_by
         self.order_by = order_by or TIMESTAMP_COLUMN
diff --git a/butterfree/validations/basic_validaton.py b/butterfree/validations/basic_validaton.py
index d3a5558c7..01bc9ec21 100644
--- a/butterfree/validations/basic_validaton.py
+++ b/butterfree/validations/basic_validaton.py
@@ -1,5 +1,7 @@
 """Validation implementing basic checks over the dataframe."""

+from typing import Optional
+
 from pyspark.sql.dataframe import DataFrame

 from butterfree.constants.columns import TIMESTAMP_COLUMN
@@ -14,7 +16,7 @@ class BasicValidation(Validation):

     """

-    def __init__(self, dataframe: DataFrame = None):
+    def __init__(self, dataframe: Optional[DataFrame] = None):
         super().__init__(dataframe)

     def check(self) -> None:
diff --git a/butterfree/validations/validation.py b/butterfree/validations/validation.py
index 9915906cd..551859d82 100644
--- a/butterfree/validations/validation.py
+++ b/butterfree/validations/validation.py
@@ -1,5 +1,7 @@
 """Abstract Validation class."""
+
 from abc import ABC, abstractmethod
+from typing import Optional

 from pyspark.sql.dataframe import DataFrame

@@ -12,7 +14,7 @@ class Validation(ABC):

     """

-    def __init__(self, dataframe: DataFrame = None):
+    def __init__(self, dataframe: Optional[DataFrame] = None):
         self.dataframe = dataframe

     def input(self, dataframe: DataFrame) -> "Validation":
diff --git a/docs/source/conf.py b/docs/source/conf.py
index 77fdc1256..0a5377392 100644
--- a/docs/source/conf.py
+++ b/docs/source/conf.py
@@ -1,4 +1,5 @@
 """Sphinx Configuration."""
+
 # -*- coding: utf-8 -*-
 #
 # Configuration file for the Sphinx documentation builder.
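The `function.py` and `custom_transform.py` hunks above swap truthiness checks (`if not value:`) for explicit `is None` and `callable()` tests. A small sketch of why the explicit form is safer, using a hypothetical callable object not taken from the codebase:

```python
class Threshold:
    """A callable whose truth value is False when the cutoff is zero."""

    def __init__(self, cutoff: float) -> None:
        self.cutoff = cutoff

    def __bool__(self) -> bool:
        return self.cutoff != 0

    def __call__(self, value: float) -> bool:
        return value > self.cutoff


func = Threshold(0.0)
assert not func            # falsy, so `if not func:` would wrongly reject it
assert func is not None    # the explicit None check accepts it
assert callable(func)      # and it is still a valid callable
```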
diff --git a/mypy.ini b/mypy.ini
index fc2931493..eb867a477 100644
--- a/mypy.ini
+++ b/mypy.ini
@@ -9,42 +9,3 @@
 show_error_codes = True
 show_error_context = True
 disable_error_code = attr-defined, list-item, operator
 pretty = True
-
-[mypy-butterfree.pipelines.*]
-ignore_errors = True
-
-[mypy-butterfree.load.*]
-ignore_errors = True
-
-[mypy-butterfree.transform.*]
-ignore_errors = True
-
-[mypy-butterfree.extract.*]
-ignore_errors = True
-
-[mypy-butterfree.config.*]
-ignore_errors = True
-
-[mypy-butterfree.clients.*]
-ignore_errors = True
-
-[mypy-butterfree.configs.*]
-ignore_errors = True
-
-[mypy-butterfree.dataframe_service.*]
-ignore_errors = True
-
-[mypy-butterfree.validations.*]
-ignore_errors = True
-
-[mypy-butterfree.migrations.*]
-ignore_errors = True
-
-[mypy-butterfree.testing.*]
-ignore_errors = True
-
-[mypy-butterfree.hooks.*]
-ignore_errors = True
-
-[mypy-butterfree._cli.*]
-ignore_errors = True
diff --git a/requirements.lint.txt b/requirements.lint.txt
index 66641a952..1ad6499d1 100644
--- a/requirements.lint.txt
+++ b/requirements.lint.txt
@@ -1,4 +1,4 @@
-black==21.12b0
+black==24.3.0
 flake8==4.0.1
 flake8-isort==4.1.1
 flake8-docstrings==1.5.0
diff --git a/requirements.txt b/requirements.txt
index f3af42540..0af8a62ae 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -3,6 +3,6 @@
 mdutils>=1.2.2,<2.0
 pandas>=0.24,<2.0
 parameters-validation>=1.1.5,<2.0
 pyspark==3.5.1
-typer==0.3.2
+typer==0.4.2
 typing-extensions>3.7.4,<5
 boto3==1.17.*
diff --git a/tests/integration/butterfree/pipelines/conftest.py b/tests/integration/butterfree/pipelines/conftest.py
index 5f304972d..1466a8d98 100644
--- a/tests/integration/butterfree/pipelines/conftest.py
+++ b/tests/integration/butterfree/pipelines/conftest.py
@@ -139,7 +139,10 @@ def feature_set_pipeline(
     feature_set_pipeline = FeatureSetPipeline(
         source=Source(
             readers=[
-                TableReader(id="b_source", table="b_table",).with_incremental_strategy(
+                TableReader(
+                    id="b_source",
+                    table="b_table",
+                ).with_incremental_strategy(
                     incremental_strategy=IncrementalStrategy(column="timestamp")
                 ),
             ],
diff --git a/tests/unit/butterfree/transform/transformations/test_aggregated_transform.py b/tests/unit/butterfree/transform/transformations/test_aggregated_transform.py
index f0ae2f854..96ff682a8 100644
--- a/tests/unit/butterfree/transform/transformations/test_aggregated_transform.py
+++ b/tests/unit/butterfree/transform/transformations/test_aggregated_transform.py
@@ -67,7 +67,7 @@ def test_blank_aggregation(self, feature_set_dataframe):
                 name="feature1",
                 description="unit test",
                 transformation=AggregatedTransform(
                    functions=[Function(func="", data_type="")]
+                    functions=[Function(func=None, data_type="")]
                 ),
             )
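The updated unit test passes `func=None` explicitly, which matches the stricter `is None` validation shown in the `function.py` hunk (an empty string is no longer rejected by the setter). A hedged pytest-style sketch of the behaviour that test now exercises, assuming `Function.__init__` routes the argument through the validating `func` setter:

```python
import pytest

from butterfree.transform.utils import Function


def test_function_rejects_none():
    # The func setter raises ValueError("Function must not be empty.") on None.
    with pytest.raises(ValueError):
        Function(func=None, data_type="")
```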