From 56a208b5c2cdb312b84e604633ff3c1b07b6313e Mon Sep 17 00:00:00 2001 From: Don Heppner Date: Thu, 26 Jan 2023 10:14:15 -0500 Subject: [PATCH] [DOCS] Adding docstrings (#6854) Co-authored-by: Anthony Burdi --- .../data_context/abstract_data_context.py | 172 +++++++++++++----- ...ect_column_distinct_values_to_be_in_set.py | 18 +- ...ect_column_distinct_values_to_equal_set.py | 19 +- ...ct_column_kl_divergence_to_be_less_than.py | 21 ++- .../core/expect_column_mean_to_be_between.py | 18 +- great_expectations/validator/validator.py | 2 +- 6 files changed, 190 insertions(+), 60 deletions(-) diff --git a/great_expectations/data_context/data_context/abstract_data_context.py b/great_expectations/data_context/data_context/abstract_data_context.py index ac753540cd42..5de2ab78c62f 100644 --- a/great_expectations/data_context/data_context/abstract_data_context.py +++ b/great_expectations/data_context/data_context/abstract_data_context.py @@ -315,6 +315,7 @@ def _save_project_config(self) -> None: """ self.variables.save_config() + @public_api def update_project_config( self, project_config: DataContextConfig | Mapping ) -> None: @@ -325,6 +326,7 @@ def update_project_config( """ self.config.update(project_config) + @public_api @usage_statistics_enabled_method( event_name=UsageStatsEvents.DATA_CONTEXT_SAVE_EXPECTATION_SUITE, args_payload_fn=save_expectation_suite_usage_statistics, @@ -337,8 +339,21 @@ def save_expectation_suite( include_rendered_content: Optional[bool] = None, **kwargs: Optional[dict], ) -> None: - """ - Each DataContext will define how ExpectationSuite will be saved. + """Save the provided ExpectationSuite into the DataContext using the configured ExpectationStore. + + Args: + expectation_suite: The ExpectationSuite to save. + expectation_suite_name: The name of this ExpectationSuite. If no name is provided, the name will be read + from the suite. + overwrite_existing: Whether to overwrite the suite if it already exists. 
+ include_rendered_content: Whether to save the prescriptive rendered content for each expectation. + kwargs: Additional parameters, unused + + Returns: + None + + Raises: + DataContextError: If a suite with the same name exists and `overwrite_existing` is set to `False`. """ if expectation_suite_name is None: key = ExpectationSuiteIdentifier( @@ -1444,6 +1459,7 @@ def delete_checkpoint( name=name, ge_cloud_id=ge_cloud_id ) + @public_api @usage_statistics_enabled_method( event_name=UsageStatsEvents.DATA_CONTEXT_RUN_CHECKPOINT, ) @@ -1467,8 +1483,7 @@ def run_checkpoint( expectation_suite_ge_cloud_id: Optional[str] = None, **kwargs, ) -> CheckpointResult: - """ - Validate against a pre-defined Checkpoint. (Experimental) + """Validate using an existing Checkpoint. Args: checkpoint_name: The name of a Checkpoint defined via the CLI or by manually creating a yml file @@ -1558,6 +1573,7 @@ def list_expectation_suites( ) return keys # type: ignore[return-value] + @public_api def get_validator( self, datasource_name: Optional[str] = None, @@ -1589,10 +1605,60 @@ def get_validator( include_rendered_content: Optional[bool] = None, **kwargs: Optional[dict], ) -> Validator: - """ - This method applies only to the new (V3) Datasource schema. - """ + """Retrieve a Validator with a batch list and an `ExpectationSuite`. + `get_validator` first calls `get_batch_list` to retrieve a batch list, then creates or retrieves + an `ExpectationSuite` used to validate the Batches in the list. + + Args: + datasource_name: The name of the Datasource that defines the Data Asset to retrieve the batch for + data_connector_name: The Data Connector within the datasource for the Data Asset + data_asset_name: The name of the Data Asset within the Data Connector + batch: The Batch to use with the Validator + batch_list: The List of Batches to use with the Validator + batch_request: Encapsulates all the parameters used here to retrieve a BatchList. 
Use either + `batch_request` or the other params (but not both) + batch_request_list: A List of `BatchRequest` to use with the Validator + batch_data: Provides runtime data for the batch; is added as the key `batch_data` to + the `runtime_parameters` dictionary of a BatchRequest + query: Provides runtime data for the batch; is added as the key `query` to + the `runtime_parameters` dictionary of a BatchRequest + path: Provides runtime data for the batch; is added as the key `path` to + the `runtime_parameters` dictionary of a BatchRequest + runtime_parameters: Specifies runtime parameters for the BatchRequest; can include keys `batch_data`, + `query`, and `path` + data_connector_query: Used to specify connector query parameters; specifically `batch_filter_parameters`, + `limit`, `index`, and `custom_filter_function` + batch_identifiers: Any identifiers of batches for the BatchRequest + batch_filter_parameters: Filter parameters used in the data connector query + limit: Part of the data_connector_query, limits the number of batches in the batch list + index: Part of the data_connector_query, used to specify the index of which batch to return. 
Negative + numbers retrieve from the end of the list (ex: `-1` retrieves the last or latest batch) + custom_filter_function: A `Callable` function that accepts `batch_identifiers` and returns a `bool` + sampling_method: The method used to sample Batch data (see: Splitting and Sampling) + sampling_kwargs: Arguments for the sampling method + splitter_method: The method used to split the Data Asset into Batches + splitter_kwargs: Arguments for the splitting method + batch_spec_passthrough: Arguments specific to the `ExecutionEngine` that aid in Batch retrieval + expectation_suite_ge_cloud_id: The identifier of the ExpectationSuite to retrieve from the DataContext + (can be used in place of `expectation_suite_name`) + expectation_suite_name: The name of the ExpectationSuite to retrieve from the DataContext + expectation_suite: The ExpectationSuite to use with the validator + create_expectation_suite_with_name: Creates a Validator with a new ExpectationSuite with the provided name + include_rendered_content: If `True` the ExpectationSuite will include rendered content when saved + **kwargs: Used to specify either `batch_identifiers` or `batch_filter_parameters` + + Returns: + Validator: A Validator with the specified Batch list and ExpectationSuite + + Raises: + DatasourceError: If the specified `datasource_name` does not exist in the DataContext + TypeError: If the specified types of the `batch_request` are not supported, or if the + `datasource_name` is not a `str` + ValueError: If more than one exclusive parameter is specified (ex: specifying more than one + of `batch_data`, `query` or `path`), or if the `ExpectationSuite` cannot be created or + retrieved using either the provided name or identifier + """ include_rendered_content = ( self._determine_if_expectation_validation_result_include_rendered_content( include_rendered_content=include_rendered_content @@ -1748,6 +1814,7 @@ def get_validator_using_batch_list( return validator + @public_api 
@usage_statistics_enabled_method( event_name=UsageStatsEvents.DATA_CONTEXT_GET_BATCH_LIST, args_payload_fn=get_batch_list_usage_statistics, @@ -1776,48 +1843,53 @@ def get_batch_list( **kwargs: Optional[dict], ) -> List[Batch]: """Get the list of zero or more batches, based on a variety of flexible input types. - This method applies only to the new (V3) Datasource schema. - - Args: - batch_request - - datasource_name - data_connector_name - data_asset_name - batch_request - batch_data - query - path - runtime_parameters - data_connector_query - batch_identifiers - batch_filter_parameters - - limit - index - custom_filter_function - - sampling_method - sampling_kwargs - - splitter_method - splitter_kwargs + `get_batch_list` is the main user-facing API for getting batches. + In contrast to virtually all other methods in the class, it does not require typed or nested inputs. + Instead, this method is intended to help the user pick the right parameters - batch_spec_passthrough + This method attempts to return any number of batches, including an empty list. - **kwargs + Args: + datasource_name: The name of the Datasource that defines the Data Asset to retrieve the batch for + data_connector_name: The Data Connector within the datasource for the Data Asset + data_asset_name: The name of the Data Asset within the Data Connector + batch_request: Encapsulates all the parameters used here to retrieve a BatchList. 
Use either + `batch_request` or the other params (but not both) + batch_data: Provides runtime data for the batch; is added as the key `batch_data` to + the `runtime_parameters` dictionary of a BatchRequest + query: Provides runtime data for the batch; is added as the key `query` to + the `runtime_parameters` dictionary of a BatchRequest + path: Provides runtime data for the batch; is added as the key `path` to + the `runtime_parameters` dictionary of a BatchRequest + runtime_parameters: Specifies runtime parameters for the BatchRequest; can include keys `batch_data`, + `query`, and `path` + data_connector_query: Used to specify connector query parameters; specifically `batch_filter_parameters`, + `limit`, `index`, and `custom_filter_function` + batch_identifiers: Any identifiers of batches for the BatchRequest + batch_filter_parameters: Filter parameters used in the data connector query + limit: Part of the data_connector_query, limits the number of batches in the batch list + index: Part of the data_connector_query, used to specify the index of which batch to return. Negative + numbers retrieve from the end of the list (ex: `-1` retrieves the last or latest batch) + custom_filter_function: A `Callable` function that accepts `batch_identifiers` and returns a `bool` + sampling_method: The method used to sample Batch data (see: Splitting and Sampling) + sampling_kwargs: Arguments for the sampling method + splitter_method: The method used to split the Data Asset into Batches + splitter_kwargs: Arguments for the splitting method + batch_spec_passthrough: Arguments specific to the `ExecutionEngine` that aid in Batch retrieval + **kwargs: Used to specify either `batch_identifiers` or `batch_filter_parameters` Returns: - (Batch) The requested batch + (Batch) The `list` of requested Batch instances - `get_batch` is the main user-facing API for getting batches. - In contrast to virtually all other methods in the class, it does not require typed or nested inputs. 
- Instead, this method is intended to help the user pick the right parameters + Raises: + DatasourceError: If the specified `datasource_name` does not exist in the DataContext + TypeError: If the specified types of the `batch_request` are not supported, or if the + `datasource_name` is not a `str` + ValueError: If more than one exclusive parameter is specified (ex: specifying more than one + of `batch_data`, `query` or `path`) - This method attempts to return any number of batches, including an empty list. """ - batch_request = get_batch_request_from_acceptable_arguments( datasource_name=datasource_name, data_connector_name=data_connector_name, @@ -1936,22 +2008,36 @@ def delete_expectation_suite( self.expectations_store.remove_key(key) return True + @public_api + @deprecated_argument(argument_name="ge_cloud_id", version="0.15.45") def get_expectation_suite( self, expectation_suite_name: Optional[str] = None, include_rendered_content: Optional[bool] = None, ge_cloud_id: Optional[str] = None, ) -> ExpectationSuite: - """Get an Expectation Suite by name or GX Cloud ID + """Get an Expectation Suite by name. + Args: expectation_suite_name (str): The name of the Expectation Suite include_rendered_content (bool): Whether or not to re-populate rendered_content for each ExpectationConfiguration. - ge_cloud_id (str): The GX Cloud ID for the Expectation Suite. + ge_cloud_id (str): The GX Cloud ID for the Expectation Suite (unused) Returns: An existing ExpectationSuite + + Raises: + DataContextError: There is no expectation suite with the name provided """ + if ge_cloud_id is not None: + # deprecated-v0.15.45 + warnings.warn( + "ge_cloud_id is deprecated as of v0.15.45 and will be removed in v0.16. 
Please use" + "expectation_suite_name instead", + DeprecationWarning, + ) + key: Optional[ExpectationSuiteIdentifier] = ExpectationSuiteIdentifier( expectation_suite_name=expectation_suite_name # type: ignore[arg-type] ) diff --git a/great_expectations/expectations/core/expect_column_distinct_values_to_be_in_set.py b/great_expectations/expectations/core/expect_column_distinct_values_to_be_in_set.py index 4ca1dbcb4f0e..42934739f5a3 100644 --- a/great_expectations/expectations/core/expect_column_distinct_values_to_be_in_set.py +++ b/great_expectations/expectations/core/expect_column_distinct_values_to_be_in_set.py @@ -7,6 +7,7 @@ ExpectationConfiguration, ExpectationValidationResult, ) +from great_expectations.core._docs_decorators import public_api from great_expectations.execution_engine import ExecutionEngine from great_expectations.expectations.expectation import ( ColumnExpectation, @@ -330,10 +331,25 @@ def _descriptive_value_counts_bar_chart_renderer( return new_block + @public_api def validate_configuration( self, configuration: Optional[ExpectationConfiguration] = None ) -> None: - """Validating that user has inputted a value set and that configuration has been initialized""" + """Validates configuration for the Expectation. + + For `expect_column_distinct_values_to_be_in_set` we require that the `configuration.kwargs` contain + a `value_set` key that is either a `list`, `set`, or `dict`. + + The configuration will also be validated using each of the `validate_configuration` methods in its Expectation + superclass hierarchy. + + Args: + configuration: The ExpectationConfiguration to be validated. + + Raises: + InvalidExpectationConfigurationError: The configuration does not contain the values required by the + Expectation. 
+ """ super().validate_configuration(configuration) configuration = configuration or self.configuration try: diff --git a/great_expectations/expectations/core/expect_column_distinct_values_to_equal_set.py b/great_expectations/expectations/core/expect_column_distinct_values_to_equal_set.py index 4e55c7063694..aeb24a260ce3 100644 --- a/great_expectations/expectations/core/expect_column_distinct_values_to_equal_set.py +++ b/great_expectations/expectations/core/expect_column_distinct_values_to_equal_set.py @@ -4,6 +4,7 @@ ExpectationConfiguration, ExpectationValidationResult, ) +from great_expectations.core._docs_decorators import public_api from great_expectations.execution_engine import ExecutionEngine from great_expectations.expectations.expectation import ( ColumnExpectation, @@ -98,10 +99,26 @@ class ExpectColumnDistinctValuesToEqualSet(ColumnExpectation): "value_set", ) + @public_api def validate_configuration( self, configuration: Optional[ExpectationConfiguration] = None ) -> None: - """Validating that user has inputted a value set and that configuration has been initialized""" + """Validates configuration for the Expectation. + + For `expect_column_distinct_values_to_equal_set` we require that the `configuration.kwargs` contain + a `value_set` key that is either a `list`, `set`, or `dict`. + + + The configuration will also be validated using each of the `validate_configuration` methods in its Expectation + superclass hierarchy. + + Args: + configuration: The configuration to be validated. + + Raises: + InvalidExpectationConfigurationError: The configuration does not contain the values required by the + Expectation. 
+ """ super().validate_configuration(configuration) configuration = configuration or self.configuration try: diff --git a/great_expectations/expectations/core/expect_column_kl_divergence_to_be_less_than.py b/great_expectations/expectations/core/expect_column_kl_divergence_to_be_less_than.py index 2854a85eab8a..b738f8972982 100644 --- a/great_expectations/expectations/core/expect_column_kl_divergence_to_be_less_than.py +++ b/great_expectations/expectations/core/expect_column_kl_divergence_to_be_less_than.py @@ -10,6 +10,7 @@ ExpectationConfiguration, ExpectationValidationResult, ) +from great_expectations.core._docs_decorators import public_api from great_expectations.execution_engine import ExecutionEngine from great_expectations.execution_engine.util import ( is_valid_categorical_partition_object, @@ -187,18 +188,24 @@ class ExpectColumnKlDivergenceToBeLessThan(ColumnExpectation): "threshold", ) + @public_api def validate_configuration( self, configuration: Optional[ExpectationConfiguration] = None ) -> None: - """ - Validates that a configuration has been set, and sets a configuration if it has yet to be set. Ensures that - necessary configuration arguments have been provided for the validation of the expectation. + """Validates configuration for the Expectation. + + For `expect_column_kl_divergence_to_be_less_than`, `configuration.kwargs` may contain `min_value` and + `max_value` whose value is either a number or date. + + The configuration will also be validated using each of the `validate_configuration` methods in its Expectation + superclass hierarchy. Args: - configuration (OPTIONAL[ExpectationConfiguration]): \ - An optional Expectation Configuration entry that will be used to configure the expectation - Returns: - None. Raises InvalidExpectationConfigurationError if the config is not validated successfully + configuration: The configuration to be validated. 
+ + Raises: + InvalidExpectationConfigurationError: The configuration does not contain the values required by the + Expectation. """ super().validate_configuration(configuration) self.validate_metric_value_between_configuration(configuration=configuration) diff --git a/great_expectations/expectations/core/expect_column_mean_to_be_between.py b/great_expectations/expectations/core/expect_column_mean_to_be_between.py index cf7190621188..b36b00e686ca 100644 --- a/great_expectations/expectations/core/expect_column_mean_to_be_between.py +++ b/great_expectations/expectations/core/expect_column_mean_to_be_between.py @@ -4,6 +4,7 @@ ExpectationConfiguration, ExpectationValidationResult, ) +from great_expectations.core._docs_decorators import public_api from great_expectations.execution_engine import ExecutionEngine from great_expectations.expectations.expectation import ( ColumnExpectation, @@ -249,18 +250,21 @@ class ExpectColumnMeanToBeBetween(ColumnExpectation): "required": ["column"], } + @public_api def validate_configuration( self, configuration: Optional[ExpectationConfiguration] = None ) -> None: - """ - Validates that a configuration has been set, and sets a configuration if it has yet to be set. Ensures that - necessary configuration arguments have been provided for the validation of the expectation. + """Validates configuration for the Expectation. + + For `expect_column_mean_to_be_between`, `configuration.kwargs` may contain `min_value` and + `max_value` whose value is either a number or date. Args: - configuration (OPTIONAL[ExpectationConfiguration]): \ - An optional Expectation Configuration entry that will be used to configure the expectation - Returns: - None. Raises InvalidExpectationConfigurationError if the config is not validated successfully + configuration: The configuration to be validated. + + Raises: + InvalidExpectationConfigurationError: The configuration does not contain the values required by the + Expectation. 
""" super().validate_configuration(configuration) self.validate_metric_value_between_configuration(configuration=configuration) diff --git a/great_expectations/validator/validator.py b/great_expectations/validator/validator.py index 52451a236ca9..65e25e2b0fce 100644 --- a/great_expectations/validator/validator.py +++ b/great_expectations/validator/validator.py @@ -1557,7 +1557,7 @@ def save_expectation_suite( message="Only the str version of this argument is deprecated. run_id should be a RunIdentifier or dict. Support will be removed in 0.16.0.", version="0.13.0", ) - def validate( # noqa: C901 - complexity 31 + def validate( # noqa: C901 - Complexity 31 self, expectation_suite: str | ExpectationSuite | None = None, run_id: str | RunIdentifier | Dict[str, str] | None = None,