Skip to content

Commit

Permalink
[DOCS] Adding docstrings (#6854)
Browse files Browse the repository at this point in the history
Co-authored-by: Anthony Burdi <anthony@greatexpectations.io>
  • Loading branch information
donaldheppner and anthonyburdi authored Jan 26, 2023
1 parent f42d7df commit 56a208b
Show file tree
Hide file tree
Showing 6 changed files with 190 additions and 60 deletions.
172 changes: 129 additions & 43 deletions great_expectations/data_context/data_context/abstract_data_context.py
Original file line number Diff line number Diff line change
Expand Up @@ -315,6 +315,7 @@ def _save_project_config(self) -> None:
"""
self.variables.save_config()

@public_api
def update_project_config(
self, project_config: DataContextConfig | Mapping
) -> None:
Expand All @@ -325,6 +326,7 @@ def update_project_config(
"""
self.config.update(project_config)

@public_api
@usage_statistics_enabled_method(
event_name=UsageStatsEvents.DATA_CONTEXT_SAVE_EXPECTATION_SUITE,
args_payload_fn=save_expectation_suite_usage_statistics,
Expand All @@ -337,8 +339,21 @@ def save_expectation_suite(
include_rendered_content: Optional[bool] = None,
**kwargs: Optional[dict],
) -> None:
"""
Each DataContext will define how ExpectationSuite will be saved.
"""Save the provided ExpectationSuite into the DataContext using the configured ExpectationStore.
Args:
expectation_suite: The ExpectationSuite to save.
expectation_suite_name: The name of this ExpectationSuite. If no name is provided, the name will be read
from the suite.
overwrite_existing: Whether to overwrite the suite if it already exists.
include_rendered_content: Whether to save the prescriptive rendered content for each expectation.
kwargs: Additional parameters, unused
Returns:
None
Raises:
DataContextError: If a suite with the same name exists and `overwrite_existing` is set to `False`.
"""
if expectation_suite_name is None:
key = ExpectationSuiteIdentifier(
Expand Down Expand Up @@ -1444,6 +1459,7 @@ def delete_checkpoint(
name=name, ge_cloud_id=ge_cloud_id
)

@public_api
@usage_statistics_enabled_method(
event_name=UsageStatsEvents.DATA_CONTEXT_RUN_CHECKPOINT,
)
Expand All @@ -1467,8 +1483,7 @@ def run_checkpoint(
expectation_suite_ge_cloud_id: Optional[str] = None,
**kwargs,
) -> CheckpointResult:
"""
Validate against a pre-defined Checkpoint. (Experimental)
"""Validate using an existing Checkpoint.
Args:
checkpoint_name: The name of a Checkpoint defined via the CLI or by manually creating a yml file
Expand Down Expand Up @@ -1558,6 +1573,7 @@ def list_expectation_suites(
)
return keys # type: ignore[return-value]

@public_api
def get_validator(
self,
datasource_name: Optional[str] = None,
Expand Down Expand Up @@ -1589,10 +1605,60 @@ def get_validator(
include_rendered_content: Optional[bool] = None,
**kwargs: Optional[dict],
) -> Validator:
"""
This method applies only to the new (V3) Datasource schema.
"""
"""Retrieve a Validator with a batch list and an `ExpectationSuite`.
`get_validator` first calls `get_batch_list` to retrieve a batch list, then creates or retrieves
an `ExpectationSuite` used to validate the Batches in the list.
Args:
datasource_name: The name of the Datasource that defines the Data Asset to retrieve the batch for
data_connector_name: The Data Connector within the datasource for the Data Asset
data_asset_name: The name of the Data Asset within the Data Connector
batch: The Batch to use with the Validator
batch_list: The List of Batches to use with the Validator
batch_request: Encapsulates all the parameters used here to retrieve a BatchList. Use either
`batch_request` or the other params (but not both)
batch_request_list: A List of `BatchRequest` to use with the Validator
batch_data: Provides runtime data for the batch; is added as the key `batch_data` to
the `runtime_parameters` dictionary of a BatchRequest
query: Provides runtime data for the batch; is added as the key `query` to
the `runtime_parameters` dictionary of a BatchRequest
path: Provides runtime data for the batch; is added as the key `path` to
the `runtime_parameters` dictionary of a BatchRequest
runtime_parameters: Specifies runtime parameters for the BatchRequest; can include the keys `batch_data`,
`query`, and `path`
data_connector_query: Used to specify connector query parameters; specifically `batch_filter_parameters`,
`limit`, `index`, and `custom_filter_function`
batch_identifiers: Any identifiers of batches for the BatchRequest
batch_filter_parameters: Filter parameters used in the data connector query
limit: Part of the data_connector_query, limits the number of batches in the batch list
index: Part of the data_connector_query, used to specify the index of which batch to return. Negative
numbers retrieve from the end of the list (ex: `-1` retrieves the last or latest batch)
custom_filter_function: A `Callable` function that accepts `batch_identifiers` and returns a `bool`
sampling_method: The method used to sample Batch data (see: Splitting and Sampling)
sampling_kwargs: Arguments for the sampling method
splitter_method: The method used to split the Data Asset into Batches
splitter_kwargs: Arguments for the splitting method
batch_spec_passthrough: Arguments specific to the `ExecutionEngine` that aid in Batch retrieval
expectation_suite_ge_cloud_id: The identifier of the ExpectationSuite to retrieve from the DataContext
(can be used in place of `expectation_suite_name`)
expectation_suite_name: The name of the ExpectationSuite to retrieve from the DataContext
expectation_suite: The ExpectationSuite to use with the validator
create_expectation_suite_with_name: Creates a Validator with a new ExpectationSuite with the provided name
include_rendered_content: If `True` the ExpectationSuite will include rendered content when saved
**kwargs: Used to specify either `batch_identifiers` or `batch_filter_parameters`
Returns:
Validator: A Validator with the specified Batch list and ExpectationSuite
Raises:
DatasourceError: If the specified `datasource_name` does not exist in the DataContext
TypeError: If the specified types of the `batch_request` are not supported, or if the
`datasource_name` is not a `str`
ValueError: If more than one exclusive parameter is specified (ex: specifying more than one
of `batch_data`, `query` or `path`), or if the `ExpectationSuite` cannot be created or
retrieved using either the provided name or identifier
"""
include_rendered_content = (
self._determine_if_expectation_validation_result_include_rendered_content(
include_rendered_content=include_rendered_content
Expand Down Expand Up @@ -1748,6 +1814,7 @@ def get_validator_using_batch_list(

return validator

@public_api
@usage_statistics_enabled_method(
event_name=UsageStatsEvents.DATA_CONTEXT_GET_BATCH_LIST,
args_payload_fn=get_batch_list_usage_statistics,
Expand Down Expand Up @@ -1776,48 +1843,53 @@ def get_batch_list(
**kwargs: Optional[dict],
) -> List[Batch]:
"""Get the list of zero or more batches, based on a variety of flexible input types.
This method applies only to the new (V3) Datasource schema.
Args:
batch_request
datasource_name
data_connector_name
data_asset_name
batch_request
batch_data
query
path
runtime_parameters
data_connector_query
batch_identifiers
batch_filter_parameters
limit
index
custom_filter_function
sampling_method
sampling_kwargs
splitter_method
splitter_kwargs
`get_batch_list` is the main user-facing API for getting batches.
In contrast to virtually all other methods in the class, it does not require typed or nested inputs.
Instead, this method is intended to help the user pick the right parameters
batch_spec_passthrough
This method attempts to return any number of batches, including an empty list.
**kwargs
Args:
datasource_name: The name of the Datasource that defines the Data Asset to retrieve the batch for
data_connector_name: The Data Connector within the datasource for the Data Asset
data_asset_name: The name of the Data Asset within the Data Connector
batch_request: Encapsulates all the parameters used here to retrieve a BatchList. Use either
`batch_request` or the other params (but not both)
batch_data: Provides runtime data for the batch; is added as the key `batch_data` to
the `runtime_parameters` dictionary of a BatchRequest
query: Provides runtime data for the batch; is added as the key `query` to
the `runtime_parameters` dictionary of a BatchRequest
path: Provides runtime data for the batch; is added as the key `path` to
the `runtime_parameters` dictionary of a BatchRequest
runtime_parameters: Specifies runtime parameters for the BatchRequest; can include the keys `batch_data`,
`query`, and `path`
data_connector_query: Used to specify connector query parameters; specifically `batch_filter_parameters`,
`limit`, `index`, and `custom_filter_function`
batch_identifiers: Any identifiers of batches for the BatchRequest
batch_filter_parameters: Filter parameters used in the data connector query
limit: Part of the data_connector_query, limits the number of batches in the batch list
index: Part of the data_connector_query, used to specify the index of which batch to return. Negative
numbers retrieve from the end of the list (ex: `-1` retrieves the last or latest batch)
custom_filter_function: A `Callable` function that accepts `batch_identifiers` and returns a `bool`
sampling_method: The method used to sample Batch data (see: Splitting and Sampling)
sampling_kwargs: Arguments for the sampling method
splitter_method: The method used to split the Data Asset into Batches
splitter_kwargs: Arguments for the splitting method
batch_spec_passthrough: Arguments specific to the `ExecutionEngine` that aid in Batch retrieval
**kwargs: Used to specify either `batch_identifiers` or `batch_filter_parameters`
Returns:
(Batch) The requested batch
(Batch) The `list` of requested Batch instances
`get_batch` is the main user-facing API for getting batches.
In contrast to virtually all other methods in the class, it does not require typed or nested inputs.
Instead, this method is intended to help the user pick the right parameters
Raises:
DatasourceError: If the specified `datasource_name` does not exist in the DataContext
TypeError: If the specified types of the `batch_request` are not supported, or if the
`datasource_name` is not a `str`
ValueError: If more than one exclusive parameter is specified (ex: specifying more than one
of `batch_data`, `query` or `path`)
This method attempts to return any number of batches, including an empty list.
"""

batch_request = get_batch_request_from_acceptable_arguments(
datasource_name=datasource_name,
data_connector_name=data_connector_name,
Expand Down Expand Up @@ -1936,22 +2008,36 @@ def delete_expectation_suite(
self.expectations_store.remove_key(key)
return True

@public_api
@deprecated_argument(argument_name="ge_cloud_id", version="0.15.45")
def get_expectation_suite(
self,
expectation_suite_name: Optional[str] = None,
include_rendered_content: Optional[bool] = None,
ge_cloud_id: Optional[str] = None,
) -> ExpectationSuite:
"""Get an Expectation Suite by name or GX Cloud ID
"""Get an Expectation Suite by name.
Args:
expectation_suite_name (str): The name of the Expectation Suite
include_rendered_content (bool): Whether or not to re-populate rendered_content for each
ExpectationConfiguration.
ge_cloud_id (str): The GX Cloud ID for the Expectation Suite.
ge_cloud_id (str): The GX Cloud ID for the Expectation Suite (unused)
Returns:
An existing ExpectationSuite
Raises:
DataContextError: There is no expectation suite with the name provided
"""
if ge_cloud_id is not None:
# deprecated-v0.15.45
warnings.warn(
"ge_cloud_id is deprecated as of v0.15.45 and will be removed in v0.16. Please use"
"expectation_suite_name instead",
DeprecationWarning,
)

key: Optional[ExpectationSuiteIdentifier] = ExpectationSuiteIdentifier(
expectation_suite_name=expectation_suite_name # type: ignore[arg-type]
)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
ExpectationConfiguration,
ExpectationValidationResult,
)
from great_expectations.core._docs_decorators import public_api
from great_expectations.execution_engine import ExecutionEngine
from great_expectations.expectations.expectation import (
ColumnExpectation,
Expand Down Expand Up @@ -330,10 +331,25 @@ def _descriptive_value_counts_bar_chart_renderer(

return new_block

@public_api
def validate_configuration(
self, configuration: Optional[ExpectationConfiguration] = None
) -> None:
"""Validating that user has inputted a value set and that configuration has been initialized"""
"""Validates configuration for the Expectation.
For `expect_column_distinct_values_to_be_in_set` we require that the `configuration.kwargs` contain
a `value_set` key that is either a `list`, `set`, or `dict`.
The configuration will also be validated using each of the `validate_configuration` methods in its Expectation
superclass hierarchy.
Args:
configuration: The ExpectationConfiguration to be validated.
Raises:
InvalidExpectationConfigurationError: The configuration does not contain the values required by the
Expectation.
"""
super().validate_configuration(configuration)
configuration = configuration or self.configuration
try:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
ExpectationConfiguration,
ExpectationValidationResult,
)
from great_expectations.core._docs_decorators import public_api
from great_expectations.execution_engine import ExecutionEngine
from great_expectations.expectations.expectation import (
ColumnExpectation,
Expand Down Expand Up @@ -98,10 +99,26 @@ class ExpectColumnDistinctValuesToEqualSet(ColumnExpectation):
"value_set",
)

@public_api
def validate_configuration(
self, configuration: Optional[ExpectationConfiguration] = None
) -> None:
"""Validating that user has inputted a value set and that configuration has been initialized"""
"""Validates configuration for the Expectation.
For `expect_column_distinct_values_to_equal_set` we require that the `configuration.kwargs` contain
a `value_set` key that is either a `list`, `set`, or `dict`.
The configuration will also be validated using each of the `validate_configuration` methods in its Expectation
superclass hierarchy.
Args:
configuration: The configuration to be validated.
Raises:
InvalidExpectationConfigurationError: The configuration does not contain the values required by the
Expectation.
"""
super().validate_configuration(configuration)
configuration = configuration or self.configuration
try:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
ExpectationConfiguration,
ExpectationValidationResult,
)
from great_expectations.core._docs_decorators import public_api
from great_expectations.execution_engine import ExecutionEngine
from great_expectations.execution_engine.util import (
is_valid_categorical_partition_object,
Expand Down Expand Up @@ -187,18 +188,24 @@ class ExpectColumnKlDivergenceToBeLessThan(ColumnExpectation):
"threshold",
)

@public_api
def validate_configuration(
self, configuration: Optional[ExpectationConfiguration] = None
) -> None:
"""
Validates that a configuration has been set, and sets a configuration if it has yet to be set. Ensures that
necessary configuration arguments have been provided for the validation of the expectation.
"""Validates configuration for the Expectation.
For `expect_column_kl_divergence_to_be_less_than`, `configuration.kwargs` may contain `min_value` and
`max_value` whose value is either a number or date.
The configuration will also be validated using each of the `validate_configuration` methods in its Expectation
superclass hierarchy.
Args:
configuration (OPTIONAL[ExpectationConfiguration]): \
An optional Expectation Configuration entry that will be used to configure the expectation
Returns:
None. Raises InvalidExpectationConfigurationError if the config is not validated successfully
configuration: The configuration to be validated.
Raises:
InvalidExpectationConfigurationError: The configuration does not contain the values required by the
Expectation.
"""
super().validate_configuration(configuration)
self.validate_metric_value_between_configuration(configuration=configuration)
Expand Down
Loading

0 comments on commit 56a208b

Please sign in to comment.