Skip to content

Commit

Permalink
[DOCS] Adding docstrings (#6854)
Browse files Browse the repository at this point in the history
Co-authored-by: Anthony Burdi <anthony@greatexpectations.io>
  • Loading branch information
donaldheppner and anthonyburdi authored Jan 26, 2023
1 parent f42d7df commit 56a208b
Show file tree
Hide file tree
Showing 6 changed files with 190 additions and 60 deletions.
172 changes: 129 additions & 43 deletions great_expectations/data_context/data_context/abstract_data_context.py
Original file line number Diff line number Diff line change
Expand Up @@ -315,6 +315,7 @@ def _save_project_config(self) -> None:
"""
self.variables.save_config()

@public_api
def update_project_config(
self, project_config: DataContextConfig | Mapping
) -> None:
Expand All @@ -325,6 +326,7 @@ def update_project_config(
"""
self.config.update(project_config)

@public_api
@usage_statistics_enabled_method(
event_name=UsageStatsEvents.DATA_CONTEXT_SAVE_EXPECTATION_SUITE,
args_payload_fn=save_expectation_suite_usage_statistics,
Expand All @@ -337,8 +339,21 @@ def save_expectation_suite(
include_rendered_content: Optional[bool] = None,
**kwargs: Optional[dict],
) -> None:
"""
Each DataContext will define how ExpectationSuite will be saved.
"""Save the provided ExpectationSuite into the DataContext using the configured ExpectationStore.
Args:
expectation_suite: The ExpectationSuite to save.
expectation_suite_name: The name of this ExpectationSuite. If no name is provided, the name will be read
from the suite.
overwrite_existing: Whether to overwrite the suite if it already exists.
include_rendered_content: Whether to save the prescriptive rendered content for each expectation.
kwargs: Additional parameters, unused
Returns:
None
Raises:
DataContextError: If a suite with the same name exists and `overwrite_existing` is set to `False`.
"""
if expectation_suite_name is None:
key = ExpectationSuiteIdentifier(
Expand Down Expand Up @@ -1444,6 +1459,7 @@ def delete_checkpoint(
name=name, ge_cloud_id=ge_cloud_id
)

@public_api
@usage_statistics_enabled_method(
event_name=UsageStatsEvents.DATA_CONTEXT_RUN_CHECKPOINT,
)
Expand All @@ -1467,8 +1483,7 @@ def run_checkpoint(
expectation_suite_ge_cloud_id: Optional[str] = None,
**kwargs,
) -> CheckpointResult:
"""
Validate against a pre-defined Checkpoint. (Experimental)
"""Validate using an existing Checkpoint.
Args:
checkpoint_name: The name of a Checkpoint defined via the CLI or by manually creating a yml file
Expand Down Expand Up @@ -1558,6 +1573,7 @@ def list_expectation_suites(
)
return keys # type: ignore[return-value]

@public_api
def get_validator(
self,
datasource_name: Optional[str] = None,
Expand Down Expand Up @@ -1589,10 +1605,60 @@ def get_validator(
include_rendered_content: Optional[bool] = None,
**kwargs: Optional[dict],
) -> Validator:
"""
This method applies only to the new (V3) Datasource schema.
"""
"""Retrieve a Validator with a batch list and an `ExpectationSuite`.
`get_validator` first calls `get_batch_list` to retrieve a batch list, then creates or retrieves
an `ExpectationSuite` used to validate the Batches in the list.
Args:
datasource_name: The name of the Datasource that defines the Data Asset to retrieve the batch for
data_connector_name: The Data Connector within the datasource for the Data Asset
data_asset_name: The name of the Data Asset within the Data Connector
batch: The Batch to use with the Validator
batch_list: The List of Batches to use with the Validator
batch_request: Encapsulates all the parameters used here to retrieve a BatchList. Use either
`batch_request` or the other params (but not both)
batch_request_list: A List of `BatchRequest` to use with the Validator
batch_data: Provides runtime data for the batch; is added as the key `batch_data` to
the `runtime_parameters` dictionary of a BatchRequest
query: Provides runtime data for the batch; is added as the key `query` to
the `runtime_parameters` dictionary of a BatchRequest
path: Provides runtime data for the batch; is added as the key `path` to
the `runtime_parameters` dictionary of a BatchRequest
runtime_parameters: Specifies runtime parameters for the BatchRequest; can include the keys `batch_data`,
`query`, and `path`
data_connector_query: Used to specify connector query parameters; specifically `batch_filter_parameters`,
`limit`, `index`, and `custom_filter_function`
batch_identifiers: Any identifiers of batches for the BatchRequest
batch_filter_parameters: Filter parameters used in the data connector query
limit: Part of the data_connector_query, limits the number of batches in the batch list
index: Part of the data_connector_query, used to specify the index of which batch to return. Negative
numbers retrieve from the end of the list (ex: `-1` retrieves the last or latest batch)
custom_filter_function: A `Callable` function that accepts `batch_identifiers` and returns a `bool`
sampling_method: The method used to sample Batch data (see: Splitting and Sampling)
sampling_kwargs: Arguments for the sampling method
splitter_method: The method used to split the Data Asset into Batches
splitter_kwargs: Arguments for the splitting method
batch_spec_passthrough: Arguments specific to the `ExecutionEngine` that aid in Batch retrieval
expectation_suite_ge_cloud_id: The identifier of the ExpectationSuite to retrieve from the DataContext
(can be used in place of `expectation_suite_name`)
expectation_suite_name: The name of the ExpectationSuite to retrieve from the DataContext
expectation_suite: The ExpectationSuite to use with the validator
create_expectation_suite_with_name: Creates a Validator with a new ExpectationSuite with the provided name
include_rendered_content: If `True` the ExpectationSuite will include rendered content when saved
**kwargs: Used to specify either `batch_identifiers` or `batch_filter_parameters`
Returns:
Validator: A Validator with the specified Batch list and ExpectationSuite
Raises:
DatasourceError: If the specified `datasource_name` does not exist in the DataContext
TypeError: If the specified types of the `batch_request` are not supported, or if the
`datasource_name` is not a `str`
ValueError: If more than one exclusive parameter is specified (ex: specifying more than one
of `batch_data`, `query` or `path`), or if the `ExpectationSuite` cannot be created or
retrieved using either the provided name or identifier
"""
include_rendered_content = (
self._determine_if_expectation_validation_result_include_rendered_content(
include_rendered_content=include_rendered_content
Expand Down Expand Up @@ -1748,6 +1814,7 @@ def get_validator_using_batch_list(

return validator

@public_api
@usage_statistics_enabled_method(
event_name=UsageStatsEvents.DATA_CONTEXT_GET_BATCH_LIST,
args_payload_fn=get_batch_list_usage_statistics,
Expand Down Expand Up @@ -1776,48 +1843,53 @@ def get_batch_list(
**kwargs: Optional[dict],
) -> List[Batch]:
"""Get the list of zero or more batches, based on a variety of flexible input types.
This method applies only to the new (V3) Datasource schema.
Args:
batch_request
datasource_name
data_connector_name
data_asset_name
batch_request
batch_data
query
path
runtime_parameters
data_connector_query
batch_identifiers
batch_filter_parameters
limit
index
custom_filter_function
sampling_method
sampling_kwargs
splitter_method
splitter_kwargs
`get_batch_list` is the main user-facing API for getting batches.
In contrast to virtually all other methods in the class, it does not require typed or nested inputs.
Instead, this method is intended to help the user pick the right parameters
batch_spec_passthrough
This method attempts to return any number of batches, including an empty list.
**kwargs
Args:
datasource_name: The name of the Datasource that defines the Data Asset to retrieve the batch for
data_connector_name: The Data Connector within the datasource for the Data Asset
data_asset_name: The name of the Data Asset within the Data Connector
batch_request: Encapsulates all the parameters used here to retrieve a BatchList. Use either
`batch_request` or the other params (but not both)
batch_data: Provides runtime data for the batch; is added as the key `batch_data` to
the `runtime_parameters` dictionary of a BatchRequest
query: Provides runtime data for the batch; is added as the key `query` to
the `runtime_parameters` dictionary of a BatchRequest
path: Provides runtime data for the batch; is added as the key `path` to
the `runtime_parameters` dictionary of a BatchRequest
runtime_parameters: Specifies runtime parameters for the BatchRequest; can include the keys `batch_data`,
`query`, and `path`
data_connector_query: Used to specify connector query parameters; specifically `batch_filter_parameters`,
`limit`, `index`, and `custom_filter_function`
batch_identifiers: Any identifiers of batches for the BatchRequest
batch_filter_parameters: Filter parameters used in the data connector query
limit: Part of the data_connector_query, limits the number of batches in the batch list
index: Part of the data_connector_query, used to specify the index of which batch to return. Negative
numbers retrieve from the end of the list (ex: `-1` retrieves the last or latest batch)
custom_filter_function: A `Callable` function that accepts `batch_identifiers` and returns a `bool`
sampling_method: The method used to sample Batch data (see: Splitting and Sampling)
sampling_kwargs: Arguments for the sampling method
splitter_method: The method used to split the Data Asset into Batches
splitter_kwargs: Arguments for the splitting method
batch_spec_passthrough: Arguments specific to the `ExecutionEngine` that aid in Batch retrieval
**kwargs: Used to specify either `batch_identifiers` or `batch_filter_parameters`
Returns:
(Batch) The requested batch
(Batch) The `list` of requested Batch instances
`get_batch` is the main user-facing API for getting batches.
In contrast to virtually all other methods in the class, it does not require typed or nested inputs.
Instead, this method is intended to help the user pick the right parameters
Raises:
DatasourceError: If the specified `datasource_name` does not exist in the DataContext
TypeError: If the specified types of the `batch_request` are not supported, or if the
`datasource_name` is not a `str`
ValueError: If more than one exclusive parameter is specified (ex: specifying more than one
of `batch_data`, `query` or `path`)
This method attempts to return any number of batches, including an empty list.
"""

batch_request = get_batch_request_from_acceptable_arguments(
datasource_name=datasource_name,
data_connector_name=data_connector_name,
Expand Down Expand Up @@ -1936,22 +2008,36 @@ def delete_expectation_suite(
self.expectations_store.remove_key(key)
return True

@public_api
@deprecated_argument(argument_name="ge_cloud_id", version="0.15.45")
def get_expectation_suite(
self,
expectation_suite_name: Optional[str] = None,
include_rendered_content: Optional[bool] = None,
ge_cloud_id: Optional[str] = None,
) -> ExpectationSuite:
"""Get an Expectation Suite by name or GX Cloud ID
"""Get an Expectation Suite by name.
Args:
expectation_suite_name (str): The name of the Expectation Suite
include_rendered_content (bool): Whether or not to re-populate rendered_content for each
ExpectationConfiguration.
ge_cloud_id (str): The GX Cloud ID for the Expectation Suite.
ge_cloud_id (str): The GX Cloud ID for the Expectation Suite (unused)
Returns:
An existing ExpectationSuite
Raises:
DataContextError: There is no expectation suite with the name provided
"""
if ge_cloud_id is not None:
# deprecated-v0.15.45
warnings.warn(
"ge_cloud_id is deprecated as of v0.15.45 and will be removed in v0.16. Please use"
"expectation_suite_name instead",
DeprecationWarning,
)

key: Optional[ExpectationSuiteIdentifier] = ExpectationSuiteIdentifier(
expectation_suite_name=expectation_suite_name # type: ignore[arg-type]
)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
ExpectationConfiguration,
ExpectationValidationResult,
)
from great_expectations.core._docs_decorators import public_api
from great_expectations.execution_engine import ExecutionEngine
from great_expectations.expectations.expectation import (
ColumnExpectation,
Expand Down Expand Up @@ -330,10 +331,25 @@ def _descriptive_value_counts_bar_chart_renderer(

return new_block

@public_api
def validate_configuration(
self, configuration: Optional[ExpectationConfiguration] = None
) -> None:
"""Validating that user has inputted a value set and that configuration has been initialized"""
"""Validates configuration for the Expectation.
For `expect_column_distinct_values_to_be_in_set` we require that the `configuration.kwargs` contain
a `value_set` key that is either a `list`, `set`, or `dict`.
The configuration will also be validated using each of the `validate_configuration` methods in its Expectation
superclass hierarchy.
Args:
configuration: The ExpectationConfiguration to be validated.
Raises:
InvalidExpectationConfigurationError: The configuration does not contain the values required by the
Expectation.
"""
super().validate_configuration(configuration)
configuration = configuration or self.configuration
try:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
ExpectationConfiguration,
ExpectationValidationResult,
)
from great_expectations.core._docs_decorators import public_api
from great_expectations.execution_engine import ExecutionEngine
from great_expectations.expectations.expectation import (
ColumnExpectation,
Expand Down Expand Up @@ -98,10 +99,26 @@ class ExpectColumnDistinctValuesToEqualSet(ColumnExpectation):
"value_set",
)

@public_api
def validate_configuration(
self, configuration: Optional[ExpectationConfiguration] = None
) -> None:
"""Validating that user has inputted a value set and that configuration has been initialized"""
"""Validates configuration for the Expectation.
For `expect_column_distinct_values_to_equal_set` we require that the `configuration.kwargs` contain
a `value_set` key that is either a `list`, `set`, or `dict`.
The configuration will also be validated using each of the `validate_configuration` methods in its Expectation
superclass hierarchy.
Args:
configuration: The configuration to be validated.
Raises:
InvalidExpectationConfigurationError: The configuration does not contain the values required by the
Expectation.
"""
super().validate_configuration(configuration)
configuration = configuration or self.configuration
try:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
ExpectationConfiguration,
ExpectationValidationResult,
)
from great_expectations.core._docs_decorators import public_api
from great_expectations.execution_engine import ExecutionEngine
from great_expectations.execution_engine.util import (
is_valid_categorical_partition_object,
Expand Down Expand Up @@ -187,18 +188,24 @@ class ExpectColumnKlDivergenceToBeLessThan(ColumnExpectation):
"threshold",
)

@public_api
def validate_configuration(
self, configuration: Optional[ExpectationConfiguration] = None
) -> None:
"""
Validates that a configuration has been set, and sets a configuration if it has yet to be set. Ensures that
necessary configuration arguments have been provided for the validation of the expectation.
"""Validates configuration for the Expectation.
For `expect_column_kl_divergence_to_be_less_than`, `configuration.kwargs` may contain `min_value` and
`max_value` whose value is either a number or date.
The configuration will also be validated using each of the `validate_configuration` methods in its Expectation
superclass hierarchy.
Args:
configuration (OPTIONAL[ExpectationConfiguration]): \
An optional Expectation Configuration entry that will be used to configure the expectation
Returns:
None. Raises InvalidExpectationConfigurationError if the config is not validated successfully
configuration: The configuration to be validated.
Raises:
InvalidExpectationConfigurationError: The configuration does not contain the values required by the
Expectation.
"""
super().validate_configuration(configuration)
self.validate_metric_value_between_configuration(configuration=configuration)
Expand Down
Loading

0 comments on commit 56a208b

Please sign in to comment.