Add diff printout for ruff format failure (#2084)

sdv-dev · Jun 21, 2024 · e0ccc94 · e0ccc94
1 parent 36a5619
commit e0ccc94
Show file tree

Hide file tree

Showing 14 changed files with 42 additions and 24 deletions.
diff --git a/Makefile b/Makefile
@@ -85,14 +85,13 @@ install-develop: clean-build clean-pyc ## install the package in editable mode a
 
 .PHONY: lint-sdv
 lint-sdv: ## check style with flake8 and isort
-	flake8 sdv
-	isort -c sdv
-	pydocstyle sdv
+	ruff check sdv/
+	ruff format --check --diff sdv/
 
 .PHONY: lint-tests
 lint-tests: ## check style with flake8 and isort
-	flake8 --ignore=D,SFS2 tests
-	isort -c tests
+	ruff check tests/
+	ruff format --check --diff tests/
 
 .PHONY: check-dependencies
 check-dependencies: ## test if there are any broken dependencies
@@ -104,8 +103,7 @@ lint:
 
 .PHONY: fix-lint
 fix-lint:
-	ruff check --fix .
-	ruff format
+	invoke fix-lint
 
 
 # TEST TARGETS

diff --git a/pyproject.toml b/pyproject.toml
@@ -192,14 +192,32 @@ select = [
     # Pycodestyle
     "E",
     "W",
-    "D200",
+    # pydocstyle
+    "D",
     # isort
     "I001",
+    # print statements
+    "T201",
+    # pandas-vet
+    "PD"
 ]
 ignore = [
     "E501",
+    # pydocstyle
+    "D100",
+    "D101",
+    "D102",
+    "D103",
+    "D104",
+    "D105", # Missing docstring in magic method
     "D107",  # Missing docstring in __init__
+    "D205",
+    "D301",
+    "D415",
     "D417",   # Missing argument descriptions in the docstring, this is a bug from pydocstyle: https://github.com/PyCQA/pydocstyle/issues/449
+    # pandas-vet
+    "PD901",
+    "PD101",
 ]
 
 [tool.ruff.format]
@@ -211,6 +229,7 @@ docstring-code-line-length = "dynamic"
 
 [tool.ruff.lint.isort]
 known-first-party = ["sdv"]
+lines-between-types = 0
 
 [tool.ruff.lint.per-file-ignores]
 "__init__.py" = ["F401", "E402", "F403", "F405", "E501", "I001"]

diff --git a/sdv/multi_table/base.py b/sdv/multi_table/base.py
@@ -85,7 +85,7 @@ def _get_pbar_args(self, **kwargs):
 
     def _print(self, text='', **kwargs):
         if self.verbose:
-            print(text, **kwargs)  # noqa: T001
+            print(text, **kwargs)  # noqa: T201
 
     def _check_metadata_updated(self):
         if self.metadata._check_updated_flag():

diff --git a/sdv/multi_table/utils.py b/sdv/multi_table/utils.py
@@ -363,7 +363,7 @@ def _print_simplified_schema_summary(data_before, data_after):
         ],
     })
     message.append(summary.to_string(index=False))
-    print('\n'.join(message))  # noqa: T001
+    print('\n'.join(message))  # noqa: T201
 
 
 def _get_rows_to_drop(data, metadata):
@@ -660,4 +660,4 @@ def _print_subsample_summary(data_before, data_after):
     subsample_rows = 100 * (1 - summary['# Rows (After)'].sum() / summary['# Rows (Before)'].sum())
     message = [f'Success! Your subset has {round(subsample_rows)}% less rows than the original.\n']
     message.append(summary.to_string(index=False))
-    print('\n'.join(message))  # noqa: T001
+    print('\n'.join(message))  # noqa: T201
diff --git a/sdv/single_table/ctgan.py b/sdv/single_table/ctgan.py
@@ -255,7 +255,7 @@ def _print_warning(self, data):
                 lines_to_print.append(f'{column:{cap}} {num_generated_columns}')
 
             generated_columns_str = '\n'.join(lines_to_print)
-            print(  # noqa: T001
+            print(  # noqa: T201
                 'PerformanceAlert: Using the CTGANSynthesizer on this data is not recommended. '
                 'To model this data, CTGAN will generate a large number of columns.'
                 '\n\n'

diff --git a/tasks.py b/tasks.py
@@ -123,11 +123,18 @@ def tutorials(c):
 def lint(c):
     check_dependencies(c)
     c.run('ruff check .')
-    c.run('ruff format . --check')
+    c.run('ruff format --check --diff .')
+
+
+@task
+def fix_lint(c):
+    check_dependencies(c)
+    c.run('ruff check --fix .')
+    c.run('ruff format .')
 
 
 def remove_readonly(func, path, _):
-    "Clear the readonly bit and reattempt the removal"
+    """Clear the readonly bit and reattempt the removal."""
     os.chmod(path, stat.S_IWRITE)
     func(path)
 

diff --git a/tests/integration/metadata/test_multi_table.py b/tests/integration/metadata/test_multi_table.py
@@ -10,7 +10,6 @@
 
 def test_multi_table_metadata():
     """Test ``MultiTableMetadata``."""
-
     # Create an instance
     instance = MultiTableMetadata()
 

diff --git a/tests/integration/metadata/test_single_table.py b/tests/integration/metadata/test_single_table.py
@@ -15,7 +15,6 @@
 
 def test_single_table_metadata():
     """Test ``SingleTableMetadata``."""
-
     # Create an instance
     instance = SingleTableMetadata()
 

diff --git a/tests/integration/multi_table/test_hma.py b/tests/integration/multi_table/test_hma.py
@@ -457,7 +457,6 @@ def test_synthesize_multiple_tables_using_hma(self, tmp_path):
             * Saving, loading and sampling from the loaded model
             * Using a custom configuration for the ``HMASynthesizer``
         """
-
         # Loading the demo data
         real_data, metadata = download_demo(modality='multi_table', dataset_name='fake_hotels')
 

diff --git a/tests/unit/constraints/test_base.py b/tests/unit/constraints/test_base.py
@@ -655,7 +655,6 @@ def test___init__(self):
         Side Effects:
             - ``instance.constraint_columns`` is a list from the string given before.
         """
-
         # Run
         constraint = Mock()
         instance = ColumnsModel(constraint, 'age')
@@ -678,7 +677,6 @@ def test___init__list(self):
         Side Effects:
             - ``instance.constraint_columns`` is the input list.
         """
-
         # Run
         constraint = Mock()
         instance = ColumnsModel(constraint, ['age', 'age_when_joined'])

diff --git a/tests/unit/constraints/test_tabular.py b/tests/unit/constraints/test_tabular.py
@@ -101,6 +101,7 @@ def test__validate_inputs(self):
 
         Input:
         -  Incorrect parameters for the method.
+
         Raises:
         - List of ValueErrors
         """
@@ -3086,7 +3087,6 @@ def test_is_valid_invalid(self):
 
     def test_is_valid_with_nans(self):
         """Test the ``Range.is_valid`` when there are NaNs in the columns."""
-
         # Setup
         table_data_valid = pd.DataFrame({
             'low': [1, np.nan, 3, 4, np.nan, 1],

diff --git a/tests/unit/data_processing/test_data_processor.py b/tests/unit/data_processing/test_data_processor.py
@@ -1477,8 +1477,7 @@ def test_update_transformers_ignores_rdt_refit_warning(self):
             dp.update_transformers({'col1': GaussianNormalizer()})
 
     def test_update_transformers_for_key(self):
-        """
-        Test when ``transformer`` is not ``AnonymizedFaker``, ``IDGenerator,
+        """Test when ``transformer`` is not ``AnonymizedFaker``, ``IDGenerator``,
         or ``RegexGenerator`` for keys.
         """
         # Setup

diff --git a/tests/unit/metadata/test_multi_table.py b/tests/unit/metadata/test_multi_table.py
@@ -141,7 +141,6 @@ def test__reset_updated_flag(self):
     def test__validate_missing_relationship_keys_foreign_key(self):
         """Test the ``_validate_missing_relationship_keys`` method of ``MultiTableMetadata``.
 
-
         Setup:
             - Mock ``parent_table`` and ``child_table``.
             - Instance of ``MultiTableMetadata``.

diff --git a/tests/unit/metadata/test_single_table.py b/tests/unit/metadata/test_single_table.py
@@ -961,7 +961,8 @@ def test__determine_sdtype_for_numbers(self):
               categorical sdtype
             - A series of numbers with all unique values. Should be detected as id sdtype
             - A series of integers. Should be detected as numerical sdtype
-            - A series of floats. Should be detected as numerical sdtype"""
+            - A series of floats. Should be detected as numerical sdtype
+        """
         # Setup
         instance = SingleTableMetadata()