From eb78c670262996290dacba7e1406555c98494dc8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Michal=20Bel=C3=A1k?= Date: Thu, 7 Mar 2024 20:28:46 +0100 Subject: [PATCH] refactor: use ruff (#217) ruff replaces `black`, `isort` & `pylint` for formatting & linting. Note that `isort` is still used to sort imports when exporting to notebook since ruff does not provide a Python API. Resolves #214 --- .github/workflows/test.yaml | 5 +- CONTRIBUTING.md | 32 +++----- README.md | 8 +- docs/conf.py | 80 ++++++++++--------- edvart/data_types.py | 2 +- edvart/report_sections/bivariate_analysis.py | 3 +- .../report_sections/code_string_formatting.py | 4 +- .../timeseries_analysis/short_time_ft.py | 2 +- .../timeseries_analysis/stationarity_tests.py | 4 +- .../timeseries_analysis.py | 6 +- pyproject.toml | 23 +++++- tests/test_multivariate_analysis.py | 4 +- tests/test_timeseries_analysis.py | 16 ++-- 13 files changed, 98 insertions(+), 91 deletions(-) diff --git a/.github/workflows/test.yaml b/.github/workflows/test.yaml index b32f8a8..0e473a4 100644 --- a/.github/workflows/test.yaml +++ b/.github/workflows/test.yaml @@ -45,9 +45,8 @@ jobs: poetry run python -m pytest -n auto --disable-warnings --cov=edvart tests/ - name: Lint run: | - poetry run pylint --rcfile=.pylintrc edvart/ - poetry run black --check --line-length 100 edvart/ tests/ - poetry run isort --check --line-length 100 --profile black edvart/ tests/ + poetry run ruff check . + poetry run ruff format --check . dismiss-stale-reviews: runs-on: ubuntu-22.04 diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index 98fe574..70bf401 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -85,30 +85,20 @@ If you add a new section, add the section description into `docs/sections.rst` ## Code style -* The line length is limited to 100 characters in Python code, except if it would make the code less readable. -* `black` is used for formatting Python code. The following command can be used to properly format the code: +* The line length is limited to 100 characters in Python code, +except if it would make the code less readable. +* `ruff` is used for formatting and linting Python code. +The following commands can be used to properly format the code and check +for linting errors with automatic fixing: ```bash -poetry run black --line-length 100 edvart/ tests/ +poetry run ruff format . +poetry run ruff check . --fix ``` -The following command can be used to check if the code is properly formatted: +The following command can be used to check if the code is properly +formatted and check for linting errors: ```bash -poetry run black --check --line-length 100 edvart/ tests/ -```` - -* `isort` is used for sorting imports. -The following command can be used to properly sort imports: -```bash -poetry run isort --line-length 100 --profile black edvart/ tests/ -``` -The following command can be used to check if the imports are properly sorted: -```bash -poetry run isort --check --line-length 100 --profile black edvart/ tests/ -``` - -* `pylint` is used to lint Python code. Tests are not required to be linted. -The following command can be used to lint the code: -```bash -poetry run pylint --rcfile=".pylintrc" edvart +poetry run ruff format --check . +poetry run ruff check . ``` All of the above code style requirements are enforced by the CI pipeline. diff --git a/README.md b/README.md index 1c0a579..c88d618 100644 --- a/README.md +++ b/README.md @@ -10,13 +10,9 @@ PyPI - Downloads - - Code style: black + + Ruff - - Imports: isort - -

Edvart is an open-source Python library designed to simplify and streamline diff --git a/docs/conf.py b/docs/conf.py index b7ab5a9..1b08695 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -14,28 +14,27 @@ # import os import sys -import re from datetime import datetime from pathlib import Path -sys.path.insert(0, os.path.abspath('.')) -sys.path.insert(0, os.path.abspath('../')) +import toml # make sure that toml is a developer dependency -# -- Project information ----------------------------------------------------- +sys.path.insert(0, os.path.abspath(".")) +sys.path.insert(0, os.path.abspath("../")) -project = 'edvart' -copyright = f'{datetime.now().year}, Datamole' -author = 'Hellen team' +# -- Project information ----------------------------------------------------- +project = "edvart" +copyright = f"{datetime.now().year}, Datamole" +author = "Hellen team" -import toml # make sure that toml is a developer dependency metadata = toml.load(Path(__file__).parent.parent / "pyproject.toml")["tool"]["poetry"] version = release = metadata["version"] version_long = version # The short X.Y.Z version -version = '.'.join(version_long.split('.')[0:3]) +version = ".".join(version_long.split(".")[0:3]) # The full version, including alpha/beta/rc tags release = version_long @@ -51,12 +50,12 @@ # extensions coming with Sphinx (named 'sphinx.ext.*') or your custom # ones. extensions = [ - 'sphinx.ext.autodoc', - 'sphinx.ext.intersphinx', - 'sphinx.ext.mathjax', - 'sphinx.ext.viewcode', - 'sphinx.ext.napoleon', - 'sphinx_copybutton', + "sphinx.ext.autodoc", + "sphinx.ext.intersphinx", + "sphinx.ext.mathjax", + "sphinx.ext.viewcode", + "sphinx.ext.napoleon", + "sphinx_copybutton", ] # napoleon_google_docstring = False @@ -64,16 +63,16 @@ # napoleon_use_ivar = True # Add any paths that contain templates here, relative to this directory. -templates_path = ['_templates'] +templates_path = ["_templates"] # The suffix(es) of source filenames. # You can specify multiple suffix as a list of string: # # source_suffix = ['.rst', '.md'] -source_suffix = '.rst' +source_suffix = ".rst" # The master toctree document. -master_doc = 'index' +master_doc = "index" # The language for content autogenerated by Sphinx. Refer to documentation # for a list of supported languages. @@ -85,10 +84,10 @@ # List of patterns, relative to source directory, that match files and # directories to ignore when looking for source files. # This pattern also affects html_static_path and html_extra_path . -exclude_patterns = ['_build', 'Thumbs.db', '.DS_Store'] +exclude_patterns = ["_build", "Thumbs.db", ".DS_Store"] # The name of the Pygments (syntax highlighting) style to use. -pygments_style = 'sphinx' +pygments_style = "sphinx" # -- Options for HTML output ------------------------------------------------- @@ -96,9 +95,9 @@ # The theme to use for HTML and HTML Help pages. See the documentation for # a list of builtin themes. # -#html_theme = 'alabaster' -#html_theme = 'sphinx_rtd_theme' -html_theme = 'sphinx_rtd_theme' +# html_theme = 'alabaster' +# html_theme = 'sphinx_rtd_theme' +html_theme = "sphinx_rtd_theme" # Theme options are theme-specific and customize the look and feel of a theme # further. For a list of options available for each theme, see the @@ -109,7 +108,7 @@ # Add any paths that contain custom static files (such as style sheets) here, # relative to this directory. They are copied after the builtin static files, # so a file named "default.css" will overwrite the builtin "default.css". -html_static_path = ['_static'] +html_static_path = ["_static"] # Custom sidebar templates, must be a dictionary that maps document names # to template names. @@ -125,7 +124,7 @@ # -- Options for HTMLHelp output --------------------------------------------- # Output file base name for HTML help builder. -htmlhelp_basename = 'edvartdoc' +htmlhelp_basename = "edvartdoc" # -- Options for LaTeX output ------------------------------------------------ @@ -134,15 +133,12 @@ # The paper size ('letterpaper' or 'a4paper'). # # 'papersize': 'letterpaper', - # The font size ('10pt', '11pt' or '12pt'). # # 'pointsize': '10pt', - # Additional stuff for the LaTeX preamble. # # 'preamble': '', - # Latex figure (float) alignment # # 'figure_align': 'htbp', @@ -152,8 +148,13 @@ # (source start file, target name, title, # author, documentclass [howto, manual, or own class]). latex_documents = [ - (master_doc, 'edvart.tex', 'edvart Documentation', - 'Your name (or your organization/company/team)', 'manual'), + ( + master_doc, + "edvart.tex", + "edvart Documentation", + "Your name (or your organization/company/team)", + "manual", + ), ] @@ -161,10 +162,7 @@ # One entry per manual page. List of tuples # (source start file, name, description, authors, manual section). -man_pages = [ - (master_doc, 'edvart', 'edvart Documentation', - [author], 1) -] +man_pages = [(master_doc, "edvart", "edvart Documentation", [author], 1)] # -- Options for Texinfo output ---------------------------------------------- @@ -173,9 +171,15 @@ # (source start file, target name, title, author, # dir menu entry, description, category) texinfo_documents = [ - (master_doc, 'edvart', 'edvart Documentation', - author, 'edvart', 'One line description of project.', - 'Miscellaneous'), + ( + master_doc, + "edvart", + "edvart Documentation", + author, + "edvart", + "One line description of project.", + "Miscellaneous", + ), ] @@ -184,4 +188,4 @@ # -- Options for intersphinx extension --------------------------------------- # Example configuration for intersphinx: refer to the Python standard library. -intersphinx_mapping = {'https://docs.python.org/': None} +intersphinx_mapping = {"https://docs.python.org/": None} diff --git a/edvart/data_types.py b/edvart/data_types.py index cba19b9..f4ffcfb 100644 --- a/edvart/data_types.py +++ b/edvart/data_types.py @@ -5,7 +5,7 @@ import pandas as pd try: - import pyarrow # pylint: disable=unused-import + import pyarrow # noqa: F401 except ImportError: PYARROW_PANDAS_BACKEND_AVAILABLE = False else: diff --git a/edvart/report_sections/bivariate_analysis.py b/edvart/report_sections/bivariate_analysis.py index cad75eb..a7e5651 100644 --- a/edvart/report_sections/bivariate_analysis.py +++ b/edvart/report_sections/bivariate_analysis.py @@ -986,8 +986,7 @@ def contingency_table( n_digits_max = 1 + np.floor(np.log10(table.max().max())) size_factor = ( # Constants chosen empirically to make the numbers fit in the cells - 0.18 - * max(4, n_digits_max) + 0.18 * max(4, n_digits_max) ) ax.figure.set_size_inches(size_factor * len(table.columns), size_factor * len(table)) # Set y axis diff --git a/edvart/report_sections/code_string_formatting.py b/edvart/report_sections/code_string_formatting.py index 0a2d2ee..ad0c3f1 100644 --- a/edvart/report_sections/code_string_formatting.py +++ b/edvart/report_sections/code_string_formatting.py @@ -18,7 +18,7 @@ def total_dedent(input_string: str) -> str: input_string with no whitespace at the beginning of each line. """ input_string = input_string.strip() - lstripped_lines = [l.strip() for l in input_string.split("\n")] + lstripped_lines = [line.strip() for line in input_string.split("\n")] return "\n".join(lstripped_lines) @@ -53,7 +53,7 @@ def dedecorate(input_string: str) -> str: input_string with beginning lines starting with '@' removed. """ lines = input_string.splitlines() - filtered_lines = dropwhile(lambda l: l.lstrip().startswith("@"), lines) + filtered_lines = dropwhile(lambda line_: line_.lstrip().startswith("@"), lines) return "\n".join(filtered_lines) diff --git a/edvart/report_sections/timeseries_analysis/short_time_ft.py b/edvart/report_sections/timeseries_analysis/short_time_ft.py index 602c5d2..6342a60 100644 --- a/edvart/report_sections/timeseries_analysis/short_time_ft.py +++ b/edvart/report_sections/timeseries_analysis/short_time_ft.py @@ -128,7 +128,7 @@ def show_short_time_ft( columns: Optional[List[str]] = None, overlap: Optional[int] = None, log: bool = True, - window: Union[str, Tuple, "array-like"] = "hamming", + window: Union[str, Tuple, np.typing.ArrayLike] = "hamming", scaling: str = "spectrum", figsize: Tuple[float, float] = (20, 7), colormap: Any = "viridis", diff --git a/edvart/report_sections/timeseries_analysis/stationarity_tests.py b/edvart/report_sections/timeseries_analysis/stationarity_tests.py index a073044..3fd3517 100644 --- a/edvart/report_sections/timeseries_analysis/stationarity_tests.py +++ b/edvart/report_sections/timeseries_analysis/stationarity_tests.py @@ -1,6 +1,6 @@ import warnings from functools import partial -from typing import Any, Callable, Dict, List, Optional +from typing import Any, Callable, Dict, List, Optional, Tuple import nbformat.v4 as nbfv4 import pandas as pd @@ -96,7 +96,7 @@ def show(self, df: pd.DataFrame) -> None: show_stationarity_tests(df=df, columns=self.columns) -def default_stationarity_tests() -> Dict[pd.Series, Callable[[pd.Series], "test_result"]]: +def default_stationarity_tests() -> Dict[str, Callable[[pd.Series], Tuple]]: """Return a dictionary of stationarity test and functions. Stationarity tests are: diff --git a/edvart/report_sections/timeseries_analysis/timeseries_analysis.py b/edvart/report_sections/timeseries_analysis/timeseries_analysis.py index 1d85672..050f39a 100644 --- a/edvart/report_sections/timeseries_analysis/timeseries_analysis.py +++ b/edvart/report_sections/timeseries_analysis/timeseries_analysis.py @@ -144,9 +144,9 @@ def __init__( } # Add FT and STFT only if required parameters specified if sampling_rate is not None: - enum_to_implementation[ - TimeseriesAnalysisSubsection.FourierTransform - ] = FourierTransform(sampling_rate, verbosity_fourier_transform, columns) + enum_to_implementation[TimeseriesAnalysisSubsection.FourierTransform] = ( + FourierTransform(sampling_rate, verbosity_fourier_transform, columns) + ) if stft_window_size is not None: enum_to_implementation[TimeseriesAnalysisSubsection.ShortTimeFT] = ShortTimeFT( sampling_rate, stft_window_size, verbosity_short_time_ft, columns diff --git a/pyproject.toml b/pyproject.toml index 848bab1..6a5a6e3 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -49,11 +49,30 @@ sphinx = "~7.1" sphinx-rtd-theme = "~1.3.0" toml = "^0.10.0" jupyter = "*" -black = "^22.3.0" -pylint = "~3.1" sphinx-copybutton = "^0.5.2" pytest-xdist = "^3.3.1" +ruff = "^0.3.0" [build-system] requires = ["poetry_core>=1.0.0"] build-backend = "poetry.core.masonry.api" + +[tool.ruff] +line-length = 100 +target-version = "py310" +src = ["task"] + +[tool.ruff.lint] +select = [ + "E", + "F", + "N", + "W", + "I001", +] + +[tool.ruff.lint.per-file-ignores] +"__init__.py" = ["E402", "F401"] + +[tool.ruff.lint.isort] +known-first-party = ["edvart"] diff --git a/tests/test_multivariate_analysis.py b/tests/test_multivariate_analysis.py index 5a4eb8d..3a480af 100644 --- a/tests/test_multivariate_analysis.py +++ b/tests/test_multivariate_analysis.py @@ -6,8 +6,6 @@ import plotly.io as pio import pytest -pio.renderers.default = "json" - from edvart import utils from edvart.report_sections import multivariate_analysis from edvart.report_sections.code_string_formatting import code_dedent, get_code @@ -25,6 +23,8 @@ from .execution_utils import check_section_executes from .pyarrow_utils import pyarrow_parameterize +pio.renderers.default = "json" + def get_test_df(pyarrow_dtypes: bool = False) -> pd.DataFrame: test_df = pd.DataFrame( diff --git a/tests/test_timeseries_analysis.py b/tests/test_timeseries_analysis.py index f536309..33757f5 100644 --- a/tests/test_timeseries_analysis.py +++ b/tests/test_timeseries_analysis.py @@ -168,18 +168,18 @@ def test_ft_stft_included(): def test_ft_no_sampling_rate_error(): with pytest.raises(ValueError): - ts = TimeseriesAnalysis(subsections=[TimeseriesAnalysisSubsection.FourierTransform]) + _ts = TimeseriesAnalysis(subsections=[TimeseriesAnalysisSubsection.FourierTransform]) with pytest.raises(ValueError): - ts = TimeseriesAnalysis( + _ts = TimeseriesAnalysis( subsections=[TimeseriesAnalysisSubsection.FourierTransform], stft_window_size=2, ) with pytest.raises(ValueError): - ts = TimeseriesAnalysis( + _ts = TimeseriesAnalysis( subsections=[TimeseriesAnalysisSubsection.ShortTimeFT], ) with pytest.raises(ValueError): - ts = TimeseriesAnalysis( + _ts = TimeseriesAnalysis( subsections=[TimeseriesAnalysisSubsection.ShortTimeFT], sampling_rate=1, ) @@ -424,7 +424,7 @@ def month_func(x: datetime) -> str: def test_boxplots_over_time_lambda(): - month_lambda = lambda x: x.month + month_lambda = lambda x: x.month # noqa: E731 boxplots_sub = BoxplotsOverTime(grouping_name="Month", grouping_function=month_lambda) @@ -451,7 +451,7 @@ def test_imports_verbosity_low(): exported_imports = ts_section.required_imports() expected_imports = [ - "from edvart.report_sections.timeseries_analysis.timeseries_analysis import show_timeseries_analysis" + "from edvart.report_sections.timeseries_analysis.timeseries_analysis import show_timeseries_analysis" # noqa: E501 ] assert isinstance(exported_imports, list) @@ -504,8 +504,8 @@ def test_imports_verbosity_low_different_subsection_verbosities(): exported_imports = ts_section.required_imports() expected_imports = { - "from edvart.report_sections.timeseries_analysis.timeseries_analysis import show_timeseries_analysis", - "from edvart.report_sections.timeseries_analysis.timeseries_analysis import TimeseriesAnalysisSubsection", + "from edvart.report_sections.timeseries_analysis.timeseries_analysis import show_timeseries_analysis", # noqa: E501 + "from edvart.report_sections.timeseries_analysis.timeseries_analysis import TimeseriesAnalysisSubsection", # noqa: E501 } for s in ts_section.subsections: if s.verbosity > Verbosity.LOW: