From e8c691778ccbcb8e808fce6d39a6adb3cd450f0f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Michal=20Bel=C3=A1k?= Date: Sat, 2 Mar 2024 08:59:57 +0100 Subject: [PATCH 1/6] ci: update actions (#210) --- .github/workflows/publish.yaml | 18 +++++++++--------- .github/workflows/test.yaml | 8 ++++---- 2 files changed, 13 insertions(+), 13 deletions(-) diff --git a/.github/workflows/publish.yaml b/.github/workflows/publish.yaml index 19d8293..b22b829 100644 --- a/.github/workflows/publish.yaml +++ b/.github/workflows/publish.yaml @@ -26,7 +26,7 @@ jobs: outputs: new_version: ${{ steps.set-vars.outputs.new_version }} steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@v4 with: fetch-depth: 0 - name: Install poetry @@ -87,13 +87,13 @@ jobs: runs-on: ubuntu-22.04 needs: publish-python steps: - - uses: actions/checkout@v3 - - uses: actions/setup-python@v4 + - uses: actions/checkout@v4 + - uses: actions/setup-python@v5 with: python-version: 3.11 - name: Install poetry run: curl -sSL https://install.python-poetry.org | python3 - --version ${{ env.POETRY_VERSION }} - - uses: actions/cache@v3 + - uses: actions/cache@v4 with: path: .venv/ key: ${{ runner.os }}-python-3.11-poetry-${{ hashFiles('pyproject.toml') }} @@ -108,7 +108,7 @@ jobs: poetry run sphinx-apidoc -o docs/source/ edvart poetry run make -C docs html - name: Upload HTML - uses: actions/upload-artifact@v3 + uses: actions/upload-artifact@v4 with: name: html path: docs/build/html @@ -126,15 +126,15 @@ jobs: runs-on: ubuntu-22.04 steps: - name: Setup Pages - uses: actions/configure-pages@v3 - - uses: actions/download-artifact@v3 + uses: actions/configure-pages@v4 + - uses: actions/download-artifact@v4 with: name: html path: docs/build/html - name: Upload artifact - uses: actions/upload-pages-artifact@v2 + uses: actions/upload-pages-artifact@v3 with: path: docs/build/html - name: Deploy to GitHub Pages id: deployment - uses: actions/deploy-pages@v2 + uses: actions/deploy-pages@v4 diff --git 
a/.github/workflows/test.yaml b/.github/workflows/test.yaml index 6f26019..146da36 100644 --- a/.github/workflows/test.yaml +++ b/.github/workflows/test.yaml @@ -23,14 +23,14 @@ jobs: # Specified only minor version: the latest patch is used python-version: ['3.11', '3.10', '3.9', '3.8'] steps: - - uses: actions/checkout@v3 - - uses: actions/setup-python@v4 + - uses: actions/checkout@v4 + - uses: actions/setup-python@v5 with: python-version: ${{ matrix.python-version }} - name: Install poetry run: curl -sSL https://install.python-poetry.org | python3 - --version ${{ env.POETRY_VERSION }} - id: cache - uses: actions/cache@v3 + uses: actions/cache@v4 with: path: .venv/ key: ${{ runner.os }}-python-${{ matrix.python-version }}-poetry-${{ hashFiles('pyproject.toml') }} @@ -64,7 +64,7 @@ jobs: commit-check: runs-on: ubuntu-22.04 steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@v4 with: fetch-depth: 0 # actions/checkout checks out HEAD only by default - name: Get convco From 82a8ef18f1bd3d1ae8e28cc1e9121c5150bfc0e5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Michal=20Bel=C3=A1k?= Date: Mon, 4 Mar 2024 10:32:04 +0100 Subject: [PATCH 2/6] feat!: add initial support for python 3.12 (#209) NOTE: extras umap-learn is not supported in Python 3.12 since umap-learn does not currently support Python 3.12. BREAKING CHANGE: Support for Python 3.8 is dropped. 
--- .github/workflows/test.yaml | 2 +- pyproject.toml | 17 +++++++++-------- 2 files changed, 10 insertions(+), 9 deletions(-) diff --git a/.github/workflows/test.yaml b/.github/workflows/test.yaml index 146da36..7d86942 100644 --- a/.github/workflows/test.yaml +++ b/.github/workflows/test.yaml @@ -21,7 +21,7 @@ jobs: matrix: # Test against all supported Python minor versions # Specified only minor version: the latest patch is used - python-version: ['3.11', '3.10', '3.9', '3.8'] + python-version: ['3.12', '3.11', '3.10', '3.9'] steps: - uses: actions/checkout@v4 - uses: actions/setup-python@v5 diff --git a/pyproject.toml b/pyproject.toml index e02f783..a8e9a20 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -10,13 +10,14 @@ exclude = ["tests"] readme = "README.md" [tool.poetry.dependencies] -python = ">=3.8, <3.12" +python = ">=3.9, <3.13" ipykernel = "*" -pandas = [ - { version = ">=1.5, <2.1", python = "~3.8" }, - { version = ">=1.5, <2.3", python = ">=3.9" } +pandas = ">=1.5, <2.3" +numpy = [ + { version = "*", python = "<3.12" }, + { version = "^1.26.0", python = ">=3.12" }, ] -numpy = "*" +scipy = { version = "^1.11", python = ">=3.12" } matplotlib = "^3.3" seaborn = "^0.13" plotly = ">=4.0.0, <6" @@ -26,12 +27,12 @@ ipywidgets = ">=7.5, <9" colorlover = "*" scikit-learn = ">=0.22.1" nbconvert = "^7.0.0" # Required for exporting to HTML -umap-learn = { version = "^0.5.4", optional = true} +umap-learn = { version = "^0.5.4", optional = true, python = "<3.12"} # umap-learn dependes on numba. It is specified explicitly to install # a newer version, since by default it installs an older version of numba, # which also installs an older version of llmvlite, which is incompatible # with newer version of LLVM binaries. 
-numba = { version = "^0.57", optional = true } +numba = { version = "^0.59", optional = true } pyarrow = { version = "^14.0.1", optional = true } isort = "^5.10.1" @@ -48,7 +49,7 @@ sphinx-rtd-theme = "~1.3.0" toml = "^0.10.0" jupyter = "*" black = "^22.3.0" -pylint = "^2.14.3" +pylint = "~3.1" sphinx-copybutton = "^0.5.2" pytest-xdist = "^3.3.1" From 8f63194d48bd707c3c2f43d42a87773c116b0181 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Michal=20Bel=C3=A1k?= Date: Mon, 4 Mar 2024 14:11:01 +0100 Subject: [PATCH 3/6] feat: support extras umap with Python 3.12 https://github.com/datamole-ai/edvart/pull/209 introduced support for Python 3.12 with extras umap not being supported. However, there is in reality nothing preventing umap from being supported. The missing support for 3.12 by `umap-learn` was based on a false premise that a new version of the library needs to be released in order to add support for Python 3.12. --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index a8e9a20..bdb9d92 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -27,7 +27,7 @@ ipywidgets = ">=7.5, <9" colorlover = "*" scikit-learn = ">=0.22.1" nbconvert = "^7.0.0" # Required for exporting to HTML -umap-learn = { version = "^0.5.4", optional = true, python = "<3.12"} +umap-learn = { version = "^0.5.4", optional = true } # umap-learn dependes on numba. 
It is specified explicitly to install # a newer version, since by default it installs an older version of numba, # which also installs an older version of llmvlite, which is incompatible From 9e6ec936d60217e5767d178759b8f4e19ee1029c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Michal=20Bel=C3=A1k?= Date: Thu, 7 Mar 2024 20:20:09 +0100 Subject: [PATCH 4/6] feat: add option to embed data when exporting report to notebook (#206) Resolves #21 --- edvart/report.py | 88 ++++++++++++++++++++++++++++++++++++++++---- pyproject.toml | 1 + tests/test_report.py | 36 +++++++++++++++++- 3 files changed, 117 insertions(+), 8 deletions(-) diff --git a/edvart/report.py b/edvart/report.py index 0634ce2..c0f565f 100755 --- a/edvart/report.py +++ b/edvart/report.py @@ -1,9 +1,12 @@ import base64 import logging +import os import pickle +import sys import warnings from abc import ABC from copy import copy +from enum import auto from typing import List, Optional, Tuple, Union import isort @@ -30,11 +33,25 @@ from edvart.report_sections.univariate_analysis import UnivariateAnalysis from edvart.utils import env_var +if sys.version_info < (3, 11): + # Python 3.11+ StrEnum behaves as LowercaseStrEnum from strenum package + from strenum import LowercaseStrEnum as StrEnum +else: + from enum import StrEnum + class EmptyReportWarning(UserWarning): """Warning raised when a report contains no sections.""" +class ExportDataMode(StrEnum): + """Data export mode for the report.""" + + NONE = auto() + FILE = auto() + EMBED = auto() + + class ReportBase(ABC): """ Abstract base class for reports. @@ -55,6 +72,8 @@ class ReportBase(ABC): "import plotly.io as pio", } + _DEFAULT_LOAD_DATA_CODE = "df = ... 
# TODO: Fill in code for loading data" + def __init__( self, dataframe: pd.DataFrame, @@ -84,27 +103,77 @@ def show(self) -> None: for section in self.sections: section.show(self.df) + def _export_data( + self, export_data_mode: ExportDataMode, notebook_file_path: Union[str, os.PathLike] + ) -> Tuple[str, List[str]]: + """Generates code for loading exported data into the exported notebook. + + Parameters + ---------- + export_data_mode : ExportDataMode + The mode of exporting the data. + notebook_file_path : str or PathLike + Filepath of the exported notebook. + + Returns + ------- + Tuple[str, List[str]] + A tuple containing the code for loading the data and a list of imports required for + the code. + """ + if export_data_mode == ExportDataMode.NONE: + return self._DEFAULT_LOAD_DATA_CODE, [] + if export_data_mode == ExportDataMode.FILE: + parquet_file_name = str(notebook_file_path).removesuffix(".ipynb") + "-data.parquet" + self.df.to_parquet(parquet_file_name) + return f"df = pd.read_parquet({parquet_file_name!r})", ["import pandas as pd"] + assert export_data_mode == ExportDataMode.EMBED + buffer = base64.b85encode(self.df.to_parquet()) + return ( + code_dedent( + f""" + df_parquet = BytesIO(base64.b85decode({buffer}.decode())) + df = pd.read_parquet(df_parquet)""" + ), + ["import base64", "import pandas as pd", "from io import BytesIO"], + ) + def export_notebook( self, - notebook_filepath: str, + notebook_filepath: Union[str, os.PathLike], dataset_name: str = "[INSERT DATASET NAME]", dataset_description: str = "[INSERT DATASET DESCRIPTION]", + export_data_mode: ExportDataMode = ExportDataMode.NONE, ) -> None: """Exports the report as an .ipynb file. Parameters ---------- - notebook_filepath : str + notebook_filepath : str or PathLike Filepath of the exported notebook. dataset_name : str (default = "[INSERT DATASET NAME]") Name of dataset to be used in the title of the report.
dataset_description : str (default = "[INSERT DATASET DESCRIPTION]") Description of dataset to be used below the title of the report. + export_data_mode : ExportDataMode (default = ExportDataMode.NONE) + Mode for exporting the data to the notebook. + If ExportDataMode.NONE, the data is not exported to the notebook. + If ExportDataMode.FILE, the data is exported to a parquet file + and loaded from there. + If ExportDataMode.EMBED, the data is embedded into the notebook + as a base85-encoded string. """ + load_data_code, load_data_imports = self._export_data( + export_data_mode, notebook_file_path=notebook_filepath + ) # Generate a notebook containing dataset name and description self._warn_if_empty() nb = self._generate_notebook( - dataset_name=dataset_name, dataset_description=dataset_description + dataset_name=dataset_name, + dataset_description=dataset_description, + load_data_code=load_data_code, + hide_load_data_code=export_data_mode == ExportDataMode.EMBED, + extra_imports=load_data_imports, ) # Save notebook to file @@ -113,9 +182,10 @@ def export_notebook( def _generate_notebook( self, + load_data_code: str, + hide_load_data_code: bool, dataset_name: str = "[INSERT DATASET NAME]", dataset_description: str = "[INSERT DATASET DESCRIPTION]", - load_df: str = "df = ...", extra_imports: Optional[List[str]] = None, show_load_data: bool = True, ) -> nbf.NotebookNode: @@ -127,7 +197,7 @@ def _generate_notebook( Name of dataset to be used in the title of the report. dataset_description : str (default = "[INSERT DATASET DESCRIPTION]") Description of dataset to be used below the title of the report. - load_df : str (default = 'df = ...') + load_data_code : str Code string for loading a dataset to variable `df`.
extra_imports : List[str], optional Any additional imports to be included in imports section @@ -166,7 +236,10 @@ def _generate_notebook( # Add load data cell if show_load_data: nb["cells"].append(nbf4.new_markdown_cell("## Load Data\n---")) - nb["cells"].append(nbf4.new_code_cell(load_df)) + load_data_cell = nbf4.new_code_cell(load_data_code) + if hide_load_data_code: + load_data_cell["metadata"] = {"jupyter": {"source_hidden": True}} + nb["cells"].append(load_data_cell) # Generate code for each report section if self._table_of_contents is not None: @@ -313,7 +386,8 @@ def _dev_export_notebook(self, notebook_filepath: str) -> None: """ nb = self._generate_notebook( extra_imports=["import edvart"], - load_df="df = edvart.example_datasets.dataset_titanic()", + load_data_code="df = edvart.example_datasets.dataset_titanic()", + hide_load_data_code=False, ) # Save notebook to file diff --git a/pyproject.toml b/pyproject.toml index bdb9d92..848bab1 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -35,6 +35,7 @@ umap-learn = { version = "^0.5.4", optional = true } numba = { version = "^0.59", optional = true } pyarrow = { version = "^14.0.1", optional = true } isort = "^5.10.1" +strenum = { version = "^0.4.15", python = "<3.11" } [tool.poetry.extras] umap = ["umap-learn", "numba"] diff --git a/tests/test_report.py b/tests/test_report.py index 808f4b2..50f419e 100644 --- a/tests/test_report.py +++ b/tests/test_report.py @@ -1,10 +1,13 @@ +import pathlib import warnings from contextlib import redirect_stdout +import nbconvert +import nbformat import numpy as np import pandas as pd -from edvart.report import DefaultReport, Report +from edvart.report import DefaultReport, ExportDataMode, Report from edvart.report_sections.bivariate_analysis import BivariateAnalysis from edvart.report_sections.section_base import Verbosity from edvart.report_sections.univariate_analysis import UnivariateAnalysis @@ -90,3 +93,34 @@ def test_show(): warnings.simplefilter("ignore", 
UserWarning) with redirect_stdout(None): report.show() + + +def test_notebook_export(tmp_path: pathlib.Path): + report = Report(dataframe=_get_test_df()) + + report.add_overview() + for export_data_mode in ( + ExportDataMode.NONE, + ExportDataMode.EMBED, + ExportDataMode.FILE, + "embed", + "none", + "file", + ): + report.export_notebook( + tmp_path / f"export_{export_data_mode}.ipynb", export_data_mode=export_data_mode + ) + + +def test_exported_notebook_executes(tmp_path: pathlib.Path): + report = Report(dataframe=_get_test_df()) + + report.add_overview() + for export_data_mode in (ExportDataMode.EMBED, ExportDataMode.FILE): + export_path = tmp_path / f"export_{export_data_mode}.ipynb" + report.export_notebook(export_path, export_data_mode=export_data_mode) + + notebook = nbformat.read(export_path, as_version=4) + preprocessor = nbconvert.preprocessors.ExecutePreprocessor(timeout=60) + + preprocessor.preprocess(notebook) From 2b29337c8cde96652434bdf4ce70eb0435fff247 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Michal=20Bel=C3=A1k?= Date: Thu, 7 Mar 2024 20:20:56 +0100 Subject: [PATCH 5/6] ci: update poetry to `1.8.2` (#216) --- .github/workflows/publish.yaml | 2 +- .github/workflows/test.yaml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/publish.yaml b/.github/workflows/publish.yaml index b22b829..0811c91 100644 --- a/.github/workflows/publish.yaml +++ b/.github/workflows/publish.yaml @@ -3,7 +3,7 @@ name: Publish on: workflow_dispatch env: - POETRY_VERSION: 1.7.1 + POETRY_VERSION: 1.8.2 CONVCO_VERSION: v0.4.2 GITCLIFF_VERSION: 2.0.4 CHANGELOG_FILE: CHANGELOG.md diff --git a/.github/workflows/test.yaml b/.github/workflows/test.yaml index 7d86942..b32f8a8 100644 --- a/.github/workflows/test.yaml +++ b/.github/workflows/test.yaml @@ -10,7 +10,7 @@ on: branches: [main] env: - POETRY_VERSION: 1.7.1 + POETRY_VERSION: 1.8.2 CONVCO_VERSION: v0.4.2 jobs: From eb78c670262996290dacba7e1406555c98494dc8 Mon Sep 17 00:00:00 2001 From:
=?UTF-8?q?Michal=20Bel=C3=A1k?= Date: Thu, 7 Mar 2024 20:28:46 +0100 Subject: [PATCH 6/6] refactor: use ruff (#217) ruff replaces `black`, `isort` & `pylint` for formatting & linting. Note that `isort` is still used to sort imports when exporting to notebook since ruff does not provide a Python API. Resolves #214 --- .github/workflows/test.yaml | 5 +- CONTRIBUTING.md | 32 +++----- README.md | 8 +- docs/conf.py | 80 ++++++++++--------- edvart/data_types.py | 2 +- edvart/report_sections/bivariate_analysis.py | 3 +- .../report_sections/code_string_formatting.py | 4 +- .../timeseries_analysis/short_time_ft.py | 2 +- .../timeseries_analysis/stationarity_tests.py | 4 +- .../timeseries_analysis.py | 6 +- pyproject.toml | 23 +++++- tests/test_multivariate_analysis.py | 4 +- tests/test_timeseries_analysis.py | 16 ++-- 13 files changed, 98 insertions(+), 91 deletions(-) diff --git a/.github/workflows/test.yaml b/.github/workflows/test.yaml index b32f8a8..0e473a4 100644 --- a/.github/workflows/test.yaml +++ b/.github/workflows/test.yaml @@ -45,9 +45,8 @@ jobs: poetry run python -m pytest -n auto --disable-warnings --cov=edvart tests/ - name: Lint run: | - poetry run pylint --rcfile=.pylintrc edvart/ - poetry run black --check --line-length 100 edvart/ tests/ - poetry run isort --check --line-length 100 --profile black edvart/ tests/ + poetry run ruff check . + poetry run ruff format --check . dismiss-stale-reviews: runs-on: ubuntu-22.04 diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index 98fe574..70bf401 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -85,30 +85,20 @@ If you add a new section, add the section description into `docs/sections.rst` ## Code style -* The line length is limited to 100 characters in Python code, except if it would make the code less readable. -* `black` is used for formatting Python code. 
The following command can be used to properly format the code: +* The line length is limited to 100 characters in Python code, +except if it would make the code less readable. +* `ruff` is used for formatting and linting Python code. +The following commands can be used to properly format the code and check +for linting errors with automatic fixing: ```bash -poetry run black --line-length 100 edvart/ tests/ +poetry run ruff format . +poetry run ruff check . --fix ``` -The following command can be used to check if the code is properly formatted: +The following command can be used to check if the code is properly +formatted and check for linting errors: ```bash -poetry run black --check --line-length 100 edvart/ tests/ -```` - -* `isort` is used for sorting imports. -The following command can be used to properly sort imports: -```bash -poetry run isort --line-length 100 --profile black edvart/ tests/ -``` -The following command can be used to check if the imports are properly sorted: -```bash -poetry run isort --check --line-length 100 --profile black edvart/ tests/ -``` - -* `pylint` is used to lint Python code. Tests are not required to be linted. -The following command can be used to lint the code: -```bash -poetry run pylint --rcfile=".pylintrc" edvart +poetry run ruff format --check . +poetry run ruff check . ``` All of the above code style requirements are enforced by the CI pipeline. diff --git a/README.md b/README.md index 1c0a579..c88d618 100644 --- a/README.md +++ b/README.md @@ -10,13 +10,9 @@ PyPI - Downloads - - Code style: black + + Ruff - - Imports: isort - -

Edvart is an open-source Python library designed to simplify and streamline diff --git a/docs/conf.py b/docs/conf.py index b7ab5a9..1b08695 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -14,28 +14,27 @@ # import os import sys -import re from datetime import datetime from pathlib import Path -sys.path.insert(0, os.path.abspath('.')) -sys.path.insert(0, os.path.abspath('../')) +import toml # make sure that toml is a developer dependency -# -- Project information ----------------------------------------------------- +sys.path.insert(0, os.path.abspath(".")) +sys.path.insert(0, os.path.abspath("../")) -project = 'edvart' -copyright = f'{datetime.now().year}, Datamole' -author = 'Hellen team' +# -- Project information ----------------------------------------------------- +project = "edvart" +copyright = f"{datetime.now().year}, Datamole" +author = "Hellen team" -import toml # make sure that toml is a developer dependency metadata = toml.load(Path(__file__).parent.parent / "pyproject.toml")["tool"]["poetry"] version = release = metadata["version"] version_long = version # The short X.Y.Z version -version = '.'.join(version_long.split('.')[0:3]) +version = ".".join(version_long.split(".")[0:3]) # The full version, including alpha/beta/rc tags release = version_long @@ -51,12 +50,12 @@ # extensions coming with Sphinx (named 'sphinx.ext.*') or your custom # ones. extensions = [ - 'sphinx.ext.autodoc', - 'sphinx.ext.intersphinx', - 'sphinx.ext.mathjax', - 'sphinx.ext.viewcode', - 'sphinx.ext.napoleon', - 'sphinx_copybutton', + "sphinx.ext.autodoc", + "sphinx.ext.intersphinx", + "sphinx.ext.mathjax", + "sphinx.ext.viewcode", + "sphinx.ext.napoleon", + "sphinx_copybutton", ] # napoleon_google_docstring = False @@ -64,16 +63,16 @@ # napoleon_use_ivar = True # Add any paths that contain templates here, relative to this directory. -templates_path = ['_templates'] +templates_path = ["_templates"] # The suffix(es) of source filenames. 
# You can specify multiple suffix as a list of string: # # source_suffix = ['.rst', '.md'] -source_suffix = '.rst' +source_suffix = ".rst" # The master toctree document. -master_doc = 'index' +master_doc = "index" # The language for content autogenerated by Sphinx. Refer to documentation # for a list of supported languages. @@ -85,10 +84,10 @@ # List of patterns, relative to source directory, that match files and # directories to ignore when looking for source files. # This pattern also affects html_static_path and html_extra_path . -exclude_patterns = ['_build', 'Thumbs.db', '.DS_Store'] +exclude_patterns = ["_build", "Thumbs.db", ".DS_Store"] # The name of the Pygments (syntax highlighting) style to use. -pygments_style = 'sphinx' +pygments_style = "sphinx" # -- Options for HTML output ------------------------------------------------- @@ -96,9 +95,9 @@ # The theme to use for HTML and HTML Help pages. See the documentation for # a list of builtin themes. # -#html_theme = 'alabaster' -#html_theme = 'sphinx_rtd_theme' -html_theme = 'sphinx_rtd_theme' +# html_theme = 'alabaster' +# html_theme = 'sphinx_rtd_theme' +html_theme = "sphinx_rtd_theme" # Theme options are theme-specific and customize the look and feel of a theme # further. For a list of options available for each theme, see the @@ -109,7 +108,7 @@ # Add any paths that contain custom static files (such as style sheets) here, # relative to this directory. They are copied after the builtin static files, # so a file named "default.css" will overwrite the builtin "default.css". -html_static_path = ['_static'] +html_static_path = ["_static"] # Custom sidebar templates, must be a dictionary that maps document names # to template names. @@ -125,7 +124,7 @@ # -- Options for HTMLHelp output --------------------------------------------- # Output file base name for HTML help builder. 
-htmlhelp_basename = 'edvartdoc' +htmlhelp_basename = "edvartdoc" # -- Options for LaTeX output ------------------------------------------------ @@ -134,15 +133,12 @@ # The paper size ('letterpaper' or 'a4paper'). # # 'papersize': 'letterpaper', - # The font size ('10pt', '11pt' or '12pt'). # # 'pointsize': '10pt', - # Additional stuff for the LaTeX preamble. # # 'preamble': '', - # Latex figure (float) alignment # # 'figure_align': 'htbp', @@ -152,8 +148,13 @@ # (source start file, target name, title, # author, documentclass [howto, manual, or own class]). latex_documents = [ - (master_doc, 'edvart.tex', 'edvart Documentation', - 'Your name (or your organization/company/team)', 'manual'), + ( + master_doc, + "edvart.tex", + "edvart Documentation", + "Your name (or your organization/company/team)", + "manual", + ), ] @@ -161,10 +162,7 @@ # One entry per manual page. List of tuples # (source start file, name, description, authors, manual section). -man_pages = [ - (master_doc, 'edvart', 'edvart Documentation', - [author], 1) -] +man_pages = [(master_doc, "edvart", "edvart Documentation", [author], 1)] # -- Options for Texinfo output ---------------------------------------------- @@ -173,9 +171,15 @@ # (source start file, target name, title, author, # dir menu entry, description, category) texinfo_documents = [ - (master_doc, 'edvart', 'edvart Documentation', - author, 'edvart', 'One line description of project.', - 'Miscellaneous'), + ( + master_doc, + "edvart", + "edvart Documentation", + author, + "edvart", + "One line description of project.", + "Miscellaneous", + ), ] @@ -184,4 +188,4 @@ # -- Options for intersphinx extension --------------------------------------- # Example configuration for intersphinx: refer to the Python standard library. 
-intersphinx_mapping = {'https://docs.python.org/': None} +intersphinx_mapping = {"https://docs.python.org/": None} diff --git a/edvart/data_types.py b/edvart/data_types.py index cba19b9..f4ffcfb 100644 --- a/edvart/data_types.py +++ b/edvart/data_types.py @@ -5,7 +5,7 @@ import pandas as pd try: - import pyarrow # pylint: disable=unused-import + import pyarrow # noqa: F401 except ImportError: PYARROW_PANDAS_BACKEND_AVAILABLE = False else: diff --git a/edvart/report_sections/bivariate_analysis.py b/edvart/report_sections/bivariate_analysis.py index cad75eb..a7e5651 100644 --- a/edvart/report_sections/bivariate_analysis.py +++ b/edvart/report_sections/bivariate_analysis.py @@ -986,8 +986,7 @@ def contingency_table( n_digits_max = 1 + np.floor(np.log10(table.max().max())) size_factor = ( # Constants chosen empirically to make the numbers fit in the cells - 0.18 - * max(4, n_digits_max) + 0.18 * max(4, n_digits_max) ) ax.figure.set_size_inches(size_factor * len(table.columns), size_factor * len(table)) # Set y axis diff --git a/edvart/report_sections/code_string_formatting.py b/edvart/report_sections/code_string_formatting.py index 0a2d2ee..ad0c3f1 100644 --- a/edvart/report_sections/code_string_formatting.py +++ b/edvart/report_sections/code_string_formatting.py @@ -18,7 +18,7 @@ def total_dedent(input_string: str) -> str: input_string with no whitespace at the beginning of each line. """ input_string = input_string.strip() - lstripped_lines = [l.strip() for l in input_string.split("\n")] + lstripped_lines = [line.strip() for line in input_string.split("\n")] return "\n".join(lstripped_lines) @@ -53,7 +53,7 @@ def dedecorate(input_string: str) -> str: input_string with beginning lines starting with '@' removed. 
""" lines = input_string.splitlines() - filtered_lines = dropwhile(lambda l: l.lstrip().startswith("@"), lines) + filtered_lines = dropwhile(lambda line_: line_.lstrip().startswith("@"), lines) return "\n".join(filtered_lines) diff --git a/edvart/report_sections/timeseries_analysis/short_time_ft.py b/edvart/report_sections/timeseries_analysis/short_time_ft.py index 602c5d2..6342a60 100644 --- a/edvart/report_sections/timeseries_analysis/short_time_ft.py +++ b/edvart/report_sections/timeseries_analysis/short_time_ft.py @@ -128,7 +128,7 @@ def show_short_time_ft( columns: Optional[List[str]] = None, overlap: Optional[int] = None, log: bool = True, - window: Union[str, Tuple, "array-like"] = "hamming", + window: Union[str, Tuple, np.typing.ArrayLike] = "hamming", scaling: str = "spectrum", figsize: Tuple[float, float] = (20, 7), colormap: Any = "viridis", diff --git a/edvart/report_sections/timeseries_analysis/stationarity_tests.py b/edvart/report_sections/timeseries_analysis/stationarity_tests.py index a073044..3fd3517 100644 --- a/edvart/report_sections/timeseries_analysis/stationarity_tests.py +++ b/edvart/report_sections/timeseries_analysis/stationarity_tests.py @@ -1,6 +1,6 @@ import warnings from functools import partial -from typing import Any, Callable, Dict, List, Optional +from typing import Any, Callable, Dict, List, Optional, Tuple import nbformat.v4 as nbfv4 import pandas as pd @@ -96,7 +96,7 @@ def show(self, df: pd.DataFrame) -> None: show_stationarity_tests(df=df, columns=self.columns) -def default_stationarity_tests() -> Dict[pd.Series, Callable[[pd.Series], "test_result"]]: +def default_stationarity_tests() -> Dict[str, Callable[[pd.Series], Tuple]]: """Return a dictionary of stationarity test and functions. 
Stationarity tests are: diff --git a/edvart/report_sections/timeseries_analysis/timeseries_analysis.py b/edvart/report_sections/timeseries_analysis/timeseries_analysis.py index 1d85672..050f39a 100644 --- a/edvart/report_sections/timeseries_analysis/timeseries_analysis.py +++ b/edvart/report_sections/timeseries_analysis/timeseries_analysis.py @@ -144,9 +144,9 @@ def __init__( } # Add FT and STFT only if required parameters specified if sampling_rate is not None: - enum_to_implementation[ - TimeseriesAnalysisSubsection.FourierTransform - ] = FourierTransform(sampling_rate, verbosity_fourier_transform, columns) + enum_to_implementation[TimeseriesAnalysisSubsection.FourierTransform] = ( + FourierTransform(sampling_rate, verbosity_fourier_transform, columns) + ) if stft_window_size is not None: enum_to_implementation[TimeseriesAnalysisSubsection.ShortTimeFT] = ShortTimeFT( sampling_rate, stft_window_size, verbosity_short_time_ft, columns diff --git a/pyproject.toml b/pyproject.toml index 848bab1..6a5a6e3 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -49,11 +49,30 @@ sphinx = "~7.1" sphinx-rtd-theme = "~1.3.0" toml = "^0.10.0" jupyter = "*" -black = "^22.3.0" -pylint = "~3.1" sphinx-copybutton = "^0.5.2" pytest-xdist = "^3.3.1" +ruff = "^0.3.0" [build-system] requires = ["poetry_core>=1.0.0"] build-backend = "poetry.core.masonry.api" + +[tool.ruff] +line-length = 100 +target-version = "py39" +src = ["task"] + +[tool.ruff.lint] +select = [ + "E", + "F", + "N", + "W", + "I001", +] + +[tool.ruff.lint.per-file-ignores] +"__init__.py" = ["E402", "F401"] + +[tool.ruff.lint.isort] +known-first-party = ["edvart"] diff --git a/tests/test_multivariate_analysis.py b/tests/test_multivariate_analysis.py index 5a4eb8d..3a480af 100644 --- a/tests/test_multivariate_analysis.py +++ b/tests/test_multivariate_analysis.py @@ -6,8 +6,6 @@ import plotly.io as pio import pytest -pio.renderers.default = "json" - from edvart import utils from edvart.report_sections import
multivariate_analysis from edvart.report_sections.code_string_formatting import code_dedent, get_code @@ -25,6 +23,8 @@ from .execution_utils import check_section_executes from .pyarrow_utils import pyarrow_parameterize +pio.renderers.default = "json" + def get_test_df(pyarrow_dtypes: bool = False) -> pd.DataFrame: test_df = pd.DataFrame( diff --git a/tests/test_timeseries_analysis.py b/tests/test_timeseries_analysis.py index f536309..33757f5 100644 --- a/tests/test_timeseries_analysis.py +++ b/tests/test_timeseries_analysis.py @@ -168,18 +168,18 @@ def test_ft_stft_included(): def test_ft_no_sampling_rate_error(): with pytest.raises(ValueError): - ts = TimeseriesAnalysis(subsections=[TimeseriesAnalysisSubsection.FourierTransform]) + _ts = TimeseriesAnalysis(subsections=[TimeseriesAnalysisSubsection.FourierTransform]) with pytest.raises(ValueError): - ts = TimeseriesAnalysis( + _ts = TimeseriesAnalysis( subsections=[TimeseriesAnalysisSubsection.FourierTransform], stft_window_size=2, ) with pytest.raises(ValueError): - ts = TimeseriesAnalysis( + _ts = TimeseriesAnalysis( subsections=[TimeseriesAnalysisSubsection.ShortTimeFT], ) with pytest.raises(ValueError): - ts = TimeseriesAnalysis( + _ts = TimeseriesAnalysis( subsections=[TimeseriesAnalysisSubsection.ShortTimeFT], sampling_rate=1, ) @@ -424,7 +424,7 @@ def month_func(x: datetime) -> str: def test_boxplots_over_time_lambda(): - month_lambda = lambda x: x.month + month_lambda = lambda x: x.month # noqa: E731 boxplots_sub = BoxplotsOverTime(grouping_name="Month", grouping_function=month_lambda) @@ -451,7 +451,7 @@ def test_imports_verbosity_low(): exported_imports = ts_section.required_imports() expected_imports = [ - "from edvart.report_sections.timeseries_analysis.timeseries_analysis import show_timeseries_analysis" + "from edvart.report_sections.timeseries_analysis.timeseries_analysis import show_timeseries_analysis" # noqa: E501 ] assert isinstance(exported_imports, list) @@ -504,8 +504,8 @@ def 
test_imports_verbosity_low_different_subsection_verbosities(): exported_imports = ts_section.required_imports() expected_imports = { - "from edvart.report_sections.timeseries_analysis.timeseries_analysis import show_timeseries_analysis", - "from edvart.report_sections.timeseries_analysis.timeseries_analysis import TimeseriesAnalysisSubsection", + "from edvart.report_sections.timeseries_analysis.timeseries_analysis import show_timeseries_analysis", # noqa: E501 + "from edvart.report_sections.timeseries_analysis.timeseries_analysis import TimeseriesAnalysisSubsection", # noqa: E501 } for s in ts_section.subsections: if s.verbosity > Verbosity.LOW: