Skip to content

Commit

Permalink
refactor!: move subsection enums outside section classes (#160)
Browse files Browse the repository at this point in the history
Resolves #107
  • Loading branch information
mbelak-dtml authored Oct 9, 2023
1 parent 8b71e43 commit 19fb878
Show file tree
Hide file tree
Showing 10 changed files with 312 additions and 274 deletions.
26 changes: 16 additions & 10 deletions edvart/report.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,14 +14,20 @@
import pandas as pd

from edvart.data_types import is_date
from edvart.report_sections.bivariate_analysis import BivariateAnalysis
from edvart.report_sections.bivariate_analysis import BivariateAnalysis, BivariateAnalysisSubsection
from edvart.report_sections.code_string_formatting import code_dedent
from edvart.report_sections.dataset_overview import Overview
from edvart.report_sections.dataset_overview import Overview, OverviewSubsection
from edvart.report_sections.group_analysis import GroupAnalysis
from edvart.report_sections.multivariate_analysis import MultivariateAnalysis
from edvart.report_sections.multivariate_analysis import (
MultivariateAnalysis,
MultivariateAnalysisSubsection,
)
from edvart.report_sections.section_base import Section, Verbosity
from edvart.report_sections.table_of_contents import TableOfContents
from edvart.report_sections.timeseries_analysis import TimeseriesAnalysis
from edvart.report_sections.timeseries_analysis import (
TimeseriesAnalysis,
TimeseriesAnalysisSubsection,
)
from edvart.report_sections.univariate_analysis import UnivariateAnalysis
from edvart.utils import env_var

Expand Down Expand Up @@ -322,7 +328,7 @@ def add_section(
def add_overview(
self,
columns: Optional[List[str]] = None,
subsections: Optional[List[Overview.OverviewSubsection]] = None,
subsections: Optional[List[OverviewSubsection]] = None,
verbosity: Optional[Verbosity] = None,
verbosity_quick_info: Optional[Verbosity] = None,
verbosity_data_types: Optional[Verbosity] = None,
Expand Down Expand Up @@ -408,7 +414,7 @@ def add_bivariate_analysis(
columns_x: Optional[List[str]] = None,
columns_y: Optional[List[str]] = None,
columns_pairs: Optional[List[Tuple[str, str]]] = None,
subsections: Optional[List[BivariateAnalysis.BivariateAnalysisSubsection]] = None,
subsections: Optional[List[BivariateAnalysisSubsection]] = None,
verbosity: Optional[Verbosity] = None,
verbosity_correlations: Optional[Verbosity] = None,
verbosity_pairplot: Optional[Verbosity] = None,
Expand Down Expand Up @@ -438,7 +444,7 @@ def add_bivariate_analysis(
`columns`, `columns_x`, `columns_y` is specified. In that case, the first elements
of each pair are treated as `columns_x` and the second elements as `columns_y` in
pairplots and correlations.
subsections : List[BivariateAnalysis.BivariateAnalysisSubsection], optional
subsections : List[BivariateAnalysisSubsection], optional
List of sub-sections to include into the BivariateAnalysis section.
If None, all subsections are added.
verbosity : Verbosity, optional
Expand Down Expand Up @@ -473,7 +479,7 @@ def add_bivariate_analysis(
def add_multivariate_analysis(
self,
columns: Optional[List[str]] = None,
subsections: Optional[List[MultivariateAnalysis.MultivariateAnalysisSubsection]] = None,
subsections: Optional[List[MultivariateAnalysisSubsection]] = None,
verbosity: Optional[Verbosity] = None,
verbosity_pca: Optional[Verbosity] = None,
verbosity_umap: Optional[Verbosity] = None,
Expand All @@ -488,7 +494,7 @@ def add_multivariate_analysis(
columns : List[str], optional
Columns which to analyze.
If None, all columns are used.
subsections : List[MultivariateAnalysis.MultivariateAnalysisSubsection], optional
subsections : List[MultivariateAnalysisSubsection], optional
List of sub-sections to include into the MultivariateAnalysis section.
If None, all subsections are added.
verbosity : Verbosity, optional
Expand Down Expand Up @@ -733,7 +739,7 @@ def __init__(
def add_timeseries_analysis(
self,
columns: Optional[List[str]] = None,
subsections: Optional[List[TimeseriesAnalysis.TimeseriesAnalysisSubsection]] = None,
subsections: Optional[List[TimeseriesAnalysisSubsection]] = None,
verbosity: Optional[Verbosity] = None,
verbosity_time_series_line_plot: Optional[Verbosity] = None,
verbosity_rolling_statistics: Optional[Verbosity] = None,
Expand Down
45 changes: 23 additions & 22 deletions edvart/report_sections/bivariate_analysis.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,18 @@
from edvart.report_sections.section_base import ReportSection, Section, Verbosity


# pylint:disable=invalid-name
class BivariateAnalysisSubsection(IntEnum):
    """Identifiers of the subsections implemented for bivariate analysis.

    Member names are not UPPER_CASE, which is why the pylint
    ``invalid-name`` check is suppressed above. Converting a member with
    ``str()`` yields its bare name (e.g. ``"PairPlot"``).
    """

    CorrelationPlot = 0
    PairPlot = 1
    ContingencyTable = 2

    def __str__(self) -> str:
        # Render as the plain member name instead of the default enum form.
        return self.name


class BivariateAnalysis(ReportSection):
"""Generates the Bivariate analysis section of the report.
Expand Down Expand Up @@ -69,17 +81,6 @@ class BivariateAnalysis(ReportSection):
If exactly one of `columns_x`, `columns_y` is specified.
"""

# pylint:disable=invalid-name
class BivariateAnalysisSubsection(IntEnum):
    """Enumeration of every implemented bivariate analysis subsection.

    ``str()`` of a member returns only the member's name.
    """

    CorrelationPlot = 0
    PairPlot = 1
    ContingencyTable = 2

    def __str__(self) -> str:
        # Bare member name, without the enum class prefix.
        return self.name

# By default use all subsections
_DEFAULT_SUBSECTIONS_TO_SHOW = list(BivariateAnalysisSubsection)

Expand All @@ -100,13 +101,11 @@ def __init__(
verbosity_pairplot = verbosity_pairplot or verbosity
verbosity_contingency_table = verbosity_contingency_table or verbosity

subsec = BivariateAnalysis.BivariateAnalysisSubsection

# Store subsection verbosities
self.subsection_verbosities = {
subsec.CorrelationPlot: verbosity_correlations,
subsec.PairPlot: verbosity_pairplot,
subsec.ContingencyTable: verbosity_contingency_table,
BivariateAnalysisSubsection.CorrelationPlot: verbosity_correlations,
BivariateAnalysisSubsection.PairPlot: verbosity_pairplot,
BivariateAnalysisSubsection.ContingencyTable: verbosity_contingency_table,
}

if subsections is None:
Expand All @@ -130,17 +129,17 @@ def __init__(
columns_x_no_pairs = columns_x
columns_y_no_pairs = columns_y
enum_to_implementation = {
subsec.CorrelationPlot: CorrelationPlot(
BivariateAnalysisSubsection.CorrelationPlot: CorrelationPlot(
verbosity_correlations, columns, columns_x_no_pairs, columns_y_no_pairs
),
subsec.PairPlot: PairPlot(
BivariateAnalysisSubsection.PairPlot: PairPlot(
verbosity_pairplot,
columns,
columns_x_no_pairs,
columns_y_no_pairs,
color_col=color_col,
),
subsec.ContingencyTable: ContingencyTable(
BivariateAnalysisSubsection.ContingencyTable: ContingencyTable(
verbosity_contingency_table, columns, columns_x, columns_y, columns_pairs
),
}
Expand Down Expand Up @@ -177,7 +176,7 @@ def add_cells(self, cells: List[Dict[str, Any]], df: pd.DataFrame) -> None:
code = "show_bivariate_analysis(df=df"
if self.subsections_to_show_with_low_verbosity != self._DEFAULT_SUBSECTIONS_TO_SHOW:
arg_subsections_names = [
f"BivariateAnalysis.BivariateAnalysisSubsection.{str(sub)}"
f"BivariateAnalysisSubsection.{str(sub)}"
for sub in self.subsections_to_show_with_low_verbosity
]

Expand Down Expand Up @@ -213,7 +212,9 @@ def required_imports(self) -> List[str]:

imports = {"from edvart.report_sections.bivariate_analysis import show_bivariate_analysis"}
if self.subsections_to_show_with_low_verbosity != self._DEFAULT_SUBSECTIONS_TO_SHOW:
imports.add("from edvart.report_sections.bivariate_analysis import BivariateAnalysis")
imports.add(
"from edvart.report_sections.bivariate_analysis import BivariateAnalysisSubsection"
)
for subsec in self.subsections:
if subsec.verbosity > Verbosity.LOW:
imports.update(subsec.required_imports())
Expand All @@ -234,7 +235,7 @@ def show(self, df: pd.DataFrame) -> None:

def show_bivariate_analysis(
df: pd.DataFrame,
subsections: Optional[List[BivariateAnalysis.BivariateAnalysisSubsection]] = None,
subsections: Optional[List[BivariateAnalysisSubsection]] = None,
columns: Optional[List[str]] = None,
columns_x: Optional[List[str]] = None,
columns_y: Optional[List[str]] = None,
Expand Down
75 changes: 39 additions & 36 deletions edvart/report_sections/dataset_overview.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,24 @@
from edvart.report_sections.section_base import ReportSection, Section, Verbosity


# pylint: disable=invalid-name
class OverviewSubsection(IntEnum):
    """Identifiers of the subsections that the Overview section can contain.

    Member names are not UPPER_CASE, which is why the pylint
    ``invalid-name`` check is suppressed above. Values start at 1.
    Converting a member with ``str()`` yields its bare name
    (e.g. ``"QuickInfo"``).
    """

    QuickInfo = 1
    DataTypes = 2
    DataPreview = 3
    MissingValues = 4
    RowsWithMissingValue = 5
    ConstantOccurrence = 6
    DuplicateRows = 7

    def __str__(self) -> str:
        # Render as the plain member name instead of the default enum form.
        return self.name


class Overview(ReportSection):
"""Generates the Overview section of the report.
Expand Down Expand Up @@ -52,23 +70,6 @@ class Overview(ReportSection):
Duplicate rows subsection code verbosity.
"""

# pylint: disable=invalid-name
class OverviewSubsection(IntEnum):
    """Enumeration of the possible subsections of the Overview section.

    ``str()`` of a member returns only the member's name.
    """

    QuickInfo = 1
    DataTypes = 2
    DataPreview = 3
    MissingValues = 4
    RowsWithMissingValue = 5
    ConstantOccurrence = 6
    DuplicateRows = 7

    def __str__(self) -> str:
        # Bare member name, without the enum class prefix.
        return self.name

# By default use all subsections
_DEFAULT_SUBSECTIONS_TO_SHOW = list(OverviewSubsection)

Expand All @@ -95,17 +96,15 @@ def __init__(
verbosity_constant_occurrence = verbosity_constant_occurrence or verbosity
verbosity_duplicate_rows = verbosity_duplicate_rows or verbosity

subsec = Overview.OverviewSubsection

# Store subsection verbosities
self.subsection_verbosities = {
subsec.QuickInfo: verbosity_quick_info,
subsec.DataTypes: verbosity_data_types,
subsec.DataPreview: verbosity_data_preview,
subsec.MissingValues: verbosity_missing_values,
subsec.RowsWithMissingValue: verbosity_rows_with_missing_value,
subsec.ConstantOccurrence: verbosity_constant_occurrence,
subsec.DuplicateRows: verbosity_duplicate_rows,
OverviewSubsection.QuickInfo: verbosity_quick_info,
OverviewSubsection.DataTypes: verbosity_data_types,
OverviewSubsection.DataPreview: verbosity_data_preview,
OverviewSubsection.MissingValues: verbosity_missing_values,
OverviewSubsection.RowsWithMissingValue: verbosity_rows_with_missing_value,
OverviewSubsection.ConstantOccurrence: verbosity_constant_occurrence,
OverviewSubsection.DuplicateRows: verbosity_duplicate_rows,
}

if subsections is None:
Expand All @@ -121,15 +120,17 @@ def __init__(

# Construct objects that implement subsections
enum_to_implementation = {
subsec.QuickInfo: QuickInfo(verbosity_quick_info, columns),
subsec.DataTypes: DataTypes(verbosity_data_types, columns),
subsec.DataPreview: DataPreview(verbosity_data_preview, columns),
subsec.MissingValues: MissingValues(verbosity_missing_values, columns),
subsec.RowsWithMissingValue: RowsWithMissingValue(
OverviewSubsection.QuickInfo: QuickInfo(verbosity_quick_info, columns),
OverviewSubsection.DataTypes: DataTypes(verbosity_data_types, columns),
OverviewSubsection.DataPreview: DataPreview(verbosity_data_preview, columns),
OverviewSubsection.MissingValues: MissingValues(verbosity_missing_values, columns),
OverviewSubsection.RowsWithMissingValue: RowsWithMissingValue(
verbosity_rows_with_missing_value, columns
),
subsec.ConstantOccurrence: ConstantOccurrence(verbosity_constant_occurrence, columns),
subsec.DuplicateRows: DuplicateRows(verbosity_duplicate_rows, columns),
OverviewSubsection.ConstantOccurrence: ConstantOccurrence(
verbosity_constant_occurrence, columns
),
OverviewSubsection.DuplicateRows: DuplicateRows(verbosity_duplicate_rows, columns),
}

subsections_implementations = [
Expand All @@ -153,7 +154,9 @@ def required_imports(self) -> List[str]:
if self.verbosity == Verbosity.LOW:
imports = {"from edvart.report_sections.dataset_overview import show_overview"}
if self.subsections_to_show_with_low_verbosity != self._DEFAULT_SUBSECTIONS_TO_SHOW:
imports.add("from edvart.report_sections.dataset_overview import Overview")
imports.add(
"from edvart.report_sections.dataset_overview import OverviewSubsection"
)

for subsec in self.subsections:
if subsec.verbosity > Verbosity.LOW:
Expand Down Expand Up @@ -181,7 +184,7 @@ def add_cells(self, cells: List[Dict[str, Any]], df: pd.DataFrame) -> None:
code = "show_overview(df=df"
if self.subsections_to_show_with_low_verbosity != self._DEFAULT_SUBSECTIONS_TO_SHOW:
arg_subsections_names = [
f"Overview.OverviewSubsection.{str(sub)}"
f"OverviewSubsection.{str(sub)}"
for sub in self.subsections_to_show_with_low_verbosity
]
code += f", subsections={arg_subsections_names}".replace("'", "")
Expand Down Expand Up @@ -209,7 +212,7 @@ def show(self, df: pd.DataFrame) -> None:

def show_overview(
df: pd.DataFrame,
subsections: Optional[List[Overview.OverviewSubsection]] = None,
subsections: Optional[List[OverviewSubsection]] = None,
columns: Optional[List[str]] = None,
) -> None:
"""Generates overview analysis for df.
Expand Down
Loading

0 comments on commit 19fb878

Please sign in to comment.