Skip to content

Commit

Permalink
starting to convert plugins to new format
Browse files Browse the repository at this point in the history
  • Loading branch information
nickzoic committed Aug 2, 2024
1 parent c92b519 commit 80c687c
Show file tree
Hide file tree
Showing 6 changed files with 152 additions and 97 deletions.
100 changes: 73 additions & 27 deletions countess/core/parameters.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
import math
import os.path
import re
from typing import Any, Dict, Iterable, List, Mapping, Optional, Type, Union
from typing import Any, Dict, Iterable, List, Mapping, Optional, Type

import pandas as pd

Expand Down Expand Up @@ -81,7 +81,39 @@ def get_hash_value(self):
return digest.hexdigest()


class StringParam(ScalarParam):
class ScalarWithOperatorsParam(ScalarParam):
    """Scalar parameter which can be used in expressions like its value.

    Holds the operator methods which apply to both StringParams and
    NumericParams (but not BooleanParam).  Every operator delegates to
    the wrapped ``self._value``, so the result of an operation is a plain
    value rather than another parameter.
    """

    def __add__(self, other):
        return self._value + other

    def __radd__(self, other):
        return other + self._value

    def __str__(self):
        return str(self._value)

    # NOTE: a class which defines __eq__ without __hash__ is unhashable;
    # subclasses which must be usable as dict keys or set members need to
    # define __hash__ themselves.
    def __eq__(self, other):
        return self._value == other

    def __ne__(self, other):
        return self._value != other

    def __gt__(self, other):
        return self._value > other

    def __ge__(self, other):
        # Python's hook for ``>=`` is ``__ge__``; a method called
        # ``__gte__`` is never invoked by the comparison operator.
        return self._value >= other

    def __lt__(self, other):
        return self._value < other

    def __le__(self, other):
        # Python's hook for ``<=`` is ``__le__``; a method called
        # ``__lte__`` is never invoked by the comparison operator.
        return self._value <= other

    # Backward-compatible aliases for the previous (misspelled) method
    # names, in case anything calls them explicitly.
    __gte__ = __ge__
    __lte__ = __le__


class StringParam(ScalarWithOperatorsParam):
"""A parameter representing a single string value. A number
of builtin methods are reproduced here to allow the parameter to be
used pretty much like a normal string. In some circumstances it may
Expand All @@ -93,21 +125,17 @@ class StringParam(ScalarParam):
def set_value(self, value: Any):
self._value = str(value)

def __add__(self, other):
return self._value + other

def __radd__(self, other):
return other + self._value
# Operator methods which apply only to strings

def __len__(self):
return len(self._value)

def __str__(self):
return str(self._value)

def __contains__(self, other):
return other in self._value

def __hash__(self):
return hash(self._value)


class TextParam(StringParam):
"""This is mostly just a convenience for the GUI, it marks this as a
Expand All @@ -117,7 +145,7 @@ def set_value(self, value):
self._value = re.sub("\n\n\n+", "\n\n", value)


class NumericParam(ScalarParam):
class NumericParam(ScalarWithOperatorsParam):
"""A parameter representing a single numeric value. A large number
of builtin methods are reproduced here to allow the parameter to be
used pretty much like a normal number. In some circumstances it may
Expand All @@ -132,11 +160,7 @@ def set_value(self, value):
except ValueError:
self.reset_value()

def __add__(self, other):
return self._value + other

def __radd__(self, other):
return other + self._value
# Operator methods which apply only to numerics

def __sub__(self, other):
return self._value - other
Expand All @@ -156,15 +180,11 @@ def __int__(self):
def __float__(self):
return float(self._value)

def __str__(self):
return str(self._value)

# XXX should include many more operator methods here, see
# XXX should include many more numeric operator methods here, see
# https://docs.python.org/3/reference/datamodel.html#emulating-numeric-types
# matmul, truediv, floordiv, mod, divmod, pow, lshift, rshift, and, xor, or,
# rmatmul, rtruediv, rfloordiv, rmod, rdivmod, rpow, rlshift, rrshift, rand, rxor, ror,
# neg, pos, abs, invert, complex, index, round, trunc, floor, ceil
# lt le eq ne gt ge format
# neg, pos, abs, invert, complex, index, round, trunc, floor, ceil, format
# it seems like there should be a smarter way to do this but doing it the
# dumb way works with mypy and pylint.

Expand Down Expand Up @@ -200,6 +220,9 @@ def set_value(self, value):
def __bool__(self):
return self._value

def __str__(self):
return str(self._value)

# XXX are there other operator methods which need to be implemented here?


Expand Down Expand Up @@ -520,7 +543,7 @@ def get_column_name(self):
return self.value[len(self.PREFIX) :]
return None

def get_value(self, data: dict):
def get_value_from_dict(self, data: dict):
if self.value.startswith(self.PREFIX):
return data[self.value[len(self.PREFIX) :]]
else:
Expand Down Expand Up @@ -695,17 +718,32 @@ def add_files(self, filenames):
class MultiParam(BaseParam):
params: Mapping[str, BaseParam] = {}

def __init__(self, label: str, params: Mapping[str, BaseParam]):
def __init__(self, label: str, params: Optional[Mapping[str, BaseParam]] = None):
self.label = label
self.params = params
self.params = dict((k, v.copy()) for k, v in params.items()) if params else {}

# Allow new django-esque declarations via subclasses

for k, p in self.__class__.__dict__.items():
if isinstance(p, BaseParam):
self.__dict__[k] = self.params[k] = p.copy()


def copy(self) -> "MultiParam":
    """Return an independent copy of this parameter and its sub-parameters.

    The copy is built through the class constructor so that subclasses
    are preserved.  The constructor replaces every class-declared
    sub-parameter with a fresh copy of its class-level default, which
    would silently discard the current values, so the copied
    sub-parameters are put back afterwards.
    """
    snapshot = {key: param.copy() for key, param in self.params.items()}
    clone = self.__class__(self.label, snapshot)

    for key, param in snapshot.items():
        clone.params[key] = param
        # Keep the attribute-style alias pointing at the same object as
        # the dict entry, but only where the constructor created such
        # an alias (never clobber ordinary attributes like ``label``).
        if isinstance(clone.__dict__.get(key), BaseParam):
            clone.__dict__[key] = param

    return clone

# XXX decide if the "dict-like" accessors are worth keeping

def __getitem__(self, key):
return self.params[key]

def __contains__(self, item):
return item in self.params

def __setitem__(self, key, value):
self.params[key].value = value

def keys(self):
return self.params.keys()

Expand All @@ -715,14 +753,22 @@ def values(self):
def items(self):
return self.params.items()

# attribute-like accessors

def __getattr__(self, name):
    """Expose sub-parameters as attributes.

    Called only when normal attribute lookup fails; looks ``name`` up in
    ``self.params`` and raises AttributeError if there is no such key.
    """
    try:
        found = self.params[name]
    except KeyError as missing:
        raise AttributeError(name=name, obj=self) from missing
    return found

def __contains__(self, item):
return item in self.params
def __setattr__(self, name, value):
    """Route plain-value assignments onto existing parameters.

    If ``name`` currently refers to a parameter and ``value`` is not
    itself a parameter, the value is handed to that parameter's
    ``set_value``.  Every other assignment is an ordinary attribute
    assignment.
    """
    existing = getattr(self, name, None)
    value_is_param = isinstance(value, BaseParam)

    if value_is_param or not isinstance(existing, BaseParam):
        super().__setattr__(name, value)
        return

    existing.set_value(value)

def __iter__(self):
return self.params.__iter__()
Expand Down
31 changes: 22 additions & 9 deletions countess/core/plugins.py
Original file line number Diff line number Diff line change
Expand Up @@ -101,17 +101,30 @@ def __init__(self, plugin_name=None):

self.parameters = dict((k, v.copy()) for k, v in self.parameters.items())

# XXX should we allow django-esque declarations like this?
# Code gets cleaner, Namespace gets cluttered, though.
# Allow new django-esque declarations ...

for key in dir(self):
if isinstance(getattr(self, key), BaseParam):
self.parameters[key] = getattr(self, key).copy()
setattr(self, key, self.parameters[key])
for k, p in self.__class__.__dict__.items():
if isinstance(p, BaseParam):
self.__dict__[k] = self.parameters[k] = p.copy()

def add_parameter(self, name: str, param: BaseParam):
self.parameters[name] = param.copy()
return self.parameters[name]
def __setattr__(self, name, value):
    """Turn ``plugin.some_param = value`` into ``some_param.set_value(value)``.

    Applies only when the attribute already holds a parameter and the
    new value is not a parameter itself; any other kind of attribute
    assignment is passed through unchanged.
    """
    current = getattr(self, name, None)

    replaces_param_value = isinstance(current, BaseParam) and not isinstance(value, BaseParam)
    if not replaces_param_value:
        super().__setattr__(name, value)
        return

    current.set_value(value)

def __getitem__(self, key):
return self.parameters[key]

def __contains__(self, item):
return item in self.parameters

def __setitem__(self, key, value):
self.parameters[key].value = value

def set_parameter(self, key: str, value: Union[bool, int, float, str], base_dir: str = "."):
param = self.parameters
Expand Down
4 changes: 2 additions & 2 deletions countess/gui/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@
FileParam,
FileSaveParam,
MultiParam,
SimpleParam,
ScalarParam,
TabularMultiParam,
TextParam,
)
Expand Down Expand Up @@ -96,7 +96,7 @@ def __init__( # pylint: disable=R0912,R0915
self.entry["state"] = tk.DISABLED
else:
self.entry.bind("<<Modified>>", self.widget_modified_callback)
elif isinstance(parameter, SimpleParam):
elif isinstance(parameter, ScalarParam):
self.var = tk.StringVar(tk_parent, value=parameter.value)
self.entry = tk.Entry(tk_parent, textvariable=self.var)
if parameter.read_only:
Expand Down
20 changes: 9 additions & 11 deletions countess/plugins/csv.py
Original file line number Diff line number Diff line change
Expand Up @@ -109,7 +109,7 @@ def read_file_to_dataframe(self, file_params, logger, row_limit=None):
while len(df.columns) > len(self.parameters["columns"]):
self.parameters["columns"].add_row()

if self.parameters["header"].value:
if self.header:
for n, col in enumerate(df.columns):
if not self.parameters["columns"][n]["name"].value:
self.parameters["columns"][n]["name"].value = str(col)
Expand All @@ -132,12 +132,10 @@ class SaveCsvPlugin(PandasOutputPlugin):
version = VERSION
file_types = CSV_FILE_TYPES

parameters = {
"header": BooleanParam("CSV header row?", True),
"filename": FileSaveParam("Filename", file_types=file_types),
"delimiter": ChoiceParam("Delimiter", ",", choices=[",", ";", "TAB", "|", "SPACE"]),
"quoting": BooleanParam("Quote all Strings", False),
}
header = BooleanParam("CSV header row?", True)
filename = FileSaveParam("Filename", file_types=file_types)
delimiter = ChoiceParam("Delimiter", ",", choices=[",", ";", "TAB", "|", "SPACE"])
quoting = BooleanParam("Quote all Strings", False)

filehandle: Optional[Union[BufferedWriter, BytesIO]] = None
csv_columns = None
Expand All @@ -147,7 +145,7 @@ class SaveCsvPlugin(PandasOutputPlugin):

def prepare(self, sources: list[str], row_limit: Optional[int] = None):
if row_limit is None:
filename = self.parameters["filename"].value
filename = str(self.filename)
if filename.endswith(".gz"):
self.filehandle = gzip.open(filename, "wb")
else:
Expand All @@ -167,7 +165,7 @@ def process(self, data: pd.DataFrame, source: str, logger: Logger):
# include the header or not.
if self.csv_columns is None:
self.csv_columns = list(dataframe.columns)
emit_header = bool(self.parameters["header"].value)
emit_header = bool(self.header)
else:
# add in any columns we haven't seen yet in previous dataframes.
for c in dataframe.columns:
Expand All @@ -184,8 +182,8 @@ def process(self, data: pd.DataFrame, source: str, logger: Logger):
header=emit_header,
columns=self.csv_columns,
index=False,
sep=self.SEPARATORS[self.parameters["delimiter"].value],
quoting=self.QUOTING[self.parameters["quoting"].value],
sep=self.SEPARATORS[self.delimiter],
quoting=self.QUOTING[self.quoting],
) # type: ignore [call-overload]
return []

Expand Down
41 changes: 15 additions & 26 deletions countess/plugins/join.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,37 +24,28 @@ class JoinPlugin(PandasProductPlugin):
version = VERSION
link = "https://countess-project.github.io/CountESS/included-plugins/#join"

parameters = {
"inputs": ArrayParam(
"Inputs",
MultiParam(
"Input",
{
"join_on": ColumnOrIndexChoiceParam("Join On"),
"required": BooleanParam("Required", True),
"drop": BooleanParam("Drop Column", False),
},
),
read_only=True,
min_size=2,
max_size=2,
),
}
class InputMultiParam(MultiParam):
join_on = ColumnOrIndexChoiceParam("Join On")
required = BooleanParam("Required", True)
drop = BooleanParam("Drop Column", False)

inputs = ArrayParam("Inputs", InputMultiParam("Input"), min_size=2, max_size=2)

join_params = None
input_columns_1: Optional[Dict] = None
input_columns_2: Optional[Dict] = None

def prepare(self, sources: list[str], row_limit: Optional[int] = None):
super().prepare(sources, row_limit)

assert isinstance(self.parameters["inputs"], ArrayParam)
assert len(self.parameters["inputs"]) == 2
ip1, ip2 = self.parameters["inputs"]
ip1, ip2 = self.inputs.params
assert isinstance(ip1, self.InputMultiParam)
assert isinstance(ip2, self.InputMultiParam)
ip1.label = f"Input 1: {sources[0]}"
ip2.label = f"Input 2: {sources[1]}"

self.join_params = {
"how": _join_how(ip1.required.value, ip2.required.value),
"how": _join_how(bool(ip1.required), bool(ip2.required)),
"left_index": ip1.join_on.is_index(),
"right_index": ip2.join_on.is_index(),
"left_on": None if ip1.join_on.is_index() else ip1.join_on.value,
Expand All @@ -70,7 +61,6 @@ def process_dataframes(self, dataframe1: pd.DataFrame, dataframe2: pd.DataFrame,
assert self.input_columns_1 is not None
assert self.input_columns_2 is not None
assert self.join_params is not None
assert isinstance(self.parameters["inputs"], ArrayParam)

self.input_columns_1.update(get_all_columns(dataframe1))
self.input_columns_2.update(get_all_columns(dataframe2))
Expand All @@ -94,19 +84,18 @@ def process_dataframes(self, dataframe1: pd.DataFrame, dataframe2: pd.DataFrame,
logger.exception(exc)
return pd.DataFrame()

if self.parameters["inputs"][0]["drop"].value and join1 in dataframe.columns:
if self.inputs[0].drop and join1 in dataframe.columns:
dataframe.drop(columns=join1, inplace=True)
if self.parameters["inputs"][1]["drop"].value and join2 in dataframe.columns:
if self.inputs[1].drop and join2 in dataframe.columns:
dataframe.drop(columns=join2, inplace=True)

return dataframe

def finalize(self, logger: Logger) -> Iterable:
assert isinstance(self.parameters["inputs"], ArrayParam)
assert len(self.parameters["inputs"]) == 2
assert len(self.inputs.params) == 2
assert self.input_columns_1 is not None
assert self.input_columns_2 is not None
ip1, ip2 = self.parameters["inputs"]
ip1, ip2 = self.inputs.params

ip1.set_column_choices(self.input_columns_1.keys())
ip2.set_column_choices(self.input_columns_2.keys())
Expand Down
Loading

0 comments on commit 80c687c

Please sign in to comment.