From 80c687c6cc36c79a0603114f1231a58847e6b2bf Mon Sep 17 00:00:00 2001
From: Nick Moore
Date: Fri, 2 Aug 2024 16:56:06 +1000
Subject: [PATCH] starting to convert plugins to new format

---
 countess/core/parameters.py | 100 ++++++++++++++++++++++++++----------
 countess/core/plugins.py    |  31 +++++++----
 countess/gui/config.py      |   4 +-
 countess/plugins/csv.py     |  20 ++++----
 countess/plugins/join.py    |  41 ++++++---------
 countess/plugins/variant.py |  53 +++++++++++--------
 6 files changed, 152 insertions(+), 97 deletions(-)

diff --git a/countess/core/parameters.py b/countess/core/parameters.py
index 36a4577..b4c4382 100644
--- a/countess/core/parameters.py
+++ b/countess/core/parameters.py
@@ -3,7 +3,7 @@
 import math
 import os.path
 import re
-from typing import Any, Dict, Iterable, List, Mapping, Optional, Type, Union
+from typing import Any, Dict, Iterable, List, Mapping, Optional, Type
 
 import pandas as pd
 
@@ -81,7 +81,39 @@ def get_hash_value(self):
         return digest.hexdigest()
 
 
-class StringParam(ScalarParam):
+class ScalarWithOperatorsParam(ScalarParam):
+    # Operator Methods which apply to both StringParams and
+    # NumericParams (but not BooleanParam); each delegates to self._value
+
+    def __add__(self, other):
+        return self._value + other
+
+    def __radd__(self, other):
+        return other + self._value
+
+    def __str__(self):
+        return str(self._value)
+
+    def __eq__(self, other):
+        return self._value == other
+
+    def __ne__(self, other):
+        return self._value != other
+
+    def __gt__(self, other):
+        return self._value > other
+
+    def __ge__(self, other):  # hook for ">=" (there is no "__gte__" hook)
+        return self._value >= other
+
+    def __lt__(self, other):
+        return self._value < other
+
+    def __le__(self, other):  # hook for "<=" (there is no "__lte__" hook)
+        return self._value <= other
+
+
+class StringParam(ScalarWithOperatorsParam):
     """A parameter representing a single string value. A number of builtin
     methods are reproduced here to allow the parameter to be used pretty much
     like a normal string. In some circumstances it may
@@ -93,21 +125,17 @@ class StringParam(ScalarParam):
     def set_value(self, value: Any):
         self._value = str(value)
 
-    def __add__(self, other):
-        return self._value + other
-
-    def __radd__(self, other):
-        return other + self._value
+    # Operator methods which apply only to strings
 
     def __len__(self):
         return len(self._value)
 
-    def __str__(self):
-        return str(self._value)
-
     def __contains__(self, other):
         return other in self._value
 
+    def __hash__(self):
+        return hash(self._value)
+
 
 class TextParam(StringParam):
     """This is mostly just a convenience for the GUI, it marks this as a
@@ -117,7 +145,7 @@ def set_value(self, value):
         self._value = re.sub("\n\n\n+", "\n\n", value)
 
 
-class NumericParam(ScalarParam):
+class NumericParam(ScalarWithOperatorsParam):
     """A parameter representing a single numeric value. A large number of
     builtin methods are reproduced here to allow the parameter to be used pretty
     much like a normal number. In some circumstances it may
@@ -132,11 +160,7 @@ def set_value(self, value):
         except ValueError:
             self.reset_value()
 
-    def __add__(self, other):
-        return self._value + other
-
-    def __radd__(self, other):
-        return other + self._value
+    # Operator methods which apply only to numerics
 
     def __sub__(self, other):
         return self._value - other
@@ -156,15 +180,11 @@ def __int__(self):
     def __float__(self):
         return float(self._value)
 
-    def __str__(self):
-        return str(self._value)
-
-    # XXX should include many more operator methods here, see
+    # XXX should include many more numeric operator methods here, see
     # https://docs.python.org/3/reference/datamodel.html#emulating-numeric-types
     # matmul, truediv, floordiv, mod, divmod, pow, lshift, rshift, and, xor, or,
     # rmatmul, rtruediv, rfloordiv, rmod, rdivmod, rpow, rlshift, rrshift, rand, rxor, ror,
-    # neg, pos, abs, invert, complex, index, round, trunc, floor, ceil
-    # lt le eq ne gt ge format
+    # neg, pos, abs, invert, complex, index, round, trunc, floor, ceil, format
     # it seems like there should be a smarter way to do this but doing it the
     # dumb way works with mypy and pylint.
 
@@ -200,6 +220,9 @@ def set_value(self, value):
     def __bool__(self):
         return self._value
 
+    def __str__(self):
+        return str(self._value)
+
     # XXX are there other operator methods which need to be implemented here?
 
 
@@ -520,7 +543,7 @@ def get_column_name(self):
             return self.value[len(self.PREFIX) :]
         return None
 
-    def get_value(self, data: dict):
+    def get_value_from_dict(self, data: dict):
         if self.value.startswith(self.PREFIX):
             return data[self.value[len(self.PREFIX) :]]
         else:
@@ -695,17 +718,32 @@ def add_files(self, filenames):
 class MultiParam(BaseParam):
     params: Mapping[str, BaseParam] = {}
 
-    def __init__(self, label: str, params: Mapping[str, BaseParam]):
+    def __init__(self, label: str, params: Optional[Mapping[str, BaseParam]] = None):
         self.label = label
-        self.params = params
+        self.params = dict((k, v.copy()) for k, v in params.items()) if params else {}
+
+        # Allow new django-esque declarations via subclasses
+
+        for k, p in self.__class__.__dict__.items():
+            if isinstance(p, BaseParam):
+                self.__dict__[k] = self.params[k] = p.copy()
+
 
     def copy(self) -> "MultiParam":
         pp = dict(((k, p.copy()) for k, p in self.params.items()))
         return self.__class__(self.label, pp)
 
+    # XXX decide if the "dict-like" accessors are worth keeping
+
     def __getitem__(self, key):
         return self.params[key]
 
+    def __contains__(self, item):
+        return item in self.params
+
+    def __setitem__(self, key, value):
+        self.params[key].value = value
+
     def keys(self):
         return self.params.keys()
 
@@ -715,14 +753,22 @@ def values(self):
     def items(self):
         return self.params.items()
 
+    # attribute-like accessors
+
     def __getattr__(self, name):
         try:
             return self.params[name]
         except KeyError as exc:
             raise AttributeError(name=name, obj=self) from exc
 
-    def __contains__(self, item):
-        return item in self.params
+    def __setattr__(self, name, value):
+        """Intercepts attempts to set parameters to a value and turns them into parameter.set_value.
+        Any other kind of attribute assignment is passed through."""
+        target_attr = getattr(self, name, None)
+        if isinstance(target_attr, BaseParam) and not isinstance(value, BaseParam):
+            target_attr.set_value(value)
+        else:
+            super().__setattr__(name, value)
 
     def __iter__(self):
         return self.params.__iter__()
diff --git a/countess/core/plugins.py b/countess/core/plugins.py
index 4a17431..f3de5da 100644
--- a/countess/core/plugins.py
+++ b/countess/core/plugins.py
@@ -101,17 +101,30 @@ def __init__(self, plugin_name=None):
 
         self.parameters = dict((k, v.copy()) for k, v in self.parameters.items())
 
-        # XXX should we allow django-esque declarations like this?
-        # Code gets cleaner, Namespace gets cluttered, though.
+        # Allow new django-esque declarations ...
 
-        for key in dir(self):
-            if isinstance(getattr(self, key), BaseParam):
-                self.parameters[key] = getattr(self, key).copy()
-                setattr(self, key, self.parameters[key])
+        for k, p in self.__class__.__dict__.items():
+            if isinstance(p, BaseParam):
+                self.__dict__[k] = self.parameters[k] = p.copy()
 
-    def add_parameter(self, name: str, param: BaseParam):
-        self.parameters[name] = param.copy()
-        return self.parameters[name]
+    def __setattr__(self, name, value):
+        """Intercepts attempts to set parameters to a value and turns them into parameter.set_value.
+        Any other kind of attribute assignment is passed through."""
+
+        target_attr = getattr(self, name, None)
+        if isinstance(target_attr, BaseParam) and not isinstance(value, BaseParam):
+            target_attr.set_value(value)
+        else:
+            super().__setattr__(name, value)
+
+    def __getitem__(self, key):
+        return self.parameters[key]
+
+    def __contains__(self, item):
+        return item in self.parameters
+
+    def __setitem__(self, key, value):
+        self.parameters[key].value = value
 
     def set_parameter(self, key: str, value: Union[bool, int, float, str], base_dir: str = "."):
         param = self.parameters
diff --git a/countess/gui/config.py b/countess/gui/config.py
index 16fbf7f..a4a1fd1 100644
--- a/countess/gui/config.py
+++ b/countess/gui/config.py
@@ -17,7 +17,7 @@
     FileParam,
     FileSaveParam,
     MultiParam,
-    SimpleParam,
+    ScalarParam,
     TabularMultiParam,
     TextParam,
 )
@@ -96,7 +96,7 @@ def __init__(  # pylint: disable=R0912,R0915
                 self.entry["state"] = tk.DISABLED
             else:
                 self.entry.bind("<>", self.widget_modified_callback)
-        elif isinstance(parameter, SimpleParam):
+        elif isinstance(parameter, ScalarParam):
             self.var = tk.StringVar(tk_parent, value=parameter.value)
             self.entry = tk.Entry(tk_parent, textvariable=self.var)
             if parameter.read_only:
diff --git a/countess/plugins/csv.py b/countess/plugins/csv.py
index c908248..9e8d622 100644
--- a/countess/plugins/csv.py
+++ b/countess/plugins/csv.py
@@ -109,7 +109,7 @@ def read_file_to_dataframe(self, file_params, logger, row_limit=None):
         while len(df.columns) > len(self.parameters["columns"]):
             self.parameters["columns"].add_row()
 
-        if self.parameters["header"].value:
+        if self.header:
             for n, col in enumerate(df.columns):
                 if not self.parameters["columns"][n]["name"].value:
                     self.parameters["columns"][n]["name"].value = str(col)
@@ -132,12 +132,10 @@ class SaveCsvPlugin(PandasOutputPlugin):
     version = VERSION
     file_types = CSV_FILE_TYPES
 
-    parameters = {
-        "header": BooleanParam("CSV header row?", True),
-        "filename": FileSaveParam("Filename", file_types=file_types),
-        "delimiter": ChoiceParam("Delimiter", ",", choices=[",", ";", "TAB", "|", "SPACE"]),
-        "quoting": BooleanParam("Quote all Strings", False),
-    }
+    header = BooleanParam("CSV header row?", True)
+    filename = FileSaveParam("Filename", file_types=file_types)
+    delimiter = ChoiceParam("Delimiter", ",", choices=[",", ";", "TAB", "|", "SPACE"])
+    quoting = BooleanParam("Quote all Strings", False)
 
     filehandle: Optional[Union[BufferedWriter, BytesIO]] = None
     csv_columns = None
@@ -147,7 +145,7 @@ class SaveCsvPlugin(PandasOutputPlugin):
 
     def prepare(self, sources: list[str], row_limit: Optional[int] = None):
         if row_limit is None:
-            filename = self.parameters["filename"].value
+            filename = str(self.filename)
             if filename.endswith(".gz"):
                 self.filehandle = gzip.open(filename, "wb")
             else:
@@ -167,7 +165,7 @@ def process(self, data: pd.DataFrame, source: str, logger: Logger):
         # include the header or not.
         if self.csv_columns is None:
             self.csv_columns = list(dataframe.columns)
-            emit_header = bool(self.parameters["header"].value)
+            emit_header = bool(self.header)
         else:
             # add in any columns we haven't seen yet in previous dataframes.
             for c in dataframe.columns:
@@ -184,8 +182,8 @@ def process(self, data: pd.DataFrame, source: str, logger: Logger):
             header=emit_header,
             columns=self.csv_columns,
             index=False,
-            sep=self.SEPARATORS[self.parameters["delimiter"].value],
-            quoting=self.QUOTING[self.parameters["quoting"].value],
+            sep=self.SEPARATORS[self.delimiter],
+            quoting=self.QUOTING[self.quoting],
         )  # type: ignore [call-overload]
 
         return []
diff --git a/countess/plugins/join.py b/countess/plugins/join.py
index a850633..338862a 100644
--- a/countess/plugins/join.py
+++ b/countess/plugins/join.py
@@ -24,22 +24,13 @@ class JoinPlugin(PandasProductPlugin):
     version = VERSION
     link = "https://countess-project.github.io/CountESS/included-plugins/#join"
 
-    parameters = {
-        "inputs": ArrayParam(
-            "Inputs",
-            MultiParam(
-                "Input",
-                {
-                    "join_on": ColumnOrIndexChoiceParam("Join On"),
-                    "required": BooleanParam("Required", True),
-                    "drop": BooleanParam("Drop Column", False),
-                },
-            ),
-            read_only=True,
-            min_size=2,
-            max_size=2,
-        ),
-    }
+    class InputMultiParam(MultiParam):
+        join_on = ColumnOrIndexChoiceParam("Join On")
+        required = BooleanParam("Required", True)
+        drop = BooleanParam("Drop Column", False)
+
+    inputs = ArrayParam("Inputs", InputMultiParam("Input"), min_size=2, max_size=2)
+
     join_params = None
     input_columns_1: Optional[Dict] = None
     input_columns_2: Optional[Dict] = None
@@ -47,14 +38,14 @@ class JoinPlugin(PandasProductPlugin):
     def prepare(self, sources: list[str], row_limit: Optional[int] = None):
         super().prepare(sources, row_limit)
 
-        assert isinstance(self.parameters["inputs"], ArrayParam)
-        assert len(self.parameters["inputs"]) == 2
-        ip1, ip2 = self.parameters["inputs"]
+        ip1, ip2 = self.inputs.params
+        assert isinstance(ip1, self.InputMultiParam)
+        assert isinstance(ip2, self.InputMultiParam)
         ip1.label = f"Input 1: {sources[0]}"
         ip2.label = f"Input 2: {sources[1]}"
 
         self.join_params = {
-            "how": _join_how(ip1.required.value, ip2.required.value),
+            "how": _join_how(bool(ip1.required), bool(ip2.required)),
             "left_index": ip1.join_on.is_index(),
             "right_index": ip2.join_on.is_index(),
             "left_on": None if ip1.join_on.is_index() else ip1.join_on.value,
@@ -70,7 +61,6 @@ def process_dataframes(self, dataframe1: pd.DataFrame, dataframe2: pd.DataFrame,
         assert self.input_columns_1 is not None
         assert self.input_columns_2 is not None
         assert self.join_params is not None
-        assert isinstance(self.parameters["inputs"], ArrayParam)
 
         self.input_columns_1.update(get_all_columns(dataframe1))
         self.input_columns_2.update(get_all_columns(dataframe2))
@@ -94,19 +84,18 @@ def process_dataframes(self, dataframe1: pd.DataFrame, dataframe2: pd.DataFrame,
             logger.exception(exc)
             return pd.DataFrame()
 
-        if self.parameters["inputs"][0]["drop"].value and join1 in dataframe.columns:
+        if self.inputs[0].drop and join1 in dataframe.columns:
             dataframe.drop(columns=join1, inplace=True)
-        if self.parameters["inputs"][1]["drop"].value and join2 in dataframe.columns:
+        if self.inputs[1].drop and join2 in dataframe.columns:
             dataframe.drop(columns=join2, inplace=True)
 
         return dataframe
 
     def finalize(self, logger: Logger) -> Iterable:
-        assert isinstance(self.parameters["inputs"], ArrayParam)
-        assert len(self.parameters["inputs"]) == 2
+        assert len(self.inputs.params) == 2
         assert self.input_columns_1 is not None
         assert self.input_columns_2 is not None
-        ip1, ip2 = self.parameters["inputs"]
+        ip1, ip2 = self.inputs.params
         ip1.set_column_choices(self.input_columns_1.keys())
         ip2.set_column_choices(self.input_columns_2.keys())
 
diff --git a/countess/plugins/variant.py b/countess/plugins/variant.py
index c2bb555..8efa1f4 100644
--- a/countess/plugins/variant.py
+++ b/countess/plugins/variant.py
@@ -17,7 +17,7 @@ class VariantPlugin(PandasTransformDictToDictPlugin):
     version = VERSION
     link = "https://countess-project.github.io/CountESS/included-plugins/#variant-caller"
 
-    parameters = {
+    """parameters = {
         "column": ColumnChoiceParam("Input Column", "sequence"),
         "reference": ColumnOrStringParam("Reference Sequence"),
         "offset": IntegerParam("Reference Offset", 0),
@@ -27,35 +27,44 @@ class VariantPlugin(PandasTransformDictToDictPlugin):
         "max_protein": IntegerParam("Max Protein Variations", 10),
         "drop": BooleanParam("Drop unidentified variants", False),
         "drop_columns": BooleanParam("Drop Input Column(s)", False),
-    }
+    }"""
+
+
+    column = ColumnChoiceParam("Input Column", "sequence")
+    reference = ColumnOrStringParam("Reference Sequence")
+    offset = IntegerParam("Reference Offset", 0)
+    output = StringParam("Output Column", "variant")
+    max_mutations = IntegerParam("Max Mutations", 10)
+    protein = StringParam("Protein Column", "protein")
+    max_protein = IntegerParam("Max Protein Variations", 10)
+    drop = BooleanParam("Drop unidentified variants", False)
+    drop_columns = BooleanParam("Drop Input Column(s)", False)
+
 
     def process_dict(self, data, logger: Logger) -> dict:
         assert isinstance(self.parameters["reference"], ColumnOrStringParam)
-        sequence = data[self.parameters["column"].value]
+        sequence = data[self.column]
         if not sequence:
             return {}
 
-        reference = self.parameters["reference"].get_value(data)
-        offset = self.parameters["offset"].value
+        reference = self.reference.get_value_from_dict(data)
 
         r = {}
 
-        if self.parameters["output"].value:
+        if self.output:
             try:
-                max_mutations = self.parameters["max_mutations"].value
-                r[self.parameters["output"].value] = find_variant_string(
-                    "g.", reference, sequence, max_mutations, offset=offset
+                r[self.output] = find_variant_string(
+                    "g.", reference, sequence, int(self.max_mutations), offset=int(self.offset)
                 )
             except ValueError:
                 pass
             except (TypeError, KeyError, IndexError) as exc:
                 logger.exception(exc)
 
-        if self.parameters["protein"].value:
+        if self.protein:
             try:
-                max_protein = self.parameters["max_protein"].value
-                r[self.parameters["protein"].value] = find_variant_string(
-                    "p.", reference, sequence, max_protein, offset=offset
+                r[self.protein] = find_variant_string(
+                    "p.", reference, sequence, int(self.max_protein), offset=int(self.offset)
                 )
             except ValueError:
                 pass
@@ -69,16 +78,16 @@ def process_dataframe(self, dataframe: pd.DataFrame, logger: Logger) -> Optional
         df_out = super().process_dataframe(dataframe, logger)
 
         if df_out is not None:
-            if self.parameters["drop"].value:
-                if self.parameters["output"].value:
-                    df_out.dropna(subset=self.parameters["output"].value, inplace=True)
-                if self.parameters["protein"].value:
-                    df_out.dropna(subset=self.parameters["protein"].value, inplace=True)
-            if self.parameters["drop_columns"].value:
+            if self.drop:
+                if self.output:
+                    df_out.dropna(subset=str(self.output), inplace=True)
+                if self.protein:
+                    df_out.dropna(subset=str(self.protein), inplace=True)
+            if self.drop_columns:
                 try:
-                    df_out.drop(columns=self.parameters["column"].value, inplace=True)
-                    if self.parameters["reference"].get_column_name():
-                        df_out.drop(columns=self.parameters["reference"].get_column_name(), inplace=True)
+                    df_out.drop(columns=str(self.column), inplace=True)
+                    if self.reference.get_column_name():
+                        df_out.drop(columns=self.reference.get_column_name(), inplace=True)
                 except KeyError:
                     pass
 