From 80c687c6cc36c79a0603114f1231a58847e6b2bf Mon Sep 17 00:00:00 2001
From: Nick Moore <nick@zoic.org>
Date: Fri, 2 Aug 2024 16:56:06 +1000
Subject: [PATCH] starting to convert plugins to new format

---
 countess/core/parameters.py | 100 ++++++++++++++++++++++++++----------
 countess/core/plugins.py    |  31 +++++++----
 countess/gui/config.py      |   4 +-
 countess/plugins/csv.py     |  20 ++++----
 countess/plugins/join.py    |  41 ++++++---------
 countess/plugins/variant.py |  53 +++++++++++--------
 6 files changed, 152 insertions(+), 97 deletions(-)

diff --git a/countess/core/parameters.py b/countess/core/parameters.py
index 36a4577..b4c4382 100644
--- a/countess/core/parameters.py
+++ b/countess/core/parameters.py
@@ -3,7 +3,7 @@
 import math
 import os.path
 import re
-from typing import Any, Dict, Iterable, List, Mapping, Optional, Type, Union
+from typing import Any, Dict, Iterable, List, Mapping, Optional, Type
 
 import pandas as pd
 
@@ -81,7 +81,47 @@ def get_hash_value(self):
         return digest.hexdigest()
 
 
-class StringParam(ScalarParam):
+class ScalarWithOperatorsParam(ScalarParam):
+    """Operator methods shared by StringParam and NumericParam (but not
+    BooleanParam).  Each operator is applied to the wrapped `_value`, so the
+    result is whatever that value's own operator returns."""
+
+    def __add__(self, other):
+        return self._value + other
+
+    def __radd__(self, other):
+        return other + self._value
+
+    def __str__(self):
+        return str(self._value)
+
+    def __eq__(self, other):
+        return self._value == other
+
+    def __ne__(self, other):
+        return self._value != other
+
+    # A class that defines __eq__ without __hash__ becomes unhashable, so hash
+    # the wrapped value to keep parameters usable in sets and as dict keys.
+    def __hash__(self):
+        return hash(self._value)
+
+    def __gt__(self, other):
+        return self._value > other
+
+    # Python's rich comparison hooks are __ge__ and __le__; methods named
+    # __gte__ / __lte__ are never called by the >= and <= operators.
+    def __ge__(self, other):
+        return self._value >= other
+
+    def __lt__(self, other):
+        return self._value < other
+
+    def __le__(self, other):
+        return self._value <= other
+
+
+class StringParam(ScalarWithOperatorsParam):
     """A parameter representing a single string value.  A number 
     of builtin methods are reproduced here to allow the parameter to be
     used pretty much like a normal string. In some circumstances it may
@@ -93,21 +125,17 @@ class StringParam(ScalarParam):
     def set_value(self, value: Any):
         self._value = str(value)
 
-    def __add__(self, other):
-        return self._value + other
-
-    def __radd__(self, other):
-        return other + self._value
+    # Operator methods which apply only to strings
 
     def __len__(self):
         return len(self._value)
 
-    def __str__(self):
-        return str(self._value)
-
     def __contains__(self, other):
         return other in self._value
 
+    def __hash__(self):
+        return hash(self._value)
+
 
 class TextParam(StringParam):
     """This is mostly just a convenience for the GUI, it marks this as a
@@ -117,7 +145,7 @@ def set_value(self, value):
         self._value = re.sub("\n\n\n+", "\n\n", value)
 
 
-class NumericParam(ScalarParam):
+class NumericParam(ScalarWithOperatorsParam):
     """A parameter representing a single numeric value.  A large number 
     of builtin methods are reproduced here to allow the parameter to be
     used pretty much like a normal number. In some circumstances it may
@@ -132,11 +160,7 @@ def set_value(self, value):
         except ValueError:
             self.reset_value()
 
-    def __add__(self, other):
-        return self._value + other
-
-    def __radd__(self, other):
-        return other + self._value
+    # Operator methods which apply only to numerics
 
     def __sub__(self, other):
         return self._value - other
@@ -156,15 +180,11 @@ def __int__(self):
     def __float__(self):
         return float(self._value)
 
-    def __str__(self):
-        return str(self._value)
-
-    # XXX should include many more operator methods here, see
+    # XXX should include many more numeric operator methods here, see
     # https://docs.python.org/3/reference/datamodel.html#emulating-numeric-types
     #   matmul, truediv, floordiv, mod, divmod, pow, lshift, rshift, and, xor, or,
     #   rmatmul, rtruediv, rfloordiv, rmod, rdivmod, rpow, rlshift, rrshift, rand, rxor, ror,
-    #   neg, pos, abs, invert, complex, index, round, trunc, floor, ceil
-    #   lt le eq ne gt ge format
+    #   neg, pos, abs, invert, complex, index, round, trunc, floor, ceil, format
     # it seems like there should be a smarter way to do this but doing it the
     # dumb way works with mypy and pylint.
 
@@ -200,6 +220,9 @@ def set_value(self, value):
     def __bool__(self):
         return self._value
 
+    def __str__(self):
+        return str(self._value)
+
     # XXX are there other operator methods which need to be implemented here?
 
 
@@ -520,7 +543,7 @@ def get_column_name(self):
             return self.value[len(self.PREFIX) :]
         return None
 
-    def get_value(self, data: dict):
+    def get_value_from_dict(self, data: dict):
         if self.value.startswith(self.PREFIX):
             return data[self.value[len(self.PREFIX) :]]
         else:
@@ -695,17 +718,32 @@ def add_files(self, filenames):
 class MultiParam(BaseParam):
     params: Mapping[str, BaseParam] = {}
 
-    def __init__(self, label: str, params: Mapping[str, BaseParam]):
+    def __init__(self, label: str, params: Optional[Mapping[str, BaseParam]] = None):
         self.label = label
-        self.params = params
+        self.params = {k: v.copy() for k, v in params.items()} if params else {}
+
+        # Django-esque declarations: class-level params only fill in names the
+        # caller did not pass, so copy() keeps the values it hands to __init__.
+        for k, p in self.__class__.__dict__.items():
+            if isinstance(p, BaseParam):
+                self.params.setdefault(k, p.copy())
+                self.__dict__[k] = self.params[k]
 
     def copy(self) -> "MultiParam":
         pp = dict(((k, p.copy()) for k, p in self.params.items()))
         return self.__class__(self.label, pp)
 
+    # XXX decide if the "dict-like" accessors are worth keeping
+
     def __getitem__(self, key):
         return self.params[key]
 
+    def __contains__(self, item):
+        return item in self.params
+
+    def __setitem__(self, key, value):
+        self.params[key].value = value
+
     def keys(self):
         return self.params.keys()
 
@@ -715,14 +753,22 @@ def values(self):
     def items(self):
         return self.params.items()
 
+    # attribute-like accessors
+
     def __getattr__(self, name):
         try:
             return self.params[name]
         except KeyError as exc:
             raise AttributeError(name=name, obj=self) from exc
 
-    def __contains__(self, item):
-        return item in self.params
+    def __setattr__(self, name, value):
+        """Assigning a plain value to an existing parameter attribute updates that parameter
+        through set_value; assigning a BaseParam, or any other attribute, is a normal assignment."""
+        existing = getattr(self, name, None)
+        if not isinstance(existing, BaseParam) or isinstance(value, BaseParam):
+            super().__setattr__(name, value)
+        else:
+            existing.set_value(value)
 
     def __iter__(self):
         return self.params.__iter__()
diff --git a/countess/core/plugins.py b/countess/core/plugins.py
index 4a17431..f3de5da 100644
--- a/countess/core/plugins.py
+++ b/countess/core/plugins.py
@@ -101,17 +101,30 @@ def __init__(self, plugin_name=None):
 
         self.parameters = dict((k, v.copy()) for k, v in self.parameters.items())
 
-        # XXX should we allow django-esque declarations like this?
-        # Code gets cleaner, Namespace gets cluttered, though.
+        # Allow new django-esque declarations ...
 
-        for key in dir(self):
-            if isinstance(getattr(self, key), BaseParam):
-                self.parameters[key] = getattr(self, key).copy()
-                setattr(self, key, self.parameters[key])
+        for k, p in self.__class__.__dict__.items():
+            if isinstance(p, BaseParam):
+                self.__dict__[k] = self.parameters[k] = p.copy()
 
-    def add_parameter(self, name: str, param: BaseParam):
-        self.parameters[name] = param.copy()
-        return self.parameters[name]
+    def __setattr__(self, name, value):
+        """Assigning a plain value to an existing parameter attribute updates that parameter
+        through set_value; assigning a BaseParam, or any other attribute, is a normal assignment."""
+
+        existing = getattr(self, name, None)
+        if not isinstance(existing, BaseParam) or isinstance(value, BaseParam):
+            super().__setattr__(name, value)
+        else:
+            existing.set_value(value)
+
+    def __getitem__(self, key):
+        return self.parameters[key]
+
+    def __contains__(self, item):
+        return item in self.parameters
+
+    def __setitem__(self, key, value):
+        self.parameters[key].value = value
 
     def set_parameter(self, key: str, value: Union[bool, int, float, str], base_dir: str = "."):
         param = self.parameters
diff --git a/countess/gui/config.py b/countess/gui/config.py
index 16fbf7f..a4a1fd1 100644
--- a/countess/gui/config.py
+++ b/countess/gui/config.py
@@ -17,7 +17,7 @@
     FileParam,
     FileSaveParam,
     MultiParam,
-    SimpleParam,
+    ScalarParam,
     TabularMultiParam,
     TextParam,
 )
@@ -96,7 +96,7 @@ def __init__(  # pylint: disable=R0912,R0915
                 self.entry["state"] = tk.DISABLED
             else:
                 self.entry.bind("<<Modified>>", self.widget_modified_callback)
-        elif isinstance(parameter, SimpleParam):
+        elif isinstance(parameter, ScalarParam):
             self.var = tk.StringVar(tk_parent, value=parameter.value)
             self.entry = tk.Entry(tk_parent, textvariable=self.var)
             if parameter.read_only:
diff --git a/countess/plugins/csv.py b/countess/plugins/csv.py
index c908248..9e8d622 100644
--- a/countess/plugins/csv.py
+++ b/countess/plugins/csv.py
@@ -109,7 +109,7 @@ def read_file_to_dataframe(self, file_params, logger, row_limit=None):
         while len(df.columns) > len(self.parameters["columns"]):
             self.parameters["columns"].add_row()
 
-        if self.parameters["header"].value:
+        if self.header:
             for n, col in enumerate(df.columns):
                 if not self.parameters["columns"][n]["name"].value:
                     self.parameters["columns"][n]["name"].value = str(col)
@@ -132,12 +132,10 @@ class SaveCsvPlugin(PandasOutputPlugin):
     version = VERSION
     file_types = CSV_FILE_TYPES
 
-    parameters = {
-        "header": BooleanParam("CSV header row?", True),
-        "filename": FileSaveParam("Filename", file_types=file_types),
-        "delimiter": ChoiceParam("Delimiter", ",", choices=[",", ";", "TAB", "|", "SPACE"]),
-        "quoting": BooleanParam("Quote all Strings", False),
-    }
+    header = BooleanParam("CSV header row?", True)
+    filename = FileSaveParam("Filename", file_types=file_types)
+    delimiter = ChoiceParam("Delimiter", ",", choices=[",", ";", "TAB", "|", "SPACE"])
+    quoting = BooleanParam("Quote all Strings", False)
 
     filehandle: Optional[Union[BufferedWriter, BytesIO]] = None
     csv_columns = None
@@ -147,7 +145,7 @@ class SaveCsvPlugin(PandasOutputPlugin):
 
     def prepare(self, sources: list[str], row_limit: Optional[int] = None):
         if row_limit is None:
-            filename = self.parameters["filename"].value
+            filename = str(self.filename)
             if filename.endswith(".gz"):
                 self.filehandle = gzip.open(filename, "wb")
             else:
@@ -167,7 +165,7 @@ def process(self, data: pd.DataFrame, source: str, logger: Logger):
         # include the header or not.
         if self.csv_columns is None:
             self.csv_columns = list(dataframe.columns)
-            emit_header = bool(self.parameters["header"].value)
+            emit_header = bool(self.header)
         else:
             # add in any columns we haven't seen yet in previous dataframes.
             for c in dataframe.columns:
@@ -184,8 +182,8 @@ def process(self, data: pd.DataFrame, source: str, logger: Logger):
             header=emit_header,
             columns=self.csv_columns,
             index=False,
-            sep=self.SEPARATORS[self.parameters["delimiter"].value],
-            quoting=self.QUOTING[self.parameters["quoting"].value],
+            sep=self.SEPARATORS[self.delimiter.value],  # key on the plain value, not the param object
+            quoting=self.QUOTING[self.quoting.value],  # key on the plain value, not the param object
         )  # type: ignore [call-overload]
         return []
 
diff --git a/countess/plugins/join.py b/countess/plugins/join.py
index a850633..338862a 100644
--- a/countess/plugins/join.py
+++ b/countess/plugins/join.py
@@ -24,22 +24,13 @@ class JoinPlugin(PandasProductPlugin):
     version = VERSION
     link = "https://countess-project.github.io/CountESS/included-plugins/#join"
 
-    parameters = {
-        "inputs": ArrayParam(
-            "Inputs",
-            MultiParam(
-                "Input",
-                {
-                    "join_on": ColumnOrIndexChoiceParam("Join On"),
-                    "required": BooleanParam("Required", True),
-                    "drop": BooleanParam("Drop Column", False),
-                },
-            ),
-            read_only=True,
-            min_size=2,
-            max_size=2,
-        ),
-    }
+    class InputMultiParam(MultiParam):
+        join_on = ColumnOrIndexChoiceParam("Join On")
+        required = BooleanParam("Required", True)
+        drop = BooleanParam("Drop Column", False)
+
+    inputs = ArrayParam("Inputs", InputMultiParam("Input"), min_size=2, max_size=2)
+
     join_params = None
     input_columns_1: Optional[Dict] = None
     input_columns_2: Optional[Dict] = None
@@ -47,14 +38,14 @@ class JoinPlugin(PandasProductPlugin):
     def prepare(self, sources: list[str], row_limit: Optional[int] = None):
         super().prepare(sources, row_limit)
 
-        assert isinstance(self.parameters["inputs"], ArrayParam)
-        assert len(self.parameters["inputs"]) == 2
-        ip1, ip2 = self.parameters["inputs"]
+        ip1, ip2 = self.inputs.params
+        assert isinstance(ip1, self.InputMultiParam)
+        assert isinstance(ip2, self.InputMultiParam)
         ip1.label = f"Input 1: {sources[0]}"
         ip2.label = f"Input 2: {sources[1]}"
 
         self.join_params = {
-            "how": _join_how(ip1.required.value, ip2.required.value),
+            "how": _join_how(bool(ip1.required), bool(ip2.required)),
             "left_index": ip1.join_on.is_index(),
             "right_index": ip2.join_on.is_index(),
             "left_on": None if ip1.join_on.is_index() else ip1.join_on.value,
@@ -70,7 +61,6 @@ def process_dataframes(self, dataframe1: pd.DataFrame, dataframe2: pd.DataFrame,
         assert self.input_columns_1 is not None
         assert self.input_columns_2 is not None
         assert self.join_params is not None
-        assert isinstance(self.parameters["inputs"], ArrayParam)
 
         self.input_columns_1.update(get_all_columns(dataframe1))
         self.input_columns_2.update(get_all_columns(dataframe2))
@@ -94,19 +84,18 @@ def process_dataframes(self, dataframe1: pd.DataFrame, dataframe2: pd.DataFrame,
             logger.exception(exc)
             return pd.DataFrame()
 
-        if self.parameters["inputs"][0]["drop"].value and join1 in dataframe.columns:
+        if self.inputs[0].drop and join1 in dataframe.columns:
             dataframe.drop(columns=join1, inplace=True)
-        if self.parameters["inputs"][1]["drop"].value and join2 in dataframe.columns:
+        if self.inputs[1].drop and join2 in dataframe.columns:
             dataframe.drop(columns=join2, inplace=True)
 
         return dataframe
 
     def finalize(self, logger: Logger) -> Iterable:
-        assert isinstance(self.parameters["inputs"], ArrayParam)
-        assert len(self.parameters["inputs"]) == 2
+        assert len(self.inputs.params) == 2
         assert self.input_columns_1 is not None
         assert self.input_columns_2 is not None
-        ip1, ip2 = self.parameters["inputs"]
+        ip1, ip2 = self.inputs.params
 
         ip1.set_column_choices(self.input_columns_1.keys())
         ip2.set_column_choices(self.input_columns_2.keys())
diff --git a/countess/plugins/variant.py b/countess/plugins/variant.py
index c2bb555..8efa1f4 100644
--- a/countess/plugins/variant.py
+++ b/countess/plugins/variant.py
@@ -17,7 +17,7 @@ class VariantPlugin(PandasTransformDictToDictPlugin):
     version = VERSION
     link = "https://countess-project.github.io/CountESS/included-plugins/#variant-caller"
 
-    parameters = {
+    """parameters = {
         "column": ColumnChoiceParam("Input Column", "sequence"),
         "reference": ColumnOrStringParam("Reference Sequence"),
         "offset": IntegerParam("Reference Offset", 0),
@@ -27,35 +27,44 @@ class VariantPlugin(PandasTransformDictToDictPlugin):
         "max_protein": IntegerParam("Max Protein Variations", 10),
         "drop": BooleanParam("Drop unidentified variants", False),
         "drop_columns": BooleanParam("Drop Input Column(s)", False),
-    }
+    }"""
+
+    
+    column = ColumnChoiceParam("Input Column", "sequence")
+    reference = ColumnOrStringParam("Reference Sequence")
+    offset = IntegerParam("Reference Offset", 0)
+    output = StringParam("Output Column", "variant")
+    max_mutations = IntegerParam("Max Mutations", 10)
+    protein = StringParam("Protein Column", "protein")
+    max_protein = IntegerParam("Max Protein Variations", 10)
+    drop = BooleanParam("Drop unidentified variants", False)
+    drop_columns = BooleanParam("Drop Input Column(s)", False)
+
 
     def process_dict(self, data, logger: Logger) -> dict:
         assert isinstance(self.parameters["reference"], ColumnOrStringParam)
-        sequence = data[self.parameters["column"].value]
+        sequence = data[self.column.value]  # index the row by column name, not the param object
         if not sequence:
             return {}
 
-        reference = self.parameters["reference"].get_value(data)
-        offset = self.parameters["offset"].value
+        reference = self.reference.get_value_from_dict(data)
 
         r = {}
 
-        if self.parameters["output"].value:
+        if self.output:
             try:
-                max_mutations = self.parameters["max_mutations"].value
-                r[self.parameters["output"].value] = find_variant_string(
-                    "g.", reference, sequence, max_mutations, offset=offset
+                r[str(self.output)] = find_variant_string(  # plain str key, as in process_dataframe
+                    "g.", reference, sequence, int(self.max_mutations), offset=int(self.offset)
                 )
             except ValueError:
                 pass
             except (TypeError, KeyError, IndexError) as exc:
                 logger.exception(exc)
 
-        if self.parameters["protein"].value:
+        if self.protein:
             try:
-                max_protein = self.parameters["max_protein"].value
-                r[self.parameters["protein"].value] = find_variant_string(
-                    "p.", reference, sequence, max_protein, offset=offset
+                r[str(self.protein)] = find_variant_string(  # plain str key, as in process_dataframe
+                    "p.", reference, sequence, int(self.max_protein), offset=int(self.offset)
                 )
             except ValueError:
                 pass
@@ -69,16 +78,16 @@ def process_dataframe(self, dataframe: pd.DataFrame, logger: Logger) -> Optional
         df_out = super().process_dataframe(dataframe, logger)
 
         if df_out is not None:
-            if self.parameters["drop"].value:
-                if self.parameters["output"].value:
-                    df_out.dropna(subset=self.parameters["output"].value, inplace=True)
-                if self.parameters["protein"].value:
-                    df_out.dropna(subset=self.parameters["protein"].value, inplace=True)
-            if self.parameters["drop_columns"].value:
+            if self.drop:
+                if self.output:
+                    df_out.dropna(subset=str(self.output), inplace=True)
+                if self.protein:
+                    df_out.dropna(subset=str(self.protein), inplace=True)
+            if self.drop_columns:
                 try:
-                    df_out.drop(columns=self.parameters["column"].value, inplace=True)
-                    if self.parameters["reference"].get_column_name():
-                        df_out.drop(columns=self.parameters["reference"].get_column_name(), inplace=True)
+                    df_out.drop(columns=str(self.column), inplace=True)
+                    if self.reference.get_column_name():
+                        df_out.drop(columns=self.reference.get_column_name(), inplace=True)
                 except KeyError:
                     pass