Skip to content

Commit

Permalink
Merge branch 'nick/genomic-variants'
Browse files Browse the repository at this point in the history
  • Loading branch information
nickzoic committed Sep 24, 2024
2 parents 951bf3c + 4bbf073 commit 273704b
Show file tree
Hide file tree
Showing 5 changed files with 225 additions and 72 deletions.
75 changes: 75 additions & 0 deletions countess/core/parameters.py
Original file line number Diff line number Diff line change
Expand Up @@ -339,6 +339,73 @@ def __init__(self, label: str, value=None, file_types=None):
self.file_types = file_types


class DictChoiceParam(ScalarWithOperatorsParam):
    """Drop-down parameter whose options are supplied as a mapping.

    Every key of ``choices`` is a selectable choice and the value stored
    against it is the text displayed for that choice.  ``reverse`` is the
    inverse mapping (displayed text -> choice), so a selection can be set
    either by its choice key or by its displayed text."""

    _value: str = ""
    _choice: str = ""
    choices: dict[str, str]
    reverse: dict[str, str]

    def __init__(self, label: str, value: Optional[str] = None, choices: Optional[dict[str, str]] = None):
        super().__init__(label)
        # Choices have to be installed before the value, because
        # set_value() resolves the value against them.
        initial_choices = choices or {}
        self.set_choices(initial_choices)
        self.set_value(value)

    def clean_value(self, value):
        """Return ``value`` unchanged."""
        return value

    def set_value(self, value):
        """Select an entry by displayed text or, failing that, by choice
        key; anything unrecognised selects the default entry."""
        # Displayed text is tried first ...
        if value in self.reverse:
            self._choice, self._value = self.reverse[value], value
            return
        # ... then the choice key ...
        if value in self.choices:
            self._choice, self._value = value, self.choices[value]
            return
        # ... and otherwise fall back to the default.
        self.set_default()

    def get_choice(self):
        """Return the key of the currently selected choice."""
        return self._choice

    def set_choice(self, choice):
        """Select an entry by choice key; an unknown key selects the
        default entry."""
        if choice not in self.choices:
            self.set_default()
            return
        self._choice = choice
        self._value = self.choices[choice]

    choice = property(get_choice, set_choice)

    def set_choices(self, choices: dict[str, str]):
        """Replace the available choices, keeping the current selection
        if it is still present in the new mapping."""
        self.choices = dict(choices)
        self.reverse = {shown: name for name, shown in choices.items()}
        # Prefer to keep the same choice key, refreshing its text.
        if self._choice in self.choices:
            self._value = self.choices[self._choice]
            return
        # Otherwise try to keep the same displayed text.
        if self._value in self.reverse:
            self._choice = self.reverse[self._value]
            return
        self.set_default()

    def get_values(self):
        """Return the displayed texts of all choices as a list."""
        return [*self.choices.values()]

    def set_default(self):
        """Select the first entry of ``choices``, or the empty string for
        both choice and value when there are no choices."""
        if not self.choices:
            self._choice = ""
            self._value = ""
            return
        self._choice, self._value = next(iter(self.choices.items()))

    def get_parameters(self, key, base_dir="."):
        """Return a one-element tuple pairing ``key`` with the selected
        choice key (not its displayed text)."""
        entry = (key, self._choice)
        return (entry,)

    def copy(self) -> "DictChoiceParam":
        """Return a new instance of the same class built from this
        parameter's label, value and choices."""
        cls = self.__class__
        return cls(self.label, self.value, self.choices)


class ChoiceParam(ScalarWithOperatorsParam):
"""A drop-down menu parameter choosing between options.
Defaults to 'None'"""
Expand Down Expand Up @@ -395,6 +462,9 @@ def set_choices(self, choices: Iterable[str]):
self._value = self.DEFAULT_VALUE
self._choice = None

def get_values(self):
    """Return the values to offer in the drop-down, which for this
    class are the choices themselves."""
    return self.choices

def copy(self) -> "ChoiceParam":
    """Return a new instance of the same class built from this
    parameter's label, value and choices."""
    return self.__class__(self.label, self.value, self.choices)

Expand Down Expand Up @@ -917,3 +987,8 @@ def get_hash_value(self):
class TabularMultiParam(MultiParam):
    """Marker subclass of MultiParam which adds no behaviour of its own.
    It only exists to give the GUI a hint about how the MultiParam is to
    be presented: as a hierarchy or as a table."""


class FramedMultiParam(MultiParam):
    """Marker subclass of MultiParam which adds no behaviour of its own.
    It only exists to give the GUI a hint that the MultiParam should be
    displayed in its own frame, labelled with the parameter's label."""
29 changes: 21 additions & 8 deletions countess/gui/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,9 +13,11 @@
BooleanParam,
ChoiceParam,
ColumnOrStringParam,
DictChoiceParam,
FileArrayParam,
FileParam,
FileSaveParam,
FramedMultiParam,
MultiParam,
ScalarParam,
TabularMultiParam,
Expand Down Expand Up @@ -66,15 +68,15 @@ def __init__( # pylint: disable=R0912,R0915
self.label: Optional[tk.Widget] = None
self.row_labels: list[tk.Widget] = []

if isinstance(parameter, ArrayParam):
if isinstance(parameter, (ArrayParam, FramedMultiParam)):
self.label = None
else:
self.label = tk.Label(tk_parent, text=parameter.label)

if isinstance(parameter, ChoiceParam):
if isinstance(parameter, (ChoiceParam, DictChoiceParam)):
self.var = tk.StringVar(tk_parent, value=parameter.value)
self.entry = ttk.Combobox(tk_parent, textvariable=self.var)
self.entry["values"] = parameter.choices or [""]
self.entry["values"] = parameter.get_values() or [""]
if isinstance(parameter, ColumnOrStringParam):
self.entry.bind("<Key>", self.combobox_set)
self.entry["state"] = "normal"
Expand Down Expand Up @@ -139,6 +141,15 @@ def __init__( # pylint: disable=R0912,R0915

self.update_subwrappers(parameter.params, drc)

elif isinstance(parameter, FramedMultiParam):
label_frame_label = tk.Frame(tk_parent)
tk.Label(label_frame_label, text=parameter.label).grid(row=0, column=0, padx=5)
self.entry = tk.LabelFrame(tk_parent, labelwidget=label_frame_label, padx=10, pady=5)
self.entry.columnconfigure(0, weight=0)
self.entry.columnconfigure(1, weight=0)
self.entry.columnconfigure(2, weight=1)
self.update_subwrappers(parameter.params.values(), None)

elif isinstance(parameter, (ArrayParam, MultiParam)):
self.entry = tk.Frame(tk_parent)
self.entry.columnconfigure(0, weight=0)
Expand Down Expand Up @@ -193,9 +204,8 @@ def update(self):
)
elif isinstance(self.parameter, MultiParam):
self.update_subwrappers(self.parameter.params.values(), None)
elif isinstance(self.parameter, ChoiceParam):
choices = self.parameter.choices or [""]
self.entry["values"] = choices
elif isinstance(self.parameter, (ChoiceParam, DictChoiceParam)):
self.entry["values"] = self.parameter.get_values() or [""]
self.var.set(self.parameter.value)
elif isinstance(self.parameter, BooleanParam):
self.set_checkbox_value()
Expand Down Expand Up @@ -383,8 +393,11 @@ def set_choice(self, choice):
self.callback(self.parameter)

def value_changed_callback(self, *_):
if isinstance(self.parameter, ChoiceParam) and self.entry.current() != -1:
self.set_choice(self.entry.current())
if isinstance(self.parameter, (ChoiceParam, DictChoiceParam)) and self.entry.current() != -1:
value = self.parameter.get_values()[self.entry.current()]
if value != self.parameter.value:
self.parameter.value = value
self.callback(self.parameter)
else:
self.var.set(self.set_value(self.var.get()))

Expand Down
96 changes: 48 additions & 48 deletions countess/plugins/variant.py
Original file line number Diff line number Diff line change
@@ -1,88 +1,88 @@
import logging
from typing import Optional

import pandas as pd
import string

from countess import VERSION
from countess.core.parameters import (
BooleanParam,
ArrayParam,
ColumnChoiceParam,
ColumnOrIntegerParam,
ColumnOrStringParam,
DictChoiceParam,
IntegerParam,
MultiParam,
StringCharacterSetParam,
StringParam,
)
from countess.core.plugins import PandasTransformDictToDictPlugin
from countess.utils.variant import find_variant_string

logger = logging.getLogger(__name__)

REFERENCE_CHAR_SET = set(string.ascii_uppercase + string.digits + "_")

# XXX Should probably support these other types as well but I don't
# know what I don't know ...
# XXX Supporting protein calls on mitochondrial (or other organisms)
# DNA will require expansion of the variant caller routine to handle
# different codon tables. This opens up a can of worms of course.
# XXX There should probably also be a warning generated if you ask for a
# non-MT DNA call with an MT protein call or vice versa.

# Maps each sequence type code to the label displayed for it.
# VariantPlugin.process_dict uses the first character of the code as the
# type letter of the variant prefix (e.g. "g.") and treats a trailing "-"
# as a request for a minus strand call.  The commented-out entries are
# types which are not supported yet.
SEQUENCE_TYPE_CHOICES = {
    "g": "Genomic",
    "g-": "Genomic (Minus Strand)",
    # "o": "Circular Genomic",
    # "m": "Mitochondrial",
    "c": "Coding DNA",
    # "n": "Non-Coding DNA",
    "p": "Protein",
    # "pm": "Protein (MT)",
}


class VariantOutputMultiParam(MultiParam):
    """Settings describing one variant output column.

    VariantPlugin keeps an array of these and, for each input row,
    attempts one variant call per entry."""

    # Optional reference prefix: when non-empty it is written, followed
    # by ":", ahead of the type letter.  Presumably restricted to the
    # characters in REFERENCE_CHAR_SET -- confirm against
    # StringCharacterSetParam.
    prefix = StringCharacterSetParam("Prefix", "", character_set=REFERENCE_CHAR_SET)
    # Kind of variant call to make, chosen from SEQUENCE_TYPE_CHOICES.
    seq_type = DictChoiceParam("Type", choices=SEQUENCE_TYPE_CHOICES)
    # Offset passed to the variant caller; may be a constant or be read
    # from a column of the input row.
    offset = ColumnOrIntegerParam("Offset", 0)
    # Passed to the variant caller as its ``max_mutations`` argument.
    maxlen = IntegerParam("Max Variations", 10)
    # Name of the column the resulting variant string is written to.
    output = StringParam("Output Column", "variant")


class VariantPlugin(PandasTransformDictToDictPlugin):
"""Turns a DNA sequence into a HGVS variant code"""

name = "Variant Translator"
name = "Variant Caller"
description = "Turns a DNA sequence into a HGVS variant code"
version = VERSION
link = "https://countess-project.github.io/CountESS/included-plugins/#variant-caller"

column = ColumnChoiceParam("Input Column", "sequence")
reference = ColumnOrStringParam("Reference Sequence")
offset = ColumnOrIntegerParam("Reference Offset", 0)
output = StringParam("Output Column", "variant")
max_mutations = IntegerParam("Max Mutations", 10)
protein = StringParam("Protein Column", "protein")
max_protein = IntegerParam("Max Protein Variations", 10)
drop = BooleanParam("Drop unidentified variants", False)
drop_columns = BooleanParam("Drop Input Column(s)", False)
outputs = ArrayParam("Outputs", VariantOutputMultiParam("Output"), min_size=1)

def process_dict(self, data) -> dict:
sequence = data[str(self.column)]
reference = self.reference.get_value_from_dict(data)
if not sequence:
return {}

reference = self.reference.get_value_from_dict(data)
offset = int(self.offset.get_value_from_dict(data) or 0)

r: dict[str, str] = {}
for output in self.outputs:
seq_type = output.seq_type.get_choice() or "g"
prefix = f"{output.prefix + ':' if output.prefix else ''}{seq_type[0]}."
offset = int(output.offset.get_value_from_dict(data) or 0)

if self.output:
try:
r[self.output.value] = find_variant_string(
"g.", reference, sequence, int(self.max_mutations), offset=offset
r[output.output.value] = find_variant_string(
prefix,
reference,
sequence,
max_mutations=output.maxlen.value,
offset=offset,
minus_strand=seq_type.endswith("-"),
)
except ValueError:
pass
except (TypeError, KeyError, IndexError) as exc:
logger.warning("Exception", exc_info=exc)

if self.protein:
try:
r[str(self.protein)] = find_variant_string(
"p.", reference, sequence, int(self.max_protein), offset=offset
)
except ValueError:
pass
except (TypeError, KeyError, IndexError) as exc:
logger.warning("Exception", exc_info=exc)

return r

def process_dataframe(self, dataframe: pd.DataFrame) -> Optional[pd.DataFrame]:
df_out = super().process_dataframe(dataframe)

if df_out is not None:
if self.drop:
if self.output:
df_out.dropna(subset=str(self.output), inplace=True)
if self.protein:
df_out.dropna(subset=str(self.protein), inplace=True)
if self.drop_columns:
try:
df_out.drop(columns=str(self.column), inplace=True)
if self.reference.get_column_name():
df_out.drop(columns=self.reference.get_column_name(), inplace=True)
except KeyError:
pass

return df_out
Loading

0 comments on commit 273704b

Please sign in to comment.