From 582f2213619cd6fd95b0b31d327df12564fc8234 Mon Sep 17 00:00:00 2001 From: Sanjay C Nagi Date: Fri, 27 Sep 2024 15:36:57 +0100 Subject: [PATCH 1/3] go --- AnoPrimer/evaluate.py | 22 +++++++++++++++++----- AnoPrimer/utils.py | 41 +++++++++++++++++++++++++++++++++++++++++ 2 files changed, 58 insertions(+), 5 deletions(-) diff --git a/AnoPrimer/evaluate.py b/AnoPrimer/evaluate.py index a2a0a02..547d34c 100644 --- a/AnoPrimer/evaluate.py +++ b/AnoPrimer/evaluate.py @@ -12,7 +12,9 @@ _plotly_primers, _retrieve_span, _return_oligo_list, + add_spans_to_df, retrieve_data_resource, + round_floats_in_df, ) @@ -71,7 +73,7 @@ def __init__( self.assay_type = assay_type self.assay_name = assay_name - self.df = primer_df + self.df = round_floats_in_df(add_spans_to_df(primer_df), decimal_places=2) self.seq_parameters = seq_parameters self.primer_parameters = primer_parameters @@ -350,11 +352,20 @@ def _plot_genes(self, ax, genegff, min_, max_): def _plot_primers(self, ax, oligos): """Helper method to plot primers.""" + + def _generate_primer_pair_positions(num_pairs, start=1, end=1.45): + if num_pairs == 1: + return [start] + + step = (end - start) / (num_pairs - 1) + return [start + i * step for i in range(num_pairs)] + pal = sns.color_palette("Set2", len(self.df.columns)) handles, labels = ax.get_legend_handles_labels() for pair in self.df: pair = int(pair) pair_idx = pair - 1 # python based indexing + pair_ypos = _generate_primer_pair_positions(len(self.df.columns)) for oligo in oligos: oligo_pos = _retrieve_span( primer_df=self.df, @@ -368,7 +379,7 @@ def _plot_primers(self, ax, oligos): if oligo == "forward": plt.arrow( lower, - 0.8 + (2 / (10 - (pair_idx))), + pair_ypos[pair_idx], upper - lower, 0, width=0.03, @@ -378,7 +389,7 @@ def _plot_primers(self, ax, oligos): elif oligo == "reverse": plt.arrow( upper, - 0.8 + (2 / (10 - (pair_idx))), + pair_ypos[pair_idx], lower - upper, 0, width=0.03, @@ -386,10 +397,11 @@ def _plot_primers(self, ax, oligos): color=pal[pair_idx], ) elif oligo == "probe": - ax.axhline(y=0.8 + (2 / (10 - (pair_idx))), xmin=lower, xmax=upper) + ax.axhline(y=pair_ypos[pair_idx], xmin=lower, xmax=upper) line = plt.Line2D( (lower, upper), - (0.8 + (2 / (10 - (pair))), 0.8 + (2 / (10 - (pair)))), + pair_ypos[pair_idx], + pair_ypos[pair_idx], lw=2.5, color=pal[pair_idx], ) diff --git a/AnoPrimer/utils.py b/AnoPrimer/utils.py index 621a083..a2ef0cd 100644 --- a/AnoPrimer/utils.py +++ b/AnoPrimer/utils.py @@ -153,6 +153,47 @@ def _plotly_frequencies( #### utility functions #### +def add_spans_to_df(primers): + df = primers.df + oligos, _ = _return_oligo_list(assay_type=primers.assay_type) + + oligo_spans = {} + for oligo in oligos: + spans = [] + for pair in df: + pos = _retrieve_span( + df, + gdna_pos=primers.gdna_pos, + oligo=oligo, + assay_type=primers.assay_type, + pair=pair, + ) + span = f"{primers.contig}:{pos.min()}-{pos.max()}" + spans.append(span) + + oligo_spans[oligo] = pd.Series( + spans, name=f"primer_{oligo}_span", index=primers.df.columns + ) + df = pd.concat([df, oligo_spans[oligo].to_frame().T]) + + return df + + +def round_floats_in_df(df, decimal_places=1): + import numpy as np + + def round_if_float(val): + if isinstance(val, (int, np.integer)): + return val + try: + float_val = float(val) + if float_val.is_integer(): + return int(float_val) + return round(float_val, decimal_places) + except (ValueError, TypeError): + return val + + return df.map(round_if_float) def extract_trailing_digits(string): From b064e536ec78448d509dc6f9125a56e5565555b6 Mon Sep 17 00:00:00 2001 From: Sanjay C Nagi Date: Fri, 27 Sep 2024 16:06:51 +0100 Subject: [PATCH 2/3] fixes --- AnoPrimer/evaluate.py | 30 ++++++++++++++++++++++++++++-- AnoPrimer/utils.py | 26 +------------------------- tests/run_ci_notebooks.sh | 6 +++--- 3 files changed, 32 insertions(+), 30 deletions(-) diff --git a/AnoPrimer/evaluate.py b/AnoPrimer/evaluate.py index 547d34c..3e8a0e1 100644 --- a/AnoPrimer/evaluate.py +++ b/AnoPrimer/evaluate.py @@ -12,7 +12,6 @@ _plotly_primers, _retrieve_span, _return_oligo_list, - add_spans_to_df, retrieve_data_resource, round_floats_in_df, ) @@ -73,7 +72,6 @@ def __init__( self.assay_type = assay_type self.assay_name = assay_name - self.df = round_floats_in_df(add_spans_to_df(primer_df), decimal_places=2) self.seq_parameters = seq_parameters self.primer_parameters = primer_parameters @@ -81,6 +79,9 @@ def __init__( self.target_sequence = seq_parameters.get("SEQUENCE_TEMPLATE") self.gdna_pos = np.array(seq_parameters.get("GENOMIC_DNA_POSITIONS")) + self.df = primer_df + self.df = round_floats_in_df(self.add_spans_to_df(), decimal_places=2) + def evaluate_primers( self, sample_sets, @@ -122,6 +123,31 @@ def evaluate_primers( if out_dir is not None and blat_df is not None: blat_df.to_csv(f"{out_dir}/{self.assay_name}_blat_results.csv") + def add_spans_to_df(self): + df = self.df + oligos, _ = _return_oligo_list(assay_type=self.assay_type) + + oligo_spans = {} + for oligo in oligos: + spans = [] + for pair in df: + pos = _retrieve_span( + df, + gdna_pos=self.gdna_pos, + oligo=oligo, + assay_type=self.assay_type, + pair=pair, + ) + span = f"{self.contig}:{pos.min()}-{pos.max()}" + spans.append(span) + + oligo_spans[oligo] = pd.Series( + spans, name=f"primer_{oligo}_span", index=self.df.columns + ) + df = pd.concat([df, oligo_spans[oligo].to_frame().T]) + + return df + def summarise_metadata(self, sample_sets=None, sample_query=None): """ Retrieve a summary of metadata for samples in the ag3/af1 resource. diff --git a/AnoPrimer/utils.py b/AnoPrimer/utils.py index a2ef0cd..6b58c48 100644 --- a/AnoPrimer/utils.py +++ b/AnoPrimer/utils.py @@ -153,30 +153,6 @@ def _plotly_frequencies( #### utility functions #### -def add_spans_to_df(primers): - df = primers.df - oligos, _ = _return_oligo_list(assay_type=primers.assay_type) - - oligo_spans = {} - for oligo in oligos: - spans = [] - for pair in df: - pos = _retrieve_span( - df, - gdna_pos=primers.gdna_pos, - oligo=oligo, - assay_type=primers.assay_type, - pair=pair, - ) - span = f"{primers.contig}:{pos.min()}-{pos.max()}" - spans.append(span) - - oligo_spans[oligo] = pd.Series( - spans, name=f"primer_{oligo}_span", index=primers.df.columns - ) - df = pd.concat([df, oligo_spans[oligo].to_frame().T]) - - return df def round_floats_in_df(df, decimal_places=1): @@ -193,7 +169,7 @@ def round_if_float(val): except (ValueError, TypeError): return val - return df.map(round_if_float) + return df.applymap(round_if_float) def extract_trailing_digits(string): diff --git a/tests/run_ci_notebooks.sh b/tests/run_ci_notebooks.sh index b72f6e3..15cc2b0 100755 --- a/tests/run_ci_notebooks.sh +++ b/tests/run_ci_notebooks.sh @@ -1,9 +1,9 @@ -papermill notebooks/AnoPrimer-long.ipynb tests/qPCR_run.ipynb -k AnoPrimer -f tests/cDNA_Params_fun.json && -papermill notebooks/AnoPrimer-long.ipynb tests/qPCR2_run.ipynb -k AnoPrimer -f tests/cDNA_Params_2_fun.json && -papermill notebooks/AnoPrimer-long.ipynb tests/gDNA_run.ipynb -k AnoPrimer -f tests/gDNA_probe_Params_fun.json && papermill notebooks/AnoPrimer-long.ipynb tests/qPCR_run.ipynb -k AnoPrimer -f tests/cDNA_Params.json && papermill notebooks/AnoPrimer-long.ipynb tests/qPCR2_run.ipynb -k AnoPrimer -f tests/cDNA_Params_2.json && papermill notebooks/AnoPrimer-long.ipynb tests/gDNA_run.ipynb -k AnoPrimer -f tests/gDNA_probe_Params.json && papermill notebooks/AnoPrimer-long.ipynb tests/probe_run.ipynb -k AnoPrimer -f tests/probe_Params.json && +papermill notebooks/AnoPrimer-long.ipynb tests/qPCR_run.ipynb -k AnoPrimer -f tests/cDNA_Params_fun.json && +papermill notebooks/AnoPrimer-long.ipynb tests/qPCR2_run.ipynb -k AnoPrimer -f tests/cDNA_Params_2_fun.json && +papermill notebooks/AnoPrimer-long.ipynb tests/gDNA_run.ipynb -k AnoPrimer -f tests/gDNA_probe_Params_fun.json && papermill notebooks/AnoPrimer-short.ipynb tests/short_run.ipynb -k AnoPrimer From c5ffe6459ee6a128fdbdcc538e42833985aa3890 Mon Sep 17 00:00:00 2001 From: Sanjay C Nagi Date: Fri, 27 Sep 2024 16:07:49 +0100 Subject: [PATCH 3/3] v2.0.3 --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index 79aea44..6d7adb1 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [tool.poetry] name = "AnoPrimer" -version = "2.0.2" +version = "2.0.3" description = "A package to design primers in Anopheles gambiae whilst considering genetic variation with malariagen_data" readme = "README.md" documentation = "https://sanjaynagi.github.io/anoprimer/latest/"