From 0e949a5861fa1d77a99266b810612c2bc07d4217 Mon Sep 17 00:00:00 2001 From: Sanjay C Nagi Date: Fri, 18 Aug 2023 16:21:54 +0100 Subject: [PATCH 1/8] check my oligo and add tests --- .github/workflows/testing.yml | 36 +++++++++ AgamPrimer/AgamPrimer.py | 134 ++++++++++++++++++++++++++++++++++ 2 files changed, 170 insertions(+) create mode 100644 .github/workflows/testing.yml diff --git a/.github/workflows/testing.yml b/.github/workflows/testing.yml new file mode 100644 index 0000000..4d55b65 --- /dev/null +++ b/.github/workflows/testing.yml @@ -0,0 +1,36 @@ +name: pytest +on: + push: + branches: + - main + pull_request: + branches: + - main +jobs: + tests: + strategy: + fail-fast: true + matrix: + python-version: ["3.9"] + poetry-version: ["1.3.1"] + os: [ubuntu-latest] + runs-on: ${{ matrix.os }} + steps: + + - name: Checkout source + uses: actions/checkout@v3 + + - name: Install poetry + run: pipx install poetry==${{ matrix.poetry-version }} + + - name: Setup python + uses: actions/setup-python@v4 + with: + python-version: ${{ matrix.python-version }} + cache: 'poetry' + + - name: Install dependencies + run: poetry install + + - name: Run tests + run: poetry run pytest -v diff --git a/AgamPrimer/AgamPrimer.py b/AgamPrimer/AgamPrimer.py index e6f5ff9..ad84fa4 100644 --- a/AgamPrimer/AgamPrimer.py +++ b/AgamPrimer/AgamPrimer.py @@ -1227,3 +1227,137 @@ def _get_base_freqs(freqs, ref_alt_array): base = ref_alt_array[i, i_base] freq_df.loc[i, f"{base}_freq"] = freqs[i, i_base] return freq_df + + +### check my oligos ### + + +def check_my_oligo( + sequence, sample_sets="3.0", sample_query=None, width=700, height=400 +): + """ + Align a sequence to AgamP3, retrieve ag3 frequencies in this region and plot. + Only works with An.gambiae_sl for now. + """ + + print("Aligning sequence to AgamP3 genome with BLAT") + blat_df = gget.blat(sequence=sequence, seqtype="DNA", assembly="anoGam3") + if blat_df is None: + print(f"No hit for {sequence}") + return + + contig, start, end = blat_df.loc[0, ["chromosome", "start", "end"]] + contig = contig.replace("chr", "") + region_span = f"{contig}:{start}-{end}" + print("plotting frequencies in ag3 data") + fig = plot_sequence_frequencies( + region=region_span, + sample_sets=sample_sets, + sample_query=sample_query, + width=width, + height=height, + ) + + +def plot_sequence_frequencies( + region, sample_sets=None, sample_query=None, width=700, height=400 +): + """Retrieve frequencies""" + + snps = ag3.snp_calls( + region=region, sample_sets=sample_sets, sample_query=sample_query + ) + ref_alt_arr = snps["variant_allele"].compute().values.astype(str) + freq_arr = ( + allel.GenotypeArray(snps["call_genotype"]).count_alleles().to_frequencies() + ) + pos = snps["variant_position"].compute().values + df = pd.DataFrame( + { + "position": pos, + "base": ref_alt_arr[:, 0], + "alt_frequency": freq_arr[:, 1:].sum(axis=1), + } + ) # Make dataframe for plotting + df["base_pos"] = df["base"] + "_" + df["position"].astype(str) + # Get the frequency of each base and store as data frame + freq_df = _get_base_freqs(_addZeroCols(freq_arr), ref_alt_arr).filter(like="freq") + + data = pd.concat([df, freq_df], axis=1) + + fig = _plotly_frequencies( + data=data, + region=region, + sample_sets=sample_sets, + sample_query=sample_query, + width=width, + height=height, + ) + return fig + + +def _plotly_frequencies( + data, region, sample_sets, sample_query=None, width=700, height=400, save=False +): + import plotly.graph_objects as go + from plotly.subplots import make_subplots + + hover_template = "
".join( + [ + "Base / Position: %{customdata[4]}", + "Total Alternate freq: %{y}", + "A_freq: %{customdata[0]}", + "C_freq: %{customdata[1]}", + "G_freq: %{customdata[2]}", + "T_freq: %{customdata[3]}", + ] + ) + # Color scatterpoints blue if segregating SNP + color = [-1 if v == 0 else 1 if v > 0 else 0 for v in data["alt_frequency"]] + colorscale = [[0, "lightgray"], [0.5, "lightgray"], [1, "dodgerblue"]] + + fig = go.Figure( + go.Scatter( + x=data["position"], + y=data["alt_frequency"], + customdata=data[["A_freq", "C_freq", "G_freq", "T_freq", "base_pos"]], + hovertemplate=hover_template, + mode="markers", + marker=dict( + size=14, + color=color, + colorscale=colorscale, + line=dict(width=2, color="black"), + ), + marker_symbol="circle", + ) + ) + # Set xticks to be the REF allele + fig.update_xaxes( + tickmode="array", + tickangle=0, + tickvals=data["position"].to_list(), + ticktext=data["base"].to_list(), + ) + fig.update_yaxes( + tickmode="array", + tickvals=np.arange(0, 1, 0.2), + range=[0, 1], + title="Alternate allele frequency", + ) + # Set plot title + if sample_query is not None: + title_text = f"{region} | {sample_sets} | {sample_query} | allele frequencies" + else: + title_text = f"{region} | {sample_sets} | allele frequencies" + + fig.update_layout( + height=height, + width=width, + title_text=title_text, + title_x=0.5, + template="simple_white", + showlegend=False, + ) + fig.show() + return fig From 88deffd1619b6be273e28c89a827e52290a8d395 Mon Sep 17 00:00:00 2001 From: Sanjay C Nagi Date: Fri, 18 Aug 2023 16:39:42 +0100 Subject: [PATCH 2/8] go --- .github/workflows/github-action-AgamPrimer.yml | 1 - 1 file changed, 1 deletion(-) diff --git a/.github/workflows/github-action-AgamPrimer.yml b/.github/workflows/github-action-AgamPrimer.yml index cb5ca89..2422f14 100644 --- a/.github/workflows/github-action-AgamPrimer.yml +++ b/.github/workflows/github-action-AgamPrimer.yml @@ -46,5 +46,4 @@ jobs: poetry run papermill notebooks/AgamPrimer-long.ipynb gDNA_run.ipynb -k AgamPrimer -f tests/gDNA_probe_Params_fun.json poetry run papermill notebooks/AgamPrimer-long.ipynb gDNA_run.ipynb -k AgamPrimer -f tests/gDNA_probe_Params.json poetry run papermill notebooks/AgamPrimer-long.ipynb probe_run.ipynb -k AgamPrimer -f tests/probe_Params.json - poetry run papermill notebooks/AgamPrimer-long.ipynb probe_run.ipynb -k AgamPrimer -f tests/probe_Params_fun.json poetry run papermill notebooks/AgamPrimer-short.ipynb short_run.ipynb -k AgamPrimer From 3b0f67c287d381360c513ee0f02d8498dcfe6775 Mon Sep 17 00:00:00 2001 From: Sanjay C Nagi Date: Fri, 18 Aug 2023 20:43:19 +0100 Subject: [PATCH 3/8] add tests --- tests/test_agamprimer.py | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) create mode 100644 tests/test_agamprimer.py diff --git a/tests/test_agamprimer.py b/tests/test_agamprimer.py new file mode 100644 index 0000000..4b6d451 --- /dev/null +++ b/tests/test_agamprimer.py @@ -0,0 +1,16 @@ +import numpy as np +import pandas as pd +import pytest + +import AgamPrimer + +ace1_seq = "GCGGCGGCTTCTACTCCGG" +kdr_seq = "AGTGATAGGAAATTTAGTCGT" + + +@pytest.mark.parametrize( + "sequence", + [ace1_seq, kdr_seq], +) +def test_check_my_oligo(sequence): + AgamPrimer.check_my_oligo(sequence=sequence, sample_sets="AG1000G-GH") From e9ea08ec30944aa0711ebf2723c979fa7d7462e4 Mon Sep 17 00:00:00 2001 From: Sanjay C Nagi Date: Fri, 18 Aug 2023 20:46:21 +0100 Subject: [PATCH 4/8] new_cis --- .../github-action-AgamPrimer-funestus.yml | 44 ++++++++++++++++++ .../github-action-AgamPrimer-gambiae.yml | 46 +++++++++++++++++++ 2 files changed, 90 insertions(+) create mode 100644 .github/workflows/github-action-AgamPrimer-funestus.yml create mode 100644 .github/workflows/github-action-AgamPrimer-gambiae.yml diff --git a/.github/workflows/github-action-AgamPrimer-funestus.yml b/.github/workflows/github-action-AgamPrimer-funestus.yml new file mode 100644 index 0000000..c831413 --- /dev/null +++ b/.github/workflows/github-action-AgamPrimer-funestus.yml @@ -0,0 +1,44 @@ +name: Execute notebooks + +on: + push: + branches: + - main + pull_request: + branches: + - main + +jobs: + + run: + + runs-on: ubuntu-latest + strategy: + fail-fast: true + matrix: + python-version: ['3.10'] + + steps: + + - name: Checkout source + uses: actions/checkout@v3 + + - name: Install poetry + run: pipx install poetry==1.4.2 + + - name: Set up Python + uses: actions/setup-python@v4 + with: + python-version: ${{ matrix.python-version }} + cache: 'poetry' + + - name: Install dependencies + run: | + poetry install + poetry run python -m ipykernel install --user --name AgamPrimer + + - name: Run notebooks + run: | + poetry run papermill notebooks/AgamPrimer-long.ipynb qPCR_run.ipynb -k AgamPrimer -f tests/cDNA_Params_fun.json + poetry run papermill notebooks/AgamPrimer-long.ipynb qPCR2_run.ipynb -k AgamPrimer -f tests/cDNA_Params_2_fun.json + poetry run papermill notebooks/AgamPrimer-long.ipynb gDNA_run.ipynb -k AgamPrimer -f tests/gDNA_probe_Params_fun.json diff --git a/.github/workflows/github-action-AgamPrimer-gambiae.yml b/.github/workflows/github-action-AgamPrimer-gambiae.yml new file mode 100644 index 0000000..ae69a6b --- /dev/null +++ b/.github/workflows/github-action-AgamPrimer-gambiae.yml @@ -0,0 +1,46 @@ +name: Execute notebooks + +on: + push: + branches: + - main + pull_request: + branches: + - main + +jobs: + + run: + + runs-on: ubuntu-latest + strategy: + fail-fast: true + matrix: + python-version: ['3.8', '3.11'] + + steps: + + - name: Checkout source + uses: actions/checkout@v3 + + - name: Install poetry + run: pipx install poetry==1.4.2 + + - name: Set up Python + uses: actions/setup-python@v4 + with: + python-version: ${{ matrix.python-version }} + cache: 'poetry' + + - name: Install dependencies + run: | + poetry install + poetry run python -m ipykernel install --user --name AgamPrimer + + - name: Run notebooks + run: | + poetry run papermill notebooks/AgamPrimer-long.ipynb qPCR_run.ipynb -k AgamPrimer -f tests/cDNA_Params.json + poetry run papermill notebooks/AgamPrimer-long.ipynb qPCR2_run.ipynb -k AgamPrimer -f tests/cDNA_Params_2.json + poetry run papermill notebooks/AgamPrimer-long.ipynb gDNA_run.ipynb -k AgamPrimer -f tests/gDNA_probe_Params.json + poetry run papermill notebooks/AgamPrimer-long.ipynb probe_run.ipynb -k AgamPrimer -f tests/probe_Params.json + poetry run papermill notebooks/AgamPrimer-short.ipynb short_run.ipynb -k AgamPrimer From 2367cb43012c38b600903ba4ac31d3453333fec3 Mon Sep 17 00:00:00 2001 From: Sanjay C Nagi Date: Fri, 18 Aug 2023 20:47:18 +0100 Subject: [PATCH 5/8] new_cis --- .github/workflows/github-action-AgamPrimer-funestus.yml | 2 +- .github/workflows/github-action-AgamPrimer-gambiae.yml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/github-action-AgamPrimer-funestus.yml b/.github/workflows/github-action-AgamPrimer-funestus.yml index c831413..962e2b9 100644 --- a/.github/workflows/github-action-AgamPrimer-funestus.yml +++ b/.github/workflows/github-action-AgamPrimer-funestus.yml @@ -1,4 +1,4 @@ -name: Execute notebooks +name: notebooks-funestus on: push: diff --git a/.github/workflows/github-action-AgamPrimer-gambiae.yml b/.github/workflows/github-action-AgamPrimer-gambiae.yml index ae69a6b..97f2c5b 100644 --- a/.github/workflows/github-action-AgamPrimer-gambiae.yml +++ b/.github/workflows/github-action-AgamPrimer-gambiae.yml @@ -1,4 +1,4 @@ -name: Execute notebooks +name: notebooks-gambiae on: push: From 5841b6e4cca7f7e4a1f3fe2e72b55ce66a11b3da Mon Sep 17 00:00:00 2001 From: Sanjay C Nagi Date: Fri, 18 Aug 2023 20:48:15 +0100 Subject: [PATCH 6/8] remove old ci --- .../workflows/github-action-AgamPrimer.yml | 49 ------------------- 1 file changed, 49 deletions(-) delete mode 100644 .github/workflows/github-action-AgamPrimer.yml diff --git a/.github/workflows/github-action-AgamPrimer.yml b/.github/workflows/github-action-AgamPrimer.yml deleted file mode 100644 index 2422f14..0000000 --- a/.github/workflows/github-action-AgamPrimer.yml +++ /dev/null @@ -1,49 +0,0 @@ -name: Execute notebooks - -on: - push: - branches: - - main - pull_request: - branches: - - main - -jobs: - - run: - - runs-on: ubuntu-latest - strategy: - fail-fast: true - matrix: - python-version: ['3.8', '3.10', '3.11'] - - steps: - - - name: Checkout source - uses: actions/checkout@v3 - - - name: Install poetry - run: pipx install poetry==1.4.2 - - - name: Set up Python - uses: actions/setup-python@v4 - with: - python-version: ${{ matrix.python-version }} - cache: 'poetry' - - - name: Install dependencies - run: | - poetry install - poetry run python -m ipykernel install --user --name AgamPrimer - - - name: Run notebooks - run: | - poetry run papermill notebooks/AgamPrimer-long.ipynb qPCR_run.ipynb -k AgamPrimer -f tests/cDNA_Params_fun.json - poetry run papermill notebooks/AgamPrimer-long.ipynb qPCR_run.ipynb -k AgamPrimer -f tests/cDNA_Params.json - poetry run papermill notebooks/AgamPrimer-long.ipynb qPCR2_run.ipynb -k AgamPrimer -f tests/cDNA_Params_2_fun.json - poetry run papermill notebooks/AgamPrimer-long.ipynb qPCR2_run.ipynb -k AgamPrimer -f tests/cDNA_Params_2.json - poetry run papermill notebooks/AgamPrimer-long.ipynb gDNA_run.ipynb -k AgamPrimer -f tests/gDNA_probe_Params_fun.json - poetry run papermill notebooks/AgamPrimer-long.ipynb gDNA_run.ipynb -k AgamPrimer -f tests/gDNA_probe_Params.json - poetry run papermill notebooks/AgamPrimer-long.ipynb probe_run.ipynb -k AgamPrimer -f tests/probe_Params.json - poetry run papermill notebooks/AgamPrimer-short.ipynb short_run.ipynb -k AgamPrimer From ba0b53ae0eb56919dbcfe53eab4054bbdc232b22 Mon Sep 17 00:00:00 2001 From: Sanjay C Nagi Date: Fri, 18 Aug 2023 21:22:42 +0100 Subject: [PATCH 7/8] use sample query for ci speed --- notebooks/AgamPrimer-long.ipynb | 5 +++-- tests/cDNA_Params.json | 3 ++- tests/cDNA_Params_2.json | 3 ++- tests/gDNA_probe_Params.json | 3 ++- tests/probe_Params.json | 3 ++- 5 files changed, 11 insertions(+), 6 deletions(-) diff --git a/notebooks/AgamPrimer-long.ipynb b/notebooks/AgamPrimer-long.ipynb index 17aea4b..a1f2308 100644 --- a/notebooks/AgamPrimer-long.ipynb +++ b/notebooks/AgamPrimer-long.ipynb @@ -101,7 +101,8 @@ "#@markdown primer_target should be a region string ('2L:28545767') for gDNA primers and probes, and an AGAP transcript identifier for cDNA primers.\n", "\n", "primer_target = 'AGAP006228-RA' #@param {type:\"string\"} \n", - "sample_sets = 'AG1000G-GH' #@param {type:\"string\"}" + "sample_sets = 'AG1000G-GH' #@param {type:\"string\"}\n", + "sample_query = None #'taxon == \"coluzzii\"' " ] }, { @@ -429,7 +430,7 @@ "outputs": [], "source": [ "#sample_sets = None #'AG1000G-GH' # sample_set = '3.0' .you can also supply lists with multiple sample sets e.g ['AG1000G-GH', 'AG1000G-CI', 'AG1000G-BF-A']\n", - "sample_query = None #\"taxon == 'coluzzii'\" # here we can subset to specific values in the metadata e.g : \"taxon == 'gambiae'\" , or \"taxon == 'arabiensis'\" " + "#sample_query = None #\"taxon == 'coluzzii'\" # here we can subset to specific values in the metadata e.g : \"taxon == 'gambiae'\" , or \"taxon == 'arabiensis'\" " ] }, { diff --git a/tests/cDNA_Params.json b/tests/cDNA_Params.json index d62dedc..72a75f5 100644 --- a/tests/cDNA_Params.json +++ b/tests/cDNA_Params.json @@ -6,5 +6,6 @@ "max_amplicon_size": 120, "n_primer_pairs":6, "primer_target": "AGAP006228-RA", - "sample_sets": "AG1000G-GH" + "sample_sets": "AG1000G-GH", + "sample_query":"taxon == 'coluzzii" } diff --git a/tests/cDNA_Params_2.json b/tests/cDNA_Params_2.json index 16fc1d9..0567cb6 100644 --- a/tests/cDNA_Params_2.json +++ b/tests/cDNA_Params_2.json @@ -7,5 +7,6 @@ "n_primer_pairs":6, "primer_target": "AGAP028081-RA", "cDNA_exon_junction" : false, - "sample_sets": "AG1000G-GH" + "sample_sets": "AG1000G-GH", + "sample_query":"taxon == 'coluzzii" } diff --git a/tests/gDNA_probe_Params.json b/tests/gDNA_probe_Params.json index 8fe2af5..23fc5ea 100644 --- a/tests/gDNA_probe_Params.json +++ b/tests/gDNA_probe_Params.json @@ -6,5 +6,6 @@ "max_amplicon_size": 100, "n_primer_pairs":4, "primer_target": "2L:2422652", - "sample_sets": "AG1000G-GH" + "sample_sets": "AG1000G-GH", + "sample_query":"taxon == 'coluzzii" } diff --git a/tests/probe_Params.json b/tests/probe_Params.json index 60cb6d8..71ce4dc 100644 --- a/tests/probe_Params.json +++ b/tests/probe_Params.json @@ -6,5 +6,6 @@ "max_amplicon_size": 240, "n_primer_pairs":6, "primer_target": "X:9500000", - "sample_sets": "AG1000G-GH" + "sample_sets": "AG1000G-GH", + "sample_query":"taxon == 'coluzzii" } From ccffb4cd656e7c89b4adc0e3149161554f06b333 Mon Sep 17 00:00:00 2001 From: Sanjay C Nagi Date: Mon, 21 Aug 2023 15:32:46 +0100 Subject: [PATCH 8/8] fix --- tests/cDNA_Params.json | 2 +- tests/cDNA_Params_2.json | 2 +- tests/gDNA_probe_Params.json | 2 +- tests/probe_Params.json | 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) diff --git a/tests/cDNA_Params.json b/tests/cDNA_Params.json index 72a75f5..2883674 100644 --- a/tests/cDNA_Params.json +++ b/tests/cDNA_Params.json @@ -7,5 +7,5 @@ "n_primer_pairs":6, "primer_target": "AGAP006228-RA", "sample_sets": "AG1000G-GH", - "sample_query":"taxon == 'coluzzii" + "sample_query":"taxon == 'coluzzii'" } diff --git a/tests/cDNA_Params_2.json b/tests/cDNA_Params_2.json index 0567cb6..e0a05b0 100644 --- a/tests/cDNA_Params_2.json +++ b/tests/cDNA_Params_2.json @@ -8,5 +8,5 @@ "primer_target": "AGAP028081-RA", "cDNA_exon_junction" : false, "sample_sets": "AG1000G-GH", - "sample_query":"taxon == 'coluzzii" + "sample_query":"taxon == 'coluzzii'" } diff --git a/tests/gDNA_probe_Params.json b/tests/gDNA_probe_Params.json index 23fc5ea..0d60dfc 100644 --- a/tests/gDNA_probe_Params.json +++ b/tests/gDNA_probe_Params.json @@ -7,5 +7,5 @@ "n_primer_pairs":4, "primer_target": "2L:2422652", "sample_sets": "AG1000G-GH", - "sample_query":"taxon == 'coluzzii" + "sample_query":"taxon == 'coluzzii'" } diff --git a/tests/probe_Params.json b/tests/probe_Params.json index 71ce4dc..45f97f2 100644 --- a/tests/probe_Params.json +++ b/tests/probe_Params.json @@ -7,5 +7,5 @@ "n_primer_pairs":6, "primer_target": "X:9500000", "sample_sets": "AG1000G-GH", - "sample_query":"taxon == 'coluzzii" + "sample_query":"taxon == 'coluzzii'" }