diff --git a/.github/workflows/github-action-AgamPrimer-funestus.yml b/.github/workflows/github-action-AgamPrimer-funestus.yml new file mode 100644 index 0000000..962e2b9 --- /dev/null +++ b/.github/workflows/github-action-AgamPrimer-funestus.yml @@ -0,0 +1,44 @@ +name: notebooks-funestus + +on: + push: + branches: + - main + pull_request: + branches: + - main + +jobs: + + run: + + runs-on: ubuntu-latest + strategy: + fail-fast: true + matrix: + python-version: ['3.10'] + + steps: + + - name: Checkout source + uses: actions/checkout@v3 + + - name: Install poetry + run: pipx install poetry==1.4.2 + + - name: Set up Python + uses: actions/setup-python@v4 + with: + python-version: ${{ matrix.python-version }} + cache: 'poetry' + + - name: Install dependencies + run: | + poetry install + poetry run python -m ipykernel install --user --name AgamPrimer + + - name: Run notebooks + run: | + poetry run papermill notebooks/AgamPrimer-long.ipynb qPCR_run.ipynb -k AgamPrimer -f tests/cDNA_Params_fun.json + poetry run papermill notebooks/AgamPrimer-long.ipynb qPCR2_run.ipynb -k AgamPrimer -f tests/cDNA_Params_2_fun.json + poetry run papermill notebooks/AgamPrimer-long.ipynb gDNA_run.ipynb -k AgamPrimer -f tests/gDNA_probe_Params_fun.json diff --git a/.github/workflows/github-action-AgamPrimer.yml b/.github/workflows/github-action-AgamPrimer-gambiae.yml similarity index 68% rename from .github/workflows/github-action-AgamPrimer.yml rename to .github/workflows/github-action-AgamPrimer-gambiae.yml index cb5ca89..97f2c5b 100644 --- a/.github/workflows/github-action-AgamPrimer.yml +++ b/.github/workflows/github-action-AgamPrimer-gambiae.yml @@ -1,4 +1,4 @@ -name: Execute notebooks +name: notebooks-gambiae on: push: @@ -16,7 +16,7 @@ jobs: strategy: fail-fast: true matrix: - python-version: ['3.8', '3.10', '3.11'] + python-version: ['3.8', '3.11'] steps: @@ -39,12 +39,8 @@ jobs: - name: Run notebooks run: | - poetry run papermill notebooks/AgamPrimer-long.ipynb qPCR_run.ipynb -k AgamPrimer -f tests/cDNA_Params_fun.json poetry run papermill notebooks/AgamPrimer-long.ipynb qPCR_run.ipynb -k AgamPrimer -f tests/cDNA_Params.json - poetry run papermill notebooks/AgamPrimer-long.ipynb qPCR2_run.ipynb -k AgamPrimer -f tests/cDNA_Params_2_fun.json poetry run papermill notebooks/AgamPrimer-long.ipynb qPCR2_run.ipynb -k AgamPrimer -f tests/cDNA_Params_2.json - poetry run papermill notebooks/AgamPrimer-long.ipynb gDNA_run.ipynb -k AgamPrimer -f tests/gDNA_probe_Params_fun.json poetry run papermill notebooks/AgamPrimer-long.ipynb gDNA_run.ipynb -k AgamPrimer -f tests/gDNA_probe_Params.json poetry run papermill notebooks/AgamPrimer-long.ipynb probe_run.ipynb -k AgamPrimer -f tests/probe_Params.json - poetry run papermill notebooks/AgamPrimer-long.ipynb probe_run.ipynb -k AgamPrimer -f tests/probe_Params_fun.json poetry run papermill notebooks/AgamPrimer-short.ipynb short_run.ipynb -k AgamPrimer diff --git a/.github/workflows/testing.yml b/.github/workflows/testing.yml new file mode 100644 index 0000000..4d55b65 --- /dev/null +++ b/.github/workflows/testing.yml @@ -0,0 +1,36 @@ +name: pytest +on: + push: + branches: + - main + pull_request: + branches: + - main +jobs: + tests: + strategy: + fail-fast: true + matrix: + python-version: ["3.9"] + poetry-version: ["1.3.1"] + os: [ubuntu-latest] + runs-on: ${{ matrix.os }} + steps: + + - name: Checkout source + uses: actions/checkout@v3 + + - name: Install poetry + run: pipx install poetry==${{ matrix.poetry-version }} + + - name: Setup python + uses: actions/setup-python@v4 + with: + python-version: ${{ matrix.python-version }} + cache: 'poetry' + + - name: Install dependencies + run: poetry install + + - name: Run tests + run: poetry run pytest -v diff --git a/AgamPrimer/AgamPrimer.py b/AgamPrimer/AgamPrimer.py index e6f5ff9..ad84fa4 100644 --- a/AgamPrimer/AgamPrimer.py +++ b/AgamPrimer/AgamPrimer.py @@ -1227,3 +1227,137 @@ def _get_base_freqs(freqs, ref_alt_array): base = ref_alt_array[i, i_base] freq_df.loc[i, f"{base}_freq"] = freqs[i, i_base] return freq_df + + +### check my oligos ### + + +def check_my_oligo( + sequence, sample_sets="3.0", sample_query=None, width=700, height=400 +): + """ + Align a sequence to AgamP3, retrieve ag3 frequencies in this region and plot. + Only works with An.gambiae_sl for now. + """ + + print("Aligning sequence to AgamP3 genome with BLAT") + blat_df = gget.blat(sequence=sequence, seqtype="DNA", assembly="anoGam3") + if blat_df is None: + print(f"No hit for {sequence}") + return + + contig, start, end = blat_df.loc[0, ["chromosome", "start", "end"]] + contig = contig.replace("chr", "") + region_span = f"{contig}:{start}-{end}" + print("plotting frequencies in ag3 data") + fig = plot_sequence_frequencies( + region=region_span, + sample_sets=sample_sets, + sample_query=sample_query, + width=width, + height=height, + ) + + +def plot_sequence_frequencies( + region, sample_sets=None, sample_query=None, width=700, height=400 +): + """Retrieve frequencies""" + + snps = ag3.snp_calls( + region=region, sample_sets=sample_sets, sample_query=sample_query + ) + ref_alt_arr = snps["variant_allele"].compute().values.astype(str) + freq_arr = ( + allel.GenotypeArray(snps["call_genotype"]).count_alleles().to_frequencies() + ) + pos = snps["variant_position"].compute().values + df = pd.DataFrame( + { + "position": pos, + "base": ref_alt_arr[:, 0], + "alt_frequency": freq_arr[:, 1:].sum(axis=1), + } + ) # Make dataframe for plotting + df["base_pos"] = df["base"] + "_" + df["position"].astype(str) + # Get the frequency of each base and store as data frame + freq_df = _get_base_freqs(_addZeroCols(freq_arr), ref_alt_arr).filter(like="freq") + + data = pd.concat([df, freq_df], axis=1) + + fig = _plotly_frequencies( + data=data, + region=region, + sample_sets=sample_sets, + sample_query=sample_query, + width=width, + height=height, + ) + return fig + + +def _plotly_frequencies( + data, region, sample_sets, sample_query=None, width=700, height=400, save=False +): + import plotly.graph_objects as go + from plotly.subplots import make_subplots + + hover_template = "
".join( + [ + "Base / Position: %{customdata[4]}", + "Total Alternate freq: %{y}", + "A_freq: %{customdata[0]}", + "C_freq: %{customdata[1]}", + "G_freq: %{customdata[2]}", + "T_freq: %{customdata[3]}", + ] + ) + # Color scatterpoints blue if segregating SNP + color = [-1 if v == 0 else 1 if v > 0 else 0 for v in data["alt_frequency"]] + colorscale = [[0, "lightgray"], [0.5, "lightgray"], [1, "dodgerblue"]] + + fig = go.Figure( + go.Scatter( + x=data["position"], + y=data["alt_frequency"], + customdata=data[["A_freq", "C_freq", "G_freq", "T_freq", "base_pos"]], + hovertemplate=hover_template, + mode="markers", + marker=dict( + size=14, + color=color, + colorscale=colorscale, + line=dict(width=2, color="black"), + ), + marker_symbol="circle", + ) + ) + # Set xticks to be the REF allele + fig.update_xaxes( + tickmode="array", + tickangle=0, + tickvals=data["position"].to_list(), + ticktext=data["base"].to_list(), + ) + fig.update_yaxes( + tickmode="array", + tickvals=np.arange(0, 1, 0.2), + range=[0, 1], + title="Alternate allele frequency", + ) + # Set plot title + if sample_query is not None: + title_text = f"{region} | {sample_sets} | {sample_query} | allele frequencies" + else: + title_text = f"{region} | {sample_sets} | allele frequencies" + + fig.update_layout( + height=height, + width=width, + title_text=title_text, + title_x=0.5, + template="simple_white", + showlegend=False, + ) + fig.show() + return fig diff --git a/notebooks/AgamPrimer-long.ipynb b/notebooks/AgamPrimer-long.ipynb index 17aea4b..a1f2308 100644 --- a/notebooks/AgamPrimer-long.ipynb +++ b/notebooks/AgamPrimer-long.ipynb @@ -101,7 +101,8 @@ "#@markdown primer_target should be a region string ('2L:28545767') for gDNA primers and probes, and an AGAP transcript identifier for cDNA primers.\n", "\n", "primer_target = 'AGAP006228-RA' #@param {type:\"string\"} \n", - "sample_sets = 'AG1000G-GH' #@param {type:\"string\"}" + "sample_sets = 'AG1000G-GH' #@param {type:\"string\"}\n", + "sample_query = None #'taxon == \"coluzzii\"' " ] }, { @@ -429,7 +430,7 @@ "outputs": [], "source": [ "#sample_sets = None #'AG1000G-GH' # sample_set = '3.0' .you can also supply lists with multiple sample sets e.g ['AG1000G-GH', 'AG1000G-CI', 'AG1000G-BF-A']\n", - "sample_query = None #\"taxon == 'coluzzii'\" # here we can subset to specific values in the metadata e.g : \"taxon == 'gambiae'\" , or \"taxon == 'arabiensis'\" " + "#sample_query = None #\"taxon == 'coluzzii'\" # here we can subset to specific values in the metadata e.g : \"taxon == 'gambiae'\" , or \"taxon == 'arabiensis'\" " ] }, { diff --git a/tests/cDNA_Params.json b/tests/cDNA_Params.json index d62dedc..2883674 100644 --- a/tests/cDNA_Params.json +++ b/tests/cDNA_Params.json @@ -6,5 +6,6 @@ "max_amplicon_size": 120, "n_primer_pairs":6, "primer_target": "AGAP006228-RA", - "sample_sets": "AG1000G-GH" + "sample_sets": "AG1000G-GH", + "sample_query":"taxon == 'coluzzii'" } diff --git a/tests/cDNA_Params_2.json b/tests/cDNA_Params_2.json index 16fc1d9..e0a05b0 100644 --- a/tests/cDNA_Params_2.json +++ b/tests/cDNA_Params_2.json @@ -7,5 +7,6 @@ "n_primer_pairs":6, "primer_target": "AGAP028081-RA", "cDNA_exon_junction" : false, - "sample_sets": "AG1000G-GH" + "sample_sets": "AG1000G-GH", + "sample_query":"taxon == 'coluzzii'" } diff --git a/tests/gDNA_probe_Params.json b/tests/gDNA_probe_Params.json index 8fe2af5..0d60dfc 100644 --- a/tests/gDNA_probe_Params.json +++ b/tests/gDNA_probe_Params.json @@ -6,5 +6,6 @@ "max_amplicon_size": 100, "n_primer_pairs":4, "primer_target": "2L:2422652", - "sample_sets": "AG1000G-GH" + "sample_sets": "AG1000G-GH", + "sample_query":"taxon == 'coluzzii'" } diff --git a/tests/probe_Params.json b/tests/probe_Params.json index 60cb6d8..45f97f2 100644 --- a/tests/probe_Params.json +++ b/tests/probe_Params.json @@ -6,5 +6,6 @@ "max_amplicon_size": 240, "n_primer_pairs":6, "primer_target": "X:9500000", - "sample_sets": "AG1000G-GH" + "sample_sets": "AG1000G-GH", + "sample_query":"taxon == 'coluzzii'" } diff --git a/tests/test_agamprimer.py b/tests/test_agamprimer.py new file mode 100644 index 0000000..4b6d451 --- /dev/null +++ b/tests/test_agamprimer.py @@ -0,0 +1,16 @@ +import numpy as np +import pandas as pd +import pytest + +import AgamPrimer + +ace1_seq = "GCGGCGGCTTCTACTCCGG" +kdr_seq = "AGTGATAGGAAATTTAGTCGT" + + +@pytest.mark.parametrize( + "sequence", + [ace1_seq, kdr_seq], +) +def test_check_my_oligo(sequence): + AgamPrimer.check_my_oligo(sequence=sequence, sample_sets="AG1000G-GH")