Skip to content

Commit

Permalink
Merge pull request #65 from sanjaynagi/qc-notebooks-05-07-23
Browse files Browse the repository at this point in the history
Quality control notebooks
  • Loading branch information
sanjaynagi authored Jul 6, 2023
2 parents d71de96 + b29eb9c commit 90da4e9
Show file tree
Hide file tree
Showing 13 changed files with 428 additions and 44 deletions.
2 changes: 1 addition & 1 deletion .test/config/config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ dataset: "AgamDao"
metadata: "config/metadata.tsv"

# Directory of Illumina MiSeq run
illumina-dir: /home/sanj/projects/AmpSeqVIGG2022/resources/220329_M05658_0010_000000000-K9TYL
illumina-dir: ""

# Genome fasta reference files
reference-name: 'AgamP4'
Expand Down
4 changes: 2 additions & 2 deletions config/config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ dataset: gaard-sanger
metadata: config/metadata.tsv

# Directory of Illumina MiSeq run
illumina-dir: resources/230530_M02853_0061_000000000-L23TH/
illumina-dir: resources/230629_M05658_0009_000000000-DL7P9/

# Specify whether reference provided is amplicon or wholegenome sequence data
# Genome fasta reference files
Expand All @@ -17,7 +17,7 @@ reference-gff3: resources/reference/Anopheles-gambiae-PEST_BASEFEATURES_AgamP4.1
targets: config/AgamDao.bed

# Specify whether to convert bcl files to fastq
bcl-convert: False
bcl-convert: True

# Specify whether to run quality-control analyses
quality-control:
Expand Down
5 changes: 4 additions & 1 deletion docs/ampseeker-results/_toc.yml
Original file line number Diff line number Diff line change
Expand Up @@ -4,10 +4,13 @@
format: jb-book
root: intro
parts:
- caption: QC
chapters:
- file: notebooks/read-quality
- file: notebooks/reads-per-well
- caption: Results
chapters:
- file: notebooks/sample-map
- file: notebooks/IGV-explore
- file: notebooks/coverage
- file: notebooks/principal-component-analysis
- file: notebooks/allele-frequencies
Expand Down
2 changes: 2 additions & 0 deletions workflow/Snakefile
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ import numpy as np
configfile:"config/config.yaml"
dataset = config['dataset']
metadata = pd.read_csv(config['metadata'], sep="\t")
plate_info = np.isin(['plate', 'well_letter', 'well_number'], metadata.columns).all()
samples = metadata['sampleID']

import os
Expand All @@ -16,6 +17,7 @@ include: "rules/utilities.smk"
include: "rules/bcl-convert.smk"
include: "rules/qc.smk"
include: "rules/alignment-variantcalling.smk"
include: "rules/qc-notebooks.smk"
include: "rules/analysis.smk"
include: "rules/jupyter-book.smk"

Expand Down
3 changes: 2 additions & 1 deletion workflow/envs/AmpSeeker-python.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -15,4 +15,5 @@ dependencies:
- pip:
- igv_notebook
- papermill
- scikit-allel
- scikit-allel
- pysam
7 changes: 4 additions & 3 deletions workflow/envs/AmpSeeker-qc.yaml
Original file line number Diff line number Diff line change
@@ -1,7 +1,8 @@
name: AmpSeeker-qc
channels:
- bioconda
- dranew
- conda-forge
- anaconda
dependencies:
- fastqc
- bcl2fastq
- python=3.9
- fastqc
121 changes: 121 additions & 0 deletions workflow/notebooks/read-quality.ipynb
Original file line number Diff line number Diff line change
@@ -0,0 +1,121 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"tags": [
"parameters",
"remove-input"
]
},
"outputs": [],
"source": [
"metadata_path = '../../config/metadata.tsv'\n",
"index_read_qc = True\n",
"wkdir = \"\""
]
},
{
"attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
"### Read quality\n",
"\n",
"In this notebook, we link to the quality reports produced by the workflow: the MultiQC report, the index-read QC reports from FastQC, and the per-sample reports from fastp."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"tags": [
"remove-input"
]
},
"outputs": [],
"source": [
"# load panel metadata\n",
"import pandas as pd\n",
"metadata = pd.read_csv(metadata_path, sep=\"\\t\")"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"tags": [
"remove-input"
]
},
"outputs": [],
"source": [
"from IPython.display import display, Markdown\n",
"display(Markdown('## MultiQC'))\n",
"display(Markdown('MultiQC is a tool which integrates information from various tools in the workflow (Currently, it is not quite configured correctly)'))\n",
"display(Markdown(f'<a href={wkdir}/results/multiqc/multiqc_report.html>MultiQC report</a>'))"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"tags": [
"remove-input"
]
},
"outputs": [],
"source": [
"if index_read_qc:\n",
" display(Markdown('## Index read QC'))\n",
" display(Markdown('Index reads with average quality score below 30 for a given run may be unreliable and cause demultiplexing errors.'))\n",
" display(Markdown(f'<a href={wkdir}/results/index-read-qc/I1.html>Index 1 FASTQC report</a>'))\n",
" display(Markdown(f'<a href={wkdir}/results/index-read-qc/I2.html>Index 2 FASTQC report</a>'))"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"scrolled": false,
"tags": [
"remove-input"
]
},
"outputs": [],
"source": [
"display(Markdown('## Sample read QC'))\n",
"for sample in metadata.sampleID:\n",
" display(Markdown(f'<a href={wkdir}/results/fastp_reports/{sample}.html>{sample} fastp report</a>'))"
]
}
],
"metadata": {
"celltoolbar": "Tags",
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.10.10"
},
"vscode": {
"interpreter": {
"hash": "ce681de973941d5edd9bd94c9a2926b7fe65e17e578a68317f38265a230b8ca7"
}
}
},
"nbformat": 4,
"nbformat_minor": 2
}
Loading

0 comments on commit 90da4e9

Please sign in to comment.