-
Notifications
You must be signed in to change notification settings - Fork 3
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
1 parent
ef03a43
commit f1bfae3
Showing
5 changed files
with
209 additions
and
97 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,96 @@ | ||
#!/bin/bash | ||
|
||
set -e | ||
|
||
# Print an error message on stderr and abort the script with exit status 1.
# Arguments: the message; several arguments are joined with spaces
# (the first character of IFS).
die() {
    # printf rather than echo: a message that begins with -n or -e must be
    # printed literally instead of being consumed as an echo option.
    printf '%s\n' "$*" >&2
    exit 1
}
|
||
# Reference inputs used by the test stanzas below.
REF_HG38="/genome/hg38noAlt.fa"
REF_HG38_IDX="/genome/hg38noAlt.idx"
REF_GENCODE="/genome/gencode.v40.transcripts.fa"

# Drop outputs left behind by a previous run before starting.
if [ -e new.blow5 ]; then
    rm new.blow5
fi
if [ -e new.fastq ]; then
    rm new.fastq
fi
|
||
# Check that the accuracy recorded in a results file reaches a threshold.
# Arguments:
#   $1 - minimum acceptable accuracy (decimal number)
#   $2 - results file; the accuracy is taken from tab-separated field 2 of
#        its last line
# Sets the globals THRESH, FILE and ACC (as the original did).
# Calls die() when the value is missing, not a number, or below the threshold;
# otherwise prints a PASSED banner.
CHECK_ACC(){
    THRESH=$1
    FILE=$2

    ACC=$(tail -n 1 "$FILE" | cut -f2)

    # Reject anything that is not a plain number up front. Without this an
    # empty value makes the comparison itself error out inside the `if`,
    # which the shell treats as "false" and the check would report PASSED.
    local numeric_re='^-?([0-9]+\.?[0-9]*|\.[0-9]+)([eE][-+]?[0-9]+)?$'
    if ! [[ $ACC =~ $numeric_re ]]; then
        die "FAILED: Could not read a numeric accuracy from $FILE (got '$ACC')"
    fi

    # awk does the floating-point comparison; it exits 0 when ACC < THRESH.
    if awk -v acc="$ACC" -v thresh="$THRESH" 'BEGIN { exit !(acc + 0 < thresh + 0) }'; then
        die "FAILED: Accuracy $ACC is below threshold $THRESH"
    fi

    echo "PASSED: Accuracy $ACC is above threshold $THRESH"
    echo "________________________________________________"
    echo ""
    echo ""
}
|
||
# Delete the per-test intermediate files from the working directory.
# Missing files are not an error (rm -f).
REMOVE_TMP(){
    local -a leftovers=(new.blow5 new.fastq a.acc a.log)
    rm -f "${leftovers[@]}"
}
|
||
# R9 DNA, ideal-time: run squigulator (profile dna-r9-prom, --ideal-time) on
# $REF_HG38, pass new.blow5 through eel, score new.fastq with identitydna.sh
# and require the value in a.acc to be at least 0.97.
echo "R9 DNA ideal-time"
if ! ./squigulator -x dna-r9-prom --ideal-time "$REF_HG38" -o new.blow5 2> a.log; then
    die "squigulator failed"
fi
if ! eel -i new.blow5 --config dna_r9.4.1_450bps_sup.cfg --device cuda:all -o new.fastq >> a.log 2>&1; then
    die "eel failed"
fi
if ! identitydna.sh "$REF_HG38_IDX" new.fastq > a.acc 2>> a.log; then
    die "identitydna failed"
fi
cat a.acc
CHECK_ACC 0.97 a.acc
REMOVE_TMP
|
||
# R9 DNA: run squigulator (profile dna-r9-min) on $REF_HG38, pass new.blow5
# through eel, score new.fastq with identitydna.sh and require the value in
# a.acc to be at least 0.95.
echo "R9 DNA"
if ! ./squigulator -x dna-r9-min "$REF_HG38" -o new.blow5 2> a.log; then
    die "squigulator failed"
fi
if ! eel -i new.blow5 --config dna_r9.4.1_450bps_sup.cfg --device cuda:all -o new.fastq >> a.log 2>&1; then
    die "eel failed"
fi
if ! identitydna.sh "$REF_HG38_IDX" new.fastq > a.acc 2>> a.log; then
    die "identitydna failed"
fi
cat a.acc
CHECK_ACC 0.95 a.acc
REMOVE_TMP
|
||
# R9 RNA: run squigulator (profile rna-r9-min) on $REF_GENCODE, pass new.blow5
# through eel, score new.fastq with identityrna.sh and require the value in
# a.acc to be at least 0.75.
echo "R9 RNA"
./squigulator -x rna-r9-min "$REF_GENCODE" -o new.blow5 2> a.log || die "squigulator failed"
eel -i new.blow5 --config rna_r9.4.1_70bps_hac.cfg --device cuda:all -o new.fastq &>> a.log || die "eel failed"
# The failing tool here is identityrna.sh, so name it in the error message.
identityrna.sh "$REF_GENCODE" new.fastq > a.acc 2>> a.log || die "identityrna failed"
cat a.acc
CHECK_ACC 0.75 a.acc
REMOVE_TMP
|
||
# R10 DNA, ideal-time: run squigulator (profile dna-r10-min, --ideal-time) on
# $REF_HG38, pass new.blow5 through eel, score new.fastq with identitydna.sh
# and require the value in a.acc to be at least 0.91.
echo "R10 DNA ideal-time"
if ! ./squigulator -x dna-r10-min --ideal-time "$REF_HG38" -o new.blow5 2> a.log; then
    die "squigulator failed"
fi
if ! eel -i new.blow5 --config dna_r10.4.1_e8.2_400bps_sup.cfg --device cuda:all -o new.fastq >> a.log 2>&1; then
    die "eel failed"
fi
if ! identitydna.sh "$REF_HG38_IDX" new.fastq > a.acc 2>> a.log; then
    die "identitydna failed"
fi
cat a.acc
CHECK_ACC 0.91 a.acc
REMOVE_TMP
|
||
# R10 DNA: run squigulator (profile dna-r10-prom) on $REF_HG38, also writing
# new.sam via -a; pass new.blow5 through eel, score new.fastq with
# identitydna.sh and require the value in a.acc to be at least 0.90. The SAM
# file is then sorted and indexed with samtools.
echo "R10 DNA"
if ! ./squigulator -x dna-r10-prom "$REF_HG38" -o new.blow5 -a new.sam 2> a.log; then
    die "squigulator failed"
fi
if ! eel -i new.blow5 --config dna_r10.4.1_e8.2_400bps_sup.cfg --device cuda:all -o new.fastq >> a.log 2>&1; then
    die "eel failed"
fi
if ! identitydna.sh "$REF_HG38_IDX" new.fastq > a.acc 2>> a.log; then
    die "identitydna failed"
fi
cat a.acc
CHECK_ACC 0.90 a.acc
# NOTE(review): new.sam, new.bam and its index are not in this script's
# REMOVE_TMP list - confirm whether they are meant to persist after the run.
if ! samtools sort -o new.bam new.sam; then
    die "samtools failed"
fi
if ! samtools index new.bam; then
    die "samtools failed"
fi
REMOVE_TMP
|
||
# RNA004 RNA: run squigulator (profile rna004-prom) on $REF_GENCODE, pass
# new.blow5 through the eel script from the buttery-eel-0.4.2+dorado7.2.13
# install, score new.fastq with identityrna.sh and require the value in a.acc
# to be at least 0.77.
echo "RNA004 RNA"
./squigulator -x rna004-prom "$REF_GENCODE" -o new.blow5 2> a.log || die "squigulator failed"
/install/buttery-eel-0.4.2+dorado7.2.13/scripts/eel -i new.blow5 --config rna_rp4_130bps_sup.cfg --device cuda:all -o new.fastq &>> a.log || die "eel failed"
# The failing tool here is identityrna.sh, so name it in the error message.
identityrna.sh "$REF_GENCODE" new.fastq > a.acc 2>> a.log || die "identityrna failed"
cat a.acc
CHECK_ACC 0.77 a.acc
REMOVE_TMP
|
||
# CDNA R9: run squigulator (profile dna-r9-prom, --cdna) on $REF_GENCODE, pass
# new.blow5 through eel, score new.fastq with identitydna.sh against
# $REF_GENCODE and require the value in a.acc to be at least 0.94.
# Announce the test like every other stanza does, so the PASSED/FAILED line
# that follows can be attributed in the output.
echo "CDNA R9"
./squigulator -x dna-r9-prom "$REF_GENCODE" -o new.blow5 --cdna 2> a.log || die "squigulator failed"
eel -i new.blow5 --config dna_r9.4.1_450bps_sup.cfg --device cuda:all -o new.fastq &>> a.log || die "eel failed"
identitydna.sh "$REF_GENCODE" new.fastq > a.acc 2>> a.log || die "identitycdna failed"
cat a.acc
CHECK_ACC 0.94 a.acc
REMOVE_TMP
|
||
|
||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,67 @@ | ||
#!/bin/bash | ||
|
||
set -e | ||
|
||
# Print an error message on stderr and abort the script with exit status 1.
# Arguments: the message; several arguments are joined with spaces
# (the first character of IFS).
die() {
    # printf rather than echo: a message that begins with -n or -e must be
    # printed literally instead of being consumed as an echo option.
    printf '%s\n' "$*" >&2
    exit 1
}
|
||
# Reference genome and its index.
REF_HG38=/genome/hg38noAlt.fa
REF_HG38_IDX=/genome/hg38noAlt.idx

# Methylation truth set. TODO: this file should be downloaded rather than
# read from a user-specific path.
METH_TRUTH=/home/hasindu/scratch/hg2_prom_lsk114_5khz/guppy_6.5.7_hac/PGXXXX230339_guppy657hac_mm226_f5c13_mfreq.tsv

# Helper tools, referenced by path.
MINIMOD=/data/hasindu/hasindu2008.git/minimod/minimod
COMPARE=~/hasindu2008.git/f5c/scripts/compare_methylation.py

# `|| die` after a pipeline only sees the exit status of the last stage (cut),
# so check the input beforehand and the output afterwards; otherwise a missing
# truth file or a file with no chr22 rows would silently give an empty meth.tsv.
[ -r "$METH_TRUTH" ] || die "cannot read methylation truth file $METH_TRUTH"
# Skip the first line of the truth file and keep columns 1, 2 and 7 of the chr22 rows.
tail -n +2 "$METH_TRUTH" | grep -w "chr22" | cut -f 1,2,7 > meth.tsv || die "failed extracting chr, pos, meth_freq"
[ -s meth.tsv ] || die "failed extracting chr, pos, meth_freq"
samtools faidx "${REF_HG38}" chr22 > ref_chr22.fa || die "failed extracting chr22 from ref"

# Drop outputs left behind by a previous run before starting.
test -e new.blow5 && rm new.blow5
test -e new.fastq && rm new.fastq
|
||
# Check that the value recorded in a results file reaches a threshold.
# Arguments:
#   $1 - minimum acceptable value (decimal number)
#   $2 - results file; the value is taken from tab-separated field 1 of its
#        last line
# Sets the globals THRESH, FILE and ACC (as the original did).
# Calls die() when the value is missing, not a number, or below the threshold;
# otherwise prints a PASSED banner.
CHECK_ACC(){
    THRESH=$1
    FILE=$2

    ACC=$(tail -n 1 "$FILE" | cut -f1)

    # Reject anything that is not a plain number up front. Without this an
    # empty value makes the comparison itself error out inside the `if`,
    # which the shell treats as "false" and the check would report PASSED.
    local numeric_re='^-?([0-9]+\.?[0-9]*|\.[0-9]+)([eE][-+]?[0-9]+)?$'
    if ! [[ $ACC =~ $numeric_re ]]; then
        die "FAILED: Could not read a numeric accuracy from $FILE (got '$ACC')"
    fi

    # awk does the floating-point comparison; it exits 0 when ACC < THRESH.
    if awk -v acc="$ACC" -v thresh="$THRESH" 'BEGIN { exit !(acc + 0 < thresh + 0) }'; then
        die "FAILED: Accuracy $ACC is below threshold $THRESH"
    fi

    echo "PASSED: Accuracy $ACC is above threshold $THRESH"
    echo "________________________________________________"
    echo ""
    echo ""
}
|
||
# Delete the per-test intermediate files from the working directory.
# Missing files are not an error (rm -f). ref_chr22.fa and meth.tsv are left
# alone because every RUN_TEST call reads them.
REMOVE_TMP(){
    # new.bam.bai is written by `samtools index new.bam` and was previously
    # left behind.
    rm -f new.blow5 new.sam new.bam new.bam.bai new.bedmethyl methcomp.tsv a.acc a.log
}
|
||
# Run one methylation test and check the resulting correlation.
# Arguments:
#   $1 - squigulator profile, passed to -x
#   $2 - eel config file, passed to --config
# Reads ref_chr22.fa and meth.tsv from the working directory and uses the
# globals MINIMOD, COMPARE and METH_TRUTH. Sets the globals PROF and MODEL.
# Calls die() on the first failing step; CHECK_ACC requires at least 0.93.
RUN_TEST(){
    PROF=$1
    MODEL=$2
    ./squigulator ref_chr22.fa -x "${PROF}" -f 10 -t 20 -o new.blow5 --meth-freq meth.tsv 2> a.log || die "squigulator failed"
    eel -i new.blow5 --config "${MODEL}" --device cuda:all -o new.sam --call_mods &>> a.log || die "eel failed"
    # Only the final `samtools sort` has its stderr logged and its exit status checked here.
    samtools fastq -TMM,ML new.sam | minimap2 -ax map-ont -y -Y --secondary=no ref_chr22.fa - | samtools sort - -o new.bam 2>> a.log || die "samtools failed"
    samtools index new.bam || die "samtools index failed"
    # /install/modkit-v0.1.13/modkit pileup --cpg --ref ref_chr22.fa --ignore h -t 32 new.bam new.tmp.bedmethyl
    # grep "chr22" new.tmp.bedmethyl | grep -v nan > new.bedmethyl
    "${MINIMOD}" mod-freq ref_chr22.fa new.bam -b > new.bedmethyl 2>> a.log || die "minimod failed"
    "${COMPARE}" "${METH_TRUTH}" new.bedmethyl > methcomp.tsv 2>> a.log || die "compare failed"
    # The correlation has to land in a.acc via stdout; previously only stderr
    # was redirected there, so CHECK_ACC read an empty file.
    tail -n+2 methcomp.tsv | datamash ppearson 3:5 > a.acc 2>> a.log || die "pearson failed"
    # ~/hasindu2008.git/f5c/scripts/plot_methylation.R -i methcomp.tsv -o methcomp.pdf
    # Keep the value visible on stdout now that it is captured in a.acc.
    cat a.acc
    CHECK_ACC 0.93 a.acc
    REMOVE_TMP
}
|
||
# Methylation test with the dna-r9-prom profile and the R9.4.1 5mC CG config.
printf '%s\n' "R9 DNA methylation"
RUN_TEST dna-r9-prom dna_r9.4.1_450bps_modbases_5mc_cg_hac_prom.cfg

# Methylation test with the dna-r10-prom profile and the R10.4.1 5mC CG config.
printf '%s\n' "R10 DNA methylation"
RUN_TEST dna-r10-prom dna_r10.4.1_e8.2_400bps_5khz_modbases_5mc_cg_hac_prom.cfg
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,39 @@ | ||
#!/bin/bash | ||
|
||
set -e | ||
|
||
# Print an error message on stderr and abort the script with exit status 1.
# Arguments: the message; several arguments are joined with spaces
# (the first character of IFS).
die() {
    # printf rather than echo: a message that begins with -n or -e must be
    # printed literally instead of being consumed as an echo option.
    printf '%s\n' "$*" >&2
    exit 1
}
|
||
# Reference inputs.
REF_HG38="/genome/hg38noAlt.fa"
REF_HG38_IDX="/genome/hg38noAlt.idx"
REF_GENCODE="/genome/gencode.v40.transcripts.fa"

# Drop outputs left behind by a previous run before starting.
if [ -e new.blow5 ]; then
    rm new.blow5
fi
if [ -e new.fastq ]; then
    rm new.fastq
fi
|
||
# Check that the accuracy recorded in a results file reaches a threshold.
# Arguments:
#   $1 - minimum acceptable accuracy (decimal number)
#   $2 - results file; the accuracy is taken from tab-separated field 2 of
#        its last line
# Sets the globals THRESH, FILE and ACC (as the original did).
# Calls die() when the value is missing, not a number, or below the threshold;
# otherwise prints a PASSED banner.
CHECK_ACC(){
    THRESH=$1
    FILE=$2

    ACC=$(tail -n 1 "$FILE" | cut -f2)

    # Reject anything that is not a plain number up front. Without this an
    # empty value makes the comparison itself error out inside the `if`,
    # which the shell treats as "false" and the check would report PASSED.
    local numeric_re='^-?([0-9]+\.?[0-9]*|\.[0-9]+)([eE][-+]?[0-9]+)?$'
    if ! [[ $ACC =~ $numeric_re ]]; then
        die "FAILED: Could not read a numeric accuracy from $FILE (got '$ACC')"
    fi

    # awk does the floating-point comparison; it exits 0 when ACC < THRESH.
    if awk -v acc="$ACC" -v thresh="$THRESH" 'BEGIN { exit !(acc + 0 < thresh + 0) }'; then
        die "FAILED: Accuracy $ACC is below threshold $THRESH"
    fi

    echo "PASSED: Accuracy $ACC is above threshold $THRESH"
    echo "________________________________________________"
    echo ""
    echo ""
}
|
||
# Delete the per-test intermediate files from the working directory.
# Missing files are not an error (rm -f).
REMOVE_TMP(){
    local -a leftovers=(new.blow5 new.fastq a.acc a.log)
    rm -f "${leftovers[@]}"
}
|
||
|
||
|
||
|