diff --git a/test/test_extensive.sh b/test/test_extensive.sh index 8fef0cf..a06f83b 100755 --- a/test/test_extensive.sh +++ b/test/test_extensive.sh @@ -8,5 +8,7 @@ die() { } test/test_identity.sh || die "test_identity.sh failed" +test/test_meth.sh || die "test_meth.sh failed" +test/test_misc.sh || die "test_misc.sh failed" echho "all done" \ No newline at end of file diff --git a/test/test_meth.sh b/test/test_meth.sh index 005588d..a304663 100755 --- a/test/test_meth.sh +++ b/test/test_meth.sh @@ -10,18 +10,23 @@ die() { REF_HG38=/genome/hg38noAlt.fa REF_HG38_IDX=/genome/hg38noAlt.idx -#should download the following +# should download the following from aws METH_TRUTH=/home/hasindu/scratch/hg2_prom_lsk114_5khz/guppy_6.5.7_hac/PGXXXX230339_guppy657hac_mm226_f5c13_mfreq.tsv MINIMOD=/data/hasindu/hasindu2008.git/minimod/minimod COMPARE=~/hasindu2008.git/f5c/scripts/compare_methylation.py +REMOVE_TMP(){ + rm -f new.blow5 new.sam new.bam new.bam.bai new.bedmethyl methcomp.tsv a.acc a.log +} + +REMOVE_TMP +test -e meth.tsv && rm meth.tsv +test -e ref_chr22.fa && rm ref_chr22.fa + tail -n +2 $METH_TRUTH | grep -w "chr22" | cut -f 1,2,7 > meth.tsv || die "failed extracting chr, pos, meth_freq" samtools faidx ${REF_HG38} chr22 > ref_chr22.fa || die "failed extracting chr22 from ref" -test -e new.blow5 && rm new.blow5 -test -e new.fastq && rm new.fastq - CHECK_ACC(){ THRESH=$1 FILE=$2 @@ -37,24 +42,22 @@ CHECK_ACC(){ echo "" } -REMOVE_TMP(){ - rm -f new.blow5 new.sam new.bam new.bedmethyl methcomp.tsv a.acc a.log -} + RUN_TEST(){ PROF=$1 MODEL=$2 ./squigulator ref_chr22.fa -x ${PROF} -f 10 -t 20 -o new.blow5 --meth-freq meth.tsv 2> a.log || die "squigulator failed" - eel -i new.blow5 --config ${MODEL} --device cuda:all -o new.sam --call_mods &>> a.log|| die "eel failed" - samtools fastq -TMM,ML new.sam | minimap2 -ax map-ont -y -Y --secondary=no ref_chr22.fa - | samtools sort - -o new.bam 2>> a.log || die "samtools failed" + eel -i new.blow5 --config ${MODEL} --device cuda:all -o new.sam --call_mods &>> a.log || die "eel failed" + samtools fastq -TMM,ML new.sam 2>> a.log | minimap2 -ax map-ont -y -Y --secondary=no ref_chr22.fa - 2>> a.log | samtools sort - -o new.bam 2>> a.log || die "samtools failed" samtools index new.bam || die "samtools index failed" # /install/modkit-v0.1.13/modkit pileup --cpg --ref ref_chr22.fa --ignore h -t 32 new.bam new.tmp.bedmethyl # grep "chr22" new.tmp.bedmethyl | grep -v nan > new.bedmethyl ${MINIMOD} mod-freq ref_chr22.fa new.bam -b > new.bedmethyl 2>> a.log || die "minimod failed" ${COMPARE} ${METH_TRUTH} new.bedmethyl > methcomp.tsv 2>> a.log || die "compare failed" - tail -n+2 methcomp.tsv | datamash ppearson 3:5 2>> a.acc || die "pearson failed" + tail -n+2 methcomp.tsv | datamash ppearson 3:5 > a.acc || die "pearson failed" # ~/hasindu2008.git/f5c/scripts/plot_methylation.R -i methcomp.tsv -o methcomp.pdf - # cat a.acc + cat a.acc CHECK_ACC 0.92 a.acc REMOVE_TMP } diff --git a/test/test_misc.sh b/test/test_misc.sh index cdf5d8c..3702f5f 100755 --- a/test/test_misc.sh +++ b/test/test_misc.sh @@ -7,13 +7,18 @@ die() { exit 1 } -REF_HG38=/genome/hg38noAlt.fa -REF_HG38_IDX=/genome/hg38noAlt.idx REF_GENCODE=/genome/gencode.v40.transcripts.fa -test -e new.blow5 && rm new.blow5 -test -e new.fastq && rm new.fastq +REF_TRANS_BAM=/home/hasindu/scratch/uhr_prom_rna004/PNXRXX240011_reads_500k.bam + +REMOVE_TMP(){ + rm -f new.blow5 new.fastq a.acc a.log a.paf count_joined.tsv count_new.tsv +} + +REMOVE_TMP +test -e count.tsv && rm count.tsv + CHECK_ACC(){ THRESH=$1 @@ -30,10 +35,28 @@ CHECK_ACC(){ echo "" } -REMOVE_TMP(){ - rm -f new.blow5 new.fastq a.acc a.log -} +samtools view PNXRXX240011_reads_500k.bam | cut -f 3 | sort | uniq -c | awk '{print $2"\t"$1}' | sort -k1,1 > count.tsv || die "failed extracting chr, pos, meth_freq" +RUN_REST(){ + minimap2 -cx map-ont -y -Y --secondary=no ${REF_GENCODE} new.fastq > a.paf 2>> a.log + cat a.paf | cut -f 6 | sort | uniq -c | awk '{print $2"\t"$1}' -k1,1 > count_new.tsv || die "failed getting counts" + join count.tsv count_new.tsv > count_joined.tsv || die "failed joining counts" + cat count_joined.tsv | datamash ppearson 2:3 > a.acc || die "pearson failed" + cat a.acc + CHECK_ACC 0.95 a.acc + REMOVE_TMP +} +PROF=rna004-prom +MODEL=rna_rp4_130bps_hac_prom.cfg +./squigulator ${REF_GENCODE} -x ${PROF} -t 20 -o new.blow5 --trans-count count.tsv 2> a.log || die "squigulator failed" +eel -i new.blow5 --config ${MODEL} --device cuda:all -o new.fastq &>> a.log || die "eel failed" +RUN_REST + +PROF=dna-r9-prom +MODEL=dna_r9.4.1_450bps_hac_prom.cfg +./squigulator ${REF_GENCODE} -x ${PROF} -t 20 -o new.blow5 --trans-count count.tsv --cdna 2> a.log || die "squigulator failed" +eel -i new.blow5 --config ${MODEL} --device cuda:all -o new.fastq &>> a.log || die "eel failed" +RUN_REST \ No newline at end of file