Skip to content

Commit

Permalink
add tests for meth
Browse files Browse the repository at this point in the history
  • Loading branch information
hasindu2008 committed Sep 11, 2024
1 parent ef03a43 commit f1bfae3
Show file tree
Hide file tree
Showing 5 changed files with 209 additions and 97 deletions.
10 changes: 5 additions & 5 deletions src/ref.c
Original file line number Diff line number Diff line change
Expand Up @@ -316,31 +316,31 @@ void load_meth_freq(const char *meth_freq, ref_t *ref){
int ref_idx = get_ref_idx(name, ref);
//fprintf(stderr,"Refidx %d\n",ref_idx);
if(ref_idx < 0){
ERROR("There was no such chromosome in the reference. Check line %d value %s", line, name);
ERROR("There was no such chromosome in the reference. Check line %d value %s of the input methy-freq file.", line, name);
exit(EXIT_FAILURE);
}
char *position = strtok(NULL, "\t");
check_meth_line(position, meth_freq);
int32_t pos = atoi(position);
//fprintf(stderr,"%s,%d\n",position,pos);
if(pos < 0){
ERROR("chromosome position cannot be negative. Check line %d value %d",line,pos);
ERROR("Chromosome position cannot be negative. Check line %d value %d of the input methy-freq file.",line,pos);
exit(EXIT_FAILURE);
} else if (pos >= ref->ref_lengths[ref_idx]){
ERROR("chromosome %s position must be less than the length %d. Check line %d value %d", ref->ref_names[ref_idx], ref->ref_lengths[ref_idx], line, pos);
ERROR("Chromosome %s position must be less than the length %d. Check line %d value %d of the input methy-freq file.", ref->ref_names[ref_idx], ref->ref_lengths[ref_idx], line, pos);
exit(EXIT_FAILURE);
}
char c = ref->ref_seq[ref_idx][pos];
if(!(c=='C' || c=='c')){
ERROR("The chrsomsome %s pos %d n the refrence was a %c. How can it be methylated C? Check line %d",name,pos,c,line);
ERROR("The chromosome %s position %d in the reference was a %c. How can it be methylated C? Check line %d of the input methy-freq file.",name,pos,c,line);
exit(EXIT_FAILURE);
}

char *frequency = strtok(NULL, "\t");
check_meth_line(frequency, meth_freq);
float freq = atof(frequency);
if(freq<0 || freq >1){
ERROR("Methylation frequency must be between 0 to 1. Check line %d value %f",line, freq);
ERROR("Methylation frequency must be between 0 to 1. Check line %d value %f of the input methy-freq file.",line, freq);
exit(EXIT_FAILURE);
}
uint8_t f = roundf(freq * 255);
Expand Down
94 changes: 2 additions & 92 deletions test/test_extensive.sh
Original file line number Diff line number Diff line change
Expand Up @@ -7,96 +7,6 @@ die() {
exit 1
}

REF_HG38=/genome/hg38noAlt.fa
REF_HG38_IDX=/genome/hg38noAlt.idx

REF_GENCODE=/genome/gencode.v40.transcripts.fa

test -e new.blow5 && rm new.blow5
test -e new.fastq && rm new.fastq

CHECK_ACC(){
THRESH=$1
FILE=$2

ACC=$(tail -1 $FILE | cut -f2)
if [ $(echo "$ACC < $THRESH" | bc) -eq 1 ]; then
die "FAILED: Accuracy $ACC is below threshold $THRESH"
fi

echo "PASSED: Accuracy $ACC is above threshold $THRESH"
echo "________________________________________________"
echo ""
echo ""
}

REMOVE_TMP(){
rm -f new.blow5 new.fastq a.acc a.log
}

echo "R9 DNA ideal-time"
./squigulator -x dna-r9-prom --ideal-time $REF_HG38 -o new.blow5 2> a.log || die "squigulator failed"
eel -i new.blow5 --config dna_r9.4.1_450bps_sup.cfg --device cuda:all -o new.fastq &>> a.log|| die "eel failed"
identitydna.sh $REF_HG38_IDX new.fastq > a.acc 2>> a.log || die "identitydna failed"
cat a.acc
CHECK_ACC 0.97 a.acc
REMOVE_TMP

echo "R9 DNA"
./squigulator -x dna-r9-min $REF_HG38 -o new.blow5 2> a.log || die "squigulator failed"
eel -i new.blow5 --config dna_r9.4.1_450bps_sup.cfg --device cuda:all -o new.fastq &>> a.log || die "eel failed"
identitydna.sh $REF_HG38_IDX new.fastq > a.acc 2>> a.log || die "identitydna failed"
cat a.acc
CHECK_ACC 0.95 a.acc
REMOVE_TMP

echo "R9 RNA"
./squigulator -x rna-r9-min $REF_GENCODE -o new.blow5 2> a.log || die "squigulator failed"
eel -i new.blow5 --config rna_r9.4.1_70bps_hac.cfg --device cuda:all -o new.fastq &>> a.log|| die "eel failed"
identityrna.sh $REF_GENCODE new.fastq > a.acc 2>> a.log || die "identitydna failed"
cat a.acc
CHECK_ACC 0.75 a.acc
REMOVE_TMP

echo "R10 DNA ideal-time"
./squigulator -x dna-r10-min --ideal-time $REF_HG38 -o new.blow5 2> a.log || die "squigulator failed"
eel -i new.blow5 --config dna_r10.4.1_e8.2_400bps_sup.cfg --device cuda:all -o new.fastq &>> a.log || die "eel failed"
identitydna.sh $REF_HG38_IDX new.fastq > a.acc 2>> a.log || die "identitydna failed"
cat a.acc
CHECK_ACC 0.91 a.acc
REMOVE_TMP

echo "R10 DNA"
./squigulator -x dna-r10-prom $REF_HG38 -o new.blow5 -a new.sam 2> a.log || die "squigulator failed"
eel -i new.blow5 --config dna_r10.4.1_e8.2_400bps_sup.cfg --device cuda:all -o new.fastq &>> a.log || die "eel failed"
identitydna.sh $REF_HG38_IDX new.fastq > a.acc 2>> a.log || die "identitydna failed"
cat a.acc
CHECK_ACC 0.90 a.acc
samtools sort -o new.bam new.sam || die "samtools failed"
samtools index new.bam || die "samtools failed"
REMOVE_TMP

echo "RNA004 RNA"
./squigulator -x rna004-prom $REF_GENCODE -o new.blow5 2> a.log || die "squigulator failed"
/install/buttery-eel-0.4.2+dorado7.2.13/scripts/eel -i new.blow5 --config rna_rp4_130bps_sup.cfg --device cuda:all -o new.fastq &>> a.log|| die "eel failed"
identityrna.sh $REF_GENCODE new.fastq > a.acc 2>> a.log || die "identitydna failed"
cat a.acc
CHECK_ACC 0.77 a.acc
REMOVE_TMP

# CDNA R9
./squigulator -x rna-r9-prom $REF_GENCODE -o new.blow5 --cdna 2> a.log || die "squigulator failed"
eel -i new.blow5 --config dna_r9.4.1_450bps_sup.cfg --device cuda:all -o new.fastq &>> a.log || die "eel failed"
identitydna.sh $REF_GENCODE new.fastq > a.acc 2>> a.log || die "identitycdna failed"
cat a.acc
CHECK_ACC 0.75 a.acc
REMOVE_TMP

# meth r9

# meth r10

# RNA trasn count

# cDNA trans count
test/test_identity.sh || die "test_identity.sh failed"

echho "all done"
96 changes: 96 additions & 0 deletions test/test_identity.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,96 @@
#!/bin/bash

set -e

die() {
echo "$@" >&2
exit 1
}

REF_HG38=/genome/hg38noAlt.fa
REF_HG38_IDX=/genome/hg38noAlt.idx

REF_GENCODE=/genome/gencode.v40.transcripts.fa

test -e new.blow5 && rm new.blow5
test -e new.fastq && rm new.fastq

CHECK_ACC(){
THRESH=$1
FILE=$2

ACC=$(tail -1 $FILE | cut -f2)
if [ $(echo "$ACC < $THRESH" | bc) -eq 1 ]; then
die "FAILED: Accuracy $ACC is below threshold $THRESH"
fi

echo "PASSED: Accuracy $ACC is above threshold $THRESH"
echo "________________________________________________"
echo ""
echo ""
}

REMOVE_TMP(){
rm -f new.blow5 new.fastq a.acc a.log
}

echo "R9 DNA ideal-time"
./squigulator -x dna-r9-prom --ideal-time $REF_HG38 -o new.blow5 2> a.log || die "squigulator failed"
eel -i new.blow5 --config dna_r9.4.1_450bps_sup.cfg --device cuda:all -o new.fastq &>> a.log|| die "eel failed"
identitydna.sh $REF_HG38_IDX new.fastq > a.acc 2>> a.log || die "identitydna failed"
cat a.acc
CHECK_ACC 0.97 a.acc
REMOVE_TMP

echo "R9 DNA"
./squigulator -x dna-r9-min $REF_HG38 -o new.blow5 2> a.log || die "squigulator failed"
eel -i new.blow5 --config dna_r9.4.1_450bps_sup.cfg --device cuda:all -o new.fastq &>> a.log || die "eel failed"
identitydna.sh $REF_HG38_IDX new.fastq > a.acc 2>> a.log || die "identitydna failed"
cat a.acc
CHECK_ACC 0.95 a.acc
REMOVE_TMP

echo "R9 RNA"
./squigulator -x rna-r9-min $REF_GENCODE -o new.blow5 2> a.log || die "squigulator failed"
eel -i new.blow5 --config rna_r9.4.1_70bps_hac.cfg --device cuda:all -o new.fastq &>> a.log|| die "eel failed"
identityrna.sh $REF_GENCODE new.fastq > a.acc 2>> a.log || die "identitydna failed"
cat a.acc
CHECK_ACC 0.75 a.acc
REMOVE_TMP

echo "R10 DNA ideal-time"
./squigulator -x dna-r10-min --ideal-time $REF_HG38 -o new.blow5 2> a.log || die "squigulator failed"
eel -i new.blow5 --config dna_r10.4.1_e8.2_400bps_sup.cfg --device cuda:all -o new.fastq &>> a.log || die "eel failed"
identitydna.sh $REF_HG38_IDX new.fastq > a.acc 2>> a.log || die "identitydna failed"
cat a.acc
CHECK_ACC 0.91 a.acc
REMOVE_TMP

echo "R10 DNA"
./squigulator -x dna-r10-prom $REF_HG38 -o new.blow5 -a new.sam 2> a.log || die "squigulator failed"
eel -i new.blow5 --config dna_r10.4.1_e8.2_400bps_sup.cfg --device cuda:all -o new.fastq &>> a.log || die "eel failed"
identitydna.sh $REF_HG38_IDX new.fastq > a.acc 2>> a.log || die "identitydna failed"
cat a.acc
CHECK_ACC 0.90 a.acc
samtools sort -o new.bam new.sam || die "samtools failed"
samtools index new.bam || die "samtools failed"
REMOVE_TMP

echo "RNA004 RNA"
./squigulator -x rna004-prom $REF_GENCODE -o new.blow5 2> a.log || die "squigulator failed"
/install/buttery-eel-0.4.2+dorado7.2.13/scripts/eel -i new.blow5 --config rna_rp4_130bps_sup.cfg --device cuda:all -o new.fastq &>> a.log|| die "eel failed"
identityrna.sh $REF_GENCODE new.fastq > a.acc 2>> a.log || die "identitydna failed"
cat a.acc
CHECK_ACC 0.77 a.acc
REMOVE_TMP

# CDNA R9
./squigulator -x dna-r9-prom $REF_GENCODE -o new.blow5 --cdna 2> a.log || die "squigulator failed"
eel -i new.blow5 --config dna_r9.4.1_450bps_sup.cfg --device cuda:all -o new.fastq &>> a.log || die "eel failed"
identitydna.sh $REF_GENCODE new.fastq > a.acc 2>> a.log || die "identitycdna failed"
cat a.acc
CHECK_ACC 0.94 a.acc
REMOVE_TMP



67 changes: 67 additions & 0 deletions test/test_meth.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,67 @@
#!/bin/bash

set -e

die() {
echo "$@" >&2
exit 1
}

REF_HG38=/genome/hg38noAlt.fa
REF_HG38_IDX=/genome/hg38noAlt.idx

#should download the following
METH_TRUTH=/home/hasindu/scratch/hg2_prom_lsk114_5khz/guppy_6.5.7_hac/PGXXXX230339_guppy657hac_mm226_f5c13_mfreq.tsv

MINIMOD=/data/hasindu/hasindu2008.git/minimod/minimod
COMPARE=~/hasindu2008.git/f5c/scripts/compare_methylation.py

tail -n +2 $METH_TRUTH | grep -w "chr22" | cut -f 1,2,7 > meth.tsv || die "failed extracting chr, pos, meth_freq"
samtools faidx ${REF_HG38} chr22 > ref_chr22.fa || die "failed extracting chr22 from ref"

test -e new.blow5 && rm new.blow5
test -e new.fastq && rm new.fastq

CHECK_ACC(){
THRESH=$1
FILE=$2

ACC=$(tail -1 $FILE | cut -f1)
if [ $(echo "$ACC < $THRESH" | bc) -eq 1 ]; then
die "FAILED: Accuracy $ACC is below threshold $THRESH"
fi

echo "PASSED: Accuracy $ACC is above threshold $THRESH"
echo "________________________________________________"
echo ""
echo ""
}

REMOVE_TMP(){
rm -f new.blow5 new.sam new.bam new.bedmethyl methcomp.tsv a.acc a.log
}

RUN_TEST(){
PROF=$1
MODEL=$2
./squigulator ref_chr22.fa -x ${PROF} -f 10 -t 20 -o new.blow5 --meth-freq meth.tsv 2> a.log || die "squigulator failed"
eel -i new.blow5 --config ${MODEL} --device cuda:all -o new.sam --call_mods &>> a.log|| die "eel failed"
samtools fastq -TMM,ML new.sam | minimap2 -ax map-ont -y -Y --secondary=no ref_chr22.fa - | samtools sort - -o new.bam 2>> a.log || die "samtools failed"
samtools index new.bam || die "samtools index failed"
# /install/modkit-v0.1.13/modkit pileup --cpg --ref ref_chr22.fa --ignore h -t 32 new.bam new.tmp.bedmethyl
# grep "chr22" new.tmp.bedmethyl | grep -v nan > new.bedmethyl
${MINIMOD} mod-freq ref_chr22.fa new.bam -b > new.bedmethyl 2>> a.log || die "minimod failed"
${COMPARE} ${METH_TRUTH} new.bedmethyl > methcomp.tsv 2>> a.log || die "compare failed"
tail -n+2 methcomp.tsv | datamash ppearson 3:5 2>> a.acc || die "pearson failed"
# ~/hasindu2008.git/f5c/scripts/plot_methylation.R -i methcomp.tsv -o methcomp.pdf
# cat a.acc
CHECK_ACC 0.93 a.acc
REMOVE_TMP
}

echo "R9 DNA methylation"
RUN_TEST "dna-r9-prom" "dna_r9.4.1_450bps_modbases_5mc_cg_hac_prom.cfg"

echo "R10 DNA methylation"
RUN_TEST "dna-r10-prom" "dna_r10.4.1_e8.2_400bps_5khz_modbases_5mc_cg_hac_prom.cfg"

39 changes: 39 additions & 0 deletions test/test_misc.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@
#!/bin/bash

set -e

die() {
echo "$@" >&2
exit 1
}

REF_HG38=/genome/hg38noAlt.fa
REF_HG38_IDX=/genome/hg38noAlt.idx

REF_GENCODE=/genome/gencode.v40.transcripts.fa

test -e new.blow5 && rm new.blow5
test -e new.fastq && rm new.fastq

CHECK_ACC(){
THRESH=$1
FILE=$2

ACC=$(tail -1 $FILE | cut -f2)
if [ $(echo "$ACC < $THRESH" | bc) -eq 1 ]; then
die "FAILED: Accuracy $ACC is below threshold $THRESH"
fi

echo "PASSED: Accuracy $ACC is above threshold $THRESH"
echo "________________________________________________"
echo ""
echo ""
}

REMOVE_TMP(){
rm -f new.blow5 new.fastq a.acc a.log
}




0 comments on commit f1bfae3

Please sign in to comment.