Skip to content

Commit

Permalink
csi indices generated instead of tabix
Browse files Browse the repository at this point in the history
  • Loading branch information
umahsn committed Apr 22, 2024
1 parent 97287d3 commit 3126f36
Show file tree
Hide file tree
Showing 3 changed files with 10 additions and 8 deletions.
2 changes: 2 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,8 @@ NanoCaller is a computational method that integrates long reads in deep convolut
NanoCaller is distributed under the [MIT License by Wang Genomics Lab](https://wglab.mit-license.org/).

## Latest Updates
_**v3.6.0** (April 22 2024)_: CSI indices generated for VCF files instead of TBI to accommodate larger contigs.

_**v3.5.0** (March 27 2024)_: CRAM files are supported in input as well as in phased output if whatshap version>=2 is being used with NanoCaller.

_**v3.4.0** (July 31 2023)_: VCF files contain total and strand-specific allele depths for SNP calls from SNP calling models. A new mode for short ONT reads (5-10kbp) added. `--phase_qual_score` parameter filters out low quality SNP calls from phasing by WhatsHap; these SNP calls are kept in the output, but neither phased nor used for phasing reads.
Expand Down
12 changes: 6 additions & 6 deletions nanocaller_src/indelCaller.py
Original file line number Diff line number Diff line change
Expand Up @@ -197,7 +197,7 @@ def phase_run(contig_dict, params, indel_dict, job_Q, counter_Q, phased_snp_file
input_snp_vcf = params['snp_vcf']
output_contig_vcf = os.path.join(phase_dir, '%s.snps.phased.vcf.gz' %contig)
phased_snp_files_list.append(output_contig_vcf)
run_cmd('bcftools view %s -r %s|bgziptabix %s' %(input_snp_vcf, contig, output_contig_vcf))
run_cmd('bcftools view %s -r %s| bgzip > %s && tabix -fp vcf --csi %s' %(input_snp_vcf, contig, output_contig_vcf, output_contig_vcf))

if params['mode']=='snps':
counter_Q.put(1)
Expand Down Expand Up @@ -232,13 +232,13 @@ def phase_run(contig_dict, params, indel_dict, job_Q, counter_Q, phased_snp_file

# extract region from VCF
run_cmd('bcftools view %s -r %s -i "QUAL>=%.4f" -o %s' %(input_snp_vcf, contig, phase_qual_score,input_contig_vcf), verbose=params['verbose'])
run_cmd('bcftools view %s -r %s -i "QUAL<%.4f"|bgziptabix %s' %(input_snp_vcf, contig, phase_qual_score,lowq_input_contig_vcf), verbose=params['verbose'])
run_cmd('bcftools view %s -r %s -i "QUAL<%.4f"| bgzip > %s && tabix -fp vcf --csi %s' %(input_snp_vcf, contig, phase_qual_score, lowq_input_contig_vcf, lowq_input_contig_vcf), verbose=params['verbose'])

#phase VCF
run_cmd("whatshap phase %s %s -o %s -r %s --ignore-read-groups --chromosome %s %s" %(input_contig_vcf, params['sam_path'], raw_output_contig_vcf, params['fasta_path'], contig , enable_whatshap), verbose=params['verbose'])


run_cmd("bcftools view -e 'GT=\"0\\0\"' %s|bgziptabix %s" %(raw_output_contig_vcf, output_contig_vcf), verbose=params['verbose'])
run_cmd("bcftools view -e 'GT=\"0\\0\"' %s| bgzip > %s && tabix -fp vcf --csi %s" %(raw_output_contig_vcf, output_contig_vcf, output_contig_vcf), verbose=params['verbose'])

if file_type=='BAM':
phased_bam = os.path.join(phase_dir, '%s.phased.bam' %contig)
Expand Down Expand Up @@ -361,7 +361,7 @@ def call_manager(params):
output_files['snps']=os.path.join(params['vcf_path'],'%s.snps.phased.vcf.gz' %params['prefix'])

phased_snps_files='\n'.join(phased_snp_files_list)
cmd='bcftools concat -a -f - |bgziptabix %s' %(output_files['snps'])
cmd='bcftools concat -a -f - |bgzip > %s && tabix -fp vcf --csi %s' %(output_files['snps'], output_files['snps'])
stream=Popen(cmd, shell=True, stdin=PIPE, stdout=PIPE, stderr=PIPE)
stdout, stderr = stream.communicate(input=phased_snps_files.encode())

Expand All @@ -388,13 +388,13 @@ def call_manager(params):

print('\n%s: Compressing and indexing indel calls.' %str(datetime.datetime.now()))

run_cmd(' bcftools sort %s|rtg RTG_MEM=2G vcfdecompose -i - -o - |rtg RTG_MEM=2G vcffilter -i - --non-snps-only -o %s' %(raw_indel_vcf, output_files['indels']), verbose=params['verbose'])
run_cmd(' bcftools sort %s|rtg RTG_MEM=2G vcfdecompose -i - -o - |rtg RTG_MEM=2G vcffilter -i - --non-snps-only -o - | bgzip > %s && tabix -fp vcf --csi %s' %(raw_indel_vcf, output_files['indels'], output_files['indels']), verbose=params['verbose'])

if params['mode']=='all':
final_vcf=os.path.join(params['vcf_path'],'%s.vcf.gz' %params['prefix'])
output_files['final']=final_vcf

run_cmd('bcftools concat %s %s -a | bgziptabix %s' %(output_files['snps'], output_files['indels'], output_files['final']), verbose=params['verbose'])
run_cmd('bcftools concat %s %s -a | bgzip > %s && tabix -fp vcf --csi %s' %(output_files['snps'], output_files['indels'], output_files['final'], output_files['final']), verbose=params['verbose'])


return output_files
4 changes: 2 additions & 2 deletions nanocaller_src/snpCaller.py
Original file line number Diff line number Diff line change
Expand Up @@ -269,8 +269,8 @@ def call_manager(params):

print('\n%s: Compressing and indexing SNP calls.' %str(datetime.datetime.now()))

run_cmd("bcftools sort %s|bgziptabix %a" %(temp_output_file_path, all_snps_file_path), error=True)
run_cmd("bcftools view %s -f PASS|bgziptabix %a" %(all_snps_file_path, pass_snps_file_path), error=True)
run_cmd("bcftools sort %s|bgzip > %a && tabix -fp vcf --csi %a" %(temp_output_file_path, all_snps_file_path, all_snps_file_path), error=True)
run_cmd("bcftools view %s -f PASS|bgzip > %a && tabix -fp vcf --csi %a" %(all_snps_file_path, pass_snps_file_path, pass_snps_file_path), error=True)

return pass_snps_file_path

0 comments on commit 3126f36

Please sign in to comment.