If the chromosome names have a "chr" prefix, e.g.
chr1 10 20
chr2 10 20
use the following command for sorting:
# Version sort (-V) compares embedded digit runs numerically, so chr2 sorts
# before chr10 and, within a chromosome, start 9 sorts before start 10.
# The file name is quoted so paths with spaces or glob characters stay intact.
sort -V -- "${file}"
If the chromosome names do not have a "chr" prefix, e.g.
1 10 20
2 10 20
use the following command for sorting:
# Sort numerically by chromosome (field 1), then by start position (field 2).
# Each key is limited to a single field (-kN,N); a bare -k1 or -k2 would extend
# the key to the end of the line. Non-numeric names (e.g. X, Y) compare as 0.
sort -k1,1n -k2,2n -- "${file}"
# Remove a "Chr"/"chr" prefix from the reference names (SN:) of @SQ header lines.
# The single-quoted string is the command samtools runs to filter the header:
#   - "\$" stops the shell that runs that command from expanding $1;
#   - "\@" stops perl from interpolating @SQ as an (empty) array, which would
#     silently drop the "@SQ" anchor from the pattern;
#   - [Cc] accepts both spellings of the prefix.
# The re-headered BAM is written to stdout.
samtools reheader -c 'perl -pe "s/^(\@SQ.*\tSN:)[Cc]hr/\$1/"' in.bam
# Add a "chr" prefix to the reference names (SN:) of @SQ header lines whose
# name is a number, X, Y or MT.
# Capture groups: $1 = everything up to and including "<TAB>SN:", $2 = the bare
# name, $3 = the delimiter after the name (whitespace or end of line).
# The prefix goes between $1 and $2, and $3 is written back so the following
# field separator is preserved.
# "\$" stops the shell that runs the command from expanding $1..$3, "\@" stops
# perl from interpolating @SQ as an array, and ${1} keeps "chr" from being read
# as part of the variable name.
# The re-headered BAM is written to stdout.
samtools reheader -c 'perl -pe "s/^(\@SQ.*\tSN:)(\d+|X|Y|MT)(\s|\$)/\${1}chr\$2\$3/"' in.bam
# Add (or replace) the read group ID:XXXX in IN.bam and write the result to OUT.bam
samtools addreplacerg -r 'ID:XXXX' -o OUT.bam IN.bam
# Drop all comment (@CO) lines from the BAM header; the result goes to stdout
samtools reheader -c "grep -v '^@CO'" in.bam
# Split a multi-sample VCF/BCF into per-sample files inside ${OUTPUT_DIR}.
# The sample list is produced on the fly with `bcftools query -l` and passed via
# process substitution (requires bash). Variables are quoted so paths containing
# spaces or glob characters are passed through unchanged.
bcftools +split -S <(bcftools query -l "${VCF_FILE}") "${VCF_FILE}" -o "${OUTPUT_DIR}"
# Linearize the FASTA file: every record becomes exactly two lines
# (header, then the whole sequence on a single line).
#   - sequence lines after line 1 have their newline removed so they concatenate;
#   - a header after line 1 first emits the newline that ends the previous sequence;
#   - at end of input the last sequence is terminated with a newline as well,
#     so the output is a well-formed text file.
perl -ne '
  if ($. > 1) {
    if (/^>/) { print "\n" } else { chomp }
  }
  print;
  $pending_eol = !/\n\z/;
  END { print "\n" if $pending_eol }
' hla_gen.fasta > hla_gen_linear.fasta
# For each "id,type" row of the CSV, find the type in the linearized FASTA and
# write each matching header plus the line after it (its sequence) to <id>.fasta.
#   - "${type}" is quoted: HLA names contain '*', which would otherwise be
#     subject to pathname expansion; `--` protects patterns starting with '-'.
#   - `|| [[ -n "${id}" ]]` also processes a last row that lacks a trailing newline.
#   - rows with an empty id or type are skipped (an empty fixed-string pattern
#     would match every line).
while IFS=, read -r id type || [[ -n "${id}" ]]; do
  [[ -n "${id}" && -n "${type}" ]] || continue
  grep -F -A1 --no-group-separator -- "${type}" hla_gen_linear.fasta > "${id}.fasta"
done < HG00733_MHC_types.csv
# Create a nucleotide BLAST database from the FASTA file ${i}; the title is the
# file name without its .fasta suffix. Expansions are quoted so file names with
# spaces or glob characters are passed as a single argument.
ncbi-blast-2.12.0+/bin/makeblastdb -in "${i}" -title "${i%.fasta}" -dbtype nucl -parse_seqids
# Search QUERY.fa against the BLAST database DATABASE using 4 threads
ncbi-blast-2.12.0+/bin/blastn \
  -query QUERY.fa \
  -db DATABASE \
  -num_threads 4
# Repeat the character 'A' N times and write the result to a file.
# `head -c` reads the N zero bytes in large chunks; `dd bs=1` would issue one
# read and one write per byte, which is very slow for large N.
head -c "${N}" /dev/zero | tr '\0' 'A' > "${OUTPUT_FILE}"
# Add a newline at the end of the file if it is not already there
# (GNU sed: appending empty text on the last line forces the terminator).
sed -i -e '$a\' -- "${OUTPUT_FILE}"
# Prepend the header line '>CONTIG1' to the file (GNU sed: \n in the
# replacement inserts a newline). Has no effect on an empty file.
sed -i -e '1s/^/>CONTIG1\n/' -- "${OUTPUT_FILE}"
# Write one tab-separated line per record: CHROM, POS, REF and the first ALT allele.
bcftools query -f '%CHROM\t%POS\t%REF\t%ALT{0}\n' "${BCF}" > sites.txt
# Generate a synthetic sites table for one contig: for every position c it
# prints "<contig> <c> A C" (tab-separated) with REF=A and ALT=C.
# awk runs only its BEGIN block, so no input file is read.
# NOTE(review): the loop stops at contig_length - 1 (c < XN); if the last
# position of the contig should be included, change the condition to c <= XN.
awk -v X="${contig}" -v XN="${contig_length}" \
  'BEGIN { for (c = 1; c < XN; c++) printf "%s\t%d\tA\tC\n", X, c }' > "${output}"
# For Snakemake: an awk command, written as the `shell:` directive of a rule,
# that prints one tab-separated line "<contig> <position> A C" for each position
# from 1 to contig_length - 1.
# {wildcards.contig}, {params.contig_length} and {output} are placeholders filled
# in by Snakemake; the braces that belong to awk are doubled ({{ }}) so they are
# not treated as placeholders, and backslashes are doubled so that awk still
# receives \t and \n.
shell:
"""
awk -v X={wildcards.contig} -v XN={params.contig_length} 'BEGIN{{for(c=1;c<XN;c++) printf "%s\\t%d\\tA\\tC\\n",X,c}}' > {output}
"""