diff --git a/README.md b/README.md
index b8e5ee5..4b709b7 100755
--- a/README.md
+++ b/README.md
@@ -11,6 +11,9 @@ You can read more about zUMIs in our [biorxiv preprint](https://www.biorxiv.org/
 You can glance through zUMIs in [zUMIs poster](https://github.com/sdparekh/zUMIs/blob/master/zUMIs_GI2017_poster.pdf)!
 
 ## Releases/Changelog
+12 Apr 2018: [zUMIs.0.0.6 released](https://github.com/sdparekh/zUMIs/releases/tag/zUMIs.0.0.6).
+Improved support for combinatorial indexing methods.
+
 30 Mar 2018: [zUMIs.0.0.5 released](https://github.com/sdparekh/zUMIs/releases/tag/zUMIs.0.0.5).
 Rewrote hamming distance binning of UMIs and barcodes. In addition to faster running times, removed dependency on the stringdist package that may have led to issues with parallel computing in some systems. Furthermore removed a possible bug when resuming running with the -w switch in combination with plate barcode usage.
 
@@ -39,14 +42,17 @@ zUMIs is compatible with these single-cell UMI protocols:
 - SORT-seq (Muraro et al., 2016)
 - DroNc-seq (Habib et al., 2017)
 - Seq-Well (Gierahn et al., 2017)
-- SPLiT-seq (Rosenberg et al., 2017)
-- STRT-2i (Hochgerner et al., 2017)
+- SPLiT-seq (Rosenberg et al., 2018)
+- sci-RNA-seq (Cao et al., 2017)
+- STRT-2i (Hochgerner et al., 2018)
 - Quartz-seq2 (Sasagawa et al., 2017)
 - 10x Genomics Chromium (Zheng et al., 2017)
 - Wafergen ICELL8 (Gao et al., 2017)
 - Illumina ddSEQ SureCell
 - inDrops (Zilionis et al., 2017; Klein et al. 2015)
 
+For combinatorial indexing protocols, be sure to [check our wiki page](https://github.com/sdparekh/zUMIs/wiki/Combinatorial-Indexing).
+
 ## Getting help
 Refer to [zUMIs Github wiki](https://github.com/sdparekh/zUMIs/wiki) for help.
 
diff --git a/cat3fq.pl b/cat3fq.pl
new file mode 100755
index 0000000..6bf2c70
--- /dev/null
+++ b/cat3fq.pl
@@ -0,0 +1,83 @@
+#!/usr/bin/perl
+# LMU Munich. AG Enard
+# A script to concatenate three fastq files record by record into one read.
+# For every record the three sequences and the three quality strings are
+# joined in argument order; both ID lines are taken from the first file.
+# Author: Swati Parekh
+# Contact: parekh@bio.lmu.de or ziegenhain@bio.lmu.de
+
+use strict;
+use warnings;
+
+if(@ARGV != 5)
+{
+print
+"\n#####################################################################################
+Usage: perl $0 <read1.fq> <read2.fq> <read3.fq> <output.fq> <threads>\n
+Explanation of parameters:
+
+output.fq - Output file name. pigz will put the .gz only provide the base name.
+threads - number of processors to zip.
+Please drop your suggestions and clarifications to <parekh\@bio.lmu.de>\n
+######################################################################################\n\n";
+exit;
+}
+
+my ($oneread, $tworead, $threeread, $bcreadoutfull, $threads) = @ARGV;
+
+# Open a fastq file for reading; files ending in .gz are streamed through gzip.
+# The list form of the pipe open keeps the file name away from the shell.
+sub open_fastq {
+    my ($path) = @_;
+    my $fh;
+    if ($path =~ /\.gz$/) {
+        open($fh, '-|', 'gzip', '-dc', $path)
+            or die "Couldn't open file $path. Check permissions!\n Check if it is differently zipped then .gz\n\n";
+    }
+    else {
+        open($fh, '<', $path)
+            or die "Couldn't open file $path. Check permissions!\n Check if it is differently zipped then .gz\n\n";
+    }
+    return $fh;
+}
+
+# Read one four-line fastq record. Returns (ID line, sequence, separator line,
+# qualities) with sequence and qualities chomped, or an empty list at end of file.
+sub read_record {
+    my ($fh, $path) = @_;
+    my $id = <$fh>;
+    return if !defined $id;
+    my $seq  = <$fh>;
+    my $sep  = <$fh>;
+    my $qual = <$fh>;
+    die "Truncated fastq record in $path\n" if !defined $qual;
+    chomp($seq, $qual);
+    return ($id, $seq, $sep, $qual);
+}
+
+my $afh = open_fastq($oneread);
+my $bfh = open_fastq($tworead);
+my $cfh = open_fastq($threeread);
+
+open(my $out, '>', $bcreadoutfull) or die "Couldn't open file $bcreadoutfull to write\n\n";
+
+# The first file drives the loop; the other two must supply a record for each of its reads.
+while (my ($arid, $arseq, $aqid, $aqseq) = read_record($afh, $oneread)) {
+    my (undef, $brseq, undef, $bqseq) = read_record($bfh, $tworead)
+        or die "$tworead has fewer reads than $oneread\n";
+    my (undef, $crseq, undef, $cqseq) = read_record($cfh, $threeread)
+        or die "$threeread has fewer reads than $oneread\n";
+
+    print {$out} $arid, $arseq, $brseq, $crseq, "\n", $aqid, $aqseq, $bqseq, $cqseq, "\n";
+}
+
+# Input handles are closed unchecked: a gzip child that still had data left
+# exits on SIGPIPE, which is not an error for this script.
+close $afh;
+close $bfh;
+close $cfh;
+close $out or die "Couldn't finish writing $bcreadoutfull: $!\n";
+
+# List-form system() passes the file name to pigz without shell interpretation.
+system('pigz', '-f', '-p', $threads, $bcreadoutfull) == 0
+    or die "pigz failed on $bcreadoutfull\n";
diff --git a/preprocess_splitseq.pl b/preprocess_splitseq.pl
new file mode 100644
index 0000000..01b7fd2
--- /dev/null
+++ b/preprocess_splitseq.pl
@@ -0,0 +1,83 @@
+#!/usr/bin/perl
+# LMU Munich. AG Enard
+# A script to preprocess Split-seq data.
+# Cuts three barcode ranges out of every barcode read and writes them, joined
+# back to back, to a new fastq file that is then compressed with pigz.
+# Author: Swati Parekh&Christoph Ziegenhain
+# Contact: parekh@bio.lmu.de or ziegenhain@bio.lmu.de or hellmann@bio.lmu.de
+
+use strict;
+use warnings;
+
+if(@ARGV != 8)
+{
+print
+"\n#####################################################################################
+Usage: perl $0 <barcode-Read.fq.gz> <Range1> <Range2> <Range3> <Threads> <Study> <OUTDIR> <pigz-executable>\n
+Explanation of parameter:
+
+barcode-Read.fq.gz - Input barcode reads fastq file name.
+Threads - Number of threads to use.
+Study - Study name.
+Ranges 1,2,3 - Barcode Ranges to extract
+OUTDIR - Output directory.
+pigz-executable - Location of pigz executable
+######################################################################################\n\n";
+exit;
+}
+
+my ($bcread, $arange, $brange, $crange, $threads, $study, $outdir, $pigz) = @ARGV;
+
+# Turn each 1-based, inclusive "start-end" range into [0-based offset, length] for substr.
+my @windows;
+for my $range ($arange, $brange, $crange) {
+    my ($first, $last) = $range =~ /^(\d+)-(\d+)$/
+        or die "Barcode range '$range' is not of the form start-end\n";
+    die "Barcode range '$range' must start at 1 or later and must not end before it starts\n"
+        if $first < 1 || $last < $first;
+    push @windows, [ $first - 1, $last - $first + 1 ];
+}
+
+# Return the pieces of $string selected by @windows, joined in order. A window
+# that starts past the end of a short read contributes nothing.
+sub cut_ranges {
+    my ($string) = @_;
+    return join '',
+        map { $_->[0] < length($string) ? substr($string, $_->[0], $_->[1]) : '' } @windows;
+}
+
+my $bcreadoutfull = $outdir."/".$study.".barcoderead.preprocess.fastq";
+
+my $in;
+if ($bcread =~ /\.gz$/) {
+    open($in, '-|', $pigz, '-dc', $bcread)
+        or die "Couldn't open file $bcread. Check permissions!\n Check if it is differently zipped then .gz\n\n";
+}
+else {
+    open($in, '<', $bcread)
+        or die "Couldn't open file $bcread. Check permissions!\n Check if it is differently zipped then .gz\n\n";
+}
+
+open(my $out, '>', $bcreadoutfull) or die "Couldn't open file $bcreadoutfull to write\n\n";
+
+my $total = 0;
+
+while (my $rid = <$in>) {
+    my $rseq = <$in>;
+    my $qid  = <$in>;
+    my $qseq = <$in>;
+    die "Truncated fastq record in $bcread\n" if !defined $qseq;
+    # Strip line ends so a range reaching past the read cannot pull a newline into the barcode.
+    chomp($rseq, $qseq);
+    $total++;
+
+    print {$out} $rid, cut_ranges($rseq), "\n", $qid, cut_ranges($qseq), "\n";
+}
+close $in;
+close $out or die "Couldn't finish writing $bcreadoutfull: $!\n";
+
+print "Reads processed: $total \n\n";
+
+# List-form system() passes the file name to pigz without shell interpretation.
+system($pigz, '-f', '-p', $threads, $bcreadoutfull) == 0
+    or die "$pigz failed on $bcreadoutfull\n";
diff --git a/zUMIs-master.sh b/zUMIs-master.sh
index 02d334b..b9bd2e0 100755
--- a/zUMIs-master.sh
+++ b/zUMIs-master.sh
@@ -3,7 +3,7 @@
 # Pipeline to run UMI-seq analysis from fastq to read count tables.
 # Authors: Swati Parekh & Christoph Ziegenhain
 # Contact: parekh@bio.lmu.de or christoph.ziegenhain@ki.se or hellmann@bio.lmu.de
-vers=0.0.5b
+vers=0.0.6
 function check_opts() {
 value=$1
 name=$2