-
Notifications
You must be signed in to change notification settings - Fork 67
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
1 parent
7d3e71a
commit 0dd7308
Showing
4 changed files
with
172 additions
and
3 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,79 @@ | ||
#!/usr/bin/perl
# LMU Munich. AG Enard
# A script to filter reads based on Barcode base quality.
# Author: Swati Parekh
# Contact: parekh@bio.lmu.de or ziegenhain@bio.lmu.de
#
# NOTE(review): despite the description above, the code below performs no
# quality filtering.  It reads three FASTQ files in lock-step, concatenates
# the sequence lines and the quality lines of the three mates (read1, read2,
# read3, in that order), and writes one merged record per input triple.  The
# header line and the separator ("+") line of every merged record are taken
# from read 1.  Finally the output file is compressed with pigz.

use strict;
use warnings;

# Exactly five positional arguments are required; otherwise show the usage.
if (@ARGV != 5) {
    print "\n#####################################################################################
Usage: perl $0 <Read1.fq.gz> <Read2.fq.gz> <Read3.fq.gz> <output.fq> <threads>\n
Explanation of parameters:
output.fq - Output file name. pigz will put the .gz only provide the base name.
threads - number of processors to zip.
Please drop your suggestions and clarifications to <parekh\@bio.lmu.de>\n
######################################################################################\n\n";
    exit;
}

my ($oneread, $tworead, $threeread, $bcreadoutfull, $threads) = @ARGV;

# Open a FASTQ file for reading and return the handle.
# A name ending in ".gz" is streamed through "gzip -dc" (list-form pipe open,
# so the file name never passes through a shell); anything else is opened as
# plain text.  The decision is made per file, so compressed and uncompressed
# inputs may be mixed.
sub open_fastq {
    my ($path) = @_;
    my @open_args = $path =~ /\.gz$/
                  ? ('-|', 'gzip', '-dc', $path)
                  : ('<', $path);

    # "or" (not "||") so that the die is attached to open() itself.
    open my $fh, @open_args
        or die "Couldn't open file $path. Check permissions!\n Check if it is differently zipped then .gz\n\n";
    return $fh;
}

# Read one four-line FASTQ record from $fh.
# Returns nothing at end of input, otherwise an array reference
# [ header, sequence, separator, quality ].  The header and separator keep
# their trailing newline; sequence and quality are chomped.
# Dies when the input ends in the middle of a record.
sub read_record {
    my ($fh, $path) = @_;

    my $header = <$fh>;
    return unless defined $header;

    my $sequence  = <$fh>;
    my $separator = <$fh>;
    my $quality   = <$fh>;
    die "Truncated FASTQ record in $path near line $.\n"
        unless defined $quality;

    chomp($sequence, $quality);
    return [ $header, $sequence, $separator, $quality ];
}

# Close an input handle.  For a gzip pipe a false return means the
# decompressor reported a problem, which is worth telling the user about.
sub close_input {
    my ($fh, $path) = @_;
    close $fh
        or warn "Problem while closing input $path (status $?): $!\n";
    return;
}

my $in_one   = open_fastq($oneread);
my $in_two   = open_fastq($tworead);
my $in_three = open_fastq($threeread);

open my $out, '>', $bcreadoutfull
    or die "Couldn't open file $bcreadoutfull to write\n\n";

# Read 1 drives the loop; reads 2 and 3 must supply a mate for each record.
while (my $rec_one = read_record($in_one, $oneread)) {
    my $rec_two = read_record($in_two, $tworead)
        or die "File $tworead has fewer records than $oneread\n";
    my $rec_three = read_record($in_three, $threeread)
        or die "File $threeread has fewer records than $oneread\n";

    my $seq  = join '', map { $_->[1] } $rec_one, $rec_two, $rec_three;
    my $qseq = join '', map { $_->[3] } $rec_one, $rec_two, $rec_three;

    print {$out} $rec_one->[0], $seq, "\n", $rec_one->[2], $qseq, "\n"
        or die "Couldn't write to $bcreadoutfull: $!\n";
}

close_input($in_one,   $oneread);
close_input($in_two,   $tworead);
close_input($in_three, $threeread);

# Buffered write errors only surface when the handle is closed.
close $out
    or die "Couldn't finish writing $bcreadoutfull: $!\n";

# List-form system() bypasses the shell, so unusual characters in the output
# name cannot be misinterpreted, and the exit status can be checked.
system('pigz', '-f', '-p', $threads, $bcreadoutfull) == 0
    or die "pigz failed on $bcreadoutfull (status $?)\n";
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,84 @@ | ||
#!/usr/bin/perl
# LMU Munich. AG Enard
# A script to preprocess Split-seq data.
# Author: Swati Parekh&Christoph Ziegenhain
# Contact: parekh@bio.lmu.de or ziegenhain@bio.lmu.de or hellmann@bio.lmu.de
#
# For every record of the barcode FASTQ file, three 1-based inclusive
# position ranges ("start-end") are cut out of the sequence line and of the
# quality line, concatenated in the order Range1, Range2, Range3, and written
# to <Outdir>/<StudyName>.barcoderead.preprocess.fastq.  The header and the
# separator line are copied unchanged.  The output is then compressed with
# the pigz executable given on the command line.

use strict;
use warnings;

# Exactly eight positional arguments are required; otherwise show the usage.
if (@ARGV != 8) {
    print "\n#####################################################################################
Usage: perl $0 <barcode-Read.fq.gz> <Range1> <Range2> <Range3> <Threads> <StudyName> <Outdir> <pigz-executable> \n
Explanation of parameter:
barcode-Read.fq.gz - Input barcode reads fastq file name.
Threads - Number of threads to use.
Study - Study name.
Ranges 1,2,3 - Barcode Ranges to extract
OUTDIR - Output directory.
pigz-executable - Location of pigz executable
######################################################################################\n\n";
    exit;
}

my ($bcread, $arange, $brange, $crange, $threads, $study, $outdir, $pigz) = @ARGV;

# Turn a "start-end" range (1-based, inclusive) into the (offset, length)
# pair that substr() expects.  Dies on anything that is not two positive
# integers with start <= end.
sub parse_range {
    my ($range) = @_;

    my ($first, $last) = $range =~ /\A(\d+)-(\d+)\z/
        or die "Invalid range '$range': expected <start>-<end>, e.g. 1-8\n";
    die "Invalid range '$range': start must be >= 1 and end must be >= start\n"
        if $first < 1 || $last < $first;

    return ($first - 1, $last - $first + 1);
}

# Return the part of $string covered by (offset, length).  A read that is
# too short yields a shorter (possibly empty) piece rather than undef.
sub slice {
    my ($string, $offset, $length) = @_;
    return '' if $offset > length $string;
    return substr($string, $offset, $length);
}

my @ranges = map { [ parse_range($_) ] } $arange, $brange, $crange;

my $bcreadoutfull = $outdir."/".$study.".barcoderead.preprocess.fastq";

# Compressed input is streamed through "<pigz> -dc" using the list form of
# the pipe open, so the file name never passes through a shell.
my @open_args = $bcread =~ /\.gz$/
              ? ('-|', $pigz, '-dc', $bcread)
              : ('<', $bcread);

# "or" (not "||") so that the die is attached to open() itself.
open my $in, @open_args
    or die "Couldn't open file $bcread. Check permissions!\n Check if it is differently zipped then .gz\n\n";

open my $out, '>', $bcreadoutfull
    or die "Couldn't open file $bcreadoutfull to write\n\n";

my $total = 0;

while (my $rid = <$in>) {
    $total++;

    my $rseq = <$in>;
    my $qid  = <$in>;
    my $qseq = <$in>;
    die "Truncated FASTQ record in $bcread near line $.\n"
        unless defined $qseq;

    # Strip the line ends first: otherwise a range reaching past the end of
    # the read would copy the newline into the barcode and break the
    # four-line record layout of the output.
    chomp($rseq, $qseq);

    my $bcseq  = join '', map { slice($rseq, @{$_}) } @ranges;
    my $bcqual = join '', map { slice($qseq, @{$_}) } @ranges;

    print {$out} $rid, $bcseq, "\n", $qid, $bcqual, "\n"
        or die "Couldn't write to $bcreadoutfull: $!\n";
}

# For a pigz pipe a false return means the decompressor reported a problem.
close $in
    or warn "Problem while closing input $bcread (status $?): $!\n";

# Buffered write errors only surface when the handle is closed.
close $out
    or die "Couldn't finish writing $bcreadoutfull: $!\n";

print "Reads processed: $total \n\n";

# List-form system() bypasses the shell and lets the exit status be checked.
system($pigz, '-f', '-p', $threads, $bcreadoutfull) == 0
    or die "$pigz failed on $bcreadoutfull (status $?)\n";
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters