-
Notifications
You must be signed in to change notification settings - Fork 0
/
get_stacks_depth.pl
executable file
·95 lines (82 loc) · 2.36 KB
/
get_stacks_depth.pl
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
#!/usr/bin/perl -w
#
# get_stacks_depth.pl
#
# Create a fasta file based on the consensus sequences in a Stacks.tags.tsv file
# Save the read depth for each Stack
# Count the number of stacks per individual
#
# September 11, 2013
#
# RUN IN THE DIRECTORY WITH THE TAGS FILES!!!!!
use strict;
# Get all of the names of the tags.tsv files in the directory
my @files = glob("*.tags.tsv");
# Save the distribution of read depths and tag number outside of the file loop
my %depths = ();
my $tags = 0;
# Process each file individually
foreach my $file (@files) {
# Get the prefix from the file name and create an output file
my @filename = split(/\./, $file);
my $prefix = $filename[0];
my $output = $prefix . "_stacks.fasta";
# Open the input and output file
open (IN, $file) || die "\nUnable to open the file $file!\n";
open (OUT, ">$output") || die "\nUnable to open the file $output!\n";
# Set the read depth to zero, initialize the sequence, then start processing the input file
my $reads = 0;
my $sequence = '';
my $name = '';
while (<IN>) {
chomp $_;
my @info = split(/\t/, $_);
# Check if the sequence type is consensus
if ($info[6] =~ /consensus/) {
$sequence = $info[9];
#print $sequence, "\n";
unless ($reads == 0) {
# Update the hash with the distribution of depths
if (exists $depths{$reads}) {
$depths{$reads} += 1;
} else {
$depths{$reads} = 1;
}
# Print the previous consensus sequence and reset the variables
print OUT $name, $reads, "\n";
print OUT $sequence, "\n";
$tags++;
$reads = 0;
#$sequence = '';
#$sequence = $info[8];
$name = '';
$name = ">" . "Sample:" . $info[1] . "_Stack:" . $info[2] . "_Depth:";
} else {
#$sequence = $info[8];
$name = ">" . "Sample:" . $info[1] . "_Stack:" . $info[2] . "_Depth:";
}
} elsif ($info[6] =~ /model/) {
next;
} else {
$reads++;
}
}
if (exists $depths{$reads}) {
$depths{$reads} += 1;
} else {
$depths{$reads} = 1;
}
print OUT $name, $reads, "\n";
print OUT $sequence, "\n";
close(IN);
close(OUT);
print $prefix, " ", $tags, "\n";
$tags = 0;
}
# Print out the read depth distribution for the entire population
open (TMP, ">read.depths") || die "\nUnable to open the file read.depths!\n";
foreach my $key (keys %depths) {
print TMP $key, " ", $depths{$key}, "\n";
}
close(TMP);
exit;