-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathgetTaxids.pl
78 lines (51 loc) · 1.24 KB
/
getTaxids.pl
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
#!/usr/bin/perl -w
use strict;
print STDERR "Calculating unique GIs\n";
my @gisToLook = `cat *.gi | cut -f2 | uniq | sort | uniq`;
my %gis;
my %map;
foreach my $gi (@gisToLook){
next if($gi =~ m/GI/);
chomp $gi;
$gis{$gi} = 1;
}
print STDERR "Retrieving GIs to Taxids mapping\n";
open TAXID, "<../blast/analysis2/v1/nonMycReads/gis/taxids/nucl_wgs.accession2taxid" or die "Cannot open nucl_wgs.accession2taxid: $!\n";
<TAXID>;
while (<TAXID>){
chomp $_;
my (undef, undef, $taxid, $gi) = split(/\s+/, $_);
if(defined $gis{$gi}){
$map{$gi} = $taxid;
delete $gis{$gi};
}
last if (scalar keys %gis == 0);
}
close TAXID;
my @isolates;
while (<DATA>){
chomp $_;
push(@isolates, $_);
}
foreach my $isolate (@isolates){
print STDERR "Getting Taxids for $isolate\n";
open GIS, "<$isolate.gi" or die "Cannot open $isolate GIs: $!\n";
open OUT, ">$isolate.taxid" or die "Cannot open $isolate.taxid:$!\n";
my $header = <GIS>;
chomp $header;
print OUT $header."\tTaxid\n";
while (<GIS>){
chomp $_;
my (@pieces) = split(/\t/,$_);
if (exists $map{$pieces[1]}){
print OUT join("\t", @pieces, $map{$pieces[1]})."\n";
}
else {
print OUT join("\t", @pieces, ".")."\n";
}
}
close GIS;
close OUT;
}
__DATA__
Myc_16