-
Notifications
You must be signed in to change notification settings - Fork 2
/
extract_miRNAs.pl
79 lines (64 loc) · 1.38 KB
/
extract_miRNAs.pl
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
#!/usr/bin/perl
## Extract miRNA records for one or more species from a miRBase FASTA file.
##
## Usage:
##     perl extract_miRNAs.pl miRBase_fasta_file species[,species...] [mature/star]
##
## For every requested species the input file is scanned once. Each record
## whose header starts with ">" followed by the species prefix is printed to
## STDOUT: the first whitespace-delimited token of the header on one line,
## followed by the sequence joined onto a single line, upper-cased and with
## U replaced by T.
##
## With type 'mature' (the default) records whose id ends in '*' are skipped;
## with type 'star' only records whose id contains '*' are kept.
##
## support for gzip files: names ending in ".gz" are read through "gunzip -dc".
use strict;
use warnings;

## Validate the arguments before touching them.
die "Error: Specify species or comma separated list of species\n\nUsage:\n\tperl $0 miRBase_fasta_file species [mature/star]\n\te.g. $0 mature.fa cel mature\n\tfiles can be gzipped\t=>no reason to decompress them\n" if(not $ARGV[1]);

my $file = $ARGV[0];

## Drop empty entries (e.g. from "cel,,hsa"): an empty prefix would match
## every header in the file.
my @species = grep { length } split(",",$ARGV[1]);

## The type is compared case-insensitively, so store it lower-cased; otherwise
## "Mature" would pass validation but match neither filter below.
my $type = 'mature';
if($ARGV[2]){
    $type = lc($ARGV[2]);
    if($type ne 'mature' and $type ne 'star'){
        die "argv 2 must either be mature or star\n";
    }
}

foreach my $sp (@species){
    ## we deliberately unzip the file (streamed through gunzip, nothing is
    ## written to disk). The list form of open bypasses the shell, so file
    ## names containing spaces or shell metacharacters are safe.
    my $is_gz = ($file =~ /\.gz$/) ? 1 : 0;
    my $fh;
    if($is_gz){
        open($fh, '-|', 'gunzip', '-dc', $file) or die "Could not open file $file\n";
    }else{
        open($fh, '<', $file) or die "$file not found $!";
    }

    my $in    = 0;    # true while inside a record that is being printed
    my $first = 1;    # true until the first record of this species is printed

    while(my $line = <$fh>){
        ## Strip the line ending, including a CR from Windows-style files.
        $line =~ s/\r?\n\z//;

        if($line =~ /^(>\Q$sp\E\S+)/){
            ## Header of a record belonging to the requested species.
            my $id = $1;
            $in = 0;
            next if($type eq 'mature' and $id =~ /\*$/);
            next if($type eq 'star' and $id !~ /\*/);

            ## Terminate the previous record's sequence line, if any.
            print "\n" if(not $first);
            print "$id\n";
            $first = 0;
            $in = 1;
        }elsif($line =~ /^>/){
            ## Header of some other species: stop printing.
            $in = 0;
        }elsif($in){
            ## Sequence line of a selected record: print as upper-case DNA,
            ## without a newline so multi-line sequences are joined.
            my $str = uc($line);
            $str =~ tr/U/T/;
            print $str;
        }
    }

    ## Terminate the last sequence line of this species.
    if(not $first){
        print "\n";
    }

    ## For a pipe, close reports a non-zero exit status of gunzip; surface it
    ## instead of silently emitting truncated output.
    my $closed = close($fh);
    if($is_gz and not $closed){
        warn "Warning: gunzip reported a problem reading $file (exit status ".($? >> 8)."); output may be incomplete\n";
    }
}