forked from Ensembl/VEP_plugins
-
Notifications
You must be signed in to change notification settings - Fork 0
/
NMD.pm
181 lines (138 loc) · 5.69 KB
/
NMD.pm
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
=head1 LICENSE
Copyright [1999-2015] Wellcome Trust Sanger Institute and the EMBL-European Bioinformatics Institute
Copyright [2016-2023] EMBL-European Bioinformatics Institute
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
=head1 CONTACT
Ensembl <http://www.ensembl.org/info/about/contact/index.html>
=cut
=head1 NAME
NMD
=head1 SYNOPSIS
mv NMD.pm ~/.vep/Plugins
./vep -i variations.vcf --plugin NMD
=head1 DESCRIPTION
This is a plugin for the Ensembl Variant Effect Predictor (VEP)
that predicts if a variant allows the
transcript escape nonsense-mediated mRNA decay based on certain rules.
The rules are :
1. The variant location falls in the last exon of the transcript.
vvvv
ES...EE..I.ES...EE.I.ES....EE.I.ES....EE
(ES= exon_start,EE = exon_end, I = intron, v = variant location)
2. The variant location falls 50 bases upstream of the penultimate (second to the last ) exon.
vvv
ES...EE..I.ES...EE.I.ES....EE.I.ES....EE
(ES= exon_start,EE = exon_end, I = intron, v = variant location)
3. The variant falls in the first 100 coding bases in the transcript.
vvv
..ES...EE..I.ES...EE.I.ES....EE.I.ES....EE
(ES= exon_start,EE = exon_end, I = intron, v = variant location)
4. If the variant is in an intronless transcript, meaning only one exon exist in the transcript.
The additional term NMD-escaping variant (nonsense-mediated mRNA decay escaping variants) will be added if the variant matches any of the rules.
REFERENCES :
Identifying Genes Whose Mutant Transcripts Cause Dominant Disease Traits by Potential Gain-of-Function Alleles (Coban-Akdemir, 2018)
The rules and impact of nonsense-mediated mRNA decay in human cancers (Lindeboom, 2016)
=cut
package NMD;
use strict;
use warnings;
use base qw(Bio::EnsEMBL::Variation::Utils::BaseVepPlugin);
my %INCLUDE_SO = map{$_ => 1} qw(stop_gained frameshift_variant splice_donor_variant splice_acceptor_variant);
my %TERM = (
1 => "NMD_escaping_variant"
);
sub feature_types {
return ['Transcript'];
}
sub get_header_info {
return {
NMD => "Nonsense-mediated mRNA decay escaping variants prediction"};
}
sub run {
my $self = shift;
my $tva = shift;
# using stop_gained for now as advised.
return {} unless grep {$INCLUDE_SO{$_->SO_term}} @{$tva->get_all_OverlapConsequences};
my $tr = $tva->transcript;
my $tv = $tva->transcript_variation;
# position of the variant in respect to the coding sequence.
my $variant_coding_region = $tv->cds_end if (defined $tv->cds_end);
# checking for if the transcript is an intronless transcript
my @introns = $tr->get_all_Introns;
my $number = scalar @introns;
# to check for if the variant location falls in the last exon
my $check = $self->variant_exon_check($tva);
# to check what position the transcript variation coding sequence end is at
# former method gave room for a bug
my $position_check = 1 if ( defined ($variant_coding_region) && $variant_coding_region <= 101) ;
# to check if the variant is not on the coding sequence.
if (!defined ($tv->cds_end) || !defined ($tv->cds_start)){
return {};
}
# if statement to check if any of the rules is true
if ( ( defined ($position_check) && $position_check == 1) || $check || $number == 0 ) {
return {NMD => $TERM{1}};
}
else {
return {};
}
}
=head2 variant_exon_check
Args : $tva (TranscriptionVariationAllele)
Description : Checks if the variant location falls :
- in the last exon
- 50bp towards the end of the second to the last exon(if it exists).
If it does, It returns 1 else it returns 0.
Returntype : Boolean
Exceptions : None
Caller : run
Status : Stable
=cut
sub variant_exon_check {
my $self = shift;
my $tva = shift;
my $vf = $tva->variation_feature;
my $tr = $tva->transcript;
my $vf_end = $vf->seq_region_end;
my @exons = @{ $tr->get_all_Exons };
# to get the last exon
my $last_exon = $exons[-1];
# to get the second to the last exon (penultimate exon)
my $second_last_exon = $exons[-2];
my $coding_region_end = $last_exon->end;
my $coding_region_start = $last_exon->start;
if (defined($coding_region_start) && $vf_end >= $coding_region_start && $vf_end <= $coding_region_end){
return 1;
}
# to check if the second to the last exon exists
if (defined($second_last_exon)){
# this method has been changed because of the reverse strand consideration
my $coding_region_end;
my $diff_end;
# need to account for strand as if it is in reverse strand it should be reading from the start
# and adding 51 to find the variant 50 bases upstream in penultimate exon
if ($tr->strand == -1){
$coding_region_end = $second_last_exon->start;
$diff_end = $coding_region_end + 51;
if (defined($coding_region_end) && $vf_end >= $coding_region_end && $vf_end <= $diff_end ) {
return 1;
}
} else {
$coding_region_end = $second_last_exon->end;
$diff_end = $coding_region_end - 51;
if (defined($coding_region_end) && $vf_end >= $diff_end && $vf_end <= $coding_region_end ) {
return 1;
}
}
}
return 0;
}
1;