Skip to content

Commit

Permalink
Adjust behavior of locarna and mlocarna for pairwise alignments
Browse files Browse the repository at this point in the history
  -- do not use alifold in binaries when calculating base pairs
     for single sequences
  -- use noLP in mlocarna for calculating base pair probabilities
     /and/ the alignment
  -- increase output precision for probabilities in pp files
     (reducing rounding issues for the typical cutoff)
  • Loading branch information
s-will committed May 25, 2018
1 parent b577bb2 commit ada3ea0
Show file tree
Hide file tree
Showing 6 changed files with 56 additions and 34 deletions.
6 changes: 6 additions & 0 deletions ChangeLog
Original file line number Diff line number Diff line change
@@ -1,6 +1,12 @@
<<<<<<< HEAD
=== 2018

2.0.0RC6
adjust behavior of locarna and mlocarna for pairwise alignments
-- do not use alifold in binaries when calculating base pairs for single sequences
-- use noLP in mlocarna for calculating base pair probabilities /and/ the alignment
-- increase output precision for probabilities in pp files
(reducing rounding issues for the typical cutoff)

2.0.0RC5 (2018-05-08)
Allow penalized in global alignment
Expand Down
2 changes: 1 addition & 1 deletion configure.ac
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@ dnl -*- Autoconf -*-
dnl Process this file with autoconf to produce a configure script.

AC_PREREQ(2.59)
AC_INIT([LocARNA], [2.0.0RC5], [will@informatik.uni-freiburg.de], [locarna],
AC_INIT([LocARNA], [2.0.0RC6], [will@informatik.uni-freiburg.de], [locarna],
[http://www.bioinf.uni-freiburg.de/Software/LocARNA/])

dnl special dir for aux config files
Expand Down
25 changes: 14 additions & 11 deletions src/LocARNA/rna_data.cc
Original file line number Diff line number Diff line change
Expand Up @@ -55,7 +55,7 @@ namespace LocARNA {
// recompute all probabilities
RnaEnsemble rna_ensemble(
pimpl_->sequence_, pfoldparams, false,
true); // use given parameters, no in loop, use alifold
pimpl_->sequence_.num_of_rows()>1); // use given parameters, no in loop, use alifold unless single seq

// initialize from RnaEnsemble; note: method is virtual
init_from_rna_ensemble(rna_ensemble, pfoldparams);
Expand Down Expand Up @@ -152,7 +152,7 @@ namespace LocARNA {
// recompute all probabilities
RnaEnsemble rna_ensemble(
sequence(), pfoldparams, true,
true); // use given parameters, in-loop, use alifold
pimpl_->sequence_.num_of_rows()>1); // use given parameters, in-loop, use alifold unless single seq

// initialize
init_from_rna_ensemble(rna_ensemble, pfoldparams);
Expand Down Expand Up @@ -1278,20 +1278,23 @@ namespace LocARNA {
*/
std::string
format_prob(double prob) {
size_t precision = 4;

std::ostringstream outd;
outd.precision(3);
outd.precision(precision);
outd << prob;

if (outd.str().length() <= 6) {
return outd.str();
}
std::string s = outd.str();

std::ostringstream outs;
outs.setf(std::ios::scientific, std::ios::floatfield);
outs.precision(2);
outs << prob;
if (outd.str().length() > precision+4) {
std::ostringstream outs;
outs.setf(std::ios::scientific, std::ios::floatfield);
outs.precision(precision-1);
outs << prob;

s = outs.str();
}

std::string s = outs.str();
size_t pos = s.find("e-0");
if (pos != std::string::npos) {
s.replace(pos, 3, "e-");
Expand Down
1 change: 0 additions & 1 deletion src/LocARNA/rna_data_impl.hh
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,6 @@

namespace LocARNA {

class MultipleAlignment;
class RnaEnsemble;
class PFoldParams;
// template<class T> class SparseVector<T>;
Expand Down
32 changes: 16 additions & 16 deletions src/Tests/mlocarna-threads.testresult
Original file line number Diff line number Diff line change
@@ -1,18 +1,18 @@
CLUSTAL W --- LocARNA 2.0.0RC1
CLUSTAL W --- LocARNA 2.0.0RC5


X58295.1_1384-1453 UGGCGUCU-UCAUGAGGGAGGGGCCCAAAGCC----CUUGUGGGCGGACCUCCCCUGAGCCUGUCUGAGGGGCCA
D25220.1_1493-1556 CU-UGCGU-UAAUGAGAACAGAAACGAAAACUAUAA-C-CU-AGGGGUUUCUGUUGGAU---GGUU-GGCAAC--
AY060611.1_560-627 GU-GGCGC-UUAUGACGCAGUUGUCUUAAACUCGAA-CUCG-AGCGGGCAAUUGCUGAUUACGAUU-AACCAC--
L24896.1_600-665 CC-GGCAC-UCAUGACGGUCUGCCUGAAAACCAGCC-CGCU-GGUGGGGCAGUCCCGAGGAC--CU-GGCGUG--
AF096875.1_5504-5568 GU-GUG-C-GAAUGAUAACUACUGACGAAAGAGCUGUCUGC-UCAGUCUGUGGUUGGAU---GUAG-UCACAC--
AF093774.1_5851-5916 GU-GUG-C-GGAUGAUAACUACUGACGAAAGAGUCAUCGACCUCAGUUAGUGGUUGGAU---GUAG-UCACAU--
AE003628.2_106178-106240 UU-CAA-C-UUAUGAGGAUUAUUUCUUAAAGGCC--UCUGG-CUCGGAAAUAGUCUGAA---CCUU-AUUGUA--
AC000078.2_21139-21077 GC-CAG-A-UGAUGACGACCUGGGUGGAAACCUACCCUGUG--GGCACCCAUGUCCGAG---CC-C-CCUGGC--
AF241527.2_359-424 GC-CGC-U-UCAUGACAGGAAGGACUGAAA-UGUCUUAGACCUGUGGUCUUUCCUCGAU---GU-U-CCUGCGGC
AF333036.1_2190-2249 CA-UGCGU-CCAUGAAGUCACUGGCC----UCAAGCCCAAGUGGUGGGCAGUGACAGA---------AGAGCUGC
X57999.1_1526-1586 AUAUUUGU-UUAUGAUGGUCACAGUGUAAA-----GUUCAC--ACAGCUGUGACUUGAUU-UUUAA---AAAU--
X12367.1_703-764 GU-UUU-U-CCAUGACGGUGUUUCCUCUAA-----AUUUAC-AUGGAGAAACACCUGAUU-UCCAG-AAAAAU--
X13710.1_946-1008 UU-UUCAU-CUAUGAGGGUGUUUCCUCUAA-----ACCUACGAGGGAGGAACACCUGAUC-U-UAC-AGAAAA--
AF322071.1_1577-1642 AUGUGGUCUUUAUGAAGGCAGGUGCAGAAACUAUGCACUAGUGG-UGUC--UGUCUGAU---GUUUGGCCAU---
AC002327.1_156204-156268 --CUCAGCAGGAUGAUGAGAAGGGCUGAAAUGCUGC-CAAACCA-GGUCCUUUUCUGAU---GGUGGCUGGG---
X58295.1_1384-1453 --UGGCGUCUUCAUGAGGGAGGGGCCCAAAGCC----CUUGUGGGCGGACCUCCCCUGAGCCUGUCUGAGGGGCCA
AC000078.2_21139-21077 --GC-CAG-AUGAUGACGACCUGGGUGGAAACCUACCCUGUG--GGCACCCAUGUCCGA---GCCCCCU--GGC--
AE003628.2_106178-106240 --UU-CAA-CUUAUGAGGAUUAUUUCUUAAA-GGCCUCUGGC--UCGGAAAUAGUCUGA---ACCUUAU--UGUA-
AF241527.2_359-424 --GC-CGC-UUCAUGACAGGAAGGACUGAAA-UGUCUUAGACCUGUGGUCUUUCCUCGA---UGUUCCU--GCGGC
AF096875.1_5504-5568 --GU-GUG-CGAAUGAUAACUACUGACGAAAGAGCUGUCUGC-UCAGUCUGUGGUUGGA---UG-UAGU--CACAC
AF093774.1_5851-5916 --GU-GUG-CGGAUGAUAACUACUGACGAAAGAGUCAUCGACCUCAGUUAGUGGUUGGA---UG-UAGU--CACAU
D25220.1_1493-1556 --CU-UGCGUUAAUGAGAACAGAAACGAAAACUAUAAC--CU-AGGGGUUUCUGUUGGA---U---GGUUGGCAAC
AY060611.1_560-627 --GU-GGCGCUUAUGACGCAGUUGUCUUAAACUCGAAC-UCG-AGCGGGCAAUUGCUGA---UUACGAUUAACCAC
L24896.1_600-665 --CC-GGCACUCAUGACGGUCUGCCUGAAAACCAGCCC-GCU-GGUGGGGCAGUCCCGA---GGAC--CUGGCGUG
AF333036.1_2190-2249 --CA-UGCGUCCAUGAAGUCACUGGCC----UCAAGCCCAAGUGGUGGGCAGUGACAGA---AGA------GCUGC
X12367.1_703-764 --GU-UUU-UCCAUGACGGUGUUUCCUCUAA-----AUUUAC-AUGGAGAAACACCUGA---UUUCCAG-AAAAAU
X13710.1_946-1008 --UU-UUCAUCUAUGAGGGUGUUUCCUCUAA-----ACCUACGAGGGAGGAACACCUGA---UCU-UAC-AGAAAA
X57999.1_1526-1586 --AUAUUUGUUUAUGAUGGUCACAGUGUAAA-----GUUCAC--ACAGCUGUGACUUGA---UUUUUAA---AAAU
AF322071.1_1577-1642 AUGU-GGUCUUUAUGAAGGCAGGUGCAGAAACUAUGCA--CUAGU-GGUGUCUGUCUGA------UGUUUGGCCAU
AC002327.1_156204-156268 --CU-CAGCAGGAUGAUGAGAAGGGCUGAAAUGCUGCC--AAACCAGGUCCUUUUCUGA------UGGUGGCUGGG
24 changes: 19 additions & 5 deletions src/Utils/mlocarna
Original file line number Diff line number Diff line change
Expand Up @@ -1076,7 +1076,8 @@ my %opts;
## some default values
#

$opts{'noLP'}=1;
# handle noLP / LP later
# $opts{'noLP'}=1;

$opts{'skip-pp'}=0; ## =1 skips the computation of pair probabilities
## for files that exist already
Expand Down Expand Up @@ -1274,15 +1275,16 @@ if (defined($opts{'evaluate'})) {
exit 0;
}

# LP and noLP should never both be given on the command line
if (defined($opts{'LP'}) && defined($opts{'noLP'})) {
printerr "Only one of the options --noLP and --LP can be defined at a time.\n";
pod2usage(1);
printerr "ERROR: The flags --noLP and --LP contradict each other.\n";
exit(-1);
}

# this makes noLP the default, unless LP is given
if (defined($opts{'LP'})) {
$opts{'noLP'}=0;
}
if (defined($opts{'noLP'})) {
} else {
$opts{'noLP'}=1;
}

Expand Down Expand Up @@ -2903,6 +2905,10 @@ sub compute_dotplot_rnafold_pp {

my @fold_cmd = ( "$bindir/locarna_rnafold_pp", "-p" => $opts{'min-prob'} );

if ($opts{'noLP'}) {
push @fold_cmd, "--noLP";
}

if ($opts{'in-loop-robabilities'}) {
push @fold_cmd, "--in-loop";
}
Expand Down Expand Up @@ -2981,6 +2987,10 @@ sub compute_dotplot_rnafold {
my @fold_cmd = ("$RNAfold", "-p2");
push @fold_cmd, @RNAfold_args;

if ($opts{'noLP'}) {
push @fold_cmd, '--noLP';
}

my $seq_str = $seq->{seq}; ## the sequence string

my $input = ">$tmpname\n$seq_str\n";
Expand Down Expand Up @@ -3022,6 +3032,10 @@ sub compute_dotplot_rnaplfold {
"-L" => $opts{'plfold-span'},
"-W" => $opts{'plfold-winsize'}, @RNAfold_args;

if ($opts{'noLP'}) {
push @fold_cmd, '--noLP';
}

my $slen=length($seq_str);
my $win = $slen;
for (my $i=0; $i < @fold_cmd; $i++) {
Expand Down

0 comments on commit ada3ea0

Please sign in to comment.