Skip to content

Commit

Permalink
Merge pull request #878 from shawnlaffan/export_R_phylo
Browse files Browse the repository at this point in the history
Export to R phylo format
  • Loading branch information
shawnlaffan authored Sep 19, 2023
2 parents 9d2b3b0 + ef97109 commit 9216d7f
Show file tree
Hide file tree
Showing 9 changed files with 158 additions and 12 deletions.
4 changes: 4 additions & 0 deletions lib/Biodiverse/BaseData.pm
Original file line number Diff line number Diff line change
Expand Up @@ -76,6 +76,10 @@ sub new {
if ( defined $args{file} ) {
my $file_loaded;
$file_loaded = $self->load_file(@_);
# hack to avoid seg faults with csv objects
$file_loaded->get_groups_ref->delete_element_name_csv_object;
$file_loaded->get_labels_ref->delete_element_name_csv_object;

return $file_loaded;
}

Expand Down
2 changes: 1 addition & 1 deletion lib/Biodiverse/Cluster.pm
Original file line number Diff line number Diff line change
Expand Up @@ -1911,7 +1911,7 @@ sub setup_linkage_function {

my @linkage_functions = $self->get_linkage_functions;
my $valid = grep {$linkage_function eq $_} @linkage_functions;

my $class = blessed $self;
croak "Linkage function $linkage_function is not valid for an object of type $class\n"
if !$valid;
Expand Down
11 changes: 11 additions & 0 deletions lib/Biodiverse/Common.pm
Original file line number Diff line number Diff line change
Expand Up @@ -1319,6 +1319,17 @@ sub get_csv_object {
return $csv;
}

# csv can cause seg faults when reloaded
# have not yet sorted out why
sub delete_element_name_csv_object {
my ($self) = @_;

state $cache_name = '_ELEMENT_NAME_CSV_OBJECT';
$self->delete_cached_value ($cache_name);

return;
}

sub get_element_name_csv_object {
my ($self) = @_;

Expand Down
4 changes: 4 additions & 0 deletions lib/Biodiverse/GUI/Project.pm
Original file line number Diff line number Diff line change
Expand Up @@ -578,6 +578,10 @@ sub add_base_data {
$self->select_base_data($basedata_ref);
}

# hack to avoid seg faults with csv objects
$basedata_ref->get_groups_ref->delete_element_name_csv_object;
$basedata_ref->get_labels_ref->delete_element_name_csv_object;

$self->set_dirty();
return $basedata_ref;
}
Expand Down
15 changes: 5 additions & 10 deletions lib/Biodiverse/ReadNexus.pm
Original file line number Diff line number Diff line change
Expand Up @@ -102,13 +102,13 @@ sub import_data {
$self->set_param (USE_ELEMENT_PROPERTIES => $use_element_properties);

# no import_newick here as import_phylip handles such files
my @import_methods = qw/import_nexus import_phylip import_R_phylo_json import_tabular_tree/;
my @import_methods = qw/import_nexus import_phylip import_R_phylo import_tabular_tree/;
# promote most likely method to front of the list
if (defined $args{file} && $args{file} =~ /(txt|csv)$/) { # dirty hack
unshift @import_methods, 'import_tabular_tree';
}
elsif (is_ref ($args{data})) { # also a dirty hack
unshift @import_methods, 'import_R_phylo_json';
unshift @import_methods, 'import_R_phylo';
}
else {
my $method = $self->probe_file_type (%args);
Expand Down Expand Up @@ -631,7 +631,7 @@ sub get_csv_object_for_tabular_tree_import {
}
sub import_R_phylo_json {
sub import_R_phylo {
my $self = shift;
my %args = @_;
Expand Down Expand Up @@ -713,19 +713,14 @@ sub import_R_phylo_json {
@length_hash{@node_arr} = @lengths;
$length_hash{$root_idx} = $root_len;

# add the root node after the others so we avoid name clashes
my %node_refs;
# add the root node
$node_refs{$root_idx} = $tree->add_node (
name => $tip_labels[$root_idx] // $tree->get_free_internal_name,
length => $length_hash{$root_idx},
);
foreach my $idx (@node_arr) {
foreach my $idx (@node_arr, $root_idx) {
my $name = $tip_labels[$idx] // $tree->get_free_internal_name;
$node_refs{$idx} = $tree->add_node (
name => $name,
length => $length_hash{$idx} // 1,
);
#say "Created node ", $idx, " ", $node_refs{$idx}->get_name, " length is ", $length_hash{$idx} // "UNDEFINED";
}

# set the parents - can this be done better?
Expand Down
85 changes: 85 additions & 0 deletions lib/Biodiverse/TreeNode.pm
Original file line number Diff line number Diff line change
Expand Up @@ -2309,6 +2309,91 @@ sub to_newick { # convert the tree to a newick format. Based on the NEXUS li
return $string;
}

# convert the tree to the same structure as used by the R phylo system
# this cannot be a recursive function
sub to_R_phylo {
my $self = shift;
my %args = (use_internal_names => 1, @_);
my $remap = $args{remap} || {};
my $use_int_names = $args{use_internal_names};

use Sort::Key::Multi qw/iskeysort/;
my $terminals = $self->get_terminal_node_refs;

my @nodes = iskeysort {($_->get_depth, $_->get_name)} values %$terminals;

# push @nodes, $self; # no self in node array
my (@length_arr, @parent_id_arr, @tip_labels, @internal_labels);
my @node_id_arr = (1..@nodes);
my %name_id_hash;
@name_id_hash{map {$_->get_name} @nodes} = @node_id_arr;
my $max_node_id = 1 + @node_id_arr;
my $root_id = $max_node_id;
$name_id_hash{$self->get_name} = $root_id;

my $quote_char = q{'};
my $csv_obj = $args{csv_object} ||
$self->get_csv_object(
quote_char => $quote_char,
escape_char => $quote_char,
always_quote => 1,
);

NODE:
while (my $node = shift @nodes) {

my $orig_name = $node->get_name;
if (!$name_id_hash{$orig_name}) {
$max_node_id++;
$name_id_hash{$orig_name} = $max_node_id;
}

my $name = defined $remap->{$orig_name}
? $remap->{$orig_name}
: $orig_name;
# : $self->list2csv(csv_object => $csv_obj, list => [ $orig_name ]);

my $length = $node->get_length;
$length =~ s/,/./; # hack for issue #775 (another comma radix char)
push @length_arr, $length;

if ($node->is_terminal_node) {
push @tip_labels, $name;
}
else {
push @internal_labels, $name;
push @node_id_arr, $name_id_hash{$orig_name};
}

next NODE if $node->is_root_node;

my $parent_id;
my $parent = $node->get_parent;

my $parent_name = $parent->get_name;
$parent_id =$name_id_hash{$parent_name};
if (!$parent_id) {
$max_node_id++;
$name_id_hash{$parent_name}
= $parent_id
= $max_node_id;
push @nodes, $parent;
}

push @parent_id_arr, $parent_id;
}

my %str = (
'edge' => [@parent_id_arr, @node_id_arr],
'edge.length' => \@length_arr,
'Nnode' => (scalar @internal_labels + 1),
'node.label' => \@internal_labels,
'tip.label' => \@tip_labels,
'root.edge' => 0,
);

return wantarray ? %str : \%str;
}


sub print { # prints out the tree (for debugging)
Expand Down
26 changes: 26 additions & 0 deletions t/13-Tree.t
Original file line number Diff line number Diff line change
Expand Up @@ -938,6 +938,32 @@ sub _test_export_nexus {
}


sub test_export_Rphylo {
my $tree2 = shift // get_site_data_as_tree();

my $nwk = '(((t1:0.1838405095,t3:0.7839861871):0.7242035018,t7:0.8255161436):0.9768610101,((t6:0.2164495632,t8:0.8440289358):0.7437079474,(t4:0.4462201281,(t5:0.1244694644,t2:0.3507230047):0.7634477804):0.06578667508):0.5001766474)';
my $rn = Biodiverse::ReadNexus->new;
my $success = $rn->import_newick (data => $nwk);
my @trees = $rn->get_tree_array;
my $tree1 = shift @trees;

my $i;
foreach my $tree ($tree1, $tree2) {
$i++;
my $result = $tree->to_R_phylo;
# round trip it
$rn = Biodiverse::ReadNexus->new;
$rn->import_R_phylo(data => $result);
@trees = $rn->get_tree_array;
my $roundtripper = shift @trees;
ok($tree->trees_are_same(comparison => $roundtripper),
"roundtripped via Rphylo, tree $i"
);
}


}

sub test_roundtrip_names_with_quotes_in_newick {
# need a basedata to get the quoting we need to test
my $bd = Biodiverse::BaseData->new(name => 'blonk', CELL_SIZES => [1,1]);
Expand Down
2 changes: 1 addition & 1 deletion t/21-ReadNexus.t
Original file line number Diff line number Diff line change
Expand Up @@ -429,7 +429,7 @@ sub test_read_R_phylo_json_data {
my $readnex = Biodiverse::ReadNexus->new;
#diag $baddata;
$result = eval {
$readnex->import_R_phylo_json (data => $baddata);
$readnex->import_R_phylo (data => $baddata);
};
is $@,
'JSON data is not an R phylo structure',
Expand Down
21 changes: 21 additions & 0 deletions t/lib/Biodiverse/TestHelpers.pm
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ use English qw { -no_match_vars };
use Carp;
use Scalar::Util::Numeric qw/isfloat/;
use Ref::Util qw /is_ref is_arrayref is_hashref/;
use JSON::MaybeXS;
use Biodiverse::Config;

use Test2::V0;
Expand Down Expand Up @@ -85,6 +86,8 @@ use Exporter::Easy (
get_tree_array_from_sample_data
get_R_phylo_json_tree_data
get_R_phylo_json_tree_data_internal_labels
get_R_phylo_tree_data
get_R_phylo_tree_data_internal_labels
:utils
),
],
Expand Down Expand Up @@ -804,6 +807,14 @@ sub get_R_phylo_json_tree_data_internal_labels {
return get_data_section('R_PHYLO_JSON_TREE_INTERNAL_LABELS');
}

sub get_R_phylo_tree_data {
return decode_json get_R_phylo_json_tree_data();
}

sub get_R_phylo_tree_data_internal_labels {
return decode_json get_R_phylo_json_tree_data_internal_labels();
}

sub get_tabular_tree_data {
return get_data_section_with_unix_line_endings('TABULAR_TREE');
}
Expand Down Expand Up @@ -1808,6 +1819,16 @@ b,3,2,1
@@ R_PHYLO_JSON_TREE_INTERNAL_LABELS
{"edge":[32,33,34,35,36,37,38,39,40,41,42,42,41,43,43,40,39,44,44,38,45,46,47,48,49,49,48,47,46,45,50,50,37,51,52,52,51,36,53,54,54,53,55,55,35,34,56,56,33,57,58,59,60,60,59,61,61,58,57,32,33,34,35,36,37,38,39,40,41,42,1,2,43,3,4,5,44,6,7,45,46,47,48,49,8,9,10,11,12,50,13,14,51,52,15,16,17,53,54,18,19,55,20,21,22,56,23,24,57,58,59,60,25,26,61,27,28,29,30,31],"edge.length":[0.00993044169650192,0.00291112550535999,0.00121523842637206,0.020427284632173,0.026396763298318,0.057495084175743,0.03299436960061,0.05703610742759,0.106700478344225,0.077662337662338,0.6,0.6,0.098714969241285,0.578947368421053,0.578947368421053,0.784362816006563,0.341398923434153,0.5,0.5,0.075867662593738,0.172696292660468,0.0272381982058111,0.11249075347436,0.051317777404734,0.434782608695652,0.434782608695652,0.486100386100386,0.598591139574746,0.625829337780557,0.34398017589557,0.454545454545455,0.454545454545455,0.265221710543839,0.666666666666667,0,0,0.666666666666667,0.0574914897151729,0.111319966583125,0.789473684210526,0.789473684210526,0.300793650793651,0.6,0.6,0.978712425140997,0.729927663567369,0.25,0.25,0.027283233517174,0.258187134502923,0.075519681556834,0.160310277957336,0.461538461538462,0.461538461538462,0.455182072829131,0.166666666666667,0.166666666666667,0.697368421052632,0.955555555555555,0.992769230769231],"Nnode":30,"node.label":["","'58___'","'52___'","'50___'","'49___'","'45___'","'42___'","'35___'","'33___'","'32___'","","'31___'","'34___'","'41___'","'39___'","'38___'","'37___'","'36___'","'40___'","'44___'","'43___'","'48___'","'46___'","'47___'","'51___'","'57___'","'56___'","'55___'","'53___'","'54___'"],"tip.label":["'Genus:sp19'","'Genus:sp5'","'Genus:sp15'","'Genus:sp1'","'Genus:sp10'","'Genus:sp26'","'Genus:sp20'","'Genus:sp23'","'Genus:sp30'","'Genus:sp11'","'Genus:sp29'","'Genus:sp12'","'Genus:sp21'","'Genus:sp18'","'Genus:sp28'","'Genus:sp31'","'Genus:sp27'","'Genus:sp9'","'Genus:sp8'","'Genus:sp14'","'Genus:sp16'","'Genus:sp17'","'Genus:sp24'","'Genus:sp25'","'Genus:sp2'","'Genus:sp3'","'Genus:sp6'","'Genus:sp7'","'Genus:sp4'","'Genus:sp13'","'Genus:sp22'"],"root.edge":0}
@@ R_PHYLO_TREE
{
"edge" => [32,33,34,35,36,37,38,39,40,41,42,42,41,43,43,40,39,44,44,38,45,46,47,48,49,49,48,47,46,45,50,50,37,51,52,52,51,36,53,54,54,53,55,55,35,34,56,56,33,57,58,59,60,60,59,61,61,58,57,32,33,34,35,36,37,38,39,40,41,42,1,2,43,3,4,5,44,6,7,45,46,47,48,49,8,9,10,11,12,50,13,14,51,52,15,16,17,53,54,18,19,55,20,21,22,56,23,24,57,58,59,60,25,26,61,27,28,29,30,31],
"edge.length" => [0.00993044169650192,0.00291112550535999,0.00121523842637206,0.020427284632173,0.026396763298318,0.057495084175743,0.03299436960061,0.05703610742759,0.106700478344225,0.077662337662338,0.6,0.6,0.098714969241285,0.578947368421053,0.578947368421053,0.784362816006563,0.341398923434153,0.5,0.5,0.075867662593738,0.172696292660468,0.0272381982058111,0.11249075347436,0.051317777404734,0.434782608695652,0.434782608695652,0.486100386100386,0.598591139574746,0.625829337780557,0.34398017589557,0.454545454545455,0.454545454545455,0.265221710543839,0.666666666666667,0,0,0.666666666666667,0.0574914897151729,0.111319966583125,0.789473684210526,0.789473684210526,0.300793650793651,0.6,0.6,0.978712425140997,0.729927663567369,0.25,0.25,0.027283233517174,0.258187134502923,0.075519681556834,0.160310277957336,0.461538461538462,0.461538461538462,0.455182072829131,0.166666666666667,0.166666666666667,0.697368421052632,0.955555555555555,0.992769230769231],
"Nnode" => 30,
"node.label" => ["","'58___'","'52___'","'50___'","'49___'","'45___'","'42___'","'35___'","'33___'","'32___'","","'31___'","'34___'","'41___'","'39___'","'38___'","'37___'","'36___'","'40___'","'44___'","'43___'","'48___'","'46___'","'47___'","'51___'","'57___'","'56___'","'55___'","'53___'","'54___'"],
"tip.label" => ["'Genus:sp19'","'Genus:sp5'","'Genus:sp15'","'Genus:sp1'","'Genus:sp10'","'Genus:sp26'","'Genus:sp20'","'Genus:sp23'","'Genus:sp30'","'Genus:sp11'","'Genus:sp29'","'Genus:sp12'","'Genus:sp21'","'Genus:sp18'","'Genus:sp28'","'Genus:sp31'","'Genus:sp27'","'Genus:sp9'","'Genus:sp8'","'Genus:sp14'","'Genus:sp16'","'Genus:sp17'","'Genus:sp24'","'Genus:sp25'","'Genus:sp2'","'Genus:sp3'","'Genus:sp6'","'Genus:sp7'","'Genus:sp4'","'Genus:sp13'","'Genus:sp22'"],
"root.edge" => 0
}
@@ NEWICK_TREE
(((((((((((44:0.6,47:0.6):0.077662337662338,(18:0.578947368421053,58:0.578947368421053):0.098714969241285):0.106700478344225,31:0.784362816006563):0.05703610742759,(6:0.5,35:0.5):0.341398923434153):0.03299436960061,(((((1:0.434782608695652,52:0.434782608695652):0.051317777404734,57:0.486100386100386):0.11249075347436,22:0.598591139574746):0.0272381982058111,46:0.625829337780557):0.172696292660468,(7:0.454545454545455,9:0.454545454545455):0.34398017589557):0.075867662593738):0.057495084175743,((4:0,25:0):0.666666666666667,16:0.666666666666667):0.265221710543839):0.026396763298318,((0:0.789473684210526,12:0.789473684210526):0.111319966583125,(15:0.6,30:0.6):0.300793650793651):0.0574914897151729):0.020427284632173,48:0.978712425140997):0.00121523842637206,(24:0.25,55:0.25):0.729927663567369):0.00291112550535999,((((38:0.461538461538462,13:0.461538461538462):0.160310277957336,(50:0.166666666666667,59:0.166666666666667):0.455182072829131):0.075519681556834,32:0.697368421052632):0.258187134502923,2:0.955555555555555):0.027283233517174):0.00993044169650192,42:0.992769230769231):0;
Expand Down

0 comments on commit 9216d7f

Please sign in to comment.