From 5c307819f314afc1ec04d3380f0cc1bacac36e98 Mon Sep 17 00:00:00 2001 From: shawnlaffan Date: Tue, 27 Feb 2024 18:00:27 +1100 Subject: [PATCH 01/27] delete_params: avoid some looping --- lib/Biodiverse/Common.pm | 11 +---------- 1 file changed, 1 insertion(+), 10 deletions(-) diff --git a/lib/Biodiverse/Common.pm b/lib/Biodiverse/Common.pm index feb2d0f38..fb6fa0698 100644 --- a/lib/Biodiverse/Common.pm +++ b/lib/Biodiverse/Common.pm @@ -447,16 +447,7 @@ sub weaken_param { sub delete_params { my $self = shift; - my $count = 0; - foreach my $param (@_) { # should only delete those that exist... - if (delete $self->{PARAMS}{$param}) { - $count ++; - print "Deleted parameter $param from $self\n" - if $self->get_param('PARAM_CHANGE_WARN'); - } - } # inefficient, as we could use a hash slice to do all in one hit, but allows better feedback - - return $count; + scalar delete @{$self->{PARAMS}}{@_}; } # an internal apocalyptic sub. use only for destroy methods From 36facdf4fae770980f1711d00a86a26c02aa8d74 Mon Sep 17 00:00:00 2001 From: shawnlaffan Date: Tue, 27 Feb 2024 18:50:29 +1100 Subject: [PATCH 02/27] Indices: optimise RPE2 Use a global precalc to get all the range inverse scores, set to zero if the branch length is zero. This avoids a lot of operations when summing the local range weights. --- .../Indices/PhylogeneticRelative.pm | 98 ++++++++++++++++--- 1 file changed, 85 insertions(+), 13 deletions(-) diff --git a/lib/Biodiverse/Indices/PhylogeneticRelative.pm b/lib/Biodiverse/Indices/PhylogeneticRelative.pm index 7513da897..5b87075df 100644 --- a/lib/Biodiverse/Indices/PhylogeneticRelative.pm +++ b/lib/Biodiverse/Indices/PhylogeneticRelative.pm @@ -3,6 +3,8 @@ use 5.022; use strict; use warnings; +use experimental qw/refaliasing/; + use English qw /-no_match_vars/; use List::Util qw /sum first/; @@ -241,6 +243,7 @@ sub get_metadata_calc_phylo_rpe_central { get_trimmed_tree get_trimmed_tree_with_equalised_branch_lengths get_trimmed_tree_eq_branch_lengths_node_length_hash + get_trimmed_tree_range_inverse_hash_nonzero_len /], uses_nbr_lists => 1, indices => { @@ -311,6 +314,7 @@ sub get_metadata_calc_phylo_rpe2 { get_trimmed_tree get_trimmed_tree_with_equalised_branch_lengths get_trimmed_tree_eq_branch_lengths_node_length_hash + get_trimmed_tree_range_inverse_hash_nonzero_len /], uses_nbr_lists => 1, indices => { @@ -375,21 +379,17 @@ sub calc_phylo_rpe2 { my $node_ranges_global = $args{PE_RANGELIST}; my $null_node_len_hash = $args{TREE_REF_EQUALISED_BRANCHES_TRIMMED_NODE_LENGTH_HASH}; my $default_eq_len = $args{TREE_REF_EQUALISED_BRANCHES_TRIMMED_NODE_LENGTH}; + \my %range_inverse = $args{trimmed_tree_range_inverse_hash_nonzero_len}; # Get the PE score assuming equal branch lengths my ($pe_null, $null, $phylo_rpe2, $diff); # First condition optimises for the common case where all local ranges are 1 - my $zero_len_branch_names = $null_tree_ref->get_zero_node_length_hash; - if (($args{EL_COUNT_ALL} // $args{EL_COUNT_SET1} // 0) == 1 - && scalar keys %$zero_len_branch_names < 10 # arbitrary number - ) { - delete local @$node_ranges_global{keys %$zero_len_branch_names}; - $pe_null += (1 / $_) foreach values %$node_ranges_global; + if (($args{EL_COUNT_ALL} // $args{EL_COUNT_SET1} // 0) == 1) { + $pe_null += $_ foreach @range_inverse{keys %$node_ranges_local}; $pe_null *= $default_eq_len; - #say STDERR 'jjjj'; } - elsif (HAVE_BD_UTILS) { + elsif (0 && HAVE_BD_UTILS) { $pe_null = Biodiverse::Utils::get_rpe_null ( $null_node_len_hash, $node_ranges_local, @@ -397,11 +397,12 @@ sub calc_phylo_rpe2 { ); } else { - foreach my $null_node (keys %$node_ranges_global) { - $pe_null += $null_node_len_hash->{$null_node} - * $node_ranges_local->{$null_node} - / $node_ranges_global->{$null_node}; - } + # postfix for speed + $pe_null + += $range_inverse{$_} + * $node_ranges_local->{$_} + foreach keys %$node_ranges_local; + $pe_null *= $default_eq_len; } { @@ -692,5 +693,76 @@ sub get_trimmed_tree_eq_branch_lengths_node_length_hash { return wantarray ? %results : \%results; } +sub get_metadata_get_trimmed_tree_range_inverse_hash { + my %metadata = ( + name => 'get_trimmed_tree_range_inverses', + description + => "Get a hash of the node range inverse values\n" + . "Forms the basis of the RPE calcs for equal area cells", + pre_calc_global => ['get_node_range_hash'], + indices => { + trimmed_tree_range_inverse_hash => { + description => 'Hash of trimmed tree range inverse values', + }, + }, + ); + return $metadata_class->new(\%metadata); +} + +sub get_trimmed_tree_range_inverse_hash { + my $self = shift; + my %args = @_; + + # my $tree = $args{TRIMMED_TREE}; + my $node_ranges = $args{node_range}; + + my %range_weighted; + + foreach my $name (keys %$node_ranges) { + my $range = $node_ranges->{$name} || next; + $range_weighted{$name} = 1 / $range; + } + + my %results = (trimmed_tree_range_inverse_hash => \%range_weighted); + + return wantarray ? %results : \%results; +} + +sub get_metadata_get_trimmed_tree_range_inverse_hash_nonzero_len { + my %metadata = ( + name => 'get_trimmed_tree_range_inverses_nonzero_len', + description + => "Get a hash of the node range inverse values for non-zero lengths\n" + . "Forms the basis of the RPE calcs for equal area cells", + pre_calc_global => ['get_node_range_hash', 'get_trimmed_tree'], + indices => { + trimmed_tree_range_inverse_hash => { + description => 'Hash of trimmed tree range inverse values', + }, + }, + ); + return $metadata_class->new(\%metadata); +} + +sub get_trimmed_tree_range_inverse_hash_nonzero_len { + my $self = shift; + my %args = @_; + + my $tree = $args{trimmed_tree}; + my $node_ranges = $args{node_range}; + + my %range_weighted; + + foreach my $name (keys %$node_ranges) { + my $range = $node_ranges->{$name} || next; + my $numerator = $tree->get_node_ref_aa($name)->get_length ? 1 : 0; + $range_weighted{$name} = $numerator / $range; + } + + my %results = (trimmed_tree_range_inverse_hash_nonzero_len => \%range_weighted); + + return wantarray ? %results : \%results; +} + 1; From fcb83159471dcded47ec2ed34f53cf9e960832ea Mon Sep 17 00:00:00 2001 From: shawnlaffan Date: Tue, 27 Feb 2024 18:51:47 +1100 Subject: [PATCH 03/27] simplify code a little remove one variable --- lib/Biodiverse/Indices/PhylogeneticRelative.pm | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/lib/Biodiverse/Indices/PhylogeneticRelative.pm b/lib/Biodiverse/Indices/PhylogeneticRelative.pm index 5b87075df..ecf44d94d 100644 --- a/lib/Biodiverse/Indices/PhylogeneticRelative.pm +++ b/lib/Biodiverse/Indices/PhylogeneticRelative.pm @@ -363,10 +363,9 @@ sub calc_phylo_rpe2 { my $null_total_tree_length = $null_tree_ref->get_total_tree_length; my $pe_p_score = $args{PE_WE_P}; - my $pe_score = $args{PE_WE}; # no point calculating anything if PE is undef - if (!defined $pe_score) { + if (!defined $pe_p_score) { my %results = ( PHYLO_RPE2 => undef, PHYLO_RPE_NULL2 => undef, From fa722088e4905a948b88d69a5f56738d33eee072 Mon Sep 17 00:00:00 2001 From: shawnlaffan Date: Tue, 27 Feb 2024 19:02:00 +1100 Subject: [PATCH 04/27] Indices: optimise calc_rpe2 Use refaliasing and postfix for loops, and remove a Biodiverse::Utils call. --- .../Indices/PhylogeneticRelative.pm | 21 ++++++------------- 1 file changed, 6 insertions(+), 15 deletions(-) diff --git a/lib/Biodiverse/Indices/PhylogeneticRelative.pm b/lib/Biodiverse/Indices/PhylogeneticRelative.pm index ecf44d94d..0fc5c5559 100644 --- a/lib/Biodiverse/Indices/PhylogeneticRelative.pm +++ b/lib/Biodiverse/Indices/PhylogeneticRelative.pm @@ -374,33 +374,24 @@ sub calc_phylo_rpe2 { return wantarray ? %results : \%results; } - my $node_ranges_local = $args{PE_LOCAL_RANGELIST}; - my $node_ranges_global = $args{PE_RANGELIST}; - my $null_node_len_hash = $args{TREE_REF_EQUALISED_BRANCHES_TRIMMED_NODE_LENGTH_HASH}; - my $default_eq_len = $args{TREE_REF_EQUALISED_BRANCHES_TRIMMED_NODE_LENGTH}; - \my %range_inverse = $args{trimmed_tree_range_inverse_hash_nonzero_len}; + my $default_eq_len = $args{TREE_REF_EQUALISED_BRANCHES_TRIMMED_NODE_LENGTH}; + \my %node_ranges_local = $args{PE_LOCAL_RANGELIST}; + \my %range_inverse = $args{trimmed_tree_range_inverse_hash_nonzero_len}; # Get the PE score assuming equal branch lengths my ($pe_null, $null, $phylo_rpe2, $diff); # First condition optimises for the common case where all local ranges are 1 if (($args{EL_COUNT_ALL} // $args{EL_COUNT_SET1} // 0) == 1) { - $pe_null += $_ foreach @range_inverse{keys %$node_ranges_local}; + $pe_null += $_ foreach @range_inverse{keys %node_ranges_local}; $pe_null *= $default_eq_len; } - elsif (0 && HAVE_BD_UTILS) { - $pe_null = Biodiverse::Utils::get_rpe_null ( - $null_node_len_hash, - $node_ranges_local, - $node_ranges_global, - ); - } else { # postfix for speed $pe_null += $range_inverse{$_} - * $node_ranges_local->{$_} - foreach keys %$node_ranges_local; + * $node_ranges_local{$_} + foreach keys %node_ranges_local; $pe_null *= $default_eq_len; } From 6904a8f4291d55c06c5b4ec917537e9c1081d24a Mon Sep 17 00:00:00 2001 From: shawnlaffan Date: Tue, 27 Feb 2024 19:10:28 +1100 Subject: [PATCH 05/27] Indices: calc_phylo_rpe2: re-order some early exit conditions --- .../Indices/PhylogeneticRelative.pm | 25 +++++++++---------- 1 file changed, 12 insertions(+), 13 deletions(-) diff --git a/lib/Biodiverse/Indices/PhylogeneticRelative.pm b/lib/Biodiverse/Indices/PhylogeneticRelative.pm index 0fc5c5559..bc4f0cd23 100644 --- a/lib/Biodiverse/Indices/PhylogeneticRelative.pm +++ b/lib/Biodiverse/Indices/PhylogeneticRelative.pm @@ -340,6 +340,18 @@ sub calc_phylo_rpe2 { my $self = shift; my %args = @_; + my $pe_p_score = $args{PE_WE_P}; + + # no point calculating anything if PE is undef + if (!defined $pe_p_score) { + my %results = ( + PHYLO_RPE2 => undef, + PHYLO_RPE_NULL2 => undef, + PHYLO_RPE_DIFF2 => undef, + ); + return wantarray ? %results : \%results; + } + if (!@{$args{element_list2} // []}) { # We just copy the calc_phylo_rpe_central results # if there are no nbrs in set2 @@ -355,25 +367,12 @@ sub calc_phylo_rpe2 { } } - my $orig_tree_ref = $args{trimmed_tree}; my $orig_total_tree_length = $orig_tree_ref->get_total_tree_length; my $null_tree_ref = $args{TREE_REF_EQUALISED_BRANCHES_TRIMMED}; my $null_total_tree_length = $null_tree_ref->get_total_tree_length; - my $pe_p_score = $args{PE_WE_P}; - - # no point calculating anything if PE is undef - if (!defined $pe_p_score) { - my %results = ( - PHYLO_RPE2 => undef, - PHYLO_RPE_NULL2 => undef, - PHYLO_RPE_DIFF2 => undef, - ); - return wantarray ? %results : \%results; - } - my $default_eq_len = $args{TREE_REF_EQUALISED_BRANCHES_TRIMMED_NODE_LENGTH}; \my %node_ranges_local = $args{PE_LOCAL_RANGELIST}; \my %range_inverse = $args{trimmed_tree_range_inverse_hash_nonzero_len}; From ad97510099311f0fdb0be6a8fb792bd82d7edc08 Mon Sep 17 00:00:00 2001 From: shawnlaffan Date: Tue, 27 Feb 2024 19:50:08 +1100 Subject: [PATCH 06/27] Move comment, clarify it --- lib/Biodiverse/Indices/PhylogeneticRelative.pm | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/lib/Biodiverse/Indices/PhylogeneticRelative.pm b/lib/Biodiverse/Indices/PhylogeneticRelative.pm index bc4f0cd23..1144b584f 100644 --- a/lib/Biodiverse/Indices/PhylogeneticRelative.pm +++ b/lib/Biodiverse/Indices/PhylogeneticRelative.pm @@ -396,7 +396,8 @@ sub calc_phylo_rpe2 { { no warnings qw /numeric uninitialized/; - $null = eval {$pe_null / $null_total_tree_length}; # equiv to PE_WE_P for the equalised tree + # null is equiv to PE_WE_P for the equalised tree + $null = eval {$pe_null / $null_total_tree_length}; $phylo_rpe2 = eval {$pe_p_score / $null}; $diff = eval {$orig_total_tree_length * ($pe_p_score - $null)}; } From 36eda2fdf13bdfb6571c44cb7152ad3ef42a4da7 Mon Sep 17 00:00:00 2001 From: shawnlaffan Date: Tue, 27 Feb 2024 20:32:26 +1100 Subject: [PATCH 07/27] Indices: move PE_RANGELIST out of _calc_pe This takes time to calculate and is generally not needed by most calcs that depend on _calc_pe. Instead we can calculate it within calc_pe_lists, which is the user-facing calc that provides it. --- lib/Biodiverse/Indices/Phylogenetic.pm | 13 ++++++++++--- lib/Biodiverse/Indices/Phylogenetic/RefAlias.pm | 13 ------------- lib/Biodiverse/Indices/PhylogeneticRelative.pm | 1 - 3 files changed, 10 insertions(+), 17 deletions(-) diff --git a/lib/Biodiverse/Indices/Phylogenetic.pm b/lib/Biodiverse/Indices/Phylogenetic.pm index 94b0d4050..0d354e93c 100644 --- a/lib/Biodiverse/Indices/Phylogenetic.pm +++ b/lib/Biodiverse/Indices/Phylogenetic.pm @@ -772,7 +772,8 @@ sub get_metadata_calc_pe_lists { name => 'Phylogenetic Endemism lists', reference => 'Rosauer et al (2009) Mol. Ecol. https://doi.org/10.1111/j.1365-294X.2009.04311.x', type => 'Phylogenetic Endemism Indices', - pre_calc => ['_calc_pe'], + pre_calc => ['_calc_pe'], + pre_calc_global => [ 'get_node_range_hash' ], uses_nbr_lists => 1, distribution => 'nonnegative', indices => { @@ -800,6 +801,12 @@ sub calc_pe_lists { my @keys = qw /PE_WTLIST PE_RANGELIST PE_LOCAL_RANGELIST/; my %results = %args{@keys}; + # PE_RANGELIST used to be done in _calc_pe + if (!$results{PE_RANGELIST}) { + \my %ranges = $args{node_range}; + my %h = %ranges{keys %{$results{PE_WTLIST}}}; + $results{PE_RANGELIST} = \%h; + } return wantarray ? %results : \%results; } @@ -888,7 +895,7 @@ END_PEC_DESC name => 'Phylogenetic Endemism central lists', reference => 'Rosauer et al (2009) Mol. Ecol. https://doi.org/10.1111/j.1365-294X.2009.04311.x', type => 'Phylogenetic Endemism Indices', - pre_calc => [qw /_calc_pe _calc_phylo_abc_lists/], + pre_calc => [qw /_calc_pe calc_pe_lists _calc_phylo_abc_lists/], uses_nbr_lists => 1, # how many lists it must have distribution => 'nonnegative', indices => { @@ -1417,7 +1424,7 @@ EOD name => 'Phylogenetic Endemism single', reference => 'Rosauer et al (2009) Mol. Ecol. https://doi.org/10.1111/j.1365-294X.2009.04311.x', type => 'Phylogenetic Endemism Indices', - pre_calc => ['_calc_pe'], + pre_calc => ['_calc_pe', 'calc_pe_lists'], pre_calc_global => ['get_trimmed_tree'], uses_nbr_lists => 1, indices => { diff --git a/lib/Biodiverse/Indices/Phylogenetic/RefAlias.pm b/lib/Biodiverse/Indices/Phylogenetic/RefAlias.pm index c4f42c8a3..f87002e37 100644 --- a/lib/Biodiverse/Indices/Phylogenetic/RefAlias.pm +++ b/lib/Biodiverse/Indices/Phylogenetic/RefAlias.pm @@ -25,7 +25,6 @@ sub _calc_pe { my $tree_ref = $args{trimmed_tree}; my $results_cache = $args{PE_RESULTS_CACHE}; - \my %node_ranges = $args{node_range}; \my %rw_node_lengths = $args{inverse_range_weighted_node_lengths}; my $bd = $args{basedata_ref} || $self->get_basedata_ref; @@ -142,12 +141,6 @@ sub _calc_pe { if (scalar @$element_list_all > 1) { $results{PE_WE} = $PE_WE; $results{PE_WTLIST} = \%wts; - my %nranges = %node_ranges{keys %wts}; - $results{PE_RANGELIST} = \%nranges; - } - else { - my %nranges = %node_ranges{keys %{$results{PE_WTLIST}}}; - $results{PE_RANGELIST} = \%nranges; } # need to set these @@ -238,12 +231,6 @@ sub _calc_pe_hierarchical { if (scalar @$element_list_all > 1) { $results{PE_WE} = $PE_WE; $results{PE_WTLIST} = \%wts; - my %nranges = %node_ranges{keys %wts}; - $results{PE_RANGELIST} = \%nranges; - } - else { - my %nranges = %node_ranges{keys %{$results{PE_WTLIST}}}; - $results{PE_RANGELIST} = \%nranges; } # need to set these diff --git a/lib/Biodiverse/Indices/PhylogeneticRelative.pm b/lib/Biodiverse/Indices/PhylogeneticRelative.pm index 1144b584f..bec920566 100644 --- a/lib/Biodiverse/Indices/PhylogeneticRelative.pm +++ b/lib/Biodiverse/Indices/PhylogeneticRelative.pm @@ -283,7 +283,6 @@ sub calc_phylo_rpe_central { %args, PE_WE_P => $args{PEC_WE_P}, PE_WE => $args{PEC_WE}, - PE_RANGELIST => $args{PEC_RANGELIST}, PE_LOCAL_RANGELIST => $args{PEC_LOCAL_RANGELIST}, ); } From d025dc9c1f516270556beb667f3ff190ece266fb Mon Sep 17 00:00:00 2001 From: shawnlaffan Date: Tue, 27 Feb 2024 21:01:42 +1100 Subject: [PATCH 08/27] fix some index metadata --- lib/Biodiverse/Indices/PhylogeneticRelative.pm | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/lib/Biodiverse/Indices/PhylogeneticRelative.pm b/lib/Biodiverse/Indices/PhylogeneticRelative.pm index bec920566..082f1b75a 100644 --- a/lib/Biodiverse/Indices/PhylogeneticRelative.pm +++ b/lib/Biodiverse/Indices/PhylogeneticRelative.pm @@ -684,7 +684,7 @@ sub get_trimmed_tree_eq_branch_lengths_node_length_hash { sub get_metadata_get_trimmed_tree_range_inverse_hash { my %metadata = ( - name => 'get_trimmed_tree_range_inverses', + name => 'get_trimmed_tree_range_inverse_hash', description => "Get a hash of the node range inverse values\n" . "Forms the basis of the RPE calcs for equal area cells", @@ -725,8 +725,8 @@ sub get_metadata_get_trimmed_tree_range_inverse_hash_nonzero_len { . "Forms the basis of the RPE calcs for equal area cells", pre_calc_global => ['get_node_range_hash', 'get_trimmed_tree'], indices => { - trimmed_tree_range_inverse_hash => { - description => 'Hash of trimmed tree range inverse values', + trimmed_tree_range_inverse_hash_nonzero_len => { + description => 'Hash of trimmed tree range inverse values for nodes with non-zero length', }, }, ); From 44f0b5685f6309341e212a07c4da267701f18123 Mon Sep 17 00:00:00 2001 From: shawnlaffan Date: Tue, 27 Feb 2024 21:06:37 +1100 Subject: [PATCH 09/27] calc_phylo_rpe2: handle undef PE null score The CANAPE tests were triggering undef warnings. --- lib/Biodiverse/Indices/PhylogeneticRelative.pm | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/lib/Biodiverse/Indices/PhylogeneticRelative.pm b/lib/Biodiverse/Indices/PhylogeneticRelative.pm index 082f1b75a..8bc306d5d 100644 --- a/lib/Biodiverse/Indices/PhylogeneticRelative.pm +++ b/lib/Biodiverse/Indices/PhylogeneticRelative.pm @@ -382,7 +382,6 @@ sub calc_phylo_rpe2 { # First condition optimises for the common case where all local ranges are 1 if (($args{EL_COUNT_ALL} // $args{EL_COUNT_SET1} // 0) == 1) { $pe_null += $_ foreach @range_inverse{keys %node_ranges_local}; - $pe_null *= $default_eq_len; } else { # postfix for speed @@ -390,8 +389,8 @@ sub calc_phylo_rpe2 { += $range_inverse{$_} * $node_ranges_local{$_} foreach keys %node_ranges_local; - $pe_null *= $default_eq_len; } + $pe_null *= $default_eq_len if $pe_null; { no warnings qw /numeric uninitialized/; From 88734e197ac1e15a20bca4322c9538cce57d7bc3 Mon Sep 17 00:00:00 2001 From: shawnlaffan Date: Tue, 27 Feb 2024 21:32:15 +1100 Subject: [PATCH 10/27] Indices metadata: calc_phylo_rpe2: remove dependency on calc_pe_lists We have now weaned it. --- lib/Biodiverse/Indices/PhylogeneticRelative.pm | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/Biodiverse/Indices/PhylogeneticRelative.pm b/lib/Biodiverse/Indices/PhylogeneticRelative.pm index 8bc306d5d..53b89dd85 100644 --- a/lib/Biodiverse/Indices/PhylogeneticRelative.pm +++ b/lib/Biodiverse/Indices/PhylogeneticRelative.pm @@ -308,7 +308,7 @@ sub get_metadata_calc_phylo_rpe2 { name => 'Relative Phylogenetic Endemism, type 2', reference => 'Mishler et al. (2014) https://doi.org/10.1038/ncomms5473', type => 'Phylogenetic Indices (relative)', - pre_calc => [qw /calc_pe calc_pe_lists calc_elements_used/], + pre_calc => [qw /_calc_pe calc_elements_used/], pre_calc_global => [qw / get_trimmed_tree get_trimmed_tree_with_equalised_branch_lengths From 1bd027dc36fbff35e2a3e04cdc506bfa1da5ea2b Mon Sep 17 00:00:00 2001 From: shawnlaffan Date: Wed, 28 Feb 2024 16:20:02 +1100 Subject: [PATCH 11/27] Indices get_path_lengths_to_root_node: populate the path length hash with first path Reduces the work done in the xsub. --- lib/Biodiverse/Indices/Phylogenetic.pm | 16 ++++++++++++---- 1 file changed, 12 insertions(+), 4 deletions(-) diff --git a/lib/Biodiverse/Indices/Phylogenetic.pm b/lib/Biodiverse/Indices/Phylogenetic.pm index 0d354e93c..d2795f88e 100644 --- a/lib/Biodiverse/Indices/Phylogenetic.pm +++ b/lib/Biodiverse/Indices/Phylogenetic.pm @@ -664,10 +664,18 @@ sub get_path_lengths_to_root_node { # but first option is faster still my $len_hash = $tree_ref->get_node_length_hash; if (HAVE_BD_UTILS_108) { - # get keys and vals in one call - Biodiverse::Utils::XS::add_hash_keys_and_vals_until_exists_AoA ( - $path_hash, \@collected_paths, $len_hash, - ); + # sometimes there are no paths + if (@collected_paths) { + # direct assign the first (could do first few, or the longest but that needs another scan) + my $first = shift @collected_paths; + @$path_hash{@$first} = @$len_hash{@$first}; + if (@collected_paths) { + # get keys and vals in one call + Biodiverse::Utils::XS::add_hash_keys_and_vals_until_exists_AoA( + $path_hash, \@collected_paths, $len_hash, + ); + } + } } elsif (HAVE_BD_UTILS) { Biodiverse::Utils::copy_values_from ($path_hash, $len_hash); From cac1d7891c3bc32dd09ba58a038a0a2f84bccb57 Mon Sep 17 00:00:00 2001 From: shawnlaffan Date: Wed, 28 Feb 2024 16:57:13 +1100 Subject: [PATCH 12/27] reduce calls to TreeNode->get_length The Tree class has a method to get a hash of lengths. This is cached so later calls are very cheap. By using this we can avoid many repeated calls to the get_length method. As fast as it is, they can add up across randomisations. --- lib/Biodiverse/Indices/Phylogenetic.pm | 14 +++++----- .../Indices/PhylogeneticRelative.pm | 27 +++++++++---------- lib/Biodiverse/Tree.pm | 17 +++++------- 3 files changed, 27 insertions(+), 31 deletions(-) diff --git a/lib/Biodiverse/Indices/Phylogenetic.pm b/lib/Biodiverse/Indices/Phylogenetic.pm index d2795f88e..321196ac5 100644 --- a/lib/Biodiverse/Indices/Phylogenetic.pm +++ b/lib/Biodiverse/Indices/Phylogenetic.pm @@ -1783,14 +1783,14 @@ sub get_inverse_range_weighted_path_lengths { my %args = @_; my $tree = $args{tree_ref}; - my $node_ranges = $args{node_range}; - + \my %node_ranges = $args{node_range}; + \my %node_length_hash = $tree->get_node_length_hash; + my %range_weighted; - - foreach my $node ($tree->get_node_refs) { - my $name = $node->get_name; - next if !$node_ranges->{$name}; - $range_weighted{$name} = $node->get_length / $node_ranges->{$name}; + + foreach my $name (keys %node_length_hash) { + next if !$node_ranges{$name}; + $range_weighted{$name} = $node_length_hash{$name} / $node_ranges{$name}; } my %results = (inverse_range_weighted_node_lengths => \%range_weighted); diff --git a/lib/Biodiverse/Indices/PhylogeneticRelative.pm b/lib/Biodiverse/Indices/PhylogeneticRelative.pm index 53b89dd85..31dd51712 100644 --- a/lib/Biodiverse/Indices/PhylogeneticRelative.pm +++ b/lib/Biodiverse/Indices/PhylogeneticRelative.pm @@ -656,21 +656,20 @@ sub get_metadata_get_trimmed_tree_eq_branch_lengths_node_length_hash { return $metadata_class->new(\%metadata); } -# should just be a wrapper around Tree::get_node_length_hash sub get_trimmed_tree_eq_branch_lengths_node_length_hash { my $self = shift; my %args = @_; my $tree_ref = $args{TREE_REF_EQUALISED_BRANCHES_TRIMMED} // croak 'Missing TREE_REF_EQUALISED_BRANCHES_TRIMMED arg'; - my $node_hash = $tree_ref->get_node_hash; - - my (%len_hash, $nonzero_length); - foreach my $node_name (keys %$node_hash) { - my $node_ref = $node_hash->{$node_name}; - my $length = $node_ref->get_length; - $len_hash{$node_name} = $length; - $nonzero_length ||= $length; + + \my %len_hash = $tree_ref->get_node_length_hash; + + my $nonzero_length; + + foreach my $len (values %len_hash) { + $nonzero_length ||= $len; + last if $len; } my %results = ( @@ -737,14 +736,14 @@ sub get_trimmed_tree_range_inverse_hash_nonzero_len { my %args = @_; my $tree = $args{trimmed_tree}; - my $node_ranges = $args{node_range}; + \my %node_ranges = $args{node_range}; + \my %length_hash = $tree->get_node_length_hash; my %range_weighted; - foreach my $name (keys %$node_ranges) { - my $range = $node_ranges->{$name} || next; - my $numerator = $tree->get_node_ref_aa($name)->get_length ? 1 : 0; - $range_weighted{$name} = $numerator / $range; + foreach my $name (keys %node_ranges) { + my $range = $node_ranges{$name} || next; + $range_weighted{$name} = ($length_hash{$name} ? 1 : 0) / $range; } my %results = (trimmed_tree_range_inverse_hash_nonzero_len => \%range_weighted); diff --git a/lib/Biodiverse/Tree.pm b/lib/Biodiverse/Tree.pm index 3d20b53ba..be6a47a69 100644 --- a/lib/Biodiverse/Tree.pm +++ b/lib/Biodiverse/Tree.pm @@ -3095,20 +3095,17 @@ sub clone_tree_with_equalised_branch_lengths { my $name = $args{name} // ( $self->get_param('NAME') . ' EQ' ); - my $non_zero_len = $args{node_length}; + my $non_zero_len = $args{node_length} + // ($self->get_total_tree_length / ( $self->get_nonzero_length_count || 1 )); - if ( !defined $non_zero_len ) { - # my $non_zero_node_count = grep { $_->get_length } $self->get_node_refs; - # this caches - my $non_zero_node_count = $self->get_nonzero_length_count; - $non_zero_len = - $self->get_total_tree_length / ( $non_zero_node_count || 1 ); - } + \my %orig_node_length_hash = $self->get_node_length_hash; my $new_tree = $self->clone_without_caches; + \my %new_node_hash = $new_tree->get_node_hash; - foreach my $node ( $new_tree->get_node_refs ) { - $node->set_length_aa ( $node->get_length ? $non_zero_len : 0 ); + foreach my $name ( keys %new_node_hash ) { + my $node = $new_node_hash{$name}; + $node->set_length_aa ( $orig_node_length_hash{$name} ? $non_zero_len : 0 ); } $new_tree->rename( new_name => $name ); From 1666094e0000c5d78bb46ba4e9d4a38d04f90856 Mon Sep 17 00:00:00 2001 From: shawnlaffan Date: Thu, 29 Feb 2024 08:39:35 +1100 Subject: [PATCH 13/27] Indices _calc_abc_dispatcher: reorder some logic, use an empty array state var Minor optimisation but avoids sub call costs. The state var should be rarely needed but we might as well not create a new one every time. --- lib/Biodiverse/Indices/Indices.pm | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) diff --git a/lib/Biodiverse/Indices/Indices.pm b/lib/Biodiverse/Indices/Indices.pm index 84023aee5..d52dc343d 100644 --- a/lib/Biodiverse/Indices/Indices.pm +++ b/lib/Biodiverse/Indices/Indices.pm @@ -1813,20 +1813,22 @@ sub _calc_abc_dispatcher { // $args{label_list2} ); + state $empty_array = []; + return $self->_calc_abc_pairwise_mode(%args) - if $self->get_pairwise_mode - && @{$args{element_list1} // []} == 1 - && @{$args{element_list2} // []} == 1 - && !$have_lb_lists; + if @{$args{element_list1} // $empty_array} == 1 + && @{$args{element_list2} // $empty_array} == 1 + && $self->get_pairwise_mode + && !$have_lb_lists; return $self->_calc_abc_hierarchical_mode(%args) if $args{current_node_details} - && !$have_lb_lists - && $self->get_hierarchical_mode; + && !$have_lb_lists + && $self->get_hierarchical_mode; return $self->_calc_abc(%args) if is_hashref($args{element_list1}) - || @{$args{element_list1} // []} != 1 + || @{$args{element_list1} // $empty_array} != 1 || defined $args{element_list2} || $have_lb_lists; From d30dfea0e7cd8a817a612b19715cb6808a762a1e Mon Sep 17 00:00:00 2001 From: shawnlaffan Date: Mon, 4 Mar 2024 12:38:49 +1100 Subject: [PATCH 14/27] Indices: generalise early return condition in _calc_phylo_abc_lists We have already calculated the count of labels unique to set 2 so if that is zero then we can return early. --- lib/Biodiverse/Indices/Phylogenetic.pm | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/lib/Biodiverse/Indices/Phylogenetic.pm b/lib/Biodiverse/Indices/Phylogenetic.pm index 321196ac5..9274da53d 100644 --- a/lib/Biodiverse/Indices/Phylogenetic.pm +++ b/lib/Biodiverse/Indices/Phylogenetic.pm @@ -2694,7 +2694,9 @@ sub _calc_phylo_abc_lists { el_list => $args{element_list1}, ); - if (!@{$args{element_list2}}) { + # $args{C} is the count of labels unique to set 2 + # so if it is zero then we can short-circuit. + if ($args{C} == 0) { my $res = { PHYLO_A_LIST => {}, PHYLO_B_LIST => $nodes_in_path1, @@ -2703,14 +2705,12 @@ sub _calc_phylo_abc_lists { return wantarray ? %$res : $res; } - my $nodes_in_path2 = @{$args{element_list2}} - ? $self->get_path_lengths_to_root_node ( - %args, - labels => $label_hash2, - tree_ref => $tree, - el_list => $args{element_list2}, - ) - : {}; + my $nodes_in_path2 = $self->get_path_lengths_to_root_node ( + %args, + labels => $label_hash2, + tree_ref => $tree, + el_list => $args{element_list2}, + ); my %results; # one day we can clean this all up From 9091eee6d8b4b997911be2272f1544dd9f21d0b9 Mon Sep 17 00:00:00 2001 From: shawnlaffan Date: Mon, 4 Mar 2024 12:43:29 +1100 Subject: [PATCH 15/27] Indices: set _calc_phylo_abc_lists precalc to _calc_abc_any This way we take advantage of any of the calc_abc variants --- lib/Biodiverse/Indices/Phylogenetic.pm | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/Biodiverse/Indices/Phylogenetic.pm b/lib/Biodiverse/Indices/Phylogenetic.pm index 9274da53d..b665e3bb2 100644 --- a/lib/Biodiverse/Indices/Phylogenetic.pm +++ b/lib/Biodiverse/Indices/Phylogenetic.pm @@ -2668,7 +2668,7 @@ sub get_metadata__calc_phylo_abc_lists { name => 'Phylogenetic ABC lists', description => 'Calculate the sets of shared and not shared branches between two sets of labels', type => 'Phylogenetic Indices', - pre_calc => 'calc_abc', + pre_calc => '_calc_abc_any', pre_calc_global => [qw /get_trimmed_tree get_path_length_cache set_path_length_cache_by_group_flag/], uses_nbr_lists => 1, # how many sets of lists it must have required_args => {tree_ref => 1}, From 65ad9adf953cac49569c44ad6098cc47afb9b23a Mon Sep 17 00:00:00 2001 From: shawnlaffan Date: Wed, 6 Mar 2024 16:48:06 +1100 Subject: [PATCH 16/27] Indices: simplify use_wts logic in _calc_phylo_mpd_mntd Avoid the triple negative. --- lib/Biodiverse/Indices/PhyloCom.pm | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/Biodiverse/Indices/PhyloCom.pm b/lib/Biodiverse/Indices/PhyloCom.pm index 67f0b6414..80632aa21 100644 --- a/lib/Biodiverse/Indices/PhyloCom.pm +++ b/lib/Biodiverse/Indices/PhyloCom.pm @@ -347,7 +347,7 @@ sub _calc_phylo_mpd_mntd { # Save some cycles if all the weights are the same. # If we ever implement dissim then we can also check label_hash2. if ($use_wts && $label_hashrefs_are_same) { - if (not List::Util::any {$_ != 1} values %$label_hash1) { + if (List::Util::all {$_ == 1} values %$label_hash1) { $use_wts = undef; } } From 73d62d6c938cbf4583bb82724275a878710cac3d Mon Sep 17 00:00:00 2001 From: shawnlaffan Date: Wed, 6 Mar 2024 18:22:11 +1100 Subject: [PATCH 17/27] Indices _calc_pe: refactor weights calcs We can calculate the weighted branch lengths from the local ranges and globally weighted branch lengths. This avoids much repeated summation and thus time. --- .../Indices/Phylogenetic/RefAlias.pm | 19 ++++++++++--------- 1 file changed, 10 insertions(+), 9 deletions(-) diff --git a/lib/Biodiverse/Indices/Phylogenetic/RefAlias.pm b/lib/Biodiverse/Indices/Phylogenetic/RefAlias.pm index f87002e37..f81cc2a41 100644 --- a/lib/Biodiverse/Indices/Phylogenetic/RefAlias.pm +++ b/lib/Biodiverse/Indices/Phylogenetic/RefAlias.pm @@ -114,16 +114,15 @@ sub _calc_pe { # refalias might be a nano-optimisation here... \my %wt_hash = $results_this_gp->{PE_WTLIST}; - # weights need to be summed, - # unless we are starting from a blank slate - if (keys %wts) { - foreach my $node (keys %wt_hash) { - $wts{$node} += $wt_hash{$node}; - $local_ranges{$node}++; - } + # Local ranges need to be summed unless + # we are starting from a blank slate. + # Weights are aggregated later. + if (keys %local_ranges) { + # postfix for speed + $local_ranges{$_}++ + foreach keys %wt_hash; } else { - %wts = %wt_hash; @local_ranges{keys %wt_hash} = (1) x scalar keys %wt_hash; } } @@ -139,8 +138,10 @@ sub _calc_pe { # need the collated versions for multiple elements if (scalar @$element_list_all > 1) { - $results{PE_WE} = $PE_WE; + $wts{$_} = $rw_node_lengths{$_} * $local_ranges{$_} + for keys %local_ranges; $results{PE_WTLIST} = \%wts; + $results{PE_WE} = $PE_WE; } # need to set these From 6bb3e92ad4eb28a7d7ba82a969eb66a5595a83d7 Mon Sep 17 00:00:00 2001 From: shawnlaffan Date: Wed, 6 Mar 2024 18:28:04 +1100 Subject: [PATCH 18/27] Indices _calc_pe: micro-optimise The element count is needed many times so store it in a scalar instead of repeatedly getting it from the array. --- lib/Biodiverse/Indices/Phylogenetic/RefAlias.pm | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/lib/Biodiverse/Indices/Phylogenetic/RefAlias.pm b/lib/Biodiverse/Indices/Phylogenetic/RefAlias.pm index f81cc2a41..119eb02cf 100644 --- a/lib/Biodiverse/Indices/Phylogenetic/RefAlias.pm +++ b/lib/Biodiverse/Indices/Phylogenetic/RefAlias.pm @@ -34,10 +34,12 @@ sub _calc_pe { my (%wts, %local_ranges, %results); + my $el_count = @$element_list_all; + # prob a micro-optimisation, but might avoid # some looping below when collating weights # and one group has many more labels than the other - if (@$element_list_all == 2) { + if ($el_count == 2) { my $count0 = $bd->get_richness_aa ($element_list_all->[0]); my $count1 = $bd->get_richness_aa ($element_list_all->[1]); if ($count1 > $count0) { @@ -103,7 +105,7 @@ sub _calc_pe { # Avoid some redundant slicing and dicing when we have only one group # Pays off when processing large data sets - if (scalar @$element_list_all == 1) { + if ($el_count == 1) { # no need to collate anything so make a shallow copy @results{keys %$results_this_gp} = values %$results_this_gp; # but we do need to add to the local range hash @@ -137,7 +139,7 @@ sub _calc_pe { } # need the collated versions for multiple elements - if (scalar @$element_list_all > 1) { + if ($el_count > 1) { $wts{$_} = $rw_node_lengths{$_} * $local_ranges{$_} for keys %local_ranges; $results{PE_WTLIST} = \%wts; From da2e1987f77b2a2a0e2459ab5d439a54faf59ddb Mon Sep 17 00:00:00 2001 From: shawnlaffan Date: Fri, 8 Mar 2024 10:13:07 +1100 Subject: [PATCH 19/27] Indices calc_phylo_rpe2: follow _calc_pe caching approach The central variant currently needs special handling but this will be removed in a future commit. --- .../Indices/PhylogeneticRelative.pm | 105 +++++++++++++++--- 1 file changed, 88 insertions(+), 17 deletions(-) diff --git a/lib/Biodiverse/Indices/PhylogeneticRelative.pm b/lib/Biodiverse/Indices/PhylogeneticRelative.pm index 31dd51712..f16533408 100644 --- a/lib/Biodiverse/Indices/PhylogeneticRelative.pm +++ b/lib/Biodiverse/Indices/PhylogeneticRelative.pm @@ -238,12 +238,14 @@ sub get_metadata_calc_phylo_rpe_central { name => 'Relative Phylogenetic Endemism, central', reference => 'Mishler et al. (2014) https://doi.org/10.1038/ncomms5473', type => 'Phylogenetic Indices (relative)', - pre_calc => [qw /calc_pe_central calc_pe_central_lists calc_elements_used/], + pre_calc => [qw /calc_pe_central calc_pe_central_lists calc_elements_used _calc_abc_any/], pre_calc_global => [qw / get_trimmed_tree get_trimmed_tree_with_equalised_branch_lengths get_trimmed_tree_eq_branch_lengths_node_length_hash get_trimmed_tree_range_inverse_hash_nonzero_len + get_pe_element_cache + get_rpe_element_cache /], uses_nbr_lists => 1, indices => { @@ -271,7 +273,7 @@ sub calc_phylo_rpe_central { my $results; - if (!@{$args{element_list2} // []}) { + if (!@{$args{element_list2} // []} || !($args{C} // 1)) { # We just copy the calc_phylo_rpe2 results # if there are no nbrs in set2 my $cache_hash = $self->get_param('AS_RESULTS_FROM_LOCAL'); @@ -284,6 +286,7 @@ sub calc_phylo_rpe_central { PE_WE_P => $args{PEC_WE_P}, PE_WE => $args{PEC_WE}, PE_LOCAL_RANGELIST => $args{PEC_LOCAL_RANGELIST}, + rpe_central_mode => 1, # temporary we hope ); } @@ -308,12 +311,14 @@ sub get_metadata_calc_phylo_rpe2 { name => 'Relative Phylogenetic Endemism, type 2', reference => 'Mishler et al. (2014) https://doi.org/10.1038/ncomms5473', type => 'Phylogenetic Indices (relative)', - pre_calc => [qw /_calc_pe calc_elements_used/], + pre_calc => [qw /_calc_pe calc_elements_used _calc_abc_any/], pre_calc_global => [qw / get_trimmed_tree get_trimmed_tree_with_equalised_branch_lengths get_trimmed_tree_eq_branch_lengths_node_length_hash get_trimmed_tree_range_inverse_hash_nonzero_len + get_pe_element_cache + get_rpe_element_cache /], uses_nbr_lists => 1, indices => { @@ -351,9 +356,9 @@ sub calc_phylo_rpe2 { return wantarray ? %results : \%results; } - if (!@{$args{element_list2} // []}) { + if (!@{$args{element_list2} // []} || !($args{C} // 1) ) { # We just copy the calc_phylo_rpe_central results - # if there are no nbrs in set2 + # if there are no nbrs or no different labels in set2 my $cache_hash = $self->get_param('AS_RESULTS_FROM_LOCAL'); if (my $cached = $cache_hash->{calc_phylo_rpe_central}) { my %results; @@ -372,25 +377,65 @@ sub calc_phylo_rpe2 { my $null_tree_ref = $args{TREE_REF_EQUALISED_BRANCHES_TRIMMED}; my $null_total_tree_length = $null_tree_ref->get_total_tree_length; - my $default_eq_len = $args{TREE_REF_EQUALISED_BRANCHES_TRIMMED_NODE_LENGTH}; - \my %node_ranges_local = $args{PE_LOCAL_RANGELIST}; - \my %range_inverse = $args{trimmed_tree_range_inverse_hash_nonzero_len}; - + my $default_eq_len = $args{TREE_REF_EQUALISED_BRANCHES_TRIMMED_NODE_LENGTH}; + \my %range_inverse = $args{trimmed_tree_range_inverse_hash_nonzero_len}; + + my $element_list_all = $args{element_list_all}; + # Get the PE score assuming equal branch lengths my ($pe_null, $null, $phylo_rpe2, $diff); - # First condition optimises for the common case where all local ranges are 1 - if (($args{EL_COUNT_ALL} // $args{EL_COUNT_SET1} // 0) == 1) { - $pe_null += $_ foreach @range_inverse{keys %node_ranges_local}; + # Hopefully a temporary check until we rejig the central calc. + # Central mode is also the same as whole mode if all labels are + # in both nbr sets or there is one nbr set + if (!$args{rpe_central_mode} || !@{$args{element_list2} // []} || (!$args{C} // 1) ) { + my $results_cache = $args{RPE_RESULTS_CACHE}; + my $pe_results_cache = $args{PE_RESULTS_CACHE}; + + foreach my $group (@$element_list_all) { + my $results_this_gp; + # use the cached results for a group if present + if (exists $results_cache->{$group}) { + $results_this_gp = $results_cache->{$group}; + } + # else build them and cache them + else { + # precalcs mean this should exist + my $pe_cached = $pe_results_cache->{$group} + // croak "PE cache entry for $group not yet calculated"; + my $nodes_in_path = $pe_cached->{PE_WTLIST}; + + my $gp_score; + $gp_score += $_ foreach @range_inverse{keys %$nodes_in_path}; + $gp_score *= $default_eq_len if $gp_score; + + $results_this_gp = { RPE_WE => $gp_score }; + + $results_cache->{$group} = $results_this_gp; + } + + if (defined $results_this_gp->{RPE_WE}) { + $pe_null += $results_this_gp->{RPE_WE}; + } + + } } else { - # postfix for speed - $pe_null - += $range_inverse{$_} - * $node_ranges_local{$_} + \my %node_ranges_local = $args{PE_LOCAL_RANGELIST}; + + # First condition optimises for the common case where all local ranges are 1 + if (($args{EL_COUNT_ALL} // $args{EL_COUNT_SET1} // 0) == 1) { + $pe_null += $_ foreach @range_inverse{keys %node_ranges_local}; + } + else { + # postfix for speed + $pe_null + += $range_inverse{$_} + * $node_ranges_local{$_} foreach keys %node_ranges_local; + } + $pe_null *= $default_eq_len if $pe_null; } - $pe_null *= $default_eq_len if $pe_null; { no warnings qw /numeric uninitialized/; @@ -752,4 +797,30 @@ sub get_trimmed_tree_range_inverse_hash_nonzero_len { } + +sub get_metadata_get_rpe_element_cache { + + my %metadata = ( + name => 'get_rpe_element_cache', + description => 'Create a hash in which to cache the PE_alt scores for each element', + indices => { + RPE_RESULTS_CACHE => { + description => 'The hash in which to cache the PE_alt scores for each element' + }, + }, + ); + + return $metadata_class->new(\%metadata); +} + +# create a hash in which to cache the PE scores for each element +# this is called as a global precalc and then used or modified by each element as needed +sub get_rpe_element_cache { + my $self = shift; + my %args = @_; + + my %results = (RPE_RESULTS_CACHE => {}); + return wantarray ? %results : \%results; +} + 1; From 58a4ed55c0928585f97528951b1d428ce0fb9027 Mon Sep 17 00:00:00 2001 From: shawnlaffan Date: Fri, 8 Mar 2024 10:28:01 +1100 Subject: [PATCH 20/27] Indices calc_phylo_rpe_central: process internally Call calc_phylo_rpe2 where possible, otherwise process the branches ourselves. This moves the special case logic out of calc_phylo_rpe2. --- .../Indices/PhylogeneticRelative.pm | 61 ++++++++++++++++--- 1 file changed, 53 insertions(+), 8 deletions(-) diff --git a/lib/Biodiverse/Indices/PhylogeneticRelative.pm b/lib/Biodiverse/Indices/PhylogeneticRelative.pm index f16533408..d6efd6651 100644 --- a/lib/Biodiverse/Indices/PhylogeneticRelative.pm +++ b/lib/Biodiverse/Indices/PhylogeneticRelative.pm @@ -277,17 +277,62 @@ sub calc_phylo_rpe_central { # We just copy the calc_phylo_rpe2 results # if there are no nbrs in set2 my $cache_hash = $self->get_param('AS_RESULTS_FROM_LOCAL'); - $results = $cache_hash->{calc_phylo_rpe2}; + $results = $cache_hash->{calc_phylo_rpe2} + // $self->calc_phylo_rpe2( + %args, + PE_WE_P => $args{PEC_WE_P}, + PE_WE => $args{PEC_WE}, + PE_LOCAL_RANGELIST => $args{PEC_LOCAL_RANGELIST}, + ); } if (!$results) { - $results = $self->calc_phylo_rpe2( - %args, - PE_WE_P => $args{PEC_WE_P}, - PE_WE => $args{PEC_WE}, - PE_LOCAL_RANGELIST => $args{PEC_LOCAL_RANGELIST}, - rpe_central_mode => 1, # temporary we hope - ); + my $pe_p_score = $args{PEC_WE_P}; + + my $orig_tree_ref = $args{trimmed_tree}; + my $orig_total_tree_length = $orig_tree_ref->get_total_tree_length; + + my $null_tree_ref = $args{TREE_REF_EQUALISED_BRANCHES_TRIMMED}; + my $null_total_tree_length = $null_tree_ref->get_total_tree_length; + + my $default_eq_len = $args{TREE_REF_EQUALISED_BRANCHES_TRIMMED_NODE_LENGTH}; + \my %range_inverse = $args{trimmed_tree_range_inverse_hash_nonzero_len}; + + # Get the PE score assuming equal branch lengths + my ($pe_null, $null, $phylo_rpe2, $diff); + + # need to work over the lists + \my %node_ranges_local = $args{PEC_LOCAL_RANGELIST}; + + # First condition optimises for the common case where all local ranges are 1 + if (($args{EL_COUNT_ALL} // $args{EL_COUNT_SET1} // 0) == 1) { + $pe_null += $_ foreach @range_inverse{keys %node_ranges_local}; + } + else { + # postfix for speed + $pe_null + += $range_inverse{$_} + * $node_ranges_local{$_} + foreach keys %node_ranges_local; + } + $pe_null *= $default_eq_len if $pe_null; + + { + no warnings qw /numeric uninitialized/; + # null is equiv to PE_WE_P for the equalised tree + $null = eval {$pe_null / $null_total_tree_length}; + $phylo_rpe2 = eval {$pe_p_score / $null}; + $diff = eval {$orig_total_tree_length * ($pe_p_score - $null)}; + } + #if (defined $pe_nullx) { + #say STDERR "$pe_null $pe_nullx"; + #} + $results = { + PHYLO_RPE2 => $phylo_rpe2, + PHYLO_RPE_NULL2 => $null, + PHYLO_RPE_DIFF2 => $diff, + }; + } my %results2; From b0197f83aa0e89a6396b70a0de928dbb3f50b3bb Mon Sep 17 00:00:00 2001 From: shawnlaffan Date: Fri, 8 Mar 2024 10:31:14 +1100 Subject: [PATCH 21/27] Indices calc_phylo_rpe_central: less nesting --- .../Indices/PhylogeneticRelative.pm | 97 +++++++++---------- 1 file changed, 47 insertions(+), 50 deletions(-) diff --git a/lib/Biodiverse/Indices/PhylogeneticRelative.pm b/lib/Biodiverse/Indices/PhylogeneticRelative.pm index d6efd6651..d5abfa992 100644 --- a/lib/Biodiverse/Indices/PhylogeneticRelative.pm +++ b/lib/Biodiverse/Indices/PhylogeneticRelative.pm @@ -271,78 +271,75 @@ sub calc_phylo_rpe_central { my $self = shift; my %args = @_; - my $results; - if (!@{$args{element_list2} // []} || !($args{C} // 1)) { # We just copy the calc_phylo_rpe2 results # if there are no nbrs in set2 my $cache_hash = $self->get_param('AS_RESULTS_FROM_LOCAL'); - $results = $cache_hash->{calc_phylo_rpe2} + my $results = $cache_hash->{calc_phylo_rpe2} // $self->calc_phylo_rpe2( %args, PE_WE_P => $args{PEC_WE_P}, PE_WE => $args{PEC_WE}, PE_LOCAL_RANGELIST => $args{PEC_LOCAL_RANGELIST}, ); - } - if (!$results) { - my $pe_p_score = $args{PEC_WE_P}; + my %results2; + foreach my $key (keys %$results) { + # will need to be changed if we rename the RPE indices + my $new_key = ($key =~ s/2$/C/r); + $results2{$new_key} = $results->{$key}; + } - my $orig_tree_ref = $args{trimmed_tree}; - my $orig_total_tree_length = $orig_tree_ref->get_total_tree_length; + return wantarray ? %results2 : \%results2; + } - my $null_tree_ref = $args{TREE_REF_EQUALISED_BRANCHES_TRIMMED}; - my $null_total_tree_length = $null_tree_ref->get_total_tree_length; + my $pe_p_score = $args{PEC_WE_P}; - my $default_eq_len = $args{TREE_REF_EQUALISED_BRANCHES_TRIMMED_NODE_LENGTH}; - \my %range_inverse = $args{trimmed_tree_range_inverse_hash_nonzero_len}; + my $orig_tree_ref = $args{trimmed_tree}; + my $orig_total_tree_length = $orig_tree_ref->get_total_tree_length; - # Get the PE score assuming equal branch lengths - my ($pe_null, $null, $phylo_rpe2, $diff); + my $null_tree_ref = $args{TREE_REF_EQUALISED_BRANCHES_TRIMMED}; + my $null_total_tree_length = $null_tree_ref->get_total_tree_length; - # need to work over the lists - \my %node_ranges_local = $args{PEC_LOCAL_RANGELIST}; + my $default_eq_len = $args{TREE_REF_EQUALISED_BRANCHES_TRIMMED_NODE_LENGTH}; + \my %range_inverse = $args{trimmed_tree_range_inverse_hash_nonzero_len}; - # First condition optimises for the common case where all local ranges are 1 - if (($args{EL_COUNT_ALL} // $args{EL_COUNT_SET1} // 0) == 1) { - $pe_null += $_ foreach @range_inverse{keys %node_ranges_local}; - } - else { - # postfix for speed - $pe_null - += $range_inverse{$_} - * $node_ranges_local{$_} - foreach keys %node_ranges_local; - } - $pe_null *= $default_eq_len if $pe_null; + # Get the PE score assuming equal branch lengths + my ($pe_null, $null, $phylo_rpe2, $diff); - { - no warnings qw /numeric uninitialized/; - # null is equiv to PE_WE_P for the equalised tree - $null = eval {$pe_null / $null_total_tree_length}; - $phylo_rpe2 = eval {$pe_p_score / $null}; - $diff = eval {$orig_total_tree_length * ($pe_p_score - $null)}; - } - #if (defined $pe_nullx) { - #say STDERR "$pe_null $pe_nullx"; - #} - $results = { - PHYLO_RPE2 => $phylo_rpe2, - PHYLO_RPE_NULL2 => $null, - PHYLO_RPE_DIFF2 => $diff, - }; + # need to work over the lists + \my %node_ranges_local = $args{PEC_LOCAL_RANGELIST}; + # First condition optimises for the common case where all local ranges are 1 + if (($args{EL_COUNT_ALL} // $args{EL_COUNT_SET1} // 0) == 1) { + $pe_null += $_ foreach @range_inverse{keys %node_ranges_local}; } - - my %results2; - foreach my $key (keys %$results) { - # will need to be changed if we rename the RPE indices - my $new_key = ($key =~ s/2$/C/r); - $results2{$new_key} = $results->{$key}; + else { + # postfix for speed + $pe_null + += $range_inverse{$_} + * $node_ranges_local{$_} + foreach keys %node_ranges_local; } + $pe_null *= $default_eq_len if $pe_null; - return wantarray ? %results2 : \%results2; + { + no warnings qw /numeric uninitialized/; + # null is equiv to PE_WE_P for the equalised tree + $null = eval {$pe_null / $null_total_tree_length}; + $phylo_rpe2 = eval {$pe_p_score / $null}; + $diff = eval {$orig_total_tree_length * ($pe_p_score - $null)}; + } + #if (defined $pe_nullx) { + #say STDERR "$pe_null $pe_nullx"; + #} + my $results = { + PHYLO_RPEC => $phylo_rpe2, + PHYLO_RPE_NULLC => $null, + PHYLO_RPE_DIFFC => $diff, + }; + + return wantarray ? %$results : $results; } From 52397655136742729d7b46f6d2717e5e079f3f7b Mon Sep 17 00:00:00 2001 From: shawnlaffan Date: Fri, 8 Mar 2024 10:33:33 +1100 Subject: [PATCH 22/27] Indices: calc_phylo_rpe2: cleanup if branch and less nesting The deleted branch was only needed by calc_phylo_rpe_central, and it now contains the same logic. --- .../Indices/PhylogeneticRelative.pm | 67 +++++++------------ 1 file changed, 23 insertions(+), 44 deletions(-) diff --git a/lib/Biodiverse/Indices/PhylogeneticRelative.pm b/lib/Biodiverse/Indices/PhylogeneticRelative.pm index d5abfa992..69e7e5a7a 100644 --- a/lib/Biodiverse/Indices/PhylogeneticRelative.pm +++ b/lib/Biodiverse/Indices/PhylogeneticRelative.pm @@ -427,56 +427,35 @@ sub calc_phylo_rpe2 { # Get the PE score assuming equal branch lengths my ($pe_null, $null, $phylo_rpe2, $diff); - # Hopefully a temporary check until we rejig the central calc. - # Central mode is also the same as whole mode if all labels are - # in both nbr sets or there is one nbr set - if (!$args{rpe_central_mode} || !@{$args{element_list2} // []} || (!$args{C} // 1) ) { - my $results_cache = $args{RPE_RESULTS_CACHE}; - my $pe_results_cache = $args{PE_RESULTS_CACHE}; - - foreach my $group (@$element_list_all) { - my $results_this_gp; - # use the cached results for a group if present - if (exists $results_cache->{$group}) { - $results_this_gp = $results_cache->{$group}; - } - # else build them and cache them - else { - # precalcs mean this should exist - my $pe_cached = $pe_results_cache->{$group} - // croak "PE cache entry for $group not yet calculated"; - my $nodes_in_path = $pe_cached->{PE_WTLIST}; - - my $gp_score; - $gp_score += $_ foreach @range_inverse{keys %$nodes_in_path}; - $gp_score *= $default_eq_len if $gp_score; - - $results_this_gp = { RPE_WE => $gp_score }; + my $results_cache = $args{RPE_RESULTS_CACHE}; + my $pe_results_cache = $args{PE_RESULTS_CACHE}; + + foreach my $group (@$element_list_all) { + my $results_this_gp; + # use the cached results for a group if present + if (exists $results_cache->{$group}) { + $results_this_gp = $results_cache->{$group}; + } + # else build them and cache them + else { + # precalcs mean this should exist + my $pe_cached = $pe_results_cache->{$group} + // croak "PE cache entry for $group not yet calculated"; + my $nodes_in_path = $pe_cached->{PE_WTLIST}; - $results_cache->{$group} = $results_this_gp; - } + my $gp_score; + $gp_score += $_ foreach @range_inverse{keys %$nodes_in_path}; + $gp_score *= $default_eq_len if $gp_score; - if (defined $results_this_gp->{RPE_WE}) { - $pe_null += $results_this_gp->{RPE_WE}; - } + $results_this_gp = { RPE_WE => $gp_score }; + $results_cache->{$group} = $results_this_gp; } - } - else { - \my %node_ranges_local = $args{PE_LOCAL_RANGELIST}; - # First condition optimises for the common case where all local ranges are 1 - if (($args{EL_COUNT_ALL} // $args{EL_COUNT_SET1} // 0) == 1) { - $pe_null += $_ foreach @range_inverse{keys %node_ranges_local}; + if (defined $results_this_gp->{RPE_WE}) { + $pe_null += $results_this_gp->{RPE_WE}; } - else { - # postfix for speed - $pe_null - += $range_inverse{$_} - * $node_ranges_local{$_} - foreach keys %node_ranges_local; - } - $pe_null *= $default_eq_len if $pe_null; + } { From 80b033e79637d442d586f607041a89c180633f2b Mon Sep 17 00:00:00 2001 From: shawnlaffan Date: Fri, 8 Mar 2024 11:09:33 +1100 Subject: [PATCH 23/27] _calc_abc_any: micro-optimise Avoid list overheads. We only have three options anyway. --- lib/Biodiverse/Indices/Indices.pm | 19 +++++-------------- .../Indices/PhylogeneticRelative.pm | 3 ++- 2 files changed, 7 insertions(+), 15 deletions(-) diff --git a/lib/Biodiverse/Indices/Indices.pm b/lib/Biodiverse/Indices/Indices.pm index d52dc343d..75f3ba5b8 100644 --- a/lib/Biodiverse/Indices/Indices.pm +++ b/lib/Biodiverse/Indices/Indices.pm @@ -1703,22 +1703,13 @@ sub _calc_abc_any { my $self = shift; my $cache_hash = $self->get_param('AS_RESULTS_FROM_LOCAL'); - my $cache_key - = List::Util::first {defined $cache_hash->{$_}} - (qw/calc_abc calc_abc2 calc_abc3/); - # say STDERR 'NO previous cache key' - # if !$cache_key; + my $results = $cache_hash->{calc_abc} + // $cache_hash->{calc_abc2} + // $cache_hash->{calc_abc3} + // $self->calc_abc(@_); - # fall back to calc_abc if nothing had an explicit abc dependency - my $cached = $cache_key - ? $cache_hash->{$cache_key} - : $self->calc_abc(@_); - - croak 'No previous calc_abc results found' - if !$cached; - - return wantarray ? %$cached : $cached; + return wantarray ? %$results : $results; } sub get_metadata_calc_abc { diff --git a/lib/Biodiverse/Indices/PhylogeneticRelative.pm b/lib/Biodiverse/Indices/PhylogeneticRelative.pm index 69e7e5a7a..cc420df1c 100644 --- a/lib/Biodiverse/Indices/PhylogeneticRelative.pm +++ b/lib/Biodiverse/Indices/PhylogeneticRelative.pm @@ -273,7 +273,8 @@ sub calc_phylo_rpe_central { if (!@{$args{element_list2} // []} || !($args{C} // 1)) { # We just copy the calc_phylo_rpe2 results - # if there are no nbrs in set2 + # if there are no nbrs in set2 or all the + # labels are common my $cache_hash = $self->get_param('AS_RESULTS_FROM_LOCAL'); my $results = $cache_hash->{calc_phylo_rpe2} // $self->calc_phylo_rpe2( From 711289f36bb685e21a441ceb00270a3cad14482f Mon Sep 17 00:00:00 2001 From: shawnlaffan Date: Fri, 8 Mar 2024 13:40:20 +1100 Subject: [PATCH 24/27] Indices: list results from _calc_pe are now in _calc_pe_lists These have large time overheads in some cases but are not always needed. For example the user might only need the PE scores. --- lib/Biodiverse/Indices/Phylogenetic.pm | 8 +- .../Indices/Phylogenetic/RefAlias.pm | 222 ++++++++++++------ lib/Biodiverse/Indices/RWTurnover.pm | 17 +- t/26-Cluster2.t | 4 +- 4 files changed, 163 insertions(+), 88 deletions(-) diff --git a/lib/Biodiverse/Indices/Phylogenetic.pm b/lib/Biodiverse/Indices/Phylogenetic.pm index b665e3bb2..8340cf694 100644 --- a/lib/Biodiverse/Indices/Phylogenetic.pm +++ b/lib/Biodiverse/Indices/Phylogenetic.pm @@ -780,7 +780,7 @@ sub get_metadata_calc_pe_lists { name => 'Phylogenetic Endemism lists', reference => 'Rosauer et al (2009) Mol. Ecol. https://doi.org/10.1111/j.1365-294X.2009.04311.x', type => 'Phylogenetic Endemism Indices', - pre_calc => ['_calc_pe'], + pre_calc => [qw /_calc_pe_lists/], pre_calc_global => [ 'get_node_range_hash' ], uses_nbr_lists => 1, distribution => 'nonnegative', @@ -845,7 +845,7 @@ END_PEC_DESC name => 'Phylogenetic Endemism central', reference => 'Rosauer et al (2009) Mol. Ecol. https://doi.org/10.1111/j.1365-294X.2009.04311.x', type => 'Phylogenetic Endemism Indices', - pre_calc => [qw /_calc_pe _calc_phylo_abc_lists/], + pre_calc => [qw /_calc_pe _calc_pe_lists _calc_phylo_abc_lists/], pre_calc_global => [qw /get_trimmed_tree/], uses_nbr_lists => 1, # how many lists it must have formula => $formula, @@ -1115,7 +1115,7 @@ sub get_metadata_calc_pe_clade_contributions { name => 'PE clade contributions', reference => '', type => 'Phylogenetic Endemism Indices', - pre_calc => ['_calc_pe', 'get_sub_tree_as_hash'], + pre_calc => [qw/_calc_pe _calc_pe_lists get_sub_tree_as_hash/], pre_calc_global => ['get_trimmed_tree'], uses_nbr_lists => 1, distribution => 'nonnegative', # default @@ -1432,7 +1432,7 @@ EOD name => 'Phylogenetic Endemism single', reference => 'Rosauer et al (2009) Mol. Ecol. https://doi.org/10.1111/j.1365-294X.2009.04311.x', type => 'Phylogenetic Endemism Indices', - pre_calc => ['_calc_pe', 'calc_pe_lists'], + pre_calc => [qw/_calc_pe calc_pe_lists/], pre_calc_global => ['get_trimmed_tree'], uses_nbr_lists => 1, indices => { diff --git a/lib/Biodiverse/Indices/Phylogenetic/RefAlias.pm b/lib/Biodiverse/Indices/Phylogenetic/RefAlias.pm index 119eb02cf..8278f40d1 100644 --- a/lib/Biodiverse/Indices/Phylogenetic/RefAlias.pm +++ b/lib/Biodiverse/Indices/Phylogenetic/RefAlias.pm @@ -12,6 +12,8 @@ no warnings 'experimental::refaliasing'; use List::Util qw /sum/; +my $metadata_class = 'Biodiverse::Metadata::Indices'; + sub _calc_pe { my $self = shift; my %args = @_; @@ -32,23 +34,7 @@ sub _calc_pe { # default these to undef - more meaningful than zero my ($PE_WE, $PE_WE_P); - my (%wts, %local_ranges, %results); - - my $el_count = @$element_list_all; - - # prob a micro-optimisation, but might avoid - # some looping below when collating weights - # and one group has many more labels than the other - if ($el_count == 2) { - my $count0 = $bd->get_richness_aa ($element_list_all->[0]); - my $count1 = $bd->get_richness_aa ($element_list_all->[1]); - if ($count1 > $count0) { - $element_list_all = [ - $element_list_all->[1], - $element_list_all->[0], - ]; - } - } + my %results; foreach my $group (@$element_list_all) { my $results_this_gp; @@ -103,31 +89,6 @@ sub _calc_pe { $PE_WE += $results_this_gp->{PE_WE}; } - # Avoid some redundant slicing and dicing when we have only one group - # Pays off when processing large data sets - if ($el_count == 1) { - # no need to collate anything so make a shallow copy - @results{keys %$results_this_gp} = values %$results_this_gp; - # but we do need to add to the local range hash - my $hashref = $results_this_gp->{PE_WTLIST}; - @local_ranges{keys %$hashref} = (1) x scalar keys %$hashref; - } - else { - # refalias might be a nano-optimisation here... - \my %wt_hash = $results_this_gp->{PE_WTLIST}; - - # Local ranges need to be summed unless - # we are starting from a blank slate. - # Weights are aggregated later. - if (keys %local_ranges) { - # postfix for speed - $local_ranges{$_}++ - foreach keys %wt_hash; - } - else { - @local_ranges{keys %wt_hash} = (1) x scalar keys %wt_hash; - } - } } { @@ -138,17 +99,8 @@ sub _calc_pe { $PE_WE_P = eval {$PE_WE / $total_tree_length}; } - # need the collated versions for multiple elements - if ($el_count > 1) { - $wts{$_} = $rw_node_lengths{$_} * $local_ranges{$_} - for keys %local_ranges; - $results{PE_WTLIST} = \%wts; - $results{PE_WE} = $PE_WE; - } - - # need to set these + $results{PE_WE} = $PE_WE; $results{PE_WE_P} = $PE_WE_P; - $results{PE_LOCAL_RANGELIST} = \%local_ranges; return wantarray ? %results : \%results; } @@ -158,8 +110,6 @@ sub _calc_pe { sub _calc_pe_hierarchical { my ($self, %args) = @_; - my $element_list_all = $args{element_list_all}; - my $node_data = $args{current_node_details} // croak 'Must pass the current node details when in hierarchical mode'; my $node_name = $node_data->{name} @@ -168,12 +118,11 @@ sub _calc_pe_hierarchical { my $tree_ref = $args{trimmed_tree}; my $results_cache = $args{PE_RESULTS_CACHE}; - \my %node_ranges = $args{node_range}; # default these to undef - more meaningful than zero my ($PE_WE, $PE_WE_P); - my (%wts, %local_ranges, %results); + my %results; foreach my $group (@$child_names) { my $results_this_gp; @@ -192,6 +141,140 @@ sub _calc_pe_hierarchical { if (defined $results_this_gp->{PE_WE}) { $PE_WE += $results_this_gp->{PE_WE}; } + } + + { + no warnings 'uninitialized'; + my $total_tree_length = $tree_ref->get_total_tree_length; + + #Phylogenetic endemism = sum for all nodes of: + # (branch length/total tree length) / node range + $PE_WE_P = eval {$PE_WE / $total_tree_length}; + } + + $results{PE_WE} = $PE_WE; + $results{PE_WE_P} = $PE_WE_P; + + $results_cache->{$node_name}{PE_WE} = $results{PE_WE}; + + return wantarray ? %results : \%results; +} + +sub get_metadata__calc_pe_lists { + + my %metadata = ( + description => 'Phylogenetic endemism (PE) base lists.', + name => 'Phylogenetic Endemism base lists', + reference => 'Rosauer et al (2009) Mol. Ecol. https://doi.org/10.1111/j.1365-294X.2009.04311.x', + type => 'Phylogenetic Endemism Indices', # keeps it clear of the other indices in the GUI + pre_calc_global => [ qw / + get_node_range_hash + get_trimmed_tree + get_pe_element_cache + get_path_length_cache + set_path_length_cache_by_group_flag + get_inverse_range_weighted_path_lengths + /], + pre_calc => [qw/_calc_abc_any _calc_pe/], + uses_nbr_lists => 1, # how many lists it must have + required_args => {'tree_ref' => 1}, + ); + + return $metadata_class->new(\%metadata); +} + +sub _calc_pe_lists { + my $self = shift; + my %args = @_; + + my $element_list_all = $args{element_list_all}; + + return $self->_calc_pe_lists_hierarchical(%args) + if defined $args{current_node_details} + && $self->get_hierarchical_mode + && @$element_list_all > 1; + + my $results_cache = $args{PE_RESULTS_CACHE}; + \my %rw_node_lengths = $args{inverse_range_weighted_node_lengths}; + + my $bd = $args{basedata_ref} || $self->get_basedata_ref; + + my $el_count = @$element_list_all; + + # prob a micro-optimisation, but might avoid + # some looping below when collating weights + # and one group has many more labels than the other + if ($el_count == 2) { + my $count0 = $bd->get_richness_aa ($element_list_all->[0]); + my $count1 = $bd->get_richness_aa ($element_list_all->[1]); + if ($count1 > $count0) { + $element_list_all = [ + $element_list_all->[1], + $element_list_all->[0], + ]; + } + } + + my (%wts, %local_ranges, %results); + + foreach my $group (@$element_list_all) { + # this is populated by _calc_pe + my $results_this_gp = $results_cache->{$group}; + + # Avoid some redundant slicing and dicing when we have only one group + # Pays off when processing large data sets + if ($el_count == 1) { + # no need to collate anything so make a shallow copy + $results{PE_WTLIST} = $results_this_gp->{PE_WTLIST}; + # but we do need to add to the local range hash + my $hashref = $results_this_gp->{PE_WTLIST}; + @local_ranges{keys %$hashref} = (1) x scalar keys %$hashref; + } + else { + # refalias might be a nano-optimisation here... + \my %wt_hash = $results_this_gp->{PE_WTLIST}; + + # Local ranges need to be summed unless + # we are starting from a blank slate. + # Weights are aggregated later. + if (keys %local_ranges) { + # postfix for speed + $local_ranges{$_}++ + foreach keys %wt_hash; + } + else { + @local_ranges{keys %wt_hash} = (1) x scalar keys %wt_hash; + } + } + } + + # collate + $wts{$_} = $rw_node_lengths{$_} * $local_ranges{$_} + for keys %local_ranges; + $results{PE_WTLIST} = \%wts; + + $results{PE_LOCAL_RANGELIST} = \%local_ranges; + + return wantarray ? %results : \%results; +} + +sub _calc_pe_lists_hierarchical { + my ($self, %args) = @_; + + my $element_list_all = $args{element_list_all}; + + my $node_data = $args{current_node_details} + // croak 'Must pass the current node details when in hierarchical mode'; + my $node_name = $node_data->{name} + // croak 'Missing current node name in hierarchical mode'; + my $child_names = $node_data->{child_names}; + + my $results_cache = $args{PE_RESULTS_CACHE}; + + my (%wts, %local_ranges, %results); + + foreach my $group (@$child_names) { + my $results_this_gp = $results_cache->{$group}; # Avoid some redundant slicing and dicing when we have only one group # Pays off when processing large data sets @@ -211,36 +294,23 @@ sub _calc_pe_hierarchical { if (keys %wts) { foreach my $node (keys %wt_hash) { $wts{$node} += $wt_hash{$node}; - $local_ranges{$node}++; } } else { %wts = %wt_hash; - @local_ranges{keys %wt_hash} = (1) x scalar keys %wt_hash; } - } - } - - { - no warnings 'uninitialized'; - my $total_tree_length = $tree_ref->get_total_tree_length; - - #Phylogenetic endemism = sum for all nodes of: - # (branch length/total tree length) / node range - $PE_WE_P = eval {$PE_WE / $total_tree_length}; - } - # need the collated versions for multiple elements - if (scalar @$element_list_all > 1) { - $results{PE_WE} = $PE_WE; - $results{PE_WTLIST} = \%wts; + my $cached_ranges = $results_this_gp->{PE_LOCAL_RANGELIST}; + $local_ranges{$_} += $cached_ranges->{$_} // 1 + foreach keys %wt_hash; + } } - # need to set these - $results{PE_WE_P} = $PE_WE_P; + $results{PE_WTLIST} = \%wts; $results{PE_LOCAL_RANGELIST} = \%local_ranges; - $results_cache->{$node_name} = {%results{qw/PE_WE PE_WTLIST/}}; + $results_cache->{$node_name}{PE_WTLIST} = \%wts; + $results_cache->{$node_name}{PE_LOCAL_RANGELIST} = \%local_ranges; return wantarray ? %results : \%results; } diff --git a/lib/Biodiverse/Indices/RWTurnover.pm b/lib/Biodiverse/Indices/RWTurnover.pm index ccfc9bb58..f8d309346 100644 --- a/lib/Biodiverse/Indices/RWTurnover.pm +++ b/lib/Biodiverse/Indices/RWTurnover.pm @@ -341,16 +341,21 @@ sub _calc_pe_lists_per_element_set { BY_LIST: foreach my $list_name (qw /element_list1 element_list2/) { $i++; # start at 1 so we match the numbered names - my $el_list = $args{$list_name} // next BY_LIST; - \my @elements = $el_list; # FIXME + \my @elements = $args{$list_name} // next BY_LIST; my $have_cache = (@elements == 1 && $cache->{$elements[0]}); $results[$i] = $have_cache ? $cache->{$elements[0]} - : $self->_calc_pe( - %args, - element_list_all => \@elements, - ); + : { # messy but we need results from both + $self->_calc_pe( + %args, + element_list_all => $args{$list_name}, + ), + $self->_calc_pe_lists( + %args, + element_list_all => $args{$list_name}, + ), + }; $cache->{$elements[0]} = $results[$i] if @elements == 1; } diff --git a/t/26-Cluster2.t b/t/26-Cluster2.t index 0100c5278..54d712e59 100644 --- a/t/26-Cluster2.t +++ b/t/26-Cluster2.t @@ -219,12 +219,12 @@ sub test_rw_turnover_mx { sub test_cluster_node_calcs { my %args = @_; - my $bd = $args{basedata_ref} || get_basedata_object_from_site_data(CELL_SIZES => [300000, 300000]); + my $bd = $args{basedata_ref} || get_basedata_object_from_site_data(CELL_SIZES => [400000, 400000]); my $prng_seed = $args{prng_seed} || $default_prng_seed; my $tree_ref = $args{tree_ref} || get_tree_object_from_sample_data(); - my $calcs = [qw/calc_pe calc_pd/]; + my $calcs = [qw/calc_pe calc_pe_lists calc_phylo_rpe2 calc_pe_central_lists calc_pd/]; my $cl1 = $bd->add_cluster_output (name => 'cl1'); $cl1->run_analysis ( From 923155000dc5ae1e47aac04929dc828dd1d5e280 Mon Sep 17 00:00:00 2001 From: shawnlaffan Date: Fri, 8 Mar 2024 14:43:30 +1100 Subject: [PATCH 25/27] remove some redundant commented code --- lib/Biodiverse/Indices/PhylogeneticRelative.pm | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/lib/Biodiverse/Indices/PhylogeneticRelative.pm b/lib/Biodiverse/Indices/PhylogeneticRelative.pm index cc420df1c..90d0a0cf9 100644 --- a/lib/Biodiverse/Indices/PhylogeneticRelative.pm +++ b/lib/Biodiverse/Indices/PhylogeneticRelative.pm @@ -331,9 +331,7 @@ sub calc_phylo_rpe_central { $phylo_rpe2 = eval {$pe_p_score / $null}; $diff = eval {$orig_total_tree_length * ($pe_p_score - $null)}; } - #if (defined $pe_nullx) { - #say STDERR "$pe_null $pe_nullx"; - #} + my $results = { PHYLO_RPEC => $phylo_rpe2, PHYLO_RPE_NULLC => $null, From f04a81950db312fc8d47920b81cec68be54b9574 Mon Sep 17 00:00:00 2001 From: shawnlaffan Date: Fri, 8 Mar 2024 19:09:37 +1100 Subject: [PATCH 26/27] Tests: change precision approach in Cluster2 --- t/26-Cluster2.t | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/t/26-Cluster2.t b/t/26-Cluster2.t index 54d712e59..052f24325 100644 --- a/t/26-Cluster2.t +++ b/t/26-Cluster2.t @@ -250,6 +250,7 @@ sub test_cluster_node_calcs { is [sort keys %$node_hash1], [sort keys %$node_hash2], 'paranoia check: same node names'; + my $prec = "%.10f"; my (%aggregate1, %aggregate2); foreach my $node_name (sort keys %$node_hash1) { my $node1 = $node_hash1->{$node_name}; @@ -258,8 +259,8 @@ sub test_cluster_node_calcs { foreach my $list_name (sort grep {$_ !~ /NODE_VALUES/}$node1->get_hash_lists) { my $ref1 = $node1->get_list_ref_aa($list_name); my $ref2 = $node2->get_list_ref_aa($list_name); - my $snapped1 = {map {$_ => sprintf "%.10f", $ref1->{$_}} keys %$ref1}; - my $snapped2 = {map {$_ => sprintf "%.10f", $ref2->{$_}} keys %$ref2}; + my $snapped1 = {map {$_ => snap_to_precision ($ref1->{$_}, $prec)} keys %$ref1}; + my $snapped2 = {map {$_ => snap_to_precision ($ref2->{$_}, $prec)} keys %$ref2}; $aggregate1{$node_name}{$list_name} = $snapped1; $aggregate2{$node_name}{$list_name} = $snapped2; } From 113cd2f6d48caaadda99daf352f1ac92c14fe941 Mon Sep 17 00:00:00 2001 From: shawnlaffan Date: Mon, 11 Mar 2024 09:34:15 +1100 Subject: [PATCH 27/27] Indices: _calc_pe_lists: call _calc_pe if needed to populate cache Most of the time _calc_pe will have been called previously so the cache will exist. However there are occasions where this is not done, for example the range weighted phylo turnover calcs. That calc has also now been simplified. --- lib/Biodiverse/Indices/Phylogenetic/RefAlias.pm | 13 ++++++++++++- lib/Biodiverse/Indices/RWTurnover.pm | 14 ++++---------- 2 files changed, 16 insertions(+), 11 deletions(-) diff --git a/lib/Biodiverse/Indices/Phylogenetic/RefAlias.pm b/lib/Biodiverse/Indices/Phylogenetic/RefAlias.pm index 8278f40d1..e51ed4af0 100644 --- a/lib/Biodiverse/Indices/Phylogenetic/RefAlias.pm +++ b/lib/Biodiverse/Indices/Phylogenetic/RefAlias.pm @@ -218,8 +218,13 @@ sub _calc_pe_lists { my (%wts, %local_ranges, %results); foreach my $group (@$element_list_all) { - # this is populated by _calc_pe + # this is populated by _calc_pe under the calc dependency system my $results_this_gp = $results_cache->{$group}; + # but can be called directly so need to handle that and populate the cache + if (!exists $results_this_gp->{PE_WTLIST}) { + $self->_calc_pe(%args); + $results_this_gp = $results_cache->{$group}; + } # Avoid some redundant slicing and dicing when we have only one group # Pays off when processing large data sets @@ -274,7 +279,13 @@ sub _calc_pe_lists_hierarchical { my (%wts, %local_ranges, %results); foreach my $group (@$child_names) { + # this is populated by _calc_pe under the calc dependency system my $results_this_gp = $results_cache->{$group}; + # but can be called directly so need to handle that and populate the cache + if (!exists $results_this_gp->{PE_WTLIST}) { + $self->_calc_pe(%args); + $results_this_gp = $results_cache->{$group}; + } # Avoid some redundant slicing and dicing when we have only one group # Pays off when processing large data sets diff --git a/lib/Biodiverse/Indices/RWTurnover.pm b/lib/Biodiverse/Indices/RWTurnover.pm index f8d309346..0f0217ad6 100644 --- a/lib/Biodiverse/Indices/RWTurnover.pm +++ b/lib/Biodiverse/Indices/RWTurnover.pm @@ -346,16 +346,10 @@ sub _calc_pe_lists_per_element_set { $results[$i] = $have_cache ? $cache->{$elements[0]} - : { # messy but we need results from both - $self->_calc_pe( - %args, - element_list_all => $args{$list_name}, - ), - $self->_calc_pe_lists( - %args, - element_list_all => $args{$list_name}, - ), - }; + : $self->_calc_pe_lists( + %args, + element_list_all => $args{$list_name}, + ); $cache->{$elements[0]} = $results[$i] if @elements == 1; }