From 19097021ecd198d80b2ba9c6afad1175bcc42c3b Mon Sep 17 00:00:00 2001 From: shawnlaffan Date: Thu, 22 Feb 2024 14:08:20 +1100 Subject: [PATCH 1/7] CellPopup.pm: Fix a long standing bug It was calling a non-existent sub. --- lib/Biodiverse/GUI/CellPopup.pm | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/lib/Biodiverse/GUI/CellPopup.pm b/lib/Biodiverse/GUI/CellPopup.pm index 4a113348d..15b1c4ce0 100644 --- a/lib/Biodiverse/GUI/CellPopup.pm +++ b/lib/Biodiverse/GUI/CellPopup.pm @@ -378,7 +378,7 @@ sub show_output_list { $model->set($iter, 0, $key , 1, $val); } } - elsif (is_listref($list_ref)) { + elsif (is_arrayref($list_ref)) { my $numeric = 1; foreach my $key (@$list_ref) { if (! looks_like_number ($key)) { @@ -394,7 +394,7 @@ sub show_output_list { my @keys = $numeric ? sort {$a <=> $b} @$list_ref - : natsort keys @$list_ref; + : natsort @$list_ref; foreach my $elt (@keys) { #print "[Cell popup] Adding output array entry $elt\n"; From 08c16fd37d8c4cb51338adcf9e4d4764e77e7eec Mon Sep 17 00:00:00 2001 From: shawnlaffan Date: Thu, 22 Feb 2024 20:48:52 +1100 Subject: [PATCH 2/7] Indices: add calc_element_lists_used_as_arrays This supersedes calc_element_lists_used as it does not return consistent data structures. It is a mix of arrays and hashes. --- lib/Biodiverse/Indices/Indices.pm | 86 +++++++++++++++++++++++++++++-- t/24-Indices-lists-and-counts.t | 51 +++++++++++------- 2 files changed, 115 insertions(+), 22 deletions(-) diff --git a/lib/Biodiverse/Indices/Indices.pm b/lib/Biodiverse/Indices/Indices.pm index 308be12a1..42382db85 100644 --- a/lib/Biodiverse/Indices/Indices.pm +++ b/lib/Biodiverse/Indices/Indices.pm @@ -1567,8 +1567,11 @@ sub get_metadata_calc_element_lists_used { my %metadata = ( name => "Element lists", - description => "Lists of elements used in neighbour sets 1 and 2.\n" - . 'These form the basis for all the spatial calculations.', + description => + "[DEPRECATED] Lists of elements used in neighbour sets 1 and 2.\n" + . 'These form the basis for all the spatial calculations. ' + . 'The return types are inconsistent. New code should use ' + . 'calc_element_lists_used_as_arrays', type => 'Lists and Counts', pre_calc => 'calc_abc', uses_nbr_lists => 1, # how many sets of lists it must have @@ -1595,9 +1598,28 @@ sub get_metadata_calc_element_lists_used { sub calc_element_lists_used { my $self = shift; - my %args = @_; # rest of args into a hash + my $set1 = $args{element_list1}; + my $set2 = $args{element_list2}; + my $set3 = $args{element_list_all}; + + # these two are hashes + if ($set1 && !is_hashref $set1) { + my $href = {}; + @$href{@$set1} = (1) x @$set1; + $set1 = $href; + } + if ($set2 && !is_hashref $set2) { + my $href = {}; + @$href{@$set2} = (1) x @$set2; + $set2 = $href; + } + # this is an array + if ($set3 && is_hashref $set3) { + $set2 = [keys %$set3]; + } + my %results = ( EL_LIST_SET1 => $args{element_list1}, EL_LIST_SET2 => $args{element_list2}, @@ -1607,6 +1629,64 @@ sub calc_element_lists_used { return wantarray ? %results : \%results; } +sub get_metadata_calc_element_lists_used_as_arrays { + my $self = shift; + + my %metadata = ( + name => "Element arrays", + description => "Arrays of elements used in neighbour sets 1 and 2.\n" + . 'These form the basis for all the spatial calculations.', + type => 'Lists and Counts', + pre_calc => 'calc_abc', + uses_nbr_lists => 1, # how many sets of lists it must have + indices => { + EL_ARRAY_SET1 => { + description => 'Array of elements in neighbour set 1', + type => 'list', + }, + EL_ARRAY_SET2 => { + description => 'Array of elements in neighbour set 2', + uses_nbr_lists => 2, + type => 'list', + }, + EL_ARRAY_ALL => { + description => 'Array of elements in both neighbour sets', + uses_nbr_lists => 2, + type => 'list', + }, + }, + ); + + return $metadata_class->new(\%metadata); +} + +sub calc_element_lists_used_as_arrays { + my $self = shift; + my %args = @_; # rest of args into a hash + + my $set1 = $args{element_list1}; + my $set2 = $args{element_list2}; + my $set3 = $args{element_list_all}; + + if ($set1 && is_hashref $set1) { + $set1 = [keys %$set1]; + } + if ($set2 && is_hashref $set2) { + $set2 = [keys %$set2]; + } + if ($set3 && is_hashref $set3) { + $set3 = [keys %$set3]; + } + + my %results = ( + EL_ARRAY_SET1 => $set1, + EL_ARRAY_SET2 => $set2, + EL_ARRAY_ALL => $set3, + ); + + return wantarray ? %results : \%results; +} + sub get_metadata_calc_abc { diff --git a/t/24-Indices-lists-and-counts.t b/t/24-Indices-lists-and-counts.t index 6a3c80c1d..6a4b25e08 100644 --- a/t/24-Indices-lists-and-counts.t +++ b/t/24-Indices-lists-and-counts.t @@ -46,6 +46,7 @@ sub test_main { calc_nonempty_elements_used calc_elements_used calc_element_lists_used + calc_element_lists_used_as_arrays calc_abc_counts calc_d calc_local_range_lists @@ -59,7 +60,7 @@ sub test_main { /], calc_topic_to_test => 'Lists and Counts', sort_array_lists => 1, - #generate_result_sets => 1, + # generate_result_sets => 1, ); } @@ -116,7 +117,8 @@ done_testing; __DATA__ @@ RESULTS_2_NBR_LISTS -{ ABC2_LABELS_ALL => { +{ + ABC2_LABELS_ALL => { 'Genus:sp1' => 2, 'Genus:sp10' => 1, 'Genus:sp11' => 2, @@ -277,19 +279,29 @@ __DATA__ ABC_B => 0, ABC_C => 12, ABC_D => 17, - EL_COUNT_ALL => 5, - EL_COUNT_SET1 => 1, - EL_COUNT_SET2 => 4, + EL_ARRAY_ALL => [ + '3350000:750000', '3250000:850000', + '3350000:950000', '3450000:850000', + '3350000:850000' + ], + EL_ARRAY_SET1 => ['3350000:850000'], + EL_ARRAY_SET2 => [ + '3350000:950000', '3350000:750000', + '3450000:850000', '3250000:850000' + ], + EL_COUNT_ALL => 5, EL_COUNT_NONEMPTY_ALL => 5, EL_COUNT_NONEMPTY_SET1 => 1, EL_COUNT_NONEMPTY_SET2 => 4, - EL_LIST_ALL => [ - '3250000:850000', '3350000:850000', - '3350000:750000', '3350000:950000', - '3450000:850000' + EL_COUNT_SET1 => 1, + EL_COUNT_SET2 => 4, + EL_LIST_ALL => [ + '3350000:750000', '3250000:850000', + '3350000:950000', '3450000:850000', + '3350000:850000' ], - EL_LIST_SET1 => { '3350000:850000' => 1 }, - EL_LIST_SET2 => { + EL_LIST_SET1 => { '3350000:850000' => 1 }, + EL_LIST_SET2 => { '3250000:850000' => 1, '3350000:750000' => 1, '3350000:950000' => 1, @@ -317,7 +329,7 @@ __DATA__ RICHNESS_ALL => 14, RICHNESS_SET1 => 2, RICHNESS_SET2 => 14 -} +}; @@ RESULTS_1_NBR_LISTS @@ -356,15 +368,16 @@ __DATA__ Q095 => 4, Q100 => 4 }, - ABC3_SD_SET1 => '1.4142135623731', - ABC3_SUM_SET1 => 6, - ABC_D => 29, - EL_COUNT_SET1 => 1, - EL_COUNT_ALL => 1, + ABC3_SD_SET1 => '1.4142135623731', + ABC3_SUM_SET1 => 6, + ABC_D => 29, + EL_ARRAY_SET1 => ['3350000:850000'], + EL_COUNT_ALL => 1, EL_COUNT_NONEMPTY_ALL => 1, EL_COUNT_NONEMPTY_SET1 => 1, - EL_LIST_SET1 => { '3350000:850000' => 1 }, - LABEL_COUNT_RANK_PCT => { + EL_COUNT_SET1 => 1, + EL_LIST_SET1 => { '3350000:850000' => 1 }, + LABEL_COUNT_RANK_PCT => { 'Genus:sp20' => undef, 'Genus:sp26' => undef }, From a3fff38315b3c43faf67c13fe1d2f556bbcadc3e Mon Sep 17 00:00:00 2001 From: shawnlaffan Date: Thu, 22 Feb 2024 21:13:59 +1100 Subject: [PATCH 3/7] Indices: calc_abc subs return element_lists as arrays This standardises across list1, list2 and list_all. --- lib/Biodiverse/Indices/Indices.pm | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/lib/Biodiverse/Indices/Indices.pm b/lib/Biodiverse/Indices/Indices.pm index 42382db85..ec0913e75 100644 --- a/lib/Biodiverse/Indices/Indices.pm +++ b/lib/Biodiverse/Indices/Indices.pm @@ -1621,9 +1621,9 @@ sub calc_element_lists_used { } my %results = ( - EL_LIST_SET1 => $args{element_list1}, - EL_LIST_SET2 => $args{element_list2}, - EL_LIST_ALL => $args{element_list_all}, + EL_LIST_SET1 => $set1, + EL_LIST_SET2 => $set2, + EL_LIST_ALL => $set3, ); return wantarray ? %results : \%results; @@ -1808,8 +1808,8 @@ sub _calc_abc_one_element { label_hash_all => \%label_list_master, label_hash1 => \%label_hash1, label_hash2 => {}, - element_list1 => {$element => 1}, - element_list2 => {}, + element_list1 => [$element], + element_list2 => [], element_list_all => [$element], element_count1 => 1, element_count2 => 0, @@ -1894,8 +1894,8 @@ sub _calc_abc_pairwise_mode { label_hash_all => \%label_list_master, label_hash1 => \%label_hash1, label_hash2 => \%label_hash2, - element_list1 => {$element1 => 1}, - element_list2 => {$element2 => 1}, + element_list1 => [$element1], + element_list2 => [$element2], element_list_all => [$element1, $element2], element_count1 => 1, element_count2 => 1, @@ -2080,8 +2080,8 @@ sub _calc_abc { # required by all the other indices, as it gets the labels in label_hash_all => \%label_list_master, label_hash1 => $label_list{1}, label_hash2 => $label_list{2}, - element_list1 => $element_check{1}, - element_list2 => $element_check{2}, + element_list1 => [keys %{$element_check{1}}], + element_list2 => [keys %{$element_check{2}}], element_list_all => [keys %element_check_master], element_count1 => $element_count1, element_count2 => $element_count2, From d816e5262d550cd07cd3cfc4f524cc20c864d532 Mon Sep 17 00:00:00 2001 From: shawnlaffan Date: Thu, 22 Feb 2024 21:15:12 +1100 Subject: [PATCH 4/7] Indices: update to use element list arrays --- lib/Biodiverse/Indices/Phylogenetic.pm | 12 ++++++------ lib/Biodiverse/Indices/RWTurnover.pm | 2 +- 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/lib/Biodiverse/Indices/Phylogenetic.pm b/lib/Biodiverse/Indices/Phylogenetic.pm index 7f96b39d4..707bcdb05 100644 --- a/lib/Biodiverse/Indices/Phylogenetic.pm +++ b/lib/Biodiverse/Indices/Phylogenetic.pm @@ -2538,7 +2538,7 @@ sub calc_phylo_abc { foreach my $list_name (qw/element_list1 element_list2/) { $i++; # start at 1 so we match the numbered names my $el_list = $args{$list_name} // next BY_LIST; - my @elements = keys %$el_list; + \my @elements = $el_list; # FIXME FIXME - use directly my $have_cache = (@elements == 1 && $cache->{$elements[0]}); $nodes_in_path[$i] = (@elements == 0) @@ -2564,9 +2564,9 @@ sub calc_phylo_abc { # simplify the calcs as we only need to find $aa my $cache = $self->get_cached_value_dor_set_default_href ('_calc_phylo_abc_pairwise_branch_sum_cache'); - my $sum_i = $cache->{(keys %{$args{element_list1}})[0]} # use postfix deref? + my $sum_i = $cache->{$args{element_list1}[0]} # use postfix deref? //= (sum values %list1) // 0; - my $sum_j = $cache->{(keys %{$args{element_list2}})[0]} + my $sum_j = $cache->{$args{element_list2}[0]} //= (sum values %list2) // 0; # save some looping, mainly when there are large differences in key counts if (keys %list1 <= keys %list2) { @@ -2634,15 +2634,15 @@ sub _calc_phylo_abc_lists { %args, labels => $label_hash1, tree_ref => $tree, - el_list => [keys %{$args{element_list1}}], + el_list => $args{element_list1}, ); - my $nodes_in_path2 = scalar %{$args{element_list2}} + my $nodes_in_path2 = @{$args{element_list2}} ? $self->get_path_lengths_to_root_node ( %args, labels => $label_hash2, tree_ref => $tree, - el_list => [keys %{$args{element_list2}}], + el_list => $args{element_list2}, ) : {}; diff --git a/lib/Biodiverse/Indices/RWTurnover.pm b/lib/Biodiverse/Indices/RWTurnover.pm index 71c47752c..0f37ba745 100644 --- a/lib/Biodiverse/Indices/RWTurnover.pm +++ b/lib/Biodiverse/Indices/RWTurnover.pm @@ -358,7 +358,7 @@ sub _calc_pe_lists_per_element_set { foreach my $list_name (qw /element_list1 element_list2/) { $i++; # start at 1 so we match the numbered names my $el_list = $args{$list_name} // next BY_LIST; - my @elements = keys %$el_list; + \my @elements = $el_list; # FIXME my $have_cache = (@elements == 1 && $cache->{$elements[0]}); $results[$i] = $have_cache From 18a9bd72a811640dd9713c540c1fe1118f323c00 Mon Sep 17 00:00:00 2001 From: shawnlaffan Date: Thu, 22 Feb 2024 21:25:55 +1100 Subject: [PATCH 5/7] RWTurnover.pm: No need to check for element list hashes now This simplifies the code. --- lib/Biodiverse/Indices/RWTurnover.pm | 24 ++++-------------------- 1 file changed, 4 insertions(+), 20 deletions(-) diff --git a/lib/Biodiverse/Indices/RWTurnover.pm b/lib/Biodiverse/Indices/RWTurnover.pm index 0f37ba745..2cd6454bc 100644 --- a/lib/Biodiverse/Indices/RWTurnover.pm +++ b/lib/Biodiverse/Indices/RWTurnover.pm @@ -66,20 +66,12 @@ sub calc_rw_turnover { # or inverse of ranges my $cache = $self->get_cached_value_dor_set_default_href ('_calc_phylo_rwt_pairwise_branch_sum_cache'); - # use postfix idiom? - # ideally we would only be passed arrays, but see issue #919 - my $key1 = is_hashref ($args{element_list1}) - ? ((keys %{$args{element_list1}})[0]) - : (${$args{element_list1}}[0]); - my $key2 = is_hashref ($args{element_list2}) - ? ((keys %{$args{element_list2}})[0]) - : (${$args{element_list2} //[]}[0]); # Could use a reduce call to collapse the "sum map {} @list" idiom, # thus avoiding a list generation. These are only run once per group, # though, so it might not matter. - my $sum_i = $cache->{$key1} + my $sum_i = $cache->{$args{element_list1}[0]} //= (sum map {1 / $_} @ranges{keys %list1}) // 0; - my $sum_j = $cache->{$key2} + my $sum_j = $cache->{$args{element_list2}[0]} //= (sum map {1 / $_} @ranges{keys %list2}) // 0; # save some looping, mainly when there are large differences in key counts if (keys %list1 <= keys %list2) { @@ -173,20 +165,12 @@ sub calc_phylo_rw_turnover { # simplify the calcs as we only need to find $aa my $cache = $self->get_cached_value_dor_set_default_href ('_calc_phylo_rwt_pairwise_branch_sum_cache'); - # use postfix idiom? - # ideally we would only be passed arrays, but see issue #919 - my $key1 = is_hashref ($args{element_list1}) - ? ((keys %{$args{element_list1}})[0]) - : (${$args{element_list1}}[0]); - my $key2 = is_hashref ($args{element_list2}) - ? ((keys %{$args{element_list2}})[0]) - : (${$args{element_list2} //[]}[0]); # Could use a reduce call to collapse the "sum map {} @list" idiom, # thus avoiding a list generation. These are only run once per group, # though, so it might not matter. - my $sum_i = $cache->{$key1} + my $sum_i = $cache->{$args{element_list1}[0]} //= (sum values %list1) // 0; - my $sum_j = $cache->{$key2} + my $sum_j = $cache->{$args{element_list2}[0]} //= (sum values %list2) // 0; # save some looping, mainly when there are large differences in key counts if (keys %list1 <= keys %list2) { From 34ec68e7b94b4a6980de21ffd4f2c153ccd60dd9 Mon Sep 17 00:00:00 2001 From: shawnlaffan Date: Thu, 22 Feb 2024 21:42:03 +1100 Subject: [PATCH 6/7] PhylogeneticRelative.pm: Remove an unused variable --- lib/Biodiverse/Indices/PhylogeneticRelative.pm | 1 - 1 file changed, 1 deletion(-) diff --git a/lib/Biodiverse/Indices/PhylogeneticRelative.pm b/lib/Biodiverse/Indices/PhylogeneticRelative.pm index e74b5231d..009887f52 100644 --- a/lib/Biodiverse/Indices/PhylogeneticRelative.pm +++ b/lib/Biodiverse/Indices/PhylogeneticRelative.pm @@ -58,7 +58,6 @@ sub calc_phylo_rpd1 { my $total_tree_length = $tree->get_total_tree_length; my $pd_p_score = $args{PD_P}; - my $pd_score = $args{PD_P}; my $label_hash = $args{PHYLO_LABELS_ON_TREE}; my $richness = scalar keys %$label_hash; From 8cb2a318337c74b06d0f2a88182fd7a56228aaed Mon Sep 17 00:00:00 2001 From: shawnlaffan Date: Thu, 22 Feb 2024 21:43:04 +1100 Subject: [PATCH 7/7] Indices: _calc_pe_lists_per_element_set does not need calc_abc precalc It uses the element arrays directly. This will speed up matrix building. --- lib/Biodiverse/Indices/RWTurnover.pm | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/Biodiverse/Indices/RWTurnover.pm b/lib/Biodiverse/Indices/RWTurnover.pm index 2cd6454bc..ccfc9bb58 100644 --- a/lib/Biodiverse/Indices/RWTurnover.pm +++ b/lib/Biodiverse/Indices/RWTurnover.pm @@ -317,7 +317,7 @@ sub get_metadata__calc_pe_lists_per_element_set { set_path_length_cache_by_group_flag get_inverse_range_weighted_path_lengths /], - pre_calc => ['calc_abc'], # don't need calc_abc2 as we don't use its counts + pre_calc => [], # don't need calc_abc2 as we don't use its counts uses_nbr_lists => 1, # how many lists it must have required_args => {'tree_ref' => 1}, );