Merge pull request #897 from shawnlaffan/index_bounds2
More index bounds work, plus some related index metadata changes and optimisations.

Also includes changes to module loading so that start-up times, and therefore the tests, are faster.
shawnlaffan authored Dec 18, 2023
2 parents c19b9e3 + 25bff6f commit 47d42f1
Showing 14 changed files with 227 additions and 120 deletions.
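For context, the start-time gain comes from deferring expensive module loads until first use rather than pulling them in at compile time. A minimal sketch of that pattern as it appears in the diff below, using Spreadsheet::Read; the surrounding sub and file handling are illustrative only:

    use strict;
    use warnings;

    sub load_spreadsheet_book {
        my ($file) = @_;

        # Deferred load: the module is only compiled the first time a
        # spreadsheet import is actually requested, so programs (and
        # tests) that never touch spreadsheets start faster.
        require Spreadsheet::Read;
        Spreadsheet::Read->VERSION('0.80');    # enforce the minimum version

        # ReadData is Spreadsheet::Read's standard entry point.
        return Spreadsheet::Read::ReadData($file);
    }

The trade-off is that version problems surface at run time rather than at compile time, which is why the VERSION check sits directly beside the require.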
8 changes: 0 additions & 8 deletions lib/Biodiverse/BaseData.pm
@@ -23,18 +23,10 @@ use Data::Compare ()

use Ref::Util qw { :all };
use Sort::Key::Natural qw /natkeysort/;
use Spreadsheet::Read 0.60;


use experimental qw /refaliasing declared_refs/;

use Geo::GDAL::FFI 0.07;
# silence a used-once warning - clunky
{
my $xx_frob_temp_zort = $FFI::Platypus::TypeParser::ffi_type;
my $xx_frob_temp_zert = $FFI::Platypus::keep;
}
#require Spreadsheet::XLSX; latest version does not install

use English qw { -no_match_vars };

9 changes: 8 additions & 1 deletion lib/Biodiverse/BaseData/Import.pm
@@ -23,9 +23,12 @@ use Data::Compare ();

use Ref::Util qw { :all };
use Sort::Key::Natural qw /natkeysort/;
use Spreadsheet::Read 0.82;

use Geo::GDAL::FFI 0.07;
{
my $xx_frob_temp_zort = $FFI::Platypus::TypeParser::ffi_type;
my $xx_frob_temp_zert = $FFI::Platypus::keep;
}

use experimental 'declared_refs';

@@ -1682,6 +1685,10 @@ sub import_data_spreadsheet {
my $self = shift;
my %args = @_;

# load at run time as it takes a while
require Spreadsheet::Read;
Spreadsheet::Read->VERSION('0.80');

my $orig_group_count = $self->get_group_count;
my $orig_label_count = $self->get_label_count;
my $data_in_matrix_form = $args{data_in_matrix_form};
9 changes: 5 additions & 4 deletions lib/Biodiverse/Common.pm
@@ -32,7 +32,6 @@ use HTML::QuickTable;
use Class::Inspector;
use Ref::Util qw { :all };
use File::BOM qw / :subs /;
use Spreadsheet::Read;

# Need to avoid an OIO destroyed twice warning due
# to HTTP::Tiny, which is used in Biodiverse::GUI::Help
@@ -1812,7 +1811,9 @@ sub unlink_file {
sub get_book_struct_from_spreadsheet_file {
my ($self, %args) = @_;

#use SpreadSheet::Read;
require Spreadsheet::Read;
Spreadsheet::Read->import('ReadData');

my $book;
my $file = $args{file_name}
// croak 'file_name argument not passed';
@@ -1856,7 +1857,7 @@ sub get_book_struct_from_xlsx_file {
my ($self, %args) = @_;
my $file = $args{filename} // croak "filename arg not passed";

use Excel::ValueReader::XLSX;
require Excel::ValueReader::XLSX;
use List::Util qw/reduce/;
my $reader = Excel::ValueReader::XLSX->new($file);
my @sheet_names = $reader->sheet_names;
@@ -2058,7 +2059,7 @@ sub get_cached_metadata {
my $self = shift;

my $cache
= $self->get_cached_value_dor_set_default_aa ('METADATA_CACHE', {});
= $self->get_cached_value_dor_set_default_href ('METADATA_CACHE');
# reset the cache if the versions differ (typically they would be older),
# this ensures new options are loaded
$cache->{__VERSION} //= 0;
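The get_cached_metadata hunk above swaps the *_aa accessor for a hash-ref-default variant. The method body in this sketch is an assumption about what such a defined-or accessor does; only the method name and the 'METADATA_CACHE' key come from the diff, and the _cache slot is likewise hypothetical:

    # Assumed behaviour ("dor" = defined-or): return the cached value
    # for $key, first initialising it to an empty hash ref if needed.
    sub get_cached_value_dor_set_default_href {
        my ($self, $key) = @_;
        return $self->{_cache}{$key} //= {};
    }

    # Caller, as in the diff:
    # my $cache = $self->get_cached_value_dor_set_default_href('METADATA_CACHE');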
60 changes: 2 additions & 58 deletions lib/Biodiverse/Config.pm
@@ -112,12 +112,8 @@ BEGIN {
# Should loop this.
BEGIN {
# more general solution for anything new
# add modules as they are required
my @reqd = qw /
Text::Fuzzy
Data::Compare
List::Unique::DeterministicOrder
Sort::Key::Natural
Statistics::Descriptive::PDL
/;
foreach my $module (@reqd) {
if (not eval "require $module") {
@@ -245,59 +241,7 @@ sub get_load_extension_errors {
}

# need this for the pp build to work
if ($ENV{BDV_PP_BUILDING}) {
use utf8;
say 'Building pp file';
say "using $0";
use File::BOM qw / :subs /; # we need File::BOM.
open my $fh, '<:via(File::BOM)', $0 # just read ourselves
or croak "Cannot open $0 via File::BOM\n";
$fh->close;

# more File::BOM issues
require encoding;

# exercise the unicode regexp matching - needed for the spatial conditions
use 5.016;
use feature 'unicode_strings';
my $string = "sp_self_only () and \N{WHITE SMILING FACE}";
$string =~ /\bsp_self_only\b/;

# load extra encode pages, except the extended ones (for now)
# https://metacpan.org/pod/distribution/Encode/lib/Encode/Supported.pod#CJK:-Chinese-Japanese-Korean-Multibyte
use Encode::CN;
use Encode::JP;
use Encode::KR;
use Encode::TW;

# Big stuff needs loading (poss not any more with PAR>1.08)
use Math::BigInt;

use Alien::gdal ();
use Alien::geos::af ();
use Alien::proj ();
use Alien::sqlite ();
#eval 'use Alien::spatialite'; # might not have this one
#eval 'use Alien::freexl'; # might not have this one

# these are here for PAR purposes to ensure they get packed
# Spreadsheet::Read calls them as needed
# (not sure we need all of them, though)
use Spreadsheet::ParseODS 0.27;
use Spreadsheet::ReadSXC;
use Spreadsheet::ParseExcel;
use Spreadsheet::ParseXLSX;
use PerlIO::gzip; # used by ParseODS
# Excel::ValueReader::XLSX
use Excel::ValueReader::XLSX;
use Excel::ValueReader::XLSX::Backend;
use Excel::ValueReader::XLSX::Backend::Regex;
use Archive::Zip

# GUI needs this for help,
# so don't trigger for engine-only
eval 'use IO::Socket::SSL';
}
use if $ENV{BDV_PP_BUILDING}, 'Biodiverse::Config::PARModules';


1;
95 changes: 95 additions & 0 deletions lib/Biodiverse/Config/PARModules.pm
@@ -0,0 +1,95 @@
package Biodiverse::Config::PARModules;
use strict;
use warnings;
use 5.016;
use Carp;

use utf8;
say 'Building pp file';
say "using $0";
use File::BOM qw / :subs /; # we need File::BOM.
open my $fh, '<:via(File::BOM)', $0 # just read ourselves
or croak "Cannot open $0 via File::BOM\n";
$fh->close;

# more File::BOM issues
require encoding;

# exercise the unicode regexp matching - needed for the spatial conditions
use feature 'unicode_strings';
my $string = "sp_self_only () and \N{WHITE SMILING FACE}";
$string =~ /\bsp_self_only\b/;

# load extra encode pages, except the extended ones (for now)
# https://metacpan.org/pod/distribution/Encode/lib/Encode/Supported.pod#CJK:-Chinese-Japanese-Korean-Multibyte
use Encode::CN;
use Encode::JP;
use Encode::KR;
use Encode::TW;

# Big stuff needs loading (poss not any more with PAR>1.08)
use Math::BigInt;

use Alien::gdal ();
use Alien::geos::af ();
use Alien::proj ();
use Alien::sqlite ();
#eval 'use Alien::spatialite'; # might not have this one
#eval 'use Alien::freexl'; # might not have this one

# these are here for PAR purposes to ensure they get packed
# Spreadsheet::Read calls them as needed
# (not sure we need all of them, though)
use Spreadsheet::ParseODS 0.27;
use Spreadsheet::ReadSXC;
use Spreadsheet::ParseExcel;
use Spreadsheet::ParseXLSX;
use PerlIO::gzip; # used by ParseODS
# Excel::ValueReader::XLSX
use Excel::ValueReader::XLSX;
use Excel::ValueReader::XLSX::Backend;
use Excel::ValueReader::XLSX::Backend::Regex;
use Archive::Zip

# GUI needs this for help,
# so don't trigger for engine-only
eval 'use IO::Socket::SSL';

1;



=head1 NAME
Biodiverse::Config::PARModules
=head1 DESCRIPTION
Loads extra modules when using PAR::Packer.
Not for direct use.
=head1 SYNOPSIS
=head1 AUTHOR
Shawn Laffan
=head1 License
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
For a full copy of the license see <http://www.gnu.org/licenses/>.
=cut

53 changes: 46 additions & 7 deletions lib/Biodiverse/Indices.pm
@@ -95,6 +95,12 @@ sub reset_results {
sub get_calculations {
my $self = shift;

state $cache_key = 'GET_CALCULATIONS';
my $cached = $self->get_cached_value($cache_key);

return wantarray ? %$cached : $cached
if $cached;

my %calculations;

my $list = Class::Inspector->methods( blessed $self );
@@ -105,6 +111,8 @@ sub get_calculations {
push @{ $calculations{ $metadata->get_type } }, $method;
}

$self->set_cached_value($cache_key => \%calculations);

return wantarray ? %calculations : \%calculations;
}

@@ -360,6 +368,7 @@ sub get_calculation_metadata {
my $self = shift;

if (!blessed $self) {
require Biodiverse::BaseData;
state $default_bd = Biodiverse::BaseData->new (
NAME => 'for indices',
CELL_SIZES => [1,1],
@@ -380,10 +389,21 @@

sub get_calculation_metadata_as_json {
my $self = shift;
my $metadata = $self->get_calculation_metadata;
my $json_obj = JSON::MaybeXS::JSON()->new;
$json_obj->convert_blessed(1);
return $json_obj->encode($metadata);
$json_obj->pretty(1);
$json_obj->canonical(1);

my $metadata = $self->get_calculation_metadata;
my $struct = {
_meta => {
title => 'Biodiverse calculations and indices',
version => $VERSION,
},
calculations => $metadata,
};

return $json_obj->encode($struct);
}

# now we have moved to github
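For reference, the reworked get_calculation_metadata_as_json above builds a JSON::MaybeXS encoder with pretty, canonical (key-sorted) and convert_blessed output, then wraps the metadata in a small envelope. A stand-alone sketch of the same usage; the data structure here is a placeholder:

    use JSON::MaybeXS ();

    # pretty + canonical gives stable, diffable output; convert_blessed
    # lets metadata objects serialise themselves via a TO_JSON method.
    my $json_obj = JSON::MaybeXS->new(
        pretty          => 1,
        canonical       => 1,
        convert_blessed => 1,
    );

    my $struct = {
        _meta        => { title => 'example export', version => '1.0' },
        calculations => { calc_example => { description => 'placeholder' } },
    };

    print $json_obj->encode($struct);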
@@ -1052,7 +1072,9 @@ sub get_index_source { # return the source sub for an index
}

# get a hash of indices arising from these calculations (keys),
# with the analysis as the value
# with the analysis as the value.
# Not sure why we are filtering on the nbr list counts here,
# but changing it will need modifications to calls from Spatial.pm.
sub get_index_source_hash {
my $self = shift;
my %args = @_;
@@ -1061,6 +1083,21 @@ sub get_index_source_hash {
my $using_nbr_list_count = $args{uses_nbr_lists}
// 2; # need to use 2 for back compat

# a little caching until we stop dealing with the nbr list counts
if ($using_nbr_list_count == 2 && !$args{calculations} && !$args{no_cache}) {
state $cachekey = 'GET_INDEX_SOURCE_HASH_NBR_COUNT_2';
my $cached = $self->get_cached_value($cachekey);
return wantarray ? %$cached : $cached
if $cached;
$cached = $self->get_index_source_hash (
calculations => undef,
uses_nbr_lists => 2,
no_cache => 1,
);
$self->set_cached_value ($cachekey => $cached);
return wantarray ? %$cached : $cached;
}

CALC:
foreach my $calculations ( keys %$list ) {
my $meta = $self->get_metadata( sub => $calculations );
@@ -1069,10 +1106,12 @@

INDEX:
foreach my $index ( keys %{ $meta->get_indices } ) {
my $index_uses_nbr_lists = $meta->get_index_uses_nbr_lists($index)
// 1;
next INDEX if $using_nbr_list_count < $index_uses_nbr_lists;

# revisit this if we ever support more than two nbr sets
if ($using_nbr_list_count < 2) {
my $index_uses_nbr_lists = $meta->get_index_uses_nbr_lists($index)
// 1;
next INDEX if $using_nbr_list_count < $index_uses_nbr_lists;
}
$list2{$index}{$calculations}++;
}
}
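Both get_calculations and the no-argument case of get_index_source_hash above now memoise their results through the object cache while still honouring the caller's list/scalar context. A condensed sketch of that pattern; the cache accessors mirror the diff, but _build_calculations_hash is a hypothetical builder:

    use feature 'state';

    sub get_calculations_cached {
        my $self = shift;

        state $cache_key = 'GET_CALCULATIONS';

        # Return the memoised result if we already have one,
        # as a hash in list context or a hash ref otherwise.
        my $cached = $self->get_cached_value($cache_key);
        return wantarray ? %$cached : $cached
            if $cached;

        my %calculations = $self->_build_calculations_hash;   # hypothetical builder

        $self->set_cached_value($cache_key => \%calculations);

        return wantarray ? %calculations : \%calculations;
    }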
2 changes: 1 addition & 1 deletion lib/Biodiverse/Indices/Endemism.pm
@@ -839,7 +839,7 @@ sub get_metadata_calc_endemism_absolute {
distribution => 'unit_interval',
},
END_ABS_ALL_P => {
description => 'Proportion of labels entirely endemic to neighbour sets 1 and 2 combined',\
description => 'Proportion of labels entirely endemic to neighbour sets 1 and 2 combined',
distribution => 'unit_interval',
},
},