From 48ed36a05c4d7299b9985b7b571285edf5388a32 Mon Sep 17 00:00:00 2001 From: Nigel Horne Date: Sun, 7 Apr 2024 08:29:34 -0400 Subject: [PATCH] Start a proof of concept --- Makefile.PL | 1 + lib/Geo/Coder/Free.pm | 17 ++++++++++++++++- 2 files changed, 17 insertions(+), 1 deletion(-) diff --git a/Makefile.PL b/Makefile.PL index a59b9261..07b41c10 100644 --- a/Makefile.PL +++ b/Makefile.PL @@ -240,6 +240,7 @@ if(open(my $admin2, '>>', 'lib/Geo/Coder/Free/MaxMind/databases/admin2.db')) { # For bin/create_sqlite my $build_requires = { 'App::csv2sqlite' => 0, + 'Array::Iterator' => 0, 'CHI' => 0, 'CHI::Driver::RawMemory' => 0, 'Cwd' => 0, diff --git a/lib/Geo/Coder/Free.pm b/lib/Geo/Coder/Free.pm index 226de8b7..a2a8a4ab 100644 --- a/lib/Geo/Coder/Free.pm +++ b/lib/Geo/Coder/Free.pm @@ -9,6 +9,7 @@ use warnings; # use lib '.'; +use Array::Iterator; use Config::Auto; use Geo::Coder::Abbreviations; use Geo::Coder::Free::MaxMind; @@ -198,9 +199,23 @@ sub geocode { my @rc = $self->{'openaddr'}->geocode(\%params); if((my $scantext = $params{'scantext'}) && (my $region = $params{'region'})) { $scantext =~ s/\W+/ /g; + my @a = List::MoreUtils::uniq(split(/\s/, $scantext)); + my $iterator = Array::Iterator->new({ __array__ => \@a }); + while(my $w = $iterator->get_next()) { + next if(exists($common_words{lc($w)})); + if($w =~ /^[a-z]{2,}$/i) { + my $peek = $iterator->peek(); + last if(!defined($peek)); + my $peekpeek = $peek->peek(); + last if(!defined($peekpeek)); + my $s = "$w $peek $peekpeek"; + ::diag($s); + } + } + foreach my $word(List::MoreUtils::uniq(split(/\s/, $scantext))) { # FIXME: There are a *lot* of false positives - next if(exists($common_words{lc$word})); + next if(exists($common_words{lc($word)})); if($word =~ /^[a-z]{2,}$/i) { my $key = "$word/$region"; my @matches;