Merge branch 'AtlasOfLivingAustralia:master' into master

biodiversitydata-se · Apr 3, 2024 · 4937963 · 4937963
2 parents cf8a05a + 51b15c4
commit 4937963
Show file tree

Hide file tree

Showing 27 changed files with 3,862 additions and 223 deletions.
diff --git a/README.md b/README.md
@@ -107,19 +107,20 @@ Below is an example meta.xml that would be provided in a darwin core archive.
 
 In addition to indexing the content of the darwin core archive, the ingestion & index creation (optionally) indexes data from the following ALA components. It does this by harvesting JSON feeds from the listed components.
 
-- Layers & regions - http://spatial.ala.org.au/layers - spatial layers available in the system and regions (e.g. states, countries)
-- Collectory - http://collections.ala.org.au - data resource, collections, institutions
-- Lists and traits - http://lists.ala.org.au - conservation lists, sensitive species lists, traits
-- Biocache services - http://biocache.ala.org.au/ws - images, occurrence record counts
+- Layers & regions - https://spatial.ala.org.au/layers - spatial layers available in the system and regions (e.g. states, countries)
+- Collectory - https://collections.ala.org.au - data resource, collections, institutions
+- Lists and traits - https://lists.ala.org.au - conservation lists, sensitive species lists, traits
+- Biocache services - https://biocache.ala.org.au/ws - images, occurrence record counts
+- Biocollect - https://biocollect.ala.org.au - projects
 
 ## Architecture
 
 This application makes use of the following technologies
 
 - Apache SOLR 6.6.x
-- Grails 3.2.x
+- Grails 6.0.0
 - Tomcat 7 or higher
-- Java 8 or higher
+- Java 11 or higher
 
 ![Architecture image](architecture.jpg)
 
@@ -302,6 +303,9 @@ The `term` supplies the name of the status field.
 `sourceField` gives the name of the field that contains the conservation status.
 `kingdomField` gives the name of the field that contains the kingdom -- handy for name lookups, if available.
 
+To use every species list that is recorded as both authoritative and threatened, set `lists` to an empty array.
+Each of these lists must have a `status` column containing the conservation status.
+
 ## Weighting Rules
 
 Calculating weights for search and autosuggest operations gets rather complicated,
@@ -438,4 +442,4 @@ Each list can contain
 * **defaultTerm** The term to use if one is not specified for the entry. Defaults to the global default term.
 
 Favourites only mark selected taxa and their associated common names with favourite terms.
-Once marked, it is up to the bie-plugin otr weighting rules to make use of these terms.
+Once marked, it is up to the bie-plugin or weighting rules to make use of these terms.
diff --git a/build.gradle b/build.gradle
@@ -1,3 +1,8 @@
+buildscript {
+    version "3.1.0"
+    group "au.org.ala"
+}
+
 plugins {
     id "groovy"
     id "org.grails.grails-gsp"
@@ -14,10 +19,6 @@ plugins {
     id "maven-publish"
 }
 
-version "3.0.0"
-group "au.org.ala"
-
-
 publishing {
     targetCompatibility = 1.11
     repositories {
@@ -109,7 +110,7 @@ dependencies {
     }
 
     implementation "org.grails.plugins:grails-spring-websocket:2.3.0"
-    implementation group: "au.org.ala", name: "ala-name-matching-model", version:"4.2"
+    implementation group: "au.org.ala", name: "ala-name-matching-model", version:"4.3"
     implementation "org.jsoup:jsoup:1.15.4"
     implementation 'net.sf.opencsv:opencsv:2.3'
     implementation "org.apache.solr:solr-solrj:8.1.0"
@@ -128,7 +129,7 @@ dependencies {
     testImplementation  'com.github.tomakehurst:wiremock:2.19.0'
     testImplementation  'com.github.tomjankes:wiremock-groovy:0.2.0'
 
-    implementation 'au.org.ala.plugins:openapi:1.0.0'
+    implementation 'au.org.ala.plugins:openapi:1.3.0'
 }
 
 bootRun {

diff --git a/docker/solr6/conf/schema.xml b/docker/solr6/conf/schema.xml
@@ -30,7 +30,7 @@
         <filter class="solr.SynonymFilterFactory" synonyms="synonyms.txt" ignoreCase="true" expand="true"/>
         <filter class="solr.LowerCaseFilterFactory"/>
       </analyzer>
-    </fieldType>    
+    </fieldType>
 
       <!-- A text field that has been "n-gramed" to allow partial term matching
         for use in auto completion
@@ -68,10 +68,10 @@
     <fieldType name="concat_name" class="solr.TextField">
         <analyzer type="index">
             <tokenizer class="solr.KeywordTokenizerFactory"/>
-            <filter class="solr.LowerCaseFilterFactory" />      
-            <filter class="solr.TrimFilterFactory" /> 
+            <filter class="solr.LowerCaseFilterFactory" />
+            <filter class="solr.TrimFilterFactory" />
             <filter class="solr.PatternReplaceFilterFactory" pattern="([^a-zA-Z])" replacement="" replace="all" />
-            <filter class="solr.NGramFilterFactory" minGramSize="3" maxGramSize="30" /> 
+            <filter class="solr.NGramFilterFactory" minGramSize="3" maxGramSize="30" />
         </analyzer>
     <analyzer type="query">
             <tokenizer class="solr.KeywordTokenizerFactory"/>
@@ -83,41 +83,41 @@
       <analyzer type="index">
         <tokenizer class="solr.WhitespaceTokenizerFactory"/>
         <filter class="solr.TrimFilterFactory" />
-        <filter class="solr.StopFilterFactory" ignoreCase="true" words="bie_stopwords.txt"/> 
+        <filter class="solr.StopFilterFactory" ignoreCase="true" words="bie_stopwords.txt"/>
         <filter class="solr.LowerCaseFilterFactory"/>
       </analyzer>
       <analyzer type="query">
         <tokenizer class="solr.WhitespaceTokenizerFactory"/>
-        <filter class="solr.StopFilterFactory" ignoreCase="true" words="bie_stopwords.txt"/> 
+        <filter class="solr.StopFilterFactory" ignoreCase="true" words="bie_stopwords.txt"/>
         <filter class="solr.LowerCaseFilterFactory"/>
       </analyzer>
     </fieldType>
 
     <fieldType name="name_complete" class="solr.TextField">
         <analyzer type="index">
             <tokenizer class="solr.WhitespaceTokenizerFactory"/>
-            <filter class="solr.LowerCaseFilterFactory" />      
-            <filter class="solr.TrimFilterFactory" />     
+            <filter class="solr.LowerCaseFilterFactory" />
+            <filter class="solr.TrimFilterFactory" />
             <filter class="solr.PatternReplaceFilterFactory" pattern="([().])" replacement="" replace="all" />
         </analyzer>
         <analyzer type="query">
             <tokenizer class="solr.WhitespaceTokenizerFactory"/>
-            <filter class="solr.LowerCaseFilterFactory" />      
-            <filter class="solr.TrimFilterFactory" />     
+            <filter class="solr.LowerCaseFilterFactory" />
+            <filter class="solr.TrimFilterFactory" />
             <filter class="solr.PatternReplaceFilterFactory" pattern="([().])" replacement="" replace="all" />
         </analyzer>
     </fieldType>
 
     <fieldType name="auto_name_exact" class="solr.TextField">
         <analyzer type="index">
             <tokenizer class="solr.KeywordTokenizerFactory"/>
-            <filter class="solr.LowerCaseFilterFactory" />      
-            <filter class="solr.TrimFilterFactory" />     
+            <filter class="solr.LowerCaseFilterFactory" />
+            <filter class="solr.TrimFilterFactory" />
         </analyzer>
         <analyzer type="query">
             <tokenizer class="solr.KeywordTokenizerFactory"/>
-            <filter class="solr.LowerCaseFilterFactory" />      
-            <filter class="solr.TrimFilterFactory" />     
+            <filter class="solr.LowerCaseFilterFactory" />
+            <filter class="solr.TrimFilterFactory" />
         </analyzer>
     </fieldType>
 
@@ -164,8 +164,8 @@
       <analyzer type="index">
         <charFilter class="solr.MappingCharFilterFactory" mapping="mapping-ISOLatin1Accent.txt"/>
         <tokenizer class="solr.StandardTokenizerFactory"/>
-        <filter class="solr.WordDelimiterFilterFactory" 
-              generateWordParts="1" 
+        <filter class="solr.WordDelimiterFilterFactory"
+              generateWordParts="1"
               generateNumberParts="1"
               catenateWords="1"
               catenateNumbers="1"
@@ -180,8 +180,8 @@
       <analyzer type="query">
         <charFilter class="solr.MappingCharFilterFactory" mapping="mapping-ISOLatin1Accent.txt"/>
         <tokenizer class="solr.StandardTokenizerFactory"/>
-        <filter class="solr.WordDelimiterFilterFactory" 
-              generateWordParts="0" 
+        <filter class="solr.WordDelimiterFilterFactory"
+              generateWordParts="0"
               generateNumberParts="0"
               catenateWords="0"
               catenateNumbers="0"
@@ -192,7 +192,7 @@
         <filter class="solr.LowerCaseFilterFactory"/>
         <filter class="solr.PatternReplaceFilterFactory" pattern="([^\w\d\*æøåÆØÅ ])" replacement=" " replace="all"/>
       </analyzer>
-    </fieldType>    
+    </fieldType>
 
     <!-- Less flexible matching, but less false matches.  Probably not ideal for product names,
          but may be good for SKUs.  Can insert dashes in the wrong place and still match. -->
@@ -318,7 +318,7 @@
        when adding a document.
    -->
    <!-- start of BIE Lucene Fields -->
-   
+
    <!-- Common fields to all -->
    <field name="id" type="string" indexed="true" stored="true" required="true" />
    <field name="_version_" type="long" indexed="true" stored="true" multiValued="false"/>
@@ -409,7 +409,7 @@
    <field name="url" type="string" indexed="true" stored="true" multiValued="false" omitNorms="false" />
    <field name="acronym" type="string" indexed="true" stored="true" multiValued="false" omitNorms="false" />
    <field name="institutionType" type="string" indexed="true" stored="true" multiValued="false" omitNorms="false" />
-   
+
    <!-- WordPress fields -->
    <field name="categories" type="string" indexed="true" stored="true" multiValued="true" omitNorms="false" />
 
@@ -431,14 +431,14 @@
    <field name="concat_name" type="concat_name" indexed="true" stored="false" multiValued="true" omitNorms="true" omitTermFreqAndPositions="true"/>
    <field name="doc_name" type="text" indexed="true" stored="true" multiValued="true" omitNorms="false" omitTermFreqAndPositions="false"/>
 
-  
+
    <!--  NC:2013-02-08 remove the omitTermFreqAndPositions because this setting will cause an exception when searching for a phrase -->
    <field name="stopped_common_name" type="stopped_common_name" indexed="true" stored="false" multiValued="true" omitNorms="true" />
    <field name="name_complete" type="name_complete" indexed="true" stored="false" multiValued="true" omitNorms="true" omitTermFreqAndPositions="true"/>
-   
+
    <!-- Fields used to rank exact matches higher -->
    <field name="exact_text" type="lowercase" indexed="true" stored="false" omitNorms="true" multiValued="true"/>
-   
+
    <dynamicField name="rk_*"  type="string"  indexed="true"  stored="true"/>
    <dynamicField name="rkid_*"  type="string"  indexed="true"  stored="true"/>
 
@@ -450,10 +450,10 @@
           EXAMPLE:  name="*_i" will match any field ending in _i (like myid_i, z_i)
           Longer patterns will be matched first.  if equal size patterns
           both match, the first appearing in the schema will be used.  -->
-   <dynamicField name="*Count"  type="int"    indexed="true"  stored="true"/>     
+   <dynamicField name="*Count"  type="int"    indexed="true"  stored="true"/>
    <dynamicField name="*_i"  type="int"    indexed="true"  stored="true"/>
    <dynamicField name="*_m_s"  type="string" multiValued="true"  indexed="true"  stored="true"/>
-   <dynamicField name="*_s"  type="string"  indexed="true"  stored="true"/>   
+   <dynamicField name="*_s"  type="string"  indexed="true"  stored="true"/>
    <dynamicField name="*_l"  type="long"   indexed="true"  stored="true"/>
    <dynamicField name="*_t"  type="text"    indexed="true"  stored="true"/>
    <dynamicField name="*_b"  type="boolean" indexed="true"  stored="true"/>
@@ -520,15 +520,16 @@
    <!-- copy fields required for the auto complete -->
    <copyField source="scientificName" dest="auto_text"/>
    <copyField source="commonName" dest="auto_text"/>
+   <copyField source="name" dest="auto_text"/>
 
    <copyField source="name" dest="exact_text"/>
    <copyField source="acronym" dest="exact_text"/>
    <copyField source="scientificName" dest="exact_text"/>
    <copyField source="commonNameExact" dest="exact_text"/>
-   
+
    <copyField source="scientificName" dest="concat_name"/>
-   
+
    <copyField source="commonName" dest="concat_name"/>
-   
+
    <copyField source="commonName" dest="stopped_common_name"/>
 </schema>