Fix and improve virtual museum harvester (#4231)
dimasciput authored Sep 8, 2024
1 parent 91dce61 commit 039ec43
Showing 6 changed files with 99 additions and 108 deletions.
6 changes: 6 additions & 0 deletions bims/admin.py
@@ -1695,6 +1695,7 @@ class IngestedDataAdmin(admin.ModelAdmin):
'datetime',
'is_valid',
'content_object',
'get_content_object_id',
'category',
'data_key'
)
@@ -1712,6 +1713,11 @@ class IngestedDataAdmin(admin.ModelAdmin):
'data_key',
)

def get_content_object_id(self, obj):
return obj.content_object.id if obj.content_object else None

get_content_object_id.short_description = 'Content Object ID'


class UploadSessionAdmin(admin.ModelAdmin):
list_display = (
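The new column follows Django's standard pattern for computed admin columns: a method on the ModelAdmin that is named in list_display, with short_description setting the column header. On Django 3.2 and later the same thing can be written with the admin.display decorator; the sketch below assumes that version and uses an assumed import path rather than the project's actual admin module.

# Equivalent computed column using the admin.display decorator (Django >= 3.2).
# Import path is assumed, not taken from the commit.
from django.contrib import admin

from bims.models import IngestedData  # assumed import path


@admin.register(IngestedData)
class IngestedDataAdmin(admin.ModelAdmin):
    list_display = ('datetime', 'is_valid', 'content_object', 'get_content_object_id')

    @admin.display(description='Content Object ID')
    def get_content_object_id(self, obj):
        # content_object is a generic relation and can be dangling, so guard against None
        return obj.content_object.id if obj.content_object else None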
69 changes: 4 additions & 65 deletions bims/scripts/occurrences_upload.py
@@ -3,14 +3,13 @@
import json
import logging

from django.contrib.sites.models import Site
from preferences import preferences

from bims.scripts.collection_csv_keys import * # noqa
from datetime import datetime

from django.contrib.gis.geos import Point
from django.db.models import Q, signals
from django.db.models import Q

from bims.models import (
LocationType,
@@ -19,35 +18,23 @@
BiologicalCollectionRecord,
SamplingMethod,
Taxonomy,
SourceReference,
ChemicalRecord,
Chem,
Biotope,
BIOTOPE_TYPE_BROAD,
BIOTOPE_TYPE_SPECIFIC,
BIOTOPE_TYPE_SUBSTRATUM,
location_site_post_save_handler,
collection_post_save_handler,
SourceReferenceBibliography,
Survey,
SurveyData,
SurveyDataOption,
SurveyDataValue,
LocationContextGroup,
LocationContextFilter,
LocationContextFilterGroupOrder,
source_reference_post_save_handler,
SourceReferenceDatabase,
SourceReferenceDocument,
Hydroperiod,
WetlandIndicatorStatus,
RecordType,
AbundanceType,
SamplingEffortMeasure,
BimsDocument,
location_context_post_save_handler, bims_document_post_save_handler, disconnect_source_reference_signals,
reconnect_source_reference_signals
)
from bims.signals.utils import disconnect_bims_signals, connect_bims_signals
from bims.utils.feature_info import get_feature_centroid
from bims.utils.user import create_users_from_string
from bims.scripts.data_upload import DataCSVUpload
@@ -75,31 +62,7 @@ class OccurrenceProcessor(object):
parks_data = {}

def start_process(self):
signals.post_save.disconnect(
collection_post_save_handler,
sender=BiologicalCollectionRecord
)
signals.post_save.disconnect(
location_site_post_save_handler,
sender=LocationSite
)
signals.post_save.disconnect(
location_context_post_save_handler,
sender=LocationContextGroup
)
signals.post_save.disconnect(
location_context_post_save_handler,
sender=LocationContextFilter
)
signals.post_save.disconnect(
location_context_post_save_handler,
sender=LocationContextFilterGroupOrder
)
signals.post_save.disconnect(
bims_document_post_save_handler,
sender=BimsDocument
)
disconnect_source_reference_signals()
disconnect_bims_signals()

def update_location_site_context(self):
update_location_context.delay(
@@ -115,31 +78,7 @@ def finish_process(self):
# Update source reference filter
generate_source_reference_filter()

signals.post_save.connect(
collection_post_save_handler,
sender=BiologicalCollectionRecord
)
signals.post_save.connect(
location_site_post_save_handler,
sender=LocationSite
)
signals.post_save.connect(
location_context_post_save_handler,
sender=LocationContextGroup
)
signals.post_save.connect(
location_context_post_save_handler,
sender=LocationContextFilter
)
signals.post_save.connect(
location_context_post_save_handler,
sender=LocationContextFilterGroupOrder
)
signals.post_save.connect(
bims_document_post_save_handler,
sender=BimsDocument
)
reconnect_source_reference_signals()
connect_bims_signals()

def handle_error(self, row, message):
pass
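The longest part of this diff simply deletes the hand-maintained post_save disconnect/connect calls from start_process and finish_process and replaces them with disconnect_bims_signals() / connect_bims_signals() from bims.signals.utils. The helpers' bodies are not shown in this commit; the sketch below is an assumption about how such a pair might centralise the same calls so that the disconnect and reconnect lists can no longer drift out of sync.

# Sketch only — the real bims.signals.utils implementation is not part of this diff.
from django.db.models import signals

# (handler, sender) pairs whose post_save work is too expensive during bulk imports.
# Populate with the project's real handlers, e.g.
# (collection_post_save_handler, BiologicalCollectionRecord).
BULK_IMPORT_SIGNALS = []


def disconnect_bims_signals():
    """Detach the expensive post_save handlers before a bulk import starts."""
    for handler, sender in BULK_IMPORT_SIGNALS:
        signals.post_save.disconnect(handler, sender=sender)


def connect_bims_signals():
    """Re-attach the handlers once the import has finished."""
    for handler, sender in BULK_IMPORT_SIGNALS:
        signals.post_save.connect(handler, sender=sender)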
8 changes: 5 additions & 3 deletions bims/tasks/virtual_museum_import.py
@@ -9,7 +9,7 @@
logger = get_task_logger(__name__)


@shared_task(bind=True, queue='update')
@shared_task(name='bims.tasks.import_data_task', bind=True, queue='update')
def import_data_task(self, module, limit=10):
from bims.models.import_task import ImportTask
from preferences import preferences
@@ -32,8 +32,8 @@ def import_data_task(self, module, limit=10):
in_progress=True)
for task in running_tasks:
result = AsyncResult(task.celery_task_id)
if result.status in ['PENDING', 'STARTED', 'RETRY']:
logger.info(f"Found a running task for {module}, skipping new task.")
if result.status in ['STARTED', 'RETRY']:
logger.info(f"Found a running task for {module} : {result.status}, skipping new task.")
return

# Check if a task was completed this month
Expand Down Expand Up @@ -85,6 +85,8 @@ def import_data_task(self, module, limit=10):

while start_index < total_records:
task = ImportTask.objects.get(id=task.id)
self.update_state(state='STARTED',
meta={'process': 'Harvesting virtual museum data'})

if task.cancel:
cancel_log = f"Task for {module} in schema {tenant.schema_name} was cancelled.\n"
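Three behavioural fixes are visible in this task: it is registered under an explicit name so the beat schedule entries in celery_settings.py resolve it reliably, PENDING is no longer treated as evidence of a running harvest (an AsyncResult for an unknown or expired task id also reports PENDING, which could block new runs indefinitely), and the loop now reports progress via update_state. A minimal sketch of that guard-plus-progress pattern, with a stub in place of the real ImportTask lookup:

# Sketch of the duplicate-run guard and progress reporting; the id lookup is a stub.
from celery import shared_task
from celery.result import AsyncResult


def in_progress_task_ids(module):
    # Stub: in the real task these ids come from ImportTask rows flagged in_progress.
    return []


@shared_task(name='bims.tasks.import_data_task', bind=True, queue='update')
def import_data_task(self, module, limit=10):
    # Only STARTED/RETRY count as "really running"; PENDING may just be a stale id.
    for task_id in in_progress_task_ids(module):
        if AsyncResult(task_id).status in ('STARTED', 'RETRY'):
            return

    # Surface progress through the result backend while records are harvested.
    self.update_state(state='STARTED',
                      meta={'process': 'Harvesting virtual museum data'})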
28 changes: 24 additions & 4 deletions core/settings/celery_settings.py
@@ -22,22 +22,42 @@
},'import-odonates-data-every-month': {
'task': 'bims.tasks.import_data_task',
'schedule': crontab(day_of_month='1', hour='0', minute='0'),
'args': ('odonates', 100)
'args': ('odonates', 100),
'options': {
'expires': 14,
'retry': False,
'queue': 'update'
}
},
'resume-odonates-data-every-day': {
'task': 'bims.tasks.import_data_task',
'schedule': crontab(hour='0', minute='0'),
'args': ('odonates', 100)
'args': ('odonates', 100),
'options': {
'expires': 14,
'retry': False,
'queue': 'update'
}
},
'import-anurans-data-every-month': {
'task': 'bims.tasks.import_data_task',
'schedule': crontab(day_of_month='1', hour='1', minute='0'),
'args': ('anurans', 100)
'args': ('anurans', 100),
'options': {
'expires': 14,
'retry': False,
'queue': 'update'
}
},
'resume-anurans-data-every-day': {
'task': 'bims.tasks.import_data_task',
'schedule': crontab(hour='1', minute='0'),
'args': ('anurans', 100)
'args': ('anurans', 100),
'options': {
'expires': 14,
'retry': False,
'queue': 'update'
}
},
'reset_caches': {
'task': 'bims.tasks.reset_caches',
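Every beat entry now carries an options dict, which Celery passes straight through to apply_async when the schedule fires: expires (seconds by default) lets the message lapse if no worker consumes it in time, retry=False stops the publisher from retrying delivery on broker errors, and queue pins the task to the dedicated 'update' worker. A sketch of one such entry; the setting name and module string are illustrative, not copied from this project's settings.

# One beat entry with per-dispatch options (setting and module names are illustrative).
from celery.schedules import crontab

CELERY_BEAT_SCHEDULE = {
    'import-example-data-every-month': {
        'task': 'bims.tasks.import_data_task',
        'schedule': crontab(day_of_month='1', hour='0', minute='0'),
        'args': ('example_module', 100),
        'options': {
            'expires': 14,      # let the message lapse if it is not consumed in time
            'retry': False,     # do not retry publishing if the broker is briefly down
            'queue': 'update',  # route to the dedicated update worker
        },
    },
}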
5 changes: 4 additions & 1 deletion core/settings/contrib.py
@@ -46,7 +46,6 @@
'webpack_loader',
'ckeditor_uploader',
'django_admin_inline_paginator',
'django_celery_results',

# Apps bundled with Django
'modeltranslation',
@@ -111,6 +110,7 @@
)

TENANT_APPS = (
'django_celery_results',
'rest_framework',
'rest_framework_gis',
'allauth',
@@ -318,9 +318,12 @@
'bims.DownloadRequest',
'bims.Survey',
'bims.TaxonomyUpdateProposal',
'bims.ImportTask',
'bims.IngestedData'
]

DJANGO_EASY_AUDIT_CRUD_EVENT_NO_CHANGED_FIELDS_SKIP = True
DJANGO_EASY_AUDIT_CHECK_IF_REQUEST_USER_EXISTS = False


LOGGING = {
91 changes: 56 additions & 35 deletions scripts/virtual_museum.py
@@ -8,9 +8,11 @@
from datetime import datetime

import requests
from django.contrib.gis.geos import Point
from preferences import preferences

from bims.enums.taxonomic_group_category import TaxonomicGroupCategory
from bims.models.boundary import Boundary
from bims.models.import_task import ImportTask

from bims.models.taxon_group import TaxonGroup
@@ -105,49 +107,68 @@ def __init__(self, occurrence_data, module_group):
self.start_process()
for occurrence in occurrence_data:
try:
if IngestedData.objects.filter(
data_key=occurrence['ALL_DATA']['eventID'],
is_valid=False
).exists():
logger.info(f'{occurrence["ALL_DATA"]["eventID"]} '
f'already exists, skipping...')
continue
ingested_data = IngestedData.objects.filter(
data_key=occurrence['ALL_DATA']['eventID'],
is_valid=False,
)
if ingested_data.exists():
if ingested_data.exclude(content_type__isnull=False).exists():
logger.info(f'{occurrence["ALL_DATA"]["eventID"]} '
f'already exists, skipping...')
continue
else:
ingested_data.delete()
except KeyError:
logger.info(f'MISSING eventID - Skip')
continue

boundary_key = preferences.SiteSetting.boundary_key
if boundary_key:
url = (
'{base_url}/api/v2/query?registry=service&key={key}&'
'x={lon}&y={lat}&outformat=json'
).format(
base_url=get_key('GEOCONTEXT_URL'),
key=boundary_key,
lon=occurrence[LONGITUDE],
lat=occurrence[LATITUDE]
)
try:
response = requests.get(url)
if response.status_code != 200:
logger.info(
f'The site is not within a valid border.'
)
continue
else:
response_json = json.loads(response.content)
if response_json['value']:
logger.info(
f"Site is in {response_json['value']}"
)
else:
site_boundary = preferences.SiteSetting.site_boundary
if site_boundary:
site_point = Point(
float(occurrence[LONGITUDE]),
float(occurrence[LATITUDE]), srid=4326)
is_within_boundary = Boundary.objects.filter(
id=site_boundary.id,
geometry__contains=site_point,
).exists()
if not is_within_boundary:
logger.info(
f'The site is not within a valid border.'
)
continue
else:
boundary_key = preferences.SiteSetting.boundary_key
if boundary_key:
url = (
'{base_url}/api/v2/query?registry=service&key={key}&'
'x={lon}&y={lat}&outformat=json'
).format(
base_url=get_key('GEOCONTEXT_URL'),
key=boundary_key,
lon=occurrence[LONGITUDE],
lat=occurrence[LATITUDE]
)
try:
response = requests.get(url)
if response.status_code != 200:
logger.info(
f'The site is not within a valid border.'
)
continue
except Exception as e: # noqa
logger.info(
f'Unable to check boundary data from geocontext')
else:
response_json = json.loads(response.content)
if response_json['value']:
logger.info(
f"Site is in {response_json['value']}"
)
else:
logger.info(
f'The site is not within a valid border.'
)
continue
except Exception as e: # noqa
logger.info(
f'Unable to check boundary data from geocontext')

logger.info(f'Processing {occurrence}')
self.process_data(occurrence)
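The boundary check is also reordered: when a site_boundary is configured in the site settings, the occurrence's coordinates are tested locally against that Boundary geometry with a GeoDjango containment lookup, and the remote geocontext request only runs as a fallback when no boundary is set. A compact sketch of the local check pulled out of the loop above; it assumes the Boundary model exposes a geometry field as used in the diff.

# Local point-in-boundary check, as used in the harvester loop (assumed field names).
from django.contrib.gis.geos import Point

from bims.models.boundary import Boundary


def site_is_within_boundary(longitude, latitude, boundary_id):
    """True when the occurrence point falls inside the configured boundary polygon."""
    site_point = Point(float(longitude), float(latitude), srid=4326)
    return Boundary.objects.filter(
        id=boundary_id,
        geometry__contains=site_point,
    ).exists()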
