diff --git a/Dockerfile b/Dockerfile index 53af2f2..a0b92aa 100644 --- a/Dockerfile +++ b/Dockerfile @@ -5,10 +5,12 @@ # cds-migrator-kit is free software; you can redistribute it and/or modify it # under the terms of the MIT License; see LICENSE file for more details. -FROM python:3.6 +FROM python:3.9 RUN apt-get update -y && apt-get upgrade -y -RUN apt-get install -y git curl vim +RUN apt-get install -y git curl vim build-essential python3-dev \ +libldap2-dev libsasl2-dev slapd ldap-utils tox \ +lcov valgrind RUN pip install --upgrade setuptools wheel pip pipenv uwsgi uwsgitop uwsgi-tools RUN python -m site diff --git a/cds_migrator_kit/errors.py b/cds_migrator_kit/errors.py new file mode 100644 index 0000000..a8f1f28 --- /dev/null +++ b/cds_migrator_kit/errors.py @@ -0,0 +1,189 @@ +# -*- coding: utf-8 -*- +# +# This file is part of Invenio. +# Copyright (C) 2024 CERN. +# +# cds-migrator-kit is free software; you can redistribute it and/or modify it +# under the terms of the MIT License; see LICENSE file for more details. + +"""Exceptions.""" + +from dojson.errors import DoJSONException + +################################################################# +# CDS-ILS Migrator Exceptions +################################################################# + +class LossyConversion(DoJSONException): + """Data lost during migration.""" + + def __init__(self, *args, **kwargs): + """Exception custom initialisation.""" + self.missing = kwargs.pop("missing", None) + self.message = self.description = "Lossy conversion: {0}".format( + self.missing or "" + ) + super().__init__(*args, **kwargs) + + +class RecordNotDeletable(DoJSONException): + """Record is not marked as deletable.""" + + def __init__(self, *args, **kwargs): + """Exception custom initialisation.""" + self.message = self.description = "Record is not marked as deletable" + super().__init__(*args, **kwargs) + + +class ProviderNotAllowedDeletion(DoJSONException): + """Provider is not allowed to delete records.""" + + def __init__(self, *args, **kwargs): + """Exception custom initialisation.""" + self.provider = kwargs.pop("provider", None) + self.message = self.description = ( + "This provider {0} is not allowed to delete records".format(self.provider) + ) + super().__init__(*args, **kwargs) + + +class CDSImporterException(DoJSONException): + """CDSDoJSONException class.""" + + def __init__(self, *args, **kwargs): + """Constructor.""" + self.subfield = kwargs.get("subfield", "") + message = kwargs.get("message", None) + if message: + self.message = message + + # because of ILSRestException class attributes + self.description = self.message + + super(CDSImporterException, self).__init__(*args) + + +class RecordModelMissing(CDSImporterException): + """Missing record model exception.""" + + message = "[Record did not match any available model]" + + +class UnexpectedValue(CDSImporterException): + """The corresponding value is unexpected.""" + + message = "[UNEXPECTED INPUT VALUE]" + + +class MissingRequiredField(CDSImporterException): + """The corresponding value is required.""" + + message = "[MISSING REQUIRED FIELD]" + + +class ManualImportRequired(CDSImporterException): + """The corresponding field should be manually migrated.""" + + message = "[MANUAL IMPORT REQUIRED]" + + +class DocumentImportError(CDSImporterException): + """Document import exception.""" + + message = "[DOCUMENT IMPORT ERROR]" + + +class SeriesImportError(CDSImporterException): + """Document import exception.""" + + message = "[SERIES IMPORT ERROR]" + + +class UnknownProvider(CDSImporterException): + """Unknown provider exception.""" + + message = "Unknown record provider." + + +class InvalidProvider(CDSImporterException): + """Invalid provider exception.""" + + message = "Invalid record provider." + + +class SimilarityMatchUnavailable(CDSImporterException): + """Similarity match unavailable exception.""" + + message = ( + "Title similarity matching cannot be performed for " + "this record. Please import it manually." + ) + + +############################################################################### +# Migration exceptions +############################################################################### + + +class DumpRevisionException(Exception): + """Exception for dump revision.""" + + +class JSONConversionException(Exception): + """JSON Conversion Exception in migration.""" + + +class MigrationException(Exception): + """Base exception for CDS-ILS migration errors.""" + + +class DocumentMigrationError(MigrationException): + """Raised for multipart migration errors.""" + + +class SeriesMigrationError(MigrationException): + """Raised for multipart migration errors.""" + + +class MultipartMigrationError(MigrationException): + """Raised for multipart migration errors.""" + + +class UserMigrationError(MigrationException): + """Raised for user migration errors.""" + + +class SerialMigrationError(MigrationException): + """Raised for serial migration errors.""" + + +class ItemMigrationError(MigrationException): + """Raised for item migration errors.""" + + +class LoanMigrationError(MigrationException): + """Raised for loan migration errors.""" + + +class EItemMigrationError(MigrationException): + """Raised for EItem migration errors.""" + + +class FileMigrationError(MigrationException): + """Raised for File migration errors.""" + + +class BorrowingRequestError(MigrationException): + """Raised for borrowing request migration errors.""" + + +class AcqOrderError(MigrationException): + """Raised for acquisition order migration errors.""" + + +class ProviderError(MigrationException): + """Raised for provider migration errors.""" + + +class RelationMigrationError(MigrationException): + """Raised for exceptions when migrating relations.""" diff --git a/cds_migrator_kit/ext.py b/cds_migrator_kit/ext.py index 2d04cd8..37ea85f 100644 --- a/cds_migrator_kit/ext.py +++ b/cds_migrator_kit/ext.py @@ -29,7 +29,7 @@ def init_app(self, app): self.init_config(app) set_logging() app.extensions['cds-migrator-kit'] = self - app.register_blueprint(blueprint) + app.register_blueprint(blueprint, name='cds_migrator_kit_records_bp') def init_config(self, app): """Initialize configuration.""" diff --git a/cds_migrator_kit/handlers.py b/cds_migrator_kit/handlers.py new file mode 100644 index 0000000..4ba8663 --- /dev/null +++ b/cds_migrator_kit/handlers.py @@ -0,0 +1,36 @@ +# -*- coding: utf-8 -*- +# +# This file is part of Invenio. +# Copyright (C) 2024 CERN. +# +# cds-migrator-kit is free software; you can redistribute it and/or modify it +# under the terms of the MIT License; see LICENSE file for more details. + +"""CDS Migrator Records logging handler.""" + +import logging + +cli_logger = logging.getLogger("migrator") +documents_logger = logging.getLogger("documents_logger") +items_logger = logging.getLogger("items_logger") + + +def migration_exception_handler(exc, output, key, value, rectype=None, **kwargs): + """Migration exception handling - log to files. + + :param exc: exception + :param output: generated output version + :param key: MARC field ID + :param value: MARC field value + :return: + """ + logger = logging.getLogger(f"{rectype}s_logger") + cli_logger.error( + "#RECID: #{0} - {1} MARC FIELD: *{2}*, input value: {3}, -> {4}, ".format( + output["legacy_recid"], exc.message, key, value, output + ) + ) + logger.error( + "MARC: {0}, INPUT VALUE: {1} ERROR: {2}" "".format(key, value, exc.message), + extra=dict(legacy_id=output["legacy_recid"], status="WARNING", new_pid=None), + ) \ No newline at end of file diff --git a/cds_migrator_kit/records/cli.py b/cds_migrator_kit/records/cli.py index e0010fb..1094fd6 100644 --- a/cds_migrator_kit/records/cli.py +++ b/cds_migrator_kit/records/cli.py @@ -12,14 +12,15 @@ import logging import click -from cds_ils.importer.providers.cds.models.document import model as book_model -from cds_ils.importer.providers.cds.models.journal import \ +from cds_dojson.marc21.models.books.book import model as book_model +from cds_dojson.marc21.models.books.journal import \ model as journal_model -from cds_ils.importer.providers.cds.models.multipart import \ +from cds_dojson.marc21.models.books.multipart import \ model as multipart_model -from cds_ils.importer.providers.cds.models.serial import model as serial_model -from cds_ils.importer.providers.cds.models.standard import \ +from cds_dojson.marc21.models.rdm.summer_student_report import model as summer_student_model +from cds_dojson.marc21.models.books.standard import \ model as standard_model +from cds_dojson.marc21.models.books.serial import model as serial_model from flask import current_app from flask.cli import with_appcontext @@ -124,7 +125,11 @@ def dryrun(sources, source_type, recid, rectype, model=None): params['dojson_model'] = book_model elif rectype == 'standard': params['dojson_model'] = standard_model + elif rectype == 'summer-student': + params['dojson_model'] = summer_student_model else: raise ValueError('invalid rectype: {}'.format(rectype)) + # import pdb; pdb.set_trace() + load_records(sources=sources, source_type=source_type, eager=True, rectype=rectype, **params) diff --git a/cds_migrator_kit/records/log.py b/cds_migrator_kit/records/log.py index e310db7..edfacc0 100644 --- a/cds_migrator_kit/records/log.py +++ b/cds_migrator_kit/records/log.py @@ -13,11 +13,9 @@ import logging import os -from cds_ils.importer.errors import ManualImportRequired, \ - MissingRequiredField, UnexpectedValue from flask import current_app -from fuzzywuzzy import fuzz +from cds_migrator_kit.errors import ManualImportRequired, MissingRequiredField, UnexpectedValue from cds_migrator_kit.records.errors import LossyConversion, \ RequiredFieldMissing from cds_migrator_kit.records.utils import clean_exception_message, \ @@ -67,6 +65,8 @@ def get_json_logger(cls, rectype): return DocumentJsonLogger() elif rectype == 'multipart': return MultipartJsonLogger() + elif rectype == 'summer-student': + return RdmJsonLogger() else: raise Exception('Invalid rectype: {}'.format(rectype)) @@ -181,6 +181,30 @@ def add_recid_to_stats(self, recid): def add_record(self, record): """Add record to collected records.""" self.records[record['legacy_recid']] = record + + +class RdmJsonLogger(JsonLogger): + """Log rdm record migration statistic to file controller.""" + + def __init__(self): + """Constructor.""" + super().__init__('rdm_stats.json', 'rdm_records.json') + + def add_recid_to_stats(self, recid): + """Add empty log item.""" + if recid not in self.stats: + self.stats[recid] = { + 'recid': recid, + 'manual_migration': [], + 'unexpected_value': [], + 'missing_required_field': [], + 'lost_data': [], + 'clean': True, + } + + def add_record(self, record): + """Add record to collected records.""" + self.records[record['legacy_recid']] = record class JournalJsonLogger(JsonLogger): diff --git a/cds_migrator_kit/records/records.py b/cds_migrator_kit/records/records.py index 57396f2..6683a99 100644 --- a/cds_migrator_kit/records/records.py +++ b/cds_migrator_kit/records/records.py @@ -14,17 +14,17 @@ import arrow from cds_dojson.marc21.utils import create_record -from cds_ils.importer import marc21 -from cds_ils.importer.errors import ManualImportRequired, \ - MissingRequiredField, UnexpectedValue -from cds_ils.migrator.xml_to_json_dump import CDSRecordDump +from cds_dojson.overdo import OverdoBase from flask import current_app +from cds_migrator_kit.errors import ManualImportRequired, MissingRequiredField, UnexpectedValue from cds_migrator_kit.records.errors import LossyConversion from cds_migrator_kit.records.handlers import migration_exception_handler +from cds_migrator_kit.xml_to_json_dump import CDSRecordDump cli_logger = logging.getLogger('migrator') +marc21 = OverdoBase(entry_point_models="cds.importer.models") class CDSMigKitDump(CDSRecordDump): """CDS record dump class.""" diff --git a/cds_migrator_kit/records/templates/cds_migrator_kit_records/base.html b/cds_migrator_kit/records/templates/cds_migrator_kit_records/base.html index a4e302c..c493e18 100644 --- a/cds_migrator_kit/records/templates/cds_migrator_kit_records/base.html +++ b/cds_migrator_kit/records/templates/cds_migrator_kit_records/base.html @@ -37,6 +37,9 @@
Recid | +Unexpected Value | +Missing required | +Manual Migration | +Lost data fields | +Document | +
---|---|---|---|---|---|
{{ stat.recid }} | +
+ {% for val in stat.unexpected_value %}
+
+ {{ val.key }}{{ val.subfield or '' }}: {{ val.value }}
+ + {% endfor %} + |
+
+ {% for val in stat.missing_required_field %}
+
+ {{ val.key }}{{ val.subfield or '' }}: {{ val.value }}
+ + {% endfor %} + |
+
+ {% for val in stat.manual_migration %}
+
+ {{ val.key }}{{ val.subfield or '' }}: {{ val.value }}
+ + {% endfor %} + |
+
+ {% for val in stat.lost_data %}
+ {% for missing in val.missing %}
+ {{ missing }} + {% endfor %} + {% endfor %} + |
+ + {% if not stat.lost_data %} + View + {% endif %} + | +