From a58b997bb328c8e31e7f28f47ff97f0d1cf28c06 Mon Sep 17 00:00:00 2001 From: ThrawnCA Date: Tue, 18 Apr 2023 15:46:58 +1000 Subject: [PATCH 01/29] reduce Flake8 exceptions --- .flake8 | 4 ---- ckanext/xloader/jobs.py | 2 +- ckanext/xloader/loader.py | 1 - ckanext/xloader/parser.py | 2 -- ckanext/xloader/plugin.py | 1 - ckanext/xloader/tests/ckan_setup.py | 2 +- ckanext/xloader/tests/fixtures.py | 5 ++--- 7 files changed, 4 insertions(+), 13 deletions(-) diff --git a/.flake8 b/.flake8 index a4eea9e3..32068ca7 100644 --- a/.flake8 +++ b/.flake8 @@ -17,8 +17,4 @@ max-line-length=127 # List ignore rules one per line. ignore = - E501 - C901 W503 - F401 - F403 diff --git a/ckanext/xloader/jobs.py b/ckanext/xloader/jobs.py index 4c4068f9..0d242db1 100644 --- a/ckanext/xloader/jobs.py +++ b/ckanext/xloader/jobs.py @@ -16,7 +16,7 @@ import sqlalchemy as sa from ckan import model -from ckan.plugins.toolkit import get_action, asbool, ObjectNotFound, config, check_ckan_version +from ckan.plugins.toolkit import get_action, asbool, ObjectNotFound, config from . import loader from . import db diff --git a/ckanext/xloader/loader.py b/ckanext/xloader/loader.py index afc3c980..dfddd0ff 100644 --- a/ckanext/xloader/loader.py +++ b/ckanext/xloader/loader.py @@ -14,7 +14,6 @@ from unidecode import unidecode import ckan.plugins as p -import ckan.plugins.toolkit as tk from .job_exceptions import FileCouldNotBeLoadedError, LoaderError from .parser import XloaderCSVParser diff --git a/ckanext/xloader/parser.py b/ckanext/xloader/parser.py index b2a6f889..b52c59a3 100644 --- a/ckanext/xloader/parser.py +++ b/ckanext/xloader/parser.py @@ -1,10 +1,8 @@ # -*- coding: utf-8 -*- import csv -from codecs import iterencode from decimal import Decimal, InvalidOperation from itertools import chain -import six from ckan.plugins.toolkit import asbool from dateutil.parser import isoparser, parser from dateutil.parser import ParserError diff --git a/ckanext/xloader/plugin.py b/ckanext/xloader/plugin.py index dbde8ed5..159b99de 100644 --- a/ckanext/xloader/plugin.py +++ b/ckanext/xloader/plugin.py @@ -6,7 +6,6 @@ from ckan.plugins import toolkit from . import action, auth, helpers as xloader_helpers, utils -from .loader import fulltext_function_exists, get_write_engine try: config_declarations = toolkit.blanket.config_declarations diff --git a/ckanext/xloader/tests/ckan_setup.py b/ckanext/xloader/tests/ckan_setup.py index ae8bfb3e..ff43d74c 100644 --- a/ckanext/xloader/tests/ckan_setup.py +++ b/ckanext/xloader/tests/ckan_setup.py @@ -1,5 +1,5 @@ try: - from ckan.tests.pytest_ckan.ckan_setup import * + from ckan.tests.pytest_ckan.ckan_setup import * # noqa except ImportError: import pkg_resources from paste.deploy import loadapp diff --git a/ckanext/xloader/tests/fixtures.py b/ckanext/xloader/tests/fixtures.py index f43916ab..9a7ad37f 100644 --- a/ckanext/xloader/tests/fixtures.py +++ b/ckanext/xloader/tests/fixtures.py @@ -1,6 +1,5 @@ # -*- coding: utf-8 -*- -import sqlalchemy -import sqlalchemy.orm as orm +from sqlalchemy import orm import os from ckanext.datastore.tests import helpers as datastore_helpers @@ -11,7 +10,7 @@ ) try: - from ckan.tests.pytest_ckan.fixtures import * + from ckan.tests.pytest_ckan.fixtures import * # noqa except ImportError: import pytest From 7c433e2a0a820bd4be6415b1e0d095a23126e4cd Mon Sep 17 00:00:00 2001 From: ThrawnCA Date: Tue, 18 Apr 2023 15:50:33 +1000 Subject: [PATCH 02/29] fix fallback to 'str' type when no other types are guessed, #182 - Columns that used numeric on some rows and free text on others resulted in no type being guessed and an error --- .../tests/samples/mixed_numeric_string_sample.csv | 3 +++ ckanext/xloader/tests/test_loader.py | 12 ++++++++++++ ckanext/xloader/utils.py | 2 +- 3 files changed, 16 insertions(+), 1 deletion(-) create mode 100644 ckanext/xloader/tests/samples/mixed_numeric_string_sample.csv diff --git a/ckanext/xloader/tests/samples/mixed_numeric_string_sample.csv b/ckanext/xloader/tests/samples/mixed_numeric_string_sample.csv new file mode 100644 index 00000000..7f59686c --- /dev/null +++ b/ckanext/xloader/tests/samples/mixed_numeric_string_sample.csv @@ -0,0 +1,3 @@ +Funding agency,Program title,Maximum (indicative) grant amount +DTIS,Accessible Tourism Infrastructure Grants,Five hundred thousand dollars +DTIS,Boosting Accessible Tourism Experiences Grants,5000 diff --git a/ckanext/xloader/tests/test_loader.py b/ckanext/xloader/tests/test_loader.py index f31b663b..0241693d 100644 --- a/ckanext/xloader/tests/test_loader.py +++ b/ckanext/xloader/tests/test_loader.py @@ -612,6 +612,18 @@ def test_german(self, Session): u"tsvector", ] + [u"text"] * (len(records[0]) - 1) + def test_with_mixed_types(self, Session): + csv_filepath = get_sample_filepath("mixed_numeric_string_sample.csv") + resource_id = "test1" + factories.Resource(id=resource_id) + loader.load_csv( + csv_filepath, + resource_id=resource_id, + mimetype="text/csv", + logger=logger, + ) + assert len(self._get_records(Session, "test1")) == 2 + def test_reload(self, Session): csv_filepath = get_sample_filepath("simple.csv") resource_id = "test1" diff --git a/ckanext/xloader/utils.py b/ckanext/xloader/utils.py index cbffaa2f..79facbea 100644 --- a/ckanext/xloader/utils.py +++ b/ckanext/xloader/utils.py @@ -175,10 +175,10 @@ def type_guess(rows, types=TYPES, strict=False): for ci, cell in enumerate(row): if not cell: continue - at_least_one_value[ci] = True for type in list(guesses[ci].keys()): if not isinstance(cell, type): guesses[ci].pop(type) + at_least_one_value[ci] = True if guesses[ci] else False # no need to set guessing weights before this # because we only accept a type if it never fails for i, guess in enumerate(guesses): From a1a5193d3f5a2f26c183fa0726ead0a8d75654c0 Mon Sep 17 00:00:00 2001 From: ThrawnCA Date: Tue, 18 Apr 2023 15:52:51 +1000 Subject: [PATCH 03/29] fix parsing failures when converting empty strings to SQL types, #182 - 'timestamp' and 'numeric' cannot handle empty strings, so convert to None --- ckanext/xloader/loader.py | 7 +++++++ ckanext/xloader/tests/samples/sample_with_blanks.csv | 4 ++++ ckanext/xloader/tests/test_loader.py | 12 ++++++++++++ 3 files changed, 23 insertions(+) create mode 100644 ckanext/xloader/tests/samples/sample_with_blanks.csv diff --git a/ckanext/xloader/loader.py b/ckanext/xloader/loader.py index dfddd0ff..55c9cab5 100644 --- a/ckanext/xloader/loader.py +++ b/ckanext/xloader/loader.py @@ -317,9 +317,16 @@ def row_iterator(): logger.info('Copying to database...') count = 0 + # Some types cannot be stored as empty strings and must be converted to None, + # https://github.com/ckan/ckanext-xloader/issues/182 + non_empty_types = ['timestamp', 'numeric'] for i, records in enumerate(chunky(result, 250)): count += len(records) logger.info('Saving chunk {number}'.format(number=i)) + for row in records: + for column_index, column_name in enumerate(row): + if headers_dicts[column_index]['type'] in non_empty_types and row[column_name] == '': + row[column_name] = None send_resource_to_datastore(resource_id, headers_dicts, records) logger.info('...copying done') diff --git a/ckanext/xloader/tests/samples/sample_with_blanks.csv b/ckanext/xloader/tests/samples/sample_with_blanks.csv new file mode 100644 index 00000000..2b7c415c --- /dev/null +++ b/ckanext/xloader/tests/samples/sample_with_blanks.csv @@ -0,0 +1,4 @@ +Funding agency,Program title,Opening date,Service ID +DTIS,Visitor First Experiences Fund,23/03/2023,63039 +DTIS,First Nations Sport and Recreation Program Round 2,22/03/2023,63040 +,,,63041 diff --git a/ckanext/xloader/tests/test_loader.py b/ckanext/xloader/tests/test_loader.py index 0241693d..68452d11 100644 --- a/ckanext/xloader/tests/test_loader.py +++ b/ckanext/xloader/tests/test_loader.py @@ -612,6 +612,18 @@ def test_german(self, Session): u"tsvector", ] + [u"text"] * (len(records[0]) - 1) + def test_with_blanks(self, Session): + csv_filepath = get_sample_filepath("sample_with_blanks.csv") + resource_id = "test1" + factories.Resource(id=resource_id) + loader.load_csv( + csv_filepath, + resource_id=resource_id, + mimetype="text/csv", + logger=logger, + ) + assert len(self._get_records(Session, "test1")) == 3 + def test_with_mixed_types(self, Session): csv_filepath = get_sample_filepath("mixed_numeric_string_sample.csv") resource_id = "test1" From 4111f3fe7cc29424c3182b3e29e064b15265b8f5 Mon Sep 17 00:00:00 2001 From: antuarc Date: Fri, 19 May 2023 14:08:16 +1000 Subject: [PATCH 04/29] hide excessive numbers of resource_data log entries, #187 - Default to showing the first 50 and last 50 entries, with links to display more - This dramatically improves page load times for very large resources --- .../templates/xloader/resource_data.html | 32 ++++++++++++++++++- ckanext/xloader/utils.py | 15 +++++---- ckanext/xloader/views.py | 12 +++++-- 3 files changed, 50 insertions(+), 9 deletions(-) diff --git a/ckanext/xloader/templates/xloader/resource_data.html b/ckanext/xloader/templates/xloader/resource_data.html index a94ad631..d9a22058 100644 --- a/ckanext/xloader/templates/xloader/resource_data.html +++ b/ckanext/xloader/templates/xloader/resource_data.html @@ -61,7 +61,25 @@ {% if status.status and status.task_info and show_table %}

{{ _('Upload Log') }}

    - {% for item in status.task_info.logs %} + {% set items = status.task_info.logs %} + {% set rows = rows or 50 %} + {% set skipped_rows = (items | length) - (rows * 2) %} + {% if skipped_rows > 1 %} +
  • + +

    + {{ skipped_rows }} out of {{ items | length }} logs will be hidden. +
    + + Show more   Show all + +

    +
  • + {% endif %} + {% for item in items %} + {# Truncate very long loops, showing just the start and end #} + {% if loop.index <= rows or loop.revindex <= rows + or (loop.index == rows + 1 and loop.revindex == rows + 1) %} {% set icon = 'ok' if item.level == 'INFO' else 'exclamation' %} {% set class = ' failure' if icon == 'exclamation' else ' success' %} {% set popover_content = 'test' %} @@ -77,6 +95,18 @@

    {{ _('Upload Log') }}

    + {% elif loop.index == rows + 1 %} +
  • + +

    + Skipping {{ skipped_rows }} logs... +
    + + Show more   Show all + +

    +
  • + {% endif %} {% endfor %}
  • diff --git a/ckanext/xloader/utils.py b/ckanext/xloader/utils.py index cbffaa2f..0f1d6a8b 100644 --- a/ckanext/xloader/utils.py +++ b/ckanext/xloader/utils.py @@ -11,7 +11,7 @@ import ckan.plugins as p -def resource_data(id, resource_id): +def resource_data(id, resource_id, rows=None): if p.toolkit.request.method == "POST": try: @@ -44,13 +44,16 @@ def resource_data(id, resource_id): except p.toolkit.NotAuthorized: return p.toolkit.abort(403, p.toolkit._("Not authorized to see this page")) + extra_vars = { + "status": xloader_status, + "resource": resource, + "pkg_dict": pkg_dict, + } + if rows: + extra_vars["rows"] = rows return p.toolkit.render( "xloader/resource_data.html", - extra_vars={ - "status": xloader_status, - "resource": resource, - "pkg_dict": pkg_dict, - }, + extra_vars=extra_vars, ) diff --git a/ckanext/xloader/views.py b/ckanext/xloader/views.py index 198de320..5a56322c 100644 --- a/ckanext/xloader/views.py +++ b/ckanext/xloader/views.py @@ -1,4 +1,4 @@ -from flask import Blueprint +from flask import Blueprint, request import ckanext.xloader.utils as utils @@ -12,4 +12,12 @@ def get_blueprints(): @xloader.route("/dataset//resource_data/", methods=("GET", "POST")) def resource_data(id, resource_id): - return utils.resource_data(id, resource_id) + rows = request.args.get('rows') + if rows: + try: + rows = int(rows) + if rows < 0: + rows = None + except ValueError: + rows = None + return utils.resource_data(id, resource_id, rows) From d17e55f40f8fe0714f676ecb4bdf1aee39d4f514 Mon Sep 17 00:00:00 2001 From: ThrawnCA Date: Mon, 29 May 2023 12:42:49 +1000 Subject: [PATCH 05/29] increase CSV sample size for identifying quote character, #182 - Messytables used to use 1000 rows, the Tabulator approach should do the same --- ckanext/xloader/loader.py | 5 +- ckanext/xloader/parser.py | 2 +- .../samples/sample_with_mixed_quotes.csv | 136 ++++++++++++++++++ ckanext/xloader/tests/test_loader.py | 24 ++++ 4 files changed, 164 insertions(+), 3 deletions(-) create mode 100644 ckanext/xloader/tests/samples/sample_with_mixed_quotes.csv diff --git a/ckanext/xloader/loader.py b/ckanext/xloader/loader.py index 55c9cab5..2060a9ef 100644 --- a/ckanext/xloader/loader.py +++ b/ckanext/xloader/loader.py @@ -10,13 +10,13 @@ import psycopg2 from six.moves import zip -from tabulator import Stream, TabulatorException +from tabulator import config as tabulator_config, Stream, TabulatorException from unidecode import unidecode import ckan.plugins as p from .job_exceptions import FileCouldNotBeLoadedError, LoaderError -from .parser import XloaderCSVParser +from .parser import CSV_SAMPLE_LINES, XloaderCSVParser from .utils import headers_guess, type_guess from ckan.plugins.toolkit import config @@ -28,6 +28,7 @@ _drop_indexes = datastore_db._drop_indexes MAX_COLUMN_LENGTH = 63 +tabulator_config.CSV_SAMPLE_LINES = CSV_SAMPLE_LINES def load_csv(csv_filepath, resource_id, mimetype='text/csv', logger=None): diff --git a/ckanext/xloader/parser.py b/ckanext/xloader/parser.py index b52c59a3..82539f4d 100644 --- a/ckanext/xloader/parser.py +++ b/ckanext/xloader/parser.py @@ -12,7 +12,7 @@ from ckan.plugins.toolkit import config -CSV_SAMPLE_LINES = 100 +CSV_SAMPLE_LINES = 1000 class XloaderCSVParser(Parser): diff --git a/ckanext/xloader/tests/samples/sample_with_mixed_quotes.csv b/ckanext/xloader/tests/samples/sample_with_mixed_quotes.csv new file mode 100644 index 00000000..a9527cf7 --- /dev/null +++ b/ckanext/xloader/tests/samples/sample_with_mixed_quotes.csv @@ -0,0 +1,136 @@ +Category,Category name,Priority,Initiative name,Investment objectives,Primary digital priority,Initiative stage,Actual start date,Approved end date,Date data current at,Percentage complete,Overall status,Project commencement allocation,Approved expenditure,Actual cost to date,Scope change event,Cost re-evaluation event,Delivery delay event,Project journey and reasons for variance,Learn more (URL) +DDSSHHESW,"Department of Defence, Social Security, Health, Housing, Education, and Silly Walks",High,Silly Walks project - Stage 2,"Lorum ipsum.",Collaboration,Delivery,01/07/1970,30/06/1971,31/03/1971,41,G,5633000,5739000,2352000,N,N,N,"As at 31 March 1971 +- Overall 'green' (on track) status +- Revised user journey following results of Silly Walk UX/UI testing +- Transition to support progressing with documentation and walk-through of the solution. +- Ongoing high levels of silly walk usage reflecting the success of search engine marketing. Silly walk focused campaign to further increase awareness and usage is being finalised. + +As at 28 February 1971 +- Overall 'green' (on track) status +- Results of Silly Walk UX/UI testing is guiding development of the revised user journey. +- Silly Walk transition to BAU support continuing with workshops, showcases and handover documentation. +- Silly Walk usage is increasing + +As at 31 January 1971 +- Continued amber status [closely monitored] with risks under management +- Search Engine Marketing -'Always On' yielding good results with continued increase in users and the proportion benefitting from Silly Walk +- Good progress on development of revised Silly Walk user journey. + +As at 31 December 1970 +Status AMBER [Closely monitored] +- Search Engine Marketing commenced 19 December 1970 and already showing increased users and proportion of customers benefitting from Silly Walk +- External assurance review completed and reported 'green' rating for confidence of delivery. + +As at 30 November 1970 +- Continued amber status pending risk management +- Marketing to commence to increase awareness of platform +- Good progress on development of revised user journey + +As at 31 October 1970 +Status AMBER [Closely monitored] +- Silly Walk Stage 2 continue reporting amber status reflective of ongoing high-level risks associated with demand-driven labour-market conditions and planned transition to support. +- Communications and engagement are in progress. +- The revised user journey continues development and testing. This is planned to be ready for release in the first quarter of 1971. As at 30 September 1970 +Status AMBER [Closely monitored] +Project journey events: +- A revised customer journey in line with outcomes of customer testing and retesting to validate solution usefulness continues to progress. +- Silly Walk industries expanded to include all industries. +- Engagement with agencies continues, to heighten Silly Walk awareness and complete validation following recent expansion to encompass all industries. + +As at 31 August 1970 +Status GREEN [On track] +The project is reporting green overall. Ongoing resourcing risk will continue to be monitored and managed for the life of the project, due to a tight labour market. +Project journey events: +- A revised customer journey in line with outcomes of customer testing and retesting to validate solution usefulness continues to progress. +- Further analysis of June/July 1970 marketing campaign has offered recommendations for consideration, to improve target audience awareness and Silly Walk uptake. +- Silly Walk industries expanded to include Retail Trade, Accommodation and Non-residential Construction industries finalised. +- Engagement with agencies continues, to heighten Silly Walk awareness and complete validation following recent expansion with three additional industries. + +As at 31 July 1970 +Status AMBER [Closely monitored] +The project is continuing to report amber overall mainly due to ongoing resourcing challenges. +Project journey events: +- A revised customer journey in line with outcomes of customer testing and retesting to validate solution usefulness, is progressing. +- Analysis of a major marketing campaign conducted in June/July 1970 showed a significant step-up in number of Silly Walk users. +- The target of 95% of Circus population coverage was met in June 1970 with 100% of Circus population now covered on Silly Walk. +- Agency engagement for extension industries has commenced. + +As at 1 July 1970 +Silly Walk commenced work on expanding industries to include Retail Trade, Accommodation and Non-residential Construction industries. + +As at June 1970 +Stage 2 of the project is commencing and will build up the solution delivered in Silly Walk Stage 1. Customer journey will be revised in line with outcome of customer testing. The increased coverage target of at least 95% of the Circus population was met in June 1970, with all local governments included on Silly Walk. Benefits realisation through marketing and promotion of Silly Walk.",https://example.com +DDSSHHESW,"Department of Defence, Social Security, Health, Housing, Education, and Silly Walks",High,Flying Circus Modernisation and Transformation Program - Tranche 1,"The Flying Circus Modernisation and Transformation (FCMT) Program seeks to reduce the risks associated with department legacy systems by delivering contemporary, consolidated, integrated, user-friendly applications to support delivery of Flying Circus outcomes. To optimise the technical capabilities of the new solutions, engagement with business teams in the review and development of business processes is a priority. ",Trust,Delivery,01/07/1969,31/08/1971,28/02/1971,52,G,8692200,9614968,4961147,Y,Y,Y,"As at 28 February 1971 +- Tranche 1 FCMT projects continue on schedule and on budget for Tranche 1 completion by 31 August 1971. +- Customer Engagement and Contract Establishment projects continue to progress focusing on delivery activities for new CRM and Portal enhancements. +- FCMT Tranche 2 Business Case tracking for completion April 1971. + +As at 31 January 1971 +- FCMT Projects continue to track to schedule and on budget for Tranche 1 completion 31 August 1971. +- Customer Engagement and Contract Establishment Projects progressing well with delivery activities for new CRM and Portal enhancements. + +As at 31 December 1970 +Status GREEN +- FCMT projects continuing to track to board endorsed updated schedule and on budget for Tranche 1 completion on 31 August 1971. +- Customer Engagement and Contract Establishment projects completed partner onboarding and delivery activities underway. +- Planning in progress for Tranche 2, focusing on remaining legacy systems for planned commencement at completion of Tranch 1. + +As at 30 November 1970 +Status GREEN +- Tranche 1 delivery date extended to 31 August 1971 due to CRM vendor procurement delays and subsequent additional time requirements for build completion and testing of new CRM. +- All projects maintaining momentum and progressing to revised schedule within budget. + +As at 31 October 1970 +Status GREEN +-New 'Partner Portal' Digital Channel continues to perform well with 3516 registered, active, external users from 634 different organisations. Update release being planned for January 1971. +-SkillsCRM (CEP Project) delivery partner on-boarded and formal delivery stage commenced. +-Contract Establishment and Variation (CEV PRoject) continuing delivery partner select with a view to commencing prior to end of December 1970. + +As at 30 September 1970 Status GREEN. +The FCMT 'Partner Portal' solution was successfully launched on the 17 August 1970. The decommissioning of the outdated legacy application, 'WalkConnect', has completed. Work is now increasing on the next Flying Circus systems to be replaced, SkillsCRM (via the Customer Engagement Project) and Policy on Line (via the Contract Establishment and Variation Project). +Project Journey Events: +- Partner Portal. After the successful launch of Partner Portal and decommissioning of WalkConnect, the transition to BAU is underway with the Project team continuing to support business until BAU transition is completed. +- Data, Infrastructure and Reporting. +New 'Data Lake' infrastructure built. Data ingestion processes being trialled. QTS report requirement gathering underway which will showcase new capability once completed. Compliance tool SMCM successfully launched September 30. +-Customer Engagement Project (CEP). Completed assurance reviews successfully. Delivery partner selection completed. Partner and formal delivery stage due to start 18 October 1970. Ramp up of activities continuing with business demonstrations of CRM proof of concept. +-Contract Establishment and Variation (CEV). +Requirements gathering completed. Delivery partner selection process commenced. 'As is' process documentation underway. + +As at 31 August 1970 +Status GREEN. The project remains on track. Successful launch of new secure 'Partner Portal' Digital Channel for Flying Circus related organisations occurred 17 August 1970. + +Current Projects underway: +- Partner Portal. Go-live occurred on track 17 August 1970. All registered Flying Circus organisations now able to use the portal to access key applications and send information to DDSSHHESW via secure channel. Enhanced support being provided for 6 weeks. Legacy system decommissioning underway. +- Data, Infrastructure and Reporting. Build of initial Data Lake (centralised, quality, information source) continuing and requirement gathering of first report planned to use new capabilites commenced. +- Customer Services Hub (CRM). Implementation partner selection complete. Solution delivery activities due to start by end September 1970. +- Contract Engagement and Variation. Requirements gathering complete and partner selection process to commence by end September 1970. + +As at 31 July 1970 +Status GREEN + +Project journey events: +Implementation of next changes to FCMT applications remain on track for August 1970 with full launch of new secure Partner Portal Digital Channel for Flying Circus related organisations. +FCMT Program scope adjusted to include additional at risk system decommission activties during this financial year. Approved expenditure updated to align with revised scope. + +Current Projects underway +- Partner Portal. Opened for registrations 4 July 1970. Majority of Flying Circus related organisation now registered. Full access (go-live) on track to commence 17 August 1970. Legacy system to be disabled and decommissioned September 1970. +- Data, Infrastructure and Reporting. Build of initial Data Lake (centralised, quality, information source) underway with population and work on first report to commence in September. +- Customer Services Hub (CRM). Requirements confirmed and partner selection underway. Work on legacy CRM replacement due to start September/October 1970. +- Contract Engagement and Variation. Requirements gathering and new process design activities in progress. + +15 May 1970 Update +Status GREEN + +Implementation of next changes to Flying Circus applications on track for August 1970 with introduction of new secure 'Silly Portal' Digital Channel for Flying Circus related organisations. + +Projects Completed +-Database consolidation - key databases transitioned to supported versions and platforms. Completed November 1969. +-System to System Integration platform. Completed 9 May 1970. + +Current projects underway +-Partner Portal secure digital channel, in final testing. Pilot successfully complete and on track for release in August 1970. +Projects in startup +-Data, Infrastructure and Reporting, planning underway. +-Customer Services Hub (CRM), planning underway. +-Contract Engagement and Variation, planning underway. +-Planning continues for Tranche 2.",https://example.com diff --git a/ckanext/xloader/tests/test_loader.py b/ckanext/xloader/tests/test_loader.py index 68452d11..14b54eeb 100644 --- a/ckanext/xloader/tests/test_loader.py +++ b/ckanext/xloader/tests/test_loader.py @@ -624,6 +624,18 @@ def test_with_blanks(self, Session): ) assert len(self._get_records(Session, "test1")) == 3 + def test_with_mixed_quotes(self, Session): + csv_filepath = get_sample_filepath("sample_with_mixed_quotes.csv") + resource_id = "test1" + factories.Resource(id=resource_id) + loader.load_csv( + csv_filepath, + resource_id=resource_id, + mimetype="text/csv", + logger=logger, + ) + assert len(self._get_records(Session, "test1")) == 2 + def test_with_mixed_types(self, Session): csv_filepath = get_sample_filepath("mixed_numeric_string_sample.csv") resource_id = "test1" @@ -1159,3 +1171,15 @@ def test_no_entries(self): mimetype="csv", logger=logger, ) + + def test_with_mixed_quotes(self, Session): + csv_filepath = get_sample_filepath("sample_with_mixed_quotes.csv") + resource_id = "test1" + factories.Resource(id=resource_id) + loader.load_table( + csv_filepath, + resource_id=resource_id, + mimetype="text/csv", + logger=logger, + ) + assert len(self._get_records(Session, "test1")) == 2 From c141716ff0f389812e3148b99b5bbdbc4bcd8431 Mon Sep 17 00:00:00 2001 From: Jesse Vickery Date: Wed, 8 Nov 2023 20:14:14 +0000 Subject: [PATCH 06/29] feat(templates): added missing csrf field; - Added csrf field to delete button form. --- ckanext/xloader/templates/xloader/resource_data.html | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/ckanext/xloader/templates/xloader/resource_data.html b/ckanext/xloader/templates/xloader/resource_data.html index 1582fe41..0c1480f8 100644 --- a/ckanext/xloader/templates/xloader/resource_data.html +++ b/ckanext/xloader/templates/xloader/resource_data.html @@ -5,13 +5,17 @@ {% block primary_content_inner %} {% set action = h.url_for('xloader.resource_data', id=pkg.name, resource_id=res.id) %} + {% set delete_action = h.url_for('xloader.delete_datastore_table', id=pkg.id, resource_id=res.id) %} {% set show_table = true %} {% block delete_ds_button %} -
    + + {{ h.csrf_input() if 'csrf_input' in h }} {% block delete_datastore_button_text %}{{ _('Delete DataStore table') }}{% endblock %}
    From ae29b8fe33e8bfcd8d774a94fbe95a8b678203c7 Mon Sep 17 00:00:00 2001 From: Jesse Vickery Date: Thu, 9 Nov 2023 15:07:58 +0000 Subject: [PATCH 07/29] fix(semantics): removed the word table; - Removed the word `table` from user visible things. --- .../xloader/templates/xloader/confirm_datastore_delete.html | 2 +- ckanext/xloader/templates/xloader/resource_data.html | 4 ++-- ckanext/xloader/views.py | 4 ++-- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/ckanext/xloader/templates/xloader/confirm_datastore_delete.html b/ckanext/xloader/templates/xloader/confirm_datastore_delete.html index 06af5ecc..81fca258 100644 --- a/ckanext/xloader/templates/xloader/confirm_datastore_delete.html +++ b/ckanext/xloader/templates/xloader/confirm_datastore_delete.html @@ -8,7 +8,7 @@
    {% block form %} -

    {{ _('Are you sure you want to delete this DataStore table and Data Dictionary?') }}

    +

    {{ _('Are you sure you want to delete the DataStore and Data Dictionary?') }}

    {{ h.csrf_input() if 'csrf_input' in h }} diff --git a/ckanext/xloader/templates/xloader/resource_data.html b/ckanext/xloader/templates/xloader/resource_data.html index 0c1480f8..cca86f39 100644 --- a/ckanext/xloader/templates/xloader/resource_data.html +++ b/ckanext/xloader/templates/xloader/resource_data.html @@ -16,8 +16,8 @@ type="submit" data-module="confirm-action" data-module-with-data=true - data-module-content="{{ _('Are you sure you want to delete this DataStore table and Data Dictionary?') }}" - >{% block delete_datastore_button_text %}{{ _('Delete DataStore table') }}{% endblock %} + data-module-content="{{ _('Are you sure you want to delete the DataStore and Data Dictionary?') }}" + >{% block delete_datastore_button_text %}{{ _('Delete from DataStore') }}{% endblock %}
    {% endblock %} diff --git a/ckanext/xloader/views.py b/ckanext/xloader/views.py index 7126844a..32c8f6d2 100644 --- a/ckanext/xloader/views.py +++ b/ckanext/xloader/views.py @@ -18,7 +18,7 @@ def resource_data(id, resource_id): return utils.resource_data(id, resource_id) -@xloader.route("/dataset//delete-datastore-table/", methods=("GET", "POST")) +@xloader.route("/dataset//delete-datastore/", methods=("GET", "POST")) def delete_datastore_table(id, resource_id): if u'cancel' in request.form: return h.redirect_to(u'xloader.resource_data', id=id, resource_id=resource_id) @@ -34,7 +34,7 @@ def delete_datastore_table(id, resource_id): except NotAuthorized: return abort(403, _(u'Unauthorized to delete resource %s') % resource_id) - h.flash_notice(_(u'DataStore table and Data Dictionary deleted for resource %s') % resource_id) + h.flash_notice(_(u'DataStore and Data Dictionary deleted for resource %s') % resource_id) return h.redirect_to( 'xloader.resource_data', From 46bc35a0568a9fdbc8bd40c65cbf287309db1ce8 Mon Sep 17 00:00:00 2001 From: Jesse Vickery Date: Thu, 9 Nov 2023 15:09:35 +0000 Subject: [PATCH 08/29] removal(views): replaces ckanapi with toolkit; - Used `get_action` from toolkit instead of ckanapi dependency. --- ckanext/xloader/views.py | 11 +++++------ requirements.txt | 1 - 2 files changed, 5 insertions(+), 7 deletions(-) diff --git a/ckanext/xloader/views.py b/ckanext/xloader/views.py index 32c8f6d2..805102fc 100644 --- a/ckanext/xloader/views.py +++ b/ckanext/xloader/views.py @@ -1,7 +1,7 @@ from flask import Blueprint from ckanapi import LocalCKAN -from ckan.plugins.toolkit import _, h, g, render, request, abort, NotAuthorized +from ckan.plugins.toolkit import _, h, g, render, request, abort, NotAuthorized, get_action import ckanext.xloader.utils as utils @@ -24,13 +24,12 @@ def delete_datastore_table(id, resource_id): return h.redirect_to(u'xloader.resource_data', id=id, resource_id=resource_id) if request.method == 'POST': - lc = LocalCKAN(username=g.user) + context = {"user": g.user} try: - lc.action.datastore_delete( - resource_id=resource_id, - force=True, - ) + get_action('datastore_delete')(context, { + "resource_id": resource_id, + "force": True}) except NotAuthorized: return abort(403, _(u'Unauthorized to delete resource %s') % resource_id) diff --git a/requirements.txt b/requirements.txt index 8124a06f..58540beb 100644 --- a/requirements.txt +++ b/requirements.txt @@ -4,4 +4,3 @@ six>=1.12.0 tabulator==1.53.5 Unidecode==1.0.22 python-dateutil>=2.8.2 -ckanapi From 2defc6a4a7133a2ec318c72cc99432b5f90059bd Mon Sep 17 00:00:00 2001 From: Jesse Vickery Date: Thu, 16 Nov 2023 13:22:24 +0000 Subject: [PATCH 09/29] feat(views): check resource owner; - Confirm that the resource id belongs to the package id. --- ckanext/xloader/views.py | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/ckanext/xloader/views.py b/ckanext/xloader/views.py index 805102fc..79ba1aa8 100644 --- a/ckanext/xloader/views.py +++ b/ckanext/xloader/views.py @@ -1,7 +1,6 @@ from flask import Blueprint -from ckanapi import LocalCKAN -from ckan.plugins.toolkit import _, h, g, render, request, abort, NotAuthorized, get_action +from ckan.plugins.toolkit import _, h, g, render, request, abort, NotAuthorized, get_action, ObjectNotFound import ckanext.xloader.utils as utils @@ -23,6 +22,13 @@ def delete_datastore_table(id, resource_id): if u'cancel' in request.form: return h.redirect_to(u'xloader.resource_data', id=id, resource_id=resource_id) + try: + res_dict = get_action('resource_show')(context, {"id": resource_id}) + if res_dict.get('package_id') != id: + raise ObjectNotFound + except ObjectNotFound: + return abort(404, _(u'Resource not found')) + if request.method == 'POST': context = {"user": g.user} From ce69207fe79575acf75f57f50cc23239dcc0cc08 Mon Sep 17 00:00:00 2001 From: Jesse Vickery Date: Thu, 16 Nov 2023 13:41:25 +0000 Subject: [PATCH 10/29] fix(views): syntax, and template; - Condition delete button behind `datastore_active`. - Fix context variable in view. --- .../templates/xloader/resource_data.html | 22 ++++++++++--------- ckanext/xloader/views.py | 4 ++-- 2 files changed, 14 insertions(+), 12 deletions(-) diff --git a/ckanext/xloader/templates/xloader/resource_data.html b/ckanext/xloader/templates/xloader/resource_data.html index cca86f39..6ea29166 100644 --- a/ckanext/xloader/templates/xloader/resource_data.html +++ b/ckanext/xloader/templates/xloader/resource_data.html @@ -9,16 +9,18 @@ {% set show_table = true %} {% block delete_ds_button %} -
    - {{ h.csrf_input() if 'csrf_input' in h }} - {% block delete_datastore_button_text %}{{ _('Delete from DataStore') }}{% endblock %} -
    + {% if res.datastore_active %} +
    + {{ h.csrf_input() if 'csrf_input' in h }} + {% block delete_datastore_button_text %}{{ _('Delete from DataStore') }}{% endblock %} +
    + {% endif %} {% endblock %} {% block upload_ds_button %} diff --git a/ckanext/xloader/views.py b/ckanext/xloader/views.py index 79ba1aa8..0564144b 100644 --- a/ckanext/xloader/views.py +++ b/ckanext/xloader/views.py @@ -22,6 +22,8 @@ def delete_datastore_table(id, resource_id): if u'cancel' in request.form: return h.redirect_to(u'xloader.resource_data', id=id, resource_id=resource_id) + context = {"user": g.user} + try: res_dict = get_action('resource_show')(context, {"id": resource_id}) if res_dict.get('package_id') != id: @@ -30,8 +32,6 @@ def delete_datastore_table(id, resource_id): return abort(404, _(u'Resource not found')) if request.method == 'POST': - context = {"user": g.user} - try: get_action('datastore_delete')(context, { "resource_id": resource_id, From b8f38b588a89406965f76ebba8a5e9e80fed43b1 Mon Sep 17 00:00:00 2001 From: Jesse Vickery Date: Thu, 16 Nov 2023 16:47:11 +0000 Subject: [PATCH 11/29] fix(dev): check url_changed in domain implementation; - Check `url_changed`. --- ckanext/xloader/plugin.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/ckanext/xloader/plugin.py b/ckanext/xloader/plugin.py index 243026b5..c03340ee 100644 --- a/ckanext/xloader/plugin.py +++ b/ckanext/xloader/plugin.py @@ -107,7 +107,9 @@ def notify(self, entity, operation): We want to check if values have changed, namely the url. See: ckan/model/modification.py.DomainObjectModificationExtension """ - if operation != DomainObjectOperation.changed or not isinstance(entity, Resource): + if operation != DomainObjectOperation.changed \ + or not isinstance(entity, Resource) \ + or not getattr(entity, 'url_changed', False): return context = { "ignore_auth": True, From 080ea1f6b0759f7fccf6f1c2522b21370243f478 Mon Sep 17 00:00:00 2001 From: Jesse Vickery Date: Mon, 15 Jan 2024 18:42:12 +0000 Subject: [PATCH 12/29] fix(misc): fixed binary/str types between py2 and py3; - Added binary type support and mapping. - Added new config option for strict type guessing. - Minor fixes for python2 types and classes. - Minor fix for failed type guessing. --- ckanext/xloader/config_declaration.yaml | 8 ++++++++ ckanext/xloader/loader.py | 12 ++++++++++-- ckanext/xloader/utils.py | 8 +++++++- 3 files changed, 25 insertions(+), 3 deletions(-) diff --git a/ckanext/xloader/config_declaration.yaml b/ckanext/xloader/config_declaration.yaml index b31f12e2..1aeb01b9 100644 --- a/ckanext/xloader/config_declaration.yaml +++ b/ckanext/xloader/config_declaration.yaml @@ -48,6 +48,14 @@ groups: type: bool required: false legacy_key: ckanext.xloader.just_load_with_messytables + - key: ckanext.xloader.strict_type_guessing + default: True + example: False + description: | + Use with ckanext.xloader.use_type_guessing to set strict true or false + for type guessing. If set to False, the types will always fallback to string type. + type: bool + required: false - key: ckanext.xloader.parse_dates_dayfirst default: False example: False diff --git a/ckanext/xloader/loader.py b/ckanext/xloader/loader.py index afc3c980..e4c00dd7 100644 --- a/ckanext/xloader/loader.py +++ b/ckanext/xloader/loader.py @@ -3,6 +3,7 @@ import datetime import itertools +from six import text_type as str, binary_type import os import os.path import tempfile @@ -266,7 +267,9 @@ def load_table(table_filepath, resource_id, mimetype='text/csv', logger=None): skip_rows = list(range(1, header_offset + 2)) TYPES, TYPE_MAPPING = get_types() - types = type_guess(stream.sample[1:], types=TYPES, strict=True) + strict_guessing = p.toolkit.asbool( + config.get('ckanext.xloader.strict_type_guessing', True)) + types = type_guess(stream.sample[1:], types=TYPES, strict=strict_guessing) # override with types user requested if existing_info: @@ -333,12 +336,17 @@ def row_iterator(): _TYPE_MAPPING = { + "": 'text', "": 'text', + "": 'text', "": 'text', "": 'numeric', "": 'numeric', "": 'numeric', + "": 'timestamp', "": 'text', + "": 'text', + "": 'text', "": 'text', "": 'numeric', "": 'numeric', @@ -347,7 +355,7 @@ def row_iterator(): def get_types(): - _TYPES = [int, bool, str, datetime.datetime, float, Decimal] + _TYPES = [int, bool, str, binary_type, datetime.datetime, float, Decimal] TYPE_MAPPING = config.get('TYPE_MAPPING', _TYPE_MAPPING) return _TYPES, TYPE_MAPPING diff --git a/ckanext/xloader/utils.py b/ckanext/xloader/utils.py index cbffaa2f..fb672ed5 100644 --- a/ckanext/xloader/utils.py +++ b/ckanext/xloader/utils.py @@ -3,6 +3,8 @@ import json import datetime +from six import text_type as str, binary_type + from ckan import model from ckan.lib import search from collections import defaultdict @@ -10,6 +12,8 @@ import ckan.plugins as p +from .job_exceptions import JobError + def resource_data(id, resource_id): @@ -149,7 +153,7 @@ def headers_guess(rows, tolerance=1): return 0, [] -TYPES = [int, bool, str, datetime.datetime, float, Decimal] +TYPES = [int, bool, str, binary_type, datetime.datetime, float, Decimal] def type_guess(rows, types=TYPES, strict=False): @@ -210,5 +214,7 @@ def type_guess(rows, types=TYPES, strict=False): # element in case of a tie # See: http://stackoverflow.com/a/6783101/214950 guesses_tuples = [(t, guess[t]) for t in types if t in guess] + if not guesses_tuples: + raise JobError('Failed to guess types') _columns.append(max(guesses_tuples, key=lambda t_n: t_n[1])[0]) return _columns From 1190cd41c9494cefa00ddcb1007575619180d3d5 Mon Sep 17 00:00:00 2001 From: ThrawnCA Date: Mon, 29 Jan 2024 15:20:55 +1000 Subject: [PATCH 13/29] Document the ckan.download_proxy setting, #176 --- README.rst | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/README.rst b/README.rst index b4f113ae..0586f336 100644 --- a/README.rst +++ b/README.rst @@ -191,6 +191,10 @@ Configuration: See the extension's `config_declaration.yaml `_ file. +This plugin also supports the `ckan.download_proxy` setting, to use a proxy server when downloading files. +This setting is shared with other plugins that download resource files, such as ckanext-archiver. Eg: + + ckan.download_proxy = http://my-proxy:1234/ ------------------------ Developer installation From 994279afdbe390000746286840bdad9bbe44888d Mon Sep 17 00:00:00 2001 From: ThrawnCA Date: Mon, 29 Jan 2024 15:42:09 +1000 Subject: [PATCH 14/29] fix test data creation to use factory properly --- ckanext/xloader/tests/test_loader.py | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/ckanext/xloader/tests/test_loader.py b/ckanext/xloader/tests/test_loader.py index 5bfc7a0b..4c4e2820 100644 --- a/ckanext/xloader/tests/test_loader.py +++ b/ckanext/xloader/tests/test_loader.py @@ -622,39 +622,39 @@ def test_german(self, Session): def test_with_blanks(self, Session): csv_filepath = get_sample_filepath("sample_with_blanks.csv") - resource_id = "test1" - factories.Resource(id=resource_id) + resource = factories.Resource() + resource_id = resource['id'] loader.load_csv( csv_filepath, resource_id=resource_id, mimetype="text/csv", logger=logger, ) - assert len(self._get_records(Session, "test1")) == 3 + assert len(self._get_records(Session, resource_id)) == 3 def test_with_mixed_quotes(self, Session): csv_filepath = get_sample_filepath("sample_with_mixed_quotes.csv") - resource_id = "test1" - factories.Resource(id=resource_id) + resource = factories.Resource() + resource_id = resource['id'] loader.load_csv( csv_filepath, resource_id=resource_id, mimetype="text/csv", logger=logger, ) - assert len(self._get_records(Session, "test1")) == 2 + assert len(self._get_records(Session, resource_id)) == 2 def test_with_mixed_types(self, Session): csv_filepath = get_sample_filepath("mixed_numeric_string_sample.csv") - resource_id = "test1" - factories.Resource(id=resource_id) + resource = factories.Resource() + resource_id = resource['id'] loader.load_csv( csv_filepath, resource_id=resource_id, mimetype="text/csv", logger=logger, ) - assert len(self._get_records(Session, "test1")) == 2 + assert len(self._get_records(Session, resource_id)) == 2 def test_reload(self, Session): csv_filepath = get_sample_filepath("simple.csv") @@ -1182,12 +1182,12 @@ def test_no_entries(self): def test_with_mixed_quotes(self, Session): csv_filepath = get_sample_filepath("sample_with_mixed_quotes.csv") - resource_id = "test1" - factories.Resource(id=resource_id) + resource = factories.Resource() + resource_id = resource['id'] loader.load_table( csv_filepath, resource_id=resource_id, mimetype="text/csv", logger=logger, ) - assert len(self._get_records(Session, "test1")) == 2 + assert len(self._get_records(Session, resource_id)) == 2 From 787b5a45d172dc909f9a74b9e49790239d9fe43f Mon Sep 17 00:00:00 2001 From: ThrawnCA Date: Mon, 29 Jan 2024 17:38:55 +1000 Subject: [PATCH 15/29] make locking behaviour more robust to prevent deadlocks - Shorten the locking timeout when refreshing a resource without changing its structure - Retry jobs that fail due to locking issues --- ckanext/xloader/jobs.py | 30 +++++++-- ckanext/xloader/loader.py | 127 +++++++++++++++++++++++++++----------- 2 files changed, 116 insertions(+), 41 deletions(-) diff --git a/ckanext/xloader/jobs.py b/ckanext/xloader/jobs.py index 0d242db1..7b96b993 100644 --- a/ckanext/xloader/jobs.py +++ b/ckanext/xloader/jobs.py @@ -10,16 +10,16 @@ import traceback import sys +from psycopg2 import errors from six.moves.urllib.parse import urlsplit import requests from rq import get_current_job import sqlalchemy as sa from ckan import model -from ckan.plugins.toolkit import get_action, asbool, ObjectNotFound, config +from ckan.plugins.toolkit import get_action, asbool, enqueue_job, ObjectNotFound, config -from . import loader -from . import db +from . import db, loader from .job_exceptions import JobError, HTTPError, DataTooBigError, FileCouldNotBeLoadedError from .utils import set_resource_metadata @@ -28,6 +28,8 @@ except ImportError: get_user_from_token = None +log = logging.getLogger(__name__) + SSL_VERIFY = asbool(config.get('ckanext.xloader.ssl_verify', True)) if not SSL_VERIFY: requests.packages.urllib3.disable_warnings() @@ -37,6 +39,13 @@ CHUNK_SIZE = 16 * 1024 # 16kb DOWNLOAD_TIMEOUT = 30 +RETRYABLE_ERRORS = ( + errors.DeadlockDetected, + errors.LockNotAvailable, + errors.ObjectInUse, +) +RETRIED_JOB_TIMEOUT = config.get('ckanext.xloader.job_timeout', '3600') + # input = { # 'api_key': user['apikey'], @@ -80,15 +89,26 @@ def xloader_data_into_datastore(input): db.mark_job_as_errored(job_id, str(e)) job_dict['status'] = 'error' job_dict['error'] = str(e) - log = logging.getLogger(__name__) log.error('xloader error: {0}, {1}'.format(e, traceback.format_exc())) errored = True except Exception as e: + if isinstance(e, RETRYABLE_ERRORS): + tries = job_dict['metadata'].get('tries', 0) + if tries == 0: + log.info("Job %s failed due to temporary error [%s], retrying", job_id, e) + job_dict['status'] = 'pending' + job_dict['metadata']['tries'] = tries + 1 + enqueue_job( + xloader_data_into_datastore, + [input], + rq_kwargs=dict(timeout=RETRIED_JOB_TIMEOUT) + ) + return None + db.mark_job_as_errored( job_id, traceback.format_tb(sys.exc_info()[2])[-1] + repr(e)) job_dict['status'] = 'error' job_dict['error'] = str(e) - log = logging.getLogger(__name__) log.error('xloader error: {0}, {1}'.format(e, traceback.format_exc())) errored = True finally: diff --git a/ckanext/xloader/loader.py b/ckanext/xloader/loader.py index 2060a9ef..e64802e8 100644 --- a/ckanext/xloader/loader.py +++ b/ckanext/xloader/loader.py @@ -31,6 +31,49 @@ tabulator_config.CSV_SAMPLE_LINES = CSV_SAMPLE_LINES +def _fields_match(fields, existing_fields, logger): + ''' Check whether all columns have the same names and types as previously, + independent of ordering. + ''' + # drop the generated '_id' field + for index in range(len(existing_fields)): + if existing_fields[index]['id'] == '_id': + existing_fields.pop(index) + break + + # fail fast if number of fields doesn't match + field_count = len(fields) + if field_count != len(existing_fields): + logger.info("Fields do not match; there are now %s fields but previously %s", field_count, len(existing_fields)) + return False + + # ensure each field is present in both collections with the same type + for index in range(field_count): + field_id = fields[index]['id'] + for existing_index in range(field_count): + existing_field_id = existing_fields[existing_index]['id'] + if field_id == existing_field_id: + if fields[index]['type'] == existing_fields[existing_index]['type']: + break + else: + logger.info("Fields do not match; new type for %s field is %s but existing type is %s", + field_id, fields[index]["type"], existing_fields[existing_index]['type']) + return False + else: + logger.info("Fields do not match; no existing entry found for %s", field_id) + return False + return True + + +def _clear_datastore_resource(resource_id): + ''' Delete all records from the datastore table, without dropping the table itself. + ''' + engine = get_write_engine() + with engine.begin() as conn: + conn.execute("SET LOCAL lock_timeout = '5s'") + conn.execute('TRUNCATE TABLE "{}"'.format(resource_id)) + + def load_csv(csv_filepath, resource_id, mimetype='text/csv', logger=None): '''Loads a CSV into DataStore. Does not create the indexes.''' @@ -85,34 +128,43 @@ def load_csv(csv_filepath, resource_id, mimetype='text/csv', logger=None): existing = datastore_resource_exists(resource_id) existing_info = {} if existing: + existing_fields = existing.get('fields', []) existing_info = dict((f['id'], f['info']) - for f in existing.get('fields', []) + for f in existing_fields if 'info' in f) - ''' - Delete existing datastore table before proceeding. Otherwise - the COPY will append to the existing table. And if - the fields have significantly changed, it may also fail. - ''' - logger.info('Deleting "{res_id}" from DataStore.'.format( - res_id=resource_id)) - delete_datastore_resource(resource_id) - - # Columns types are either set (overridden) in the Data Dictionary page - # or default to text type (which is robust) - fields = [ - {'id': header_name, - 'type': existing_info.get(header_name, {}) - .get('type_override') or 'text', - } - for header_name in headers] + # Column types are either set (overridden) in the Data Dictionary page + # or default to text type (which is robust) + fields = [ + {'id': header_name, + 'type': existing_info.get(header_name, {}) + .get('type_override') or 'text', + } + for header_name in headers] - # Maintain data dictionaries from matching column names - if existing_info: + # Maintain data dictionaries from matching column names for f in fields: if f['id'] in existing_info: f['info'] = existing_info[f['id']] + ''' + Delete or truncate existing datastore table before proceeding, + depending on whether any fields have changed. + Otherwise the COPY will append to the existing table. + And if the fields have significantly changed, it may also fail. + ''' + if _fields_match(fields, existing_fields, logger): + logger.info('Clearing records for "%s" from DataStore.', resource_id) + _clear_datastore_resource(resource_id) + else: + logger.info('Deleting "%s" from DataStore.', resource_id) + delete_datastore_resource(resource_id) + else: + fields = [ + {'id': header_name, + 'type': 'text'} + for header_name in headers] + logger.info('Fields: %s', fields) # Create table @@ -254,9 +306,10 @@ def load_table(table_filepath, resource_id, mimetype='text/csv', logger=None): existing = datastore_resource_exists(resource_id) existing_info = None if existing: + existing_fields = existing.get('fields', []) existing_info = dict( (f['id'], f['info']) - for f in existing.get('fields', []) if 'info' in f) + for f in existing_fields if 'info' in f) # Some headers might have been converted from strings to floats and such. headers = encode_headers(headers) @@ -290,16 +343,6 @@ def row_iterator(): yield data_row result = row_iterator() - ''' - Delete existing datstore resource before proceeding. Otherwise - 'datastore_create' will append to the existing datastore. And if - the fields have significantly changed, it may also fail. - ''' - if existing: - logger.info('Deleting "{res_id}" from datastore.'.format( - res_id=resource_id)) - delete_datastore_resource(resource_id) - headers_dicts = [dict(id=field[0], type=TYPE_MAPPING[str(field[1])]) for field in zip(headers, types)] @@ -313,8 +356,21 @@ def row_iterator(): if type_override in list(_TYPE_MAPPING.values()): h['type'] = type_override - logger.info('Determined headers and types: {headers}'.format( - headers=headers_dicts)) + logger.info('Determined headers and types: %s', headers_dicts) + + ''' + Delete or truncate existing datastore table before proceeding, + depending on whether any fields have changed. + Otherwise 'datastore_create' will append to the existing datastore. + And if the fields have significantly changed, it may also fail. + ''' + if existing: + if _fields_match(headers_dicts, existing_fields, logger): + logger.info('Clearing records for "%s" from DataStore.', resource_id) + _clear_datastore_resource(resource_id) + else: + logger.info('Deleting "%s" from datastore.', resource_id) + delete_datastore_resource(resource_id) logger.info('Copying to database...') count = 0 @@ -323,7 +379,7 @@ def row_iterator(): non_empty_types = ['timestamp', 'numeric'] for i, records in enumerate(chunky(result, 250)): count += len(records) - logger.info('Saving chunk {number}'.format(number=i)) + logger.info('Saving chunk %s', i) for row in records: for column_index, column_name in enumerate(row): if headers_dicts[column_index]['type'] in non_empty_types and row[column_name] == '': @@ -332,8 +388,7 @@ def row_iterator(): logger.info('...copying done') if count: - logger.info('Successfully pushed {n} entries to "{res_id}".'.format( - n=count, res_id=resource_id)) + logger.info('Successfully pushed %s entries to "%s".', count, resource_id) else: # no datastore table is created raise LoaderError('No entries found - nothing to load') From e6687a280aafc3c2fdba367fa04c290df3ac04dd Mon Sep 17 00:00:00 2001 From: ThrawnCA Date: Mon, 29 Jan 2024 17:40:29 +1000 Subject: [PATCH 16/29] acquire locks up-front, #201 - Request a write-lock when preparing to update resource metadata, not just a read lock, so that we don't risk deadlocking with another process. --- ckanext/xloader/utils.py | 1 + 1 file changed, 1 insertion(+) diff --git a/ckanext/xloader/utils.py b/ckanext/xloader/utils.py index ec8e4bbd..e504a738 100644 --- a/ckanext/xloader/utils.py +++ b/ckanext/xloader/utils.py @@ -83,6 +83,7 @@ def set_resource_metadata(update_dict): # better fix q = model.Session.query(model.Resource). \ + with_for_update(of=model.Resource). \ filter(model.Resource.id == update_dict['resource_id']) resource = q.one() From 1a1bd147fd46176f5f752cf7833fccb2e7553412 Mon Sep 17 00:00:00 2001 From: Jesse Vickery Date: Mon, 29 Jan 2024 20:04:38 +0000 Subject: [PATCH 17/29] fix(templates): fix layout of delete button; - Fix layout of delete button with upload button. - Changed delete button icon. --- .../templates/xloader/resource_data.html | 24 ++++++++++--------- 1 file changed, 13 insertions(+), 11 deletions(-) diff --git a/ckanext/xloader/templates/xloader/resource_data.html b/ckanext/xloader/templates/xloader/resource_data.html index e5349aae..e55f6421 100644 --- a/ckanext/xloader/templates/xloader/resource_data.html +++ b/ckanext/xloader/templates/xloader/resource_data.html @@ -8,30 +8,32 @@ {% set delete_action = h.url_for('xloader.delete_datastore_table', id=pkg.id, resource_id=res.id) %} {% set show_table = true %} + {% block upload_ds_button %} +
    + {{ h.csrf_input() if 'csrf_input' in h }} + +
    + {% endblock %} + +
    + {% block delete_ds_button %} {% if res.datastore_active %}
    {{ h.csrf_input() if 'csrf_input' in h }} {% block delete_datastore_button_text %}{{ _('Delete from DataStore') }}{% endblock %} + >{% block delete_datastore_button_text %} {{ _('Delete from DataStore') }}{% endblock %}
    {% endif %} {% endblock %} - {% block upload_ds_button %} -
    - {{ h.csrf_input() if 'csrf_input' in h }} - -
    - {% endblock %} - {% if status.error and status.error.message %} {% set show_table = false %}
    From 3e3eebf804987e51046e5d3bc4fe401da87b7950 Mon Sep 17 00:00:00 2001 From: Jesse Vickery Date: Mon, 29 Jan 2024 20:34:09 +0000 Subject: [PATCH 18/29] fix(templates): <=2.10 template additions, falsy url type; - Added old template blocks for version 2.10 and less. - Added falsy check on url_type in template helper method. --- ckanext/xloader/helpers.py | 2 +- ckanext/xloader/templates/package/resource_read.html | 7 +++++++ .../xloader/templates/package/snippets/resource_item.html | 7 +++++++ 3 files changed, 15 insertions(+), 1 deletion(-) diff --git a/ckanext/xloader/helpers.py b/ckanext/xloader/helpers.py index 3c071028..a28f89f7 100644 --- a/ckanext/xloader/helpers.py +++ b/ckanext/xloader/helpers.py @@ -38,5 +38,5 @@ def is_resource_supported_by_xloader(res_dict, check_access = True): try: is_supported_url_type = res_dict.get('url_type') not in toolkit.h.datastore_rw_resource_url_types() except AttributeError: - is_supported_url_type = (res_dict.get('url_type') == 'upload' or res_dict.get('url_type') == '') + is_supported_url_type = (res_dict.get('url_type') == 'upload' or not res_dict.get('url_type')) return (is_supported_format or is_datastore_active) and user_has_access and is_supported_url_type diff --git a/ckanext/xloader/templates/package/resource_read.html b/ckanext/xloader/templates/package/resource_read.html index 6d5f5ff2..3ab1476e 100644 --- a/ckanext/xloader/templates/package/resource_read.html +++ b/ckanext/xloader/templates/package/resource_read.html @@ -6,3 +6,10 @@
  • {% link_for _('DataStore'), named_route='xloader.resource_data', id=pkg.name, resource_id=res.id, class_='btn btn-light', icon='cloud-upload' %}
  • {% endif %} {% endblock %} + +{% block resource_actions_inner %} + {% if h.check_ckan_version(max_version='2.10') and h.is_resource_supported_by_xloader(res) %} +
  • {% link_for _('DataStore'), named_route='xloader.resource_data', id=pkg.name, resource_id=res.id, class_='btn btn-light', icon='cloud-upload' %}
  • + {% endif %} + {{ super() }} +{% endblock %} diff --git a/ckanext/xloader/templates/package/snippets/resource_item.html b/ckanext/xloader/templates/package/snippets/resource_item.html index 37ed457c..70bf99c4 100644 --- a/ckanext/xloader/templates/package/snippets/resource_item.html +++ b/ckanext/xloader/templates/package/snippets/resource_item.html @@ -6,3 +6,10 @@
  • {% link_for _('DataStore'), named_route='xloader.resource_data', id=pkg.name, resource_id=res.id, class_='dropdown-item', icon='cloud-upload' %}
  • {% endif %} {% endblock %} + +{% block resource_item_explore_links %} + {% if h.check_ckan_version(max_version='2.10') and h.is_resource_supported_by_xloader(res) %} +
  • {% link_for _('DataStore'), named_route='xloader.resource_data', id=pkg.name, resource_id=res.id, class_='dropdown-item', icon='cloud-upload' %}
  • + {% endif %} + {{ super() }} +{% endblock %} From 29de39cf594a36bf5790ca77c1fa4a3b82607d15 Mon Sep 17 00:00:00 2001 From: Jesse Vickery Date: Mon, 29 Jan 2024 21:14:39 +0000 Subject: [PATCH 19/29] feat(misc): config description; - Added to config declaration description. --- ckanext/xloader/config_declaration.yaml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/ckanext/xloader/config_declaration.yaml b/ckanext/xloader/config_declaration.yaml index 867cc9ae..66888050 100644 --- a/ckanext/xloader/config_declaration.yaml +++ b/ckanext/xloader/config_declaration.yaml @@ -52,6 +52,8 @@ groups: description: | Use with ckanext.xloader.use_type_guessing to set strict true or false for type guessing. If set to False, the types will always fallback to string type. + + Strict means that a type will not be guessed if parsing fails for a single cell in the column. type: bool - key: ckanext.xloader.max_type_guessing_length default: 0 From 8b756c5032c6f70639f525d90ce5203fccb9d7ae Mon Sep 17 00:00:00 2001 From: ThrawnCA Date: Tue, 30 Jan 2024 16:07:55 +1000 Subject: [PATCH 20/29] [QOLCHG-440] add another test case for type guessing --- ckanext/xloader/tests/test_loader.py | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/ckanext/xloader/tests/test_loader.py b/ckanext/xloader/tests/test_loader.py index e024b315..a0c478ff 100644 --- a/ckanext/xloader/tests/test_loader.py +++ b/ckanext/xloader/tests/test_loader.py @@ -958,6 +958,24 @@ def test_simple(self, Session): assert fields[1].get("info", {}).get("type_override", "") == "numeric" assert fields[2].get("info", {}).get("type_override", "") == "" + def test_simple_large_file(self, Session): + csv_filepath = get_sample_filepath("simple-large.csv") + resource = factories.Resource() + resource_id = resource['id'] + fields = loader.load_table( + csv_filepath, + resource_id=resource_id, + mimetype="text/csv", + logger=logger, + ) + assert self._get_column_types(Session, resource_id) == [ + u"int4", + u"tsvector", + u"numeric", + u"text", + ] + + # test disabled by default to avoid adding large file to repo and slow test @pytest.mark.skip def test_boston_311_complete(self): From 8d6c1692afa8e77afe7bbb57fe96b6887a2b86f6 Mon Sep 17 00:00:00 2001 From: ThrawnCA Date: Tue, 30 Jan 2024 17:39:48 +1000 Subject: [PATCH 21/29] [QOLCHG-440] include testing of '0' values in type guessing --- ckanext/xloader/tests/samples/simple-large.csv | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/ckanext/xloader/tests/samples/simple-large.csv b/ckanext/xloader/tests/samples/simple-large.csv index 53d3fb24..46c6c3b9 100644 --- a/ckanext/xloader/tests/samples/simple-large.csv +++ b/ckanext/xloader/tests/samples/simple-large.csv @@ -1,4 +1,5 @@ id,text +0,- 1,a 2,b 3,c @@ -49997,4 +49998,4 @@ id,text 49996,x 49997,y 49998,z -49999,a \ No newline at end of file +49999,a From d2d6acf305681be251766718032aaf8267ea5072 Mon Sep 17 00:00:00 2001 From: ThrawnCA Date: Tue, 30 Jan 2024 17:44:01 +1000 Subject: [PATCH 22/29] [QOLCHG-440] allow zero to be recognised as a valid number --- ckanext/xloader/parser.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/ckanext/xloader/parser.py b/ckanext/xloader/parser.py index 812ccd1f..11e756cd 100644 --- a/ckanext/xloader/parser.py +++ b/ckanext/xloader/parser.py @@ -36,7 +36,9 @@ def convert_types(self, extended_rows): cell_type = self.types[cell_index] if self.types else None if cell_type in [Decimal, None]: converted_value = to_number(cell_value) - if converted_value: + # Can't do a simple truthiness check, + # because 0 is a valid numeric result. + if converted_value is not None: row[cell_index] = converted_value continue if cell_type in [datetime.datetime, None]: From 523df62ee68641becbb2551f278b5cc37823d6b5 Mon Sep 17 00:00:00 2001 From: ThrawnCA Date: Wed, 31 Jan 2024 12:07:55 +1000 Subject: [PATCH 23/29] [QOLSVC-4689] add CLI option to process datasets immediately, #202 - This will allow eg support staff processing a critical resource immediately while a large job is occupying the queue --- ckanext/xloader/cli.py | 10 +++-- ckanext/xloader/command.py | 48 ++++++++++++++--------- ckanext/xloader/jobs.py | 58 ++++++++++++++-------------- ckanext/xloader/tests/test_loader.py | 2 +- ckanext/xloader/utils.py | 4 +- 5 files changed, 69 insertions(+), 53 deletions(-) diff --git a/ckanext/xloader/cli.py b/ckanext/xloader/cli.py index 6b54482a..f4cf8659 100644 --- a/ckanext/xloader/cli.py +++ b/ckanext/xloader/cli.py @@ -26,7 +26,9 @@ def status(): @click.argument(u'dataset-spec') @click.option('-y', is_flag=True, default=False, help='Always answer yes to questions') @click.option('--dry-run', is_flag=True, default=False, help='Don\'t actually submit any resources') -def submit(dataset_spec, y, dry_run): +@click.option('--sync', is_flag=True, default=False, + help='Execute immediately instead of enqueueing for asynchronous processing') +def submit(dataset_spec, y, dry_run, sync): """ xloader submit [options] """ @@ -34,15 +36,15 @@ def submit(dataset_spec, y, dry_run): if dataset_spec == 'all': cmd._setup_xloader_logger() - cmd._submit_all() + cmd._submit_all(sync=sync) elif dataset_spec == 'all-existing': _confirm_or_abort(y, dry_run) cmd._setup_xloader_logger() - cmd._submit_all_existing() + cmd._submit_all_existing(sync=sync) else: pkg_name_or_id = dataset_spec cmd._setup_xloader_logger() - cmd._submit_package(pkg_name_or_id) + cmd._submit_package(pkg_name_or_id, sync=sync) if cmd.error_occured: print('Finished but saw errors - see above for details') diff --git a/ckanext/xloader/command.py b/ckanext/xloader/command.py index 64b79754..d1d687ca 100644 --- a/ckanext/xloader/command.py +++ b/ckanext/xloader/command.py @@ -3,6 +3,8 @@ import sys import logging import ckan.plugins.toolkit as tk + +from ckanext.xloader.jobs import xloader_data_into_datastore_ from ckanext.xloader.utils import XLoaderFormats @@ -23,7 +25,7 @@ def _setup_xloader_logger(self): logger.setLevel(logging.DEBUG) logger.propagate = False # in case the config - def _submit_all_existing(self): + def _submit_all_existing(self, sync=False): from ckanext.datastore.backend \ import get_all_resources_ids_in_datastore resource_ids = get_all_resources_ids_in_datastore() @@ -38,9 +40,9 @@ def _submit_all_existing(self): print(' Skipping resource {} found in datastore but not in ' 'metadata'.format(resource_id)) continue - self._submit_resource(resource_dict, user, indent=2) + self._submit_resource(resource_dict, user, indent=2, sync=sync) - def _submit_all(self): + def _submit_all(self, sync=False): # submit every package # for each package in the package list, # submit each resource w/ _submit_package @@ -51,9 +53,9 @@ def _submit_all(self): user = tk.get_action('get_site_user')( {'ignore_auth': True}, {}) for p_id in package_list: - self._submit_package(p_id, user, indent=2) + self._submit_package(p_id, user, indent=2, sync=sync) - def _submit_package(self, pkg_id, user=None, indent=0): + def _submit_package(self, pkg_id, user=None, indent=0, sync=False): indentation = ' ' * indent if not user: user = tk.get_action('get_site_user')( @@ -73,15 +75,15 @@ def _submit_package(self, pkg_id, user=None, indent=0): for resource in pkg['resources']: try: resource['package_name'] = pkg['name'] # for debug output - self._submit_resource(resource, user, indent=indent + 2) + self._submit_resource(resource, user, indent=indent + 2, sync=sync) except Exception as e: self.error_occured = True - print(e) + print(str(e)) print(indentation + 'ERROR submitting resource "{}" '.format( resource['id'])) continue - def _submit_resource(self, resource, user, indent=0): + def _submit_resource(self, resource, user, indent=0, sync=False): '''resource: resource dictionary ''' indentation = ' ' * indent @@ -99,23 +101,33 @@ def _submit_resource(self, resource, user, indent=0): r=resource)) return dataset_ref = resource.get('package_name', resource['package_id']) - print('{indent}Submitting /dataset/{dataset}/resource/{r[id]}\n' + print('{indent}{sync_style} /dataset/{dataset}/resource/{r[id]}\n' '{indent} url={r[url]}\n' '{indent} format={r[format]}' - .format(dataset=dataset_ref, r=resource, indent=indentation)) + .format(sync_style='Processing' if sync else 'Submitting', + dataset=dataset_ref, r=resource, indent=indentation)) + if self.dry_run: + print(indentation + '(not submitted - dry-run)') + return data_dict = { 'resource_id': resource['id'], 'ignore_hash': True, } - if self.dry_run: - print(indentation + '(not submitted - dry-run)') - return - success = tk.get_action('xloader_submit')({'user': user['name']}, data_dict) - if success: - print(indentation + '...ok') + if sync: + data_dict['ckan_url'] = tk.config.get('ckan.site_url') + input_dict = { + 'metadata': data_dict, + 'api_key': 'TODO' + } + logger = logging.getLogger('ckanext.xloader.cli') + xloader_data_into_datastore_(input_dict, None, logger) else: - print(indentation + 'ERROR submitting resource') - self.error_occured = True + success = tk.get_action('xloader_submit')({'user': user['name']}, data_dict) + if success: + print(indentation + '...ok') + else: + print(indentation + 'ERROR submitting resource') + self.error_occured = True def print_status(self): import ckan.lib.jobs as rq_jobs diff --git a/ckanext/xloader/jobs.py b/ckanext/xloader/jobs.py index f1d7467f..e7181911 100644 --- a/ckanext/xloader/jobs.py +++ b/ckanext/xloader/jobs.py @@ -86,15 +86,36 @@ def xloader_data_into_datastore(input): job_id = get_current_job().id errored = False + + # Set-up logging to the db + handler = StoringHandler(job_id, input) + level = logging.DEBUG + handler.setLevel(level) + logger = logging.getLogger(job_id) + handler.setFormatter(logging.Formatter('%(message)s')) + logger.addHandler(handler) + # also show logs on stderr + logger.addHandler(logging.StreamHandler()) + logger.setLevel(logging.DEBUG) + + db.init(config) try: - xloader_data_into_datastore_(input, job_dict) + # Store details of the job in the db + db.add_pending_job(job_id, **input) + xloader_data_into_datastore_(input, job_dict, logger) job_dict['status'] = 'complete' db.mark_job_as_completed(job_id, job_dict) + except sa.exc.IntegrityError as e: + db.mark_job_as_errored(job_id, str(e)) + job_dict['status'] = 'error' + job_dict['error'] = str(e) + log.error('xloader error: job_id %s already exists', job_id) + errored = True except JobError as e: db.mark_job_as_errored(job_id, str(e)) job_dict['status'] = 'error' job_dict['error'] = str(e) - log.error('xloader error: {0}, {1}'.format(e, traceback.format_exc())) + log.error('xloader error: %s, %s', e, traceback.format_exc()) errored = True except Exception as e: if isinstance(e, RETRYABLE_ERRORS): @@ -114,7 +135,7 @@ def xloader_data_into_datastore(input): job_id, traceback.format_tb(sys.exc_info()[2])[-1] + repr(e)) job_dict['status'] = 'error' job_dict['error'] = str(e) - log.error('xloader error: {0}, {1}'.format(e, traceback.format_exc())) + log.error('xloader error: %s, %s', e, traceback.format_exc()) errored = True finally: # job_dict is defined in xloader_hook's docstring @@ -125,7 +146,7 @@ def xloader_data_into_datastore(input): return 'error' if errored else None -def xloader_data_into_datastore_(input, job_dict): +def xloader_data_into_datastore_(input, job_dict, logger): '''This function: * downloads the resource (metadata) from CKAN * downloads the data @@ -134,26 +155,6 @@ def xloader_data_into_datastore_(input, job_dict): (datapusher called this function 'push_to_datastore') ''' - job_id = get_current_job().id - db.init(config) - - # Store details of the job in the db - try: - db.add_pending_job(job_id, **input) - except sa.exc.IntegrityError: - raise JobError('job_id {} already exists'.format(job_id)) - - # Set-up logging to the db - handler = StoringHandler(job_id, input) - level = logging.DEBUG - handler.setLevel(level) - logger = logging.getLogger(job_id) - handler.setFormatter(logging.Formatter('%(message)s')) - logger.addHandler(handler) - # also show logs on stderr - logger.addHandler(logging.StreamHandler()) - logger.setLevel(logging.DEBUG) - validate_input(input) data = input['metadata'] @@ -197,10 +198,11 @@ def direct_load(): loader.calculate_record_count( resource_id=resource['id'], logger=logger) set_datastore_active(data, resource, logger) - job_dict['status'] = 'running_but_viewable' - callback_xloader_hook(result_url=input['result_url'], - api_key=api_key, - job_dict=job_dict) + if 'result_url' in input: + job_dict['status'] = 'running_but_viewable' + callback_xloader_hook(result_url=input['result_url'], + api_key=api_key, + job_dict=job_dict) logger.info('Data now available to users: %s', resource_ckan_url) loader.create_column_indexes( fields=fields, diff --git a/ckanext/xloader/tests/test_loader.py b/ckanext/xloader/tests/test_loader.py index a0c478ff..96293993 100644 --- a/ckanext/xloader/tests/test_loader.py +++ b/ckanext/xloader/tests/test_loader.py @@ -962,7 +962,7 @@ def test_simple_large_file(self, Session): csv_filepath = get_sample_filepath("simple-large.csv") resource = factories.Resource() resource_id = resource['id'] - fields = loader.load_table( + loader.load_table( csv_filepath, resource_id=resource_id, mimetype="text/csv", diff --git a/ckanext/xloader/utils.py b/ckanext/xloader/utils.py index 5c71b156..7e414e79 100644 --- a/ckanext/xloader/utils.py +++ b/ckanext/xloader/utils.py @@ -13,6 +13,8 @@ import ckan.plugins as p from ckan.plugins.toolkit import config +from .job_exceptions import JobError + # resource.formats accepted by ckanext-xloader. Must be lowercase here. DEFAULT_FORMATS = [ "csv", @@ -26,8 +28,6 @@ "application/vnd.oasis.opendocument.spreadsheet", ] -from .job_exceptions import JobError - class XLoaderFormats(object): formats = None From a1b73f1dae67c6490e71b13eafdb09a4aeb8b2a6 Mon Sep 17 00:00:00 2001 From: Jesse Vickery Date: Wed, 31 Jan 2024 15:05:44 +0000 Subject: [PATCH 24/29] fix(templates): set in block; - Move sets into their respective blocks. --- ckanext/xloader/templates/xloader/resource_data.html | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/ckanext/xloader/templates/xloader/resource_data.html b/ckanext/xloader/templates/xloader/resource_data.html index e55f6421..74a5f715 100644 --- a/ckanext/xloader/templates/xloader/resource_data.html +++ b/ckanext/xloader/templates/xloader/resource_data.html @@ -4,11 +4,10 @@ {% block primary_content_inner %} - {% set action = h.url_for('xloader.resource_data', id=pkg.name, resource_id=res.id) %} - {% set delete_action = h.url_for('xloader.delete_datastore_table', id=pkg.id, resource_id=res.id) %} {% set show_table = true %} {% block upload_ds_button %} + {% set action = h.url_for('xloader.resource_data', id=pkg.name, resource_id=res.id) %}
    {{ h.csrf_input() if 'csrf_input' in h }}