From 97c4027f2ff6a09dd8d3f51585dfaf8ce54e6aef Mon Sep 17 00:00:00 2001 From: Lucas Lavandeira <19612265+lucaslavandeira@users.noreply.github.com> Date: Fri, 9 Mar 2018 12:01:33 -0300 Subject: [PATCH] =?UTF-8?q?Permito=20que=20las=20series=20puedan=20cambiar?= =?UTF-8?q?=20distribuciones=20dentro=20de=20su=20cat=C3=A1logo?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit refs:#215 --- .../libs/indexing/database_loader.py | 28 +++- .../libs/indexing/tests/loader_tests.py | 38 ++++- .../full_ts_data_changed_distribution.json | 135 ++++++++++++++++++ 3 files changed, 193 insertions(+), 8 deletions(-) create mode 100644 series_tiempo_ar_api/libs/indexing/tests/samples/full_ts_data_changed_distribution.json diff --git a/series_tiempo_ar_api/libs/indexing/database_loader.py b/series_tiempo_ar_api/libs/indexing/database_loader.py index 24f1b168..04c6c9b4 100644 --- a/series_tiempo_ar_api/libs/indexing/database_loader.py +++ b/series_tiempo_ar_api/libs/indexing/database_loader.py @@ -211,13 +211,25 @@ def _save_fields(self, distribution_model, fields): for field in fields: series_id = field.get(constants.FIELD_ID) - title = field.get(constants.FIELD_TITLE) - field_model, created = Field.objects.get_or_create( - series_id=series_id, - title=title, - distribution=distribution_model - ) + # No vale get_or_create, distribution_model puede haber diferido desde la última ejecución + field_model = Field.objects.filter(series_id=series_id) + if not field_model: + field_model = Field(series_id=series_id) + created = True + else: + field_model = field_model[0] + created = False + + old_catalog_id = field_model.distribution.dataset.catalog.identifier + if old_catalog_id != self.catalog_id: + raise FieldRepetitionError(u"Serie {} repetida en catálogos {} y {}".format( + series_id, old_catalog_id, self.catalog_id + )) + + title = field.get(constants.FIELD_TITLE) + field_model.title = title + field_model.distribution = distribution_model field = self._remove_blacklisted_fields( field, settings.FIELD_BLACKLIST @@ -268,3 +280,7 @@ def set_as_updated(model): def read_updated(model): with transaction.atomic(): return model.__class__.objects.select_for_update().get(id=model.id).updated + + +class FieldRepetitionError(Exception): + pass diff --git a/series_tiempo_ar_api/libs/indexing/tests/loader_tests.py b/series_tiempo_ar_api/libs/indexing/tests/loader_tests.py index 0de303a5..b0e0151d 100644 --- a/series_tiempo_ar_api/libs/indexing/tests/loader_tests.py +++ b/series_tiempo_ar_api/libs/indexing/tests/loader_tests.py @@ -5,11 +5,12 @@ from django.conf import settings from django.test import TestCase from pydatajson import DataJson +from nose.tools import raises from series_tiempo_ar.search import get_time_series_distributions -from series_tiempo_ar_api.apps.api.models import Catalog, Dataset, Distribution +from series_tiempo_ar_api.apps.api.models import Catalog, Dataset, Distribution, Field from series_tiempo_ar_api.apps.management.models import ReadDataJsonTask, Node -from series_tiempo_ar_api.libs.indexing.database_loader import DatabaseLoader +from series_tiempo_ar_api.libs.indexing.database_loader import DatabaseLoader, FieldRepetitionError from series_tiempo_ar_api.libs.indexing.tests.reader_tests import SAMPLES_DIR, CATALOG_ID dir_path = os.path.join(os.path.dirname(os.path.realpath(__file__)), 'samples') @@ -83,3 +84,36 @@ def test_datasets_loaded_are_not_indexable(self): self.assertEqual(dataset.count(), 1) self.assertFalse(dataset.first().indexable) + + def test_change_series_distribution(self): + catalog = DataJson(os.path.join(SAMPLES_DIR, 'full_ts_data.json')) + distributions = get_time_series_distributions(catalog) + + self.loader.run(distributions[0], catalog, self.catalog_id) + + catalog = DataJson(os.path.join(SAMPLES_DIR, 'full_ts_data_changed_distribution.json')) + distributions = get_time_series_distributions(catalog) + loader = DatabaseLoader(self.task, read_local=True, default_whitelist=False) + loader.run(distributions[0], catalog, self.catalog_id) + + # Valores obtenidos del .json fuente + self.assertEqual(Field.objects.get(series_id="212.1_PSCIOS_IOS_0_0_25").distribution, + Distribution.objects.get(identifier="300.1")) + + @raises(FieldRepetitionError) + def test_change_series_distributions_different_catalog(self): + catalog = DataJson(os.path.join(SAMPLES_DIR, 'full_ts_data.json')) + distributions = get_time_series_distributions(catalog) + + self.loader.run(distributions[0], catalog, self.catalog_id) + + other_catalog_id = 'other_catalog_id' + node = Node(catalog_id=other_catalog_id, + catalog_url=os.path.join(SAMPLES_DIR, 'full_ts_data_changed_distribution.json'), + indexable=True) + node.save() + loader = DatabaseLoader(self.task, read_local=True, default_whitelist=False) + + catalog = DataJson(os.path.join(SAMPLES_DIR, 'full_ts_data_changed_distribution.json')) + distributions = get_time_series_distributions(catalog) + loader.run(distributions[0], catalog, 'other_catalog_id') diff --git a/series_tiempo_ar_api/libs/indexing/tests/samples/full_ts_data_changed_distribution.json b/series_tiempo_ar_api/libs/indexing/tests/samples/full_ts_data_changed_distribution.json new file mode 100644 index 00000000..bca2483d --- /dev/null +++ b/series_tiempo_ar_api/libs/indexing/tests/samples/full_ts_data_changed_distribution.json @@ -0,0 +1,135 @@ +{ + "title": "Datos Argentina", + "description": "Portal de Datos Abiertos del Gobierno de la República Argentina", + "publisher": { + "name": "Ministerio de Modernización", + "mbox": "datos@modernizacion.gob.ar" + }, + "issued": "2016-04-14T19:48:05.433640-03:00", + "modified": "2016-04-19T19:48:05.433640-03:00", + "language": [ + "spa" + ], + "superThemeTaxonomy": "http://datos.gob.ar/superThemeTaxonomy.json", + "themeTaxonomy": [ + { + "id": "convocatorias", + "label": "Convocatorias", + "description": "Datasets sobre licitaciones en estado de convocatoria." + }, + { + "id": "compras", + "label": "Compras", + "description": "Datasets sobre compras realizadas." + }, + { + "id": "contrataciones", + "label": "Contrataciones", + "description": "Datasets sobre contrataciones." + }, + { + "id": "adjudicaciones", + "label": "Adjudicaciones", + "description": "Datasets sobre licitaciones adjudicadas." + }, + { + "id": "normativa", + "label": "Normativa", + "description": "Datasets sobre normativa para compras y contrataciones." + }, + { + "id": "proveedores", + "label": "Proveedores", + "description": "Datasets sobre proveedores del Estado." + } + ], + "license": "Open Data Commons Open Database License 1.0", + "homepage": "http://datos.gob.ar", + "rights": "Derechos especificados en la licencia.", + "spatial": "ARG", + "dataset": [ + { + "title": "Sistema de contrataciones electrónicas", + "description": "Datos correspondientes al Sistema de Contrataciones Electrónicas (Argentina Compra)", + "publisher": { + "name": "Ministerio de Modernización. Secretaría de Modernización Administrativa. Oficina Nacional de Contrataciones", + "mbox": "onc@modernizacion.gob.ar" + }, + "contactPoint": { + "fn": "Ministerio de Modernización. Secretaría de Modernización Administrativa. Oficina Nacional de Contrataciones. Dirección de Compras Electrónicas.", + "hasEmail": "onc-compraselectronicas@modernizacion.gob.ar" + }, + "superTheme": [ + "econ" + ], + "theme": [ + "contrataciones", + "compras", + "convocatorias" + ], + "keyword": [ + "bienes", + "compras", + "contrataciones" + ], + "accrualPeriodicity": "R/P1Y", + "issued": "2016-04-14T19:48:05.433640-03:00", + "modified": "2016-04-19T19:48:05.433640-03:00", + "identifier": "300", + "language": [ + "spa" + ], + "spatial": "ARG", + "temporal": "1970-01-01/1980-10-01", + "landingPage": "http://datos.gob.ar/dataset/sistema-de-contrataciones-electronicas-argentina-compra", + "license": "Open Data Commons Open Database License 1.0", + "distribution": [ + { + "accessURL": "https://www.minhacienda.gob.ar/secretarias/politica-economica/programacion-macroeconomica/", + "description": "PIB Servicios Comunales, Sociales y Personales en millones de pesos de 1960. 1970-1980", + "format": "CSV", + "dataset_identifier": "300", + "issued": "2017-09-28T00:00:00", + "title": "Producto Interno Bruto Servicios Comunales, Sociales y Personales Millones de pesos constantes. Base 1960. Valores trimestrales", + "modified": "2017-09-28T00:00:00", + "fileName": "pib-servicios-comunales-sociales-personales-millones-pesos-1960-trimestral.csv", + "downloadURL": "series_tiempo_ar_api/libs/indexing/tests/samples/one_distribution_data.csv", + "field": [ + { + "title": "indice_tiempo", + "specialTypeDetail": "R/P3M", + "units": "Millones de pesos", + "specialType": "time_index", + "type": "date", + "id": "212.1_ICE_TIEMPO_0_0_13" + }, + { + "units": "Millones de pesos", + "id": "212.1_PSCIOS_ERS_0_0_22", + "description": "PIB Servicios Comunales, Sociales y Personales en millones de pesos de 1960. Total", + "title": "pib_scios_com_soc_pers", + "scrapingIdentifierCell": "AC9" + }, + { + "units": "Millones de pesos", + "id": "212.1_PSCIOS_ERN_0_0_25", + "description": "PIB Servicios Comunales, Sociales y Personales en millones de pesos de 1960. Servicios Gubernamentales", + "title": "pib_scios_com_serv_gubern" + }, + { + "units": "Millones de pesos", + "id": "212.1_PSCIOS_IOS_0_0_25", + "description": "PIB Servicios Comunales, Sociales y Personales en millones de pesos de 1960. Otros Servicios", + "title": "pib_scios_com_otros_scios", + "scrapingDataStartCell": "AD10" + } + ], + "draft": false, + "units": "Millones de pesos", + "identifier": "300.1", + "scrapingFileSheet": "32. Ingreso IED Pais" + } + ] + } + ] +}