From fe9d79297beb58bd760cac081213fc6bc43c9be8 Mon Sep 17 00:00:00 2001 From: davidschober Date: Mon, 1 Apr 2024 19:23:47 -0500 Subject: [PATCH] made cli and xml dump the same (#34) --- nuldc/commandline.py | 2 +- nuldc/dump.py | 12 +++++++----- nuldc/helpers.py | 13 +++++++------ pyproject.toml | 2 +- 4 files changed, 16 insertions(+), 13 deletions(-) diff --git a/nuldc/commandline.py b/nuldc/commandline.py index 075f21f..a850786 100644 --- a/nuldc/commandline.py +++ b/nuldc/commandline.py @@ -63,7 +63,7 @@ def main(): data = {"message": "saved csv to :" + args['']} if args["xml"]: - # saving xml + # saving xml helpers.save_xml(data, args['']) data = {"message": "saved xml to :" + args['']} diff --git a/nuldc/dump.py b/nuldc/dump.py index 97c6ffc..05e2e73 100644 --- a/nuldc/dump.py +++ b/nuldc/dump.py @@ -52,7 +52,7 @@ def save_files(basename, data): with open(f"json/{basename}.json", 'w', encoding='utf-8') as f: json.dump(data.get('data'), f) - helpers.save_xml(data.get('data'), f'xml/{basename}.xml') + helpers.save_xml(data, f'xml/{basename}.xml') headers, values = helpers.sort_fields_and_values(data) helpers.save_as_csv(headers, values, f'csv/{basename}.csv') @@ -63,13 +63,15 @@ def dump_collection(col_id): json, xml, and csv files""" params = { - "query": f"collection.id:{col_id}", + "query": f"collection.id: {col_id}", "size": "50", "sort": "id:asc"} try: data = helpers.get_search_results(API, - "works", - params, all_results=True, page_limit=5000) + "works", + params, + all_results=True, + page_limit=5000) col_title = data['data'][0]['collection']['title'] filename = f"{slugify(col_title)}-{col_id}" save_files(filename, data) @@ -106,7 +108,7 @@ def main(): with open('_updated_at.txt') as f: updated = f.readline().strip() - query = f"indexed_at:>={updated}" + query = f"indexed_at: >={updated}" print(f"looking for collections with works updated since {query}") else: print("can't find updated since file, rebuilding all collections") diff --git a/nuldc/helpers.py b/nuldc/helpers.py index 1bdcabf..f3837c7 100644 --- a/nuldc/helpers.py +++ b/nuldc/helpers.py @@ -174,19 +174,20 @@ def save_as_csv(headers, values, output_file): writer.writerow(row) -def save_xml(data, output_file): +def save_xml(opensearch_results, output_file): """takes results as a list of dicts and writes them out to xml""" - # TODO DRY up this bit and sort_fields_and_values ignore_fields = ['embedding', 'embedding_model'] - + # massage the data and remove the embeddings + data = opensearch_results.get('data') data = [{key: value for (key, value) in sorted(d.items()) - if key not in ignore_fields} for d in data.get('data')] - xml = dicttoxml.dicttoxml(data, attr_type=False) - + if key not in ignore_fields} for d in data] + opensearch_results['data'] = data + xml = dicttoxml.dicttoxml(opensearch_results, attr_type=False) + with open(output_file, 'wb') as xmlfile: xmlfile.write(xml) diff --git a/pyproject.toml b/pyproject.toml index d05a1da..20ef0ba 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [tool.poetry] name = "nuldc" -version = "0.10.1" +version = "0.10.2" description = "" authors = ["davidschober "] license = "MIT"