Skip to content

Commit

Permalink
made cli and xml dump the same (#34)
Browse files Browse the repository at this point in the history
  • Loading branch information
davidschober authored Apr 2, 2024
1 parent 8182271 commit fe9d792
Show file tree
Hide file tree
Showing 4 changed files with 16 additions and 13 deletions.
2 changes: 1 addition & 1 deletion nuldc/commandline.py
Original file line number Diff line number Diff line change
Expand Up @@ -63,7 +63,7 @@ def main():
data = {"message": "saved csv to :" + args['<outfile>']}

if args["xml"]:
# saving xml
# saving xml
helpers.save_xml(data, args['<outfile>'])
data = {"message": "saved xml to :" + args['<outfile>']}

Expand Down
12 changes: 7 additions & 5 deletions nuldc/dump.py
Original file line number Diff line number Diff line change
Expand Up @@ -52,7 +52,7 @@ def save_files(basename, data):
with open(f"json/{basename}.json", 'w', encoding='utf-8') as f:
json.dump(data.get('data'), f)

helpers.save_xml(data.get('data'), f'xml/{basename}.xml')
helpers.save_xml(data, f'xml/{basename}.xml')

headers, values = helpers.sort_fields_and_values(data)
helpers.save_as_csv(headers, values, f'csv/{basename}.csv')
Expand All @@ -63,13 +63,15 @@ def dump_collection(col_id):
json, xml, and csv files"""

params = {
"query": f"collection.id:{col_id}",
"query": f"collection.id: {col_id}",
"size": "50",
"sort": "id:asc"}
try:
data = helpers.get_search_results(API,
"works",
params, all_results=True, page_limit=5000)
"works",
params,
all_results=True,
page_limit=5000)
col_title = data['data'][0]['collection']['title']
filename = f"{slugify(col_title)}-{col_id}"
save_files(filename, data)
Expand Down Expand Up @@ -106,7 +108,7 @@ def main():
with open('_updated_at.txt') as f:
updated = f.readline().strip()

query = f"indexed_at:>={updated}"
query = f"indexed_at: >={updated}"
print(f"looking for collections with works updated since {query}")
else:
print("can't find updated since file, rebuilding all collections")
Expand Down
13 changes: 7 additions & 6 deletions nuldc/helpers.py
Original file line number Diff line number Diff line change
Expand Up @@ -174,19 +174,20 @@ def save_as_csv(headers, values, output_file):
writer.writerow(row)


def save_xml(opensearch_results, output_file):
    """Write a search-results dict to ``output_file`` as XML.

    ``opensearch_results`` is the whole results dict; its ``'data'`` entry
    is read as an iterable of record dicts. Each record is copied with its
    keys sorted and the embedding fields dropped, and the full dict (with
    the cleaned records under ``'data'``) is serialized with ``dicttoxml``.

    The caller's dict is not modified: callers reuse the same results
    object for other exports after this call, so the cleaned records are
    placed in a shallow copy instead of being assigned back into the
    argument.
    """

    # TODO DRY up this bit and sort_fields_and_values

    ignore_fields = ['embedding', 'embedding_model']

    # massage the data and remove the embeddings
    records = [{key: value
                for (key, value) in sorted(record.items())
                if key not in ignore_fields}
               for record in opensearch_results.get('data')]

    # Shallow copy keeps every other top-level key (and the position of
    # 'data') while leaving the caller's dict untouched.
    cleaned_results = {**opensearch_results, 'data': records}
    xml = dicttoxml.dicttoxml(cleaned_results, attr_type=False)

    # The serialized output is written in binary mode.
    with open(output_file, 'wb') as xmlfile:
        xmlfile.write(xml)

Expand Down
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[tool.poetry]
name = "nuldc"
version = "0.10.1"
version = "0.10.2"
description = ""
authors = ["davidschober <davidschob@gmail.com>"]
license = "MIT"
Expand Down

0 comments on commit fe9d792

Please sign in to comment.