From c2385c850d929a89992e9ca910fd2de6976512b3 Mon Sep 17 00:00:00 2001 From: davidschober Date: Mon, 18 Mar 2024 11:45:59 -0500 Subject: [PATCH] Fix dump by increasing limit (#31) * added catch in dump and upped page limit and size * bump patch --- nuldc/dump.py | 15 +++++++++------ pyproject.toml | 2 +- 2 files changed, 10 insertions(+), 7 deletions(-) diff --git a/nuldc/dump.py b/nuldc/dump.py index 8d9c005..bfafafe 100644 --- a/nuldc/dump.py +++ b/nuldc/dump.py @@ -25,6 +25,7 @@ import concurrent.futures import datetime import os +import sys API = "https://api.dc.library.northwestern.edu/api/v2" @@ -64,15 +65,17 @@ def dump_collection(col_id): params = { "query": f"collection.id:{col_id}", - "size": "25", + "size": "100", "sort": "id:asc"} data = helpers.get_search_results(API, "works", - params, all_results=True) - - col_title = data['data'][0]['collection']['title'] - filename = f"{slugify(col_title)}-{col_id}" - save_files(filename, data) + params, all_results=True, page_limit=5000) + try: + col_title = data['data'][0]['collection']['title'] + filename = f"{slugify(col_title)}-{col_id}" + save_files(filename, data) + except Exception as e: + sys.exit(f"Error with collection {col_id}: {e} \n\n CONTEXT: {data}") def dump_collections(query_string): diff --git a/pyproject.toml b/pyproject.toml index b83e5de..19f49a8 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [tool.poetry] name = "nuldc" -version = "0.9.0" +version = "0.9.1" description = "" authors = ["davidschober "] license = "MIT"