From c2385c850d929a89992e9ca910fd2de6976512b3 Mon Sep 17 00:00:00 2001
From: davidschober <david.schober@northwestern.edu>
Date: Mon, 18 Mar 2024 11:45:59 -0500
Subject: [PATCH] Fix dump by increasing limit (#31)

* Add error handling to dump_collection, raise the page size from 25 to 100, and set page_limit to 5000

* Bump patch version from 0.9.0 to 0.9.1
---
 nuldc/dump.py  | 15 +++++++++------
 pyproject.toml |  2 +-
 2 files changed, 10 insertions(+), 7 deletions(-)

diff --git a/nuldc/dump.py b/nuldc/dump.py
index 8d9c005..bfafafe 100644
--- a/nuldc/dump.py
+++ b/nuldc/dump.py
@@ -25,6 +25,7 @@
 import concurrent.futures
 import datetime
 import os
+import sys
 
 
 API = "https://api.dc.library.northwestern.edu/api/v2"
@@ -64,15 +65,17 @@ def dump_collection(col_id):
 
     params = {
         "query": f"collection.id:{col_id}",
-        "size": "25",
+        "size": "100",
         "sort": "id:asc"}
     data = helpers.get_search_results(API,
                                       "works",
-                                      params, all_results=True)
-
-    col_title = data['data'][0]['collection']['title']
-    filename = f"{slugify(col_title)}-{col_id}"
-    save_files(filename, data)
+                                      params, all_results=True, page_limit=5000)
+    try:
+        col_title = data['data'][0]['collection']['title']
+        filename = f"{slugify(col_title)}-{col_id}"
+        save_files(filename, data)
+    except Exception as e:
+        sys.exit(f"Error with collection {col_id}: {e} \n\n CONTEXT: {data}")
 
 
 def dump_collections(query_string):
diff --git a/pyproject.toml b/pyproject.toml
index b83e5de..19f49a8 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -1,6 +1,6 @@
 [tool.poetry]
 name = "nuldc"
-version = "0.9.0"
+version = "0.9.1"
 description = ""
 authors = ["davidschober <davidschob@gmail.com>"]
 license = "MIT"