From 010a2f3e02b8460573a41a2ba7ac08a04328d9ba Mon Sep 17 00:00:00 2001
From: Vera Clemens <clemens@zbmed.de>
Date: Tue, 28 Nov 2023 16:55:48 +0100
Subject: [PATCH 1/6] feat(k8s.dataverse): add CT.gov collection

---
 .../persona/nfdi4health/dataverses/ctgov.json    | 12 ++++++++++++
 k8s/dataverse/persona/nfdi4health/init.sh        | 16 +++++++++-------
 2 files changed, 21 insertions(+), 7 deletions(-)
 create mode 100644 k8s/dataverse/persona/nfdi4health/dataverses/ctgov.json

diff --git a/k8s/dataverse/persona/nfdi4health/dataverses/ctgov.json b/k8s/dataverse/persona/nfdi4health/dataverses/ctgov.json
new file mode 100644
index 0000000..d4e1bde
--- /dev/null
+++ b/k8s/dataverse/persona/nfdi4health/dataverses/ctgov.json
@@ -0,0 +1,12 @@
+{
+  "name": "CTgov",
+  "alias": "CTgov",
+  "dataverseContacts": [
+    {
+      "contactEmail": "fb.studyhub@nfdi4health.de"
+    }
+  ],
+  "affiliation": "NFDI4Health",
+  "description": "Automatically imported from CT.gov",
+  "dataverseType": "UNCATEGORIZED"
+}
\ No newline at end of file
diff --git a/k8s/dataverse/persona/nfdi4health/init.sh b/k8s/dataverse/persona/nfdi4health/init.sh
index c84fef1..7399cfe 100644
--- a/k8s/dataverse/persona/nfdi4health/init.sh
+++ b/k8s/dataverse/persona/nfdi4health/init.sh
@@ -85,13 +85,15 @@ while IFS= read -r DATAVERSE; do
   curl -s -H "X-Dataverse-key:$API_TOKEN" -X POST -H "Content-Type: application/json" $DATAVERSE_URL/api/dataverses/$DATAVERSE_ID/assignments -d '{"assignee": "@dataverseAdmin", "role": "admin"}'
   echo
 
-  echo "Adding :authenticated-users as dataset creators to dataverse $PARENT_DATAVERSE/$DATAVERSE_ID:"
-  curl -s -H "X-Dataverse-key:$API_TOKEN" -X POST -H "Content-Type: application/json" $DATAVERSE_URL/api/dataverses/$DATAVERSE_ID/assignments -d '{"assignee": ":authenticated-users", "role": "dsContributor"}'
-  echo
+  if [[ $DATAVERSE_ID == "nfdi4health" ]]; then
+    echo "Adding :authenticated-users as dataset creators to dataverse $PARENT_DATAVERSE/$DATAVERSE_ID:"
+    curl -s -H "X-Dataverse-key:$API_TOKEN" -X POST -H "Content-Type: application/json" $DATAVERSE_URL/api/dataverses/$DATAVERSE_ID/assignments -d '{"assignee": ":authenticated-users", "role": "dsContributor"}'
+    echo
+  fi
 
-  if [[ $PARENT_DATAVERSE != "root" ]]; then
+  if [[ $PARENT_DATAVERSE == "nfdi4health" ]]; then
     # We add the "Publish permission" for all users only to the sub-dataverses (collection dataverses, e.g. "COVID-19")
-    # where no datasets are created so it can only be used for linking, not publishing
+    # of "NFDI4Health" where no datasets are created so it can only be used for linking, not publishing
     # (only curators should be able to publish)
     echo "Adding :authenticated-users as dataset publisher to dataverse $PARENT_DATAVERSE/$DATAVERSE_ID:"
     curl -s -H "X-Dataverse-key:$API_TOKEN" -X POST -H "Content-Type: application/json" $DATAVERSE_URL/api/dataverses/$DATAVERSE_ID/assignments -d '{"assignee": ":authenticated-users", "role": "dsPublisher"}'
@@ -100,7 +102,7 @@ while IFS= read -r DATAVERSE; do
     # The import client and the admin are currently the only automatically configured curator user, all other curators
     # must be added manually
     echo "Creating curator group"
-    curl -s -H "X-Dataverse-key:$API_TOKEN" -X POST -H "Content-Type: application/json" $DATAVERSE_URL/api/dataverses/$DATAVERSE_ID/groups -d '{"description": "Curator users", "displayName": "Curators", "aliasInOwner": "curators"}'
+    CURATOR_GROUP_ID=`curl -s -H "X-Dataverse-key:$API_TOKEN" -X POST -H "Content-Type: application/json" $DATAVERSE_URL/api/dataverses/$DATAVERSE_ID/groups -d '{"description": "Curator users", "displayName": "Curators", "aliasInOwner": "curators"}' | jq .data.identifier -r`
     echo
 
     echo "Adding @service-account-import_client and @dataverseAdmin to curator group"
@@ -108,7 +110,7 @@ while IFS= read -r DATAVERSE; do
     echo
 
     echo "Adding curator group as curator to dataverse $PARENT_DATAVERSE/$DATAVERSE_ID:"
-    curl -s -H "X-Dataverse-key:$API_TOKEN" -X POST -H "Content-Type: application/json" $DATAVERSE_URL/api/dataverses/$DATAVERSE_ID/assignments -d '{"assignee": "&explicit/2-curators", "role": "curator"}'
+    curl -s -H "X-Dataverse-key:$API_TOKEN" -X POST -H "Content-Type: application/json" $DATAVERSE_URL/api/dataverses/$DATAVERSE_ID/assignments -d "{\"assignee\": \"$CURATOR_GROUP_ID\", \"role\": \"curator\"}"
     echo
   fi
 done <<< "${DATAVERSES}"

From 8015cb743dfccc2d6beb62bf072572faddca3be3 Mon Sep 17 00:00:00 2001
From: Vera Clemens <clemens@zbmed.de>
Date: Tue, 28 Nov 2023 17:41:01 +0100
Subject: [PATCH 2/6] feat(k8s.dataverse): add collections for other external
 data sources

---
 .../persona/nfdi4health/dataverses/drks.json         | 12 ++++++++++++
 .../persona/nfdi4health/dataverses/ictrp.json        | 12 ++++++++++++
 .../persona/nfdi4health/dataverses/mdm.json          | 12 ++++++++++++
 3 files changed, 36 insertions(+)
 create mode 100644 k8s/dataverse/persona/nfdi4health/dataverses/drks.json
 create mode 100644 k8s/dataverse/persona/nfdi4health/dataverses/ictrp.json
 create mode 100644 k8s/dataverse/persona/nfdi4health/dataverses/mdm.json

diff --git a/k8s/dataverse/persona/nfdi4health/dataverses/drks.json b/k8s/dataverse/persona/nfdi4health/dataverses/drks.json
new file mode 100644
index 0000000..916a620
--- /dev/null
+++ b/k8s/dataverse/persona/nfdi4health/dataverses/drks.json
@@ -0,0 +1,12 @@
+{
+  "name": "DRKS",
+  "alias": "DRKS",
+  "dataverseContacts": [
+    {
+      "contactEmail": "fb.studyhub@nfdi4health.de"
+    }
+  ],
+  "affiliation": "NFDI4Health",
+  "description": "Automatically imported from DRKS",
+  "dataverseType": "UNCATEGORIZED"
+}
\ No newline at end of file
diff --git a/k8s/dataverse/persona/nfdi4health/dataverses/ictrp.json b/k8s/dataverse/persona/nfdi4health/dataverses/ictrp.json
new file mode 100644
index 0000000..8398148
--- /dev/null
+++ b/k8s/dataverse/persona/nfdi4health/dataverses/ictrp.json
@@ -0,0 +1,12 @@
+{
+  "name": "ICTRP",
+  "alias": "ICTRP",
+  "dataverseContacts": [
+    {
+      "contactEmail": "fb.studyhub@nfdi4health.de"
+    }
+  ],
+  "affiliation": "NFDI4Health",
+  "description": "Automatically imported from ICTRP",
+  "dataverseType": "UNCATEGORIZED"
+}
\ No newline at end of file
diff --git a/k8s/dataverse/persona/nfdi4health/dataverses/mdm.json b/k8s/dataverse/persona/nfdi4health/dataverses/mdm.json
new file mode 100644
index 0000000..1e88b90
--- /dev/null
+++ b/k8s/dataverse/persona/nfdi4health/dataverses/mdm.json
@@ -0,0 +1,12 @@
+{
+  "name": "MDM",
+  "alias": "MDM",
+  "dataverseContacts": [
+    {
+      "contactEmail": "fb.studyhub@nfdi4health.de"
+    }
+  ],
+  "affiliation": "NFDI4Health",
+  "description": "Automatically imported from MDM",
+  "dataverseType": "UNCATEGORIZED"
+}
\ No newline at end of file

From 046d53ca11ec88541afebf5744827404f2acf49a Mon Sep 17 00:00:00 2001
From: Vera Clemens <clemens@zbmed.de>
Date: Wed, 17 Jul 2024 12:01:24 +0200
Subject: [PATCH 3/6] feat: add script for loading backup into Dataverse

---
 k8s/dataverse/README.md          | 88 +++++---------------------------
 scripts/load_dataverse_backup.sh | 61 ++++++++++++++++++++++
 2 files changed, 73 insertions(+), 76 deletions(-)
 create mode 100755 scripts/load_dataverse_backup.sh

diff --git a/k8s/dataverse/README.md b/k8s/dataverse/README.md
index f5464e4..69b3299 100644
--- a/k8s/dataverse/README.md
+++ b/k8s/dataverse/README.md
@@ -2,88 +2,24 @@
 `helm install my-dataverse ./dataverse`
 
 # Backup & Restore
-## Restore database backup
-
-### Get a logical backup
-#### From S3 
-Postgres is configured to automatically create and store a logical backup in S3. You can use the following to find the most recent one.
-1. Find the newest backup
-   
-   `s3cmd ls s3://$LOGICAL_BACKUP_S3_BUCKET/spilo/$SCOPE$LOGICAL_BACKUP_S3_BUCKET_SCOPE_SUFFIX/logical_backups/`
-   
-   The env variable values can be found using `kubectl describe pod` on one of the backup job pods.
-
-2. Copy the backup to your local computer
-
-   `s3cmd get s3://$LOGICAL_BACKUP_S3_BUCKET/spilo/$SCOPE$LOGICAL_BACKUP_S3_BUCKET_SCOPE_SUFFIX/logical_backups/1695726061.sql.gz .`
-   
-   (replace the file name with the name of the newest backup found in step 1)
-
-### Copy the backup into the postgres pod
-
-3. Copy a logical backup from local computer it into the postgres pod
-
-   `kubectl cp 1695726061.sql.gz $POSTGRES_POD_NAME:/tmp/1695726061.sql.gz`
-   
-   (replace the file name)
-
-4. Extract the backup
-
-   `kubectl exec -it $POSTGRES_POD_NAME -- /bin/bash`
-
-   `gunzip /tmp/1695726061.sql.gz`
-
-5.  Empty the database before loading the backup
-   `kubectl exec -it $POSTGRES_POD_NAME -- psql -U dataverse `
-   ```
-   -- Recreate the schema
-   DROP SCHEMA public CASCADE;
-   CREATE SCHEMA public;
 
-   -- Restore default permissions
-   GRANT ALL ON SCHEMA public TO postgres;
-   GRANT ALL ON SCHEMA public TO public;
-   ```
-
-   (source: https://stackoverflow.com/a/61221726)
-
-6. Load the backup into the database
-
-   `kubectl exec -it $POSTGRES_POD_NAME -- bash`
-
-   `psql -U dataverse -f /tmp/1690815661.sql template1`
-
-   (replace the file name)
-
-7. Configure and sync postgres secrets with k8s
-
-   The  postgres deployment creates at least three k8s secrets. Since you just loaded a backup they (k8s secret) are out of sync.
-   Either those k8s secrets must be updated with the values from the just loaded backup or the database must be adapted to the values of the k8s secrets
-   We update the values within the db. First, we obtain list of accounts to update, then we obtain the passwords and update the db values.
-
-   Get the list of accounts:
-
-   `kubectl get secret | grep ${DEPLOYMENTNAME}-dataverse-postgres.credentials.postgresql.acid.zalan.do `
-   
-   Repeat the following for each account:
-
-      Get the password for the user `dataverse`:
-   
-      `kubectl get secrets/dataverse.${DEPLOYMENTNAME}-dataverse-postgres.credentials.postgresql.acid.zalan.do  -o=jsonpath="{.data.password}" | base64 -d`
-   
-      Update the password for the user `dataverse`:
-   
-      `kubectl exec -it $POSTGRES_POD_NAME -- psql -U dataverse "ALTER USER dataverse WITH PASSWORD '...'"`
+## Restore database backup
 
-8. Restart the dataverse container
+Postgres is configured to automatically create and store a logical backup in S3. You can use the script at
+[`scripts/load_dataverse_backup.sh`][1] to load it into a Dataverse deployed on Kubernetes.
 
-9. Start complete SOLR reindex
+[1]: https://github.com/nfdi4health/csh-deployment/blob/main/scripts/load_dataverse_backup.sh
 
-   `curl http://localhost:8080/api/admin/index/clear`
+Before running the script, you must set these env variables:
 
-   `curl http://localhost:8080/api/admin/index`
+- `DESTINATION_DATAVERSE_NAME`, the deployment name of the destination Dataverse
+- `LOGICAL_BACKUP_S3_BUCKET`, the S3 bucket where the backup is located
+- `SCOPE` and `LOGICAL_BACKUP_S3_BUCKET_SCOPE_SUFFIX`, which together define the directory inside the S3 bucket where
+   the backup is located
+- (optional) `S3_CONFIG_FILE`, path to an s3cmd config file
 
-   (see https://guides.dataverse.org/en/latest/admin/solr-search-index.html#clear-and-reindex)
+The values for `LOGICAL_BACKUP_S3_BUCKET`, `SCOPE` and `LOGICAL_BACKUP_S3_BUCKET_SCOPE_SUFFIX` can be found using
+`kubectl describe pod` on one of the backup job pods.
 
 ## Creating a database backup
 
diff --git a/scripts/load_dataverse_backup.sh b/scripts/load_dataverse_backup.sh
new file mode 100755
index 0000000..8a553e7
--- /dev/null
+++ b/scripts/load_dataverse_backup.sh
@@ -0,0 +1,107 @@
+#!/bin/bash
+#
+# Loads the most recent logical Postgres backup from S3 into a Dataverse deployed
+# on Kubernetes: copies the dump into the postgres pod, replaces the current
+# database content with it, syncs the database passwords with the k8s secrets,
+# restarts Dataverse and starts a complete re-index.
+#
+# Required env variables:
+#   DESTINATION_DATAVERSE_NAME  deployment name of the destination Dataverse
+#   LOGICAL_BACKUP_S3_BUCKET    S3 bucket where the backup is located
+#   SCOPE, LOGICAL_BACKUP_S3_BUCKET_SCOPE_SUFFIX
+#                               define the directory inside the S3 bucket where the backup is located
+# Optional env variables:
+#   S3_CONFIG_FILE              path to an s3cmd config file (default: ~/.s3cfg)
+
+# Abort on the first failing step. The database is emptied further down, which
+# must not happen unless the backup was downloaded and unpacked successfully.
+set -euo pipefail
+
+: "${DESTINATION_DATAVERSE_NAME:?must be set}"
+: "${LOGICAL_BACKUP_S3_BUCKET:?must be set}"
+: "${SCOPE:?must be set}"
+# Not enforced, so that an unset suffix keeps behaving like an empty one
+LOGICAL_BACKUP_S3_BUCKET_SCOPE_SUFFIX="${LOGICAL_BACKUP_S3_BUCKET_SCOPE_SUFFIX:-}"
+
+# $HOME instead of a quoted '~': within double quotes the single quotes would be
+# kept literally and the tilde would not be expanded
+S3_CONFIG_FILE="${S3_CONFIG_FILE:-$HOME/.s3cfg}"
+
+# Computed from env variables above
+BACKUP_DIR="s3://${LOGICAL_BACKUP_S3_BUCKET}/spilo/${SCOPE}${LOGICAL_BACKUP_S3_BUCKET_SCOPE_SUFFIX}/logical_backups/"
+LAST_BACKUP_FILE=$(s3cmd ls "$BACKUP_DIR" -c "$S3_CONFIG_FILE" | sort | tail -n 1 | awk '{print $4}')
+if [[ -z "$LAST_BACKUP_FILE" ]]; then
+  echo "No backup found in $BACKUP_DIR" >&2
+  exit 1
+fi
+BACKUP_ARCHIVE=$(basename "$LAST_BACKUP_FILE")
+BACKUP_DUMP=$(basename "$LAST_BACKUP_FILE" .gz)
+POSTGRES_POD_NAME="${DESTINATION_DATAVERSE_NAME}-dataverse-postgres-0"
+DATAVERSE_POD_NAME="${DESTINATION_DATAVERSE_NAME}-dataverse-0"
+
+echo "Downloading backup from S3..."
+s3cmd get "$LAST_BACKUP_FILE" . -c "$S3_CONFIG_FILE" --skip-existing
+
+echo "Copying backup to postgres pod..."
+kubectl cp "$BACKUP_ARCHIVE" "$POSTGRES_POD_NAME:/tmp/"
+
+echo "Unzipping backup..."
+# -f: overwrite a dump left behind in /tmp by an earlier run instead of failing
+kubectl exec "$POSTGRES_POD_NAME" -- gunzip -f "/tmp/$BACKUP_ARCHIVE"
+
+echo "Emptying database..."
+kubectl exec "$POSTGRES_POD_NAME" -- psql -P pager=off -U dataverse -c "-- Recreate the schema
+DROP SCHEMA public CASCADE;
+CREATE SCHEMA public;
+
+-- Restore default permissions
+GRANT ALL ON SCHEMA public TO postgres;
+GRANT ALL ON SCHEMA public TO public;"
+# source: https://stackoverflow.com/a/61221726
+
+echo "Loading backup into database..."
+kubectl exec "$POSTGRES_POD_NAME" -- psql -P pager=off -U dataverse -f "/tmp/$BACKUP_DUMP" template1
+
+echo "Updating database passwords..."
+# The k8s secrets are named <database user>.<SECRET_SUFFIX>; the loaded backup
+# brought its own passwords, so reset each user's password to the secret's value
+SECRET_SUFFIX="${DESTINATION_DATAVERSE_NAME}-dataverse-postgres.credentials.postgresql.acid.zalan.do"
+kubectl get secret | awk -v suffix="$SECRET_SUFFIX" 'index($1, suffix) {print $1}' | while IFS= read -r SECRET; do
+  DB_USER="${SECRET%%.*}"
+  DB_PASSWORD=$(kubectl get "secrets/$SECRET" -o=jsonpath="{.data.password}" | base64 -d)
+  kubectl exec "$POSTGRES_POD_NAME" -- psql -P pager=off -U dataverse -c "ALTER USER $DB_USER WITH PASSWORD '$DB_PASSWORD';"
+done
+
+echo "Restarting dataverse pod..."
+kubectl delete pod "$DATAVERSE_POD_NAME"
+# kubectl wait fails right away for a pod that does not exist, so first wait
+# until the replacement pod has been created
+until kubectl get pod "$DATAVERSE_POD_NAME" >/dev/null 2>&1; do
+  sleep 1
+done
+kubectl wait --for=condition=Ready --timeout=-1s "pod/$DATAVERSE_POD_NAME"
+
+# Using port 8081 because 8080 is often already used if currently developing with Dataverse
+DATAVERSE_LOCAL_PORT=8081
+DATAVERSE_REMOTE_PORT=8080
+
+echo "Starting re-index..."
+kubectl port-forward "$DATAVERSE_POD_NAME" "$DATAVERSE_LOCAL_PORT:$DATAVERSE_REMOTE_PORT" >/dev/null &
+PORT_FORWARD_PID=$!
+# Kill the port-forward when this script exits
+trap 'kill "$PORT_FORWARD_PID" 2>/dev/null || true' EXIT
+# Wait for port to be available, but give up if the port-forward has died
+while ! nc -vz localhost "$DATAVERSE_LOCAL_PORT" >/dev/null 2>&1; do
+  if ! kill -0 "$PORT_FORWARD_PID" 2>/dev/null; then
+    echo "kubectl port-forward exited unexpectedly" >&2
+    exit 1
+  fi
+  sleep 0.1
+done
+curl "http://localhost:$DATAVERSE_LOCAL_PORT/api/admin/index/clear"
+echo
+curl "http://localhost:$DATAVERSE_LOCAL_PORT/api/admin/index"
+echo
+
+echo
+echo "Done! Please wait for the re-indexing to finish, then the backup loading will be complete."

From a2d01b2f77a95ed0631594fb92d4bc0bb5b748b6 Mon Sep 17 00:00:00 2001
From: Vera Clemens <clemens@zbmed.de>
Date: Wed, 24 Jul 2024 15:23:38 +0200
Subject: [PATCH 4/6] feat(k8s.dataverse): increase storage for postgres and
 solr

---
 k8s/dataverse/templates/postgres.yaml | 2 +-
 k8s/dataverse/templates/solr.yaml     | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/k8s/dataverse/templates/postgres.yaml b/k8s/dataverse/templates/postgres.yaml
index 8e85a4f..6bae6b5 100644
--- a/k8s/dataverse/templates/postgres.yaml
+++ b/k8s/dataverse/templates/postgres.yaml
@@ -13,7 +13,7 @@ spec:
   enableLogicalBackup: true
   teamId:  {{ .Release.Name }}
   volume:
-    size: 4Gi
+    size: 8Gi
   numberOfInstances: 2
   enableConnectionPooler: true
   connectionPooler:
diff --git a/k8s/dataverse/templates/solr.yaml b/k8s/dataverse/templates/solr.yaml
index 8558cba..eacac43 100644
--- a/k8s/dataverse/templates/solr.yaml
+++ b/k8s/dataverse/templates/solr.yaml
@@ -29,7 +29,7 @@ spec:
     - ReadWriteOnce
   resources:
     requests:
-      storage: 100Mi
+      storage: 2Gi
 ---
 apiVersion: v1
 kind: PersistentVolumeClaim

From 9dceba30fa4cf5b2ae8c47789118054b74c93de4 Mon Sep 17 00:00:00 2001
From: Vera Clemens <clemens@zbmed.de>
Date: Fri, 26 Jul 2024 14:26:03 +0200
Subject: [PATCH 5/6] feat(k8s.dataverse): increase storage for postgres
 further

---
 k8s/dataverse/templates/postgres.yaml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/k8s/dataverse/templates/postgres.yaml b/k8s/dataverse/templates/postgres.yaml
index 6bae6b5..ca1cf80 100644
--- a/k8s/dataverse/templates/postgres.yaml
+++ b/k8s/dataverse/templates/postgres.yaml
@@ -13,7 +13,7 @@ spec:
   enableLogicalBackup: true
   teamId:  {{ .Release.Name }}
   volume:
-    size: 8Gi
+    size: 16Gi
   numberOfInstances: 2
   enableConnectionPooler: true
   connectionPooler:

From 422db61731f6521b15c88298da483c905dd5802e Mon Sep 17 00:00:00 2001
From: Vera Clemens <clemens@zbmed.de>
Date: Fri, 26 Jul 2024 14:43:23 +0200
Subject: [PATCH 6/6] feat(k8s.dataverse): increase storage for postgres
 further

---
 k8s/dataverse/templates/postgres.yaml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/k8s/dataverse/templates/postgres.yaml b/k8s/dataverse/templates/postgres.yaml
index ca1cf80..be316b6 100644
--- a/k8s/dataverse/templates/postgres.yaml
+++ b/k8s/dataverse/templates/postgres.yaml
@@ -13,7 +13,7 @@ spec:
   enableLogicalBackup: true
   teamId:  {{ .Release.Name }}
   volume:
-    size: 16Gi
+    size: 32Gi
   numberOfInstances: 2
   enableConnectionPooler: true
   connectionPooler: