From ed8443a262e949ef4e7d68d6bb315454e8ca9f6a Mon Sep 17 00:00:00 2001
From: darms
Date: Wed, 13 Dec 2023 15:27:50 +0100
Subject: [PATCH] feature(k8s.dataverse): Added more documentation on backup
 and restore

---
 k8s/dataverse/README.md | 78 +++++++++++++++++++++++++++++------------
 1 file changed, 55 insertions(+), 23 deletions(-)

diff --git a/k8s/dataverse/README.md b/k8s/dataverse/README.md
index ae3b184..f9db12b 100644
--- a/k8s/dataverse/README.md
+++ b/k8s/dataverse/README.md
@@ -1,26 +1,18 @@
 # Install dataverse
-
-## If you would like to install custom
 
 `helm install my-dataverse ./dataverse`
-
-# Configure custom schema
-## Enable admin api via localhost port forwarding
-### Allow all API calls
+
+# Access dataverse via localhost port forwarding
 
 `export DATAVERSE_POD="my-dataverse-dataverse-0"`
 
 `kubectl port-forward pods/${DATAVERSE_POD} 8080:8080`
-
-`curl -X PUT -d allow http://localhost:8080/api/admin/settings/:BlockedApiPolicy`
-
+
+# Configure custom schema
+
 ## Update SOLR fields with custom metadata info
+
+The chart's persona contains a valid Solr schema, which is used by default.
+If you alter an MDS block, you need to manually refresh the Solr core.
+
 ### Login into the solr helper container and execute the update
 
 `export SOLR_POD="my-dataverse-dataverse-solr-0"`
 
-`kubectl exec -i -t $SOLR_POD --container dataverse-solr-config -- /bin/sh`
-
-Until the image is fixed, manually add the `ed` package.
-`apk add ed`
+`kubectl exec -i -t $SOLR_POD --container dataverse-solr-config -- /bin/sh`
 
 `curl "http://${DATAVERSE_HOSTNAME}:8080/api/admin/index/solr/schema" | /scripts/update-fields.sh /template/conf/schema.xml`
 
@@ -28,32 +20,40 @@ Until the image is fixed, manually add the `ed` package.
 
 `curl "http://localhost:8983/solr/admin/cores?action=RELOAD&core=collection1"`
 
-## Loading a backup
+# Backup & Restore
+
+## Restore database backup
+
+### Get a logical backup
+
+#### From S3
+
+Postgres is configured to automatically create and store a logical backup in S3. You can use the following commands to find the most recent one.
 
 1. Find the newest backup
 
    `s3cmd ls s3://$LOGICAL_BACKUP_S3_BUCKET/spilo/$SCOPE$LOGICAL_BACKUP_S3_BUCKET_SCOPE_SUFFIX/logical_backups/`
 
   The env variable values can be found using `kubectl describe job` on one of the backup jobs.
 
-2. Copy the backup
+2. Copy the backup to your local computer
 
   `s3cmd get s3://$LOGICAL_BACKUP_S3_BUCKET/spilo/$SCOPE$LOGICAL_BACKUP_S3_BUCKET_SCOPE_SUFFIX/logical_backups/1695726061.sql.gz .`
 
  (replace the file name with the name of the newest backup found in step 1)
 
-3. Extract the sql file and copy it into the postgres pod
+### Copy the backup into the postgres pod
 
-  `kubectl cp 1695726061.sql $POSTGRES_POD_NAME:/tmp`
+3. Copy the logical backup from your local computer into the postgres pod
+
+   `kubectl cp 1695726061.sql.gz $POSTGRES_POD_NAME:/tmp/1695726061.sql.gz`
 
   (replace the file name)
 
-4. Empty the database before loading the backup
+4. Extract the backup
 
-  `kubectl exec -it $POSTGRES_POD_NAME -- bash`
-
-  `psql -U dataverse`
+   `kubectl exec -it $POSTGRES_POD_NAME -- /bin/bash`
+
+   `gunzip /tmp/1695726061.sql.gz`
+
+5. Empty the database before loading the backup
+
+   `kubectl exec -it $POSTGRES_POD_NAME -- psql -U dataverse`
 
 ```
 -- Recreate the schema
 DROP SCHEMA public CASCADE;
@@ -67,15 +67,47 @@
 
 (source: https://stackoverflow.com/a/61221726)
 
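+   For scripted restores, the schema reset can also be run non-interactively. A minimal sketch, assuming the `dataverse` role may drop and recreate the schema; extend the statement list as in the block above:
+
+   `kubectl exec $POSTGRES_POD_NAME -- psql -U dataverse -c "DROP SCHEMA public CASCADE; CREATE SCHEMA public;"`
+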
 6. Load the backup into the database
-
+   `kubectl exec -it $POSTGRES_POD_NAME -- bash`
 
   `psql -U dataverse -f /tmp/1690815661.sql template1`
 
  (replace the file name)
 
-7. Correct the database user password: Log into a container of the pod ...-dataverse-0, then run `echo $DATAVERSE_DB_PASSWORD`. Set this password in psql using `ALTER USER dataverse WITH PASSWORD '...';`
+7. Configure and sync postgres secrets with k8s.
+
+   The default postgres deployment creates at least three k8s secrets. Since you just loaded a backup, they are out of sync.
+   Either those secrets must be updated with the values from the backup, or the values within the db must be updated.
+   Here we update the values within the db. First, we obtain the list of accounts to update, then we obtain the passwords and update the db values.
+
+   Get the list of accounts:
+
+   `kubectl get secret | grep ${DEPLOYMENTNAME}-dataverse-postgres.credentials.postgresql.acid.zalan.do`
+
+   Repeat the following for each account:
+
+   Get the password for the user `dataverse`:
+
+   `kubectl get secrets/dataverse.${DEPLOYMENTNAME}-dataverse-postgres.credentials.postgresql.acid.zalan.do -o=jsonpath="{.data.password}" | base64 -d`
+
+   Update the password for the user `dataverse`:
+
+   `kubectl exec -it $POSTGRES_POD_NAME -- psql -U dataverse -c "ALTER USER dataverse WITH PASSWORD '...'"`
 
 8. Start SOLR reindex
 
   `curl http://localhost:8080/api/admin/index`
 
  (see https://guides.dataverse.org/en/latest/admin/solr-search-index.html#clear-and-reindex)
+
+## Creating a database backup
+
+1. Log into the postgres pod, then create and compress a logical backup.
+
+   `kubectl exec -it pods/$POSTGRES_POD_NAME -- /bin/bash`
+
+   `pg_dumpall -f /tmp/jd.dump -U dataverse`
+
+   `gzip /tmp/jd.dump`
+
+2. Copy the logical backup to your local computer
+
+   `kubectl cp $POSTGRES_POD_NAME:/tmp/jd.dump.gz ./jd.dump.gz`
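+
+The three commands above can also be run from your local machine without opening a shell in the pod. A minimal sketch, mirroring the same `pg_dumpall` invocation and assuming `$POSTGRES_POD_NAME` is set:
+
+```
+# Dump all databases inside the pod, compress the dump, then fetch it
+kubectl exec $POSTGRES_POD_NAME -- pg_dumpall -f /tmp/jd.dump -U dataverse
+kubectl exec $POSTGRES_POD_NAME -- gzip /tmp/jd.dump
+kubectl cp $POSTGRES_POD_NAME:/tmp/jd.dump.gz ./jd.dump.gz
+```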