Skip to content

Commit

Permalink
Data Migration Code for Version 1.0.9.1 which fixes bug with publish …
Browse files Browse the repository at this point in the history
…flag mapping, and allow users the ability to select which record type to publish during data migration. Also improves command line functionality.
  • Loading branch information
Nathan Stevens committed May 29, 2014
1 parent 3b7c679 commit 3102fa5
Show file tree
Hide file tree
Showing 8 changed files with 621 additions and 108 deletions.
33 changes: 26 additions & 7 deletions dbcopy.properties
Original file line number Diff line number Diff line change
Expand Up @@ -3,13 +3,9 @@

# specify whether to use the tracer database. If so specify the tracer database to use
# valid options for tracer database are 1,2,3,SB
useTracer=true
useTracer=false
tracerDatabase=2

# the index of the stored AT database. These must be saved in the AT db connection file in the user
# home directory
databaseURLIndex=0

# specify the number of client threads to use when copying resource records
clientThreads=1

Expand All @@ -23,13 +19,36 @@ continueFromResources=false
resetPassword=archive

# specify whether to simulate the REST calls
simulateRESTCalls=true
simulateRESTCalls=false

# specify whether to ignore unlinked names and subjects
ignoreUnlinkedNames=false
ignoreUnlinkedSubjects=false

# specify what the ASpace publish flag should be for the main record types
publishNames=true
publishSubjects=true
publishAccessions=true
publishDigitalObjects=true
publishResources=true

# specify whether to only copy resource records. Useful for debugging
copyOnlyResources=false

# specify whether to check all iso dates
checkISODates=false

# specify which resources to copy. Useful for debugging
#resourcesToCopy=MSS 3122

# AT database connection information
databaseType=MySQL
atUrl=jdbc:mysql://dev.archiviststoolkit.org:3306/AT_SANDBOX2_0
atUsername=atuser
atPassword=cr4ckA1t

# parameters used to connect to an ASpace instance
aspaceHost=http://localhost:8089
aspaceAdmin=admin
aspacePassword=admin
aspacePassword=admin

13 changes: 6 additions & 7 deletions dbcopy.sh
Original file line number Diff line number Diff line change
@@ -1,19 +1,18 @@
#!/bin/sh
#
# Simple script to run the data migration plugin from the command line
# outside of the AT. It assumes the plugin zip file "scriptAT.zip" has already
# been installed in the ATs' plugin directory. Also a file called dbcopy.properties must
# also be present in the same directory as this script. This file stores all
# connection information for the AT database, and ASpace instances. Please note that
# the plugin reads all AT database connection information from a file called atdbinfo.txt
# located in the users home directory.
# outside of the AT. It assumes the plugin zip file, "scriptAT.zip", has
# been installed in the AT's plugin directory. Also, a file called dbcopy.properties
# must be present in the same directory as this script. This file stores all
# connection information for the AT database, and ASpace instances.
#
# On a Linux system without an X11 display, Xvfb needs to be used by doing the following
#
# 1. Install Xvfb for the particular Linux distro
# 2. Run > Xvfb :99 -screen 0 800x600x24 &
# 3. Run > export DISPLAY=":99"
#
# Now this script can be executed
# Now this script can be executed from the AT installation directory
#

java -Xmx1024m -cp "plugins/scriptAT.zip:lib/*" org.archiviststoolkit.plugin.dbCopyCLI
122 changes: 90 additions & 32 deletions src/org/archiviststoolkit/plugin/dbCopyCLI.java
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
import org.hibernate.Session;

import java.io.*;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.Properties;

Expand Down Expand Up @@ -36,12 +37,28 @@ public class dbCopyCLI {

private boolean ignoreUnlinkedSubjects = false;

private String aspaceHost = "http://localhost:8089";
private boolean copyOnlyResources = false;

private String aspaceAdmin = "admin";
private boolean checkISODates = false;

private String resourcesToCopy = null;

private String databaseType = "";
private String atUrl = "";
private String atUsername = "";
private String atPassword = "";

private String aspaceHost = "http://localhost:8089";
private String aspaceAdmin = "admin";
private String aspacePassword = "admin";

// specify which records to publish
private boolean publishNames = false;
private boolean publishSubjects = false;
private boolean publishAccessions = false;
private boolean publishDigitalObjects = false;
private boolean publishResources = true;

// this is used to connect to the AT database
private RemoteDBConnectDialogLight rcd;

Expand Down Expand Up @@ -72,14 +89,25 @@ public class dbCopyCLI {
public dbCopyCLI(Properties properties) throws Exception {
useTracer = new Boolean(properties.getProperty("useTracer"));
tracerDatabase = properties.getProperty("tracerDatabase");
databaseURLIndex = new Integer(properties.getProperty("databaseURLIndex"));
clientThreads = new Integer(properties.getProperty("clientThreads"));
checkRepositoryMismatch = new Boolean(properties.getProperty("checkRepositoryMismatch"));
continueFromResources = new Boolean(properties.getProperty("continueFromResources"));
resetPassword = properties.getProperty("resetPassword");
simulateRESTCalls = new Boolean(properties.getProperty("simulateRESTCalls"));
ignoreUnlinkedNames = new Boolean(properties.getProperty("ignoreUnlinkedNames"));
ignoreUnlinkedSubjects = new Boolean(properties.getProperty("ignoreUnlinkedSubjects"));
publishNames = new Boolean(properties.getProperty("publishNames"));
publishSubjects = new Boolean(properties.getProperty("publishSubjects"));
publishAccessions = new Boolean(properties.getProperty("publishAccessions"));
publishDigitalObjects = new Boolean(properties.getProperty("publishDigitalObjects"));
publishResources = new Boolean(properties.getProperty("publishResources"));
copyOnlyResources = new Boolean(properties.getProperty("copyOnlyResources"));
checkISODates = new Boolean(properties.getProperty("checkISODates"));
resourcesToCopy = properties.getProperty("resourcesToCopy");
databaseType = properties.getProperty("databaseType");
atUrl = properties.getProperty("atUrl");
atUsername = properties.getProperty("atUsername");
atPassword = properties.getProperty("atPassword");
aspaceHost = properties.getProperty("aspaceHost");
aspaceAdmin = properties.getProperty("aspaceAdmin");
aspacePassword = properties.getProperty("aspacePassword");
Expand All @@ -95,25 +123,22 @@ public Session getDatabaseSession() {

// see whether to connect to the particular index
if(useTracer) {
String databaseType = "MySQL";
String url = "jdbc:mysql://tracerdb.cyo37z0ucix8.us-east-1.rds.amazonaws.com/at" + tracerDatabase;
String username = "aspace";
String password = "clubfoots37@freakiest";
databaseType = "MySQL";
atUrl = "jdbc:mysql://tracerdb.cyo37z0ucix8.us-east-1.rds.amazonaws.com/at" + tracerDatabase;
atUsername = "aspace";
atPassword = "clubfoots37@freakiest";

// see whether we need to connect to the AT sandbox
if(tracerDatabase.equals("SB")) {
url = "jdbc:mysql://dev.archiviststoolkit.org:3306/AT_SANDBOX2_0";
username = "atuser";
password = "cr4ckA1t";
atUrl = "jdbc:mysql://dev.archiviststoolkit.org:3306/AT_SANDBOX2_0";
atUsername = "atuser";
atPassword = "cr4ckA1t";
}

rcd.connectToDatabase(databaseType, url, username, password);
} else if (databaseURLIndex == -2) {
rcd.connectToCurrentDatabase();
} else {
rcd.connectToDatabase(databaseURLIndex);
}

// try connecting to the AT database
rcd.connectToDatabase(databaseType, atUrl, atUsername, atPassword);

// return the session, which may be null
return rcd.getSession();
}
Expand Down Expand Up @@ -183,9 +208,19 @@ private void startASpaceCopyProcess() {
// print the connection message
System.out.println("Starting record copy\n\n");

// create the hash map used to see if a certain record should be exported automatically
HashMap<String, Boolean> publishMap = new HashMap<String, Boolean>();
publishMap.put("names", publishNames);
publishMap.put("subjects", publishSubjects);
publishMap.put("accessions", publishAccessions);
publishMap.put("digitalObjects", publishDigitalObjects);
publishMap.put("resources", publishResources);

ascopy = new ASpaceCopyUtil(rcd, aspaceHost, aspaceAdmin, aspacePassword);
ascopy.setPublishHashMap(publishMap);
ascopy.setRepositoryMismatchMap(repositoryMismatchMap);
ascopy.setSimulateRESTCalls(simulateRESTCalls);
ascopy.setCheckISODates(checkISODates);
ascopy.setExtentPortionInParts(false);
ascopy.setIgnoreUnlinkedRecords(ignoreUnlinkedNames, ignoreUnlinkedSubjects);

Expand All @@ -207,26 +242,36 @@ private void startASpaceCopyProcess() {
if (continueFromResources && ascopy.uriMapFileExist()) {
ascopy.loadURIMaps();
} else {
ascopy.copyLookupList();
ascopy.copyRepositoryRecords();
ascopy.mapRepositoryGroups();
ascopy.copyLocationRecords();
ascopy.copyUserRecords();
ascopy.copySubjectRecords();
ascopy.copyNameRecords();
ascopy.copyAccessionRecords();
ascopy.copyDigitalObjectRecords();

// save the record maps for possible future use
ascopy.saveURIMaps();
if(!copyOnlyResources) {
ascopy.copyLookupList();
ascopy.copyRepositoryRecords();
ascopy.mapRepositoryGroups();
ascopy.copyLocationRecords();
ascopy.copyUserRecords();
ascopy.copySubjectRecords();
ascopy.copyNameRecords();
ascopy.copyAccessionRecords();
ascopy.copyDigitalObjectRecords();

// save the record maps for possible future use
ascopy.saveURIMaps();
}
}

// get the number of resources to copy here to allow it to be reset while the migration
// has been started, but migration of resources has not yet started
int resourcesToCopy = 1000000;
// set the number of resources to copy
int numberOfResourcesToCopy = 1000000;

// set the resources to copy. Useful for debugging only
ascopy.setResourcesToCopyList(getResourcesToCopy());

ascopy.setUseBatchImport(true);
ascopy.copyResourceRecords(resourcesToCopy, clientThreads);

ascopy.copyResourceRecords(numberOfResourcesToCopy, clientThreads);

// DEBUG code which checks to see that all ISO dates are valid
if(checkISODates) {
ascopy.checkISODates();
}

ascopy.cleanUp();

Expand Down Expand Up @@ -274,6 +319,19 @@ public void closeATConnection() {
rcd.closeSession();
}

/**
 * Parses the comma separated value held in the resourcesToCopy field into a
 * list of resource identifiers.
 *
 * Whitespace around each identifier is removed and empty entries are skipped,
 * so a null, empty, or whitespace-only value yields an empty list rather than
 * a list containing a single empty identifier.
 *
 * @return the resource identifiers in the order they were listed; never null
 */
private ArrayList<String> getResourcesToCopy() {
    ArrayList<String> resourcesIDsList = new ArrayList<String>();

    if (resourcesToCopy == null) {
        return resourcesIDsList;
    }

    // trim first so leading/trailing whitespace does not end up inside the first or last ID
    String[] sa = resourcesToCopy.trim().split("\\s*,\\s*");
    for (String id : sa) {
        // split() returns one empty string for blank input, and empty strings for
        // leading or doubled commas; none of these are usable identifiers
        if (id.length() > 0) {
            resourcesIDsList.add(id);
        }
    }

    return resourcesIDsList;
}

/**
* Main method for testing in stand alone mode
*/
Expand Down
Loading

0 comments on commit 3102fa5

Please sign in to comment.