Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Speed up loading many columns #5651

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 4 additions & 3 deletions CommonData/column.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -82,7 +82,7 @@ void Column::dbDelete(bool cleanUpRest)
{
assert(_id != -1);

labelsClear();
labelsClear(false);
db().columnDelete(_id, cleanUpRest);

_id = -1;
Expand Down Expand Up @@ -649,13 +649,14 @@ void Column::_sortLabelsByOrder()
std::sort(_labels.begin(), _labels.end(), [](const Label * l, const Label * r) { return l->order() < r->order(); });
}

void Column::labelsClear()
void Column::labelsClear(bool doIncRevision)
{
db().labelsClear(_id);
_labels.clear();
_labelByIntsIdMap.clear();

incRevision(false);
if(doIncRevision)
incRevision(false);
}

void Column::beginBatchedLabelsDB()
Expand Down
2 changes: 1 addition & 1 deletion CommonData/column.h
Original file line number Diff line number Diff line change
Expand Up @@ -109,7 +109,7 @@ class Column : public DataSetBaseNode
void upgradeSetDoubleLabelsInInts(); ///< Used by upgrade 0.18.* -> 0.19
void upgradeExtractDoublesIntsFromLabels(); ///< Used by upgrade 0.18.* -> 0.19

void labelsClear();
void labelsClear(bool doIncRevision=true);
int labelsAdd( int display);
int labelsAdd( const std::string & display);
int labelsAdd( const std::string & display, const std::string & description, const Json::Value & originalValue);
Expand Down
63 changes: 46 additions & 17 deletions CommonData/databaseinterface.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -75,7 +75,7 @@ int DatabaseInterface::dataSetInsert(const std::string & dataFilePath, long data

transactionWriteBegin();
int id = runStatementsId("INSERT INTO DataSets (dataFilePath, dataFileTimestamp, description, databaseJson, emptyValuesJson, dataFileSynch) VALUES (?, ?, ?, ?, ?, ?) RETURNING id;", prepare);
runStatements("CREATE TABLE " + dataSetName(id) + " (rowNumber INTEGER PRIMARY KEY);");
runStatements("CREATE TABLE " + dataSetName(id) + " (rowNumber INTEGER PRIMARY KEY);"); // Can be overwritten through dataSetCreateTable
transactionWriteEnd();

return id;
Expand Down Expand Up @@ -183,6 +183,7 @@ void DatabaseInterface::filterDelete(int filterIndex)

if(dataSetId != -1)
runStatements("ALTER TABLE " + dataSetName(dataSetId) + " DROP COLUMN " + filterName(filterIndex) + ";");

runStatements("DELETE FROM Filters WHERE id = " + std::to_string(filterIndex) + ";");

transactionWriteEnd();
Expand Down Expand Up @@ -391,7 +392,7 @@ void DatabaseInterface::filterWrite(int filterIndex, const std::vector<bool> & v
transactionWriteEnd();
}

int DatabaseInterface::columnInsert(int dataSetId, int index, const std::string & name, columnType colType)
int DatabaseInterface::columnInsert(int dataSetId, int index, const std::string & name, columnType colType, bool alterTable)
{
JASPTIMER_SCOPE(DatabaseInterface::columnInsert);
transactionWriteBegin();
Expand Down Expand Up @@ -419,19 +420,38 @@ int DatabaseInterface::columnInsert(int dataSetId, int index, const std::string
Log::log() << "Inserting column failed!" << std::endl;
#endif

//Add a scalar and ordinal/nominal column to DataSet_# for the column
const std::string alterDatasetPrefix = "ALTER TABLE " + dataSetName(dataSetId);
const std::string addColumnFragment = " ADD " + columnBaseName(columnId);

runStatements(alterDatasetPrefix + addColumnFragment + "_DBL REAL NULL;");
runStatements(alterDatasetPrefix + addColumnFragment + "_INT INT NULL;");


if(alterTable) //If not then via dataSetCreateTable
{
//Add a scalar and ordinal/nominal column to DataSet_# for the column
const std::string alterDatasetPrefix = "ALTER TABLE " + dataSetName(dataSetId);
const std::string addColumnFragment = " ADD " + columnBaseName(columnId);

runStatements(alterDatasetPrefix + addColumnFragment + "_DBL REAL NULL;");
runStatements(alterDatasetPrefix + addColumnFragment + "_INT INT NULL;");
}

//The labels will be added separately later

transactionWriteEnd();
return columnId;
}

/// Rebuilds the DataSet_# table of `dataSet` in one go instead of growing it column by column.
///
/// The current table is dropped and a new one is created that contains:
///  - rowNumber as INTEGER PRIMARY KEY,
///  - the column for the dataset's filter (INT NOT NULL DEFAULT 1),
///  - a `_DBL REAL NULL` and an `_INT INT NULL` field for every column currently in `dataSet`.
///
/// Anything stored in the old table is gone afterwards, so this is only meant for the moment before data is written.
/// NOTE(review): DROP and CREATE are issued as two separate runStatements calls and this function opens no
/// transaction itself — confirm whether callers need the pair to be atomic.
void DatabaseInterface::dataSetCreateTable(DataSet * dataSet)
{
	const std::string tableName = dataSetName(dataSet->id());

	// Plain DROP TABLE (no IF EXISTS): the table is expected to exist already at this point.
	runStatements("DROP TABLE " + tableName + ";");

	std::stringstream createSql;
	createSql << "CREATE TABLE " << tableName << " (rowNumber INTEGER PRIMARY KEY, " << filterName(dataSet->filter()->id()) << " INT NOT NULL DEFAULT 1";

	for(Column * col : dataSet->columns())
	{
		// Each column is backed by two fields sharing the same base name.
		const std::string baseName = columnBaseName(col->id());

		createSql << ", " << baseName << "_DBL REAL NULL, " << baseName << "_INT INT NULL";
	}

	createSql << ");";

	runStatements(createSql.str());
}

int DatabaseInterface::columnGetDataSetId(int columnId)
{
JASPTIMER_SCOPE(DatabaseInterface::columnGetDataSetId);
Expand Down Expand Up @@ -1438,19 +1458,19 @@ void DatabaseInterface::_runStatements(const std::string & statements, bindParam
}
while(remain > 1 && (ret == SQLITE_OK && ret != SQLITE_DONE));

const int maxLenStatementError = 200;
std::string shortStatements = statements.size() <= maxLenStatementError ? statements : statements.substr(0, maxLenStatementError);

if(ret == SQLITE_ERROR)
{
std::string errorMsg = "Running ```\n"+statements+"\n``` failed because of: `" + sqlite3_errmsg(_db);
Log::log() << errorMsg << std::endl;

throw std::runtime_error(errorMsg);
Log::log() << "Running ```\n"+statements +"\n``` failed because of: `" + sqlite3_errmsg(_db) << std::endl;
throw std::runtime_error( "Running ```\n"+shortStatements +"\n``` failed because of: `" + sqlite3_errmsg(_db));
}

if(ret == SQLITE_READONLY)
{
std::string errorMsg = "Running ```\n"+statements+"\n``` failed because the database is readonly...";
Log::log() << errorMsg << std::endl;
throw std::runtime_error(errorMsg);
Log::log() << "Running ```\n"+statements +"\n``` failed because the database is readonly..." << std::endl;
throw std::runtime_error( "Running ```\n"+shortStatements +"\n``` failed because the database is readonly...");
}
}

Expand Down Expand Up @@ -1570,7 +1590,8 @@ void DatabaseInterface::create()
else
Log::log() << "Opened internal sqlite database for creation at '" << dbFile() << "'." << std::endl;


dbStartUpPragmas();

transactionWriteBegin();
runStatements(_dbConstructionSql);
transactionWriteEnd();
Expand All @@ -1593,6 +1614,14 @@ void DatabaseInterface::load()
}
else
Log::log() << "Opened internal sqlite database for loading at '" << dbFile() << "'." << std::endl;

dbStartUpPragmas();
}

/// Runs the sqlite pragmas that have to be set each time the database has been opened.
/// Called by both create() and load(); the statements are executed in the order listed.
void DatabaseInterface::dbStartUpPragmas()
{
	static const char * const startUpPragmas[] =
	{
		"pragma journal_mode = WAL;",	// switch the journal to write-ahead logging
		"pragma synchronous = normal;"	// use the "normal" synchronous setting
	};

	for(const char * pragmaStatement : startUpPragmas)
		runStatements(pragmaStatement);
}

void DatabaseInterface::close()
Expand Down
5 changes: 4 additions & 1 deletion CommonData/databaseinterface.h
Original file line number Diff line number Diff line change
Expand Up @@ -87,6 +87,7 @@ class DatabaseInterface
int dataSetGetRevision( int dataSetId);
int dataSetGetFilter( int dataSetId);
void dataSetInsertEmptyRow( int dataSetId, size_t row);
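void dataSetCreateTable( DataSet * dataSet); ///< Assumes you are importing fresh data and haven't written any rows to the DataSet_? table yet: the existing table is dropped and recreated with the filter and all columns in one statement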

void dataSetBatchedValuesUpdate(DataSet * data, std::vector<Column*> columns, std::function<void(float)> progressCallback = [](float){});
void dataSetBatchedValuesUpdate(DataSet * data, std::function<void(float)> progressCallback = [](float){});
Expand All @@ -109,7 +110,7 @@ class DatabaseInterface

//Columns & Data/Values
//Index stuff:
int columnInsert( int dataSetId, int index = -1, const std::string & name = "", columnType colType = columnType::unknown); ///< Insert a row into Columns and create the corresponding columns in DataSet_? Also makes sure the indices are correct
int columnInsert( int dataSetId, int index = -1, const std::string & name = "", columnType colType = columnType::unknown, bool alterTable=true); ///< Insert a row into Columns and create the corresponding columns in DataSet_? Also makes sure the indices are correct
int columnLastFreeIndex( int dataSetId);
void columnIndexIncrements( int dataSetId, int index); ///< If index already is in use that column and all after are incremented by 1
void columnIndexDecrements( int dataSetId, int index); ///< Indices bigger than index are decremented, assumption is that the previous one using it has been removed already
Expand Down Expand Up @@ -158,6 +159,7 @@ class DatabaseInterface
void transactionReadBegin(); ///< runs BEGIN DEFERRED and waits for sqlite to not be busy anymore if some other process is writing. Tracks whether nested and only does BEGIN+COMMIT at lowest depth
void transactionReadEnd(); ///< runs COMMIT and ends the transaction. Tracks whether nested and only does BEGIN+COMMIT at lowest depth


private:
void _doubleTroubleBinder(sqlite3_stmt *stmt, int param, double dbl); ///< Needed to work around the lack of support for NAN, INF and NEG_INF in sqlite, converts those to string to make use of sqlite flexibility
double _doubleTroubleReader(sqlite3_stmt *stmt, int colI); ///< The reading counterpart to _doubleTroubleBinder to convert string representations of NAN, INF and NEG_INF back to double
Expand All @@ -168,6 +170,7 @@ class DatabaseInterface
void load(); ///< Loads a sqlite database from sessiondir (after loading a jaspfile)
void close(); ///< Closes the loaded database and disconnects
bool tableHasColumn(const std::string & tableName, const std::string & columnName);
void dbStartUpPragmas();

int _transactionWriteDepth = 0,
_transactionReadDepth = 0;
Expand Down
13 changes: 10 additions & 3 deletions CommonData/dataset.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -59,6 +59,7 @@ void DataSet::dbDelete()

_dataSetID = -1;


db().transactionWriteEnd();
}

Expand Down Expand Up @@ -171,12 +172,12 @@ void DataSet::removeColumn(const std::string & name)
}
}

void DataSet::insertColumn(size_t index)
void DataSet::insertColumn(size_t index, bool alterDataSetTable)
{

assert(_dataSetID > 0);

Column * newColumn = new Column(this, db().columnInsert(_dataSetID, index));
Column * newColumn = new Column(this, db().columnInsert(_dataSetID, index, "", columnType::unknown, alterDataSetTable));

_columns.insert(_columns.begin()+index, newColumn);

Expand Down Expand Up @@ -382,18 +383,24 @@ void DataSet::setColumnCount(size_t colCount)
db().transactionWriteBegin();

int curCount = columns().size();

bool alterTableAfterwards = curCount == 0 && colCount > 0;

if(colCount > curCount)
for(size_t i=curCount; i<colCount; i++)
insertColumn(i);
insertColumn(i, !alterTableAfterwards);

else if(colCount < curCount)
for(size_t i=curCount-1; i>=colCount; i--)
removeColumn(i);


incRevision();

db().transactionWriteEnd();

if(alterTableAfterwards)
db().dataSetCreateTable(this);
}

void DataSet::setRowCount(size_t rowCount)
Expand Down
4 changes: 2 additions & 2 deletions CommonData/dataset.h
Original file line number Diff line number Diff line change
Expand Up @@ -40,11 +40,11 @@ class DataSet : public DataSetBaseNode
void beginBatchedToDB();
void endBatchedToDB(std::function<void(float)> progressCallback = [](float){}, Columns columns={});
void endBatchedToDB(Columns columns) { endBatchedToDB([](float){}, columns); }

void removeColumn( const std::string & name );
void removeColumn( size_t index );
void removeColumnById( size_t id );
void insertColumn( size_t index );
void insertColumn( size_t index, bool alterDataSetTable = true);
Column * newColumn( const std::string & name);
int getColumnIndex( const std::string & name ) const;
int columnIndex( const Column * col ) const;
Expand Down
Loading