
Commit

refactor: performance logs are now captured directly instead of through console logs
1Anuar committed Nov 25, 2024
1 parent 87790da commit b2b13fd
Showing 2 changed files with 48 additions and 19 deletions.
61 changes: 44 additions & 17 deletions R/Achilles.R
@@ -151,6 +151,8 @@ achilles <- function(connectionDetails,
totalStart <- Sys.time()
achillesSql <- c()

+performanceTable <- data.frame(analysis_id = integer(), elapsed_seconds = numeric(), start_time = numeric(), end_time = numeric())

# Check if the correct parameters are supplied when running in sqlOnly mode
if (sqlOnly && missing(connectionDetails) && is.null(sqlDialect)) {
stop("Error: When specifying sqlOnly = TRUE, sqlDialect or connectionDetails must be supplied.")
@@ -325,11 +327,18 @@ achilles <- function(connectionDetails,
resultsDatabaseSchema = resultsDatabaseSchema
)

+sqlPerformanceTable <- SqlRender::loadRenderTranslateSql(
+sqlFilename = "achilles_performance_ddl.sql",
+packageName = "Achilles",
+dbms = connectionDetails$dbms,
+resultsDatabaseSchema = resultsDatabaseSchema
+)

# Populate achilles_analysis without the "distribution" and "distributed_field"
# columns from achilles_analysis_details.csv
analysisDetailsCsv <- Achilles::getAnalysisDetails()
analysisDetailsCsv <- analysisDetailsCsv[,-c(2, 3)]

if (!sqlOnly) {
# Create empty achilles_analysis
DatabaseConnector::executeSql(
@@ -342,6 +351,16 @@ achilles <- function(connectionDetails,
progressBar = F,
reportOverallTime = F
)
+DatabaseConnector::executeSql(
+connection = connection,
+sql = sqlPerformanceTable,
+errorReportFile = file.path(
+outputFolder,
+"achillesErrorCreateAchillesPerformance.txt"
+),
+progressBar = F,
+reportOverallTime = F
+)
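
The `achilles_performance_ddl.sql` executed above is not included in this diff; the following is a guess at its shape, inferred only from the `performanceTable` columns (table layout and types are assumptions), written as the SQL string the call above would render:

# Hypothetical DDL sketch, inferred from performanceTable; not from the commit.
ddlSketch <- "
IF OBJECT_ID('@resultsDatabaseSchema.achilles_performance', 'U') IS NOT NULL
  DROP TABLE @resultsDatabaseSchema.achilles_performance;
CREATE TABLE @resultsDatabaseSchema.achilles_performance (
  analysis_id INT,
  elapsed_seconds FLOAT,
  start_time FLOAT,
  end_time FLOAT
);"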

# Populate achilles_analysis with data from achilles_analysis_details.csv from above
suppressMessages(
@@ -381,7 +400,7 @@ achilles <- function(connectionDetails,

# Generate Main Analyses
mainAnalysisIds <- analysisDetails$analysis_id

mainSqls <- lapply(mainAnalysisIds, function(analysisId) {
list(
analysisId = analysisId,
@@ -435,7 +454,10 @@ achilles <- function(connectionDetails,
progressBar = FALSE,
reportOverallTime = FALSE
)
-delta <- Sys.time() - start
+endTime <- Sys.time()
+delta <- endTime - start
+analysisId <- as.integer(mainSql$analysisId)
+performanceTable[nrow(performanceTable) + 1, ] <- c(analysisId, delta, start, endTime)
ParallelLogger::logInfo(sprintf(
"[Main Analysis] [COMPLETE] %d (%f %s)",
as.integer(mainSql$analysisId),
@@ -487,6 +509,17 @@ achilles <- function(connectionDetails,

ParallelLogger::stopCluster(cluster = cluster)
}

+DatabaseConnector::insertTable(
+connection = connection,
+databaseSchema = resultsDatabaseSchema,
+tableName = "achilles_performance",
+data = performanceTable,
+dropTableIfExists = FALSE,
+createTable = FALSE,
+tempTable = FALSE,
+progressBar = F
+)
}
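
With the table populated, the captured timings can be read back directly; a minimal sketch, assuming the same open `connection` and `resultsDatabaseSchema` used above:

# Sketch: fetch per-analysis timings from the new table.
perf <- DatabaseConnector::querySql(
  connection = connection,
  sql = SqlRender::render(
    "SELECT analysis_id, elapsed_seconds FROM @schema.achilles_performance;",
    schema = resultsDatabaseSchema
  )
)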

# Merge scratch tables into final analysis tables
@@ -511,7 +544,8 @@ achilles <- function(connectionDetails,
smallCellCount = smallCellCount,
outputFolder = outputFolder,
sqlOnly = sqlOnly,
-logFile = logFile
+logFile = logFile,
+performanceTable = performanceTable
)
})

@@ -1150,7 +1184,8 @@ optimizeAtlasCache <- function(connectionDetails,
smallCellCount,
outputFolder,
sqlOnly,
-logFile) {
+logFile,
+performanceTable) {
castedNames <- apply(resultsTable$schema, 1, function(field) {
SqlRender::render(
"cast(@fieldName as @fieldType) as @fieldName",
@@ -1307,21 +1342,13 @@ optimizeAtlasCache <- function(connectionDetails,
}

.getAchillesResultBenchmark <- function(analysisId, logs) {
-logs <- logs[logs$analysisId == analysisId,]
+logs <- logs[logs$analysis_id == analysisId,]
if (nrow(logs) == 1) {
-runTime <- strsplit(logs[1,]$runTime, " ")[[1]]
-runTimeValue <- round(as.numeric(runTime[1]), 2)
-runTimeUnit <- runTime[2]
-if (runTimeUnit == "mins") {
-runTimeValue <- runTimeValue * 60
-} else if (runTimeUnit == "hours") {
-runTimeValue <- runTimeValue * 60 * 60
-} else if (runTimeUnit == "days") {
-runTimeValue <- runTimeValue * 60 * 60 * 24
-}
+runTime <- logs[1,]$elapsed_seconds
+runTimeValue <- round(runTime[1], 2)
runTimeValue
} else {
-"ERROR: no runtime found in log file"
+"ERROR: no runtime found"
}
}
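
For contrast with the old string-parsing version, a minimal sketch of the lookup the rewritten helper performs, on toy data (the analysis ids and timings are made up):

# Toy illustration of the new benchmark lookup -- not from the commit.
logs <- data.frame(analysis_id = c(101L, 102L), elapsed_seconds = c(12.345, 0.678))
row <- logs[logs$analysis_id == 101L, ]
round(row$elapsed_seconds, 2)  # 12.35 -- already seconds, no unit parsing needed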

@@ -1,7 +1,9 @@
-select aa.analysis_id, aa.analysis_name, aa.category, stratum_1 elapsed_seconds
+select ap.analysis_id, aa.analysis_name, aa.category, ap.elapsed_seconds elapsed_seconds
from @results_database_schema.ACHILLES_ANALYSIS aa
join @results_database_schema.ACHILLES_RESULTS ar on aa.analysis_id + 2000000 = ar.analysis_id
+join @results_database_schema.ACHILLES_PERFORMANCE ap on ap.analysis_id = aa.analysis_id
union
-select aa.analysis_id, aa.analysis_name, aa.category, stratum_1 elapsed_seconds
+select ap.analysis_id, aa.analysis_name, aa.category, ap.elapsed_seconds elapsed_seconds
from @results_database_schema.ACHILLES_ANALYSIS aa
join @results_database_schema.ACHILLES_RESULTS_DIST ar on aa.analysis_id + 2000000 = ar.analysis_id
+join @results_database_schema.ACHILLES_PERFORMANCE ap on ap.analysis_id = aa.analysis_id
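
The updated benchmark query can be rendered, translated, and executed the usual OHDSI way; a hedged sketch in which the file name `achilles_benchmark.sql` is a guess — only the joins shown above come from the diff:

# Sketch: render and run the benchmark query against the results schema.
sql <- SqlRender::loadRenderTranslateSql(
  sqlFilename = "achilles_benchmark.sql",  # hypothetical file name
  packageName = "Achilles",
  dbms = connectionDetails$dbms,
  results_database_schema = resultsDatabaseSchema
)
benchmarks <- DatabaseConnector::querySql(connection, sql)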
