Skip to content

Commit

Permalink
Release v0.3.0
Browse files Browse the repository at this point in the history
  • Loading branch information
anthonysena authored Jun 4, 2024
2 parents 0aa564f + f12b1fd commit 33dd01b
Show file tree
Hide file tree
Showing 66 changed files with 850 additions and 204 deletions.
8 changes: 5 additions & 3 deletions DESCRIPTION
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
Package: Strategus
Type: Package
Title: Coordinating and Executing Analytics Using HADES Modules
Version: 0.2.1
Date: 2023-01-29
Version: 0.3.0
Date: 2024-06-04
Authors@R: c(
person("Martijn", "Schuemie", email = "[email protected]", role = c("aut")),
person("Anthony", "Sena", email = "[email protected]", role = c("aut", "cre")),
Expand Down Expand Up @@ -33,7 +33,9 @@ Imports:
tibble,
ResultModelManager (>= 0.3.0),
SqlRender (>= 1.11.0),
semver
semver,
httr2,
jsonlite
Suggests:
testthat (>= 3.0.0),
fs,
Expand Down
2 changes: 2 additions & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -10,12 +10,14 @@ export(createResultsExecutionSettings)
export(ensureAllModulesInstantiated)
export(execute)
export(getModuleList)
export(installLatestModule)
export(retrieveConnectionDetails)
export(storeConnectionDetails)
export(syncLockFile)
export(unlockKeyring)
export(validateLockFile)
export(verifyModuleInstallation)
export(zipResults)
import(CohortGenerator)
import(DatabaseConnector)
import(dplyr)
Expand Down
7 changes: 7 additions & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
@@ -1,3 +1,10 @@
Strategus 0.3.0
===============
- Provide option to skip modules (#87)
- Central log file for execution (#132)
- Create function to collect all results into a single ZIP file for sharing (#46)
- Install latest modules (#125)

Strategus 0.2.1
===============
- Update SelfControlledCaseSeries Module to v0.4.1
Expand Down
11 changes: 9 additions & 2 deletions R/Execution.R
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,9 @@
#' @template keyringName
#' @param restart Restart run? Requires `executionScriptFolder` to be specified, and be
#' the same as the `executionScriptFolder` used in the run to restart.
#'
#' @template enforceModuleDependencies
#'
#' @return
#' Does not return anything. Is called for the side-effect of executing the specified
#' analyses.
Expand All @@ -40,7 +43,8 @@ execute <- function(analysisSpecifications,
executionSettings,
executionScriptFolder = NULL,
keyringName = NULL,
restart = FALSE) {
restart = FALSE,
enforceModuleDependencies = TRUE) {
errorMessages <- checkmate::makeAssertCollection()
keyringList <- keyring::keyring_list()
checkmate::assertClass(analysisSpecifications, "AnalysisSpecifications", add = errorMessages)
Expand Down Expand Up @@ -98,7 +102,10 @@ execute <- function(analysisSpecifications,
}

# Validate the modules
modules <- ensureAllModulesInstantiated(analysisSpecifications)
modules <- ensureAllModulesInstantiated(
analysisSpecifications = analysisSpecifications,
enforceModuleDependencies = enforceModuleDependencies
)
if (isFALSE(modules$allModulesInstalled)) {
stop("Stopping execution due to module issues")
}
Expand Down
2 changes: 1 addition & 1 deletion R/ModuleEnv.R
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,7 @@
#' Load module execution space inside an renv
#' inspired by targets::tar_script but allowing custom variable execution
#'
#' Designed to allow more human readable code that is executed inside a module as well as simple variable substituion
#' Designed to allow more human readable code that is executed inside a module as well as simple variable substitution
#' for injecting constants (e.g. simple parameters or file paths used inside and outside of modules)
#'
#' This pattern also allows dependency injection which could be used if you don't want to use an renv and (instead)
Expand Down
121 changes: 91 additions & 30 deletions R/ModuleInstantiation.R
Original file line number Diff line number Diff line change
Expand Up @@ -31,13 +31,15 @@
#'
#' @template forceVerification
#'
#' @template enforceModuleDependencies
#'
#' @return
#' A list containing the install status of all modules
#' (TRUE if all are installed properly) and a tibble listing
#' the instantiated modules.
#'
#' @export
ensureAllModulesInstantiated <- function(analysisSpecifications, forceVerification = FALSE) {
ensureAllModulesInstantiated <- function(analysisSpecifications, forceVerification = FALSE, enforceModuleDependencies = TRUE) {
modules <- getModuleTable(analysisSpecifications, distinct = TRUE)

# Verify only one version per module:
Expand All @@ -62,20 +64,12 @@ ensureAllModulesInstantiated <- function(analysisSpecifications, forceVerificati
)
}

# Check required dependencies have been installed:
dependencies <- extractDependencies(modules)
missingDependencies <- dependencies %>%
filter(!dependsOn %in% modules$module)
if (nrow(missingDependencies) > 0) {
message <- paste(
c(
"Detected missing dependencies:",
sprintf("- Missing module '%s' required by module '%s'", missingDependencies$dependsOn, missingDependencies$module)
),
collapse = "\n"
)
stop(message)
}
# Check required dependencies have been declared in the specification
# unless the user has set enforceModuleDependencies == FALSE
checkModuleDependencies(
modules = modules,
enforceModuleDependencies = enforceModuleDependencies
)

# Verify all modules are properly installed
moduleInstallStatus <- list()
Expand Down Expand Up @@ -300,6 +294,88 @@ verifyModuleInstallation <- function(module, version, silent = FALSE, forceVerif
)
}


#' Install the latest release of a module
#'
#' @description
#' This function will call out to the OHDSI GitHub repo to find the latest
#' version of the module and attempt to install it. Only modules that are listed
#' in the `getModuleList()` function are allowed since it will have a known
#' GitHub location.
#'
#' @param moduleName The name of the module to install (i.e. "CohortGeneratorModule").
#' This parameter must match a value found in the `module` column of `getModuleList()`
#'
#' @return
#' None - this function is called for its side effects
#'
#' @export
installLatestModule <- function(moduleName) {
  # Modules are instantiated into the folder named by this environment
  # variable; fail fast if it is not configured.
  assertModulesFolderSetting(x = Sys.getenv("INSTANTIATED_MODULES_FOLDER"))
  instantiatedModulesFolder <- Sys.getenv("INSTANTIATED_MODULES_FOLDER")
  # Verify that the user's GITHUB_PAT is set properly
  # otherwise we may hit a rate limit
  if (Sys.getenv("GITHUB_PAT") == "") {
    stop("You must set your GITHUB_PAT to use this function. Please use the function `usethis::create_github_token()` and try again after restarting your R session.")
  }
  moduleList <- getModuleList()
  if (isFALSE(moduleName %in% moduleList$module)) {
    # BUG FIX: the original referenced the undefined symbol `module`
    # instead of the `moduleName` parameter, so this error path itself
    # raised "object 'module' not found".
    stop("Module: ", moduleName, " not found in the list from Strategus::getModuleList().")
  }
  moduleDetails <- moduleList %>%
    dplyr::filter(module == moduleName)
  # Query the GitHub REST API for the latest release of the module's repo.
  urlTemplate <- "https://api.%s/repos/%s/%s/releases/latest"
  baseUrl <- sprintf(urlTemplate, moduleDetails$remoteRepo, moduleDetails$remoteUsername, moduleDetails$module)
  req <- httr2::request(base_url = baseUrl) |>
    httr2::req_headers(
      "Authorization" = paste0("Bearer ", Sys.getenv("GITHUB_PAT")),
      "X-GitHub-Api-Version" = "2022-11-28"
    )
  response <- httr2::req_perform(req)
  release <- jsonlite::fromJSON(httr2::resp_body_string(response))
  # Strip only the leading "v"/"V" from the tag (e.g. "v0.3.0" -> "0.3.0").
  # gsub() would also delete any "v" occurring later in the tag name.
  version <- sub("^v", "", release$tag_name, ignore.case = TRUE)
  moduleFolder <- ensureModuleInstantiated(
    module = moduleDetails$module,
    version = version,
    remoteRepo = moduleDetails$remoteRepo,
    remoteUsername = moduleDetails$remoteUsername
  )
  rlang::inform(paste0("Installed ", moduleName, " to ", moduleFolder))
}

extractDependencies <- function(modules) {
  # Build a tibble of (module, dependsOn) pairs for one row of `modules`
  # by reading the Dependencies entry of that module's MetaData.json.
  extractDependenciesSingleModule <- function(module) {
    moduleFolder <- getModuleFolder(module$module, module$version)
    metaData <- getModuleMetaData(moduleFolder)
    dependencies <- tibble(
      module = module$module,
      dependsOn = as.character(metaData$Dependencies)
    )
    return(dependencies)
  }
  # seq_len() is safe when `modules` has zero rows, unlike the original
  # 1:nrow(modules), which yields c(1, 0) and mis-splits the data frame.
  dependencies <- lapply(split(modules, seq_len(nrow(modules))), extractDependenciesSingleModule) %>%
    bind_rows()
  return(dependencies)
}

checkModuleDependencies <- function(modules, enforceModuleDependencies) {
  # Verify that every dependency declared in a module's metadata is also
  # present in the analysis specification. When the caller has set
  # enforceModuleDependencies == FALSE, unmet dependencies are ignored.
  declaredDependencies <- extractDependencies(modules)
  unmetDependencies <- declaredDependencies %>%
    filter(!dependsOn %in% modules$module)
  if (enforceModuleDependencies && nrow(unmetDependencies) > 0) {
    # Report every unmet dependency, one bullet line per missing module.
    detailLines <- sprintf(
      "- Missing module '%s' required by module '%s'",
      unmetDependencies$dependsOn,
      unmetDependencies$module
    )
    stop(paste(c("Detected missing dependencies:", detailLines), collapse = "\n"))
  }
}

getModuleTable <- function(analysisSpecifications, distinct = FALSE) {
modules <- lapply(
analysisSpecifications$moduleSpecifications,
Expand All @@ -320,21 +396,6 @@ getModuleTable <- function(analysisSpecifications, distinct = FALSE) {
return(modules)
}

# Collect the declared dependencies of each module in `modules` into a
# single tibble with columns `module` and `dependsOn` (one row per
# dependency edge).
extractDependencies <- function(modules) {
  # Read the Dependencies entry from one module's MetaData.json and return
  # it as (module, dependsOn) rows.
  extractDependenciesSingleModule <- function(module) {
    moduleFolder <- getModuleFolder(module$module, module$version)
    metaData <- getModuleMetaData(moduleFolder)
    dependencies <- tibble(
      module = module$module,
      dependsOn = as.character(metaData$Dependencies)
    )
    return(dependencies)
  }
  # NOTE(review): 1:nrow(modules) yields c(1, 0) when `modules` has zero
  # rows — prefer seq_len(nrow(modules)); callers presumably always pass a
  # non-empty table, but worth confirming.
  dependencies <- lapply(split(modules, 1:nrow(modules)), extractDependenciesSingleModule) %>%
    bind_rows()
  return(dependencies)
}

getModuleMetaData <- function(moduleFolder) {
jsonFileName <- file.path(moduleFolder, "MetaData.json")
if (!file.exists(jsonFileName)) {
Expand Down
14 changes: 12 additions & 2 deletions R/ResultModelCreation.R
Original file line number Diff line number Diff line change
Expand Up @@ -27,15 +27,20 @@ createResultDataModels <- function(analysisSpecifications,
executionSettings,
executionScriptFolder = NULL,
keyringName = NULL,
restart = FALSE) {
restart = FALSE,
enforceModuleDependencies = TRUE) {
errorMessages <- checkmate::makeAssertCollection()
keyringList <- keyring::keyring_list()
checkmate::assertClass(analysisSpecifications, "AnalysisSpecifications", add = errorMessages)
checkmate::assertClass(executionSettings, "ResultsExecutionSettings", add = errorMessages)
checkmate::assertChoice(x = keyringName, choices = keyringList$keyring, null.ok = TRUE, add = errorMessages)
checkmate::reportAssertions(collection = errorMessages)

modules <- ensureAllModulesInstantiated(analysisSpecifications)
modules <- ensureAllModulesInstantiated(
analysisSpecifications = analysisSpecifications,
enforceModuleDependencies = enforceModuleDependencies
)

if (isFALSE(modules$allModulesInstalled)) {
stop("Stopping execution due to module issues")
}
Expand Down Expand Up @@ -203,8 +208,11 @@ runSchemaCreation <- function(analysisSpecifications, keyringSettings, moduleInd
renv::use(lockfile = "renv.lock")
}

ParallelLogger::addDefaultFileLogger(jobContext$moduleExecutionSettings$logFileName)
ParallelLogger::addDefaultFileLogger(file.path(jobContext$moduleExecutionSettings$resultsSubFolder, "log.txt"))
ParallelLogger::addDefaultErrorReportLogger(file.path(jobContext$moduleExecutionSettings$resultsSubFolder, "errorReportR.txt"))

message("START SCHEMA CREATION: ", moduleName)
# Main.R can override default behaviour by implementing this function
if (is.function(createDataModelSchema)) {
# If the keyring is locked, unlock it, set the value and then re-lock it
Expand Down Expand Up @@ -239,6 +247,7 @@ runSchemaCreation <- function(analysisSpecifications, keyringSettings, moduleInd
)
writeLines("specifications.not.written", doneFile)
}
message("FINISH SCHEMA CREATION: ", moduleName)

ParallelLogger::unregisterLogger("DEFAULT_FILE_LOGGER", silent = TRUE)
ParallelLogger::unregisterLogger("DEFAULT_ERRORREPORT_LOGGER", silent = TRUE)
Expand All @@ -248,6 +257,7 @@ runSchemaCreation <- function(analysisSpecifications, keyringSettings, moduleInd
injectVars = list(
jobContextFileName = jobContextFileName,
dataModelExportPath = dataModelExportPath,
moduleName = module,
doneFile = doneFile
)
)
Expand Down
24 changes: 15 additions & 9 deletions R/ResultsUpload.R
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,8 @@ runResultsUpload <- function(analysisSpecifications, keyringSettings, moduleInde
}

tempScriptFile <- file.path(moduleExecutionSettings$workSubFolder, "UploadScript.R")
ParallelLogger::addDefaultFileLogger(jobContext$moduleExecutionSettings$logFileName)
on.exit(ParallelLogger::unregisterLogger("DEFAULT_FILE_LOGGER", silent = TRUE))

##
# Module space executed code
Expand All @@ -68,9 +70,10 @@ runResultsUpload <- function(analysisSpecifications, keyringSettings, moduleInde

getDataModelSpecifications <- function(...) {
ParallelLogger::logInfo("Getting result model specification")
if (file.exists("resultsDataModelSpecification.csv")) {
rdmsFilePath <- file.path(jobContext$moduleExecutionSettings$resultsSubFolder, "resultsDataModelSpecification.csv")
if (file.exists(rdmsFilePath)) {
res <- CohortGenerator::readCsv(
file = "resultsDataModelSpecification.csv"
file = rdmsFilePath
)
return(res)
}
Expand All @@ -81,13 +84,16 @@ runResultsUpload <- function(analysisSpecifications, keyringSettings, moduleInde
moduleInfo <- ParallelLogger::loadSettingsFromJson("MetaData.json")
jobContext <- readRDS(jobContextFileName)
specifications <- getDataModelSpecifications(jobContext)

ParallelLogger::addDefaultFileLogger(jobContext$moduleExecutionSettings$logFileName)
ParallelLogger::addDefaultFileLogger(file.path(jobContext$moduleExecutionSettings$resultsSubFolder, "log.txt"))
ParallelLogger::addDefaultErrorReportLogger(file.path(jobContext$moduleExecutionSettings$resultsSubFolder, "errorReportR.txt"))

if (Sys.getenv("FORCE_RENV_USE", "") == "TRUE") {
renv::use(lockfile = "renv.lock")
}

message("START MODULE RESULTS UPLOAD: ", moduleName)
# Override default behaviour and do module specific upload inside module context?
if (is.function(uploadResultsCallback)) {
ParallelLogger::logInfo("Calling module result upload functionality")
Expand Down Expand Up @@ -122,6 +128,7 @@ runResultsUpload <- function(analysisSpecifications, keyringSettings, moduleInde
writeLines("specifications.written", doneFile)
}

message("FINISH MODULE RESULTS UPLOAD: ", moduleName)
ParallelLogger::unregisterLogger("DEFAULT_FILE_LOGGER", silent = TRUE)
ParallelLogger::unregisterLogger("DEFAULT_ERRORREPORT_LOGGER", silent = TRUE)
},
Expand All @@ -130,6 +137,7 @@ runResultsUpload <- function(analysisSpecifications, keyringSettings, moduleInde
injectVars = list(
jobContextFileName = jobContextFileName,
dataModelExportPath = dataModelExportPath,
moduleName = module,
doneFile = doneFile
)
)
Expand All @@ -149,34 +157,32 @@ runResultsUpload <- function(analysisSpecifications, keyringSettings, moduleInde
workStatus <- readLines(doneFile)

if (workStatus == "specifications.written") {
ParallelLogger::logInfo("Uploading results according to module specification")
message("Uploading results according to module specification")
specifications <- CohortGenerator::readCsv(dataModelExportPath)
moduleInfo <- ParallelLogger::loadSettingsFromJson(file.path(moduleFolder, "MetaData.json"))

keyringName <- jobContext$keyringSettings$keyringName
keyringLocked <- Strategus::unlockKeyring(keyringName = keyringName)

ParallelLogger::logInfo("Getting result database credentials")
message("Getting result database credentials")
resultsConnectionDetails <- keyring::key_get(jobContext$moduleExecutionSettings$resultsConnectionDetailsReference, keyring = keyringName)
resultsConnectionDetails <- ParallelLogger::convertJsonToSettings(resultsConnectionDetails)
resultsConnectionDetails <- do.call(DatabaseConnector::createConnectionDetails, resultsConnectionDetails)
jobContext$moduleExecutionSettings$resultsConnectionDetails <- resultsConnectionDetails

ParallelLogger::logInfo("Calling RMM for upload")
message("Calling RMM for upload")
ResultModelManager::uploadResults(
connectionDetails = jobContext$moduleExecutionSettings$resultsConnectionDetails,
schema = jobContext$moduleExecutionSettings$resultsDatabaseSchema,
resultsFolder = jobContext$moduleExecutionSettings$resultsSubFolder,
tablePrefix = moduleInfo$TablePrefix,
forceOverWriteOfSpecifications = FALSE,
purgeSiteDataBeforeUploading = FALSE,
databaseIdentifierFile = "database_meta_data.csv",
databaseIdentifierFile = file.path(executionSettings$resultsFolder, "DatabaseMetaData", "database_meta_data.csv"),
runCheckAndFixCommands = FALSE,
warnOnMissingTable = TRUE,
specifications = specifications
)

ParallelLogger::logInfo("Upload completed")
message("Upload completed")
if (keyringLocked) {
keyring::keyring_lock(keyring = keyringName)
}
Expand Down
Loading

0 comments on commit 33dd01b

Please sign in to comment.