diff --git a/DESCRIPTION b/DESCRIPTION index 0147d065..1fa7e091 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,8 +1,8 @@ Package: Strategus Type: Package Title: Coordinating and Executing Analytics Using HADES Modules -Version: 0.2.1 -Date: 2023-01-29 +Version: 0.3.0 +Date: 2023-06-04 Authors@R: c( person("Martijn", "Schuemie", email = "schuemie@ohdsi.org", role = c("aut")), person("Anthony", "Sena", email = "sena@ohdsi.org", role = c("aut", "cre")), @@ -33,7 +33,9 @@ Imports: tibble, ResultModelManager (>= 0.3.0), SqlRender (>= 1.11.0), - semver + semver, + httr2, + jsonlite Suggests: testthat (>= 3.0.0), fs, diff --git a/NAMESPACE b/NAMESPACE index dd45e9e0..373b0586 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -10,12 +10,14 @@ export(createResultsExecutionSettings) export(ensureAllModulesInstantiated) export(execute) export(getModuleList) +export(installLatestModule) export(retrieveConnectionDetails) export(storeConnectionDetails) export(syncLockFile) export(unlockKeyring) export(validateLockFile) export(verifyModuleInstallation) +export(zipResults) import(CohortGenerator) import(DatabaseConnector) import(dplyr) diff --git a/NEWS.md b/NEWS.md index 28d06f42..44956b2f 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,3 +1,10 @@ +Strategus 0.3.0 +=============== +- Provide option to skip modules (#87) +- Central log file for execution (#132) +- Create function to collect all results into a single ZIP file for sharing (#46) +- Install latest modules (#125) + Strategus 0.2.1 =============== - Update SelfControlledCaseSeries Module to v0.4.1 diff --git a/R/Execution.R b/R/Execution.R index 2b073f3a..100d11b4 100644 --- a/R/Execution.R +++ b/R/Execution.R @@ -31,6 +31,9 @@ #' @template keyringName #' @param restart Restart run? Requires `executionScriptFolder` to be specified, and be #' the same as the `executionScriptFolder` used in the run to restart. +#' +#' @template enforceModuleDependencies +#' #' @return #' Does not return anything. Is called for the side-effect of executing the specified #' analyses. @@ -40,7 +43,8 @@ execute <- function(analysisSpecifications, executionSettings, executionScriptFolder = NULL, keyringName = NULL, - restart = FALSE) { + restart = FALSE, + enforceModuleDependencies = TRUE) { errorMessages <- checkmate::makeAssertCollection() keyringList <- keyring::keyring_list() checkmate::assertClass(analysisSpecifications, "AnalysisSpecifications", add = errorMessages) @@ -98,7 +102,10 @@ execute <- function(analysisSpecifications, } # Validate the modules - modules <- ensureAllModulesInstantiated(analysisSpecifications) + modules <- ensureAllModulesInstantiated( + analysisSpecifications = analysisSpecifications, + enforceModuleDependencies = enforceModuleDependencies + ) if (isFALSE(modules$allModulesInstalled)) { stop("Stopping execution due to module issues") } diff --git a/R/ModuleEnv.R b/R/ModuleEnv.R index 87bb8b27..03139f36 100644 --- a/R/ModuleEnv.R +++ b/R/ModuleEnv.R @@ -36,7 +36,7 @@ #' Load module execution space inside and renv #' inspired by targets::tar_script but allowing custom variable execution #' -#' Designed to allow more human readable code that is executed inside a module as well as simple variable substituion +#' Designed to allow more human readable code that is executed inside a module as well as simple variable substitution #' for injecting constants (e.g. 
simple parameters or file paths used inside and outside of modules) #' #' This pattern also allows dependency injection which could be used if you don't want to use and renv and (instead) diff --git a/R/ModuleInstantiation.R b/R/ModuleInstantiation.R index 82ef3407..6aa6403d 100644 --- a/R/ModuleInstantiation.R +++ b/R/ModuleInstantiation.R @@ -31,13 +31,15 @@ #' #' @template forceVerification #' +#' @template enforceModuleDependencies +#' #' @return #' A list containing the install status of all modules #' (TRUE if all are installed properly) and a tibble listing #' the instantiated modules. #' #' @export -ensureAllModulesInstantiated <- function(analysisSpecifications, forceVerification = FALSE) { +ensureAllModulesInstantiated <- function(analysisSpecifications, forceVerification = FALSE, enforceModuleDependencies = TRUE) { modules <- getModuleTable(analysisSpecifications, distinct = TRUE) # Verify only one version per module: @@ -62,20 +64,12 @@ ensureAllModulesInstantiated <- function(analysisSpecifications, forceVerificati ) } - # Check required dependencies have been installed: - dependencies <- extractDependencies(modules) - missingDependencies <- dependencies %>% - filter(!dependsOn %in% modules$module) - if (nrow(missingDependencies) > 0) { - message <- paste( - c( - "Detected missing dependencies:", - sprintf("- Missing module '%s' required by module '%s'", missingDependencies$dependsOn, missingDependencies$module) - ), - collapse = "\n" - ) - stop(message) - } + # Check required dependencies have been declared in the specification + # unless the user has set enforceModuleDependencies == FALSE + checkModuleDependencies( + modules = modules, + enforceModuleDependencies = enforceModuleDependencies + ) # Verify all modules are properly installed moduleInstallStatus <- list() @@ -300,6 +294,88 @@ verifyModuleInstallation <- function(module, version, silent = FALSE, forceVerif ) } + +#' Install the latest release of a module +#' +#' @description +#' This function will call out to the OHDSI GitHub repo to find the latest +#' version of the module and attempt to install it. Only modules that are listed +#' in the `getModuleList()` function are allowed since they have a known +#' GitHub location. +#' +#' @param moduleName The name of the module to install (e.g. "CohortGeneratorModule"). +#' This parameter must match a value found in the `module` column of `getModuleList()` +#' +#' @return +#' None - this function is called for its side effects +#' +#' @export +installLatestModule <- function(moduleName) { + assertModulesFolderSetting(x = Sys.getenv("INSTANTIATED_MODULES_FOLDER")) + instantiatedModulesFolder <- Sys.getenv("INSTANTIATED_MODULES_FOLDER") + # Verify that the user's GITHUB_PAT is set properly + # otherwise we may hit a rate limit + if (Sys.getenv("GITHUB_PAT") == "") { + stop("You must set your GITHUB_PAT to use this function. 
Please use the function `usethis::create_github_token()` and try again after restarting your R session.") + moduleList <- getModuleList() + if (isFALSE(moduleName %in% moduleList$module)) { + stop("Module: ", moduleName, " not found in the list from Strategus::getModuleList().") + } + moduleDetails <- moduleList %>% + dplyr::filter(module == moduleName) + urlTemplate <- "https://api.%s/repos/%s/%s/releases/latest" + baseUrl <- sprintf(urlTemplate, moduleDetails$remoteRepo, moduleDetails$remoteUsername, moduleDetails$module) + req <- httr2::request(base_url = baseUrl) |> + httr2::req_headers( + "Authorization" = paste0("Bearer ", Sys.getenv("GITHUB_PAT")), + "X-GitHub-Api-Version" = "2022-11-28" + ) + response <- httr2::req_perform(req) + release <- jsonlite::fromJSON(httr2::resp_body_string(response)) + version <- gsub("v", "", release$tag_name, ignore.case = TRUE) + moduleFolder <- ensureModuleInstantiated( + module = moduleDetails$module, + version = version, + remoteRepo = moduleDetails$remoteRepo, + remoteUsername = moduleDetails$remoteUsername + ) + rlang::inform(paste0("Installed ", moduleName, " to ", moduleFolder)) +} + +extractDependencies <- function(modules) { + extractDependenciesSingleModule <- function(module) { + moduleFolder <- getModuleFolder(module$module, module$version) + metaData <- getModuleMetaData(moduleFolder) + dependencies <- tibble( + module = module$module, + dependsOn = as.character(metaData$Dependencies) + ) + return(dependencies) + } + dependencies <- lapply(split(modules, 1:nrow(modules)), extractDependenciesSingleModule) %>% + bind_rows() + return(dependencies) +} + +checkModuleDependencies <- function(modules, enforceModuleDependencies) { + # Check required dependencies have been declared in the specification + # unless the user has set enforceModuleDependencies == FALSE + dependencies <- extractDependencies(modules) + missingDependencies <- dependencies %>% + filter(!dependsOn %in% modules$module) + if (nrow(missingDependencies) > 0 && enforceModuleDependencies) { + message <- paste( + c( + "Detected missing dependencies:", + sprintf("- Missing module '%s' required by module '%s'", missingDependencies$dependsOn, missingDependencies$module) + ), + collapse = "\n" + ) + stop(message) + } +} + getModuleTable <- function(analysisSpecifications, distinct = FALSE) { modules <- lapply( analysisSpecifications$moduleSpecifications, @@ -320,21 +396,6 @@ getModuleTable <- function(analysisSpecifications, distinct = FALSE) { return(modules) } -extractDependencies <- function(modules) { - extractDependenciesSingleModule <- function(module) { - moduleFolder <- getModuleFolder(module$module, module$version) - metaData <- getModuleMetaData(moduleFolder) - dependencies <- tibble( - module = module$module, - dependsOn = as.character(metaData$Dependencies) - ) - return(dependencies) - } - dependencies <- lapply(split(modules, 1:nrow(modules)), extractDependenciesSingleModule) %>% - bind_rows() - return(dependencies) -} - getModuleMetaData <- function(moduleFolder) { jsonFileName <- file.path(moduleFolder, "MetaData.json") if (!file.exists(jsonFileName)) { diff --git a/R/ResultModelCreation.R b/R/ResultModelCreation.R index 8ff0f4fb..5c13455a 100644 --- a/R/ResultModelCreation.R +++ b/R/ResultModelCreation.R @@ -27,7 +27,8 @@ createResultDataModels <- function(analysisSpecifications, executionSettings, executionScriptFolder = NULL, keyringName = NULL, - restart = FALSE) { + restart = FALSE, + enforceModuleDependencies = TRUE) { errorMessages <- 
checkmate::makeAssertCollection() keyringList <- keyring::keyring_list() checkmate::assertClass(analysisSpecifications, "AnalysisSpecifications", add = errorMessages) @@ -35,7 +36,11 @@ createResultDataModels <- function(analysisSpecifications, checkmate::assertChoice(x = keyringName, choices = keyringList$keyring, null.ok = TRUE, add = errorMessages) checkmate::reportAssertions(collection = errorMessages) - modules <- ensureAllModulesInstantiated(analysisSpecifications) + modules <- ensureAllModulesInstantiated( + analysisSpecifications = analysisSpecifications, + enforceModuleDependencies = enforceModuleDependencies + ) + if (isFALSE(modules$allModulesInstalled)) { stop("Stopping execution due to module issues") } @@ -203,8 +208,11 @@ runSchemaCreation <- function(analysisSpecifications, keyringSettings, moduleInd renv::use(lockfile = "renv.lock") } + ParallelLogger::addDefaultFileLogger(jobContext$moduleExecutionSettings$logFileName) ParallelLogger::addDefaultFileLogger(file.path(jobContext$moduleExecutionSettings$resultsSubFolder, "log.txt")) ParallelLogger::addDefaultErrorReportLogger(file.path(jobContext$moduleExecutionSettings$resultsSubFolder, "errorReportR.txt")) + + message("START SCHEMA CREATION: ", moduleName) # Main.R can override default behaviour by implementing this function if (is.function(createDataModelSchema)) { # If the keyring is locked, unlock it, set the value and then re-lock it @@ -239,6 +247,7 @@ runSchemaCreation <- function(analysisSpecifications, keyringSettings, moduleInd ) writeLines("specifications.not.written", doneFile) } + message("FINISH SCHEMA CREATION: ", moduleName) ParallelLogger::unregisterLogger("DEFAULT_FILE_LOGGER", silent = TRUE) ParallelLogger::unregisterLogger("DEFAULT_ERRORREPORT_LOGGER", silent = TRUE) @@ -248,6 +257,7 @@ runSchemaCreation <- function(analysisSpecifications, keyringSettings, moduleInd injectVars = list( jobContextFileName = jobContextFileName, dataModelExportPath = dataModelExportPath, + moduleName = module, doneFile = doneFile ) ) diff --git a/R/ResultsUpload.R b/R/ResultsUpload.R index 9bd5f8de..ece4ec07 100644 --- a/R/ResultsUpload.R +++ b/R/ResultsUpload.R @@ -58,6 +58,8 @@ runResultsUpload <- function(analysisSpecifications, keyringSettings, moduleInde } tempScriptFile <- file.path(moduleExecutionSettings$workSubFolder, "UploadScript.R") + ParallelLogger::addDefaultFileLogger(jobContext$moduleExecutionSettings$logFileName) + on.exit(ParallelLogger::unregisterLogger("DEFAULT_FILE_LOGGER", silent = TRUE)) ## # Module space executed code @@ -68,9 +70,10 @@ runResultsUpload <- function(analysisSpecifications, keyringSettings, moduleInde getDataModelSpecifications <- function(...) 
{ ParallelLogger::logInfo("Getting result model specification") - if (file.exists("resultsDataModelSpecification.csv")) { + rdmsFilePath <- file.path(jobContext$moduleExecutionSettings$resultsSubFolder, "resultsDataModelSpecification.csv") + if (file.exists(rdmsFilePath)) { res <- CohortGenerator::readCsv( - file = "resultsDataModelSpecification.csv" + file = rdmsFilePath ) return(res) } @@ -81,6 +84,8 @@ runResultsUpload <- function(analysisSpecifications, keyringSettings, moduleInde moduleInfo <- ParallelLogger::loadSettingsFromJson("MetaData.json") jobContext <- readRDS(jobContextFileName) specifications <- getDataModelSpecifications(jobContext) + + ParallelLogger::addDefaultFileLogger(jobContext$moduleExecutionSettings$logFileName) ParallelLogger::addDefaultFileLogger(file.path(jobContext$moduleExecutionSettings$resultsSubFolder, "log.txt")) ParallelLogger::addDefaultErrorReportLogger(file.path(jobContext$moduleExecutionSettings$resultsSubFolder, "errorReportR.txt")) @@ -88,6 +93,7 @@ runResultsUpload <- function(analysisSpecifications, keyringSettings, moduleInde renv::use(lockfile = "renv.lock") } + message("START MODULE RESULTS UPLOAD: ", moduleName) # Override default behaviour and do module specific upload inside module context? if (is.function(uploadResultsCallback)) { ParallelLogger::logInfo("Calling module result upload functionality") @@ -122,6 +128,7 @@ runResultsUpload <- function(analysisSpecifications, keyringSettings, moduleInde writeLines("specifications.written", doneFile) } + message("FINISH MODULE RESULTS UPLOAD: ", moduleName) ParallelLogger::unregisterLogger("DEFAULT_FILE_LOGGER", silent = TRUE) ParallelLogger::unregisterLogger("DEFAULT_ERRORREPORT_LOGGER", silent = TRUE) }, @@ -130,6 +137,7 @@ runResultsUpload <- function(analysisSpecifications, keyringSettings, moduleInde injectVars = list( jobContextFileName = jobContextFileName, dataModelExportPath = dataModelExportPath, + moduleName = module, doneFile = doneFile ) ) @@ -149,34 +157,32 @@ runResultsUpload <- function(analysisSpecifications, keyringSettings, moduleInde workStatus <- readLines(doneFile) if (workStatus == "specifications.written") { - ParallelLogger::logInfo("Uploading results according to module specification") + message("Uploading results according to module specification") specifications <- CohortGenerator::readCsv(dataModelExportPath) - moduleInfo <- ParallelLogger::loadSettingsFromJson(file.path(moduleFolder, "MetaData.json")) keyringName <- jobContext$keyringSettings$keyringName keyringLocked <- Strategus::unlockKeyring(keyringName = keyringName) - ParallelLogger::logInfo("Getting result database credentials") + message("Getting result database credentials") resultsConnectionDetails <- keyring::key_get(jobContext$moduleExecutionSettings$resultsConnectionDetailsReference, keyring = keyringName) resultsConnectionDetails <- ParallelLogger::convertJsonToSettings(resultsConnectionDetails) resultsConnectionDetails <- do.call(DatabaseConnector::createConnectionDetails, resultsConnectionDetails) jobContext$moduleExecutionSettings$resultsConnectionDetails <- resultsConnectionDetails - ParallelLogger::logInfo("Calling RMM for upload") + message("Calling RMM for upload") ResultModelManager::uploadResults( connectionDetails = jobContext$moduleExecutionSettings$resultsConnectionDetails, schema = jobContext$moduleExecutionSettings$resultsDatabaseSchema, resultsFolder = jobContext$moduleExecutionSettings$resultsSubFolder, - tablePrefix = moduleInfo$TablePrefix, forceOverWriteOfSpecifications = FALSE, 
purgeSiteDataBeforeUploading = FALSE, - databaseIdentifierFile = "database_meta_data.csv", + databaseIdentifierFile = file.path(executionSettings$resultsFolder, "DatabaseMetaData", "database_meta_data.csv"), runCheckAndFixCommands = FALSE, warnOnMissingTable = TRUE, specifications = specifications ) - ParallelLogger::logInfo("Upload completed") + message("Upload completed") if (keyringLocked) { keyring::keyring_lock(keyring = keyringName) } diff --git a/R/RunModule.R b/R/RunModule.R index 7485efc3..5aebcfa1 100644 --- a/R/RunModule.R +++ b/R/RunModule.R @@ -79,6 +79,7 @@ runModule <- function(analysisSpecifications, keyringSettings, moduleIndex, exec # unlockKeyring will be injected automatically keyringLocked <- unlockKeyring(keyringName = keyringName) + ParallelLogger::addDefaultFileLogger(jobContext$moduleExecutionSettings$logFileName) ParallelLogger::addDefaultFileLogger(file.path(jobContext$moduleExecutionSettings$resultsSubFolder, "log.txt")) ParallelLogger::addDefaultErrorReportLogger(file.path(jobContext$moduleExecutionSettings$resultsSubFolder, "errorReport.R")) @@ -109,7 +110,9 @@ runModule <- function(analysisSpecifications, keyringSettings, moduleIndex, exec if (keyringLocked) { keyring::keyring_lock(keyring = keyringName) } + message("START MODULE RUN: ", moduleName) execute(jobContext) + message("FINISH MODULE RUN: ", moduleName) ParallelLogger::unregisterLogger("DEFAULT_FILE_LOGGER", silent = TRUE) ParallelLogger::unregisterLogger("DEFAULT_ERRORREPORT_LOGGER", silent = TRUE) @@ -120,7 +123,8 @@ runModule <- function(analysisSpecifications, keyringSettings, moduleIndex, exec injectVars = list( doneFile = doneFile, isCdmExecution = isCdmExecution, - jobContextFileName = jobContextFileName + jobContextFileName = jobContextFileName, + moduleName = module ) ) diff --git a/R/Settings.R b/R/Settings.R index e3463f36..5503d098 100644 --- a/R/Settings.R +++ b/R/Settings.R @@ -90,6 +90,7 @@ addModuleSpecifications <- function(analysisSpecifications, moduleSpecifications #' @param tempEmulationSchema Some database platforms like Oracle and Impala do not truly support temp tables. To emulate temp tables, provide a schema with write privileges where temp tables can be created. #' @param workFolder A folder in the local file system where intermediate results can be written. #' @param resultsFolder A folder in the local file system where the module output will be written. +#' @param logFileName Logging information from Strategus and all modules will be located in this file. Individual modules will continue to have their own module-specific logs. By default this will be written to the root of the `resultsFolder` #' @param minCellCount The minimum number of subjects contributing to a count before it can be included #' in results. #' @param integerAsNumeric Logical: should 32-bit integers be converted to numeric (double) values? If FALSE 32-bit integers will be represented using R's native `Integer` class. 
Default is TRUE @@ -109,6 +110,7 @@ createCdmExecutionSettings <- function(connectionDetailsReference, tempEmulationSchema = getOption("sqlRenderTempEmulationSchema"), workFolder, resultsFolder, + logFileName = file.path(resultsFolder, "strategus-log.txt"), minCellCount = 5, integerAsNumeric = getOption("databaseConnectorIntegerAsNumeric", default = TRUE), integer64AsNumeric = getOption("databaseConnectorInteger64AsNumeric", default = TRUE), @@ -121,6 +123,7 @@ createCdmExecutionSettings <- function(connectionDetailsReference, checkmate::assertList(cohortTableNames, add = errorMessages) checkmate::assertCharacter(workFolder, len = 1, add = errorMessages) checkmate::assertCharacter(resultsFolder, len = 1, add = errorMessages) + checkmate::assertCharacter(logFileName, len = 1, add = errorMessages) checkmate::assertInt(minCellCount, add = errorMessages) checkmate::assertLogical(integerAsNumeric, max.len = 1, add = errorMessages) checkmate::assertLogical(integer64AsNumeric, max.len = 1, add = errorMessages) @@ -131,6 +134,7 @@ createCdmExecutionSettings <- function(connectionDetailsReference, # Normalize paths to convert relative paths to absolute paths workFolder <- normalizePath(workFolder, mustWork = F) resultsFolder <- normalizePath(resultsFolder, mustWork = F) + logFileName <- normalizePath(logFileName, mustWork = F) executionSettings <- list( connectionDetailsReference = connectionDetailsReference, @@ -140,6 +144,7 @@ createCdmExecutionSettings <- function(connectionDetailsReference, tempEmulationSchema = tempEmulationSchema, workFolder = workFolder, resultsFolder = resultsFolder, + logFileName = logFileName, minCellCount = minCellCount, integerAsNumeric = integerAsNumeric, integer64AsNumeric = integer64AsNumeric, @@ -157,6 +162,7 @@ createCdmExecutionSettings <- function(connectionDetailsReference, #' @param resultsDatabaseSchema A schema where the results tables are stored #' @param workFolder A folder in the local file system where intermediate results can be written. #' @param resultsFolder A folder in the local file system where the module output will be written. +#' @param logFileName Logging information from Strategus and all modules will be located in this file. Individual modules will continue to have their own module-specific logs. By default this will be written to the root of the `resultsFolder` #' @param minCellCount The minimum number of subjects contributing to a count before it can be included #' in results. #' @param integerAsNumeric Logical: should 32-bit integers be converted to numeric (double) values? If FALSE 32-bit integers will be represented using R's native `Integer` class. 
Default is TRUE @@ -170,6 +176,7 @@ createResultsExecutionSettings <- function(resultsConnectionDetailsReference, resultsDatabaseSchema, workFolder, resultsFolder, + logFileName = file.path(resultsFolder, "strategus-log.txt"), minCellCount = 5, integerAsNumeric = getOption("databaseConnectorIntegerAsNumeric", default = TRUE), integer64AsNumeric = getOption("databaseConnectorInteger64AsNumeric", default = TRUE)) { @@ -178,6 +185,7 @@ createResultsExecutionSettings <- function(resultsConnectionDetailsReference, checkmate::assertCharacter(resultsDatabaseSchema, len = 1, add = errorMessages) checkmate::assertCharacter(workFolder, len = 1, add = errorMessages) checkmate::assertCharacter(resultsFolder, len = 1, add = errorMessages) + checkmate::assertCharacter(logFileName, len = 1, add = errorMessages) checkmate::assertInt(minCellCount, add = errorMessages) checkmate::assertLogical(integerAsNumeric, max.len = 1, add = errorMessages) checkmate::assertLogical(integer64AsNumeric, max.len = 1, add = errorMessages) @@ -186,12 +194,14 @@ createResultsExecutionSettings <- function(resultsConnectionDetailsReference, # Normalize paths to convert relative paths to absolute paths workFolder <- normalizePath(workFolder, mustWork = F) resultsFolder <- normalizePath(resultsFolder, mustWork = F) + logFileName <- normalizePath(logFileName, mustWork = F) executionSettings <- list( resultsConnectionDetailsReference = resultsConnectionDetailsReference, resultsDatabaseSchema = resultsDatabaseSchema, workFolder = workFolder, resultsFolder = resultsFolder, + logFileName = logFileName, minCellCount = minCellCount, integerAsNumeric = integerAsNumeric, integer64AsNumeric = integer64AsNumeric diff --git a/R/ShareResults.R b/R/ShareResults.R new file mode 100644 index 00000000..d6f1da9a --- /dev/null +++ b/R/ShareResults.R @@ -0,0 +1,32 @@ +#' Create a zip file with all study results for sharing with study coordinator +#' +#' @details +#' Creates a `.zip` file of the `.csv` files found in the +#' `resultsFolder`. The resulting `.zip` file will have +#' relative paths to the root of the `resultsFolder` +#' which is generally found in `executionSettings$resultsFolder`. +#' +#' @param resultsFolder The folder holding the study results. This is found in +#' `executionSettings$resultsFolder`. +#' +#' @param zipFile The path to the zip file to be created. +#' +#' @return +#' Does not return anything. Is called for the side-effect of creating the +#' zip file with results. +#' +#' @export +zipResults <- function(resultsFolder, zipFile) { + files <- list.files( + path = resultsFolder, + pattern = "\\.csv$", + recursive = TRUE, + full.names = TRUE + ) + DatabaseConnector::createZipFile( + zipFile = zipFile, + files = files, + rootFolder = resultsFolder + ) + message(zipFile, " created.") +} diff --git a/docs/404.html b/docs/404.html index 428362fd..a68b54eb 100644 --- a/docs/404.html +++ b/docs/404.html @@ -6,7 +6,7 @@
[Regenerated pkgdown site under docs/ (rendered HTML; markup was stripped in this extract). Recoverable content changes:
- All pages (docs/404.html, docs/articles/*, docs/articles/index.html, docs/authors.html, docs/index.html, docs/news/index.html) rebuilt with pkgdown 2.0.9 (previously 2.0.7); rendered article dates updated from 2024-01-29 to 2024-06-04.
- CreatingAnalysisSpecification article: HADES package pins updated (CohortGenerator v0.8.1 to v0.9.0, Characterization v0.1.3 to v0.2.0, CohortMethod v5.2.0 to v5.3.0, SelfControlledCaseSeries v5.1.1 to v5.2.0; CohortDiagnostics v3.2.5, CohortIncidence v3.3.0, and PatientLevelPrediction v6.3.6 unchanged) and module settings scripts updated (CohortGeneratorModule v0.3.0 to v0.4.1, CohortIncidenceModule v0.4.0 to v0.4.1, CharacterizationModule v0.5.0 to v0.6.0, CohortMethodModule v0.3.0 to v0.3.1, SelfControlledCaseSeriesModule v0.4.1 to v0.5.0).
- ExecuteStrategus article: the Eunomia download output now also notes extracting D:/eunomiaData/GiBleed_5.3.zip to D:/eunomiaData/GiBleed_5.3.sqlite; the re-rendered code chunks cover storeConnectionDetails(), createCdmExecutionSettings(), saving execution settings with ParallelLogger::saveSettingsToJson(), setting INSTANTIATED_MODULES_FOLDER, loading the analysis specification, and execute().]
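To make the summarized ExecuteStrategus walkthrough concrete, here is a condensed sketch in R of its first steps. It is illustrative only: the Eunomia example database, the "eunomia" reference name, and the module folder path are assumptions taken from the rendered vignette, not new behavior in this changeset.

library(Strategus)
# Store connection details once under a reference name (kept in the keyring)
connectionDetails <- Eunomia::getEunomiaConnectionDetails()
storeConnectionDetails(
  connectionDetails = connectionDetails,
  connectionDetailsReference = "eunomia"
)
# Modules are instantiated once into a shared folder and reused across studies
Sys.setenv("INSTANTIATED_MODULES_FOLDER" = "c:/strategus/modules")
# Load an analysis specification shipped with the package for testing
analysisSpecifications <- ParallelLogger::loadSettingsFromJson(
  fileName = system.file("testdata/analysisSpecification.json", package = "Strategus")
)
# Execution settings and execute() itself are sketched further below.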
Reference pages for createCdmExecutionSettings() and createResultsExecutionSettings() now document the new logFileName argument between the existing resultsFolder and minCellCount entries:
resultsFolder: A folder in the local file system where the module output will be written.
logFileName: Logging information from Strategus and all modules will be located in this file. Individual modules will continue to have their own module-specific logs. By default this will be written to the root of the resultsFolder.
minCellCount: The minimum number of subjects contributing to a count before it can be included in results.
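Because logFileName defaults to file.path(resultsFolder, "strategus-log.txt"), existing calls pick up the central log without any change. A minimal sketch (the schema names, cohort table name, and folder paths are illustrative; cdmDatabaseSchema is assumed from the existing function signature):

executionSettings <- createCdmExecutionSettings(
  connectionDetailsReference = "eunomia",
  workDatabaseSchema = "main",
  cdmDatabaseSchema = "main",
  cohortTableNames = CohortGenerator::getCohortTableNames(cohortTable = "strategus_cohort"),
  workFolder = "c:/strategus/work",
  resultsFolder = "c:/strategus/results",
  # Central Strategus log; individual modules still write their own log.txt
  logFileName = file.path("c:/strategus/results", "strategus-log.txt"),
  minCellCount = 5
)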
Usage (ensureAllModulesInstantiated.Rd):
-ensureAllModulesInstantiated(analysisSpecifications, forceVerification = FALSE)
+ensureAllModulesInstantiated(
+  analysisSpecifications,
+  forceVerification = FALSE,
+  enforceModuleDependencies = TRUE
+)
enforceModuleDependencies (documented on both the ensureAllModulesInstantiated() and execute() reference pages): When set to TRUE, Strategus will enforce module dependencies that are declared by each module. For example, the CohortDiagnostics module declares a dependency on the CohortGenerator module, and Strategus will require that an analysis specification declare both modules in order to execute the analysis. When set to FALSE, Strategus will not enforce these module dependencies, which assumes you have properly run all module dependencies yourself. Setting this to FALSE is not recommended since it is potentially unsafe.
restart: Restart run? Requires executionScriptFolder to be specified, and be the same as the executionScriptFolder used in the run to restart.
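The default (enforceModuleDependencies = TRUE) preserves the pre-0.3.0 behavior of stopping when a declared dependency is missing from the specification. A sketch of opting out, assuming the analysisSpecifications and executionSettings objects were created as in the sketches above:

execute(
  analysisSpecifications = analysisSpecifications,
  executionSettings = executionSettings,
  # Not recommended: skips the declared-dependency check, for example when the
  # dependency's results were already produced in an earlier run
  enforceModuleDependencies = FALSE
)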
getModuleList()
Provides a list of HADES modules to run through Strategus
Install the latest release of a module
Synchronize renv.lock files and overwrite the target file (read the description)
Load module execution space inside an renv, inspired by targets::tar_script but allowing custom variable execution
Create a zip file with all study results for sharing with study coordinator
New reference page installLatestModule.Rd (Source: R/ModuleInstantiation.R)
Install the latest release of a module
This function will call out to the OHDSI GitHub repo to find the latest version of the module and attempt to install it. Only modules that are listed in the getModuleList() function are allowed since they have a known GitHub location.
Usage: installLatestModule(moduleName)
moduleName: The name of the module to install (e.g. "CohortGeneratorModule"). This parameter must match a value found in the module column of getModuleList().
Value: None - this function is called for its side effects
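A usage sketch: installLatestModule() requires the INSTANTIATED_MODULES_FOLDER environment variable and a GITHUB_PAT, as enforced by the checks in R/ModuleInstantiation.R; the folder path below is illustrative.

# usethis::create_github_token() can help create a GITHUB_PAT if none is set
Sys.setenv("INSTANTIATED_MODULES_FOLDER" = "c:/strategus/modules")
installLatestModule(moduleName = "CohortGeneratorModule")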
syncLockFile.Rd: Source annotation updated to R/RenvHelpers.R
New reference page zipResults.Rd (Source: R/ShareResults.R)
Create a zip file with all study results for sharing with study coordinator
Usage: zipResults(resultsFolder, zipFile)
resultsFolder: The folder holding the study results. This is found in executionSettings$resultsFolder.
zipFile: The path to the zip file to be created.
Value: Does not return anything. Is called for the side-effect of creating the zip file with results.
Details: Creates a .zip file of the .csv files found in the resultsFolder. The resulting .zip file will have relative paths to the root of the resultsFolder, which is generally found in executionSettings$resultsFolder.
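A usage sketch for sharing results after execute() completes; the executionSettings object and the output path are illustrative.

zipResults(
  resultsFolder = executionSettings$resultsFolder,
  zipFile = file.path(getwd(), "study-results.zip")
)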