diff --git a/DESCRIPTION b/DESCRIPTION index 0180741..8fa61a7 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,8 +1,8 @@ Package: DeepPatientLevelPrediction Type: Package Title: Deep Learning For Patient Level Prediction Using Data In The OMOP Common Data Model -Version: 0.0.1 -Date: 2021-06-07 +Version: 1.0.0 +Date: 2022-08-29 Authors@R: c( person("Jenna", "Reps", email = "jreps@its.jnj.com", role = c("aut")), person("Egill", "Fridgeirsson", email = "e.fridgeirsson@erasmusmc.nl", role = c("aut", "cre")), @@ -37,6 +37,6 @@ Remotes: ohdsi/PatientLevelPrediction@develop, ohdsi/FeatureExtraction, ohdsi/Eunomia -RoxygenNote: 7.2.0 +RoxygenNote: 7.2.1 Encoding: UTF-8 Config/testthat/edition: 3 diff --git a/NAMESPACE b/NAMESPACE index ddcfd4b..a102062 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -2,17 +2,12 @@ export(Dataset) export(Estimator) -export(doubleLayerNN) -export(fitDeepNNTorch) export(fitEstimator) export(gridCvDeep) export(predictDeepEstimator) -export(predictDeepNN) -export(setDeepNNTorch) +export(setMultiLayerPerceptron) export(setResNet) export(setTransformer) -export(singleLayerNN) -export(tripleLayerNN) import(data.table) importFrom(data.table,":=") importFrom(dplyr,"%>%") diff --git a/R/Dataset.R b/R/Dataset.R index bbe6313..73464d1 100644 --- a/R/Dataset.R +++ b/R/Dataset.R @@ -6,10 +6,9 @@ Dataset <- torch::dataset( #' @param data a dataframe like object with the covariates #' @param labels a dataframe with the labels #' @param numericalIndex in what column numeric data is in (if any) - #' @param all if True then returns all features instead of splitting num/cat - initialize = function(data, labels = NULL, numericalIndex = NULL, all = FALSE) { + initialize = function(data, labels = NULL, numericalIndex = NULL) { # determine numeric - if (is.null(numericalIndex) && all == FALSE) { + if (is.null(numericalIndex)) { numericalIndex <- data %>% dplyr::group_by(columnId) %>% dplyr::collect() %>% @@ -24,23 +23,12 @@ Dataset <- torch::dataset( if (!is.null(labels)) { self$target <- torch::torch_tensor(labels) } else { - if (all == FALSE) { - self$target <- torch::torch_tensor(rep(0, data %>% dplyr::distinct(rowId) - %>% dplyr::collect() %>% nrow())) - } else { - self$target <- torch::torch_tensor(rep(0, dim(data)[[1]])) - } + self$target <- torch::torch_tensor(rep(0, data %>% dplyr::distinct(rowId) + %>% dplyr::collect() %>% nrow())) } # Weight to add in loss function to positive class self$posWeight <- (self$target == 0)$sum() / self$target$sum() - # for DeepNNTorch - self$useAll <- all - if (all) { - self$all <- torch::torch_tensor(as.matrix(data), dtype = torch::torch_float32()) - self$cat <- NULL - self$num <- NULL - return() - } + # add features catColumns <- which(!numericalIndex) dataCat <- dplyr::filter(data, columnId %in% catColumns) %>% @@ -80,9 +68,6 @@ Dataset <- torch::dataset( size = c(self$target$shape, sum(numericalIndex)) )$to_dense() } - if (self$cat$shape[1] != self$num$shape[1]) { - browser() - } }, getNumericalIndex = function() { return( @@ -110,26 +95,20 @@ Dataset <- torch::dataset( }, .getBatchSingle = function(item) { # add leading singleton dimension since models expects 2d tensors - if (self$useAll) { - batch <- list(all = self$all[item]$unsqueeze(1)) - } else { - batch <- list(cat = self$cat[item]$unsqueeze(1), - num = self$num[item]$unsqueeze(1)) - } + batch <- list( + cat = self$cat[item]$unsqueeze(1), + num = self$num[item]$unsqueeze(1) + ) return(list( batch = batch, target = self$target[item]$unsqueeze(1) )) }, .getBatchRegular = function(item) { - if
(self$useAll) { - batch <- list(all = self$all[item]) - } else { - batch = list( - cat = self$cat[item], - num = self$num[item] - ) - } + batch <- list( + cat = self$cat[item], + num = self$num[item] + ) return(list( batch = batch, target = self$target[item] diff --git a/R/DeepNNTorch.R b/R/DeepNNTorch.R deleted file mode 100644 index 4e20ca7..0000000 --- a/R/DeepNNTorch.R +++ /dev/null @@ -1,493 +0,0 @@ -#' settings for a Deep neural network -#' @param units A list of vectors for neurons per layer -#' @param layerDropout Dropout to use per layer -#' @param lr Learning rate ot use -#' @param decay Weight decay to use -#' @param outcomeWeight Weight for minority outcome in cost function -#' @param batchSize Batch size to use -#' @param epochs How many epochs to use -#' @param device Which device to use -#' @param seed A seed to make experiments more reproducible -#' @export -setDeepNNTorch <- function(units = list(c(128, 64), 128), - layerDropout = c(0.2), - lr = c(1e-4), - decay = c(1e-5), - outcomeWeight = c(1.0), - batchSize = c(10000), - epochs = c(100), - device = "cpu", - seed = NULL) { - param <- expand.grid( - units = units, - layerDropout = layerDropout, - lr = lr, decay = decay, outcomeWeight = outcomeWeight, epochs = epochs, - seed = ifelse(is.null(seed), "NULL", seed) - ) - - param$units1 <- unlist(lapply(param$units, function(x) x[1])) - param$units2 <- unlist(lapply(param$units, function(x) x[2])) - param$units3 <- unlist(lapply(param$units, function(x) x[3])) - param$units <- NULL - - attr(param, "settings") <- list( - modelType = "DeepNN", - seed = seed[1], - name = "DeepNNTorch", - units = units, - layerDropout = layerDropout, - lr = lr, - decay = decay, - outcomeWeight = outcomeWeight, - batchSize = batchSize, - device = device, - epochs = epochs - ) - - attr(param, "modelType") <- "binary" - attr(param, "settings")$saveType <- "file" - - result <- list( - fitFunction = "fitDeepNNTorch", - param = param - ) - - class(result) <- "modelSettings" - - return(result) -} - -#' Fits a deep neural network -#' @param trainData Training data object -#' @param modelSettings modelSettings object -#' @param search Which kind of search strategy to use -#' @param analysisId Analysis Id -#' @export -fitDeepNNTorch <- function(trainData, - modelSettings, - search = "grid", - analysisId) { - start <- Sys.time() - - # check covariateData - if (!FeatureExtraction::isCovariateData(trainData$covariateData)) { - stop("DeepNNTorch requires correct covariateData") - } - - param <- modelSettings$param - # get the settings from the param - settings <- attr(param, "settings") - - if (!is.null(trainData$folds)) { - trainData$labels <- merge(trainData$labels, trainData$fold, by = "rowId") - } - - mappedData <- PatientLevelPrediction::toSparseM( - plpData = trainData, - map = NULL - ) - - matrixData <- mappedData$dataMatrix - labels <- mappedData$labels - covariateRef <- mappedData$covariateRef - - outLoc <- PatientLevelPrediction::createTempModelLoc() - - cvResult <- do.call( - what = gridCvDeepNN, - args = list( - matrixData = matrixData, - labels = labels, - seed = settings$seed, - modelName = settings$name, - device = settings$device, - batchSize = settings$batchSize, - epochs = settings$epochs, - modelLocation = outLoc, - paramSearch = param - ) - ) - - hyperSummary <- do.call(rbind, lapply(cvResult$paramGridSearch, function(x) x$hyperSummary)) - - prediction <- cvResult$prediction - - incs <- rep(1, nrow(covariateRef)) - covariateRef$included <- incs - covariateRef$covariateValue <- 0 - - comp <- 
start - Sys.time() - - result <- list( - model = cvResult$estimator, # file.path(outLoc), - - prediction = prediction, - settings = list( - plpDataSettings = attr(trainData, "metaData")$plpDataSettings, - covariateSettings = attr(trainData, "metaData")$covariateSettings, - populationSettings = attr(trainData, "metaData")$populationSettings, - featureEngineering = attr(trainData$covariateData, "metaData")$featureEngineering, - tidyCovariates = attr(trainData$covariateData, "metaData")$tidyCovariateDataSettings, - requireDenseMatrix = F, - modelSettings = list( - model = settings$name, - param = param, - finalModelParameters = cvResult$finalParam, - extraSettings = attr(param, "settings") - ), - splitSettings = attr(trainData, "metaData")$splitSettings, - sampleSettings = attr(trainData, "metaData")$sampleSettings - ), - trainDetails = list( - analysisId = analysisId, - cdmDatabaseSchema = attr(trainData, "metaData")$cdmDatabaseSchema, - outcomeId = attr(trainData, "metaData")$outcomeId, - cohortId = attr(trainData, "metaData")$cohortId, - attrition = attr(trainData, "metaData")$attrition, - trainingTime = comp, - trainingDate = Sys.Date(), - hyperParamSearch = hyperSummary - ), - covariateImportance = covariateRef - ) - - class(result) <- "plpModel" - attr(result, "predictionFunction") <- "predictDeepNN" - attr(result, "modelType") <- "binary" - attr(result, "saveType") <- attr(param, "settings")$saveType - - return(result) -} - -#' Create predictions for a deep neural network -#' @param plpModel The plpModel to predict for -#' @param data The data to make predictions for -#' @param cohort The cohort to use -#' @export -predictDeepNN <- function(plpModel, - data, - cohort, - batchSize=512, - device='cpu') { - if (!inherits(plpModel, 'plpModel') & !inherits(plpModel, 'nn_module')) { - plpModel <- list(model = plpModel) - attr(plpModel, "modelType") <- "binary" - } - - if (inherits(data, 'plpData')) { - dataMat <- PatientLevelPrediction::toSparseM( - plpData = data, - cohort = cohort, - map = plpModel$covariateImportance %>% - dplyr::select(.data$columnId, .data$covariateId) - ) - - data <- Dataset(dataMat$dataMatrix, all = TRUE) # add numeric details.. 
- } - - # get predictions - prediction <- cohort - - if (is.character(plpModel$model)) { - model <- torch::torch_load(file.path(plpModel$model, "DeepNNTorchModel.pt"), device = "cpu") - } else { - model <- plpModel - } - model$to(device=device) - batchIndex <- 1:length(data) - batchIndex <- split(batchIndex, ceiling(seq_along(batchIndex) / batchSize)) - torch::with_no_grad({ - predictions <- c() - model$eval() - coro::loop(for (b in batchIndex) { - batch <- data[b]$batch$all$to(device=device) - target <- data[b]$target$to(device=device) - pred <- model(batch) - predictions <- c(predictions, as.array(torch::torch_sigmoid(pred[,1]$cpu()))) - }) - }) - prediction$value <- predictions - - attr(prediction, "metaData")$modelType <- attr(plpModel, "modelType") - - return(prediction) -} - - -gridCvDeepNN <- function(matrixData, - labels, - seed, - modelName, - device, - batchSize, - epochs, - modelLocation, - paramSearch) { - ParallelLogger::logInfo(paste0("Running CV for ", modelName, " model")) - - ########################################################################### - - - gridSearchPredictons <- list() - length(gridSearchPredictons) <- nrow(paramSearch) - - for (gridId in 1:nrow(paramSearch)) { - - # get the params - modelParamNames <- c("layerDropout", "lr", "decay", "outcomeWeight", "epochs", "units1", "units2", "units3") - modelParams <- paramSearch[gridId, modelParamNames] - - fitParams <- paramSearch[gridId, c("lr", "decay")] - fitParams$epochs <- epochs - fitParams$batchSize <- batchSize - - - # initiate prediction - prediction <- c() - - fold <- labels$index - ParallelLogger::logInfo(paste0("Max fold: ", max(fold))) - - dataset <- Dataset(matrixData, labels$outcomeCount, all = TRUE) - # modelParams$cat_features <- dataset$cat$shape[2] - # modelParams$num_features <- dataset$num$shape[2] - - for (i in 1:max(fold)) { - if (is.na(modelParams$units2)) { - model <- singleLayerNN( - inputN = ncol(matrixData), - layer1 = modelParams$units1, - outputN = 2, - layer_dropout = modelParams$layerDropout - ) - } else if (is.na(modelParams$units3)) { - model <- doubleLayerNN( - inputN = ncol(matrixData), - layer1 = modelParams$units1, - layer2 = modelParams$units2, - outputN = 2, - layer_dropout = modelParams$layerDropout - ) - } else { - model <- tripleLayerNN( - inputN = ncol(matrixData), - layer1 = modelParams$units1, - layer2 = modelParams$units2, - layer3 = modelParams$units3, - outputN = 2, - layer_dropout = modelParams$layerDropout - ) - } - - model$to(device=device) - criterion <- torch::nn_bce_loss() # Binary crossentropy only - optimizer <- torch::optim_adam(model$parameters, lr = fitParams$lr, - weight_decay = fitParams$decay) - - # Need earlyStopping - # Need setting decay - - ParallelLogger::logInfo(paste0("Fold ", i)) - trainDataset <- torch::dataset_subset(dataset, indices = which(fold != i)) - testDataset <- torch::dataset_subset(dataset, indices = which(fold == i)) - - batchIndex <- torch::torch_randperm(length(trainDataset)) + 1L - batchIndex <- split(batchIndex, ceiling(seq_along(batchIndex) / batchSize)) - - testBatchIndex <- 1:length(testDataset) - testBatchIndex <- split(testBatchIndex, ceiling(seq_along(testBatchIndex) / batchSize)) - for (j in 1:epochs) { - startTime <- Sys.time() - trainLosses <- torch::torch_empty(length(batchIndex)) - ix <- 1 - model$train() - progressBar <- utils::txtProgressBar(style = 3) - coro::loop(for (b in batchIndex) { - optimizer$zero_grad() - batch <- trainDataset[b]$batch$all$to(device=device) - target <- 
trainDataset[b]$target$to(device=device) - y_pred <- model(batch) - loss <- criterion(y_pred[, 1], target) - loss$backward() - optimizer$step() - - trainLosses[ix] <- loss$detach() - utils::setTxtProgressBar(progressBar, ix / length(batchIndex)) - ix <- ix + 1 - }) - close(progressBar) - trainLoss <- trainLosses$mean()$item() - torch::with_no_grad({ - ix <- 1 - testLosses <- torch::torch_empty(length(batchIndex)) - model$eval() - predictions <- list() - targets <- list() - coro::loop(for (b in testBatchIndex) { - batch <- dataset[b]$batch$all$to(device=device) - target <- dataset[b]$target$to(device=device) - pred <- model(batch) - predictions <- c(predictions, pred[,1]) - targets <- c(targets, target) - testLosses[ix] <- criterion(pred[,1], target) - ix <- ix + 1 - }) - testLoss <- loss$mean()$item() - predictionsClass <- data.frame( - value = as.matrix(torch::torch_sigmoid(torch::torch_cat(predictions)$cpu())), - outcomeCount = as.matrix(torch::torch_cat(targets)$cpu()) - ) - attr(predictionsClass, "metaData")$modelType <- "binary" - auc <- PatientLevelPrediction::computeAuc(predictionsClass) - }) - - delta <- Sys.time() - startTime - ParallelLogger::logInfo( - "Epochs: ", j, - " | Val AUC: ", round(auc, 3), - " | Val Loss: ", round(testLoss, 3), - " | Train Loss: ", round(trainLoss, 3), - " | Time: ", round(delta, 3), " ", - units(delta) - ) - - } - - predictionTable <- labels[labels$index == i, ] - predictionTable$value <- predictionsClass$value - - if (!"plpModel" %in% class(model)) { - model <- list(model = model) - attr(model, "modelType") <- "binary" - } - attr(predictionTable, "metaData")$modelType <- attr(model, "modelType") - - prediction <- rbind(prediction, predictionTable) - } - gridSearchPredictons[[gridId]] <- list( - prediction = prediction, - param = paramSearch[gridId, ] - ) - } - - - # get best para (this could be modified to enable any metric instead of AUC, just need metric input in function) - paramGridSearch <- lapply(gridSearchPredictons, function(x) { - do.call(PatientLevelPrediction::computeGridPerformance, x) - }) # cvAUCmean, cvAUC, param - - optimalParamInd <- which.max(unlist(lapply(paramGridSearch, function(x) x$cvPerformance))) - - finalParam <- paramGridSearch[[optimalParamInd]]$param - - cvPrediction <- gridSearchPredictons[[optimalParamInd]]$prediction - cvPrediction$evaluationType <- "CV" - - ParallelLogger::logInfo("Training final model using optimal parameters") - - # get the params - modelParams <- finalParam[modelParamNames] - fitParams <- finalParam[c("lr", "decay")] - fitParams$epochs <- epochs - fitParams$batchSize <- batchSize - # create the dir - if (!dir.exists(file.path(modelLocation))) { - dir.create(file.path(modelLocation), recursive = T) - } - - trainDataset <- Dataset( - matrixData, - labels$outcomeCount, - all = TRUE - ) - - # modelParams$cat_features <- trainDataset$cat$shape[2] - # modelParams$num_features <- trainDataset$num$shape[2] - - # trainDataset <- torch::dataset_subset(dataset, indices=which(fold!=i)) - - if (is.na(modelParams$units2)) { - model <- singleLayerNN( - inputN = ncol(matrixData), - layer1 = modelParams$units1, - outputN = 2, - layer_dropout = modelParams$layerDropout - ) - } else if (is.na(modelParams$units3)) { - model <- doubleLayerNN( - inputN = ncol(matrixData), - layer1 = modelParams$units1, - layer2 = modelParams$units2, - outputN = 2, - layer_dropout = modelParams$layerDropout - ) - } else { - model <- tripleLayerNN( - inputN = ncol(matrixData), - layer1 = modelParams$units1, - layer2 = modelParams$units2, 
- layer3 = modelParams$units3, - outputN = 2, - layer_dropout = modelParams$layerDropout - ) - } - - model$to(device=device) - - criterion <- torch::nn_bce_loss() # Binary crossentropy only - optimizer <- torch::optim_adam(model$parameters, lr = fitParams$lr) - - batchIndex <- torch::torch_randperm(length(trainDataset)) + 1L - batchIndex <- split(batchIndex, ceiling(seq_along(batchIndex) / batchSize)) - - for (epoch in 1:epochs) { - ix <- 1 - model$train() - progressBar <- utils::txtProgressBar(style = 3) - coro::loop(for (b in batchIndex) { - optimizer$zero_grad() - batch <- dataset[b]$batch$all$to(device=device) - target <- dataset[b]$target$to(device=device) - out <- model(batch) - loss <- criterion(out[,1], target) - loss$backward() - - optimizer$step() - utils::setTxtProgressBar(progressBar, ix / length(batchIndex)) - ix <- ix + 1 - }) - close(progressBar) - } - - browser() - ParallelLogger::logInfo("Calculating predictions on all train data...") - - prediction <- predictDeepNN(model, data=trainDataset, cohort=labels, - batchSize = batchSize, device = device) - prediction$evaluationType <- "Train" - - prediction <- rbind( - prediction, - cvPrediction - ) - - # modify prediction - prediction <- prediction %>% - dplyr::select(-.data$rowId, -.data$index) %>% - dplyr::rename(rowId = .data$originalRowId) - - prediction$cohortStartDate <- as.Date(prediction$cohortStartDate, origin = "1970-01-01") - - - # save torch code here - torch::torch_save(model, file.path(modelLocation, "DeepNNTorchModel.pt")) - - return( - list( - estimator = modelLocation, - prediction = prediction, - finalParam = finalParam, - paramGridSearch = paramGridSearch - ) - ) -} diff --git a/R/Estimator.R b/R/Estimator.R index e38bf6c..8abb048 100644 --- a/R/Estimator.R +++ b/R/Estimator.R @@ -16,6 +16,24 @@ # See the License for the specific language governing permissions and # limitations under the License. 
+#' setEstimator +#' +#' @description +#' Creates settings for the Estimator, which takes a model and trains it +#' +#' @name setEstimator +#' @param learningRate what learning rate to use +#' @param weightDecay what weight_decay to use +#' @param optimizer which optimizer to use +#' @param scheduler which learning rate scheduler to use +#' @param criterion loss function to use +#' @param posWeight If more weight should be added to positive labels during training; note this will result in miscalibrated models +#' @param earlyStopping If early stopping should be used, which stops training if your metric is not improving +#' @param earlyStoppingMetric Which metric to use for early stopping +#' @param patience patience for earlyStopper +#' @param hyperparameterMetric which metric to use for hyperparameter selection: loss, auc, auprc or a custom function +NULL + #' fitEstimator #' #' @description @@ -71,7 +89,8 @@ fitEstimator <- function(trainData, dplyr::collect() %>% dplyr::mutate( included = incs, - covariateValue = 0 + covariateValue = 0, + isNumeric = cvResult$numericalIndex ) @@ -79,32 +98,33 @@ fitEstimator <- function(trainData, result <- list( model = cvResult$estimator, # file.path(outLoc), + preprocessing = list( + featureEngineering = attr(trainData$covariateData, "metaData")$featureEngineering, + tidyCovariates = attr(trainData$covariateData, "metaData")$tidyCovariateDataSettings, + requireDenseMatrix = settings$requiresDenseMatrix + ), prediction = prediction, - settings = list( - plpDataSettings = attr(trainData, "metaData")$plpDataSettings, + modelDesign = PatientLevelPrediction::createModelDesign( + targetId = attr(trainData, "metaData")$targetId, + outcomeId = attr(trainData, "metaData")$outcomeId, + restrictPlpDataSettings = attr(trainData, "metaData")$restrictPlpDataSettings, covariateSettings = attr(trainData, "metaData")$covariateSettings, populationSettings = attr(trainData, "metaData")$populationSettings, - featureEngineering = attr(trainData$covariateData, "metaData")$featureEngineering, - tidyCovariates = attr(trainData$covariateData, "metaData")$tidyCovariateDataSettings, - requireDenseMatrix = F, - modelSettings = list( - model = settings$name, - param = param, - finalModelParameters = cvResult$finalParam, - numericalIndex = cvResult$numericalIndex, - extraSettings = attr(param, "settings") - ), + featureEngineeringSettings = attr(trainData$covariateData, "metaData")$featureEngineeringSettings, + preprocessSettings = attr(trainData$covariateData, "metaData")$preprocessSettings, + modelSettings = modelSettings, splitSettings = attr(trainData, "metaData")$splitSettings, sampleSettings = attr(trainData, "metaData")$sampleSettings ), trainDetails = list( analysisId = analysisId, - cdmDatabaseSchema = attr(trainData, "metaData")$cdmDatabaseSchema, - outcomeId = attr(trainData, "metaData")$outcomeId, - cohortId = attr(trainData, "metaData")$cohortId, + analysisSource = "", + developementDatabase = attr(trainData, "metaData")$cdmDatabaseSchema, attrition = attr(trainData, "metaData")$attrition, - trainingTime = comp, + trainingTime = paste(as.character(abs(comp)), attr(comp, "units")), trainingDate = Sys.Date(), + modelName = settings$name, + finalModelParameters = cvResult$finalParam, hyperParamSearch = hyperSummary ), covariateImportance = covariateRef @@ -145,20 +165,19 @@ predictDeepEstimator <- function(plpModel, ) ) data <- Dataset(mappedData$covariates, - numericalIndex = plpModel$settings$modelSettings$numericalIndex + numericalIndex = plpModel$covariateImportance$isNumeric ) } # get predictions prediction <- cohort - if (is.character(plpModel$model)) { model <- torch::torch_load(file.path(plpModel$model, "DeepEstimatorModel.pt"), device = "cpu") estimator <- Estimator$new( - baseModel = plpModel$settings$modelSettings$model, + baseModel = attr(plpModel$modelDesign$modelSettings$param, "settings")$baseModel, modelParameters = model$modelParameters, fitParameters = model$fitParameters, - device = plpModel$settings$modelSettings$extraSettings$device + device = attr(plpModel$modelDesign$modelSettings$param, "settings")$device ) estimator$model$load_state_dict(model$modelStateDict) prediction$value <- estimator$predictProba(data) @@ -228,7 +247,7 @@ gridCvDeep <- function(mappedData, ParallelLogger::logInfo(paste0("Fold ", i)) trainDataset <- torch::dataset_subset(dataset, indices = which(fold != i)) testDataset <- torch::dataset_subset(dataset, indices = which(fold == i)) - fitParams$posWeight <- trainDataset$dataset$posWeight + # fitParams$posWeight <- trainDataset$dataset$posWeight estimator <- Estimator$new( baseModel = baseModel, modelParameters = modelParams, @@ -266,7 +285,7 @@ gridCvDeep <- function(mappedData, } # get best para (this could be modified to enable any metric instead of AUC, just need metric input in function) paramGridSearch <- lapply(gridSearchPredictons, function(x) { - do.call(computeGridPerformance, x) + do.call(PatientLevelPrediction::computeGridPerformance, x) }) # cvAUCmean, cvAUC, param optimalParamInd <- which.max(unlist(lapply(paramGridSearch, function(x) x$cvPerformance)))
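The `trainingTime` entry above now stores a readable duration string. A minimal sketch (not package code) of why both `abs()` and the `units` attribute are needed, assuming `comp` is computed as `start - Sys.time()` as in the removed DeepNNTorch code:

```r
# start - Sys.time() yields a negative difftime; its units ("secs", "mins",
# "hours") are chosen automatically by R, so they must be stored alongside
# the absolute value to get an unambiguous string
start <- Sys.time()
Sys.sleep(2)
comp <- start - Sys.time()
as.character(abs(comp))                              # "2.00..." (number only, no units)
attr(comp, "units")                                  # "secs"
paste(as.character(abs(comp)), attr(comp, "units"))  # "2.00... secs"
```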
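diff --git a/R/MLP.R b/R/MLP.R new file mode 100644 index 0000000..dc5f56a --- /dev/null +++ b/R/MLP.R @@ -0,0 +1,175 @@ +# @file MLP.R +# +# Copyright 2022 Observational Health Data Sciences and Informatics +# +# This file is part of DeepPatientLevelPrediction +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +#' setMultiLayerPerceptron +#' +#' @description +#' Creates settings for a Multilayer perceptron model +#' +#' @details +#' Model architecture +#' +#' +#' @param numLayers Number of layers in network, default: 1:8 +#' @param sizeHidden Number of neurons in each hidden layer, default: 2^(6:9) (64 to 512) +#' @param dropout How much dropout to apply in each hidden layer, default: seq(0, 0.5, 0.05) +#' @param sizeEmbedding Size of embedding layer, default: 2^(6:9) (64 to 512) +#' @param weightDecay Weight decay to apply, default: c(1e-6, 1e-3) +#' @param learningRate Learning rate to use. default: c(1e-2, 3e-4, 1e-5) +#' @param seed Seed to use for sampling hyperparameter space +#' @param hyperParamSearch Which kind of hyperparameter search to use, random sampling or exhaustive grid search.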
default: 'random' +#' @param randomSample How many random samples from hyperparameter space to use +#' @param device Which device to run analysis on, either 'cpu' or 'cuda', default: 'cpu' +#' @param batchSize Size of batch, default: 1024 +#' @param epochs Number of epochs to run, default: 30 +#' +#' @export +setMultiLayerPerceptron <- function(numLayers = c(1:8), + sizeHidden = c(2^(6:9)), + dropout = c(seq(0, 0.5, 0.05)), + sizeEmbedding = c(2^(6:9)), + weightDecay = c(1e-6, 1e-3), + learningRate = c(1e-2, 3e-4, 1e-5), + seed = NULL, + hyperParamSearch = "random", + randomSample = 100, + device = "cpu", + batchSize = 1024, + epochs = 30) { + if (is.null(seed)) { + seed <- as.integer(sample(1e5, 1)) + } + + paramGrid <- list( + numLayers = numLayers, + sizeHidden = sizeHidden, + dropout = dropout, + sizeEmbedding = sizeEmbedding, + weightDecay = weightDecay, + learningRate = learningRate, + seed = list(as.integer(seed[[1]])) + ) + + param <- PatientLevelPrediction::listCartesian(paramGrid) + + if (hyperParamSearch == "random") { + param <- param[sample(length(param), randomSample)] + } + + attr(param, "settings") <- list( + seed = seed[1], + device = device, + batchSize = batchSize, + epochs = epochs, + name = "MLP", + saveType = "file", + modelParamNames = c( + "numLayers", "sizeHidden", + "dropout", "sizeEmbedding" + ), + baseModel = "MLP" + ) + + results <- list( + fitFunction = "fitEstimator", + param = param + ) + + class(results) <- "modelSettings" + + return(results) +} + + +MLP <- torch::nn_module( + name = "MLP", + initialize = function(catFeatures, numFeatures = 0, sizeEmbedding, sizeHidden, numLayers, + activation = torch::nn_relu, + normalization = torch::nn_batch_norm1d, dropout = NULL, + d_out = 1) { + self$embedding <- torch::nn_embedding_bag( + num_embeddings = catFeatures + 1, + embedding_dim = sizeEmbedding, + padding_idx = 1 + ) + if (numFeatures != 0) { + self$numEmbedding <- numericalEmbedding(numFeatures, sizeEmbedding) + } + + self$first_layer <- torch::nn_linear(sizeEmbedding, sizeHidden) + + + self$layers <- torch::nn_module_list(lapply( + 1:numLayers, + function(x) { + MLPLayer( + sizeHidden, + normalization, activation, + dropout + ) + } + )) + self$lastNorm <- normalization(sizeHidden) + self$head <- torch::nn_linear(sizeHidden, d_out) + + self$lastAct <- activation() + }, + forward = function(x) { + x_cat <- x$cat + x_num <- x$num + x_cat <- self$embedding(x_cat + 1L) # padding_idx is 1 + if (!is.null(x_num)) { + x <- (x_cat + self$numEmbedding(x_num)$mean(dim = 2)) / 2 + } else { + x <- x_cat + } + x <- self$first_layer(x) + + for (i in 1:length(self$layers)) { + x <- self$layers[[i]](x) + } + x <- self$lastNorm(x) + x <- self$lastAct(x) + x <- self$head(x) + x <- x$squeeze(-1) + return(x) + } +) + +MLPLayer <- torch::nn_module( + name = "MLPLayer", + initialize = function(sizeHidden = 64, + normalization = torch::nn_batch_norm1d, + activation = torch::nn_relu, + dropout = 0.0, bias = TRUE) { + self$norm <- normalization(sizeHidden) + self$activation <- activation() + self$linear <- torch::nn_linear(sizeHidden, sizeHidden, bias = bias) + + # only add a dropout layer when a non-zero rate is given + if (!is.null(dropout) && dropout != 0.0) { + self$dropout <- torch::nn_dropout(p = dropout) + } + }, + forward = function(x) { + x <- self$linear(self$norm(x)) + if (!is.null(self$dropout)) { + x <- self$dropout(x) + } + return(self$activation(x)) + } +) diff --git a/R/ResNet.R b/R/ResNet.R index a820861..e8ce322 100644 --- a/R/ResNet.R +++ b/R/ResNet.R @@ -71,7 +71,7 @@ setResNet <- function(numLayers = c(1:8), seed =
list(as.integer(seed[[1]])) ) - param <- listCartesian(paramGrid) + param <- PatientLevelPrediction::listCartesian(paramGrid) if (hyperParamSearch == "random") { param <- param[sample(length(param), randomSample)] @@ -112,7 +112,11 @@ ResNet <- torch::nn_module( embedding_dim = sizeEmbedding, padding_idx = 1 ) - self$first_layer <- torch::nn_linear(sizeEmbedding + numFeatures, sizeHidden) + if (numFeatures != 0) { + self$numEmbedding <- numericalEmbedding(numFeatures, sizeEmbedding) + } + + self$first_layer <- torch::nn_linear(sizeEmbedding, sizeHidden) resHidden <- sizeHidden * hiddenFactor @@ -137,7 +141,8 @@ ResNet <- torch::nn_module( x_num <- x$num x_cat <- self$embedding(x_cat + 1L) # padding_idx is 1 if (!is.null(x_num)) { - x <- torch::torch_cat(list(x_cat, x_num), dim = 2L) + x <- (x_cat + self$numEmbedding(x_num)$mean(dim = 2)) / 2 + # x <- torch::torch_cat(list(x_cat, x_num), dim = 2L) } else { x <- x_cat } @@ -162,7 +167,6 @@ ResLayer <- torch::nn_module( self$linear0 <- torch::nn_linear(sizeHidden, resHidden) self$linear1 <- torch::nn_linear(resHidden, sizeHidden) - self$activation <- activation if (!is.null(hiddenDropout)) { self$hiddenDropout <- torch::nn_dropout(p = hiddenDropout) } @@ -188,27 +192,3 @@ ResLayer <- torch::nn_module( return(x) } ) - - -listCartesian <- function(allList) { - sizes <- lapply(allList, function(x) 1:length(x)) - combinations <- expand.grid(sizes) - - result <- list() - length(result) <- nrow(combinations) - - for (i in 1:nrow(combinations)) { - tempList <- list() - for (j in 1:ncol(combinations)) { - tempList <- c(tempList, list(allList[[j]][combinations[[i, j]]])) - } - names(tempList) <- names(allList) - result[[i]] <- tempList - } - - return(result) -} - - -# export this in PLP -computeGridPerformance <- PatientLevelPrediction::computeGridPerformance diff --git a/R/Topologies.R b/R/Topologies.R deleted file mode 100644 index b3ca9c7..0000000 --- a/R/Topologies.R +++ /dev/null @@ -1,97 +0,0 @@ -#' A single layer neural network -#' @param inputN Input neurons -#' @param layer1 Layer 1 neurons -#' @param outputN Output neurons -#' @param layer_dropout Layer dropout to use -#' @export -singleLayerNN <- function(inputN, layer1, outputN = 2, layer_dropout) { - self <- NA # fixing R check - - net <- torch::nn_module( - "classic_net", - initialize = function() { - self$linear1 <- torch::nn_linear(inputN, layer1) - self$linear2 <- torch::nn_linear(layer1, outputN) - self$softmax <- torch::nn_softmax(outputN) - }, - forward = function(x) { - x %>% - self$linear1() %>% - torch::nnf_dropout(p = layer_dropout) %>% - self$linear2() %>% - torch::nnf_dropout(p = layer_dropout) %>% - self$softmax() - } - ) - return(net()) -} - -#' Double layer neural network -#' @param inputN Input neurons -#' @param layer1 Layer 1 neurons -#' @param layer2 Layer 2 neurons -#' @param outputN output neurons -#' @param layer_dropout layer_dropout to use -#' @export -doubleLayerNN <- function(inputN, layer1, - layer2, outputN, - layer_dropout) { - self <- NA # fixing R check - - net <- torch::nn_module( - "classic_net", - initialize = function() { - self$linear1 <- torch::nn_linear(inputN, layer1) - self$linear2 <- torch::nn_linear(layer1, layer2) - self$linear3 <- torch::nn_linear(layer2, outputN) - self$softmax <- torch::nn_softmax(outputN) - }, - forward = function(x) { - x %>% - self$linear1() %>% - torch::nnf_dropout(p = layer_dropout) %>% - self$linear2() %>% - torch::nnf_dropout(p = layer_dropout) %>% - self$linear3() %>% - self$softmax() - } - ) - return(net()) -} - -#' 
Triple layer neural network -#' @param inputN Input neurons -#' @param layer1 amount of layer 1 neurons -#' @param layer2 amount of layer 2 neurons -#' @param layer3 amount of layer 3 neurons -#' @param outputN Number of output neurons -#' @param layer_dropout The dropout to use in layer -#' @export -tripleLayerNN <- function(inputN, layer1, - layer2, layer3, - outputN, layer_dropout) { - self <- NA # fixing R check - - net <- torch::nn_module( - "classic_net", - initialize = function() { - self$linear1 <- torch::nn_linear(inputN, layer1) - self$linear2 <- torch::nn_linear(layer1, layer2) - self$linear3 <- torch::nn_linear(layer2, layer3) - self$linear4 <- torch::nn_linear(layer3, outputN) - self$softmax <- torch::nn_softmax(outputN) - }, - forward = function(x) { - x %>% - self$linear1() %>% - torch::nnf_dropout(p = layer_dropout) %>% - self$linear2() %>% - torch::nnf_dropout(p = layer_dropout) %>% - self$linear3() %>% - torch::nnf_dropout(p = layer_dropout) %>% - self$linear4() %>% - self$softmax() - } - ) - model <- net() -} diff --git a/R/Transformer.R b/R/Transformer.R index 545290b..0ab7227 100644 --- a/R/Transformer.R +++ b/R/Transformer.R @@ -27,7 +27,7 @@ setTransformer <- function(numBlocks = 3, dimToken = 96, dimOut = 1, learningRate = 3e-4, batchSize = 1024, epochs = 10, device = "cpu", hyperParamSearch = "random", randomSamples = 100, seed = NULL) { - if (!is.null(seed)) { + if (is.null(seed)) { seed <- as.integer(sample(1e5, 1)) } @@ -52,7 +52,7 @@ setTransformer <- function(numBlocks = 3, dimToken = 96, dimOut = 1, seed = list(as.integer(seed[[1]])) ) - param <- listCartesian(paramGrid) + param <- PatientLevelPrediction::listCartesian(paramGrid) if (hyperParamSearch == "random") { param <- param[sample(length(param), randomSamples)] diff --git a/extras/DeepPatientLevelPrediction.pdf b/extras/DeepPatientLevelPrediction.pdf index 930f19f..d7bde43 100644 Binary files a/extras/DeepPatientLevelPrediction.pdf and b/extras/DeepPatientLevelPrediction.pdf differ diff --git a/inst/doc/BuildingDeepModels.pdf b/inst/doc/BuildingDeepModels.pdf deleted file mode 100644 index 244be6e..0000000 Binary files a/inst/doc/BuildingDeepModels.pdf and /dev/null differ diff --git a/inst/doc/Installing.pdf b/inst/doc/Installing.pdf deleted file mode 100644 index f74f55d..0000000 Binary files a/inst/doc/Installing.pdf and /dev/null differ diff --git a/man/Dataset.Rd b/man/Dataset.Rd index 44e7125..eb12468 100644 --- a/man/Dataset.Rd +++ b/man/Dataset.Rd @@ -4,7 +4,7 @@ \alias{Dataset} \title{A torch dataset} \usage{ -Dataset(data, labels = NULL, numericalIndex = NULL, all = FALSE) +Dataset(data, labels = NULL, numericalIndex = NULL) } \arguments{ \item{data}{a dataframe like object with the covariates} @@ -12,8 +12,6 @@ Dataset(data, labels = NULL, numericalIndex = NULL, all = FALSE) \item{labels}{a dataframe with the labels} \item{numericalIndex}{in what column numeric data is in (if any)} - -\item{all}{if True then returns all features instead of splitting num/cat} } \description{ A torch dataset diff --git a/man/doubleLayerNN.Rd b/man/doubleLayerNN.Rd deleted file mode 100644 index 966f7de..0000000 --- a/man/doubleLayerNN.Rd +++ /dev/null @@ -1,22 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/Topologies.R -\name{doubleLayerNN} -\alias{doubleLayerNN} -\title{Double layer neural network} -\usage{ -doubleLayerNN(inputN, layer1, layer2, outputN, layer_dropout) -} -\arguments{ -\item{inputN}{Input neurons} - -\item{layer1}{Layer 1 neurons} - -\item{layer2}{Layer 
2 neurons} - -\item{outputN}{output neurons} - -\item{layer_dropout}{layer_dropout to use} -} -\description{ -Double layer neural network -} diff --git a/man/fitDeepNNTorch.Rd b/man/fitDeepNNTorch.Rd deleted file mode 100644 index ac759e8..0000000 --- a/man/fitDeepNNTorch.Rd +++ /dev/null @@ -1,20 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/DeepNNTorch.R -\name{fitDeepNNTorch} -\alias{fitDeepNNTorch} -\title{Fits a deep neural network} -\usage{ -fitDeepNNTorch(trainData, modelSettings, search = "grid", analysisId) -} -\arguments{ -\item{trainData}{Training data object} - -\item{modelSettings}{modelSettings object} - -\item{search}{Which kind of search strategy to use} - -\item{analysisId}{Analysis Id} -} -\description{ -Fits a deep neural network -} diff --git a/man/predictDeepNN.Rd b/man/predictDeepNN.Rd deleted file mode 100644 index 616a3a8..0000000 --- a/man/predictDeepNN.Rd +++ /dev/null @@ -1,18 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/DeepNNTorch.R -\name{predictDeepNN} -\alias{predictDeepNN} -\title{Create predictions for a deep neural network} -\usage{ -predictDeepNN(plpModel, data, cohort) -} -\arguments{ -\item{plpModel}{The plpModel to predict for} - -\item{data}{The data to make predictions for} - -\item{cohort}{The cohort to use} -} -\description{ -Create predictions for a deep neural network -} diff --git a/man/setDeepNNTorch.Rd b/man/setDeepNNTorch.Rd deleted file mode 100644 index e802cd5..0000000 --- a/man/setDeepNNTorch.Rd +++ /dev/null @@ -1,40 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/DeepNNTorch.R -\name{setDeepNNTorch} -\alias{setDeepNNTorch} -\title{settings for a Deep neural network} -\usage{ -setDeepNNTorch( - units = list(c(128, 64), 128), - layer_dropout = c(0.2), - lr = c(1e-04), - decay = c(1e-05), - outcome_weight = c(1), - batch_size = c(10000), - epochs = c(100), - device = "cpu", - seed = NULL -) -} -\arguments{ -\item{units}{A list of vectors for neurons per layer} - -\item{layer_dropout}{Dropout to use per layer} - -\item{lr}{Learning rate ot use} - -\item{decay}{Weight decay to use} - -\item{outcome_weight}{Weight for minority outcome in cost function} - -\item{batch_size}{Batch size to use} - -\item{epochs}{How many epochs to use} - -\item{device}{Which device to use} - -\item{seed}{A seed to make experiments more reproducible} -} -\description{ -settings for a Deep neural network -} diff --git a/man/setEstimator.Rd b/man/setEstimator.Rd new file mode 100644 index 0000000..d3001e3 --- /dev/null +++ b/man/setEstimator.Rd @@ -0,0 +1,29 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/Estimator.R +\name{setEstimator} +\alias{setEstimator} +\title{setEstimator} +\arguments{ +\item{learningRate}{what learning rate to use} + +\item{weightDecay}{what weight_decay to use} + +\item{optimizer}{which optimizer to use} + +\item{scheduler}{which learning rate scheduler to use} + +\item{criterion}{loss function to use} + +\item{posWeight}{If more weight should be added to positive labels during training; note this will result in miscalibrated models} + +\item{earlyStopping}{If early stopping should be used, which stops training if your metric is not improving} + +\item{earlyStoppingMetric}{Which metric to use for early stopping} + +\item{patience}{patience for earlyStopper} + +\item{hyperparameterMetric}{which metric to use for hyperparameter selection: loss, auc, auprc or a custom function} +} +\description{
+Creates settings for the Estimator, which takes a model and trains it +} diff --git a/man/setMultiLayerPerceptron.Rd b/man/setMultiLayerPerceptron.Rd new file mode 100644 index 0000000..a79ab49 --- /dev/null +++ b/man/setMultiLayerPerceptron.Rd @@ -0,0 +1,52 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/MLP.R +\name{setMultiLayerPerceptron} +\alias{setMultiLayerPerceptron} +\title{setMultiLayerPerceptron} +\usage{ +setMultiLayerPerceptron( + numLayers = c(1:8), + sizeHidden = c(2^(6:9)), + dropout = c(seq(0, 0.5, 0.05)), + sizeEmbedding = c(2^(6:9)), + weightDecay = c(1e-06, 0.001), + learningRate = c(0.01, 3e-04, 1e-05), + seed = NULL, + hyperParamSearch = "random", + randomSample = 100, + device = "cpu", + batchSize = 1024, + epochs = 30 +) +} +\arguments{ +\item{numLayers}{Number of layers in network, default: 1:8} + +\item{sizeHidden}{Number of neurons in each hidden layer, default: 2^(6:9) (64 to 512)} + +\item{dropout}{How much dropout to apply in each hidden layer, default: seq(0, 0.5, 0.05)} + +\item{sizeEmbedding}{Size of embedding layer, default: 2^(6:9) (64 to 512)} + +\item{weightDecay}{Weight decay to apply, default: c(1e-6, 1e-3)} + +\item{learningRate}{Learning rate to use. default: c(1e-2, 3e-4, 1e-5)} + +\item{seed}{Seed to use for sampling hyperparameter space} + +\item{hyperParamSearch}{Which kind of hyperparameter search to use, random sampling or exhaustive grid search. default: 'random'} + +\item{randomSample}{How many random samples from hyperparameter space to use} + +\item{device}{Which device to run analysis on, either 'cpu' or 'cuda', default: 'cpu'} + +\item{batchSize}{Size of batch, default: 1024} + +\item{epochs}{Number of epochs to run, default: 30} +} +\description{ +Creates settings for a Multilayer perceptron model +} +\details{ +Model architecture +} diff --git a/man/singleLayerNN.Rd b/man/singleLayerNN.Rd deleted file mode 100644 index b84d2f0..0000000 --- a/man/singleLayerNN.Rd +++ /dev/null @@ -1,20 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/Topologies.R -\name{singleLayerNN} -\alias{singleLayerNN} -\title{A single layer neural network} -\usage{ -singleLayerNN(inputN, layer1, outputN = 2, layer_dropout) -} -\arguments{ -\item{inputN}{Input neurons} - -\item{layer1}{Layer 1 neurons} - -\item{outputN}{Output neurons} - -\item{layer_dropout}{Layer dropout to use} -} -\description{ -A single layer neural network -} diff --git a/man/tripleLayerNN.Rd b/man/tripleLayerNN.Rd deleted file mode 100644 index aaa33a1..0000000 --- a/man/tripleLayerNN.Rd +++ /dev/null @@ -1,24 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/Topologies.R -\name{tripleLayerNN} -\alias{tripleLayerNN} -\title{Triple layer neural network} -\usage{ -tripleLayerNN(inputN, layer1, layer2, layer3, outputN, layer_dropout) -} -\arguments{ -\item{inputN}{Input neurons} - -\item{layer1}{amount of layer 1 neurons} - -\item{layer2}{amount of layer 2 neurons} - -\item{layer3}{amount of layer 3 neurons} - -\item{outputN}{Number of output neurons} - -\item{layer_dropout}{The dropout to use in layer} -} -\description{ -Triple layer neural network -} diff --git a/tests/testthat/test-dataset.R b/tests/testthat/test-Dataset.R similarity index 100% rename from tests/testthat/test-dataset.R rename to tests/testthat/test-Dataset.R diff --git a/tests/testthat/test-DeepNNTorch.R b/tests/testthat/test-DeepNNTorch.R deleted file mode 100644 index 1a6e6ca..0000000 ---
a/tests/testthat/test-DeepNNTorch.R +++ /dev/null @@ -1,87 +0,0 @@ - -# code to train models -deepset <- setDeepNNTorch( - units = list(c(128, 64), 128), layer_dropout = c(0.2), - lr = c(1e-4), decay = c(1e-5), outcome_weight = c(1.0), batch_size = c(100), - epochs = c(5), seed = NULL -) - -test_that("setDeepNNTorch works", { - testthat::expect_s3_class(object = deepset, class = "modelSettings") - - testthat::expect_equal(deepset$fitFunction, "fitDeepNNTorch") - - testthat::expect_true(nrow(deepset$param) > 0) -}) - -sink(nullfile()) -res <- tryCatch( - { - PatientLevelPrediction::runPlp( - plpData = plpData, - outcomeId = 3, - modelSettings = deepset, - analysisId = "DeepNNTorch", - analysisName = "Testing Deep Learning", - populationSettings = populationSet, - splitSettings = PatientLevelPrediction::createDefaultSplitSetting(), - sampleSettings = PatientLevelPrediction::createSampleSettings(), # none - featureEngineeringSettings = PatientLevelPrediction::createFeatureEngineeringSettings(), # none - preprocessSettings = PatientLevelPrediction::createPreprocessSettings(), - executeSettings = PatientLevelPrediction::createExecuteSettings( - runSplitData = T, - runSampleData = F, - runfeatureEngineering = F, - runPreprocessData = T, - runModelDevelopment = T, - runCovariateSummary = F - ), - saveDirectory = file.path(testLoc, "DeepNNTorch") - ) - }, - error = function(e) { - print(e) - return(NULL) - } -) -sink() - -test_that("setDeepNNTorch with runPlp working checks", { - testthat::expect_false(is.null(res)) - - # check structure - testthat::expect_true("prediction" %in% names(res)) - testthat::expect_true("model" %in% names(res)) - testthat::expect_true("covariateSummary" %in% names(res)) - testthat::expect_true("performanceEvaluation" %in% names(res)) - - # check prediction same size as pop - testthat::expect_equal( - nrow(res$prediction %>% dplyr::filter(evaluationType %in% c("Train", "Test"))), - nrow(population) - ) - - # check prediction between 0 and 1 - testthat::expect_gte(min(res$prediction$value), 0) - testthat::expect_lte(max(res$prediction$value), 1) -}) - -test_that("Triple layer-nn works", { - deepset <- setDeepNNTorch( - units = list(c(64, 64, 32), c(64, 32, 16), c(32, 16, 8)), layer_dropout = c(0.2), - lr = c(1e-4), decay = c(1e-5), outcome_weight = c(1.0), batch_size = c(100), - epochs = c(5), seed = NULL - ) - - sink(nullfile()) - results <- fitDeepNNTorch(trainData$Train, deepset, analysisId = 1) - sink() - - expect_equal(class(results), "plpModel") - expect_equal(attr(results, "modelType"), "binary") - expect_equal(attr(results, "saveType"), "file") - - # check prediction between 0 and 1 - testthat::expect_gt(min(results$prediction$value), 0) - testthat::expect_lt(max(results$prediction$value), 1) -}) diff --git a/tests/testthat/test-MLP.R b/tests/testthat/test-MLP.R new file mode 100644 index 0000000..3bc0d8f --- /dev/null +++ b/tests/testthat/test-MLP.R @@ -0,0 +1,110 @@ + +modelSettings <- setMultiLayerPerceptron( + numLayers = c(2), + sizeHidden = c(32), + dropout = c(0.1), + sizeEmbedding = c(32), + weightDecay = c(1e-6), + learningRate = c(3e-4), + seed = 42, + hyperParamSearch = "random", + randomSample = 1, + batchSize = 128, + epochs = 3 +) + +test_that("setMultiLayerPerceptron works", { + testthat::expect_s3_class(object = modelSettings, class = "modelSettings") + + testthat::expect_equal(modelSettings$fitFunction, "fitEstimator") + + testthat::expect_true(length(modelSettings$param) > 0) +}) + +sink(nullfile()) +results <- tryCatch( + { + 
PatientLevelPrediction::runPlp( + plpData = plpData, + outcomeId = 3, + modelSettings = modelSettings, + analysisId = "MLP", + analysisName = "Testing Deep Learning", + populationSettings = populationSet, + splitSettings = PatientLevelPrediction::createDefaultSplitSetting(), + sampleSettings = PatientLevelPrediction::createSampleSettings(), # none + featureEngineeringSettings = PatientLevelPrediction::createFeatureEngineeringSettings(), # none + preprocessSettings = PatientLevelPrediction::createPreprocessSettings(), + executeSettings = PatientLevelPrediction::createExecuteSettings( + runSplitData = T, + runSampleData = F, + runfeatureEngineering = F, + runPreprocessData = T, + runModelDevelopment = T, + runCovariateSummary = F + ), + saveDirectory = file.path(testLoc, "MLP") + ) + }, + error = function(e) { + print(e) + return(NULL) + } +) +sink() + +test_that("MLP with runPlp working checks", { + testthat::expect_false(is.null(results)) + + # check structure + testthat::expect_true("prediction" %in% names(results)) + testthat::expect_true("model" %in% names(results)) + testthat::expect_true("covariateSummary" %in% names(results)) + testthat::expect_true("performanceEvaluation" %in% names(results)) + + # check prediction same size as pop + testthat::expect_equal(nrow(results$prediction %>% + dplyr::filter(evaluationType %in% c("Train", "Test"))), nrow(population)) + + # check prediction between 0 and 1 + testthat::expect_gte(min(results$prediction$value), 0) + testthat::expect_lte(max(results$prediction$value), 1) +}) + + +test_that("MLP nn-module works ", { + model <- MLP( + catFeatures = 5, numFeatures = 1, sizeEmbedding = 5, + sizeHidden = 16, numLayers = 1, + activation = torch::nn_relu, + normalization = torch::nn_batch_norm1d, dropout = 0.3, + d_out = 1 + ) + + pars <- sum(sapply(model$parameters, function(x) prod(x$shape))) + + # expected number of parameters + expect_equal(pars, 489) + + input <- list() + input$cat <- torch::torch_randint(0, 5, c(10, 5), dtype = torch::torch_long()) + input$num <- torch::torch_randn(10, 1, dtype = torch::torch_float32()) + + + output <- model(input) + + # output is correct shape + expect_equal(output$shape, 10) + + input$num <- NULL + model <- MLP( + catFeatures = 5, numFeatures = 0, sizeEmbedding = 5, + sizeHidden = 16, numLayers = 1, + activation = torch::nn_relu, + normalization = torch::nn_batch_norm1d, dropout = 0.3, + d_out = 1 + ) + output <- model(input) + # model works without numeric variables + expect_equal(output$shape, 10) +}) diff --git a/tests/testthat/test-ResNet.R b/tests/testthat/test-ResNet.R index f9a4efd..cb9a397 100644 --- a/tests/testthat/test-ResNet.R +++ b/tests/testthat/test-ResNet.R @@ -13,7 +13,7 @@ resSet <- setResNet( randomSample = 1, # device='cuda:0', batchSize = 128, - epochs = 3 + epochs = 1 ) test_that("setResNet works", { @@ -87,7 +87,7 @@ test_that("ResNet nn-module works ", { pars <- sum(sapply(model$parameters, function(x) prod(x$shape))) # expected number of parameters - expect_equal(pars, 1295) + expect_equal(pars, 1289) input <- list() input$cat <- torch::torch_randint(0, 5, c(10, 5), dtype = torch::torch_long()) diff --git a/tests/testthat/test-Transformer.R b/tests/testthat/test-Transformer.R index 8b5a479..913e687 100644 --- a/tests/testthat/test-Transformer.R +++ b/tests/testthat/test-Transformer.R @@ -9,6 +9,10 @@ test_that("Transformer settings work", { testthat::expect_s3_class(object = settings, class = "modelSettings") testthat::expect_equal(settings$fitFunction, "fitEstimator") 
testthat::expect_true(length(settings$param) > 0) + testthat::expect_error(setTransformer( + numBlocks = 1, dimToken = 50, + numHeads = 7 + )) }) test_that("fitEstimator with Transformer works", { diff --git a/vignettes/BuildingDeepModels.Rmd b/vignettes/BuildingDeepModels.Rmd index 98f293b..8ac9925 100644 --- a/vignettes/BuildingDeepModels.Rmd +++ b/vignettes/BuildingDeepModels.Rmd @@ -157,31 +157,25 @@ model's parameters to reduce the error. #### Set Function -To use the package to fit a MLP model you can use the `setDeepNNTorch()` +To use the package to fit a MLP model you can use the `setMultiLayerPerceptron()` function to specify the hyper-parameter settings for the MLP. #### Inputs -The `units` input defines the network topology via the number of neurons -per layer in the network's hidden layers. A list of different topologies -can be investigated. `list(c(10,63), 128)` means two different -topologies will be fit, the first has two hidden layers with 10 neurons -in the first hidden layer and 63 in the second hidden layer. The second -just has one hidden layer with 128 neurons. +The `numLayers` and `sizeHidden` inputs define the network topology via the number +of layers and neurons in the network's hidden layers. -The `layer_dropout` input specifies the probability that a layer -randomly sets input units to 0 at each step during training time. A -value of `0.2` means that 20% of the time the layer input will be set to +The `dropout` input specifies the probability that a layer +randomly sets some inputs to 0 at each step during training time. A +value of `0.2` means that 20% of the layer's inputs will be set to 0. This is used to reduce overfitting. -The `lr` input is the learning rate which is a hyperparameter that -controls how much to change the model in response to the estimated error -each time the model weights are updated. The smaller the `lr` the longer -it will take to fit the model and the model weights may get stuck, but -if the `lr` is too large, the weights may sub-optimally converge too -fast. +The `sizeEmbedding` input specifies the size of the embedding used. The first +layer is an embedding layer which converts each sparse feature to a dense vector +which it learns. An embedding is a lower dimensional projection of the features +where distance between points is a measure of similarity. -The `decay` input corresponds to the weight decay in the objective +The `weightDecay` input corresponds to the weight decay in the objective function. During model fitting the aim is to minimize the objective function. The objective function is made up of the prediction error (the difference between the prediction vs the truth) plus the square of the @@ -191,50 +185,67 @@ large, the model will never fit well enough, if you set it too low, you need to be careful of overfitting (so try to stop model fitting earlier). -The `outcome_weight` specifies whether to add more weight to -misclassifying one class (e.g., with outcome during TAR) vs the other -(e.g., without outcome during TAR). This can be useful if there is -imbalance between the classes (e.g., the outcome rarely occurs during -TAR). However be careful since this will also result in miscalibrated -models which need to be recalibrated. +The `learningRate` input is the learning rate which is a hyperparameter that +controls how much to change the model in response to the estimated error +each time the model weights are updated.
The smaller the `learningRate`, the longer +it will take to fit the model and the model weights may get stuck, but +if the `learningRate` is too large, the weights may sub-optimally converge too +fast. + +The `seed` lets the user use the same random initialization of the network's +weights as a previous run. + +The `hyperParamSearch` chooses the strategy to find the best hyperparameters. +Currently a random search and grid search are supported. Grid search searches +every possible combination of hyperparameters while random search samples +randomly from the combinations. Since neural networks can be very flexible and +have many hyperparameter combinations, it's almost never feasible to do a full +grid search unless the network is really small. -The `batch_size` corresponds to the number of data points (patients) +The `randomSample` chooses how many random samples to use. + +The `device` specifies what device to use, either `cpu` or `cuda`. If you +have multiple GPUs, use `cuda:x` where x is the GPU number as seen in `nvidia-smi`. + +The `batchSize` corresponds to the number of data points (patients) used per iteration to estimate the network error during model fitting. The `epochs` corresponds to how many times to run through the entire training data while fitting the model. -The `seed` lets the user reproduce the same network given the same -training data and hyper-parameter settings if they use the same seed. + #### Example Code -For example, the following code will try two different network -topologies and pick the topology that obtains the greatest AUROC via -cross validation in the training data and then fit the model with that -topology using all the training data. The standard output of `runPlp()` -will be returned - this contains the MLP model along with the -performance details and settings. +For example, the following code will try 10 different network +configurations sampled from the possible combinations given and pick the one +that obtains the greatest AUROC via cross validation in the training data and +then fit the model with that configuration using all the training data. The +standard output of `runPlp()` will be returned - this contains the MLP model +along with the performance details and settings. Note that there are +2*2*2*2 = 16 possible combinations, but we specify `randomSample = 10` to try only +10 of them.
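To make the counting concrete, here is a small sketch (not package code) of how the settings in the example below expand into a hyperparameter grid and how random search subsamples it; it mirrors the `PatientLevelPrediction::listCartesian()` plus `sample()` logic used inside `setMultiLayerPerceptron()`:

```r
# hyperparameters with more than one candidate value in the example below
paramGrid <- list(
  numLayers = c(3, 5),
  sizeHidden = c(64, 128),
  sizeEmbedding = c(32, 64),
  learningRate = c(1e-3, 1e-4)
)
# exhaustive grid search would try every combination: 2 * 2 * 2 * 2 = 16
nCombinations <- prod(lengths(paramGrid))
# random search draws randomSample = 10 of those 16 without replacement,
# mirroring param[sample(length(param), randomSample)] in the set function
sampledIdx <- sample(nCombinations, 10)
```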
```{r, eval=FALSE} -#singleLayerNN(inputN = 10, layer1 = 100, outputN = 2, layer_dropout = 0.1) -deepset <- setDeepNNTorch( - units = list(c(10,63), 128), - layer_dropout = c(0.2), - lr = c(1e-4), - decay = c(1e-5), - outcome_weight = c(1.0), - batch_size = c(100), - epochs = c(5), - seed = 12 - ) +modelSettings <- setMultiLayerPerceptron( + numLayers = c(3, 5), + sizeHidden = c(64, 128), + dropout = c(0.2), + sizeEmbedding = c(32, 64), + learningRate = c(1e-3, 1e-4), + weightDecay = c(1e-5), + randomSample = 10, + batchSize = c(100), + epochs = c(5), + seed = 12 + ) mlpResult <- PatientLevelPrediction::runPlp( plpData = plpData, outcomeId = 3, - modelSettings = deepset, - analysisId = 'DeepNNTorch', + modelSettings = modelSettings, + analysisId = 'MLP', analysisName = 'Testing Deep Learning', populationSettings = populationSet, splitSettings = PatientLevelPrediction::createDefaultSplitSetting(), @@ -361,8 +372,8 @@ resResult <- PatientLevelPrediction::runPlp( analysisName = 'Testing ResNet', populationSettings = populationSet, splitSettings = PatientLevelPrediction::createDefaultSplitSetting(), - sampleSettings = PatientLevelPrediction::createSampleSettings(), # none - featureEngineeringSettings = PatientLevelPrediction::createFeatureEngineeringSettings(), # none + sampleSettings = PatientLevelPrediction::createSampleSettings(), + featureEngineeringSettings = PatientLevelPrediction::createFeatureEngineeringSettings(), preprocessSettings = PatientLevelPrediction::createPreprocessSettings(), executeSettings = PatientLevelPrediction::createExecuteSettings( runSplitData = T, diff --git a/vignettes/BuildingDeepModels.log b/vignettes/BuildingDeepModels.log deleted file mode 100644 index 16018c5..0000000
diff --git a/vignettes/BuildingDeepModels.log b/vignettes/BuildingDeepModels.log
deleted file mode 100644
index 16018c5..0000000
--- a/vignettes/BuildingDeepModels.log
+++ /dev/null
@@ -1,506 +0,0 @@
[506 deleted lines of pdfTeX build log omitted: the removed file was a stray LaTeX build artifact.]
diff --git a/vignettes/Installing.Rmd b/vignettes/Installing.Rmd
index f06a26e..7946f76 100644
--- a/vignettes/Installing.Rmd
+++ b/vignettes/Installing.Rmd
@@ -77,13 +77,9 @@ remotes::install_github("OHDSI/PatientLevelPrediction")
 remotes::install_github("OHDSI/DeepPatientLevelPrediction")
 ```
 
-DeepPLP relies on [torch for R](https://torch.mlverse.org/). When torch is installed for the first time the installation is not finished until you run:
-
-```{r, echo = TRUE, message = FALSE, warning = FALSE, tidy=FALSE,eval=FALSE}
-library(torch)
-```
-
-This will download the required libtorch and lantern binaries for your operating system and copy them to the required locations for torch to use.
+DeepPLP relies on [torch for R](https://torch.mlverse.org/). When torch is installed the user
+will be prompted to download the libtorch and lantern binaries. These binaries are necessary
+for the package to run.
 
 If you are using DeepPLP in an offline environment the function `torch::install_torch_from_file()` can be used. This requires first downloading the correct binaries and moving them to the offline environment. See the [torch installation guide](https://torch.mlverse.org/docs/articles/installation.html) for more detailed instructions.
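For reference, a minimal sketch of that offline workflow (the argument names follow the torch installation guide; the file paths are placeholders for the binaries you downloaded on a machine with internet access):

```{r, eval=FALSE}
# Placeholder paths: the libtorch and lantern archives must first be
# downloaded elsewhere and copied to the offline machine.
torch::install_torch_from_file(
  libtorch = "/path/to/libtorch.zip",
  liblantern = "/path/to/liblantern.zip"
)
```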