From 38c88d6e8d942fc1508ff1367afff33fd3d82e0a Mon Sep 17 00:00:00 2001
From: Egill Fridgeirsson <egillax@gmail.com>
Date: Thu, 18 Aug 2022 13:32:17 +0200
Subject: [PATCH] Fix DeepNNTorch

---
 R/Dataset.R     |  46 ++++++++----
 R/DeepNNTorch.R | 196 ++++++++++++++++++++++++++++++++----------------
 2 files changed, 163 insertions(+), 79 deletions(-)

diff --git a/R/Dataset.R b/R/Dataset.R
index 9d10343..bbe6313 100644
--- a/R/Dataset.R
+++ b/R/Dataset.R
@@ -20,7 +20,6 @@ Dataset <- torch::dataset(
       self$numericalIndex <- NULL
     }
 
-
     # add labels if training (make 0 vector for prediction)
     if (!is.null(labels)) {
       self$target <- torch::torch_tensor(labels)
@@ -35,6 +34,7 @@ Dataset <- torch::dataset(
     # Weight to add in loss function to positive class
     self$posWeight <- (self$target == 0)$sum() / self$target$sum()
     # for DeepNNTorch
+    self$useAll <- all
     if (all) {
       self$all <- torch::torch_tensor(as.matrix(data), dtype = torch::torch_float32())
       self$cat <- NULL
@@ -103,23 +103,37 @@ Dataset <- torch::dataset(
   },
   .getbatch = function(item) {
     if (length(item) == 1) {
-      # add leading singleton dimension since models expects 2d tensors
-      return(list(
-        batch = list(
-          cat = self$cat[item]$unsqueeze(1),
-          num = self$num[item]$unsqueeze(1)
-        ),
-        target = self$target[item]$unsqueeze(1)
-      ))
+      return(self$.getBatchSingle(item))
+    } else {
+      return(self$.getBatchRegular(item))
+    }
+  },
+  .getBatchSingle = function(item) {
+    # add leading singleton dimension since models expects 2d tensors
+    if (self$useAll) {
+      batch <- list(all = self$all[item]$unsqueeze(1))
+    } else {
+      batch <- list(cat = self$cat[item]$unsqueeze(1),
+                    num = self$num[item]$unsqueeze(1))
+    }
+    return(list(
+      batch = batch,
+      target = self$target[item]$unsqueeze(1)
+    ))
+  },
+  .getBatchRegular = function(item) { # used by .getbatch when length(item) > 1
+    if (self$useAll) {
+      batch <- list(all = self$all[item])
     } else {
-      return(list(
-        batch = list(
-          cat = self$cat[item],
-          num = self$num[item]
-        ),
-        target = self$target[item]
-      ))
+      batch <- list( # `<-` for assignment, matching the useAll branch above
+        cat = self$cat[item],
+        num = self$num[item]
+      )
     }
+    return(list(
+      batch = batch,
+      target = self$target[item]
+    ))
   },
   .length = function() {
     self$target$size()[[1]] # shape[1]
diff --git a/R/DeepNNTorch.R b/R/DeepNNTorch.R
index c276f4a..4e20ca7 100644
--- a/R/DeepNNTorch.R
+++ b/R/DeepNNTorch.R
@@ -1,30 +1,27 @@
 #' settings for a Deep neural network
 #' @param units           A list of vectors for neurons per layer
-#' @param layer_dropout   Dropout to use per layer
+#' @param layerDropout   Dropout to use per layer
 #' @param lr              Learning rate ot use
 #' @param decay           Weight decay to use
-#' @param outcome_weight  Weight for minority outcome in cost function
-#' @param batch_size      Batch size to use
+#' @param outcomeWeight  Weight for minority outcome in cost function
+#' @param batchSize      Batch size to use
 #' @param epochs          How many epochs to use
 #' @param device          Which device to use
 #' @param seed            A seed to make experiments more reproducible
 #' @export
 setDeepNNTorch <- function(units = list(c(128, 64), 128),
-                           layer_dropout = c(0.2),
+                           layerDropout = c(0.2),
                            lr = c(1e-4),
                            decay = c(1e-5),
-                           outcome_weight = c(1.0),
-                           batch_size = c(10000),
+                           outcomeWeight = c(1.0),
+                           batchSize = c(10000),
                            epochs = c(100),
                            device = "cpu",
                            seed = NULL) {
-
-  # ensure_installed("torch")
-
   param <- expand.grid(
     units = units,
-    layer_dropout = layer_dropout,
-    lr = lr, decay = decay, outcome_weight = outcome_weight, epochs = epochs,
+    layerDropout = layerDropout,
+    lr = lr, decay = decay, outcomeWeight = outcomeWeight, epochs = epochs,
     seed = ifelse(is.null(seed), "NULL", seed)
   )
 
@@ -34,17 +31,15 @@ setDeepNNTorch <- function(units = list(c(128, 64), 128),
   param$units <- NULL
 
   attr(param, "settings") <- list(
-    selectorType = "byPid", # is this correct?
-    crossValidationInPrior = T,
     modelType = "DeepNN",
     seed = seed[1],
     name = "DeepNNTorch",
     units = units,
-    layer_dropout = layer_dropout,
+    layerDropout = layerDropout,
     lr = lr,
     decay = decay,
-    outcome_weight = outcome_weight,
-    batch_size = batch_size,
+    outcomeWeight = outcomeWeight,
+    batchSize = batchSize,
     device = device,
     epochs = epochs
   )
@@ -171,13 +166,15 @@ fitDeepNNTorch <- function(trainData,
 #' @export
 predictDeepNN <- function(plpModel,
                           data,
-                          cohort) {
-  if (!"plpModel" %in% class(plpModel)) {
+                          cohort,
+                          batchSize=512,
+                          device='cpu') {
+  if (!inherits(plpModel, 'plpModel') && !inherits(plpModel, 'nn_module')) {
     plpModel <- list(model = plpModel)
     attr(plpModel, "modelType") <- "binary"
   }
 
-  if ("plpData" %in% class(data)) {
+  if (inherits(data, 'plpData')) {
     dataMat <- PatientLevelPrediction::toSparseM(
       plpData = data,
       cohort = cohort,
@@ -193,9 +190,23 @@ predictDeepNN <- function(plpModel,
 
   if (is.character(plpModel$model)) {
     model <- torch::torch_load(file.path(plpModel$model, "DeepNNTorchModel.pt"), device = "cpu")
+  } else {
+    model <- if (inherits(plpModel, "nn_module")) plpModel else plpModel$model
   }
-  y_pred <- model(data$all)
-  prediction$value <- as.array(y_pred$to())[, 1]
+  model$to(device=device)
+  # seq_len() yields an empty index for an empty dataset (1:0 would give c(1, 0))
+  batchIndex <- seq_len(length(data))
+  batchIndex <- split(batchIndex, ceiling(seq_along(batchIndex) / batchSize))
+  torch::with_no_grad({
+    model$eval()
+    # NOTE(review): fitting applies nn_bce_loss to the raw output, so the model
+    # may already emit probabilities; confirm this sigmoid is not a second one
+    predictions <- lapply(batchIndex, function(b) {
+      batch <- data[b]$batch$all$to(device=device)
+      as.array(torch::torch_sigmoid(model(batch)[,1]$cpu()))
+    })
+  })
+  prediction$value <- unlist(predictions, use.names = FALSE)
 
   attr(prediction, "metaData")$modelType <- attr(plpModel, "modelType")
 
@@ -223,7 +234,7 @@ gridCvDeepNN <- function(matrixData,
   for (gridId in 1:nrow(paramSearch)) {
 
     # get the params
-    modelParamNames <- c("layer_dropout", "lr", "decay", "outcome_weight", "epochs", "units1", "units2", "units3")
+    modelParamNames <- c("layerDropout", "lr", "decay", "outcomeWeight", "epochs", "units1", "units2", "units3")
     modelParams <- paramSearch[gridId, modelParamNames]
 
     fitParams <- paramSearch[gridId, c("lr", "decay")]
@@ -247,7 +258,7 @@ gridCvDeepNN <- function(matrixData,
           inputN = ncol(matrixData),
           layer1 = modelParams$units1,
           outputN = 2,
-          layer_dropout = modelParams$layer_dropout
+          layer_dropout = modelParams$layerDropout
         )
       } else if (is.na(modelParams$units3)) {
         model <- doubleLayerNN(
@@ -255,7 +266,7 @@ gridCvDeepNN <- function(matrixData,
           layer1 = modelParams$units1,
           layer2 = modelParams$units2,
           outputN = 2,
-          layer_dropout = modelParams$layer_dropout
+          layer_dropout = modelParams$layerDropout
         )
       } else {
         model <- tripleLayerNN(
@@ -264,12 +275,14 @@ gridCvDeepNN <- function(matrixData,
           layer2 = modelParams$units2,
           layer3 = modelParams$units3,
           outputN = 2,
-          layer_dropout = modelParams$layer_dropout
+          layer_dropout = modelParams$layerDropout
         )
       }
-
+      
+      model$to(device=device)
       criterion <- torch::nn_bce_loss() # Binary crossentropy only
-      optimizer <- torch::optim_adam(model$parameters, lr = fitParams$lr)
+      optimizer <- torch::optim_adam(model$parameters, lr = fitParams$lr,
+                                     weight_decay = fitParams$decay)
 
       # Need earlyStopping
       # Need setting decay
@@ -278,30 +291,70 @@ gridCvDeepNN <- function(matrixData,
       trainDataset <- torch::dataset_subset(dataset, indices = which(fold != i))
       testDataset <- torch::dataset_subset(dataset, indices = which(fold == i))
 
-      # batches <- split(trainDataset, ceiling(seq_along(trainDataset)/batch_size))
-
+      batchIndex <- torch::torch_randperm(length(trainDataset)) + 1L
+      batchIndex <- split(batchIndex, ceiling(seq_along(batchIndex) / batchSize))
+      
+      testBatchIndex <- 1:length(testDataset)
+      testBatchIndex <- split(testBatchIndex, ceiling(seq_along(testBatchIndex) / batchSize))
       for (j in 1:epochs) {
-        # for(batchRowIds in batches){
-        optimizer$zero_grad()
-
-        # this is full batch training, won't work on real data
-        y_pred <- model(trainDataset$dataset$all[trainDataset$indices])
-        loss <- criterion(y_pred[, 1], trainDataset$dataset$target[trainDataset$indices])
-        loss$backward()
-        optimizer$step()
-
-        if (j %% 1 == 0) {
-          cat("Epoch:", j, "out of ", epochs, ": Loss:", loss$item(), "\n")
-        }
-        # }
+        startTime <- Sys.time()
+        trainLosses <- torch::torch_empty(length(batchIndex))
+        ix <- 1
+        model$train()
+        progressBar <- utils::txtProgressBar(style = 3)
+        coro::loop(for (b in batchIndex) {
+          optimizer$zero_grad()
+          batch <- trainDataset[b]$batch$all$to(device=device)
+          target <- trainDataset[b]$target$to(device=device)
+          y_pred <- model(batch)
+          loss <- criterion(y_pred[, 1], target)
+          loss$backward()
+          optimizer$step()
+          
+          trainLosses[ix] <- loss$detach()
+          utils::setTxtProgressBar(progressBar, ix / length(batchIndex))
+          ix <- ix + 1
+        })
+        close(progressBar)
+        trainLoss <- trainLosses$mean()$item()
+        torch::with_no_grad({
+          ix <- 1
+          testLosses <- torch::torch_empty(length(testBatchIndex)) # one slot per validation batch
+          model$eval()
+          predictions <- list()
+          targets <- list()
+          coro::loop(for (b in testBatchIndex) {
+            batch <- testDataset[b]$batch$all$to(device=device) # held-out fold, not the full dataset
+            target <- testDataset[b]$target$to(device=device)
+            pred <- model(batch)
+            predictions <- c(predictions, pred[,1])
+            targets <- c(targets, target)
+            testLosses[ix] <- criterion(pred[,1], target)
+            ix <- ix + 1
+          })
+          testLoss <- testLosses$mean()$item() # mean validation loss, not the last training batch loss
+          predictionsClass <- data.frame(
+            value = as.matrix(torch::torch_sigmoid(torch::torch_cat(predictions)$cpu())),
+            outcomeCount = as.matrix(torch::torch_cat(targets)$cpu())
+          )
+          attr(predictionsClass, "metaData")$modelType <- "binary"
+          auc <- PatientLevelPrediction::computeAuc(predictionsClass)
+        })
+        
+        delta <- Sys.time() - startTime
+        ParallelLogger::logInfo(
+          "Epochs: ", j,
+          " | Val AUC: ", round(auc, 3),
+          " | Val Loss: ", round(testLoss, 3),
+          " | Train Loss: ", round(trainLoss, 3),
+          " | Time: ", round(delta, 3), " ",
+          units(delta)
+        )
+        
       }
-      model$eval()
-
-      ParallelLogger::logInfo("Calculating predictions on left out fold set...")
-
-      pred <- model(testDataset$dataset$all[testDataset$indices])
+      
       predictionTable <- labels[labels$index == i, ]
-      predictionTable$value <- as.array(pred$to())[, 1]
+      predictionTable$value <- predictionsClass$value
 
       if (!"plpModel" %in% class(model)) {
         model <- list(model = model)
@@ -319,7 +372,6 @@ gridCvDeepNN <- function(matrixData,
 
 
   # get best para (this could be modified to enable any metric instead of AUC, just need metric input in function)
-
   paramGridSearch <- lapply(gridSearchPredictons, function(x) {
     do.call(PatientLevelPrediction::computeGridPerformance, x)
   }) # cvAUCmean, cvAUC, param
@@ -332,9 +384,8 @@ gridCvDeepNN <- function(matrixData,
   cvPrediction$evaluationType <- "CV"
 
   ParallelLogger::logInfo("Training final model using optimal parameters")
-
+  
   # get the params
-  modelParamNames <- c("layer_dropout", "lr", "decay", "outcome_weight", "epochs", "units1", "units2", "units3")
   modelParams <- finalParam[modelParamNames]
   fitParams <- finalParam[c("lr", "decay")]
   fitParams$epochs <- epochs
@@ -360,7 +411,7 @@ gridCvDeepNN <- function(matrixData,
       inputN = ncol(matrixData),
       layer1 = modelParams$units1,
       outputN = 2,
-      layer_dropout = modelParams$layer_dropout
+      layer_dropout = modelParams$layerDropout
     )
   } else if (is.na(modelParams$units3)) {
     model <- doubleLayerNN(
@@ -368,7 +419,7 @@ gridCvDeepNN <- function(matrixData,
       layer1 = modelParams$units1,
       layer2 = modelParams$units2,
       outputN = 2,
-      layer_dropout = modelParams$layer_dropout
+      layer_dropout = modelParams$layerDropout
     )
   } else {
     model <- tripleLayerNN(
@@ -377,23 +428,42 @@ gridCvDeepNN <- function(matrixData,
       layer2 = modelParams$units2,
       layer3 = modelParams$units3,
       outputN = 2,
-      layer_dropout = modelParams$layer_dropout
+      layer_dropout = modelParams$layerDropout
     )
   }
 
+  model$to(device=device)
+  
   criterion <- torch::nn_bce_loss() # Binary crossentropy only
   optimizer <- torch::optim_adam(model$parameters, lr = fitParams$lr)
-  optimizer$zero_grad()
-  y_pred <- model(trainDataset$all)
-  loss <- criterion(y_pred[, 1], trainDataset$target)
-  loss$backward()
-  optimizer$step()
-  model$eval()
-
+  
+  batchIndex <- torch::torch_randperm(length(trainDataset)) + 1L
+  batchIndex <- split(batchIndex, ceiling(seq_along(batchIndex) / batchSize))
+  
+  # NOTE(review): the optimizer above omits weight_decay = fitParams$decay, unlike the CV loop - confirm
+  # Train the final model, one optimizer step per mini-batch, for `epochs` passes.
+  for (epoch in 1:epochs) {
+    ix <- 1
+    model$train()
+    progressBar <- utils::txtProgressBar(style = 3)
+    coro::loop(for (b in batchIndex) {
+      optimizer$zero_grad()
+      # batchIndex permutes the rows of trainDataset, so index that same dataset
+      batch <- trainDataset[b]$batch$all$to(device=device)
+      target <- trainDataset[b]$target$to(device=device)
+      out <- model(batch)
+      loss <- criterion(out[,1], target)
+      loss$backward()
+      optimizer$step()
+      utils::setTxtProgressBar(progressBar, ix / length(batchIndex))
+      ix <- ix + 1
+    })
+    close(progressBar)
+  }
   ParallelLogger::logInfo("Calculating predictions on all train data...")
 
-  prediction <- labels
-  prediction$value <- as.array(y_pred$to())[, 1]
+  prediction <- predictDeepNN(model, data=trainDataset, cohort=labels, 
+                              batchSize = batchSize, device = device)
   prediction$evaluationType <- "Train"
 
   prediction <- rbind(