From 64fce910091610ae3ad382ced1c8b44f52fd5543 Mon Sep 17 00:00:00 2001 From: egillax Date: Mon, 14 Nov 2022 14:29:35 +0100 Subject: [PATCH 01/58] speed up data conversion about 3-5x --- R/Dataset.R | 13 +++++-------- 1 file changed, 5 insertions(+), 8 deletions(-) diff --git a/R/Dataset.R b/R/Dataset.R index 73464d1..88ae967 100644 --- a/R/Dataset.R +++ b/R/Dataset.R @@ -11,7 +11,6 @@ Dataset <- torch::dataset( if (is.null(numericalIndex)) { numericalIndex <- data %>% dplyr::group_by(columnId) %>% - dplyr::collect() %>% dplyr::summarise(n = dplyr::n_distinct(.data$covariateValue)) %>% dplyr::pull(n) > 1 self$numericalIndex <- numericalIndex @@ -39,14 +38,12 @@ Dataset <- torch::dataset( dplyr::ungroup() %>% dplyr::select(c("rowId", "newColumnId")) %>% dplyr::rename(columnId = newColumnId) - # the fastest way I found so far to convert data using data.table - # 1.5 min for 100k rows :( - dt <- data.table::data.table(rows = dataCat$rowId, cols = dataCat$columnId) - maxFeatures <- max(dt[, .N, by = rows][, N]) start <- Sys.time() - tensorList <- lapply(1:max(data %>% dplyr::pull(rowId)), function(x) { - torch::torch_tensor(dt[rows == x, cols]) - }) + catTensor <- torch::torch_tensor(cbind(dataCat$rowId, dataCat$columnId)) + catTensor <- catTensor[catTensor[,1]$argsort(),] + tensorList <- torch::torch_split(catTensor[,2], + as.numeric(torch::torch_unique_consecutive(catTensor[,1], + return_counts = TRUE)[[3]])) self$lengths <- lengths self$cat <- torch::nn_utils_rnn_pad_sequence(tensorList, batch_first = T) delta <- Sys.time() - start From 30e67fdcb6ff01b7ce0fa58344ac5fcbf994599d Mon Sep 17 00:00:00 2001 From: egillax Date: Mon, 14 Nov 2022 14:31:22 +0100 Subject: [PATCH 02/58] updated news --- NEWS.md | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/NEWS.md b/NEWS.md index bc0a4c7..fd19dc1 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,3 +1,7 @@ +DeepPatientLevelPrediction (develop) +====================== +- used vectorised torch operations to speed up data conversion in torch dataset + DeepPatientLevelPrediction 1.0.1 ====================== - Added changelog to website From 131c4343cdb17d9f85ded870bb40156b13583e57 Mon Sep 17 00:00:00 2001 From: egillax Date: Mon, 14 Nov 2022 17:20:45 +0100 Subject: [PATCH 03/58] make sure tensorList matches dataset length --- R/Dataset.R | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/R/Dataset.R b/R/Dataset.R index 88ae967..90fa97f 100644 --- a/R/Dataset.R +++ b/R/Dataset.R @@ -44,8 +44,13 @@ Dataset <- torch::dataset( tensorList <- torch::torch_split(catTensor[,2], as.numeric(torch::torch_unique_consecutive(catTensor[,1], return_counts = TRUE)[[3]])) + + # because of subjects without cat features, I need to create a list with all zeroes and then insert + # my tensorList. That way I can still index the dataset correctly. 
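  # (Editor's illustrative note, not part of the original patch.) On toy data the idea above
  # works as follows: with rowIds c(1, 1, 3) and columnIds c(5, 7, 2), sorting by rowId and
  # splitting by the counts returned from torch_unique_consecutive() yields tensors (5, 7)
  # for row 1 and (2) for row 3; inserting those into a list pre-filled with a zero for every
  # row keeps an entry for row 2 (which has no categorical features), and
  # nn_utils_rnn_pad_sequence() then pads all rows to equal length.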
+    totalList <- as.list(numeric(length(self$target)))
+    totalList[unique(dataCat$rowId)] <- tensorList
     self$lengths <- lengths
-    self$cat <- torch::nn_utils_rnn_pad_sequence(tensorList, batch_first = T)
+    self$cat <- torch::nn_utils_rnn_pad_sequence(totalList, batch_first = T)
     delta <- Sys.time() - start
     ParallelLogger::logInfo("Data conversion for dataset took ", signif(delta, 3), " ", attr(delta, "units"))
     if (sum(numericalIndex) == 0) {

From 38504b33b9c81cf0eb15eeba48df3c831fdaf95e Mon Sep 17 00:00:00 2001
From: egillax
Date: Mon, 14 Nov 2022 18:20:12 +0100
Subject: [PATCH 04/58] use integers for tensorList

---
 R/Dataset.R | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/R/Dataset.R b/R/Dataset.R
index 90fa97f..a4fa3b6 100644
--- a/R/Dataset.R
+++ b/R/Dataset.R
@@ -47,7 +47,7 @@ Dataset <- torch::dataset(
     # because of subjects without cat features, I need to create a list with all zeroes and then insert
     # my tensorList. That way I can still index the dataset correctly.
-    totalList <- as.list(numeric(length(self$target)))
+    totalList <- as.list(integer(length(self$target)))
     totalList[unique(dataCat$rowId)] <- tensorList
     self$lengths <- lengths
     self$cat <- torch::nn_utils_rnn_pad_sequence(totalList, batch_first = T)

From 5d89faa3e04ca6c2081c5ddc922f209d89bd7239 Mon Sep 17 00:00:00 2001
From: egillax
Date: Thu, 15 Dec 2022 11:48:53 +0100
Subject: [PATCH 05/58] test a different way of getting torch binaries

---
 .github/workflows/R_CDM_check_hades.yaml | 2 +-
 tests/testthat/setup.R                   | 4 ++++
 2 files changed, 5 insertions(+), 1 deletion(-)

diff --git a/.github/workflows/R_CDM_check_hades.yaml b/.github/workflows/R_CDM_check_hades.yaml
index 7445265..dc6f52e 100644
--- a/.github/workflows/R_CDM_check_hades.yaml
+++ b/.github/workflows/R_CDM_check_hades.yaml
@@ -43,7 +43,7 @@ jobs:
       CDM5_SQL_SERVER_PASSWORD: ${{ secrets.CDM5_SQL_SERVER_PASSWORD }}
       CDM5_SQL_SERVER_SERVER: ${{ secrets.CDM5_SQL_SERVER_SERVER }}
       CDM5_SQL_SERVER_USER: ${{ secrets.CDM5_SQL_SERVER_USER }}
-      TORCH_INSTALL: 1 # so torch runs torch::install_torch()
+      # TORCH_INSTALL: 1 # so torch runs torch::install_torch()
 
     steps:
       - uses: actions/checkout@v2

diff --git a/tests/testthat/setup.R b/tests/testthat/setup.R
index 93c9232..775e7e5 100644
--- a/tests/testthat/setup.R
+++ b/tests/testthat/setup.R
@@ -1,5 +1,9 @@
 library(PatientLevelPrediction)
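# (Editor's note, for context; illustrative only.) torch::install_torch() downloads the
# LibTorch/lantern binaries; the block added below runs it explicitly when the tests execute
# on GitHub Actions, replacing the TORCH_INSTALL=1 environment variable that the workflow
# change above comments out.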
+if(Sys.getenv('GITHUB_ACTIONS') == 'true') { + torch::install_torch() +} + testLoc <- tempdir() # get connection and data from Eunomia From e79a43fad53a180052ffd51c2d92746b4707260d Mon Sep 17 00:00:00 2001 From: egillax Date: Thu, 15 Dec 2022 13:53:29 +0100 Subject: [PATCH 07/58] fix tidyselect warnings --- R/Estimator.R | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/R/Estimator.R b/R/Estimator.R index 8abb048..45171c8 100644 --- a/R/Estimator.R +++ b/R/Estimator.R @@ -160,8 +160,8 @@ predictDeepEstimator <- function(plpModel, cohort = cohort, mapping = plpModel$covariateImportance %>% dplyr::select( - .data$columnId, - .data$covariateId + "columnId", + "covariateId" ) ) data <- Dataset(mappedData$covariates, @@ -333,7 +333,7 @@ gridCvDeep <- function(mappedData, ) # modify prediction prediction <- prediction %>% - dplyr::select(-.data$index) + dplyr::select(-"index") prediction$cohortStartDate <- as.Date(prediction$cohortStartDate, origin = "1970-01-01") From 887fac95c2529da23aac5d7e97f12c7e8e2457da Mon Sep 17 00:00:00 2001 From: egillax Date: Thu, 15 Dec 2022 14:01:08 +0100 Subject: [PATCH 08/58] Update description and news --- DESCRIPTION | 6 +++--- NEWS.md | 6 ++++++ 2 files changed, 9 insertions(+), 3 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index 13c063b..9e6c69f 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,8 +1,8 @@ Package: DeepPatientLevelPrediction Type: Package Title: Deep Learning For Patient Level Prediction Using Data In The OMOP Common Data Model -Version: 1.0.1 -Date: 09-10-2022 +Version: 1.0.2 +Date: 15-12-2022 Authors@R: c( person("Jenna", "Reps", email = "jreps@its.jnj.com", role = c("aut")), person("Egill", "Fridgeirsson", email = "e.fridgeirsson@erasmusmc.nl", role = c("aut", "cre")), @@ -13,7 +13,7 @@ Authors@R: c( Maintainer: Egill Fridgeirsson Description: A package for creating deep learning patient level prediction models following the OHDSI PatientLevelPrediction framework. License: Apache License 2.0 -URL: https://ohdsi.github.io/PatientLevelPrediction, https://github.com/OHDSI/DeepPatientLevelPrediction +URL: https://github.com/OHDSI/DeepPatientLevelPrediction BugReports: https://github.com/OHDSI/DeepPatientLevelPrediction/issues VignetteBuilder: knitr Depends: diff --git a/NEWS.md b/NEWS.md index bc0a4c7..39db33a 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,3 +1,9 @@ +DeepPatientLevelPrediction 1.0.2 +====================== +- Fix torch binaries issue when running tests from other github actions +- Fix link on website +- Fix tidyselect to silence warnings. 
+ DeepPatientLevelPrediction 1.0.1 ====================== - Added changelog to website From ad0ad19340b9527ab56ec690e908f09eae8a03fd Mon Sep 17 00:00:00 2001 From: egillax Date: Fri, 20 Jan 2023 18:16:09 +0100 Subject: [PATCH 09/58] fixed dataset --- R/Dataset.R | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/R/Dataset.R b/R/Dataset.R index a4fa3b6..2ab4dbd 100644 --- a/R/Dataset.R +++ b/R/Dataset.R @@ -37,7 +37,8 @@ Dataset <- torch::dataset( dplyr::mutate(newColumnId = dplyr::cur_group_id()) %>% dplyr::ungroup() %>% dplyr::select(c("rowId", "newColumnId")) %>% - dplyr::rename(columnId = newColumnId) + dplyr::rename(columnId = newColumnId) %>% + dplyr::arrange(rowId) start <- Sys.time() catTensor <- torch::torch_tensor(cbind(dataCat$rowId, dataCat$columnId)) catTensor <- catTensor[catTensor[,1]$argsort(),] From 03ff99ba318cedf9cb3707fba8d57e250046544d Mon Sep 17 00:00:00 2001 From: egillax Date: Fri, 20 Jan 2023 18:42:22 +0100 Subject: [PATCH 10/58] fix numericalIndex and address pull warning --- R/Dataset.R | 2 ++ R/Estimator.R | 4 +++- tests/testthat/test-Dataset.R | 26 +++++++++++++++----------- 3 files changed, 20 insertions(+), 12 deletions(-) diff --git a/R/Dataset.R b/R/Dataset.R index 2ab4dbd..e552d3d 100644 --- a/R/Dataset.R +++ b/R/Dataset.R @@ -10,8 +10,10 @@ Dataset <- torch::dataset( # determine numeric if (is.null(numericalIndex)) { numericalIndex <- data %>% + dplyr::arrange(columnId) %>% dplyr::group_by(columnId) %>% dplyr::summarise(n = dplyr::n_distinct(.data$covariateValue)) %>% + dplyr::collect() %>% dplyr::pull(n) > 1 self$numericalIndex <- numericalIndex } else { diff --git a/R/Estimator.R b/R/Estimator.R index 1023113..1ad577f 100644 --- a/R/Estimator.R +++ b/R/Estimator.R @@ -83,7 +83,9 @@ fitEstimator <- function(trainData, hyperSummary <- do.call(rbind, lapply(cvResult$paramGridSearch, function(x) x$hyperSummary)) prediction <- cvResult$prediction - incs <- rep(1, covariateRef %>% dplyr::tally() %>% dplyr::pull()) + incs <- rep(1, covariateRef %>% dplyr::tally() %>% + dplyr::collect () + %>% dplyr::pull()) covariateRef <- covariateRef %>% dplyr::collect() %>% dplyr::mutate( diff --git a/tests/testthat/test-Dataset.R b/tests/testthat/test-Dataset.R index 989f886..b949394 100644 --- a/tests/testthat/test-Dataset.R +++ b/tests/testthat/test-Dataset.R @@ -5,14 +5,17 @@ test_that("dataset correct class", { test_that("length of index correct", { testthat::expect_equal( length(dataset$getNumericalIndex()), - dplyr::n_distinct(mappedData$covariates %>% dplyr::pull(covariateId)) + dplyr::n_distinct(mappedData$covariates %>% + dplyr::collect() %>% + dplyr::pull(covariateId)) ) }) test_that("number of num and cat features sum correctly", { testthat::expect_equal( dataset$numNumFeatures() + dataset$numCatFeatures(), - dplyr::n_distinct(mappedData$covariates %>% dplyr::pull(covariateId)) + dplyr::n_distinct(mappedData$covariates %>% dplyr::collect() %>% + dplyr::pull(covariateId)) ) }) @@ -22,7 +25,8 @@ test_that("length of dataset correct", { expect_equal(length(dataset), dataset$num$shape[1]) expect_equal( dataset$.length(), - dplyr::n_distinct(mappedData$covariates %>% dplyr::pull(rowId)) + dplyr::n_distinct(mappedData$covariates %>% + dplyr::collect() %>% dplyr::pull(rowId)) ) }) @@ -30,31 +34,31 @@ test_that(".getbatch works", { batch_size <- 16 # get one sample out <- dataset[10] - + # output should be a list of two items, the batch and targets, # the batch is what goes to the model expect_equal(length(out), 2) - + # targets should be binary 
expect_true(out$target$item() %in% c(0, 1)) - + # shape of batch is correct expect_equal(length(out$batch), 2) expect_equal(out$batch$cat$shape[1], 1) expect_equal(out$batch$num$shape[1], 1) - + # shape of target expect_equal(out$target$shape[1], 1) - + # get a whole batch out <- dataset[10:(10 + batch_size - 1)] - + expect_equal(length(out), 2) expect_true(all(torch::as_array(out$target) %in% c(0, 1))) - + expect_equal(length(out$batch), 2) expect_equal(out$batch$cat$shape[1], 16) expect_equal(out$batch$num$shape[1], 16) - + expect_equal(out$target$shape[1], 16) }) From 517f6753c10ff625b3124e34091781d5cd076484 Mon Sep 17 00:00:00 2001 From: Henrik John Date: Thu, 26 Jan 2023 14:43:44 +0100 Subject: [PATCH 11/58] Allow dimToken and numHeads to take the form of vectors --- R/Transformer.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/Transformer.R b/R/Transformer.R index ae799d9..d19cc94 100644 --- a/R/Transformer.R +++ b/R/Transformer.R @@ -84,7 +84,7 @@ setTransformer <- function(numBlocks = 3, dimToken = 96, dimOut = 1, seed <- as.integer(sample(1e5, 1)) } - if (dimToken %% numHeads != 0) { + if (any(dimToken %% numHeads != 0)) { stop(paste( "dimToken needs to divisble by numHeads. dimToken =", dimToken, "is not divisible by numHeads =", numHeads From dfca29a7bbd0f3e8a1bef93f850e88782f198141 Mon Sep 17 00:00:00 2001 From: Egill Axfjord Fridgeirsson Date: Fri, 27 Jan 2023 15:16:46 +0100 Subject: [PATCH 12/58] Modeltype fix (#48) * added modelType to modelSettings --- DESCRIPTION | 2 +- R/Estimator-class.R | 8 ++++---- R/Estimator.R | 11 +++++------ R/MLP.R | 5 ++--- R/ResNet.R | 5 ++--- R/Transformer.R | 5 ++--- man/Estimator.Rd | 6 +++--- tests/testthat/test-Estimator.R | 6 +++--- 8 files changed, 22 insertions(+), 26 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index 9e6c69f..8a3cd7e 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -37,6 +37,6 @@ Remotes: ohdsi/PatientLevelPrediction, ohdsi/FeatureExtraction, ohdsi/Eunomia -RoxygenNote: 7.2.1 +RoxygenNote: 7.2.3 Encoding: UTF-8 Config/testthat/edition: 3 diff --git a/R/Estimator-class.R b/R/Estimator-class.R index 33980e1..4e6cecf 100644 --- a/R/Estimator-class.R +++ b/R/Estimator-class.R @@ -27,8 +27,8 @@ Estimator <- R6::R6Class( public = list( #' @description #' Creates a new estimator - #' @param baseModel The torch nn module to use as model - #' @param modelParameters Parameters to initialize the baseModel + #' @param modelType The torch nn module to use as model + #' @param modelParameters Parameters to initialize the model #' @param fitParameters Parameters required for the estimator fitting #' @param optimizer A torch optimizer to use, default is Adam #' @param criterion The torch loss function to use, defaults to @@ -36,7 +36,7 @@ Estimator <- R6::R6Class( #' @param scheduler learning rate scheduler to use #' @param device Which device to use for fitting, default is cpu #' @param patience Patience to use for early stopping - initialize = function(baseModel, + initialize = function(modelType, modelParameters, fitParameters, optimizer = torch::optim_adam, @@ -45,7 +45,7 @@ Estimator <- R6::R6Class( device = "cpu", patience = 4) { self$device <- device - self$model <- do.call(baseModel, modelParameters) + self$model <- do.call(modelType, modelParameters) self$modelParameters <- modelParameters self$fitParameters <- fitParameters self$epochs <- self$itemOrDefaults(fitParameters, "epochs", 10) diff --git a/R/Estimator.R b/R/Estimator.R index 1ad577f..61ef755 100644 --- a/R/Estimator.R +++ b/R/Estimator.R @@ 
-175,7 +175,7 @@ predictDeepEstimator <- function(plpModel, if (is.character(plpModel$model)) { model <- torch::torch_load(file.path(plpModel$model, "DeepEstimatorModel.pt"), device = "cpu") estimator <- Estimator$new( - baseModel = attr(plpModel$modelDesign$modelSettings$param, "settings")$baseModel, + modelType = attr(plpModel$modelDesign$modelSettings$param, "settings")$modelType, modelParameters = model$modelParameters, fitParameters = model$fitParameters, device = attr(plpModel$modelDesign$modelSettings$param, "settings")$device @@ -210,15 +210,14 @@ gridCvDeep <- function(mappedData, settings, modelLocation, paramSearch) { - modelName <- settings$modelName modelParamNames <- settings$modelParamNames fitParamNames <- c("weightDecay", "learningRate") epochs <- settings$epochs batchSize <- settings$batchSize - baseModel <- settings$baseModel + modelType <- settings$modelType device <- settings$device - ParallelLogger::logInfo(paste0("Running CV for ", modelName, " model")) + ParallelLogger::logInfo(paste0("Running CV for ", modelType, " model")) ########################################################################### @@ -249,7 +248,7 @@ gridCvDeep <- function(mappedData, trainDataset <- torch::dataset_subset(dataset, indices = which(fold != i)) testDataset <- torch::dataset_subset(dataset, indices = which(fold == i)) estimator <- Estimator$new( - baseModel = baseModel, + modelType = modelType, modelParameters = modelParams, fitParameters = fitParams, device = device @@ -309,7 +308,7 @@ gridCvDeep <- function(mappedData, modelParams$numFeatures <- dataset$numNumFeatures() estimator <- Estimator$new( - baseModel = baseModel, + modelType = modelType, modelParameters = modelParams, fitParameters = fitParams, device = device diff --git a/R/MLP.R b/R/MLP.R index 7df166a..34c67f2 100644 --- a/R/MLP.R +++ b/R/MLP.R @@ -81,13 +81,12 @@ setMultiLayerPerceptron <- function(numLayers = c(1:8), device = device, batchSize = batchSize, epochs = epochs, - name = "MLP", + modelType = "MLP", saveType = "file", modelParamNames = c( "numLayers", "sizeHidden", "dropout", "sizeEmbedding" - ), - baseModel = "MLP" + ) ) results <- list( diff --git a/R/ResNet.R b/R/ResNet.R index 1f48650..0c428fd 100644 --- a/R/ResNet.R +++ b/R/ResNet.R @@ -127,13 +127,12 @@ setResNet <- function(numLayers = c(1:8), device = device, batchSize = batchSize, epochs = epochs, - name = "ResNet", + modelType ="ResNet", saveType = "file", modelParamNames = c( "numLayers", "sizeHidden", "hiddenFactor", "residualDropout", "hiddenDropout", "sizeEmbedding" - ), - baseModel = "ResNet" + ) ) results <- list( diff --git a/R/Transformer.R b/R/Transformer.R index d19cc94..fb32c1b 100644 --- a/R/Transformer.R +++ b/R/Transformer.R @@ -122,13 +122,12 @@ setTransformer <- function(numBlocks = 3, dimToken = 96, dimOut = 1, device = device, batchSize = batchSize, epochs = epochs, - name = "Transformer", + modelType = "Transformer", saveType = "file", modelParamNames = c( "numBlocks", "dimToken", "dimOut", "numHeads", "attDropout", "ffnDropout", "resDropout", "dimHidden" - ), - baseModel = "Transformer" + ) ) results <- list( diff --git a/man/Estimator.Rd b/man/Estimator.Rd index e825cf9..4cd1623 100644 --- a/man/Estimator.Rd +++ b/man/Estimator.Rd @@ -31,7 +31,7 @@ fit and predict the model defined in that module. Creates a new estimator \subsection{Usage}{ \if{html}{\out{
}}\preformatted{Estimator$new(
-  baseModel,
+  modelType,
   modelParameters,
   fitParameters,
   optimizer = torch::optim_adam,
@@ -45,9 +45,9 @@ Creates a new estimator
\subsection{Arguments}{
\if{html}{\out{
}} \describe{ -\item{\code{baseModel}}{The torch nn module to use as model} +\item{\code{modelType}}{The torch nn module to use as model} -\item{\code{modelParameters}}{Parameters to initialize the baseModel} +\item{\code{modelParameters}}{Parameters to initialize the model} \item{\code{fitParameters}}{Parameters required for the estimator fitting} diff --git a/tests/testthat/test-Estimator.R b/tests/testthat/test-Estimator.R index 2f110d3..f9024d8 100644 --- a/tests/testthat/test-Estimator.R +++ b/tests/testthat/test-Estimator.R @@ -2,7 +2,7 @@ catFeatures <- dataset$numCatFeatures() numFeatures <- dataset$numNumFeatures() fitParams <- list() -baseModel <- ResNet +modelType <- ResNet modelParameters <- list( catFeatures = catFeatures, @@ -14,7 +14,7 @@ modelParameters <- list( ) estimator <- Estimator$new( - baseModel = baseModel, + modelType = modelType, modelParameters = modelParameters, fitParameters = fitParams, device = "cpu" @@ -25,7 +25,7 @@ test_that("Estimator initialization works", { # count parameters in both instances testthat::expect_equal( sum(sapply(estimator$model$parameters, function(x) prod(x$shape))), - sum(sapply(do.call(baseModel, modelParameters)$parameters, function(x) prod(x$shape))) + sum(sapply(do.call(modelType, modelParameters)$parameters, function(x) prod(x$shape))) ) testthat::expect_equal( From 65302bdf4683c7b3ff1d1362af15f8040b58781a Mon Sep 17 00:00:00 2001 From: egillax Date: Mon, 13 Feb 2023 14:28:02 +0100 Subject: [PATCH 13/58] update default ResNet and Transformer to have custom LR and WD --- R/ResNet.R | 8 ++++++-- R/Transformer.R | 8 ++++++-- man/setDefaultResNet.Rd | 13 ++++++++++++- man/setDefaultTransformer.Rd | 6 ++++++ 4 files changed, 30 insertions(+), 5 deletions(-) diff --git a/R/ResNet.R b/R/ResNet.R index 0c428fd..199233f 100644 --- a/R/ResNet.R +++ b/R/ResNet.R @@ -28,12 +28,16 @@ #' @param device Which device to run analysis on, either 'cpu' or 'cuda', default: 'cpu' #' @param batchSize Size of batch, default: 1024 #' @param epochs Number of epochs to run, default: 10 +#' @param learningRate Learning rate to use, default: 0.001 +#' @param weightDecay The weight decay to use #' @param seed Random seed to use #' @export setDefaultResNet <- function(device='cpu', batchSize=1024, epochs=10, + learningRate=0.001, + weightDecay=1e-6, seed=NULL) { resnetSettings <- setResNet(numLayers = 6, @@ -42,8 +46,8 @@ setDefaultResNet <- function(device='cpu', residualDropout = 0.1, hiddenDropout = 0.4, sizeEmbedding = 256, - weightDecay = 1e-6, - learningRate = 0.001, + weightDecay = weightDecay, + learningRate = learningRate, hyperParamSearch = 'random', randomSample = 1, device = device, diff --git a/R/Transformer.R b/R/Transformer.R index fb32c1b..9187a88 100644 --- a/R/Transformer.R +++ b/R/Transformer.R @@ -24,12 +24,16 @@ #' @param device Which device to run analysis on, either 'cpu' or 'cuda', default: 'cpu' #' @param batchSize Size of batch, default: 512 #' @param epochs Number of epochs to run, default: 10 +#' @param learningRate The learning rate to use +#' @param weightDecay The weight decay to use #' @param seed random seed to use #' #' @export setDefaultTransformer <- function(device='cpu', batchSize=512, epochs=10, + learningRate=1e-4, + weightDecay=1e-5, seed=NULL) { transformerSettings <- setTransformer(numBlocks = 3, dimToken = 192, @@ -39,8 +43,8 @@ setDefaultTransformer <- function(device='cpu', ffnDropout = 0.1, resDropout = 0.0, dimHidden = 256, - weightDecay = 1e-5, - learningRate = 1e-4, + weightDecay = weightDecay, + learningRate = 
learningRate, batchSize = batchSize, epochs = epochs, device = device, diff --git a/man/setDefaultResNet.Rd b/man/setDefaultResNet.Rd index 28c29df..98918aa 100644 --- a/man/setDefaultResNet.Rd +++ b/man/setDefaultResNet.Rd @@ -4,7 +4,14 @@ \alias{setDefaultResNet} \title{setDefaultResNet} \usage{ -setDefaultResNet(device = "cpu", batchSize = 1024, epochs = 10, seed = NULL) +setDefaultResNet( + device = "cpu", + batchSize = 1024, + epochs = 10, + learningRate = 0.001, + weightDecay = 1e-06, + seed = NULL +) } \arguments{ \item{device}{Which device to run analysis on, either 'cpu' or 'cuda', default: 'cpu'} @@ -13,6 +20,10 @@ setDefaultResNet(device = "cpu", batchSize = 1024, epochs = 10, seed = NULL) \item{epochs}{Number of epochs to run, default: 10} +\item{learningRate}{Learning rate to use, default: 0.001} + +\item{weightDecay}{The weight decay to use} + \item{seed}{Random seed to use} } \description{ diff --git a/man/setDefaultTransformer.Rd b/man/setDefaultTransformer.Rd index d3b54fb..7c1e644 100644 --- a/man/setDefaultTransformer.Rd +++ b/man/setDefaultTransformer.Rd @@ -8,6 +8,8 @@ setDefaultTransformer( device = "cpu", batchSize = 512, epochs = 10, + learningRate = 1e-04, + weightDecay = 1e-05, seed = NULL ) } @@ -18,6 +20,10 @@ setDefaultTransformer( \item{epochs}{Number of epochs to run, default: 10} +\item{learningRate}{The learning rate to use} + +\item{weightDecay}{The weight decay to use} + \item{seed}{random seed to use} } \description{ From 353e0ba5ee5a782666de378cfdf49b56de179c4f Mon Sep 17 00:00:00 2001 From: Henrik Date: Wed, 15 Feb 2023 23:00:35 +0100 Subject: [PATCH 14/58] Add seed for sampling hyperparameter combinations (#50) * Add random seed to sample hyperparameter combinations * Supress warnings message due to NULL seed * Add withr dependency --- DESCRIPTION | 3 ++- R/MLP.R | 4 +++- R/ResNet.R | 4 +++- R/Transformer.R | 5 +++-- man/setMultiLayerPerceptron.Rd | 3 +++ man/setResNet.Rd | 3 +++ man/setTransformer.Rd | 3 +++ 7 files changed, 20 insertions(+), 5 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index 8a3cd7e..948f7ae 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -25,7 +25,8 @@ Imports: ParallelLogger (>= 2.0.0), PatientLevelPrediction (>= 6.0.4), rlang, - torch (>= 0.8.0) + torch (>= 0.8.0), + withr Suggests: devtools, Eunomia, diff --git a/R/MLP.R b/R/MLP.R index 34c67f2..cfcb242 100644 --- a/R/MLP.R +++ b/R/MLP.R @@ -34,6 +34,7 @@ #' @param seed Seed to use for sampling hyperparameter space #' @param hyperParamSearch Which kind of hyperparameter search to use random sampling or exhaustive grid search. 
default: 'random' #' @param randomSample How many random samples from hyperparameter space to use +#' @param randomSampleSeed Random seed to sample hyperparameter combinations #' @param device Which device to run analysis on, either 'cpu' or 'cuda', default: 'cpu' #' @param batchSize Size of batch, default: 1024 #' @param epochs Number of epochs to run, default: 10 @@ -48,6 +49,7 @@ setMultiLayerPerceptron <- function(numLayers = c(1:8), seed = NULL, hyperParamSearch = "random", randomSample = 100, + randomSampleSeed = NULL, device = "cpu", batchSize = 1024, epochs = 30) { @@ -73,7 +75,7 @@ setMultiLayerPerceptron <- function(numLayers = c(1:8), } if (hyperParamSearch == "random") { - param <- param[sample(length(param), randomSample)] + suppressWarnings(withr::with_seed(randomSampleSeed, {param <- param[sample(length(param), randomSample)]})) } attr(param, "settings") <- list( diff --git a/R/ResNet.R b/R/ResNet.R index 199233f..a08c42c 100644 --- a/R/ResNet.R +++ b/R/ResNet.R @@ -79,6 +79,7 @@ setDefaultResNet <- function(device='cpu', #' @param seed Seed to use for sampling hyperparameter space #' @param hyperParamSearch Which kind of hyperparameter search to use random sampling or exhaustive grid search. default: 'random' #' @param randomSample How many random samples from hyperparameter space to use +#' @param randomSampleSeed Random seed to sample hyperparameter combinations #' @param device Which device to run analysis on, either 'cpu' or 'cuda', default: 'cpu' #' @param batchSize Size of batch, default: 1024 #' @param epochs Number of epochs to run, default: 10 @@ -95,6 +96,7 @@ setResNet <- function(numLayers = c(1:8), seed = NULL, hyperParamSearch = "random", randomSample = 100, + randomSampleSeed = NULL, device = "cpu", batchSize = 1024, epochs = 30) { @@ -123,7 +125,7 @@ setResNet <- function(numLayers = c(1:8), } if (hyperParamSearch == "random") { - param <- param[sample(length(param), randomSample)] + suppressWarnings(withr::with_seed(randomSampleSeed, {param <- param[sample(length(param), randomSample)]})) } attr(param, "settings") <- list( diff --git a/R/Transformer.R b/R/Transformer.R index 9187a88..4570eb9 100644 --- a/R/Transformer.R +++ b/R/Transformer.R @@ -75,6 +75,7 @@ setDefaultTransformer <- function(device='cpu', #' @param device Which device to use, cpu or cuda #' @param hyperParamSearch what kind of hyperparameter search to do, default 'random' #' @param randomSample How many samples to use in hyperparameter search if random +#' @param randomSampleSeed Random seed to sample hyperparameter combinations #' @param seed Random seed to use #' #' @export @@ -83,7 +84,7 @@ setTransformer <- function(numBlocks = 3, dimToken = 96, dimOut = 1, resDropout = 0, dimHidden = 512, weightDecay = 1e-6, learningRate = 3e-4, batchSize = 1024, epochs = 10, device = "cpu", hyperParamSearch = "random", - randomSample = 1, seed = NULL) { + randomSample = 1, randomSampleSeed = NULL, seed = NULL) { if (is.null(seed)) { seed <- as.integer(sample(1e5, 1)) } @@ -118,7 +119,7 @@ setTransformer <- function(numBlocks = 3, dimToken = 96, dimOut = 1, } if (hyperParamSearch == "random") { - param <- param[sample(length(param), randomSample)] + suppressWarnings(withr::with_seed(randomSampleSeed, {param <- param[sample(length(param), randomSample)]})) } attr(param, "settings") <- list( diff --git a/man/setMultiLayerPerceptron.Rd b/man/setMultiLayerPerceptron.Rd index a79ab49..07111d5 100644 --- a/man/setMultiLayerPerceptron.Rd +++ b/man/setMultiLayerPerceptron.Rd @@ -14,6 +14,7 @@ 
setMultiLayerPerceptron( seed = NULL, hyperParamSearch = "random", randomSample = 100, + randomSampleSeed = NULL, device = "cpu", batchSize = 1024, epochs = 30 @@ -38,6 +39,8 @@ setMultiLayerPerceptron( \item{randomSample}{How many random samples from hyperparameter space to use} +\item{randomSampleSeed}{Random seed to sample hyperparameter combinations} + \item{device}{Which device to run analysis on, either 'cpu' or 'cuda', default: 'cpu'} \item{batchSize}{Size of batch, default: 1024} diff --git a/man/setResNet.Rd b/man/setResNet.Rd index 5f66d45..0bd8464 100644 --- a/man/setResNet.Rd +++ b/man/setResNet.Rd @@ -16,6 +16,7 @@ setResNet( seed = NULL, hyperParamSearch = "random", randomSample = 100, + randomSampleSeed = NULL, device = "cpu", batchSize = 1024, epochs = 30 @@ -44,6 +45,8 @@ setResNet( \item{randomSample}{How many random samples from hyperparameter space to use} +\item{randomSampleSeed}{Random seed to sample hyperparameter combinations} + \item{device}{Which device to run analysis on, either 'cpu' or 'cuda', default: 'cpu'} \item{batchSize}{Size of batch, default: 1024} diff --git a/man/setTransformer.Rd b/man/setTransformer.Rd index f46a66a..483dc06 100644 --- a/man/setTransformer.Rd +++ b/man/setTransformer.Rd @@ -20,6 +20,7 @@ setTransformer( device = "cpu", hyperParamSearch = "random", randomSample = 1, + randomSampleSeed = NULL, seed = NULL ) } @@ -54,6 +55,8 @@ setTransformer( \item{randomSample}{How many samples to use in hyperparameter search if random} +\item{randomSampleSeed}{Random seed to sample hyperparameter combinations} + \item{seed}{Random seed to use} } \description{ From 926e7d00aa41434ebbdc9d88b1a5b9f11f6e460c Mon Sep 17 00:00:00 2001 From: Egill Axfjord Fridgeirsson Date: Wed, 1 Mar 2023 12:47:02 +0100 Subject: [PATCH 15/58] Lr find (#51) * add LRfinder, add setEstimator function for all estimator parameters, add metric to earlystopper. 
Adjust tests for setEstimator * add tests for coverage * add custom metric for scheduler/earlyStopping --- DESCRIPTION | 7 +- NAMESPACE | 4 +- R/Dataset.R | 1 - R/DeepPatientLevelPrediction.R | 1 - R/Estimator-class.R | 252 +++++++++++++++++------------ R/Estimator.R | 253 +++++++++++++++++++----------- R/LRFinder.R | 119 ++++++++++++++ R/MLP.R | 49 ++---- R/ResNet.R | 87 ++++------ R/Transformer.R | 76 ++++----- man/EarlyStopping.Rd | 6 +- man/Estimator.Rd | 79 ++++------ man/batchToDevice.Rd | 20 +++ man/gridCvDeep.Rd | 6 +- man/lrFinder.Rd | 42 +++++ man/setDefaultResNet.Rd | 20 +-- man/setDefaultTransformer.Rd | 20 +-- man/setEstimator.Rd | 29 +++- man/setMultiLayerPerceptron.Rd | 22 +-- man/setResNet.Rd | 22 +-- man/setTransformer.Rd | 22 +-- tests/testthat/setup.R | 27 +--- tests/testthat/test-Estimator.R | 190 +++++++++++++++++++--- tests/testthat/test-LRFinder.R | 44 ++++++ tests/testthat/test-MLP.R | 21 ++- tests/testthat/test-ResNet.R | 25 ++- tests/testthat/test-Transformer.R | 8 +- 27 files changed, 911 insertions(+), 541 deletions(-) create mode 100644 R/LRFinder.R create mode 100644 man/batchToDevice.Rd create mode 100644 man/lrFinder.Rd create mode 100644 tests/testthat/test-LRFinder.R diff --git a/DESCRIPTION b/DESCRIPTION index 948f7ae..adf33bf 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -17,15 +17,15 @@ URL: https://github.com/OHDSI/DeepPatientLevelPrediction BugReports: https://github.com/OHDSI/DeepPatientLevelPrediction/issues VignetteBuilder: knitr Depends: - R (>= 3.5.0) + R (>= 4.0.0) Imports: dplyr, - data.table, FeatureExtraction (>= 3.0.0), ParallelLogger (>= 2.0.0), PatientLevelPrediction (>= 6.0.4), rlang, torch (>= 0.8.0), + torchopt, withr Suggests: devtools, @@ -33,7 +33,8 @@ Suggests: knitr, markdown, plyr, - testthat + testthat, + PRROC Remotes: ohdsi/PatientLevelPrediction, ohdsi/FeatureExtraction, diff --git a/NAMESPACE b/NAMESPACE index a65b67a..38bb05b 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -4,13 +4,13 @@ export(Dataset) export(Estimator) export(fitEstimator) export(gridCvDeep) +export(lrFinder) export(predictDeepEstimator) export(setDefaultResNet) export(setDefaultTransformer) +export(setEstimator) export(setMultiLayerPerceptron) export(setResNet) export(setTransformer) -import(data.table) -importFrom(data.table,":=") importFrom(dplyr,"%>%") importFrom(rlang,.data) diff --git a/R/Dataset.R b/R/Dataset.R index e552d3d..d2c51a7 100644 --- a/R/Dataset.R +++ b/R/Dataset.R @@ -1,5 +1,4 @@ #' A torch dataset -#' @import data.table #' @export Dataset <- torch::dataset( name = "myDataset", diff --git a/R/DeepPatientLevelPrediction.R b/R/DeepPatientLevelPrediction.R index f6f5a19..6070709 100644 --- a/R/DeepPatientLevelPrediction.R +++ b/R/DeepPatientLevelPrediction.R @@ -23,6 +23,5 @@ #' @docType package #' @name DeepPatientLevelPrediction #' @importFrom dplyr %>% -#' @importFrom data.table := #' @importFrom rlang .data NULL diff --git a/R/Estimator-class.R b/R/Estimator-class.R index 4e6cecf..96a5f2d 100644 --- a/R/Estimator-class.R +++ b/R/Estimator-class.R @@ -27,54 +27,64 @@ Estimator <- R6::R6Class( public = list( #' @description #' Creates a new estimator - #' @param modelType The torch nn module to use as model + #' @param modelType The torch nn module to use as model #' @param modelParameters Parameters to initialize the model - #' @param fitParameters Parameters required for the estimator fitting - #' @param optimizer A torch optimizer to use, default is Adam - #' @param criterion The torch loss function to use, defaults to - #' binary cross 
entropy with logits - #' @param scheduler learning rate scheduler to use - #' @param device Which device to use for fitting, default is cpu - #' @param patience Patience to use for early stopping + #' @param estimatorSettings Parameters required for the estimator fitting initialize = function(modelType, modelParameters, - fitParameters, - optimizer = torch::optim_adam, - criterion = torch::nn_bce_with_logits_loss, - scheduler = torch::lr_reduce_on_plateau, - device = "cpu", - patience = 4) { - self$device <- device + estimatorSettings) { + self$seed <- estimatorSettings$seed + self$device <- estimatorSettings$device + torch::torch_manual_seed(seed=self$seed) self$model <- do.call(modelType, modelParameters) self$modelParameters <- modelParameters - self$fitParameters <- fitParameters - self$epochs <- self$itemOrDefaults(fitParameters, "epochs", 10) - self$learningRate <- self$itemOrDefaults(fitParameters, "learningRate", 1e-3) - self$l2Norm <- self$itemOrDefaults(fitParameters, "weightDecay", 1e-5) - self$batchSize <- self$itemOrDefaults(fitParameters, "batchSize", 1024) - self$prefix <- self$itemOrDefaults(fitParameters, "prefix", self$model$name) + self$estimatorSettings <- estimatorSettings + self$epochs <- self$itemOrDefaults(estimatorSettings, "epochs", 10) + self$learningRate <- self$itemOrDefaults(estimatorSettings, "learningRate", 1e-3) + self$l2Norm <- self$itemOrDefaults(estimatorSettings, "weightDecay", 1e-5) + self$batchSize <- self$itemOrDefaults(estimatorSettings, "batchSize", 1024) + self$prefix <- self$itemOrDefaults(estimatorSettings, "prefix", self$model$name) - self$previousEpochs <- self$itemOrDefaults(fitParameters, "previousEpochs", 0) + self$previousEpochs <- self$itemOrDefaults(estimatorSettings, "previousEpochs", 0) self$model$to(device = self$device) - self$optimizer <- optimizer( + self$optimizer <- estimatorSettings$optimizer( params = self$model$parameters, lr = self$learningRate, weight_decay = self$l2Norm ) - self$criterion <- criterion() + self$criterion <- estimatorSettings$criterion() - self$scheduler <- scheduler(self$optimizer, - patience = 1, - verbose = FALSE, mode = "max" - ) + if (!is.null(estimatorSettings$metric)) { + self$metric <- estimatorSettings$metric + if (is.character(self$metric)) { + if (self$metric == "auc") { + self$metric <- list(name="auc", + mode="max") + } else if (self$metric == "loss") { + self$metric <- list(name="loss", + mode="min") + } + } + if (!is.null(estimatorSettings$scheduler)) { + estimatorSettings$scheduler$params$mode <- self$metrix$mode + } + if (!is.null(estimatorSettings$earlyStopping)) { + estimatorSettings$earlyStopping$params$mode <- self$metric$mode + } + } + + if (!is.null(estimatorSettings$scheduler)) { + self$scheduler <- do.call(estimatorSettings$scheduler$fun, + c(self$optimizer, estimatorSettings$scheduler$params)) + } # gradient accumulation is useful when training large numbers where # you can only fit few samples on the GPU in each batch. 
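      # (Editor's note.) With an accumulation value n > 1 one would typically scale each batch
      # loss by 1/n and call optimizer$step() only every n batches; the value is fixed at 1
      # below, so no accumulation is actually performed yet.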
self$gradAccumulationIter <- 1 - if (!is.null(patience)) { - self$earlyStopper <- EarlyStopping$new(patience = patience) + if (!is.null(estimatorSettings$earlyStopping) && estimatorSettings$earlyStopping$useEarlyStopping) { + self$earlyStopper <- do.call(EarlyStopping$new, estimatorSettings$earlyStopping$params) } else { self$earlyStopper <- NULL } @@ -87,8 +97,7 @@ Estimator <- R6::R6Class( #' @param dataset a torch dataset to use for model fitting #' @param testDataset a torch dataset to use for early stopping fit = function(dataset, testDataset) { - valLosses <- c() - valAUCs <- c() + allScores <- list() batchIndex <- torch::torch_randperm(length(dataset)) + 1L batchIndex <- split(batchIndex, ceiling(seq_along(batchIndex) / self$batchSize)) @@ -109,31 +118,22 @@ Estimator <- R6::R6Class( delta <- endTime - startTime currentEpoch <- epochI + self$previousEpochs lr <- self$optimizer$param_groups[[1]]$lr - ParallelLogger::logInfo( - "Epochs: ", currentEpoch, - " | Val AUC: ", round(scores$auc, 3), - " | Val Loss: ", round(scores$loss, 3), - " | Train Loss: ", round(trainLoss, 3), - " | Time: ", round(delta, 3), " ", - units(delta), - " | LR: ", lr - ) - self$scheduler$step(scores$auc) - valLosses <- c(valLosses, scores$loss) - valAUCs <- c(valAUCs, scores$auc) + self$printProgress(scores, trainLoss, delta, currentEpoch) + self$scheduler$step(scores$metric) + allScores[[epochI]] <- scores learnRates <- c(learnRates, lr) times <- c(times, round(delta, 3)) if (!is.null(self$earlyStopper)) { - self$earlyStopper$call(scores$auc) + self$earlyStopper$call(scores$metric) if (self$earlyStopper$improved) { # here it saves the results to lists rather than files modelStateDict[[epochI]] <- lapply(self$model$state_dict(), function(x) x$detach()$cpu()) epoch[[epochI]] <- currentEpoch } if (self$earlyStopper$earlyStop) { - ParallelLogger::logInfo("Early stopping, validation AUC stopped improving") + ParallelLogger::logInfo("Early stopping, validation metric stopped improving") ParallelLogger::logInfo("Average time per epoch was: ", round(mean(as.numeric(times)), 3), " ", units(delta)) - self$finishFit(valAUCs, modelStateDict, valLosses, epoch, learnRates) + self$finishFit(allScores, modelStateDict, epoch, learnRates) return(invisible(self)) } } else { @@ -142,7 +142,7 @@ Estimator <- R6::R6Class( } } ParallelLogger::logInfo("Average time per epoch was: ", round(mean(as.numeric(times)), 3), " ", units(delta)) - self$finishFit(valAUCs, modelStateDict, valLosses, epoch, learnRates) + self$finishFit(allScores, modelStateDict, epoch, learnRates) invisible(self) }, @@ -155,9 +155,9 @@ Estimator <- R6::R6Class( ix <- 1 self$model$train() progressBar <- utils::txtProgressBar(style = 3) - coro::loop(for (b in batchIndex) { + for (b in batchIndex) { self$optimizer$zero_grad() - batch <- self$batchToDevice(dataset[b]) + batch <- batchToDevice(dataset[b], device=self$device) out <- self$model(batch[[1]]) loss <- self$criterion(out, batch[[2]]) loss$backward() @@ -166,7 +166,7 @@ Estimator <- R6::R6Class( trainLosses[ix] <- loss$detach() utils::setTxtProgressBar(progressBar, ix / length(batchIndex)) ix <- ix + 1 - }) + } close(progressBar) trainLosses$mean()$item() }, @@ -183,14 +183,14 @@ Estimator <- R6::R6Class( targets <- list() self$model$eval() ix <- 1 - coro::loop(for (b in batchIndex) { - batch <- self$batchToDevice(dataset[b]) + for (b in batchIndex) { + batch <- batchToDevice(dataset[b], device=self$device) pred <- self$model(batch[[1]]) predictions <- c(predictions, pred) targets <- c(targets, batch[[2]]) 
loss[ix] <- self$criterion(pred, batch[[2]]) ix <- ix + 1 - }) + } mean_loss <- loss$mean()$item() predictionsClass <- data.frame( value = as.matrix(torch::torch_sigmoid(torch::torch_cat(predictions)$cpu())), @@ -198,19 +198,36 @@ Estimator <- R6::R6Class( ) attr(predictionsClass, "metaData")$modelType <- "binary" auc <- PatientLevelPrediction::computeAuc(predictionsClass) + scores <- list() + if (!is.null(self$metric)) { + if (self$metric$name == "auc") { + scores$metric <- auc + } else if (self$metric$name == "loss") { + scores$metric <- mean_loss + } else { + metric <- self$metric$fun(predictionsClass$value, predictionsClass$outcomeCount) + scores$metric <- metric + } + } + scores$auc <- auc + scores$loss <- mean_loss }) - return(list(loss = mean_loss, auc = auc)) + return(scores) }, #' @description #' operations that run when fitting is finished - #' @param valAUCs validation AUC values + #' @param scores validation scores #' @param modelStateDict fitted model parameters - #' @param valLosses validation losses #' @param epoch list of epochs fit #' @param learnRates learning rate sequence used so far - finishFit = function(valAUCs, modelStateDict, valLosses, epoch, learnRates) { - bestEpochInd <- which.max(valAUCs) # change this if a different metric is used + finishFit = function(scores, modelStateDict, epoch, learnRates) { + if (self$metric$mode=="max") { + bestEpochInd <- which.max(unlist(lapply(scores, function(x) x$metric))) + } + else if (self$metric$mode=="min") { + bestEpochInd <- which.min(unlist(lapply(scores, function(x) x$metric))) + } bestModelStateDict <- lapply(modelStateDict[[bestEpochInd]], function(x) x$to(device = self$device)) self$model$load_state_dict(bestModelStateDict) @@ -218,28 +235,62 @@ Estimator <- R6::R6Class( bestEpoch <- epoch[[bestEpochInd]] self$bestEpoch <- bestEpoch self$bestScore <- list( - loss = valLosses[bestEpochInd], - auc = valAUCs[bestEpochInd] + loss = scores[[bestEpochInd]]$loss, + auc = scores[[bestEpochInd]]$auc ) self$learnRateSchedule <- learnRates[1:bestEpochInd] ParallelLogger::logInfo("Loaded best model (based on AUC) from epoch ", bestEpoch) ParallelLogger::logInfo("ValLoss: ", self$bestScore$loss) ParallelLogger::logInfo("valAUC: ", self$bestScore$auc) + if (!is.null(self$metric) && (!self$metric$name=='auc') && (!self$metric$name=='loss')) { + self$bestScore[[self$metric$name]] <- scores[[bestEpochInd]]$metric + ParallelLogger::logInfo(self$metric$name,": ", self$bestScore[[self$metric$name]]) + } + }, + + #' @description Print out training progress per epoch + #' @param scores scores returned by `self$score` + #' @param trainLoss training loss + #' @param delta how long did the epoch take + #' @param currentEpoch the current epoch number + printProgress = function(scores, trainLoss, delta, currentEpoch) { + if (!is.null(self$metric) && (!self$metric$name=='auc') && (!self$metric$name=='loss')) { + ParallelLogger::logInfo( + "Epochs: ", currentEpoch, + " | Val ", self$metric$name, ": ", round(scores$metric, 3), + " | Val AUC: ", round(scores$auc, 3), + " | Val Loss: ", round(scores$loss, 3), + " | Train Loss: ", round(trainLoss, 3), + " | Time: ", round(delta, 3), " ", + units(delta), + " | LR: ", self$optimizer$param_groups[[1]]$lr + ) + } else { + ParallelLogger::logInfo( + "Epochs: ", currentEpoch, + " | Val AUC: ", round(scores$auc, 3), + " | Val Loss: ", round(scores$loss, 3), + " | Train Loss: ", round(trainLoss, 3), + " | Time: ", round(delta, 3), " ", + units(delta), + " | LR: ", self$optimizer$param_groups[[1]]$lr + ) + } 
}, #' @description #' Fits whole training set on a specific number of epochs - #' TODO What happens when learning rate changes per epochs? - #' Ideally I would copy the learning rate strategy from before - #' and adjust for different sizes ie more iterations/updates??? #' @param dataset torch dataset #' @param learnRates learnRateSchedule from CV fitWholeTrainingSet = function(dataset, learnRates = NULL) { - if (is.null(self$bestEpoch)) { + if (length(learnRates) > 1) { + self$bestEpoch <- length(learnRates) + } else if (is.null(self$bestEpoch)) { self$bestEpoch <- self$epochs } - + # TODO constant LR + batchIndex <- torch::torch_randperm(length(dataset)) + 1L batchIndex <- split(batchIndex, ceiling(seq_along(batchIndex) / self$batchSize)) for (epoch in 1:self$bestEpoch) { @@ -259,7 +310,7 @@ Estimator <- R6::R6Class( list( modelStateDict = self$model$state_dict(), modelParameters = self$modelParameters, - fitParameters = self$fitParameters, + estimatorSettings = self$estimatorSettings, epoch = self$epochs ), savePath @@ -281,7 +332,7 @@ Estimator <- R6::R6Class( progressBar <- utils::txtProgressBar(style = 3) ix <- 1 coro::loop(for (b in batchIndex) { - batch <- self$batchToDevice(dataset[b]) + batch <- batchToDevice(dataset[b], self$device) target <- batch$target pred <- self$model(batch$batch) predictions[b] <- torch::torch_sigmoid(pred) @@ -310,31 +361,6 @@ Estimator <- R6::R6Class( return(predicted_class) }, - #' @description - #' sends a batch of data to device - #' assumes batch includes lists of tensors to arbitrary nested depths - #' @param batch the batch to send, usually a list of torch tensors - #' @return the batch on the required device - batchToDevice = function(batch) { - if (class(batch)[1] == "torch_tensor") { - batch <- batch$to(device = self$device) - } else { - ix <- 1 - for (b in batch) { - if (class(b)[1] == "torch_tensor") { - b <- b$to(device = self$device) - } else { - b <- self$batchToDevice(b) - } - if (!is.null(b)) { - batch[[ix]] <- b - } - ix <- ix + 1 - } - } - return(batch) - }, - #' @description #' select item from list, and if it's null sets a default #' @param list A list with items @@ -356,12 +382,14 @@ EarlyStopping <- R6::R6Class( lock_objects = FALSE, public = list( #' @description - #' Creates a new earlystopping object + #' Creates a new earlyStopping object #' @param patience Stop after this number of epochs if loss doesn't improve #' @param delta How much does the loss need to improve to count as improvement #' @param verbose If information should be printed out + #' @param mode either `min` or `max` depending on metric to be used for earlyStopping #' @return a new earlystopping object - initialize = function(patience = 3, delta = 0, verbose = TRUE) { + initialize = function(patience = 3, delta = 0, verbose = TRUE, + mode='max') { self$patience <- patience self$counter <- 0 self$verbose <- verbose @@ -370,13 +398,18 @@ EarlyStopping <- R6::R6Class( self$improved <- FALSE self$delta <- delta self$previousScore <- 0 + self$mode <- mode }, #' @description #' call the earlystopping object and increment a counter if loss is not #' improving #' @param metric the current metric value call = function(metric) { - score <- metric + if (self$mode=='max') { + score <- metric + } else { + score <- -1 * metric + } if (is.null(self$bestScore)) { self$bestScore <- score self$improved <- TRUE @@ -401,3 +434,30 @@ EarlyStopping <- R6::R6Class( } ) ) + +#' sends a batch of data to device +#' @description +#' sends a batch of data to device +#' assumes batch 
includes lists of tensors to arbitrary nested depths +#' @param batch the batch to send, usually a list of torch tensors +#' @param device which device to send batch to +#' @return the batch on the required device +batchToDevice = function(batch, device) { + if (class(batch)[1] == "torch_tensor") { + batch <- batch$to(device = device) + } else { + ix <- 1 + for (b in batch) { + if (class(b)[1] == "torch_tensor") { + b <- b$to(device = device) + } else { + b <- batchToDevice(b, device) + } + if (!is.null(b)) { + batch[[ix]] <- b + } + ix <- ix + 1 + } + } + return(batch) +} diff --git a/R/Estimator.R b/R/Estimator.R index 61ef755..80e940a 100644 --- a/R/Estimator.R +++ b/R/Estimator.R @@ -22,16 +22,71 @@ #' creates settings for the Estimator, which takes a model and trains it #' #' @name setEstimator -#' @param learningRate what learning rate to use -#' @param weightDecay what weight_decay to use -#' @param optimizer which optimizer to use -#' @param scheduler which learning rate scheduler to use -#' @param criterion loss function to use -#' @param earlyStopping If earlyStopping should be used which stops the training of your metric is not improving -#' @param earlyStoppingMetric Which parameter to use for early stopping -#' @param patience patience for earlyStopper -#' @param hyperparameterMetric which metric to use for hyperparameter, loss, auc, auprc or a custom function -NULL +#' @param learningRate what learning rate to use +#' @param weightDecay what weight_decay to use +#' @param batchSize batchSize to use +#' @param epochs how many epochs to train for +#' @param device what device to train on +#' @param optimizer which optimizer to use +#' @param scheduler which learning rate scheduler to use +#' @param criterion loss function to use +#' @param earlyStopping If earlyStopping should be used which stops the training of your metric is not improving +#' @param metric either `auc` or `loss` or a custom metric to use. This is the metric used for scheduler and earlyStopping. +#' Needs to be a list with function `fun`, mode either `min` or `max` and a `name`, +#' `fun` needs to be a function that takes in prediction and labels and outputs a score. 
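+#' For example, a hypothetical area-under-the-precision-recall-curve metric could be supplied as
+#' `list(fun = function(prediction, labels) PRROC::pr.curve(scores.class0 = prediction, weights.class0 = labels)$auc.integral, name = "auprc", mode = "max")`.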
+#' @param seed seed to initialize weights of model with +#' @export +setEstimator <- function(learningRate='auto', + weightDecay = 0.0, + batchSize = 512, + epochs = 30, + device='cpu', + optimizer = torchopt::optim_adamw, + scheduler = list(fun=torch::lr_reduce_on_plateau, + params=list(patience=1)), + criterion = torch::nn_bce_with_logits_loss, + earlyStopping = list(useEarlyStopping=TRUE, + params = list(patience=4)), + metric = "auc", + seed = NULL +) { + if (length(learningRate)==1 && learningRate=='auto') {findLR <- TRUE} else {findLR <- FALSE} + if (is.null(seed)) { + seed <- as.integer(sample(1e5, 1)) + } + + + estimatorSettings <- list(learningRate=learningRate, + weightDecay=weightDecay, + batchSize=batchSize, + epochs=epochs, + device=device, + optimizer=optimizer, + scheduler=scheduler, + criterion=criterion, + earlyStopping=earlyStopping, + findLR=findLR, + metric=metric, + seed=seed[1] + ) + paramsToTune <- list() + for (name in names(estimatorSettings)) { + param <- estimatorSettings[[name]] + if (length(param) > 1 && is.atomic(param)) { + paramsToTune[[paste0('estimator.',name)]] <- param + } + if ("params" %in% names(param)) { + for (name2 in names(param[["params"]])) { + param2 <- param[["params"]][[name2]] + if (length(param2) > 1) { + paramsToTune[[paste0('estimator.',name,'.',name2)]] <- param2 + } + } + } + } + estimatorSettings$paramsToTune <- paramsToTune + return(estimatorSettings) +} #' fitEstimator #' @@ -49,16 +104,12 @@ fitEstimator <- function(trainData, analysisId, ...) { start <- Sys.time() - + # check covariate data if (!FeatureExtraction::isCovariateData(trainData$covariateData)) { stop("Needs correct covariateData") } - - param <- modelSettings$param - - # get the settings from the param - settings <- attr(param, "settings") + if (!is.null(trainData$folds)) { trainData$labels <- merge(trainData$labels, trainData$fold, by = "rowId") } @@ -66,21 +117,20 @@ fitEstimator <- function(trainData, covariateData = trainData$covariateData, cohort = trainData$labels ) - + covariateRef <- mappedCovariateData$covariateRef - + outLoc <- PatientLevelPrediction::createTempModelLoc() cvResult <- do.call( what = gridCvDeep, args = list( mappedData = mappedCovariateData, labels = trainData$labels, - settings = settings, - modelLocation = outLoc, - paramSearch = param + modelSettings = modelSettings, + modelLocation = outLoc ) ) - + hyperSummary <- do.call(rbind, lapply(cvResult$paramGridSearch, function(x) x$hyperSummary)) prediction <- cvResult$prediction incs <- rep(1, covariateRef %>% dplyr::tally() %>% @@ -93,16 +143,16 @@ fitEstimator <- function(trainData, covariateValue = 0, isNumeric = cvResult$numericalIndex ) - - + + comp <- start - Sys.time() result <- list( model = cvResult$estimator, # file.path(outLoc), - + preprocessing = list( featureEngineering = attr(trainData$covariateData, "metaData")$featureEngineering, tidyCovariates = attr(trainData$covariateData, "metaData")$tidyCovariateDataSettings, - requireDenseMatrix = settings$requiresDenseMatrix + requireDenseMatrix = F ), prediction = prediction, modelDesign = PatientLevelPrediction::createModelDesign( @@ -124,18 +174,18 @@ fitEstimator <- function(trainData, attrition = attr(trainData, "metaData")$attrition, trainingTime = paste(as.character(abs(comp)), attr(comp, "units")), trainingDate = Sys.Date(), - modelName = settings$name, + modelName = modelSettings$modelType, finalModelParameters = cvResult$finalParam, hyperParamSearch = hyperSummary ), covariateImportance = covariateRef ) - + class(result) <- 
"plpModel" attr(result, "predictionFunction") <- "predictDeepEstimator" attr(result, "modelType") <- "binary" - attr(result, "saveType") <- attr(param, "settings")$saveType - + attr(result, "saveType") <- modelSettings$saveType + return(result) } @@ -158,37 +208,36 @@ predictDeepEstimator <- function(plpModel, } if ("plpData" %in% class(data)) { mappedData <- PatientLevelPrediction::MapIds(data$covariateData, - cohort = cohort, - mapping = plpModel$covariateImportance %>% - dplyr::select( - "columnId", - "covariateId" - ) + cohort = cohort, + mapping = plpModel$covariateImportance %>% + dplyr::select( + "columnId", + "covariateId" + ) ) data <- Dataset(mappedData$covariates, - numericalIndex = plpModel$covariateImportance$isNumeric + numericalIndex = plpModel$covariateImportance$isNumeric ) } - + # get predictions prediction <- cohort if (is.character(plpModel$model)) { model <- torch::torch_load(file.path(plpModel$model, "DeepEstimatorModel.pt"), device = "cpu") estimator <- Estimator$new( - modelType = attr(plpModel$modelDesign$modelSettings$param, "settings")$modelType, + modelType = plpModel$modelDesign$modelSettings$modelType, modelParameters = model$modelParameters, - fitParameters = model$fitParameters, - device = attr(plpModel$modelDesign$modelSettings$param, "settings")$device + estimatorSettings = model$estimatorSettings ) estimator$model$load_state_dict(model$modelStateDict) prediction$value <- estimator$predictProba(data) } else { prediction$value <- plpModel$model$predictProba(data) } - - + + attr(prediction, "metaData")$modelType <- attr(plpModel, "modelType") - + return(prediction) } @@ -200,67 +249,73 @@ predictDeepEstimator <- function(plpModel, #' #' @param mappedData Mapped data with covariates #' @param labels Dataframe with the outcomes -#' @param settings Settings of the model +#' @param modelSettings Settings of the model #' @param modelLocation Where to save the model -#' @param paramSearch model parameters to perform search over #' #' @export gridCvDeep <- function(mappedData, labels, - settings, - modelLocation, - paramSearch) { - modelParamNames <- settings$modelParamNames - fitParamNames <- c("weightDecay", "learningRate") - epochs <- settings$epochs - batchSize <- settings$batchSize - modelType <- settings$modelType - device <- settings$device - - ParallelLogger::logInfo(paste0("Running CV for ", modelType, " model")) - + modelSettings, + modelLocation) { + ParallelLogger::logInfo(paste0("Running hyperparameter search for ", modelSettings$modelType, " model")) + ########################################################################### - + + paramSearch <- modelSettings$param gridSearchPredictons <- list() length(gridSearchPredictons) <- length(paramSearch) dataset <- Dataset(mappedData$covariates, labels$outcomeCount) + + estimatorSettings <- modelSettings$estimatorSettings + + fitParams <- names(paramSearch[[1]])[grepl("^estimator", names(paramSearch[[1]]))] + for (gridId in 1:length(paramSearch)) { ParallelLogger::logInfo(paste0("Running hyperparameter combination no ", gridId)) ParallelLogger::logInfo(paste0("HyperParameters: ")) ParallelLogger::logInfo(paste(names(paramSearch[[gridId]]), paramSearch[[gridId]], collapse = " | ")) - modelParams <- paramSearch[[gridId]][modelParamNames] - - fitParams <- paramSearch[[gridId]][fitParamNames] - fitParams$epochs <- epochs - fitParams$batchSize <- batchSize - + modelParams <- paramSearch[[gridId]][modelSettings$modelParamNames] + + + estimatorSettings <- fillEstimatorSettings(estimatorSettings, fitParams, + 
paramSearch[[gridId]]) # initiate prediction prediction <- c() - + fold <- labels$index ParallelLogger::logInfo(paste0("Max fold: ", max(fold))) modelParams$catFeatures <- dataset$numCatFeatures() modelParams$numFeatures <- dataset$numNumFeatures() + + if (estimatorSettings$findLR) { + lr <- lrFinder(dataset=dataset, + modelType = modelSettings$modelType, + modelParams = modelParams, + estimatorSettings = estimatorSettings) + ParallelLogger::logInfo(paste0("Auto learning rate selected as: ", lr)) + estimatorSettings$learningRate <- lr + } + + learnRates <- list() for (i in 1:max(fold)) { ParallelLogger::logInfo(paste0("Fold ", i)) trainDataset <- torch::dataset_subset(dataset, indices = which(fold != i)) testDataset <- torch::dataset_subset(dataset, indices = which(fold == i)) estimator <- Estimator$new( - modelType = modelType, + modelType = modelSettings$modelType, modelParameters = modelParams, - fitParameters = fitParams, - device = device + estimatorSettings = estimatorSettings ) - + estimator$fit( trainDataset, testDataset ) - + ParallelLogger::logInfo("Calculating predictions on left out fold set...") - + prediction <- rbind( prediction, predictDeepEstimator( @@ -276,7 +331,7 @@ gridCvDeep <- function(mappedData, } maxIndex <- which.max(unlist(sapply(learnRates, `[`, 2))) paramSearch[[gridId]]$learnSchedule <- learnRates[[maxIndex]] - + gridSearchPredictons[[gridId]] <- list( prediction = prediction, param = paramSearch[[gridId]] @@ -286,37 +341,37 @@ gridCvDeep <- function(mappedData, paramGridSearch <- lapply(gridSearchPredictons, function(x) { do.call(PatientLevelPrediction::computeGridPerformance, x) }) # cvAUCmean, cvAUC, param - + optimalParamInd <- which.max(unlist(lapply(paramGridSearch, function(x) x$cvPerformance))) finalParam <- paramGridSearch[[optimalParamInd]]$param - + cvPrediction <- gridSearchPredictons[[optimalParamInd]]$prediction cvPrediction$evaluationType <- "CV" - + ParallelLogger::logInfo("Training final model using optimal parameters") - # get the params - modelParams <- finalParam[modelParamNames] - fitParams <- finalParam[fitParamNames] - fitParams$epochs <- finalParam$learnSchedule$bestEpoch - fitParams$batchSize <- batchSize + modelParams <- finalParam[modelSettings$modelParamNames] + + # create the dir if (!dir.exists(file.path(modelLocation))) { dir.create(file.path(modelLocation), recursive = T) } modelParams$catFeatures <- dataset$numCatFeatures() modelParams$numFeatures <- dataset$numNumFeatures() - + + estimatorSettings <- fillEstimatorSettings(estimatorSettings, fitParams, + finalParam) + estimator <- Estimator$new( - modelType = modelType, + modelType = modelSettings$modelType, modelParameters = modelParams, - fitParameters = fitParams, - device = device + estimatorSettings = estimatorSettings ) numericalIndex <- dataset$getNumericalIndex() - + estimator$fitWholeTrainingSet(dataset, finalParam$learnSchedule$LRs) - + ParallelLogger::logInfo("Calculating predictions on all train data...") prediction <- predictDeepEstimator( plpModel = estimator, @@ -324,7 +379,7 @@ gridCvDeep <- function(mappedData, cohort = labels ) prediction$evaluationType <- "Train" - + prediction <- rbind( prediction, cvPrediction @@ -332,13 +387,13 @@ gridCvDeep <- function(mappedData, # modify prediction prediction <- prediction %>% dplyr::select(-"index") - + prediction$cohortStartDate <- as.Date(prediction$cohortStartDate, origin = "1970-01-01") - - + + # save torch code here estimator$save(modelLocation, "DeepEstimatorModel.pt") - + return( list( estimator = 
modelLocation, @@ -348,4 +403,18 @@ gridCvDeep <- function(mappedData, numericalIndex = numericalIndex ) ) +} + +# utility function to add instances of parameters to estimatorSettings during grid search +fillEstimatorSettings <- function(estimatorSettings, fitParams, paramSearch) { + for (fp in fitParams) { + components <- strsplit(fp, "[.]")[[1]] + + if (length(components)==2) { + estimatorSettings[[components[[2]]]] <- paramSearch[[fp]] + } else { + estimatorSettings[[components[[2]]]]$params[[components[[3]]]] <- paramSearch[[fp]] + } + } + return(estimatorSettings) } \ No newline at end of file diff --git a/R/LRFinder.R b/R/LRFinder.R new file mode 100644 index 0000000..2e79fca --- /dev/null +++ b/R/LRFinder.R @@ -0,0 +1,119 @@ +lrPerBatch <- torch::lr_scheduler( + "lrPerBatch", + initialize = function( + optimizer, + startLR = 1e-7, + endLR = 1.0, + nIters = 100, + lastEpoch = -1, + verbose = FALSE) { + + self$optimizer <- optimizer + self$endLR <- endLR + self$base_lrs <- startLR + self$iterations <- nIters + self$last_epoch <- lastEpoch + self$multiplier <- (endLR/startLR)^(1/nIters) + + super$initialize(optimizer, last_epoch=lastEpoch, verbose) + + }, + + get_lr = function() { + if (self$last_epoch > 0) { + lrs <- numeric(length(self$optimizer$param_groups)) + for (i in seq_along(self$optimizer$param_groups)) { + lrs[i] <- self$base_lrs[[i]] * (self$endLR / self$base_lrs[[i]]) ^ (self$last_epoch/(self$iterations-1)) + } + } else { + lrs <- as.numeric(self$base_lrs) + } + lrs + } + +) + +#' Find learning rate that decreases loss the most +#' @description Method originated from https://arxiv.org/abs/1506.01186 but this +#' implementation draws inspiration from various other implementations such as +#' pytorch lightning, fastai, luz and pytorch-lr-finder. +#' @param dataset torch dataset, training dataset +#' @param modelType the function used to initialize the model +#' @param modelParams parameters used to initialize model +#' @param estimatorSettings settings for estimator to fit model +#' @param minLR lower bound of learning rates to search through +#' @param maxLR upper bound of learning rates to search through +#' @param numLR number of learning rates to go through +#' @param smooth smoothing to use on losses +#' @param divergenceThreshold if loss increases this amount above the minimum, stop. 
+#' @export +lrFinder <- function(dataset, modelType, modelParams, estimatorSettings, + minLR=1e-7, maxLR=1, numLR=100, smooth=0.05, + divergenceThreshold=4) { + torch::torch_manual_seed(seed=estimatorSettings$seed) + model <- do.call(modelType, modelParams) + model$to(device=estimatorSettings$device) + + optimizer <- estimatorSettings$optimizer(model$parameters, lr=minLR) + + # made a special lr scheduler for this task + scheduler <- lrPerBatch(optimizer = optimizer, + startLR = minLR, + endLR = maxLR, + nIters = numLR) + + criterion <- estimatorSettings$criterion() + + batchIndex <- seq(length(dataset)) + set.seed(estimatorSettings$seed) + + losses <- numeric(numLR) + lrs <- numeric(numLR) + ParallelLogger::logInfo('\nSearching for best learning rate') + progressBar <- utils::txtProgressBar(style = 3) + for (i in seq(numLR)) { + optimizer$zero_grad() + + batch <- dataset[sample(batchIndex, estimatorSettings$batchSize)] + batch <- batchToDevice(batch, device=estimatorSettings$device) + + output <- model(batch$batch) + + loss <- criterion(output, batch$target) + if (!is.null(smooth) && i != 1) { + losses[i] <- smooth * loss$item() + (1 - smooth) * losses[i-1] + } else { + losses[i] <- loss$item() + } + lrs[i] <- optimizer$param_groups[[1]]$lr + + loss$backward() + optimizer$step() + scheduler$step() + utils::setTxtProgressBar(progressBar, i / numLR) + + if (i == 1) { + bestLoss <- losses[i] + } else { + if (losses[i] < bestLoss) { + bestLoss <- losses[i] + } + } + + if (losses[i] > (divergenceThreshold * bestLoss)) { + ParallelLogger::logInfo("\nLoss diverged - stopped early") + break + } + + } + + # find LR where gradient is highest but before global minimum is reached + # I added -5 to make sure it is not still in the minimum + globalMinimum <- which.min(losses) + grad <- as.numeric(torch::torch_diff(torch::torch_tensor(losses[1:(globalMinimum-5)]))) + smallestGrad <- which.min(grad) + + suggestedLR <- lrs[smallestGrad] + + return(suggestedLR) +} \ No newline at end of file diff --git a/R/MLP.R b/R/MLP.R index cfcb242..9363cfc 100644 --- a/R/MLP.R +++ b/R/MLP.R @@ -29,46 +29,37 @@ #' @param sizeHidden Amount of neurons in each default layer, default: 2^(6:10) (64 to 1024) #' @param dropout How much dropout to apply after first linear, default: seq(0, 0.3, 0.05) #' @param sizeEmbedding Size of embedding layer, default: 2^(6:9) (64 to 512) -#' @param weightDecay Weight decay to apply, default: c(1e-6, 1e-3) -#' @param learningRate Learning rate to use. default: c(1e-2, 1e-5) -#' @param seed Seed to use for sampling hyperparameter space +#' @param estimatorSettings settings of Estimator created with `setEstimator` #' @param hyperParamSearch Which kind of hyperparameter search to use random sampling or exhaustive grid search. 
default: 'random' #' @param randomSample How many random samples from hyperparameter space to use #' @param randomSampleSeed Random seed to sample hyperparameter combinations -#' @param device Which device to run analysis on, either 'cpu' or 'cuda', default: 'cpu' -#' @param batchSize Size of batch, default: 1024 -#' @param epochs Number of epochs to run, default: 10 #' #' @export setMultiLayerPerceptron <- function(numLayers = c(1:8), sizeHidden = c(2^(6:9)), dropout = c(seq(0, 0.5, 0.05)), sizeEmbedding = c(2^(6:9)), - weightDecay = c(1e-6, 1e-3), - learningRate = c(1e-2, 3e-4, 1e-5), - seed = NULL, + estimatorSettings = setEstimator( + learningRate = 'auto', + weightDecay = c(1e-6, 1e-3), + batchSize = 1024, + epochs = 30, + device="cpu"), hyperParamSearch = "random", randomSample = 100, - randomSampleSeed = NULL, - device = "cpu", - batchSize = 1024, - epochs = 30) { - if (is.null(seed)) { - seed <- as.integer(sample(1e5, 1)) - } + randomSampleSeed = NULL) { paramGrid <- list( numLayers = numLayers, sizeHidden = sizeHidden, dropout = dropout, - sizeEmbedding = sizeEmbedding, - weightDecay = weightDecay, - learningRate = learningRate, - seed = list(as.integer(seed[[1]])) + sizeEmbedding = sizeEmbedding ) - + + paramGrid <- c(paramGrid, estimatorSettings$paramsToTune) + param <- PatientLevelPrediction::listCartesian(paramGrid) - if (randomSample>length(param)) { + if (hyperParamSearch == "random" && randomSample>length(param)) { stop(paste("\n Chosen amount of randomSamples is higher than the amount of possible hyperparameter combinations.", "\n randomSample:", randomSample,"\n Possible hyperparameter combinations:", length(param), "\n Please lower the amount of randomSamples")) @@ -78,11 +69,10 @@ setMultiLayerPerceptron <- function(numLayers = c(1:8), suppressWarnings(withr::with_seed(randomSampleSeed, {param <- param[sample(length(param), randomSample)]})) } - attr(param, "settings") <- list( - seed = seed[1], - device = device, - batchSize = batchSize, - epochs = epochs, + results <- list( + fitFunction = "fitEstimator", + param = param, + estimatorSettings = estimatorSettings, modelType = "MLP", saveType = "file", modelParamNames = c( @@ -91,11 +81,6 @@ setMultiLayerPerceptron <- function(numLayers = c(1:8), ) ) - results <- list( - fitFunction = "fitEstimator", - param = param - ) - class(results) <- "modelSettings" return(results) diff --git a/R/ResNet.R b/R/ResNet.R index a08c42c..ac95170 100644 --- a/R/ResNet.R +++ b/R/ResNet.R @@ -25,35 +25,24 @@ #' Model architecture from by https://arxiv.org/abs/2106.11959 . #' Hyperparameters chosen by a experience on a few prediction problems. 
#' -#' @param device Which device to run analysis on, either 'cpu' or 'cuda', default: 'cpu' -#' @param batchSize Size of batch, default: 1024 -#' @param epochs Number of epochs to run, default: 10 -#' @param learningRate Learning rate to use, default: 0.001 -#' @param weightDecay The weight decay to use -#' @param seed Random seed to use +#' @param estimatorSettings created with ```setEstimator``` #' @export -setDefaultResNet <- function(device='cpu', - batchSize=1024, - epochs=10, - learningRate=0.001, - weightDecay=1e-6, - seed=NULL) { - +setDefaultResNet <- function(estimatorSettings=setEstimator(learningRate='auto', + weightDecay=1e-6, + device='cpu', + batchSize=1024, + epochs=50, + seed=NULL)) { resnetSettings <- setResNet(numLayers = 6, sizeHidden = 512, hiddenFactor = 2, residualDropout = 0.1, hiddenDropout = 0.4, sizeEmbedding = 256, - weightDecay = weightDecay, - learningRate = learningRate, + estimatorSettings = estimatorSettings, hyperParamSearch = 'random', - randomSample = 1, - device = device, - batchSize = batchSize, - seed = seed, - epochs = epochs) + randomSample = 1) attr(resnetSettings, 'settings')$name <- 'defaultResnet' return(resnetSettings) } @@ -74,16 +63,10 @@ setDefaultResNet <- function(device='cpu', #' @param residualDropout How much dropout to apply after last linear layer in ResLayer, default: seq(0, 0.3, 0.05) #' @param hiddenDropout How much dropout to apply after first linear layer in ResLayer, default: seq(0, 0.3, 0.05) #' @param sizeEmbedding Size of embedding layer, default: 2^(6:9) (64 to 512) -#' @param weightDecay Weight decay to apply, default: c(1e-6, 1e-3) -#' @param learningRate Learning rate to use. default: c(1e-2, 1e-5) -#' @param seed Seed to use for sampling hyperparameter space +#' @param estimatorSettings created with ```setEstimator``` #' @param hyperParamSearch Which kind of hyperparameter search to use random sampling or exhaustive grid search. 
default: 'random' #' @param randomSample How many random samples from hyperparameter space to use #' @param randomSampleSeed Random seed to sample hyperparameter combinations -#' @param device Which device to run analysis on, either 'cpu' or 'cuda', default: 'cpu' -#' @param batchSize Size of batch, default: 1024 -#' @param epochs Number of epochs to run, default: 10 -#' #' @export setResNet <- function(numLayers = c(1:8), sizeHidden = c(2^(6:10)), @@ -91,34 +74,30 @@ setResNet <- function(numLayers = c(1:8), residualDropout = c(seq(0, 0.5, 0.05)), hiddenDropout = c(seq(0, 0.5, 0.05)), sizeEmbedding = c(2^(6:9)), - weightDecay = c(1e-6, 1e-3), - learningRate = c(1e-2, 3e-4, 1e-5), - seed = NULL, + estimatorSettings = setEstimator(learningRate='auto', + weightDecay=c(1e-6, 1e-3), + device='cpu', + batchSize=1024, + epochs=30, + seed=NULL), hyperParamSearch = "random", randomSample = 100, - randomSampleSeed = NULL, - device = "cpu", - batchSize = 1024, - epochs = 30) { - if (is.null(seed)) { - seed <- as.integer(sample(1e5, 1)) - } - + randomSampleSeed = NULL) +{ paramGrid <- list( numLayers = numLayers, sizeHidden = sizeHidden, hiddenFactor = hiddenFactor, residualDropout = residualDropout, hiddenDropout = hiddenDropout, - sizeEmbedding = sizeEmbedding, - weightDecay = weightDecay, - learningRate = learningRate, - seed = list(as.integer(seed[[1]])) - ) + sizeEmbedding = sizeEmbedding) + + paramGrid <- c(paramGrid, estimatorSettings$paramsToTune) + param <- PatientLevelPrediction::listCartesian(paramGrid) - if (randomSample>length(param)) { + if (hyperParamSearch == "random" && randomSample>length(param)) { stop(paste("\n Chosen amount of randomSamples is higher than the amount of possible hyperparameter combinations.", "\n randomSample:", randomSample,"\n Possible hyperparameter combinations:", length(param), "\n Please lower the amount of randomSamples")) @@ -128,22 +107,14 @@ setResNet <- function(numLayers = c(1:8), suppressWarnings(withr::with_seed(randomSampleSeed, {param <- param[sample(length(param), randomSample)]})) } - attr(param, "settings") <- list( - seed = seed[1], - device = device, - batchSize = batchSize, - epochs = epochs, - modelType ="ResNet", - saveType = "file", - modelParamNames = c( - "numLayers", "sizeHidden", "hiddenFactor", - "residualDropout", "hiddenDropout", "sizeEmbedding" - ) - ) - results <- list( fitFunction = "fitEstimator", - param = param + param = param, + estimatorSettings = estimatorSettings, + modelType = "ResNet", + saveType = "file", + modelParamNames = c("numLayers", "sizeHidden", "hiddenFactor", + "residualDropout", "hiddenDropout", "sizeEmbedding") ) class(results) <- "modelSettings" diff --git a/R/Transformer.R b/R/Transformer.R index 4570eb9..e822fec 100644 --- a/R/Transformer.R +++ b/R/Transformer.R @@ -21,20 +21,17 @@ #' @description A transformer model with default hyperparameters #' @details from https://arxiv.org/abs/2106.11959 #' Default hyperparameters from paper -#' @param device Which device to run analysis on, either 'cpu' or 'cuda', default: 'cpu' -#' @param batchSize Size of batch, default: 512 -#' @param epochs Number of epochs to run, default: 10 -#' @param learningRate The learning rate to use -#' @param weightDecay The weight decay to use -#' @param seed random seed to use +#' @param estimatorSettings created with `setEstimator` #' #' @export -setDefaultTransformer <- function(device='cpu', - batchSize=512, - epochs=10, - learningRate=1e-4, - weightDecay=1e-5, - seed=NULL) { +setDefaultTransformer <- 
function(estimatorSettings=setEstimator( + learningRate = 'auto', + weightDecay = 1e-4, + batchSize=512, + epochs=10, + seed=NULL, + device='cpu') +) { transformerSettings <- setTransformer(numBlocks = 3, dimToken = 192, dimOut = 1, @@ -43,14 +40,9 @@ setDefaultTransformer <- function(device='cpu', ffnDropout = 0.1, resDropout = 0.0, dimHidden = 256, - weightDecay = weightDecay, - learningRate = learningRate, - batchSize = batchSize, - epochs = epochs, - device = device, + estimatorSettings=estimatorSettings, hyperParamSearch = 'random', - randomSample = 1, - seed = seed) + randomSample = 1) attr(transformerSettings, 'settings')$name <- 'defaultTransformer' return(transformerSettings) } @@ -68,26 +60,21 @@ setDefaultTransformer <- function(device='cpu', #' @param ffnDropout dropout to use in feedforward block #' @param resDropout dropout to use in residual connections #' @param dimHidden dimension of the feedworward block -#' @param weightDecay weightdecay to use -#' @param learningRate learning rate to use -#' @param batchSize batchSize to use -#' @param epochs How many epochs to run the model for -#' @param device Which device to use, cpu or cuda +#' @param estimatorSettings created with `setEstimator` #' @param hyperParamSearch what kind of hyperparameter search to do, default 'random' #' @param randomSample How many samples to use in hyperparameter search if random #' @param randomSampleSeed Random seed to sample hyperparameter combinations -#' @param seed Random seed to use #' #' @export setTransformer <- function(numBlocks = 3, dimToken = 96, dimOut = 1, numHeads = 8, attDropout = 0.25, ffnDropout = 0.25, - resDropout = 0, dimHidden = 512, weightDecay = 1e-6, - learningRate = 3e-4, batchSize = 1024, - epochs = 10, device = "cpu", hyperParamSearch = "random", - randomSample = 1, randomSampleSeed = NULL, seed = NULL) { - if (is.null(seed)) { - seed <- as.integer(sample(1e5, 1)) - } + resDropout = 0, dimHidden = 512, + estimatorSettings=setEstimator(weightDecay = 1e-6, + batchSize=1024, + epochs=10, + seed=NULL), + hyperParamSearch = "random", + randomSample = 1, randomSampleSeed = NULL) { if (any(dimToken %% numHeads != 0)) { stop(paste( @@ -104,15 +91,14 @@ setTransformer <- function(numBlocks = 3, dimToken = 96, dimOut = 1, dimHidden = dimHidden, attDropout = attDropout, ffnDropout = ffnDropout, - resDropout = resDropout, - weightDecay = weightDecay, - learningRate = learningRate, - seed = list(as.integer(seed[[1]])) + resDropout = resDropout ) - + + paramGrid <- c(paramGrid, estimatorSettings$paramsToTune) + param <- PatientLevelPrediction::listCartesian(paramGrid) - if (randomSample>length(param)) { + if (hyperParamSearch == "random" && randomSample>length(param)) { stop(paste("\n Chosen amount of randomSamples is higher than the amount of possible hyperparameter combinations.", "\n randomSample:", randomSample,"\n Possible hyperparameter combinations:", length(param), "\n Please lower the amount of randomSample")) @@ -122,11 +108,10 @@ setTransformer <- function(numBlocks = 3, dimToken = 96, dimOut = 1, suppressWarnings(withr::with_seed(randomSampleSeed, {param <- param[sample(length(param), randomSample)]})) } - attr(param, "settings") <- list( - seed = seed[1], - device = device, - batchSize = batchSize, - epochs = epochs, + results <- list( + fitFunction = "fitEstimator", + param = param, + estimatorSettings = estimatorSettings, modelType = "Transformer", saveType = "file", modelParamNames = c( @@ -135,11 +120,6 @@ setTransformer <- function(numBlocks = 3, dimToken = 96, dimOut 
= 1, ) ) - results <- list( - fitFunction = "fitEstimator", - param = param - ) - class(results) <- "modelSettings" return(results) } diff --git a/man/EarlyStopping.Rd b/man/EarlyStopping.Rd index 0baf50a..53581b2 100644 --- a/man/EarlyStopping.Rd +++ b/man/EarlyStopping.Rd @@ -18,9 +18,9 @@ Stops training if a loss or metric has stopped improving \if{html}{\out{}} \if{latex}{\out{\hypertarget{method-EarlyStopping-new}{}}} \subsection{Method \code{new()}}{ -Creates a new earlystopping object +Creates a new earlyStopping object \subsection{Usage}{ -\if{html}{\out{
}}\preformatted{EarlyStopping$new(patience = 3, delta = 0, verbose = TRUE)}\if{html}{\out{
}} +\if{html}{\out{
}}\preformatted{EarlyStopping$new(patience = 3, delta = 0, verbose = TRUE, mode = "max")}\if{html}{\out{
}} } \subsection{Arguments}{ @@ -31,6 +31,8 @@ Creates a new earlystopping object \item{\code{delta}}{How much does the loss need to improve to count as improvement} \item{\code{verbose}}{If information should be printed out} + +\item{\code{mode}}{either `min` or `max` depending on metric to be used for earlyStopping} } \if{html}{\out{
}} } diff --git a/man/Estimator.Rd b/man/Estimator.Rd index 4cd1623..96692f0 100644 --- a/man/Estimator.Rd +++ b/man/Estimator.Rd @@ -15,11 +15,11 @@ fit and predict the model defined in that module. \item \href{#method-Estimator-fitEpoch}{\code{Estimator$fitEpoch()}} \item \href{#method-Estimator-score}{\code{Estimator$score()}} \item \href{#method-Estimator-finishFit}{\code{Estimator$finishFit()}} +\item \href{#method-Estimator-printProgress}{\code{Estimator$printProgress()}} \item \href{#method-Estimator-fitWholeTrainingSet}{\code{Estimator$fitWholeTrainingSet()}} \item \href{#method-Estimator-save}{\code{Estimator$save()}} \item \href{#method-Estimator-predictProba}{\code{Estimator$predictProba()}} \item \href{#method-Estimator-predict}{\code{Estimator$predict()}} -\item \href{#method-Estimator-batchToDevice}{\code{Estimator$batchToDevice()}} \item \href{#method-Estimator-itemOrDefaults}{\code{Estimator$itemOrDefaults()}} \item \href{#method-Estimator-clone}{\code{Estimator$clone()}} } @@ -30,16 +30,7 @@ fit and predict the model defined in that module. \subsection{Method \code{new()}}{ Creates a new estimator \subsection{Usage}{ -\if{html}{\out{
}}\preformatted{Estimator$new(
-  modelType,
-  modelParameters,
-  fitParameters,
-  optimizer = torch::optim_adam,
-  criterion = torch::nn_bce_with_logits_loss,
-  scheduler = torch::lr_reduce_on_plateau,
-  device = "cpu",
-  patience = 4
-)}\if{html}{\out{
}} +\if{html}{\out{
}}\preformatted{Estimator$new(modelType, modelParameters, estimatorSettings)}\if{html}{\out{
}} } \subsection{Arguments}{ @@ -49,18 +40,7 @@ Creates a new estimator \item{\code{modelParameters}}{Parameters to initialize the model} -\item{\code{fitParameters}}{Parameters required for the estimator fitting} - -\item{\code{optimizer}}{A torch optimizer to use, default is Adam} - -\item{\code{criterion}}{The torch loss function to use, defaults to -binary cross entropy with logits} - -\item{\code{scheduler}}{learning rate scheduler to use} - -\item{\code{device}}{Which device to use for fitting, default is cpu} - -\item{\code{patience}}{Patience to use for early stopping} +\item{\code{estimatorSettings}}{Parameters required for the estimator fitting} } \if{html}{\out{
}} } @@ -131,18 +111,16 @@ list with average loss and auc in the dataset \subsection{Method \code{finishFit()}}{ operations that run when fitting is finished \subsection{Usage}{ -\if{html}{\out{
}}\preformatted{Estimator$finishFit(valAUCs, modelStateDict, valLosses, epoch, learnRates)}\if{html}{\out{
}} +\if{html}{\out{
}}\preformatted{Estimator$finishFit(scores, modelStateDict, epoch, learnRates)}\if{html}{\out{
}} } \subsection{Arguments}{ \if{html}{\out{
}} \describe{ -\item{\code{valAUCs}}{validation AUC values} +\item{\code{scores}}{validation scores} \item{\code{modelStateDict}}{fitted model parameters} -\item{\code{valLosses}}{validation losses} - \item{\code{epoch}}{list of epochs fit} \item{\code{learnRates}}{learning rate sequence used so far} @@ -151,13 +129,33 @@ operations that run when fitting is finished } } \if{html}{\out{
}} +\if{html}{\out{}} +\if{latex}{\out{\hypertarget{method-Estimator-printProgress}{}}} +\subsection{Method \code{printProgress()}}{ +Print out training progress per epoch +\subsection{Usage}{ +\if{html}{\out{
}}\preformatted{Estimator$printProgress(scores, trainLoss, delta, currentEpoch)}\if{html}{\out{
}} +} + +\subsection{Arguments}{ +\if{html}{\out{
}} +\describe{ +\item{\code{scores}}{scores returned by `self$score`} + +\item{\code{trainLoss}}{training loss} + +\item{\code{delta}}{how long did the epoch take} + +\item{\code{currentEpoch}}{the current epoch number} +} +\if{html}{\out{
}} +} +} +\if{html}{\out{
}} \if{html}{\out{}} \if{latex}{\out{\hypertarget{method-Estimator-fitWholeTrainingSet}{}}} \subsection{Method \code{fitWholeTrainingSet()}}{ Fits whole training set on a specific number of epochs -TODO What happens when learning rate changes per epochs? -Ideally I would copy the learning rate strategy from before -and adjust for different sizes ie more iterations/updates??? \subsection{Usage}{ \if{html}{\out{
}}\preformatted{Estimator$fitWholeTrainingSet(dataset, learnRates = NULL)}\if{html}{\out{
}} } @@ -237,27 +235,6 @@ The predicted class for the data in the dataset } } \if{html}{\out{
}} -\if{html}{\out{}} -\if{latex}{\out{\hypertarget{method-Estimator-batchToDevice}{}}} -\subsection{Method \code{batchToDevice()}}{ -sends a batch of data to device -assumes batch includes lists of tensors to arbitrary nested depths -\subsection{Usage}{ -\if{html}{\out{
}}\preformatted{Estimator$batchToDevice(batch)}\if{html}{\out{
}} -} - -\subsection{Arguments}{ -\if{html}{\out{
}} -\describe{ -\item{\code{batch}}{the batch to send, usually a list of torch tensors} -} -\if{html}{\out{
}} -} -\subsection{Returns}{ -the batch on the required device -} -} -\if{html}{\out{
}} \if{html}{\out{}} \if{latex}{\out{\hypertarget{method-Estimator-itemOrDefaults}{}}} \subsection{Method \code{itemOrDefaults()}}{ diff --git a/man/batchToDevice.Rd b/man/batchToDevice.Rd new file mode 100644 index 0000000..a19e34c --- /dev/null +++ b/man/batchToDevice.Rd @@ -0,0 +1,20 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/Estimator-class.R +\name{batchToDevice} +\alias{batchToDevice} +\title{sends a batch of data to device} +\usage{ +batchToDevice(batch, device) +} +\arguments{ +\item{batch}{the batch to send, usually a list of torch tensors} + +\item{device}{which device to send batch to} +} +\value{ +the batch on the required device +} +\description{ +sends a batch of data to device +assumes batch includes lists of tensors to arbitrary nested depths +} diff --git a/man/gridCvDeep.Rd b/man/gridCvDeep.Rd index 4d52d4d..1b304a4 100644 --- a/man/gridCvDeep.Rd +++ b/man/gridCvDeep.Rd @@ -4,18 +4,16 @@ \alias{gridCvDeep} \title{gridCvDeep} \usage{ -gridCvDeep(mappedData, labels, settings, modelLocation, paramSearch) +gridCvDeep(mappedData, labels, modelSettings, modelLocation) } \arguments{ \item{mappedData}{Mapped data with covariates} \item{labels}{Dataframe with the outcomes} -\item{settings}{Settings of the model} +\item{modelSettings}{Settings of the model} \item{modelLocation}{Where to save the model} - -\item{paramSearch}{model parameters to perform search over} } \description{ Performs grid search for a deep learning estimator diff --git a/man/lrFinder.Rd b/man/lrFinder.Rd new file mode 100644 index 0000000..74f31d2 --- /dev/null +++ b/man/lrFinder.Rd @@ -0,0 +1,42 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/LRFinder.R +\name{lrFinder} +\alias{lrFinder} +\title{Find learning rate that decreases loss the most} +\usage{ +lrFinder( + dataset, + modelType, + modelParams, + estimatorSettings, + minLR = 1e-07, + maxLR = 1, + numLR = 100, + smooth = 0.05, + divergenceThreshold = 4 +) +} +\arguments{ +\item{dataset}{torch dataset, training dataset} + +\item{modelType}{the function used to initialize the model} + +\item{modelParams}{parameters used to initialize model} + +\item{estimatorSettings}{settings for estimator to fit model} + +\item{minLR}{lower bound of learning rates to search through} + +\item{maxLR}{upper bound of learning rates to search through} + +\item{numLR}{number of learning rates to go through} + +\item{smooth}{smoothing to use on losses} + +\item{divergenceThreshold}{if loss increases this amount above the minimum, stop.} +} +\description{ +Method originated from https://arxiv.org/abs/1506.01186 but this +implementation draws inspiration from various other implementations such as +pytorch lightning, fastai, luz and pytorch-lr-finder. 
+} diff --git a/man/setDefaultResNet.Rd b/man/setDefaultResNet.Rd index 98918aa..c26f354 100644 --- a/man/setDefaultResNet.Rd +++ b/man/setDefaultResNet.Rd @@ -5,26 +5,12 @@ \title{setDefaultResNet} \usage{ setDefaultResNet( - device = "cpu", - batchSize = 1024, - epochs = 10, - learningRate = 0.001, - weightDecay = 1e-06, - seed = NULL + estimatorSettings = setEstimator(learningRate = "auto", weightDecay = 1e-06, device = + "cpu", batchSize = 1024, epochs = 50, seed = NULL) ) } \arguments{ -\item{device}{Which device to run analysis on, either 'cpu' or 'cuda', default: 'cpu'} - -\item{batchSize}{Size of batch, default: 1024} - -\item{epochs}{Number of epochs to run, default: 10} - -\item{learningRate}{Learning rate to use, default: 0.001} - -\item{weightDecay}{The weight decay to use} - -\item{seed}{Random seed to use} +\item{estimatorSettings}{created with ```setEstimator```} } \description{ Creates settings for a default ResNet model diff --git a/man/setDefaultTransformer.Rd b/man/setDefaultTransformer.Rd index 7c1e644..e0a3a32 100644 --- a/man/setDefaultTransformer.Rd +++ b/man/setDefaultTransformer.Rd @@ -5,26 +5,12 @@ \title{Create default settings for a non-temporal transformer} \usage{ setDefaultTransformer( - device = "cpu", - batchSize = 512, - epochs = 10, - learningRate = 1e-04, - weightDecay = 1e-05, - seed = NULL + estimatorSettings = setEstimator(learningRate = "auto", weightDecay = 1e-04, batchSize + = 512, epochs = 10, seed = NULL, device = "cpu") ) } \arguments{ -\item{device}{Which device to run analysis on, either 'cpu' or 'cuda', default: 'cpu'} - -\item{batchSize}{Size of batch, default: 512} - -\item{epochs}{Number of epochs to run, default: 10} - -\item{learningRate}{The learning rate to use} - -\item{weightDecay}{The weight decay to use} - -\item{seed}{random seed to use} +\item{estimatorSettings}{created with `setEstimator`} } \description{ A transformer model with default hyperparameters diff --git a/man/setEstimator.Rd b/man/setEstimator.Rd index 50919af..b924b03 100644 --- a/man/setEstimator.Rd +++ b/man/setEstimator.Rd @@ -3,11 +3,32 @@ \name{setEstimator} \alias{setEstimator} \title{setEstimator} +\usage{ +setEstimator( + learningRate = "auto", + weightDecay = 0, + batchSize = 512, + epochs = 30, + device = "cpu", + optimizer = torchopt::optim_adamw, + scheduler = list(fun = torch::lr_reduce_on_plateau, params = list(patience = 1)), + criterion = torch::nn_bce_with_logits_loss, + earlyStopping = list(useEarlyStopping = TRUE, params = list(patience = 4)), + metric = "auc", + seed = NULL +) +} \arguments{ \item{learningRate}{what learning rate to use} \item{weightDecay}{what weight_decay to use} +\item{batchSize}{batchSize to use} + +\item{epochs}{how many epochs to train for} + +\item{device}{what device to train on} + \item{optimizer}{which optimizer to use} \item{scheduler}{which learning rate scheduler to use} @@ -16,11 +37,11 @@ \item{earlyStopping}{If earlyStopping should be used which stops the training of your metric is not improving} -\item{earlyStoppingMetric}{Which parameter to use for early stopping} - -\item{patience}{patience for earlyStopper} +\item{metric}{either `auc` or `loss` or a custom metric to use. This is the metric used for scheduler and earlyStopping. 
+Needs to be a list with function `fun`, mode either `min` or `max` and a `name`, +`fun` needs to be a function that takes in prediction and labels and outputs a score.} -\item{hyperparameterMetric}{which metric to use for hyperparameter, loss, auc, auprc or a custom function} +\item{seed}{seed to initialize weights of model with} } \description{ creates settings for the Estimator, which takes a model and trains it diff --git a/man/setMultiLayerPerceptron.Rd b/man/setMultiLayerPerceptron.Rd index 07111d5..d6ab36c 100644 --- a/man/setMultiLayerPerceptron.Rd +++ b/man/setMultiLayerPerceptron.Rd @@ -9,15 +9,11 @@ setMultiLayerPerceptron( sizeHidden = c(2^(6:9)), dropout = c(seq(0, 0.5, 0.05)), sizeEmbedding = c(2^(6:9)), - weightDecay = c(1e-06, 0.001), - learningRate = c(0.01, 3e-04, 1e-05), - seed = NULL, + estimatorSettings = setEstimator(learningRate = "auto", weightDecay = c(1e-06, 0.001), + batchSize = 1024, epochs = 30, device = "cpu"), hyperParamSearch = "random", randomSample = 100, - randomSampleSeed = NULL, - device = "cpu", - batchSize = 1024, - epochs = 30 + randomSampleSeed = NULL ) } \arguments{ @@ -29,23 +25,13 @@ setMultiLayerPerceptron( \item{sizeEmbedding}{Size of embedding layer, default: 2^(6:9) (64 to 512)} -\item{weightDecay}{Weight decay to apply, default: c(1e-6, 1e-3)} - -\item{learningRate}{Learning rate to use. default: c(1e-2, 1e-5)} - -\item{seed}{Seed to use for sampling hyperparameter space} +\item{estimatorSettings}{settings of Estimator created with `setEstimator`} \item{hyperParamSearch}{Which kind of hyperparameter search to use random sampling or exhaustive grid search. default: 'random'} \item{randomSample}{How many random samples from hyperparameter space to use} \item{randomSampleSeed}{Random seed to sample hyperparameter combinations} - -\item{device}{Which device to run analysis on, either 'cpu' or 'cuda', default: 'cpu'} - -\item{batchSize}{Size of batch, default: 1024} - -\item{epochs}{Number of epochs to run, default: 10} } \description{ Creates settings for a Multilayer perceptron model diff --git a/man/setResNet.Rd b/man/setResNet.Rd index 0bd8464..fbe3d77 100644 --- a/man/setResNet.Rd +++ b/man/setResNet.Rd @@ -11,15 +11,11 @@ setResNet( residualDropout = c(seq(0, 0.5, 0.05)), hiddenDropout = c(seq(0, 0.5, 0.05)), sizeEmbedding = c(2^(6:9)), - weightDecay = c(1e-06, 0.001), - learningRate = c(0.01, 3e-04, 1e-05), - seed = NULL, + estimatorSettings = setEstimator(learningRate = "auto", weightDecay = c(1e-06, 0.001), + device = "cpu", batchSize = 1024, epochs = 30, seed = NULL), hyperParamSearch = "random", randomSample = 100, - randomSampleSeed = NULL, - device = "cpu", - batchSize = 1024, - epochs = 30 + randomSampleSeed = NULL ) } \arguments{ @@ -35,23 +31,13 @@ setResNet( \item{sizeEmbedding}{Size of embedding layer, default: 2^(6:9) (64 to 512)} -\item{weightDecay}{Weight decay to apply, default: c(1e-6, 1e-3)} - -\item{learningRate}{Learning rate to use. default: c(1e-2, 1e-5)} - -\item{seed}{Seed to use for sampling hyperparameter space} +\item{estimatorSettings}{created with ```setEstimator```} \item{hyperParamSearch}{Which kind of hyperparameter search to use random sampling or exhaustive grid search. 
default: 'random'} \item{randomSample}{How many random samples from hyperparameter space to use} \item{randomSampleSeed}{Random seed to sample hyperparameter combinations} - -\item{device}{Which device to run analysis on, either 'cpu' or 'cuda', default: 'cpu'} - -\item{batchSize}{Size of batch, default: 1024} - -\item{epochs}{Number of epochs to run, default: 10} } \description{ Creates settings for a ResNet model diff --git a/man/setTransformer.Rd b/man/setTransformer.Rd index 483dc06..9dfc7dd 100644 --- a/man/setTransformer.Rd +++ b/man/setTransformer.Rd @@ -13,15 +13,11 @@ setTransformer( ffnDropout = 0.25, resDropout = 0, dimHidden = 512, - weightDecay = 1e-06, - learningRate = 3e-04, - batchSize = 1024, - epochs = 10, - device = "cpu", + estimatorSettings = setEstimator(weightDecay = 1e-06, batchSize = 1024, epochs = 10, + seed = NULL), hyperParamSearch = "random", randomSample = 1, - randomSampleSeed = NULL, - seed = NULL + randomSampleSeed = NULL ) } \arguments{ @@ -41,23 +37,13 @@ setTransformer( \item{dimHidden}{dimension of the feedworward block} -\item{weightDecay}{weightdecay to use} - -\item{learningRate}{learning rate to use} - -\item{batchSize}{batchSize to use} - -\item{epochs}{How many epochs to run the model for} - -\item{device}{Which device to use, cpu or cuda} +\item{estimatorSettings}{created with `setEstimator`} \item{hyperParamSearch}{what kind of hyperparameter search to do, default 'random'} \item{randomSample}{How many samples to use in hyperparameter search if random} \item{randomSampleSeed}{Random seed to sample hyperparameter combinations} - -\item{seed}{Random seed to use} } \description{ A transformer model diff --git a/tests/testthat/setup.R b/tests/testthat/setup.R index 775e7e5..c3a368c 100644 --- a/tests/testthat/setup.R +++ b/tests/testthat/setup.R @@ -22,21 +22,6 @@ covSet <- FeatureExtraction::createCovariateSettings( ) -covSetT <- FeatureExtraction::createTemporalSequenceCovariateSettings( - useDemographicsGender = T, - useDemographicsAge = T, - useDemographicsRace = T, - useDemographicsEthnicity = T, - useDemographicsAgeGroup = T, - useConditionEraGroupStart = T, - useDrugEraStart = T, - timePart = "month", - timeInterval = 1, - sequenceEndDay = -1, - sequenceStartDay = -365 * 5 -) - - databaseDetails <- PatientLevelPrediction::createDatabaseDetails( connectionDetails = connectionDetails, cdmDatabaseSchema = "main", @@ -61,13 +46,6 @@ plpData <- PatientLevelPrediction::getPlpData( ) -plpDataT <- PatientLevelPrediction::getPlpData( - databaseDetails = databaseDetails, - restrictPlpDataSettings = restrictPlpDataSettings, - covariateSettings = covSetT -) - - populationSet <- PatientLevelPrediction::createStudyPopulationSettings( requireTimeAtRisk = F, riskWindowStart = 1, @@ -91,8 +69,13 @@ mappedData <- PatientLevelPrediction::MapIds( cohort = trainData$Train$labels ) + + dataset <- Dataset( data = mappedData$covariates, labels = trainData$Train$labels$outcomeCount, numericalIndex = NULL ) + +small_dataset <- torch::dataset_subset(dataset, (1:round(length(dataset)/3))) + diff --git a/tests/testthat/test-Estimator.R b/tests/testthat/test-Estimator.R index f9024d8..aa8459d 100644 --- a/tests/testthat/test-Estimator.R +++ b/tests/testthat/test-Estimator.R @@ -1,7 +1,6 @@ -catFeatures <- dataset$numCatFeatures() -numFeatures <- dataset$numNumFeatures() +catFeatures <- small_dataset$dataset$numCatFeatures() +numFeatures <- small_dataset$dataset$numNumFeatures() -fitParams <- list() modelType <- ResNet modelParameters <- list( @@ -13,11 +12,16 @@ 
modelParameters <- list( hiddenFactor = 2 ) +estimatorSettings <- setEstimator(learningRate = 3e-4, + weightDecay = 0.0, + batchSize = 128, + epochs = 5, + device = 'cpu') + estimator <- Estimator$new( modelType = modelType, modelParameters = modelParameters, - fitParameters = fitParams, - device = "cpu" + estimatorSettings = estimatorSettings ) test_that("Estimator initialization works", { @@ -38,18 +42,14 @@ test_that("Estimator initialization works", { expect_equal(val, 1) val <- estimator$itemOrDefaults(list(param = 1, test = 3), "paramater", default = NULL) expect_true(is.null(val)) + }) sink(nullfile()) -estimator$fit(dataset, dataset) +estimator$fit(small_dataset, small_dataset) sink() test_that("estimator fitting works", { - - # check the fitting - # estimator$fitEpoch(dataset, batchIndex) - # estimator$finishFit(valAUCs, modelStateDict, valLosses, epoch) - # estimator$score(dataset, batchIndex) expect_true(!is.null(estimator$bestEpoch)) expect_true(!is.null(estimator$bestScore$loss)) expect_true(!is.null(estimator$bestScore$auc)) @@ -57,7 +57,7 @@ test_that("estimator fitting works", { old_weights <- estimator$model$head$weight$mean()$item() sink(nullfile()) - estimator$fitWholeTrainingSet(dataset, estimator$learnRateSchedule) + estimator$fitWholeTrainingSet(small_dataset, estimator$learnRateSchedule) sink() expect_equal(estimator$optimizer$param_groups[[1]]$lr, tail(estimator$learnRateSchedule, 1)[[1]]) @@ -70,16 +70,50 @@ test_that("estimator fitting works", { estimator$save(testLoc, "estimator.pt") expect_true(file.exists(file.path(testLoc, "estimator.pt"))) - + + sink(nullfile()) preds <- estimator$predictProba(dataset) - + sink() + expect_lt(max(preds), 1) expect_gt(min(preds), 0) - - classes <- estimator$predict(dataset, threshold = 0.5) + + sink(nullfile()) + classes <- estimator$predict(small_dataset, threshold = 0.5) + sink() expect_equal(all(unique(classes) %in% c(0, 1)), TRUE) - - # not sure how to test: batchToDevice(batch) + + sink(nullfile()) + classes <- estimator$predict(small_dataset$dataset) + sink() + expect_equal(all(unique(classes) %in% c(0, 1)), TRUE) + + estimatorSettings <- setEstimator(learningRate = 3e-4, + weightDecay = 0.0, + batchSize = 128, + epochs = 5, + device = 'cpu', + metric= "loss") + + estimator <- Estimator$new( + modelType = modelType, + modelParameters = modelParameters, + estimatorSettings = estimatorSettings + ) + sink(nullfile()) + estimator$fit(small_dataset, small_dataset) + sink() + + expect_equal(estimator$metric$mode, "min") + expect_equal(estimator$metric$name, "loss") + + sink(nullfile()) + estimator$fitWholeTrainingSet(small_dataset, estimator$learnRateSchedule) + sink() + + expect_equal(estimator$learnRateSchedule[[estimator$bestEpoch]], + estimator$optimizer$param_groups[[1]]$lr) + }) test_that("early stopping works", { @@ -98,7 +132,9 @@ modelSettings <- setResNet( numLayers = 1, sizeHidden = 16, hiddenFactor = 1, residualDropout = 0, hiddenDropout = 0, sizeEmbedding = 16, hyperParamSearch = "random", - randomSample = 1, epochs = 1 + randomSample = 1, + setEstimator(epochs=1, + learningRate = 3e-4) ) sink(nullfile()) @@ -111,13 +147,16 @@ test_that("Estimator fit function works", { expect_equal(class(results), "plpModel") expect_equal(attr(results, "modelType"), "binary") expect_equal(attr(results, "saveType"), "file") + fakeTrainData <- trainData + fakeTrainData$train$covariateData <- list(fakeCovData <- c("Fake")) + expect_error(fitEstimator(fakeTrainData$train, modelSettings, analysisId = 1)) }) 
test_that("predictDeepEstimator works", { # input is an estimator and a dataset sink(nullfile()) - predictions <- predictDeepEstimator(estimator, dataset, cohort = trainData$Train$labels) + predictions <- predictDeepEstimator(estimator, small_dataset, cohort = trainData$Train$labels) sink() expect_lt(max(predictions$value), 1) @@ -141,7 +180,7 @@ test_that("batchToDevice works", { # which we can use to test of the device is updated estimator$device <- "meta" b <- 1:10 - batch <- estimator$batchToDevice(dataset[b]) + batch <- batchToDevice(dataset[b], device=estimator$device) devices <- lapply( lapply(unlist(batch, recursive = TRUE), function(x) x$device), @@ -149,7 +188,114 @@ test_that("batchToDevice works", { ) # test that all are meta expect_true(all(devices == TRUE)) + + numDevice <- batchToDevice(dataset[b]$batch$num, device=estimator$device) + expect_true(numDevice$device==torch::torch_device(type="meta")) +}) + +test_that("Estimator without earlyStopping works", { + # estimator without earlyStopping + estimatorSettings <- setEstimator(learningRate = 3e-4, + weightDecay = 0.0, + batchSize = 128, + epochs = 1, + device = 'cpu', + earlyStopping = NULL) + estimator2 <- Estimator$new( + modelType = modelType, + modelParameters = modelParameters, + estimatorSettings = estimatorSettings + ) + sink(nullfile()) + estimator2$fit(small_dataset, small_dataset) + sink() + + expect_null(estimator2$earlyStopper) + expect_true(!is.null(estimator2$bestEpoch)) + +}) + +test_that("Early stopper can use loss and stops early", { + estimatorSettings <- setEstimator(learningRate = 3e-2, + weightDecay = 0.0, + batchSize = 128, + epochs = 10, + device = 'cpu', + earlyStopping =list(useEarlyStopping=TRUE, + params = list(mode=c('min'), + patience=1)), + metric = 'loss', + seed=42) + estimator <- Estimator$new( + modelType=modelType, + modelParameters = modelParameters, + estimatorSettings = estimatorSettings + ) + + sink(nullfile()) + estimator$fit(small_dataset, small_dataset) + sink() + + expect_true(estimator$bestEpoch < estimator$epochs) + }) +test_that('Custom metric in estimator works', { + + metric_fun <- function(predictions, labels) { + positive <- predictions[labels == 1] + negative <- predictions[labels == 0] + pr <- PRROC::pr.curve(scores.class0 = positive, scores.class1 = negative) + auprc <- pr$auc.integral + } + + estimatorSettings <- setEstimator(learningRate = 3e-4, + weightDecay = 0.0, + batchSize = 128, + device = "cpu", + epochs = 1, + metric=list(fun=metric_fun, + name="auprc", + mode="max")) + + estimator <- Estimator$new(modelType = modelType, + modelParameters = modelParameters, + estimatorSettings = estimatorSettings) + expect_true(is.function(estimator$metric$fun)) + expect_true(is.character(estimator$metric$name)) + expect_true(estimator$metric$mode %in% c("min", "max")) + + sink(nullfile()) + estimator$fit(small_dataset, small_dataset) + sink() + + expect_true(estimator$bestScore[["auprc"]]>0) + +}) -# cases to add, estimator with early stopping that stops, and estimator without earlystopping +test_that("setEstimator with paramsToTune is correctly added to hyperparameters", { + estimatorSettings <- setEstimator(learningRate=c(3e-4,1e-3), + batchSize=128, + epochs=1, + device="cpu", + metric=c("auc", "auprc"), + earlyStopping = list(useEarlyStopping=TRUE, + params=list(patience=c(4,10)))) + modelSettings <- setResNet(numLayers = 1, sizeHidden = 64, + hiddenFactor = 1, residualDropout = 0.2, + hiddenDropout = 0.2,sizeEmbedding = 128, + estimatorSettings = estimatorSettings, + 
hyperParamSearch = "grid") + + expect_true(length(modelSettings$param) == 8) + expect_true(length(unique(lapply(modelSettings$param, function(x) x$estimator.metric)))==2) + expect_true(length(unique(lapply(modelSettings$param, function(x) x$estimator.learningRate)))==2) + expect_true(length(unique(lapply(modelSettings$param, function(x) x$estimator.earlyStopping.patience)))==2) + fitParams <- names(modelSettings$param[[1]])[grepl("^estimator", names(modelSettings$param[[1]]))] + + estimatorSettings2 <- fillEstimatorSettings(estimatorSettings, fitParams, paramSearch=modelSettings$param[[8]]) + + expect_equal(estimatorSettings2$learningRate, 1e-3) + expect_equal(as.character(estimatorSettings2$metric), "auprc") + expect_equal(estimatorSettings2$earlyStopping$params$patience, 10) +}) \ No newline at end of file diff --git a/tests/testthat/test-LRFinder.R b/tests/testthat/test-LRFinder.R new file mode 100644 index 0000000..7f56a0f --- /dev/null +++ b/tests/testthat/test-LRFinder.R @@ -0,0 +1,44 @@ +test_that("LR scheduler that changes per batch works", { + + model <- ResNet(catFeatures = 10, numFeatures = 1, + sizeEmbedding = 32, sizeHidden = 64, + numLayers = 1, hiddenFactor = 1) + optimizer <- torchopt::optim_adamw(model$parameters, lr=1e-7) + + scheduler <- lrPerBatch(optimizer, + startLR = 1e-7, + endLR = 1e-2, + nIters = 5) + expect_equal(scheduler$last_epoch, 0) + expect_equal(scheduler$optimizer$param_groups[[1]]$lr, 1e-7) + + for (i in 1:5) { + scheduler$step() + } + + expect_equal(scheduler$last_epoch, 5) + expect_equal(scheduler$optimizer$param_groups[[1]]$lr, (1e-7 * (0.01 / 1e-7) ^ (5 / 4))) + +}) + + +test_that("LR finder works", { + + lr <- lrFinder(dataset, modelType = ResNet, modelParams = list(catFeatures=dataset$numCatFeatures(), + numFeatures=dataset$numNumFeatures(), + sizeEmbedding=32, + sizeHidden=64, + numLayers=1, + hiddenFactor=1), + estimatorSettings = setEstimator(batchSize=32, + seed = 42), + minLR = 3e-4, + maxLR = 10.0, + numLR = 20, + divergenceThreshold = 1.1) + + expect_true(lr<=10.0) + expect_true(lr>=3e-4) + + +}) \ No newline at end of file diff --git a/tests/testthat/test-MLP.R b/tests/testthat/test-MLP.R index 97d7103..e84bb58 100644 --- a/tests/testthat/test-MLP.R +++ b/tests/testthat/test-MLP.R @@ -4,13 +4,15 @@ modelSettings <- setMultiLayerPerceptron( sizeHidden = c(32), dropout = c(0.1), sizeEmbedding = c(32), - weightDecay = c(1e-6), - learningRate = c(3e-4), - seed = 42, + estimatorSettings = setEstimator( + learningRate=c(3e-4), + weightDecay = c(1e-6), + seed=42, + batchSize=128, + epochs=1 + ), hyperParamSearch = "random", - randomSample = 1, - batchSize = 128, - epochs = 3 + randomSample = 1 ) test_that("setMultiLayerPerceptron works", { @@ -19,6 +21,13 @@ test_that("setMultiLayerPerceptron works", { testthat::expect_equal(modelSettings$fitFunction, "fitEstimator") testthat::expect_true(length(modelSettings$param) > 0) + + expect_error(setMultiLayerPerceptron(numLayers=1, + sizeHidden = 128, + dropout= 0.2, + sizeEmbedding = 128, + estimatorSettings = setEstimator(learningRate=3e-4), + randomSample = 2)) }) sink(nullfile()) diff --git a/tests/testthat/test-ResNet.R b/tests/testthat/test-ResNet.R index 8f5a12f..d87ff5f 100644 --- a/tests/testthat/test-ResNet.R +++ b/tests/testthat/test-ResNet.R @@ -6,14 +6,13 @@ resSet <- setResNet( residualDropout = c(0.1), hiddenDropout = c(0.1), sizeEmbedding = c(32), - weightDecay = c(1e-6), - learningRate = c(3e-4), - seed = 42, + estimatorSettings = setEstimator(learningRate="auto", + weightDecay = c(1e-6), 
+ seed=42, + batchSize = 128, + epochs=1), hyperParamSearch = "random", randomSample = 1, - # device='cuda:0', - batchSize = 128, - epochs = 1 ) test_that("setResNet works", { @@ -22,6 +21,20 @@ test_that("setResNet works", { testthat::expect_equal(resSet$fitFunction, "fitEstimator") testthat::expect_true(length(resSet$param) > 0) + + expect_error(setResNet(numLayers = c(2), + sizeHidden = c(32), + hiddenFactor = c(2), + residualDropout = c(0.1), + hiddenDropout = c(0.1), + sizeEmbedding = c(32), + estimatorSettings = setEstimator(learningRate=c(3e-4), + weightDecay = c(1e-6), + seed=42, + batchSize = 128, + epochs=1), + hyperParamSearch = "random", + randomSample = 2)) }) sink(nullfile()) diff --git a/tests/testthat/test-Transformer.R b/tests/testthat/test-Transformer.R index 228e881..2e4709c 100644 --- a/tests/testthat/test-Transformer.R +++ b/tests/testthat/test-Transformer.R @@ -1,8 +1,11 @@ settings <- setTransformer( numBlocks = 1, dimToken = 8, dimOut = 1, numHeads = 2, attDropout = 0.0, ffnDropout = 0.2, - resDropout = 0.0, dimHidden = 32, batchSize = 64, - epochs = 1, randomSample = 1 + resDropout = 0.0, dimHidden = 32, + estimatorSettings = setEstimator(learningRate = 3e-4, + batchSize=64, + epochs=1), + randomSample = 1 ) test_that("Transformer settings work", { @@ -43,7 +46,6 @@ test_that("transformer nn-module works", { input$cat <- torch::torch_randint(0, 5, c(10, 5), dtype = torch::torch_long()) input$num <- torch::torch_randn(10, 1, dtype = torch::torch_float32()) - output <- model(input) # output is correct shape, size of batch From e580c2abbed445c8c42f5f9f1457bcd24fbf0326 Mon Sep 17 00:00:00 2001 From: Henrik Date: Sun, 5 Mar 2023 14:00:51 +0100 Subject: [PATCH 16/58] Derive dimension of feedforward block from embedding dimension (#53) * Add dimHiddenRatio parameter to Transformer * Update Transformer documentation --- R/Transformer.R | 14 +++++++++++++- man/setTransformer.Rd | 3 +++ 2 files changed, 16 insertions(+), 1 deletion(-) diff --git a/R/Transformer.R b/R/Transformer.R index e822fec..2b6e1f0 100644 --- a/R/Transformer.R +++ b/R/Transformer.R @@ -60,6 +60,7 @@ setDefaultTransformer <- function(estimatorSettings=setEstimator( #' @param ffnDropout dropout to use in feedforward block #' @param resDropout dropout to use in residual connections #' @param dimHidden dimension of the feedworward block +#' @param dimHiddenRatio dimension of the feedforward block as a ratio of dimToken (embedding size) #' @param estimatorSettings created with `setEstimator` #' @param hyperParamSearch what kind of hyperparameter search to do, default 'random' #' @param randomSample How many samples to use in hyperparameter search if random @@ -68,7 +69,7 @@ setDefaultTransformer <- function(estimatorSettings=setEstimator( #' @export setTransformer <- function(numBlocks = 3, dimToken = 96, dimOut = 1, numHeads = 8, attDropout = 0.25, ffnDropout = 0.25, - resDropout = 0, dimHidden = 512, + resDropout = 0, dimHidden = 512, dimHiddenRatio = NULL, estimatorSettings=setEstimator(weightDecay = 1e-6, batchSize=1024, epochs=10, @@ -83,6 +84,17 @@ setTransformer <- function(numBlocks = 3, dimToken = 96, dimOut = 1, )) } + if (is.null(dimHidden) && is.null(dimHiddenRatio) + || !is.null(dimHidden) && !is.null(dimHiddenRatio)) { + stop(paste( + "dimHidden and dimHiddenRatio cannot be both set or both NULL" + )) + } else { + if (!is.null(dimHiddenRatio)) { + dimHidden <- round(dimToken*dimHiddenRatio, digits = 0) + } + } + paramGrid <- list( numBlocks = numBlocks, dimToken = dimToken, diff --git 
a/man/setTransformer.Rd b/man/setTransformer.Rd
index 9dfc7dd..7de56c4 100644
--- a/man/setTransformer.Rd
+++ b/man/setTransformer.Rd
@@ -13,6 +13,7 @@ setTransformer(
   ffnDropout = 0.25,
   resDropout = 0,
   dimHidden = 512,
+  dimHiddenRatio = NULL,
   estimatorSettings = setEstimator(weightDecay = 1e-06, batchSize = 1024, epochs = 10,
     seed = NULL),
   hyperParamSearch = "random",
@@ -37,6 +38,8 @@ setTransformer(
 \item{dimHidden}{dimension of the feedworward block}
+\item{dimHiddenRatio}{dimension of the feedforward block as a ratio of dimToken (embedding size)}
+
 \item{estimatorSettings}{created with `setEstimator`}
 \item{hyperParamSearch}{what kind of hyperparameter search to do, default 'random'}

From fba85ec71ba4adf496bbcc5b2e472fbc7f37d512 Mon Sep 17 00:00:00 2001
From: Henrik
Date: Sun, 5 Mar 2023 14:02:02 +0100
Subject: [PATCH 17/58] Divisible check for Transformer not comprehensive (#55)

* Resolve an issue with divisible check

* Update Transformer tests
---
 R/Transformer.R                   | 4 ++--
 tests/testthat/test-Transformer.R | 8 ++++++++
 2 files changed, 10 insertions(+), 2 deletions(-)

diff --git a/R/Transformer.R b/R/Transformer.R
index 2b6e1f0..64b43b7 100644
--- a/R/Transformer.R
+++ b/R/Transformer.R
@@ -77,9 +77,9 @@ setTransformer <- function(numBlocks = 3, dimToken = 96, dimOut = 1,
                            hyperParamSearch = "random",
                            randomSample = 1, randomSampleSeed = NULL) {
-  if (any(dimToken %% numHeads != 0)) {
+  if (any(with(expand.grid(dimToken = dimToken, numHeads = numHeads), dimToken %% numHeads != 0))) {
     stop(paste(
-      "dimToken needs to divisble by numHeads. dimToken =", dimToken,
+      "dimToken needs to be divisible by numHeads. dimToken =", dimToken,
       "is not divisible by numHeads =", numHeads
     ))
   }
diff --git a/tests/testthat/test-Transformer.R b/tests/testthat/test-Transformer.R
index 2e4709c..8409baa 100644
--- a/tests/testthat/test-Transformer.R
+++ b/tests/testthat/test-Transformer.R
@@ -16,6 +16,14 @@ test_that("Transformer settings work", {
     numBlocks = 1, dimToken = 50,
     numHeads = 7
   ))
+  testthat::expect_error(setTransformer(
+    numBlocks = 1, dimToken = c(2, 4),
+    numHeads = c(2, 4)
+  ))
+  testthat::expect_error(setTransformer(
+    numBlocks = 1, dimToken = c(4, 6),
+    numHeads = c(2, 4)
+  ))
 })

From e50d2e456d221aa6bf471b7e4a17afeaea3001a0 Mon Sep 17 00:00:00 2001
From: Egill Axfjord Fridgeirsson
Date: Mon, 6 Mar 2023 13:31:03 +0100
Subject: [PATCH 18/58] Update NEWS.md

---
 NEWS.md | 9 ++++++++-
 1 file changed, 8 insertions(+), 1 deletion(-)

diff --git a/NEWS.md b/NEWS.md
index 854330d..5bd3e9c 100644
--- a/NEWS.md
+++ b/NEWS.md
@@ -1,5 +1,12 @@
 DeepPatientLevelPrediction (develop)
 ======================
+- Fixed the check that the number of heads is compatible with the embedding dimension (#55)
+- Now transformer width can be specified as a ratio of the embedding dimension (dimToken) (#53)
+- A custom metric can now be defined for earlyStopping and the learning rate schedule (#51)
+- Added a setEstimator function to configure the estimator (#51)
+- Seed added for model weight initialization to improve reproducibility (#51)
+- Added a learning rate finder for automatic calculation of the learning rate (#51)
+- Added a seed for sampling hyperparameters (#50)
 - used vectorised torch operations to speed up data conversion in torch dataset
 DeepPatientLevelPrediction 1.0.2
 ======================
@@ -19,4 +26,4 @@ DeepPatientLevelPrediction 1.0.0
 ======================
 - created an Estimator R6 class to handle the model fitting
 - Added three non-temporal models.
An MLP, a ResNet and a Transformer - ResNet and Transformer have default versions of hyperparameters -- Created tests and documentation for the package \ No newline at end of file +- Created tests and documentation for the package From 051ea88b8fea5732f4a42afc9c9e0e0acea5af83 Mon Sep 17 00:00:00 2001 From: egillax Date: Wed, 22 Mar 2023 16:45:04 +0100 Subject: [PATCH 19/58] update website and docs --- .github/workflows/pkgdown.yaml | 4 +- DESCRIPTION | 6 +-- README.md | 2 +- _pkgdown.yml | 3 ++ vignettes/BuildingDeepModels.Rmd | 80 ++++++++++++++++---------------- vignettes/FirstModel.Rmd | 19 ++++---- vignettes/Installing.Rmd | 20 ++++---- 7 files changed, 69 insertions(+), 65 deletions(-) diff --git a/.github/workflows/pkgdown.yaml b/.github/workflows/pkgdown.yaml index d22bb32..648f6ca 100644 --- a/.github/workflows/pkgdown.yaml +++ b/.github/workflows/pkgdown.yaml @@ -32,14 +32,14 @@ jobs: needs: website - name: Build site - run: Rscript -e 'pkgdown::build_site_github_pages(new_process = FALSE, install = TRUE)' + run: Rscript -e 'pkgdown::build_site_github_pages(new_process = FALSE)' - name: Fix Hades Logo run: Rscript -e 'OhdsiRTools::fixHadesLogo()' - name: Deploy to GitHub pages 🚀 if: github.event_name != 'pull_request' - uses: JamesIves/github-pages-deploy-action@4.1.4 + uses: JamesIves/github-pages-deploy-action@4.4.1 with: clean: false branch: gh-pages diff --git a/DESCRIPTION b/DESCRIPTION index adf33bf..0a6b787 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,11 +1,11 @@ Package: DeepPatientLevelPrediction Type: Package Title: Deep Learning For Patient Level Prediction Using Data In The OMOP Common Data Model -Version: 1.0.2 +Version: 1.1.0 Date: 15-12-2022 Authors@R: c( - person("Jenna", "Reps", email = "jreps@its.jnj.com", role = c("aut")), person("Egill", "Fridgeirsson", email = "e.fridgeirsson@erasmusmc.nl", role = c("aut", "cre")), + person("Jenna", "Reps", email = "jreps@its.jnj.com", role = c("aut")), person("Seng", "Chan You", role = c("aut")), person("Chungsoo", "Kim", role = c("aut")), person("Henrik", "John", role = c("aut")) @@ -24,7 +24,7 @@ Imports: ParallelLogger (>= 2.0.0), PatientLevelPrediction (>= 6.0.4), rlang, - torch (>= 0.8.0), + torch (>= 0.9.0), torchopt, withr Suggests: diff --git a/README.md b/README.md index fc73106..23711af 100644 --- a/README.md +++ b/README.md @@ -27,7 +27,7 @@ DeepPatientLevelPrediction is an R package. It uses [torch in R](https://torch.m System Requirements =================== -Requires R (version 3.5.0 or higher). Installation on Windows requires [RTools](http://cran.r-project.org/bin/windows/Rtools/). For training deep learning models in most cases an nvidia GPU is required using either Windows or Linux. +Requires R (version 4.0.0 or higher). Installation on Windows requires [RTools](http://cran.r-project.org/bin/windows/Rtools/). For training deep learning models in most cases an nvidia GPU is required using either Windows or Linux. Getting Started diff --git a/_pkgdown.yml b/_pkgdown.yml index 500b114..411f5c7 100644 --- a/_pkgdown.yml +++ b/_pkgdown.yml @@ -1,6 +1,9 @@ template: params: bootswatch: cosmo + +development: + mode: auto home: links: diff --git a/vignettes/BuildingDeepModels.Rmd b/vignettes/BuildingDeepModels.Rmd index 41ee944..adbfd98 100644 --- a/vignettes/BuildingDeepModels.Rmd +++ b/vignettes/BuildingDeepModels.Rmd @@ -97,8 +97,7 @@ enable the extraction of both data formats as will be described with examples below. 
Note that training Deep Learning models is computationally intensive, -our implementation therefore supports both GPU and CPU. It will -automatically check whether there is GPU or not in your computer. A GPU +our implementation therefore supports both GPU and CPU. A GPU is highly recommended for Deep Learning! ## Requirements @@ -170,7 +169,7 @@ randomly sets some inputs to 0 at each step during training time. A value of `0.2` means that 20% of the layers inputs will be set to 0. This is used to reduce overfitting. -The `sizeEmbedding` input specifices the size of the embedding used. The first +The `sizeEmbedding` input specifies the size of the embedding used. The first layer is an embedding layer which converts each sparse feature to a dense vector which it learns. An embedding is a lower dimensional projection of the features where distance between points is a measure of similarity. @@ -229,17 +228,19 @@ combinations are 2*2*2*2 or 16 but specify ```randomSample=10``` to only try ```{r, eval=FALSE} modelSettings <- setMultiLayerPerceptron( - numLayers = c(3, 5), - numHidden = c(64, 128), - dropout = c(0.2), - sizeEmbedding = c(32, 64), - learningRate = c(1e-3, 1e-4), - weightDecay = c(1e-5), - randomSample=10, - batchSize = c(100), - epochs = c(5), - seed = 12 - ) + numLayers = c(3, 5), + sizeHidden = c(64, 128), + dropout = c(0.2), + sizeEmbedding = c(32, 64), + estimatorSettings = setEstimator( + learningRate = c(1e-3, 1e-4), + weightDecay = c(1e-5), + batchSize = c(128), + epochs=c(5), + seed=12 + ), + randomSample=10 +) mlpResult <- PatientLevelPrediction::runPlp( plpData = plpData, @@ -249,8 +250,6 @@ mlpResult <- PatientLevelPrediction::runPlp( analysisName = 'Testing Deep Learning', populationSettings = populationSet, splitSettings = PatientLevelPrediction::createDefaultSplitSetting(), - sampleSettings = PatientLevelPrediction::createSampleSettings(), # none - featureEngineeringSettings = PatientLevelPrediction::createFeatureEngineeringSettings(), # none preprocessSettings = PatientLevelPrediction::createPreprocessSettings(), executeSettings = PatientLevelPrediction::createExecuteSettings( runSplitData = T, @@ -280,9 +279,7 @@ impacts the model fitting. The residual network (ResNet) was introduced to address the vanishing or exploding gradient issue. It works by adding connections between -non-adjacent layers, termed a 'skip connection'. Using some form of -regularization with these 'skip connections' enables the network to -ignore any problematic layer that resulted due to gradient issues. +non-adjacent layers, termed a 'skip connection'. The ResNet calculates embeddings for every feature and then averages them to compute an embedding per patient. 
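The embedding-then-average step can be sketched with a toy example (vocabulary size, embedding width and feature ids are made up; only torch-for-R calls that the package already depends on are used):

```r
library(torch)

# 100 possible sparse features, each mapped to a learned 16-dimensional vector
embedding <- nn_embedding(num_embeddings = 100, embedding_dim = 16)

# one patient with three active features (ids chosen for illustration)
featureIds <- torch_tensor(matrix(c(3L, 57L, 12L), nrow = 1), dtype = torch_long())

# embed each feature, then average over the feature dimension
patientEmbedding <- embedding(featureIds)$mean(dim = 2)
patientEmbedding$shape   # 1 16, i.e. one 16-dimensional patient representation
```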
@@ -312,16 +309,16 @@ hidden layer or residual connection `sizeEmbedding` : The size of the initial embedding layer -##### Training process inputs: +##### Estimator inputs: `weightDecay` : How much weight decay to apply, which penalizes bigger weights `learningRate` : Which learning rate to use -`seed` : Use a seed for reproducibility +`seed` : seed for weight initialization -`device` : Which device to use, such as a cpu or a gpu +`device` : Which device to use, such as a `cpu` or a `gpu` `batchSize` : Size of batch of data used per iteration during training @@ -335,6 +332,8 @@ random sampling or exhaustive (grid) search `randomSample`: If doing a random search for hyperparameters, how many random samples to use +`randomSampleSeed`: Seed to make hyperparameter search reproducible + #### Example Code For example, the following code will fit a two layer ResNet where each @@ -353,15 +352,15 @@ resset <- setResNet( hiddenFactor = c(2), residualDropout = c(0.1), hiddenDropout = c(0.1), - sizeEmbedding = c(32), - weightDecay = c(1e-6), - learningRate = c(3e-4), - seed = 42, + sizeEmbedding = c(32), + estimatorSettings = setEstimator(learningRate = c(3e-4), + weightDecay = c(1e-6), + #device='cuda:0', # uncomment to use GPU + batchSize = 128, + epochs = 3, + seed = 42), hyperParamSearch = 'random', - randomSample = 1, - #device='cuda:0', # uncomment to use GPU - batchSize = 128, - epochs = 3 + randomSample = 1 ) resResult <- PatientLevelPrediction::runPlp( @@ -372,8 +371,6 @@ resResult <- PatientLevelPrediction::runPlp( analysisName = 'Testing ResNet', populationSettings = populationSet, splitSettings = PatientLevelPrediction::createDefaultSplitSetting(), - sampleSettings = PatientLevelPrediction::createSampleSettings(), - featureEngineeringSettings = PatientLevelPrediction::createFeatureEngineeringSettings(), preprocessSettings = PatientLevelPrediction::createPreprocessSettings(), executeSettings = PatientLevelPrediction::createExecuteSettings( runSplitData = T, @@ -405,7 +402,7 @@ longer sequence length. In this case the sequence is the amount of features each patient has. Users need to be aware of how many features they are feeding to the model since this will effect the computation time heavily. This is something you control in `FeatureExtraction` when -you create your covariate settings. +you create your `covariateSettings`. 
### Examples @@ -449,12 +446,15 @@ modelSettings <- setTransformer(numBlocks = 3, ffnDropout = 0.25, resDropout = 0, dimHidden = 128, - weightDecay = 1e-06, - learningRate = 3e-04 - batchSize = 128, - epochs = 10, - device = 'cpu', # or 'cuda' for GPU - randomSample = 1) + estimatorSettings = setEstimator( + learningRate = 3e-4, + weightDecay = 1e-6, + batchSize = 128, + epochs = 10, + device = 'cpu' + ), + randomSample=1) + TransformerResult <- PatientLevelPrediction::runPlp( @@ -465,8 +465,6 @@ TransformerResult <- PatientLevelPrediction::runPlp( analysisName = 'Testing transformer', populationSettings = populationSet, splitSettings = PatientLevelPrediction::createDefaultSplitSetting(), - sampleSettings = PatientLevelPrediction::createSampleSettings(), # none - featureEngineeringSettings = PatientLevelPrediction::createFeatureEngineeringSettings(), # none preprocessSettings = PatientLevelPrediction::createPreprocessSettings(), executeSettings = PatientLevelPrediction::createExecuteSettings( runSplitData = T, diff --git a/vignettes/FirstModel.Rmd b/vignettes/FirstModel.Rmd index da29fd6..4aaeee3 100644 --- a/vignettes/FirstModel.Rmd +++ b/vignettes/FirstModel.Rmd @@ -73,7 +73,7 @@ covariateSettings <- FeatureExtraction::createCovariateSettings( ) ``` -This means we are extracing gender as a binary variable, age as a continuous variable and conditions occurring in the long term window, which is by default 365 days prior. +This means we are extracting gender as a binary variable, age as a continuous variable and conditions occurring in the long term window, which is by default 365 days prior. Next we need to define our database details, which defines from which database we are getting which cohorts. Since we don't have a database we are using Eunomia. @@ -81,7 +81,7 @@ Next we need to define our database details, which defines from which database w databaseDetails <- PatientLevelPrediction::createDatabaseDetails( connectionDetails = connectionDetails, cdmDatabaseSchema = "main", - cdmDatabaseId = 1, + cdmDatabaseId = "1", cohortDatabaseSchema = "main", cohortTable = "cohort", targetId= 4, @@ -111,23 +111,24 @@ When defining our study population we define the time-at-risk. Which is when we ## The model -Now it's time to define our deep learning model. It can be daunting for those not familiar with deep learning to define their first model since the models are very flexible and have many hyperparameters to define to get your model architecture. To help with this `deepPLP` has helper functions with a sensible set of hyperparameters for testing. Best practice is though to do an extensive hyperparameter tuning step using cross validation. +Now it's time to define our deep learning model. It can be daunting for those not familiar with deep learning to define their first model since the models are very flexible and have many hyperparameters to define for your model architecture. To help with this `deepPLP` has helper functions with a sensible set of hyperparameters for testing. Best practice is though to do an extensive hyperparameter tuning step using cross validation. -We will use a simple ResNet for our example. ResNet are simple models that have so called skip connections between layers that allow for deeper models without overfitting. The default ResNet is a 6 layer model with 512 neurons per layer. +We will use a simple ResNet for our example. ResNet are simple models that have skip connections between layers that allow for deeper models without overfitting. 
The default ResNet is a 6 layer model with 512 neurons per layer. ```{r, echo = TRUE, message = FALSE, warning = FALSE,tidy=FALSE,eval=FALSE} library(DeepPatientLevelPrediction) modelSettings <- setDefaultResNet( - device = 'cpu', - batchSize = 256, - epochs=3 + estimatorSettings = setEstimator(learningRate=3e-4, + device="cpu", + batchSize=256, + epochs=3) ) ``` -We still need to define a few parameters though. Device defines on which device to train the model. Usually deep learning models are slow to train so they need a GPU. However this example is small enough that we can use a CPU If you have access to a GPU you can try changing the device to `'cuda'` and see how much faster it goes. +We still need to define a few parameters. Device defines on which device to train the model. Usually deep learning models are slow to train so they need a GPU. However this example is small enough that we can use a CPU If you have access to a GPU you can try changing the device to `'cuda'` and see how much faster it goes. -We also need to define our batch size. Usually in deep learning the model sees only a small chunk of the data at a time, in this case 256 patients. After that the model is updated before seeing the next batch. This is called stochastic gradient descent. +We also need to define our batch size. Usually in deep learning the model sees only a small chunk of the data at a time, in this case 256 patients. After that the model is updated before seeing the next batch. The batch order is random. This is called stochastic gradient descent. Finally we define our epochs. This is how long we will train the model. One epoch means the model has seen all the data once. diff --git a/vignettes/Installing.Rmd b/vignettes/Installing.Rmd index 7946f76..8d77599 100644 --- a/vignettes/Installing.Rmd +++ b/vignettes/Installing.Rmd @@ -44,7 +44,7 @@ This vignette describes how you need to install the Observational Health Data Sc Under Windows the OHDSI Deep Patient Level Prediction (DeepPLP) package requires installing: -- R ( ) - (R \>= 3.5.0, but latest is recommended) +- R ( ) - (R \>= 4.0.0, but latest is recommended) - Rstudio ( ) - Java ( ) - RTools () @@ -53,7 +53,7 @@ Under Windows the OHDSI Deep Patient Level Prediction (DeepPLP) package requires Under Mac and Linux the OHDSI deepPLP package requires installing: -- R ( ) - (R \>= 3.3.0, but latest is recommended) +- R ( ) - (R \>= 4.0.0, but latest is recommended) - Rstudio ( ) - Java ( ) - Xcode command line tools(run in terminal: xcode-select --install) [MAC USERS ONLY] @@ -78,12 +78,12 @@ remotes::install_github("OHDSI/DeepPatientLevelPrediction") ``` DeepPLP relies on [torch for R](https://torch.mlverse.org/). When torch is installed the user -will be prompted if libtorch and lantern binaries should be downloaded. These binaries are neccesary +will be prompted if libtorch and lantern binaries should be downloaded. These binaries are necessary for the package to run. If you are using DeepPLP in an offline environment the function `torch::install_torch_from_file()` can be used. This will first require to download and move the correct binaries to the offline environment. See [torch installation guide](https://torch.mlverse.org/docs/articles/installation.html) for more detailed instructions. -When installing make sure to close any other Rstudio sessions that are using `DeepPatientLevelPrediction` or any dependency. Keeping Rstudio sessions open can cause locks that prevent the package installing. 
+When installing make sure to close any other Rstudio sessions that are using `DeepPatientLevelPrediction` or any dependency. Keeping Rstudio sessions open can cause locks on windows that prevent the package installing. # Testing Installation @@ -109,12 +109,14 @@ modelSettings <- setResNet(numLayers = 2, residualDropout = 0, hiddenDropout = 0.2, sizeEmbedding = 64, - weightDecay = 1e-6, - learningRate = 3e-4, - seed = 42, + estimatorSettings = setEstimator(learningRate = 3e-4, + weightDecay = 1e-6, + device='cpu', + batchSize=128, + epochs=3, + seed = 42), hyperParamSearch = 'random', - randomSample = 1, device = 'cpu',batchSize = 128, - epochs = 3) + randomSample = 1) plpResults <- PatientLevelPrediction::runPlp(plpData = plpData, outcomeId = 3, From 793338a6a72bb023fd049bc958c9f6dbf48899c4 Mon Sep 17 00:00:00 2001 From: egillax Date: Wed, 22 Mar 2023 16:45:52 +0100 Subject: [PATCH 20/58] remove docs folder from code branches --- docs/404.html | 127 ------ docs/articles/BuildingDeepModels.html | 378 --------------- docs/articles/index.html | 100 ---- docs/authors.html | 135 ------ docs/bootstrap-toc.css | 60 --- docs/bootstrap-toc.js | 159 ------- docs/docsearch.css | 148 ------ docs/docsearch.js | 85 ---- docs/index.html | 234 ---------- docs/link.svg | 12 - docs/pkgdown.css | 384 ---------------- docs/pkgdown.js | 108 ----- docs/pkgdown.yml | 8 - docs/reference/Dataset.html | 123 ----- .../reference/DeepPatientLevelPrediction.html | 102 ----- docs/reference/EarlyStopping.html | 175 ------- docs/reference/Estimator.html | 431 ------------------ docs/reference/doubleLayerNN.html | 127 ------ docs/reference/fitDeepNNTorch.html | 123 ----- docs/reference/fitEstimator.html | 123 ----- docs/reference/gridCvDeep.html | 127 ------ docs/reference/index.html | 159 ------- docs/reference/predictDeepEstimator.html | 119 ----- docs/reference/predictDeepNN.html | 119 ----- docs/reference/setDeepNNTorch.html | 153 ------- docs/reference/setResNet.html | 192 -------- docs/reference/setTransformer.html | 192 -------- docs/reference/singleLayerNN.html | 123 ----- docs/reference/tripleLayerNN.html | 131 ------ docs/sitemap.xml | 69 --- 30 files changed, 4526 deletions(-) delete mode 100644 docs/404.html delete mode 100644 docs/articles/BuildingDeepModels.html delete mode 100644 docs/articles/index.html delete mode 100644 docs/authors.html delete mode 100644 docs/bootstrap-toc.css delete mode 100644 docs/bootstrap-toc.js delete mode 100644 docs/docsearch.css delete mode 100644 docs/docsearch.js delete mode 100644 docs/index.html delete mode 100644 docs/link.svg delete mode 100644 docs/pkgdown.css delete mode 100644 docs/pkgdown.js delete mode 100644 docs/pkgdown.yml delete mode 100644 docs/reference/Dataset.html delete mode 100644 docs/reference/DeepPatientLevelPrediction.html delete mode 100644 docs/reference/EarlyStopping.html delete mode 100644 docs/reference/Estimator.html delete mode 100644 docs/reference/doubleLayerNN.html delete mode 100644 docs/reference/fitDeepNNTorch.html delete mode 100644 docs/reference/fitEstimator.html delete mode 100644 docs/reference/gridCvDeep.html delete mode 100644 docs/reference/index.html delete mode 100644 docs/reference/predictDeepEstimator.html delete mode 100644 docs/reference/predictDeepNN.html delete mode 100644 docs/reference/setDeepNNTorch.html delete mode 100644 docs/reference/setResNet.html delete mode 100644 docs/reference/setTransformer.html delete mode 100644 docs/reference/singleLayerNN.html delete mode 100644 docs/reference/tripleLayerNN.html delete 
mode 100644 docs/sitemap.xml diff --git a/docs/404.html b/docs/404.html deleted file mode 100644 index 491064f..0000000 --- a/docs/404.html +++ /dev/null @@ -1,127 +0,0 @@ - - - - - - - -Page not found (404) • DeepPatientLevelPrediction - - - - - - - - - - - -
-
- - - - -
-
- - -Content not found. Please use links in the navbar. - -
- - - -
- - - -
- -
-

-

Site built with pkgdown 2.0.5.

-
- -
-
- - - - - - - - diff --git a/docs/articles/BuildingDeepModels.html b/docs/articles/BuildingDeepModels.html deleted file mode 100644 index 70c805a..0000000 --- a/docs/articles/BuildingDeepModels.html +++ /dev/null @@ -1,378 +0,0 @@ - - - - - - - -Building Deep Learning Models • DeepPatientLevelPrediction - - - - - - - - - - - - -
-
- - - - -
-
- - - - - -
-

Introduction -

-
-

DeepPatientLevelPrediction -

-

Patient level prediction aims to use historic data to learn a function between an input (a patient’s features such as age/gender/comorbidities at index) and an output (whether the patient experienced an outcome during some time-at-risk). Deep learning is example of the the current state-of-the-art classifiers that can be implemented to learn the function between inputs and outputs.

-

Deep Learning models are widely used to automatically learn high-level feature representations from the data, and have achieved remarkable results in image processing, speech recognition and computational biology. Recently, interesting results have been shown using large observational healthcare data (e.g., electronic healthcare data or claims data), but more extensive research is needed to assess the power of Deep Learning in this domain.

-

This vignette describes how you can use the Observational Health Data Sciences and Informatics (OHDSI) PatientLevelPrediction package and DeepPatientLevelPrediction package to build Deep Learning models. This vignette assumes you have read and are comfortable with building patient level prediction models as described in the BuildingPredictiveModels vignette. Furthermore, this vignette assumes you are familiar with Deep Learning methods.

-
-
-

Background -

-

Deep Learning models are build by stacking an often large number of neural network layers that perform feature engineering steps, e.g embedding, and are collapsed in a final softmax layer (basically a logistic regression layer). These algorithms need a lot of data to converge to a good representation, but currently the sizes of the large observational healthcare databases are growing fast which would make Deep Learning an interesting approach to test within OHDSI’s Patient-Level Prediction Framework. The current implementation allows us to perform research at scale on the value and limitations of Deep Learning using observational healthcare data.

-

In the package we have used torch and tabnet but we invite the community to add other backends.

-

Many network architectures have recently been proposed and we have implemented a number of them, however, this list will grow in the near future. It is important to understand that some of these architectures require a 2D data matrix, i.e. |patient|x|feature|, and others use a 3D data matrix |patient|x|feature|x|time|. The FeatureExtraction Package has been extended to enable the extraction of both data formats as will be described with examples below.

-

Note that training Deep Learning models is computationally intensive, our implementation therefore supports both GPU and CPU. It will automatically check whether there is GPU or not in your computer. A GPU is highly recommended for Deep Learning!

-
-
-

Requirements -

-

Full details about the package requirements and instructions on installing the package can be found here.

-
-
-

Integration with PatientLevelPrediction -

-

The DeepPatientLevelPrediction package provides additional model settings that can be used within the PatientLevelPrediction package runPlp() function. To use both packages you first need to pick the deep learning architecture you wish to fit (see below) and then you specifiy this as the modelSettings inside runPlp().

-
-# load the data
-plpData <- PatientLevelPrediction::loadPlpData('locationOfData')
-
-# pick the set<Model> from  DeepPatientLevelPrediction
-deepLearningModel <- DeepPatientLevelPrediction::setResNet()
-
-# use PatientLevelPrediction to fit model
-deepLearningResult <- PatientLevelPrediction::runPlp(
-    plpData = plpData, 
-    outcomeId = 1230, 
-    modelSettings = deepLearningModel,
-    analysisId = 'resNetTorch', 
-    ...
-  )
-
-
-
-

Non-Temporal Architectures -

-

We implemented the following non-temporal (2D data matrix) architectures:

-
-

Simple MLP -

-
-

Overall concept -

-

A multilayer perceptron (MLP) model is a directed graph consisting of an input layer, one or more hidden layers and an output layer. The model takes in the input feature values and feeds these forward through the graph to determine the output class. A process known as ‘backpropogation’ is used to train the model. Backpropogation requires labelled data and involves iteratively calculating the error between the MLP model’s predictions and ground truth to learn how to adjust the model.

-
-
-

Example -

-
-
Set Fuction -
-

To use the package to fit a MLP model you can use the setDeepNNTorch() function to specify the hyper-parameter settings for the MLP.

-
-
-
Inputs -
-

The units input defines the network topology via the number of nodes per layer in the networks hidden layers. A list of different topologies can be investigated list(c(10,63), 128) means two different topologies will be fit, the first has two hidden layers with 10 nodes in the first hidden layer and 63 in the second hidden layer. The second just has one hidden layer with 128 nodes.

-

The layer_dropout input specifies the probability that a layer randomly sets input units to 0 at each step during training time. A value of 0.2 means that 20% of the time the layer input units will be set to 0. This is used to reduce overfitting.

-

The lr input is the learning rate which is a hyperparameter that controls how much to change the model in response to the estimated error each time the model weights are updated. The smaller the lr the longer it will take to fit the model and the model weights may get stuck, but if the lr is too large, the weights may sub-optimally converge too fast.

-

The decay input corresponds to the weight decay in the objective function. During model fitting the aim is to minimize the objective function. The objective function is made up of the prediction error (the difference between the prediction vs the truth) plus the square of the weights multiplied by the weight decay. The larger the weight decay, the more you penalize having large weights. If you set the weight decay too large, the model will never fit well enough, if you set it too low, you need to be careful of overfitting (so try to stop model fitting earlier).

-

The outcome_weight specifies whether to add more weight to misclassifying one class (e.g., with outcome during TAR) vs the other (e.g., without outcome during TAR). This can be useful if there is imbalance between the classes (e.g., the outcome rarely occurs during TAR).

-

The batch_size corresponds to the number of data points (patients) used per iteration to estimate the network error during model fitting.

-

The epochs corresponds to how many time to run through the entire training data while fitting the model.

-

The seed lets the user reproduce the same network given the same training data and hyper-parameter settings if they use the same seed.

-
-
-
Example Code -
-

For example, the following code will try two different network topologies and pick the topology that obtains the greatest AUROC via cross validation in the training data and then fit the model with that topology using all the training data. The standard output of runPlp() will be returned - this contains the MLP model along with the performance details and settings.

-
-#singleLayerNN(inputN = 10, layer1 = 100, outputN = 2, layer_dropout = 0.1)
-deepset <- setDeepNNTorch(
-  units = list(c(10,63), 128), 
-  layer_dropout = c(0.2),
-  lr = c(1e-4), 
-  decay = c(1e-5), 
-  outcome_weight = c(1.0), 
-  batch_size = c(100), 
-  epochs = c(5),  
-  seed = 12  
-  )
-
-mlpResult <- PatientLevelPrediction::runPlp(
-    plpData = plpData, 
-    outcomeId = 3, 
-    modelSettings = deepset,
-    analysisId = 'DeepNNTorch', 
-    analysisName = 'Testing Deep Learning', 
-    populationSettings = populationSet, 
-    splitSettings = PatientLevelPrediction::createDefaultSplitSetting(), 
-    sampleSettings = PatientLevelPrediction::createSampleSettings(),  # none 
-    featureEngineeringSettings = PatientLevelPrediction::createFeatureEngineeringSettings(), # none 
-    preprocessSettings = PatientLevelPrediction::createPreprocessSettings(), 
-    executeSettings = PatientLevelPrediction::createExecuteSettings(
-      runSplitData = T, 
-      runSampleData = F, 
-      runfeatureEngineering = F, 
-      runPreprocessData = T, 
-      runModelDevelopment = T, 
-      runCovariateSummary = F
-    ), 
-    saveDirectory = file.path(testLoc, 'DeepNNTorch')
-  )
-
-
-
-
-

ResNet -

-
-

Overall concept -

-

Deep learning models are often trained via a process known as gradient descent during backpropogation. During this process the network weights are updated based on the gradient of the error function for the current weights. However, as the number of layers in the network increase, there is a greater chance of experiencing an issue known as the vanishing or exploding gradient during this process. The vanishing or exploding gradient is when the gradient goes to 0 or infinity, which negatively impacts the model fitting.

-

The residual network (ResNet) was introduced to address the vanishing or exploding gradient issue. It works by adding connections between non-adjacent layers, termed a ‘skip connection’. Using some form of regularization with these ‘skip connections’ enables the network to ignore any problematic layer that resulted due to gradient issues.

-
-
-

Example -

-
-
Set Fuction -
-

To use the package to fit a ResNet model you can use the setResNet() function to specify the hyper-parameter settings for the network.

-
-
-
Inputs -
-

[add info about each input here]

-
-
-
Example Code -
-

For example, the following code will …

-
-resset <- setResNet(
-  numLayers = c(2), 
-  sizeHidden = c(32),
-  hiddenFactor = c(2),
-  residualDropout = c(0.1), 
-  hiddenDropout = c(0.1),
-  normalization = c('BatchNorm'), 
-  activation = c('RelU'),
-  sizeEmbedding = c(32), 
-  weightDecay = c(1e-6),
-  learningRate = c(3e-4), 
-  seed = 42, 
-  hyperParamSearch = 'random',
-  randomSample = 1, 
-  #device='cuda:0', 
-  batchSize = 128, 
-  epochs = 3
-)
-
-resResult <- PatientLevelPrediction::runPlp(
-    plpData = plpData, 
-    outcomeId = 3, 
-    modelSettings = resset,
-    analysisId = 'ResNet', 
-    analysisName = 'Testing ResNet', 
-    populationSettings = populationSet, 
-    splitSettings = PatientLevelPrediction::createDefaultSplitSetting(), 
-    sampleSettings = PatientLevelPrediction::createSampleSettings(),  # none 
-    featureEngineeringSettings = PatientLevelPrediction::createFeatureEngineeringSettings(), # none 
-    preprocessSettings = PatientLevelPrediction::createPreprocessSettings(), 
-    executeSettings = PatientLevelPrediction::createExecuteSettings(
-      runSplitData = T, 
-      runSampleData = F, 
-      runfeatureEngineering = F, 
-      runPreprocessData = T, 
-      runModelDevelopment = T, 
-      runCovariateSummary = F
-    ), 
-    saveDirectory = file.path(testLoc, 'ResNet')
-  )
-
-
-
-
-

TabNet -

-
-

Overall concept -

-
-
-

Examples -

-
-
-
-

Transformer -

-
-

Overall concept -

-
-
-

Examples -

-
-
-
-
-

Acknowledgments -

-

Considerable work has been dedicated to provide the DeepPatientLevelPrediction package.

-
-citation("DeepPatientLevelPrediction")
-
## 
-## To cite package 'DeepPatientLevelPrediction' in publications use:
-## 
-##   Reps J, Fridgeirsson E, Chan You S, Kim C, John H (2021).
-##   _DeepPatientLevelPrediction: Deep learning function for patient level
-##   prediction using data in the OMOP Common Data Model_.
-##   https://ohdsi.github.io/PatientLevelPrediction,
-##   https://github.com/OHDSI/DeepPatientLevelPrediction.
-## 
-## A BibTeX entry for LaTeX users is
-## 
-##   @Manual{,
-##     title = {DeepPatientLevelPrediction: Deep learning function for patient level prediction using data in the OMOP Common Data Model},
-##     author = {Jenna Reps and Egill Fridgeirsson and Seng {Chan You} and Chungsoo Kim and Henrik John},
-##     year = {2021},
-##     note = {https://ohdsi.github.io/PatientLevelPrediction, https://github.com/OHDSI/DeepPatientLevelPrediction},
-##   }
-

Please reference this paper if you use the PLP Package in your work:

-

Reps JM, Schuemie MJ, Suchard MA, Ryan PB, Rijnbeek PR. Design and implementation of a standardized framework to generate and evaluate patient-level prediction models using observational healthcare data. J Am Med Inform Assoc. 2018;25(8):969-975.

-
-
- - - -
- - - -
- -
-

-

Site built with pkgdown 2.0.5.

-
- -
-
- - - - - - - - diff --git a/docs/articles/index.html b/docs/articles/index.html deleted file mode 100644 index 06f1bc3..0000000 --- a/docs/articles/index.html +++ /dev/null @@ -1,100 +0,0 @@ - -Articles • DeepPatientLevelPrediction - - -
-
- - - -
-
- - - -
-
- - -
- -
-

Site built with pkgdown 2.0.5.

-
- -
- - - - - - - - diff --git a/docs/authors.html b/docs/authors.html deleted file mode 100644 index b203b7f..0000000 --- a/docs/authors.html +++ /dev/null @@ -1,135 +0,0 @@ - -Authors and Citation • DeepPatientLevelPrediction - - -
-
- - - -
-
-
- - - -
  • -

    Jenna Reps. Author. -

    -
  • -
  • -

    Egill Fridgeirsson. Author, maintainer. -

    -
  • -
  • -

    Seng Chan You. Author. -

    -
  • -
  • -

    Chungsoo Kim. Author. -

    -
  • -
  • -

    Henrik John. Author. -

    -
  • -
-
-
-

Citation

- Source: DESCRIPTION -
-
- - -

Reps J, Fridgeirsson E, Chan You S, Kim C, John H (2022). -DeepPatientLevelPrediction: Deep learning function for patient level prediction using data in the OMOP Common Data Model. -https://ohdsi.github.io/PatientLevelPrediction, https://github.com/OHDSI/DeepPatientLevelPrediction. -

-
@Manual{,
-  title = {DeepPatientLevelPrediction: Deep learning function for patient level prediction using data in the OMOP Common Data Model},
-  author = {Jenna Reps and Egill Fridgeirsson and Seng {Chan You} and Chungsoo Kim and Henrik John},
-  year = {2022},
-  note = {https://ohdsi.github.io/PatientLevelPrediction, https://github.com/OHDSI/DeepPatientLevelPrediction},
-}
- -
- -
- - - -
- -
-

Site built with pkgdown 2.0.5.

-
- -
- - - - - - - - diff --git a/docs/bootstrap-toc.css b/docs/bootstrap-toc.css deleted file mode 100644 index 5a85941..0000000 --- a/docs/bootstrap-toc.css +++ /dev/null @@ -1,60 +0,0 @@ -/*! - * Bootstrap Table of Contents v0.4.1 (http://afeld.github.io/bootstrap-toc/) - * Copyright 2015 Aidan Feldman - * Licensed under MIT (https://github.com/afeld/bootstrap-toc/blob/gh-pages/LICENSE.md) */ - -/* modified from https://github.com/twbs/bootstrap/blob/94b4076dd2efba9af71f0b18d4ee4b163aa9e0dd/docs/assets/css/src/docs.css#L548-L601 */ - -/* All levels of nav */ -nav[data-toggle='toc'] .nav > li > a { - display: block; - padding: 4px 20px; - font-size: 13px; - font-weight: 500; - color: #767676; -} -nav[data-toggle='toc'] .nav > li > a:hover, -nav[data-toggle='toc'] .nav > li > a:focus { - padding-left: 19px; - color: #563d7c; - text-decoration: none; - background-color: transparent; - border-left: 1px solid #563d7c; -} -nav[data-toggle='toc'] .nav > .active > a, -nav[data-toggle='toc'] .nav > .active:hover > a, -nav[data-toggle='toc'] .nav > .active:focus > a { - padding-left: 18px; - font-weight: bold; - color: #563d7c; - background-color: transparent; - border-left: 2px solid #563d7c; -} - -/* Nav: second level (shown on .active) */ -nav[data-toggle='toc'] .nav .nav { - display: none; /* Hide by default, but at >768px, show it */ - padding-bottom: 10px; -} -nav[data-toggle='toc'] .nav .nav > li > a { - padding-top: 1px; - padding-bottom: 1px; - padding-left: 30px; - font-size: 12px; - font-weight: normal; -} -nav[data-toggle='toc'] .nav .nav > li > a:hover, -nav[data-toggle='toc'] .nav .nav > li > a:focus { - padding-left: 29px; -} -nav[data-toggle='toc'] .nav .nav > .active > a, -nav[data-toggle='toc'] .nav .nav > .active:hover > a, -nav[data-toggle='toc'] .nav .nav > .active:focus > a { - padding-left: 28px; - font-weight: 500; -} - -/* from https://github.com/twbs/bootstrap/blob/e38f066d8c203c3e032da0ff23cd2d6098ee2dd6/docs/assets/css/src/docs.css#L631-L634 */ -nav[data-toggle='toc'] .nav > .active > ul { - display: block; -} diff --git a/docs/bootstrap-toc.js b/docs/bootstrap-toc.js deleted file mode 100644 index 1cdd573..0000000 --- a/docs/bootstrap-toc.js +++ /dev/null @@ -1,159 +0,0 @@ -/*! 
- * Bootstrap Table of Contents v0.4.1 (http://afeld.github.io/bootstrap-toc/) - * Copyright 2015 Aidan Feldman - * Licensed under MIT (https://github.com/afeld/bootstrap-toc/blob/gh-pages/LICENSE.md) */ -(function() { - 'use strict'; - - window.Toc = { - helpers: { - // return all matching elements in the set, or their descendants - findOrFilter: function($el, selector) { - // http://danielnouri.org/notes/2011/03/14/a-jquery-find-that-also-finds-the-root-element/ - // http://stackoverflow.com/a/12731439/358804 - var $descendants = $el.find(selector); - return $el.filter(selector).add($descendants).filter(':not([data-toc-skip])'); - }, - - generateUniqueIdBase: function(el) { - var text = $(el).text(); - var anchor = text.trim().toLowerCase().replace(/[^A-Za-z0-9]+/g, '-'); - return anchor || el.tagName.toLowerCase(); - }, - - generateUniqueId: function(el) { - var anchorBase = this.generateUniqueIdBase(el); - for (var i = 0; ; i++) { - var anchor = anchorBase; - if (i > 0) { - // add suffix - anchor += '-' + i; - } - // check if ID already exists - if (!document.getElementById(anchor)) { - return anchor; - } - } - }, - - generateAnchor: function(el) { - if (el.id) { - return el.id; - } else { - var anchor = this.generateUniqueId(el); - el.id = anchor; - return anchor; - } - }, - - createNavList: function() { - return $(''); - }, - - createChildNavList: function($parent) { - var $childList = this.createNavList(); - $parent.append($childList); - return $childList; - }, - - generateNavEl: function(anchor, text) { - var $a = $(''); - $a.attr('href', '#' + anchor); - $a.text(text); - var $li = $('
  • '); - $li.append($a); - return $li; - }, - - generateNavItem: function(headingEl) { - var anchor = this.generateAnchor(headingEl); - var $heading = $(headingEl); - var text = $heading.data('toc-text') || $heading.text(); - return this.generateNavEl(anchor, text); - }, - - // Find the first heading level (`

    `, then `

    `, etc.) that has more than one element. Defaults to 1 (for `

    `). - getTopLevel: function($scope) { - for (var i = 1; i <= 6; i++) { - var $headings = this.findOrFilter($scope, 'h' + i); - if ($headings.length > 1) { - return i; - } - } - - return 1; - }, - - // returns the elements for the top level, and the next below it - getHeadings: function($scope, topLevel) { - var topSelector = 'h' + topLevel; - - var secondaryLevel = topLevel + 1; - var secondarySelector = 'h' + secondaryLevel; - - return this.findOrFilter($scope, topSelector + ',' + secondarySelector); - }, - - getNavLevel: function(el) { - return parseInt(el.tagName.charAt(1), 10); - }, - - populateNav: function($topContext, topLevel, $headings) { - var $context = $topContext; - var $prevNav; - - var helpers = this; - $headings.each(function(i, el) { - var $newNav = helpers.generateNavItem(el); - var navLevel = helpers.getNavLevel(el); - - // determine the proper $context - if (navLevel === topLevel) { - // use top level - $context = $topContext; - } else if ($prevNav && $context === $topContext) { - // create a new level of the tree and switch to it - $context = helpers.createChildNavList($prevNav); - } // else use the current $context - - $context.append($newNav); - - $prevNav = $newNav; - }); - }, - - parseOps: function(arg) { - var opts; - if (arg.jquery) { - opts = { - $nav: arg - }; - } else { - opts = arg; - } - opts.$scope = opts.$scope || $(document.body); - return opts; - } - }, - - // accepts a jQuery object, or an options object - init: function(opts) { - opts = this.helpers.parseOps(opts); - - // ensure that the data attribute is in place for styling - opts.$nav.attr('data-toggle', 'toc'); - - var $topContext = this.helpers.createChildNavList(opts.$nav); - var topLevel = this.helpers.getTopLevel(opts.$scope); - var $headings = this.helpers.getHeadings(opts.$scope, topLevel); - this.helpers.populateNav($topContext, topLevel, $headings); - } - }; - - $(function() { - $('nav[data-toggle="toc"]').each(function(i, el) { - var $nav = $(el); - Toc.init($nav); - }); - }); -})(); diff --git a/docs/docsearch.css b/docs/docsearch.css deleted file mode 100644 index e5f1fe1..0000000 --- a/docs/docsearch.css +++ /dev/null @@ -1,148 +0,0 @@ -/* Docsearch -------------------------------------------------------------- */ -/* - Source: https://github.com/algolia/docsearch/ - License: MIT -*/ - -.algolia-autocomplete { - display: block; - -webkit-box-flex: 1; - -ms-flex: 1; - flex: 1 -} - -.algolia-autocomplete .ds-dropdown-menu { - width: 100%; - min-width: none; - max-width: none; - padding: .75rem 0; - background-color: #fff; - background-clip: padding-box; - border: 1px solid rgba(0, 0, 0, .1); - box-shadow: 0 .5rem 1rem rgba(0, 0, 0, .175); -} - -@media (min-width:768px) { - .algolia-autocomplete .ds-dropdown-menu { - width: 175% - } -} - -.algolia-autocomplete .ds-dropdown-menu::before { - display: none -} - -.algolia-autocomplete .ds-dropdown-menu [class^=ds-dataset-] { - padding: 0; - background-color: rgb(255,255,255); - border: 0; - max-height: 80vh; -} - -.algolia-autocomplete .ds-dropdown-menu .ds-suggestions { - margin-top: 0 -} - -.algolia-autocomplete .algolia-docsearch-suggestion { - padding: 0; - overflow: visible -} - -.algolia-autocomplete .algolia-docsearch-suggestion--category-header { - padding: .125rem 1rem; - margin-top: 0; - font-size: 1.3em; - font-weight: 500; - color: #00008B; - border-bottom: 0 -} - -.algolia-autocomplete .algolia-docsearch-suggestion--wrapper { - float: none; - padding-top: 0 -} - -.algolia-autocomplete 
.algolia-docsearch-suggestion--subcategory-column { - float: none; - width: auto; - padding: 0; - text-align: left -} - -.algolia-autocomplete .algolia-docsearch-suggestion--content { - float: none; - width: auto; - padding: 0 -} - -.algolia-autocomplete .algolia-docsearch-suggestion--content::before { - display: none -} - -.algolia-autocomplete .ds-suggestion:not(:first-child) .algolia-docsearch-suggestion--category-header { - padding-top: .75rem; - margin-top: .75rem; - border-top: 1px solid rgba(0, 0, 0, .1) -} - -.algolia-autocomplete .ds-suggestion .algolia-docsearch-suggestion--subcategory-column { - display: block; - padding: .1rem 1rem; - margin-bottom: 0.1; - font-size: 1.0em; - font-weight: 400 - /* display: none */ -} - -.algolia-autocomplete .algolia-docsearch-suggestion--title { - display: block; - padding: .25rem 1rem; - margin-bottom: 0; - font-size: 0.9em; - font-weight: 400 -} - -.algolia-autocomplete .algolia-docsearch-suggestion--text { - padding: 0 1rem .5rem; - margin-top: -.25rem; - font-size: 0.8em; - font-weight: 400; - line-height: 1.25 -} - -.algolia-autocomplete .algolia-docsearch-footer { - width: 110px; - height: 20px; - z-index: 3; - margin-top: 10.66667px; - float: right; - font-size: 0; - line-height: 0; -} - -.algolia-autocomplete .algolia-docsearch-footer--logo { - background-image: url("data:image/svg+xml;utf8,"); - background-repeat: no-repeat; - background-position: 50%; - background-size: 100%; - overflow: hidden; - text-indent: -9000px; - width: 100%; - height: 100%; - display: block; - transform: translate(-8px); -} - -.algolia-autocomplete .algolia-docsearch-suggestion--highlight { - color: #FF8C00; - background: rgba(232, 189, 54, 0.1) -} - - -.algolia-autocomplete .algolia-docsearch-suggestion--text .algolia-docsearch-suggestion--highlight { - box-shadow: inset 0 -2px 0 0 rgba(105, 105, 105, .5) -} - -.algolia-autocomplete .ds-suggestion.ds-cursor .algolia-docsearch-suggestion--content { - background-color: rgba(192, 192, 192, .15) -} diff --git a/docs/docsearch.js b/docs/docsearch.js deleted file mode 100644 index b35504c..0000000 --- a/docs/docsearch.js +++ /dev/null @@ -1,85 +0,0 @@ -$(function() { - - // register a handler to move the focus to the search bar - // upon pressing shift + "/" (i.e. "?") - $(document).on('keydown', function(e) { - if (e.shiftKey && e.keyCode == 191) { - e.preventDefault(); - $("#search-input").focus(); - } - }); - - $(document).ready(function() { - // do keyword highlighting - /* modified from https://jsfiddle.net/julmot/bL6bb5oo/ */ - var mark = function() { - - var referrer = document.URL ; - var paramKey = "q" ; - - if (referrer.indexOf("?") !== -1) { - var qs = referrer.substr(referrer.indexOf('?') + 1); - var qs_noanchor = qs.split('#')[0]; - var qsa = qs_noanchor.split('&'); - var keyword = ""; - - for (var i = 0; i < qsa.length; i++) { - var currentParam = qsa[i].split('='); - - if (currentParam.length !== 2) { - continue; - } - - if (currentParam[0] == paramKey) { - keyword = decodeURIComponent(currentParam[1].replace(/\+/g, "%20")); - } - } - - if (keyword !== "") { - $(".contents").unmark({ - done: function() { - $(".contents").mark(keyword); - } - }); - } - } - }; - - mark(); - }); -}); - -/* Search term highlighting ------------------------------*/ - -function matchedWords(hit) { - var words = []; - - var hierarchy = hit._highlightResult.hierarchy; - // loop to fetch from lvl0, lvl1, etc. 
- for (var idx in hierarchy) { - words = words.concat(hierarchy[idx].matchedWords); - } - - var content = hit._highlightResult.content; - if (content) { - words = words.concat(content.matchedWords); - } - - // return unique words - var words_uniq = [...new Set(words)]; - return words_uniq; -} - -function updateHitURL(hit) { - - var words = matchedWords(hit); - var url = ""; - - if (hit.anchor) { - url = hit.url_without_anchor + '?q=' + escape(words.join(" ")) + '#' + hit.anchor; - } else { - url = hit.url + '?q=' + escape(words.join(" ")); - } - - return url; -} diff --git a/docs/index.html b/docs/index.html deleted file mode 100644 index 020d123..0000000 --- a/docs/index.html +++ /dev/null @@ -1,234 +0,0 @@ - - - - - - - -Deep learning function for patient level prediction using data in the OMOP Common Data Model • DeepPatientLevelPrediction - - - - - - - - - - - - -
    -
    - - - - -
    -
    - -
    - - -
    -
    -

    Introduction -

    -

    DeepPatientLevelPrediction is an R package for building and validating deep learning patient-level predictive models using data in the OMOP Common Data Model format and OHDSI PatientLevelPrediction framework.

    -

    Reps JM, Schuemie MJ, Suchard MA, Ryan PB, Rijnbeek PR. Design and implementation of a standardized framework to generate and evaluate patient-level prediction models using observational healthcare data. J Am Med Inform Assoc. 2018;25(8):969-975.

    -
    -
    -

    Features -

    -
      -
    • Adds deep learning models to use in the OHDSI PatientLevelPrediction framework.
    • -
    • Allows to add custom deep learning models.
    • -
    • Includes an MLP, ResNet and a Transformer
    • -
    • Allows to use all the features of PatientLevelPrediction to validate and explore your model performance.
    • -
    -
    -
    -

    Technology -

    -

    DeepPatientLevelPrediction is an R package. It uses torch in R to build deep learning models without using python.

    -
    -
    -

    System Requirements -

    -

    Requires R (version 3.5.0 or higher). Installation on Windows requires RTools. For training deep learning models in most cases an nvidia GPU is required using either Windows or Linux.

    -
    -
    -

    Getting Started -

    - -
    -
    -

    User Documentation -

    -

    Documentation can be found on the package website.

    -

    PDF versions of the documentation are also available, as mentioned above.

    -
    -
    -

    Support -

    - -
    -
    -

    Contributing -

    -

    Read here how you can contribute to this package.

    -
    -
    -

    License -

    -

    DeepPatientLevelPrediction is licensed under Apache License 2.0

    -
    -
    -

    Development -

    -

    DeepPatientLevelPrediction is being developed in R Studio.

    -
    - -
    - - -
    - - -
    - -
    -

    -

    Site built with pkgdown 2.0.5.

    -
    - -
    -
    - - - - - - - - diff --git a/docs/link.svg b/docs/link.svg deleted file mode 100644 index 88ad827..0000000 --- a/docs/link.svg +++ /dev/null @@ -1,12 +0,0 @@ - - - - - - diff --git a/docs/pkgdown.css b/docs/pkgdown.css deleted file mode 100644 index 80ea5b8..0000000 --- a/docs/pkgdown.css +++ /dev/null @@ -1,384 +0,0 @@ -/* Sticky footer */ - -/** - * Basic idea: https://philipwalton.github.io/solved-by-flexbox/demos/sticky-footer/ - * Details: https://github.com/philipwalton/solved-by-flexbox/blob/master/assets/css/components/site.css - * - * .Site -> body > .container - * .Site-content -> body > .container .row - * .footer -> footer - * - * Key idea seems to be to ensure that .container and __all its parents__ - * have height set to 100% - * - */ - -html, body { - height: 100%; -} - -body { - position: relative; -} - -body > .container { - display: flex; - height: 100%; - flex-direction: column; -} - -body > .container .row { - flex: 1 0 auto; -} - -footer { - margin-top: 45px; - padding: 35px 0 36px; - border-top: 1px solid #e5e5e5; - color: #666; - display: flex; - flex-shrink: 0; -} -footer p { - margin-bottom: 0; -} -footer div { - flex: 1; -} -footer .pkgdown { - text-align: right; -} -footer p { - margin-bottom: 0; -} - -img.icon { - float: right; -} - -/* Ensure in-page images don't run outside their container */ -.contents img { - max-width: 100%; - height: auto; -} - -/* Fix bug in bootstrap (only seen in firefox) */ -summary { - display: list-item; -} - -/* Typographic tweaking ---------------------------------*/ - -.contents .page-header { - margin-top: calc(-60px + 1em); -} - -dd { - margin-left: 3em; -} - -/* Section anchors ---------------------------------*/ - -a.anchor { - display: none; - margin-left: 5px; - width: 20px; - height: 20px; - - background-image: url(./link.svg); - background-repeat: no-repeat; - background-size: 20px 20px; - background-position: center center; -} - -h1:hover .anchor, -h2:hover .anchor, -h3:hover .anchor, -h4:hover .anchor, -h5:hover .anchor, -h6:hover .anchor { - display: inline-block; -} - -/* Fixes for fixed navbar --------------------------*/ - -.contents h1, .contents h2, .contents h3, .contents h4 { - padding-top: 60px; - margin-top: -40px; -} - -/* Navbar submenu --------------------------*/ - -.dropdown-submenu { - position: relative; -} - -.dropdown-submenu>.dropdown-menu { - top: 0; - left: 100%; - margin-top: -6px; - margin-left: -1px; - border-radius: 0 6px 6px 6px; -} - -.dropdown-submenu:hover>.dropdown-menu { - display: block; -} - -.dropdown-submenu>a:after { - display: block; - content: " "; - float: right; - width: 0; - height: 0; - border-color: transparent; - border-style: solid; - border-width: 5px 0 5px 5px; - border-left-color: #cccccc; - margin-top: 5px; - margin-right: -10px; -} - -.dropdown-submenu:hover>a:after { - border-left-color: #ffffff; -} - -.dropdown-submenu.pull-left { - float: none; -} - -.dropdown-submenu.pull-left>.dropdown-menu { - left: -100%; - margin-left: 10px; - border-radius: 6px 0 6px 6px; -} - -/* Sidebar --------------------------*/ - -#pkgdown-sidebar { - margin-top: 30px; - position: -webkit-sticky; - position: sticky; - top: 70px; -} - -#pkgdown-sidebar h2 { - font-size: 1.5em; - margin-top: 1em; -} - -#pkgdown-sidebar h2:first-child { - margin-top: 0; -} - -#pkgdown-sidebar .list-unstyled li { - margin-bottom: 0.5em; -} - -/* bootstrap-toc tweaks ------------------------------------------------------*/ - -/* All levels of nav */ - -nav[data-toggle='toc'] .nav > li > a { - padding: 4px 
20px 4px 6px; - font-size: 1.5rem; - font-weight: 400; - color: inherit; -} - -nav[data-toggle='toc'] .nav > li > a:hover, -nav[data-toggle='toc'] .nav > li > a:focus { - padding-left: 5px; - color: inherit; - border-left: 1px solid #878787; -} - -nav[data-toggle='toc'] .nav > .active > a, -nav[data-toggle='toc'] .nav > .active:hover > a, -nav[data-toggle='toc'] .nav > .active:focus > a { - padding-left: 5px; - font-size: 1.5rem; - font-weight: 400; - color: inherit; - border-left: 2px solid #878787; -} - -/* Nav: second level (shown on .active) */ - -nav[data-toggle='toc'] .nav .nav { - display: none; /* Hide by default, but at >768px, show it */ - padding-bottom: 10px; -} - -nav[data-toggle='toc'] .nav .nav > li > a { - padding-left: 16px; - font-size: 1.35rem; -} - -nav[data-toggle='toc'] .nav .nav > li > a:hover, -nav[data-toggle='toc'] .nav .nav > li > a:focus { - padding-left: 15px; -} - -nav[data-toggle='toc'] .nav .nav > .active > a, -nav[data-toggle='toc'] .nav .nav > .active:hover > a, -nav[data-toggle='toc'] .nav .nav > .active:focus > a { - padding-left: 15px; - font-weight: 500; - font-size: 1.35rem; -} - -/* orcid ------------------------------------------------------------------- */ - -.orcid { - font-size: 16px; - color: #A6CE39; - /* margins are required by official ORCID trademark and display guidelines */ - margin-left:4px; - margin-right:4px; - vertical-align: middle; -} - -/* Reference index & topics ----------------------------------------------- */ - -.ref-index th {font-weight: normal;} - -.ref-index td {vertical-align: top; min-width: 100px} -.ref-index .icon {width: 40px;} -.ref-index .alias {width: 40%;} -.ref-index-icons .alias {width: calc(40% - 40px);} -.ref-index .title {width: 60%;} - -.ref-arguments th {text-align: right; padding-right: 10px;} -.ref-arguments th, .ref-arguments td {vertical-align: top; min-width: 100px} -.ref-arguments .name {width: 20%;} -.ref-arguments .desc {width: 80%;} - -/* Nice scrolling for wide elements --------------------------------------- */ - -table { - display: block; - overflow: auto; -} - -/* Syntax highlighting ---------------------------------------------------- */ - -pre, code, pre code { - background-color: #f8f8f8; - color: #333; -} -pre, pre code { - white-space: pre-wrap; - word-break: break-all; - overflow-wrap: break-word; -} - -pre { - border: 1px solid #eee; -} - -pre .img, pre .r-plt { - margin: 5px 0; -} - -pre .img img, pre .r-plt img { - background-color: #fff; -} - -code a, pre a { - color: #375f84; -} - -a.sourceLine:hover { - text-decoration: none; -} - -.fl {color: #1514b5;} -.fu {color: #000000;} /* function */ -.ch,.st {color: #036a07;} /* string */ -.kw {color: #264D66;} /* keyword */ -.co {color: #888888;} /* comment */ - -.error {font-weight: bolder;} -.warning {font-weight: bolder;} - -/* Clipboard --------------------------*/ - -.hasCopyButton { - position: relative; -} - -.btn-copy-ex { - position: absolute; - right: 0; - top: 0; - visibility: hidden; -} - -.hasCopyButton:hover button.btn-copy-ex { - visibility: visible; -} - -/* headroom.js ------------------------ */ - -.headroom { - will-change: transform; - transition: transform 200ms linear; -} -.headroom--pinned { - transform: translateY(0%); -} -.headroom--unpinned { - transform: translateY(-100%); -} - -/* mark.js ----------------------------*/ - -mark { - background-color: rgba(255, 255, 51, 0.5); - border-bottom: 2px solid rgba(255, 153, 51, 0.3); - padding: 1px; -} - -/* vertical spacing after htmlwidgets */ -.html-widget { - 
margin-bottom: 10px; -} - -/* fontawesome ------------------------ */ - -.fab { - font-family: "Font Awesome 5 Brands" !important; -} - -/* don't display links in code chunks when printing */ -/* source: https://stackoverflow.com/a/10781533 */ -@media print { - code a:link:after, code a:visited:after { - content: ""; - } -} - -/* Section anchors --------------------------------- - Added in pandoc 2.11: https://github.com/jgm/pandoc-templates/commit/9904bf71 -*/ - -div.csl-bib-body { } -div.csl-entry { - clear: both; -} -.hanging-indent div.csl-entry { - margin-left:2em; - text-indent:-2em; -} -div.csl-left-margin { - min-width:2em; - float:left; -} -div.csl-right-inline { - margin-left:2em; - padding-left:1em; -} -div.csl-indent { - margin-left: 2em; -} diff --git a/docs/pkgdown.js b/docs/pkgdown.js deleted file mode 100644 index 6f0eee4..0000000 --- a/docs/pkgdown.js +++ /dev/null @@ -1,108 +0,0 @@ -/* http://gregfranko.com/blog/jquery-best-practices/ */ -(function($) { - $(function() { - - $('.navbar-fixed-top').headroom(); - - $('body').css('padding-top', $('.navbar').height() + 10); - $(window).resize(function(){ - $('body').css('padding-top', $('.navbar').height() + 10); - }); - - $('[data-toggle="tooltip"]').tooltip(); - - var cur_path = paths(location.pathname); - var links = $("#navbar ul li a"); - var max_length = -1; - var pos = -1; - for (var i = 0; i < links.length; i++) { - if (links[i].getAttribute("href") === "#") - continue; - // Ignore external links - if (links[i].host !== location.host) - continue; - - var nav_path = paths(links[i].pathname); - - var length = prefix_length(nav_path, cur_path); - if (length > max_length) { - max_length = length; - pos = i; - } - } - - // Add class to parent
  • , and enclosing
  • if in dropdown - if (pos >= 0) { - var menu_anchor = $(links[pos]); - menu_anchor.parent().addClass("active"); - menu_anchor.closest("li.dropdown").addClass("active"); - } - }); - - function paths(pathname) { - var pieces = pathname.split("/"); - pieces.shift(); // always starts with / - - var end = pieces[pieces.length - 1]; - if (end === "index.html" || end === "") - pieces.pop(); - return(pieces); - } - - // Returns -1 if not found - function prefix_length(needle, haystack) { - if (needle.length > haystack.length) - return(-1); - - // Special case for length-0 haystack, since for loop won't run - if (haystack.length === 0) { - return(needle.length === 0 ? 0 : -1); - } - - for (var i = 0; i < haystack.length; i++) { - if (needle[i] != haystack[i]) - return(i); - } - - return(haystack.length); - } - - /* Clipboard --------------------------*/ - - function changeTooltipMessage(element, msg) { - var tooltipOriginalTitle=element.getAttribute('data-original-title'); - element.setAttribute('data-original-title', msg); - $(element).tooltip('show'); - element.setAttribute('data-original-title', tooltipOriginalTitle); - } - - if(ClipboardJS.isSupported()) { - $(document).ready(function() { - var copyButton = ""; - - $("div.sourceCode").addClass("hasCopyButton"); - - // Insert copy buttons: - $(copyButton).prependTo(".hasCopyButton"); - - // Initialize tooltips: - $('.btn-copy-ex').tooltip({container: 'body'}); - - // Initialize clipboard: - var clipboardBtnCopies = new ClipboardJS('[data-clipboard-copy]', { - text: function(trigger) { - return trigger.parentNode.textContent.replace(/\n#>[^\n]*/g, ""); - } - }); - - clipboardBtnCopies.on('success', function(e) { - changeTooltipMessage(e.trigger, 'Copied!'); - e.clearSelection(); - }); - - clipboardBtnCopies.on('error', function() { - changeTooltipMessage(e.trigger,'Press Ctrl+C or Command+C to copy'); - }); - }); - } -})(window.jQuery || window.$) diff --git a/docs/pkgdown.yml b/docs/pkgdown.yml deleted file mode 100644 index e9b1b73..0000000 --- a/docs/pkgdown.yml +++ /dev/null @@ -1,8 +0,0 @@ -pandoc: 2.14.0.3 -pkgdown: 2.0.5 -pkgdown_sha: ~ -articles: - BuildingDeepModels: BuildingDeepModels.html - Installing: Installing.html -last_built: 2022-08-09T16:38Z - diff --git a/docs/reference/Dataset.html b/docs/reference/Dataset.html deleted file mode 100644 index cd4ccac..0000000 --- a/docs/reference/Dataset.html +++ /dev/null @@ -1,123 +0,0 @@ - -A torch dataset — Dataset • DeepPatientLevelPrediction - - -
    -
    - - - -
    -
    - - -
    -

    A torch dataset

    -
    - -
    -
    Dataset(data, labels = NULL, numericalIndex = NULL, all = FALSE)
    -
    - -
    -

    Arguments

    -
    data
    -

    a dataframe like object with the covariates

    - - -
    labels
    -

    a dataframe with the labels

    - - -
    numericalIndex
    -

    in what column numeric data is in (if any)

    - - -
    all
    -

    if True then returns all features instead of splitting num/cat

    - -
    - -
    - -
    - - -
    - -
    -

    Site built with pkgdown 2.0.5.

    -
    - -
    - - - - - - - - diff --git a/docs/reference/DeepPatientLevelPrediction.html b/docs/reference/DeepPatientLevelPrediction.html deleted file mode 100644 index 5e82fea..0000000 --- a/docs/reference/DeepPatientLevelPrediction.html +++ /dev/null @@ -1,102 +0,0 @@ - -DeepPatientLevelPrediction — DeepPatientLevelPrediction • DeepPatientLevelPrediction - - -
diff --git a/docs/reference/DeepPatientLevelPrediction.html b/docs/reference/DeepPatientLevelPrediction.html
deleted file mode 100644
index 5e82fea..0000000
--- a/docs/reference/DeepPatientLevelPrediction.html
+++ /dev/null
@@ -1,102 +0,0 @@
[deleted pkgdown reference page "DeepPatientLevelPrediction": a package containing deep learning extensions for developing prediction models using data in the OMOP CDM]
diff --git a/docs/reference/EarlyStopping.html b/docs/reference/EarlyStopping.html
deleted file mode 100644
index 049d2c9..0000000
--- a/docs/reference/EarlyStopping.html
+++ /dev/null
@@ -1,175 +0,0 @@
[deleted pkgdown reference page "Earlystopping class — EarlyStopping": stops training if a loss or metric has stopped improving; methods new(patience = 3, delta = 0, verbose = TRUE), where patience is the number of epochs without improvement before stopping and delta is how much the loss needs to improve to count as improvement, call(metric), which increments a counter when the metric is not improving, and clone(deep = FALSE)]
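The deleted page above only lists the EarlyStopping interface; a minimal sketch of how the documented methods compose is shown below. It assumes the class is available from DeepPatientLevelPrediction, and the per-epoch loss values are made-up placeholders.

  library(DeepPatientLevelPrediction)
  earlyStopper <- EarlyStopping$new(patience = 3, delta = 0, verbose = TRUE)
  valLosses <- c(0.70, 0.68, 0.69, 0.69, 0.69)  # illustrative validation losses, one per epoch
  for (loss in valLosses) {
    # per the documentation, call() increments an internal counter while the metric fails to improve
    earlyStopper$call(loss)
  }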
diff --git a/docs/reference/Estimator.html b/docs/reference/Estimator.html
deleted file mode 100644
index dc17110..0000000
--- a/docs/reference/Estimator.html
+++ /dev/null
@@ -1,431 +0,0 @@
[deleted pkgdown reference page "Estimator": a generic R6 class that wraps around a torch nn module and can be used to fit and predict the model defined in that module; methods new(baseModel, modelParameters, fitParameters, optimizer = torch::optim_adam, criterion = torch::nn_bce_with_logits_loss, scheduler = torch::lr_reduce_on_plateau, device = "cpu", patience = 4), fit(dataset, testDataset), fitEpoch(dataset, batchIndex), score(dataset, batchIndex), finishFit(valAUCs, modelStateDict, valLosses, epoch, learnRates), fitWholeTrainingSet(dataset, learnRates = NULL), save(path, name), predictProba(dataset), predict(dataset, threshold = NULL), batchToDevice(batch), itemOrDefaults(list, item, default = NULL) and clone(deep = FALSE)]
diff --git a/docs/reference/doubleLayerNN.html b/docs/reference/doubleLayerNN.html
deleted file mode 100644
index b3bc13f..0000000
--- a/docs/reference/doubleLayerNN.html
+++ /dev/null
@@ -1,127 +0,0 @@
[deleted pkgdown reference page "Double layer neural network — doubleLayerNN": documents doubleLayerNN(inputN, layer1, layer2, outputN, layer_dropout), i.e. input neurons, neurons in layers 1 and 2, output neurons and the dropout to use per layer]
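Constructing the module documented above is a single call; the sizes in this sketch are illustrative only, and inputN has to match the number of input features in your data.

  library(DeepPatientLevelPrediction)
  # illustrative sizes: 10 input features, hidden layers of 32 and 16 neurons, 2 outputs
  net <- doubleLayerNN(inputN = 10, layer1 = 32, layer2 = 16, outputN = 2, layer_dropout = 0.3)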
diff --git a/docs/reference/fitDeepNNTorch.html b/docs/reference/fitDeepNNTorch.html
deleted file mode 100644
index fa8c2e3..0000000
--- a/docs/reference/fitDeepNNTorch.html
+++ /dev/null
@@ -1,123 +0,0 @@
[deleted pkgdown reference page "Fits a deep neural network — fitDeepNNTorch": documents fitDeepNNTorch(trainData, param, search = "grid", analysisId), i.e. the training data object, the hyperparameters to search over, the search strategy and the analysis id]
diff --git a/docs/reference/fitEstimator.html b/docs/reference/fitEstimator.html
deleted file mode 100644
index da8803b..0000000
--- a/docs/reference/fitEstimator.html
+++ /dev/null
@@ -1,123 +0,0 @@
[deleted pkgdown reference page "fitEstimator": fits a deep learning estimator to data; documents fitEstimator(trainData, param, analysisId, ...)]
diff --git a/docs/reference/gridCvDeep.html b/docs/reference/gridCvDeep.html
deleted file mode 100644
index 9467482..0000000
--- a/docs/reference/gridCvDeep.html
+++ /dev/null
@@ -1,127 +0,0 @@
[deleted pkgdown reference page "gridCvDeep": performs grid search for a deep learning estimator; documents gridCvDeep(mappedData, labels, settings, modelLocation, paramSearch)]
diff --git a/docs/reference/index.html b/docs/reference/index.html
deleted file mode 100644
index 98aff88..0000000
--- a/docs/reference/index.html
+++ /dev/null
@@ -1,159 +0,0 @@
[deleted pkgdown function reference index listing Dataset(), DeepPatientLevelPrediction, EarlyStopping, Estimator, doubleLayerNN(), fitDeepNNTorch(), fitEstimator(), gridCvDeep(), predictDeepEstimator(), predictDeepNN(), setDeepNNTorch(), setResNet(), setTransformer(), singleLayerNN() and tripleLayerNN()]
diff --git a/docs/reference/predictDeepEstimator.html b/docs/reference/predictDeepEstimator.html
deleted file mode 100644
index 3e9eb58..0000000
--- a/docs/reference/predictDeepEstimator.html
+++ /dev/null
@@ -1,119 +0,0 @@
[deleted pkgdown reference page "predictDeepEstimator": the prediction function for the estimator; documents predictDeepEstimator(plpModel, data, cohort), where data is a plp data object or a torch dataset and cohort is a data.frame with the rowIds of the people]
diff --git a/docs/reference/predictDeepNN.html b/docs/reference/predictDeepNN.html
deleted file mode 100644
index 75341ec..0000000
--- a/docs/reference/predictDeepNN.html
+++ /dev/null
@@ -1,119 +0,0 @@
[deleted pkgdown reference page "Create predictions for a deep neural network — predictDeepNN": documents predictDeepNN(plpModel, data, cohort)]
diff --git a/docs/reference/setDeepNNTorch.html b/docs/reference/setDeepNNTorch.html
deleted file mode 100644
index 41b51de..0000000
--- a/docs/reference/setDeepNNTorch.html
+++ /dev/null
@@ -1,153 +0,0 @@
[deleted pkgdown reference page "settings for a Deep neural network — setDeepNNTorch": documents setDeepNNTorch(units = list(c(128, 64), 128), layer_dropout = c(0.2), lr = c(1e-04), decay = c(1e-05), outcome_weight = c(1), batch_size = c(10000), epochs = c(100), device = "cpu", seed = NULL)]
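A minimal call under the interface documented above; the hyperparameter values in this sketch are illustrative, not recommendations, and a single value per argument means no grid search is performed.

  library(DeepPatientLevelPrediction)
  nnSettings <- setDeepNNTorch(
    units = list(c(64, 32)),   # one candidate architecture: hidden layers of 64 and 32 neurons
    layer_dropout = c(0.2),
    lr = c(1e-4),
    decay = c(1e-5),
    outcome_weight = c(1),
    batch_size = c(256),
    epochs = c(5),
    device = "cpu",
    seed = 42
  )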
diff --git a/docs/reference/setResNet.html b/docs/reference/setResNet.html
deleted file mode 100644
index e183455..0000000
--- a/docs/reference/setResNet.html
+++ /dev/null
@@ -1,192 +0,0 @@
[deleted pkgdown reference page "setResNet": creates settings for a ResNet model (architecture from https://arxiv.org/abs/2106.11959); documents setResNet(numLayers = c(1:8), sizeHidden = c(2^(6:9)), hiddenFactor = c(1:4), residualDropout = c(seq(0, 0.5, 0.05)), hiddenDropout = c(seq(0, 0.5, 0.05)), normalization = c("BatchNorm"), activation = c("RelU"), sizeEmbedding = c(2^(6:9)), weightDecay = c(1e-06, 0.001), learningRate = c(0.01, 3e-04, 1e-05), seed = NULL, hyperParamSearch = "random", randomSample = 100, device = "cpu", batchSize = 1024, epochs = 30)]
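A minimal call under the interface documented above, using a single hyperparameter combination so the random search collapses to one candidate; all values are illustrative. Note that later commits in this series move learningRate, weightDecay, batchSize, epochs and device into a separate setEstimator() call.

  library(DeepPatientLevelPrediction)
  resnetSettings <- setResNet(
    numLayers = c(2),
    sizeHidden = c(64),
    hiddenFactor = c(1),
    residualDropout = c(0.1),
    hiddenDropout = c(0.1),
    normalization = c("BatchNorm"),
    activation = c("RelU"),
    sizeEmbedding = c(64),
    weightDecay = c(1e-6),
    learningRate = c(3e-4),
    seed = 42,
    hyperParamSearch = "random",
    randomSample = 1,     # only one combination above, so sample exactly one
    device = "cpu",
    batchSize = 512,
    epochs = 5
  )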
diff --git a/docs/reference/setTransformer.html b/docs/reference/setTransformer.html
deleted file mode 100644
index fb2503d..0000000
--- a/docs/reference/setTransformer.html
+++ /dev/null
@@ -1,192 +0,0 @@
[deleted pkgdown reference page "create settings for training a non-temporal transformer — setTransformer" (model from https://arxiv.org/abs/2106.11959): documents setTransformer(numBlocks = 3, dimToken = 96, dimOut = 1, numHeads = 8, attDropout = 0.25, ffnDropout = 0.25, resDropout = 0, dimHidden = 512, weightDecay = 1e-06, learningRate = 3e-04, batchSize = 1024, epochs = 10, device = "cpu", hyperParamSearch = "random", randomSamples = 100, seed = NULL)]
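A minimal call under the interface documented above; the values are illustrative only, and dimToken must be divisible by numHeads (a compatibility check for this was added in a later release of the package).

  library(DeepPatientLevelPrediction)
  transformerSettings <- setTransformer(
    numBlocks = 2,
    dimToken = 32,       # must be divisible by numHeads
    dimOut = 1,
    numHeads = 4,
    attDropout = 0.1,
    ffnDropout = 0.1,
    resDropout = 0,
    dimHidden = 64,
    weightDecay = 1e-6,
    learningRate = 3e-4,
    batchSize = 512,
    epochs = 5,
    device = "cpu",
    hyperParamSearch = "random",
    randomSamples = 1,
    seed = 42
  )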
diff --git a/docs/reference/singleLayerNN.html b/docs/reference/singleLayerNN.html
deleted file mode 100644
index 40fb76e..0000000
--- a/docs/reference/singleLayerNN.html
+++ /dev/null
@@ -1,123 +0,0 @@
[deleted pkgdown reference page "A single layer neural network — singleLayerNN": documents singleLayerNN(inputN, layer1, outputN = 2, layer_dropout)]
diff --git a/docs/reference/tripleLayerNN.html b/docs/reference/tripleLayerNN.html
deleted file mode 100644
index b5e0969..0000000
--- a/docs/reference/tripleLayerNN.html
+++ /dev/null
@@ -1,131 +0,0 @@
[deleted pkgdown reference page "Triple layer neural network — tripleLayerNN": documents tripleLayerNN(inputN, layer1, layer2, layer3, outputN, layer_dropout)]
diff --git a/docs/sitemap.xml b/docs/sitemap.xml
deleted file mode 100644
index 1e9115f..0000000
--- a/docs/sitemap.xml
+++ /dev/null
@@ -1,69 +0,0 @@
[deleted sitemap listing /404.html, /articles/BuildingDeepModels.html, /articles/Installing.html, /articles/index.html, /authors.html, /index.html and the /reference/ pages removed above]

From d0cb45bd355603107792e6d39352c736f2b48ed8 Mon Sep 17 00:00:00 2001
From: egillax
Date: Wed, 22 Mar 2023 17:00:45 +0100
Subject: [PATCH 21/58] render dev website

---
 .github/workflows/pkgdown.yaml | 2 +-
 DESCRIPTION                    | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/.github/workflows/pkgdown.yaml b/.github/workflows/pkgdown.yaml
index 648f6ca..3eaac6e 100644
--- a/.github/workflows/pkgdown.yaml
+++ b/.github/workflows/pkgdown.yaml
@@ -2,7 +2,7 @@
 # Need help debugging build failures? Start at https://github.com/r-lib/actions#where-to-find-help
 on:
   push:
-    branches: [main]
+    branches: [main, develop]
   release:
     types: [published]
   workflow_dispatch:
diff --git a/DESCRIPTION b/DESCRIPTION
index 0a6b787..6c827aa 100644
--- a/DESCRIPTION
+++ b/DESCRIPTION
@@ -1,7 +1,7 @@
 Package: DeepPatientLevelPrediction
 Type: Package
 Title: Deep Learning For Patient Level Prediction Using Data In The OMOP Common Data Model
-Version: 1.1.0
+Version: 1.1.0.9000
 Date: 15-12-2022
 Authors@R: c(
   person("Egill", "Fridgeirsson", email = "e.fridgeirsson@erasmusmc.nl", role = c("aut", "cre")),

From 609df5b39ca20a01de1b0b11c18a7c99b9b429a2 Mon Sep 17 00:00:00 2001
From: egillax
Date: Wed, 22 Mar 2023 17:04:10 +0100
Subject: [PATCH 22/58] fix action

---
 .github/workflows/pkgdown.yaml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.github/workflows/pkgdown.yaml b/.github/workflows/pkgdown.yaml
index 3eaac6e..fb2ac95 100644
--- a/.github/workflows/pkgdown.yaml
+++ b/.github/workflows/pkgdown.yaml
@@ -39,7 +39,7 @@ jobs:
 
       - name: Deploy to GitHub pages 🚀
         if: github.event_name != 'pull_request'
-        uses: JamesIves/github-pages-deploy-action@4.4.1
+        uses: JamesIves/github-pages-deploy-action@v4
         with:
           clean: false
           branch: gh-pages

From 3330c28cc231041e46dab22a7f0f1d24f9f01254 Mon Sep 17 00:00:00 2001
From: egillax
Date: Wed, 22 Mar 2023 17:16:11 +0100
Subject: [PATCH 23/58] fix action

---
 .github/workflows/pkgdown.yaml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.github/workflows/pkgdown.yaml b/.github/workflows/pkgdown.yaml
index fb2ac95..eff50ff 100644
--- a/.github/workflows/pkgdown.yaml
+++ b/.github/workflows/pkgdown.yaml
@@ -28,7 +28,7 @@ jobs:
 
       - uses: r-lib/actions/setup-r-dependencies@v2
         with:
-          extra-packages: any::pkgdown, ohdsi/OhdsiRTools
+          extra-packages: any::pkgdown, ohdsi/OhdsiRTools, local::.
needs: website - name: Build site From dcb9423d237bdbc7a5943dbba371b29444ec6eeb Mon Sep 17 00:00:00 2001 From: egillax Date: Wed, 22 Mar 2023 17:21:18 +0100 Subject: [PATCH 24/58] fix badge in readme --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 23711af..87c0bed 100644 --- a/README.md +++ b/README.md @@ -1,7 +1,7 @@ DeepPatientLevelPrediction ====================== -[![Build Status](https://github.com/OHDSI/DeepPatientLevelPrediction/workflows/R-CMD-check/badge.svg)](https://github.com/OHDSI/DeepPatientLevelPrediction/actions?query=workflow%3AR-CMD-check?branch=develop) +[![Build Status](https://github.com/OHDSI/DeepPatientLevelPrediction/workflows/R-CMD-check/badge.svg)](https://github.com/OHDSI/DeepPatientLevelPrediction/actions?query=workflow%3AR-CMD-check?branch=main) [![codecov.io](https://codecov.io/github/OHDSI/DeepPatientLevelPrediction/coverage.svg?branch=main)](https://codecov.io/github/OHDSI/DeepPatientLevelPrediction?branch=main) From ce5e235d36b0a156ae6be100384e20e84bd08948 Mon Sep 17 00:00:00 2001 From: Egill Axfjord Fridgeirsson Date: Wed, 22 Mar 2023 18:06:59 +0100 Subject: [PATCH 25/58] prepare version for release --- DESCRIPTION | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/DESCRIPTION b/DESCRIPTION index 6c827aa..0a6b787 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,7 +1,7 @@ Package: DeepPatientLevelPrediction Type: Package Title: Deep Learning For Patient Level Prediction Using Data In The OMOP Common Data Model -Version: 1.1.0.9000 +Version: 1.1.0 Date: 15-12-2022 Authors@R: c( person("Egill", "Fridgeirsson", email = "e.fridgeirsson@erasmusmc.nl", role = c("aut", "cre")), From 2d07b6bde8eb7a36a29b881e52cf11c9895ae846 Mon Sep 17 00:00:00 2001 From: Egill Axfjord Fridgeirsson Date: Thu, 23 Mar 2023 09:37:29 +0100 Subject: [PATCH 26/58] Update DESCRIPTION --- DESCRIPTION | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/DESCRIPTION b/DESCRIPTION index 0a6b787..6c827aa 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,7 +1,7 @@ Package: DeepPatientLevelPrediction Type: Package Title: Deep Learning For Patient Level Prediction Using Data In The OMOP Common Data Model -Version: 1.1.0 +Version: 1.1.0.9000 Date: 15-12-2022 Authors@R: c( person("Egill", "Fridgeirsson", email = "e.fridgeirsson@erasmusmc.nl", role = c("aut", "cre")), From 05fecc288a8d277fac369f3b1a4b1bae5f3197e4 Mon Sep 17 00:00:00 2001 From: egillax Date: Fri, 24 Mar 2023 14:04:27 +0100 Subject: [PATCH 27/58] modelType as attribute and tests to cover database upload --- R/Estimator.R | 2 +- R/MLP.R | 1 - README.md | 2 +- tests/testthat/test-MLP.R | 33 +++++++++++++- tests/testthat/test-ResNet.R | 85 ++++++++++++++++++++++++------------ 5 files changed, 92 insertions(+), 31 deletions(-) diff --git a/R/Estimator.R b/R/Estimator.R index 5e7f320..1d0878a 100644 --- a/R/Estimator.R +++ b/R/Estimator.R @@ -144,7 +144,7 @@ fitEstimator <- function(trainData, isNumeric = cvResult$numericalIndex ) - + attr(modelSettings$param, 'settings')$modelType <- modelSettings$modelType comp <- start - Sys.time() result <- list( model = cvResult$estimator, # file.path(outLoc), diff --git a/R/MLP.R b/R/MLP.R index 9363cfc..edf323a 100644 --- a/R/MLP.R +++ b/R/MLP.R @@ -68,7 +68,6 @@ setMultiLayerPerceptron <- function(numLayers = c(1:8), if (hyperParamSearch == "random") { suppressWarnings(withr::with_seed(randomSampleSeed, {param <- param[sample(length(param), randomSample)]})) } - results <- list( fitFunction = "fitEstimator", param 
= param, diff --git a/README.md b/README.md index 87c0bed..1b4fdb9 100644 --- a/README.md +++ b/README.md @@ -33,7 +33,7 @@ Requires R (version 4.0.0 or higher). Installation on Windows requires [RTools]( Getting Started =============== -- To install the package please read the [Package installation guide]() +- To install the package please read the [Package installation guide](https://ohdsi.github.io/DeepPatientLevelPrediction/articles/Installing.html) - Please read the main vignette for the package: [Building Deep Learning Models](https://ohdsi.github.io/DeepPatientLevelPrediction/articles/BuildingDeepModels.html) diff --git a/tests/testthat/test-MLP.R b/tests/testthat/test-MLP.R index e84bb58..4daa6aa 100644 --- a/tests/testthat/test-MLP.R +++ b/tests/testthat/test-MLP.R @@ -37,7 +37,7 @@ results <- tryCatch( plpData = plpData, outcomeId = 3, modelSettings = modelSettings, - analysisId = "MLP", + analysisId = "Analysis_MLP", analysisName = "Testing Deep Learning", populationSettings = populationSet, splitSettings = PatientLevelPrediction::createDefaultSplitSetting(), @@ -133,4 +133,35 @@ test_that("Errors are produced by settings function", { hyperParamSearch = 'random', randomSample = randomSample)) +}) + +test_that("Can upload results to database", { + cohortDefinitions = data.frame( + cohortName = c('blank1'), + cohortId = c(1), + json = c('json') + ) + + sink(nullfile()) + sqliteFile <- insertResultsToSqlite(resultLocation = file.path(testLoc, "MLP"), + cohortDefinitions = cohortDefinitions) + sink() + + testthat::expect_true(file.exists(sqliteFile)) + + cdmDatabaseSchema <- 'main' + ohdsiDatabaseSchema <- 'main' + connectionDetails <- DatabaseConnector::createConnectionDetails( + dbms = 'sqlite', + server = sqliteFile + ) + conn <- DatabaseConnector::connect(connectionDetails = connectionDetails) + targetDialect <- 'sqlite' + + # check the results table is populated + sql <- 'select count(*) as N from main.performances;' + res <- DatabaseConnector::querySql(conn, sql) + testthat::expect_true(res$N[1]>0) + + }) \ No newline at end of file diff --git a/tests/testthat/test-ResNet.R b/tests/testthat/test-ResNet.R index d87ff5f..7a380a4 100644 --- a/tests/testthat/test-ResNet.R +++ b/tests/testthat/test-ResNet.R @@ -17,24 +17,24 @@ resSet <- setResNet( test_that("setResNet works", { testthat::expect_s3_class(object = resSet, class = "modelSettings") - + testthat::expect_equal(resSet$fitFunction, "fitEstimator") - + testthat::expect_true(length(resSet$param) > 0) expect_error(setResNet(numLayers = c(2), - sizeHidden = c(32), - hiddenFactor = c(2), - residualDropout = c(0.1), - hiddenDropout = c(0.1), - sizeEmbedding = c(32), - estimatorSettings = setEstimator(learningRate=c(3e-4), - weightDecay = c(1e-6), - seed=42, - batchSize = 128, - epochs=1), - hyperParamSearch = "random", - randomSample = 2)) + sizeHidden = c(32), + hiddenFactor = c(2), + residualDropout = c(0.1), + hiddenDropout = c(0.1), + sizeEmbedding = c(32), + estimatorSettings = setEstimator(learningRate=c(3e-4), + weightDecay = c(1e-6), + seed=42, + batchSize = 128, + epochs=1), + hyperParamSearch = "random", + randomSample = 2)) }) sink(nullfile()) @@ -44,7 +44,7 @@ res2 <- tryCatch( plpData = plpData, outcomeId = 3, modelSettings = resSet, - analysisId = "ResNet", + analysisId = "Analysis_ResNet", analysisName = "Testing Deep Learning", populationSettings = populationSet, splitSettings = PatientLevelPrediction::createDefaultSplitSetting(), @@ -59,7 +59,7 @@ res2 <- tryCatch( runModelDevelopment = T, runCovariateSummary = 
F ), - saveDirectory = file.path(testLoc, "Deep") + saveDirectory = file.path(testLoc, "ResNet") ) }, error = function(e) { @@ -71,17 +71,17 @@ sink() test_that("ResNet with runPlp working checks", { testthat::expect_false(is.null(res2)) - + # check structure testthat::expect_true("prediction" %in% names(res2)) testthat::expect_true("model" %in% names(res2)) testthat::expect_true("covariateSummary" %in% names(res2)) testthat::expect_true("performanceEvaluation" %in% names(res2)) - + # check prediction same size as pop testthat::expect_equal(nrow(res2$prediction %>% - dplyr::filter(evaluationType %in% c("Train", "Test"))), nrow(population)) - + dplyr::filter(evaluationType %in% c("Train", "Test"))), nrow(population)) + # check prediction between 0 and 1 testthat::expect_gte(min(res2$prediction$value), 0) testthat::expect_lte(max(res2$prediction$value), 1) @@ -96,22 +96,22 @@ test_that("ResNet nn-module works ", { normalization = torch::nn_batch_norm1d, hiddenDropout = 0.3, residualDropout = 0.3, d_out = 1 ) - + pars <- sum(sapply(model$parameters, function(x) prod(x$shape))) - + # expected number of parameters expect_equal(pars, 1295) - + input <- list() input$cat <- torch::torch_randint(0, 5, c(10, 5), dtype = torch::torch_long()) input$num <- torch::torch_randn(10, 1, dtype = torch::torch_float32()) - - + + output <- model(input) - + # output is correct shape expect_equal(output$shape, 10) - + input$num <- NULL model <- ResNet( catFeatures = 5, numFeatures = 0, sizeEmbedding = 5, @@ -154,3 +154,34 @@ test_that("Errors are produced by settings function", { hyperParamSearch = 'random', randomSample = randomSample)) }) + + +test_that("Can upload results to database", { + cohortDefinitions = data.frame( + cohortName = c('blank1'), + cohortId = c(1), + json = c('json') + ) + + sink(nullfile()) + sqliteFile <- insertResultsToSqlite(resultLocation = file.path(testLoc, "ResNet"), + cohortDefinitions = cohortDefinitions) + sink() + + testthat::expect_true(file.exists(sqliteFile)) + + cdmDatabaseSchema <- 'main' + ohdsiDatabaseSchema <- 'main' + connectionDetails <- DatabaseConnector::createConnectionDetails( + dbms = 'sqlite', + server = sqliteFile + ) + conn <- DatabaseConnector::connect(connectionDetails = connectionDetails) + targetDialect <- 'sqlite' + + # check the results table is populated + sql <- 'select count(*) as N from main.performances;' + res <- DatabaseConnector::querySql(conn, sql) + testthat::expect_true(res$N[1]>0) +}) + From 32a7e23d99e0017a8f7f3354b6c277e8f4814e08 Mon Sep 17 00:00:00 2001 From: egillax Date: Fri, 24 Mar 2023 14:04:27 +0100 Subject: [PATCH 28/58] modelType as attribute and tests to cover database upload --- DESCRIPTION | 4 +- R/Estimator.R | 2 +- R/MLP.R | 1 - README.md | 2 +- tests/testthat/test-MLP.R | 33 +++++++++++++- tests/testthat/test-ResNet.R | 85 ++++++++++++++++++++++++------------ 6 files changed, 95 insertions(+), 32 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index 0a6b787..8b8d208 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -34,11 +34,13 @@ Suggests: markdown, plyr, testthat, - PRROC + PRROC, + ResultModelManager (>= 0.2.0) Remotes: ohdsi/PatientLevelPrediction, ohdsi/FeatureExtraction, ohdsi/Eunomia + ohdsi/ResultModelManager RoxygenNote: 7.2.3 Encoding: UTF-8 Config/testthat/edition: 3 diff --git a/R/Estimator.R b/R/Estimator.R index 5e7f320..1d0878a 100644 --- a/R/Estimator.R +++ b/R/Estimator.R @@ -144,7 +144,7 @@ fitEstimator <- function(trainData, isNumeric = cvResult$numericalIndex ) - + attr(modelSettings$param, 
'settings')$modelType <- modelSettings$modelType comp <- start - Sys.time() result <- list( model = cvResult$estimator, # file.path(outLoc), diff --git a/R/MLP.R b/R/MLP.R index 9363cfc..edf323a 100644 --- a/R/MLP.R +++ b/R/MLP.R @@ -68,7 +68,6 @@ setMultiLayerPerceptron <- function(numLayers = c(1:8), if (hyperParamSearch == "random") { suppressWarnings(withr::with_seed(randomSampleSeed, {param <- param[sample(length(param), randomSample)]})) } - results <- list( fitFunction = "fitEstimator", param = param, diff --git a/README.md b/README.md index 87c0bed..1b4fdb9 100644 --- a/README.md +++ b/README.md @@ -33,7 +33,7 @@ Requires R (version 4.0.0 or higher). Installation on Windows requires [RTools]( Getting Started =============== -- To install the package please read the [Package installation guide]() +- To install the package please read the [Package installation guide](https://ohdsi.github.io/DeepPatientLevelPrediction/articles/Installing.html) - Please read the main vignette for the package: [Building Deep Learning Models](https://ohdsi.github.io/DeepPatientLevelPrediction/articles/BuildingDeepModels.html) diff --git a/tests/testthat/test-MLP.R b/tests/testthat/test-MLP.R index e84bb58..4daa6aa 100644 --- a/tests/testthat/test-MLP.R +++ b/tests/testthat/test-MLP.R @@ -37,7 +37,7 @@ results <- tryCatch( plpData = plpData, outcomeId = 3, modelSettings = modelSettings, - analysisId = "MLP", + analysisId = "Analysis_MLP", analysisName = "Testing Deep Learning", populationSettings = populationSet, splitSettings = PatientLevelPrediction::createDefaultSplitSetting(), @@ -133,4 +133,35 @@ test_that("Errors are produced by settings function", { hyperParamSearch = 'random', randomSample = randomSample)) +}) + +test_that("Can upload results to database", { + cohortDefinitions = data.frame( + cohortName = c('blank1'), + cohortId = c(1), + json = c('json') + ) + + sink(nullfile()) + sqliteFile <- insertResultsToSqlite(resultLocation = file.path(testLoc, "MLP"), + cohortDefinitions = cohortDefinitions) + sink() + + testthat::expect_true(file.exists(sqliteFile)) + + cdmDatabaseSchema <- 'main' + ohdsiDatabaseSchema <- 'main' + connectionDetails <- DatabaseConnector::createConnectionDetails( + dbms = 'sqlite', + server = sqliteFile + ) + conn <- DatabaseConnector::connect(connectionDetails = connectionDetails) + targetDialect <- 'sqlite' + + # check the results table is populated + sql <- 'select count(*) as N from main.performances;' + res <- DatabaseConnector::querySql(conn, sql) + testthat::expect_true(res$N[1]>0) + + }) \ No newline at end of file diff --git a/tests/testthat/test-ResNet.R b/tests/testthat/test-ResNet.R index d87ff5f..7a380a4 100644 --- a/tests/testthat/test-ResNet.R +++ b/tests/testthat/test-ResNet.R @@ -17,24 +17,24 @@ resSet <- setResNet( test_that("setResNet works", { testthat::expect_s3_class(object = resSet, class = "modelSettings") - + testthat::expect_equal(resSet$fitFunction, "fitEstimator") - + testthat::expect_true(length(resSet$param) > 0) expect_error(setResNet(numLayers = c(2), - sizeHidden = c(32), - hiddenFactor = c(2), - residualDropout = c(0.1), - hiddenDropout = c(0.1), - sizeEmbedding = c(32), - estimatorSettings = setEstimator(learningRate=c(3e-4), - weightDecay = c(1e-6), - seed=42, - batchSize = 128, - epochs=1), - hyperParamSearch = "random", - randomSample = 2)) + sizeHidden = c(32), + hiddenFactor = c(2), + residualDropout = c(0.1), + hiddenDropout = c(0.1), + sizeEmbedding = c(32), + estimatorSettings = setEstimator(learningRate=c(3e-4), + weightDecay = 
c(1e-6), + seed=42, + batchSize = 128, + epochs=1), + hyperParamSearch = "random", + randomSample = 2)) }) sink(nullfile()) @@ -44,7 +44,7 @@ res2 <- tryCatch( plpData = plpData, outcomeId = 3, modelSettings = resSet, - analysisId = "ResNet", + analysisId = "Analysis_ResNet", analysisName = "Testing Deep Learning", populationSettings = populationSet, splitSettings = PatientLevelPrediction::createDefaultSplitSetting(), @@ -59,7 +59,7 @@ res2 <- tryCatch( runModelDevelopment = T, runCovariateSummary = F ), - saveDirectory = file.path(testLoc, "Deep") + saveDirectory = file.path(testLoc, "ResNet") ) }, error = function(e) { @@ -71,17 +71,17 @@ sink() test_that("ResNet with runPlp working checks", { testthat::expect_false(is.null(res2)) - + # check structure testthat::expect_true("prediction" %in% names(res2)) testthat::expect_true("model" %in% names(res2)) testthat::expect_true("covariateSummary" %in% names(res2)) testthat::expect_true("performanceEvaluation" %in% names(res2)) - + # check prediction same size as pop testthat::expect_equal(nrow(res2$prediction %>% - dplyr::filter(evaluationType %in% c("Train", "Test"))), nrow(population)) - + dplyr::filter(evaluationType %in% c("Train", "Test"))), nrow(population)) + # check prediction between 0 and 1 testthat::expect_gte(min(res2$prediction$value), 0) testthat::expect_lte(max(res2$prediction$value), 1) @@ -96,22 +96,22 @@ test_that("ResNet nn-module works ", { normalization = torch::nn_batch_norm1d, hiddenDropout = 0.3, residualDropout = 0.3, d_out = 1 ) - + pars <- sum(sapply(model$parameters, function(x) prod(x$shape))) - + # expected number of parameters expect_equal(pars, 1295) - + input <- list() input$cat <- torch::torch_randint(0, 5, c(10, 5), dtype = torch::torch_long()) input$num <- torch::torch_randn(10, 1, dtype = torch::torch_float32()) - - + + output <- model(input) - + # output is correct shape expect_equal(output$shape, 10) - + input$num <- NULL model <- ResNet( catFeatures = 5, numFeatures = 0, sizeEmbedding = 5, @@ -154,3 +154,34 @@ test_that("Errors are produced by settings function", { hyperParamSearch = 'random', randomSample = randomSample)) }) + + +test_that("Can upload results to database", { + cohortDefinitions = data.frame( + cohortName = c('blank1'), + cohortId = c(1), + json = c('json') + ) + + sink(nullfile()) + sqliteFile <- insertResultsToSqlite(resultLocation = file.path(testLoc, "ResNet"), + cohortDefinitions = cohortDefinitions) + sink() + + testthat::expect_true(file.exists(sqliteFile)) + + cdmDatabaseSchema <- 'main' + ohdsiDatabaseSchema <- 'main' + connectionDetails <- DatabaseConnector::createConnectionDetails( + dbms = 'sqlite', + server = sqliteFile + ) + conn <- DatabaseConnector::connect(connectionDetails = connectionDetails) + targetDialect <- 'sqlite' + + # check the results table is populated + sql <- 'select count(*) as N from main.performances;' + res <- DatabaseConnector::querySql(conn, sql) + testthat::expect_true(res$N[1]>0) +}) + From c5a984f5e4e87f67b9ccd4bc930c16c2cb5a5b82 Mon Sep 17 00:00:00 2001 From: egillax Date: Fri, 24 Mar 2023 14:32:13 +0100 Subject: [PATCH 29/58] fix dependanceis --- DESCRIPTION | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/DESCRIPTION b/DESCRIPTION index f0af591..4adeb3c 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -35,7 +35,8 @@ Suggests: plyr, testthat, PRROC, - ResultModelManager (>= 0.2.0) + ResultModelManager (>= 0.2.0), + DatabaseConnector (>= 6.0.0) Remotes: ohdsi/PatientLevelPrediction, ohdsi/FeatureExtraction, From 
7541a380ae9f15b09a9c33d18b4e44ffff3c5f48 Mon Sep 17 00:00:00 2001 From: egillax Date: Fri, 24 Mar 2023 14:49:46 +0100 Subject: [PATCH 30/58] prepare version and news for release --- DESCRIPTION | 2 +- NEWS.md | 3 +++ 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/DESCRIPTION b/DESCRIPTION index 4adeb3c..7382550 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,7 +1,7 @@ Package: DeepPatientLevelPrediction Type: Package Title: Deep Learning For Patient Level Prediction Using Data In The OMOP Common Data Model -Version: 1.1.0 +Version: 1.1.1 Date: 15-12-2022 Authors@R: c( person("Egill", "Fridgeirsson", email = "e.fridgeirsson@erasmusmc.nl", role = c("aut", "cre")), diff --git a/NEWS.md b/NEWS.md index 7486523..f3423f9 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,3 +1,6 @@ +DeepPatientLevelPrediction 1.1.1 +- Fix bug introduced by removing modelType from attributes (#59) + DeepPatientLevelPrediction 1.1 ====================== - Check for if number of heads is compatible with embedding dimension fixed (#55) From 4cd2e309170a3555ecd8e1e5b5c202726c4acd24 Mon Sep 17 00:00:00 2001 From: egillax Date: Fri, 24 Mar 2023 17:40:09 +0100 Subject: [PATCH 31/58] modelType attribute back to modelSettings functions --- DESCRIPTION | 2 +- NEWS.md | 6 +----- R/Estimator.R | 1 - R/MLP.R | 1 + R/ResNet.R | 2 +- R/Transformer.R | 2 +- 6 files changed, 5 insertions(+), 9 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index 6e9ae62..ee7b9c8 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,7 +1,7 @@ Package: DeepPatientLevelPrediction Type: Package Title: Deep Learning For Patient Level Prediction Using Data In The OMOP Common Data Model -Version: 1.1.1.9000 +Version: 1.1.2 Date: 15-12-2022 Authors@R: c( person("Egill", "Fridgeirsson", email = "e.fridgeirsson@erasmusmc.nl", role = c("aut", "cre")), diff --git a/NEWS.md b/NEWS.md index 35f30e3..784270f 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,8 +1,4 @@ -DeepPatientLevelPrediction (develop) -====================== - - -DeepPatientLevelPrediction 1.1.1 +DeepPatientLevelPrediction 1.1.2 ====================== - Fix bug introduced by removing modelType from attributes (#59) diff --git a/R/Estimator.R b/R/Estimator.R index 1d0878a..c777d7d 100644 --- a/R/Estimator.R +++ b/R/Estimator.R @@ -144,7 +144,6 @@ fitEstimator <- function(trainData, isNumeric = cvResult$numericalIndex ) - attr(modelSettings$param, 'settings')$modelType <- modelSettings$modelType comp <- start - Sys.time() result <- list( model = cvResult$estimator, # file.path(outLoc), diff --git a/R/MLP.R b/R/MLP.R index edf323a..51d5f20 100644 --- a/R/MLP.R +++ b/R/MLP.R @@ -68,6 +68,7 @@ setMultiLayerPerceptron <- function(numLayers = c(1:8), if (hyperParamSearch == "random") { suppressWarnings(withr::with_seed(randomSampleSeed, {param <- param[sample(length(param), randomSample)]})) } + attr(param, 'settings')$modelType <- "MLP" results <- list( fitFunction = "fitEstimator", param = param, diff --git a/R/ResNet.R b/R/ResNet.R index ac95170..dc610b5 100644 --- a/R/ResNet.R +++ b/R/ResNet.R @@ -106,7 +106,7 @@ setResNet <- function(numLayers = c(1:8), if (hyperParamSearch == "random") { suppressWarnings(withr::with_seed(randomSampleSeed, {param <- param[sample(length(param), randomSample)]})) } - + attr(param, 'settings')$modelType <- "ResNet" results <- list( fitFunction = "fitEstimator", param = param, diff --git a/R/Transformer.R b/R/Transformer.R index 64b43b7..7e25ff7 100644 --- a/R/Transformer.R +++ b/R/Transformer.R @@ -119,7 +119,7 @@ setTransformer <- function(numBlocks = 3, dimToken = 
96, dimOut = 1, if (hyperParamSearch == "random") { suppressWarnings(withr::with_seed(randomSampleSeed, {param <- param[sample(length(param), randomSample)]})) } - + attr(param, 'settings')$modelType <- "Transformer" results <- list( fitFunction = "fitEstimator", param = param, From 9c19173dd83c037fc43daf08cd3cb78f4b50f2ee Mon Sep 17 00:00:00 2001 From: Egill Axfjord Fridgeirsson Date: Mon, 27 Mar 2023 19:04:51 +0200 Subject: [PATCH 32/58] Update DESCRIPTION --- DESCRIPTION | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/DESCRIPTION b/DESCRIPTION index ee7b9c8..47a7d24 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,7 +1,7 @@ Package: DeepPatientLevelPrediction Type: Package Title: Deep Learning For Patient Level Prediction Using Data In The OMOP Common Data Model -Version: 1.1.2 +Version: 1.1.2.9000 Date: 15-12-2022 Authors@R: c( person("Egill", "Fridgeirsson", email = "e.fridgeirsson@erasmusmc.nl", role = c("aut", "cre")), From 808ead857fc4a35c5792e1c5e6446066c395656a Mon Sep 17 00:00:00 2001 From: egillax Date: Sun, 16 Apr 2023 12:23:37 +0200 Subject: [PATCH 33/58] debug actions --- .github/workflows/R_CDM_check_hades.yaml | 3 +++ 1 file changed, 3 insertions(+) diff --git a/.github/workflows/R_CDM_check_hades.yaml b/.github/workflows/R_CDM_check_hades.yaml index 3278c1a..44b1a58 100644 --- a/.github/workflows/R_CDM_check_hades.yaml +++ b/.github/workflows/R_CDM_check_hades.yaml @@ -70,6 +70,9 @@ jobs: extra-packages: any::rcmdcheck needs: check + - name: Setup upterm session + uses: lhotari/action-upterm@v1 + - uses: r-lib/actions/check-r-package@v2 with: args: 'c("--no-manual", "--as-cran")' From 8e1de7f862eab341f6de1fbf1b67f1bae85bd703 Mon Sep 17 00:00:00 2001 From: Egill Axfjord Fridgeirsson Date: Mon, 17 Apr 2023 11:47:02 +0200 Subject: [PATCH 34/58] Update R_CDM_check_hades.yaml --- .github/workflows/R_CDM_check_hades.yaml | 3 --- 1 file changed, 3 deletions(-) diff --git a/.github/workflows/R_CDM_check_hades.yaml b/.github/workflows/R_CDM_check_hades.yaml index 44b1a58..3278c1a 100644 --- a/.github/workflows/R_CDM_check_hades.yaml +++ b/.github/workflows/R_CDM_check_hades.yaml @@ -70,9 +70,6 @@ jobs: extra-packages: any::rcmdcheck needs: check - - name: Setup upterm session - uses: lhotari/action-upterm@v1 - - uses: r-lib/actions/check-r-package@v2 with: args: 'c("--no-manual", "--as-cran")' From d18d5795f4fd1c4c7acf27426d259b31bfdf53f1 Mon Sep 17 00:00:00 2001 From: egillax Date: Mon, 17 Apr 2023 12:55:56 +0200 Subject: [PATCH 35/58] torch install environment variable --- .github/workflows/R_CDM_check_hades.yaml | 1 + .github/workflows/R_CMD_check_main_weekly.yaml | 1 + tests/testthat/setup.R | 2 +- 3 files changed, 3 insertions(+), 1 deletion(-) diff --git a/.github/workflows/R_CDM_check_hades.yaml b/.github/workflows/R_CDM_check_hades.yaml index 44b1a58..63a1f6f 100644 --- a/.github/workflows/R_CDM_check_hades.yaml +++ b/.github/workflows/R_CDM_check_hades.yaml @@ -43,6 +43,7 @@ jobs: CDM5_SQL_SERVER_PASSWORD: ${{ secrets.CDM5_SQL_SERVER_PASSWORD }} CDM5_SQL_SERVER_SERVER: ${{ secrets.CDM5_SQL_SERVER_SERVER }} CDM5_SQL_SERVER_USER: ${{ secrets.CDM5_SQL_SERVER_USER }} + TORCH_INSTALL: '1' steps: - uses: actions/checkout@v2 diff --git a/.github/workflows/R_CMD_check_main_weekly.yaml b/.github/workflows/R_CMD_check_main_weekly.yaml index aada2e5..73bfe18 100644 --- a/.github/workflows/R_CMD_check_main_weekly.yaml +++ b/.github/workflows/R_CMD_check_main_weekly.yaml @@ -47,6 +47,7 @@ jobs: WEBAPI_TEST_SECURE_WEBAPI_URL: ${{ secrets.WEBAPI_TEST_SECURE_WEBAPI_URL 
}} WEBAPI_TEST_ADMIN_USER_NAME: ${{ secrets.WEBAPI_TEST_ADMIN_USER_NAME }} WEBAPI_TEST_ADMIN_USER_PASSWORD: ${{ secrets.WEBAPI_TEST_ADMIN_USER_PASSWORD }} + TORCH_INSTALL: '1' steps: - uses: actions/checkout@v3 diff --git a/tests/testthat/setup.R b/tests/testthat/setup.R index c3a368c..fde852b 100644 --- a/tests/testthat/setup.R +++ b/tests/testthat/setup.R @@ -1,6 +1,6 @@ library(PatientLevelPrediction) -if(Sys.getenv('GITHUB_ACTIONS') == 'true') { +if(Sys.getenv('GITHUB_ACTIONS') == 'true' & torch::torch_is_installed() != FALSE) { torch::install_torch() } From 510f4f18ffaefefdaa6ec14e567673dcae0fbe7a Mon Sep 17 00:00:00 2001 From: egillax Date: Mon, 17 Apr 2023 13:12:55 +0200 Subject: [PATCH 36/58] update version and news --- DESCRIPTION | 2 +- NEWS.md | 4 ++++ 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/DESCRIPTION b/DESCRIPTION index ee7b9c8..7e4cb2e 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,7 +1,7 @@ Package: DeepPatientLevelPrediction Type: Package Title: Deep Learning For Patient Level Prediction Using Data In The OMOP Common Data Model -Version: 1.1.2 +Version: 1.1.3 Date: 15-12-2022 Authors@R: c( person("Egill", "Fridgeirsson", email = "e.fridgeirsson@erasmusmc.nl", role = c("aut", "cre")), diff --git a/NEWS.md b/NEWS.md index 784270f..0472025 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,3 +1,7 @@ +DeepPatientLevelPrediction 1.1.3 +====================== +- Fix actions after torch updated to v0.10 (#65) + DeepPatientLevelPrediction 1.1.2 ====================== - Fix bug introduced by removing modelType from attributes (#59) From a1cb2e7a344f89cca40b456a9313b95a9619418c Mon Sep 17 00:00:00 2001 From: Egill Axfjord Fridgeirsson Date: Tue, 18 Apr 2023 09:19:38 -0400 Subject: [PATCH 37/58] add device as expression with tests (#66) --- R/Estimator-class.R | 7 +++++- R/Estimator.R | 3 ++- man/setEstimator.Rd | 3 ++- tests/testthat/test-Estimator.R | 40 ++++++++++++++++++++++++++++++++- 4 files changed, 49 insertions(+), 4 deletions(-) diff --git a/R/Estimator-class.R b/R/Estimator-class.R index 96a5f2d..5d2fe76 100644 --- a/R/Estimator-class.R +++ b/R/Estimator-class.R @@ -34,7 +34,12 @@ Estimator <- R6::R6Class( modelParameters, estimatorSettings) { self$seed <- estimatorSettings$seed - self$device <- estimatorSettings$device + if (is.function(estimatorSettings$device)) { + device <- estimatorSettings$device() + } else { + device <- estimatorSettings$device + } + self$device <- device torch::torch_manual_seed(seed=self$seed) self$model <- do.call(modelType, modelParameters) self$modelParameters <- modelParameters diff --git a/R/Estimator.R b/R/Estimator.R index c777d7d..3a6402f 100644 --- a/R/Estimator.R +++ b/R/Estimator.R @@ -26,7 +26,8 @@ #' @param weightDecay what weight_decay to use #' @param batchSize batchSize to use #' @param epochs how many epochs to train for -#' @param device what device to train on +#' @param device what device to train on, can be a string or a function to that evaluates +#' to the device during runtime #' @param optimizer which optimizer to use #' @param scheduler which learning rate scheduler to use #' @param criterion loss function to use diff --git a/man/setEstimator.Rd b/man/setEstimator.Rd index b924b03..b512bcb 100644 --- a/man/setEstimator.Rd +++ b/man/setEstimator.Rd @@ -27,7 +27,8 @@ setEstimator( \item{epochs}{how many epochs to train for} -\item{device}{what device to train on} +\item{device}{what device to train on, can be a string or a function to that evaluates +to the device during runtime} \item{optimizer}{which 
optimizer to use} diff --git a/tests/testthat/test-Estimator.R b/tests/testthat/test-Estimator.R index aa8459d..faf95db 100644 --- a/tests/testthat/test-Estimator.R +++ b/tests/testthat/test-Estimator.R @@ -298,4 +298,42 @@ test_that("setEstimator with paramsToTune is correctly added to hyperparameters" expect_equal(estimatorSettings2$learningRate, 1e-3) expect_equal(as.character(estimatorSettings2$metric), "auprc") expect_equal(estimatorSettings2$earlyStopping$params$patience, 10) -}) \ No newline at end of file +}) + +test_that("device as a function argument works", { + getDevice <- function() { + dev <- Sys.getenv("testDeepPLPDevice") + if (dev == ""){ + dev = "cpu" + } else{ + dev + } + } + + estimatorSettings <- setEstimator(device=getDevice) + + model <- setDefaultResNet(estimatorSettings = estimatorSettings) + model$param[[1]]$catFeatures <- 10 + + estimator <- Estimator$new(modelType="ResNet", + modelParameters = model$param[[1]], + estimatorSettings = estimatorSettings) + + expect_equal(estimator$device, "cpu") + + Sys.setenv("testDeepPLPDevice" = "meta") + + estimatorSettings <- setEstimator(device=getDevice) + + model <- setDefaultResNet(estimatorSettings = estimatorSettings) + model$param[[1]]$catFeatures <- 10 + + estimator <- Estimator$new(modelType="ResNet", + modelParameters = model$param[[1]], + estimatorSettings = estimatorSettings) + + expect_equal(estimator$device, "meta") + + Sys.unsetenv("testDeepPLPDevice") + + }) From 2113a483ef5a2237eca98020fb56f8835efec625 Mon Sep 17 00:00:00 2001 From: egillax Date: Tue, 18 Apr 2023 15:22:33 +0200 Subject: [PATCH 38/58] remove torchopt --- R/Estimator.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/Estimator.R b/R/Estimator.R index 3a6402f..3b9a000 100644 --- a/R/Estimator.R +++ b/R/Estimator.R @@ -42,7 +42,7 @@ setEstimator <- function(learningRate='auto', batchSize = 512, epochs = 30, device='cpu', - optimizer = torchopt::optim_adamw, + optimizer = torch::optim_adamw, scheduler = list(fun=torch::lr_reduce_on_plateau, params=list(patience=1)), criterion = torch::nn_bce_with_logits_loss, From 575d2e59090ce38c9c58ba6f1d761731d60f0abe Mon Sep 17 00:00:00 2001 From: egillax Date: Tue, 18 Apr 2023 15:26:47 +0200 Subject: [PATCH 39/58] update news and version --- DESCRIPTION | 2 +- NEWS.md | 6 ++++++ 2 files changed, 7 insertions(+), 1 deletion(-) diff --git a/DESCRIPTION b/DESCRIPTION index a5a459f..03626df 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -2,7 +2,7 @@ Package: DeepPatientLevelPrediction Type: Package Title: Deep Learning For Patient Level Prediction Using Data In The OMOP Common Data Model Version: 1.1.4 -Date: 15-12-2022 +Date: 18-04-2023 Authors@R: c( person("Egill", "Fridgeirsson", email = "e.fridgeirsson@erasmusmc.nl", role = c("aut", "cre")), person("Jenna", "Reps", email = "jreps@its.jnj.com", role = c("aut")), diff --git a/NEWS.md b/NEWS.md index 0472025..8202096 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,3 +1,9 @@ +DeepPatientLevelPrediction 1.1.4 +====================== + - Remove torchopt dependancy since adamw is now in torch + - Update torch dependency to >=0.10.0 + - Allow device to be a function that resolves during Estimator initialization + DeepPatientLevelPrediction 1.1.3 ====================== - Fix actions after torch updated to v0.10 (#65) From 20d4a0d7623988c818874450aa24600c2ec752a7 Mon Sep 17 00:00:00 2001 From: egillax Date: Tue, 18 Apr 2023 15:43:17 +0200 Subject: [PATCH 40/58] fix docs --- man/setEstimator.Rd | 2 +- tests/testthat/test-LRFinder.R | 2 +- 2 files changed, 2 
insertions(+), 2 deletions(-) diff --git a/man/setEstimator.Rd b/man/setEstimator.Rd index b512bcb..8454c55 100644 --- a/man/setEstimator.Rd +++ b/man/setEstimator.Rd @@ -10,7 +10,7 @@ setEstimator( batchSize = 512, epochs = 30, device = "cpu", - optimizer = torchopt::optim_adamw, + optimizer = torch::optim_adamw, scheduler = list(fun = torch::lr_reduce_on_plateau, params = list(patience = 1)), criterion = torch::nn_bce_with_logits_loss, earlyStopping = list(useEarlyStopping = TRUE, params = list(patience = 4)), diff --git a/tests/testthat/test-LRFinder.R b/tests/testthat/test-LRFinder.R index 7f56a0f..30bdc8d 100644 --- a/tests/testthat/test-LRFinder.R +++ b/tests/testthat/test-LRFinder.R @@ -3,7 +3,7 @@ test_that("LR scheduler that changes per batch works", { model <- ResNet(catFeatures = 10, numFeatures = 1, sizeEmbedding = 32, sizeHidden = 64, numLayers = 1, hiddenFactor = 1) - optimizer <- torchopt::optim_adamw(model$parameters, lr=1e-7) + optimizer <- torch::optim_adamw(model$parameters, lr=1e-7) scheduler <- lrPerBatch(optimizer, startLR = 1e-7, From f97b37fa3416494c7431f751d38ad2921c26f5d1 Mon Sep 17 00:00:00 2001 From: egillax Date: Wed, 19 Apr 2023 11:48:04 +0200 Subject: [PATCH 41/58] update version number --- DESCRIPTION | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/DESCRIPTION b/DESCRIPTION index 03626df..59bc0c4 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,7 +1,7 @@ Package: DeepPatientLevelPrediction Type: Package Title: Deep Learning For Patient Level Prediction Using Data In The OMOP Common Data Model -Version: 1.1.4 +Version: 1.1.4.9000 Date: 18-04-2023 Authors@R: c( person("Egill", "Fridgeirsson", email = "e.fridgeirsson@erasmusmc.nl", role = c("aut", "cre")), From 783f4176743f05116442ea4bd7d0cee156695b99 Mon Sep 17 00:00:00 2001 From: Egill Axfjord Fridgeirsson Date: Mon, 24 Apr 2023 12:27:16 -0400 Subject: [PATCH 42/58] LRFinder works with device fun (#68) --- R/LRFinder.R | 7 +++++-- tests/testthat/test-LRFinder.R | 22 ++++++++++++++++++++++ 2 files changed, 27 insertions(+), 2 deletions(-) diff --git a/R/LRFinder.R b/R/LRFinder.R index 2e79fca..9f670e4 100644 --- a/R/LRFinder.R +++ b/R/LRFinder.R @@ -52,7 +52,10 @@ lrFinder <- function(dataset, modelType, modelParams, estimatorSettings, divergenceThreshold=4) { torch::torch_manual_seed(seed=estimatorSettings$seed) model <- do.call(modelType, modelParams) - model$to(device=estimatorSettings$device) + if (is.function(estimatorSettings$device)) { + device = estimatorSettings$device() + } else {device = estimatorSettings$device} + model$to(device=device) optimizer <- estimatorSettings$optimizer(model$parameters, lr=minLR) @@ -75,7 +78,7 @@ lrFinder <- function(dataset, modelType, modelParams, estimatorSettings, optimizer$zero_grad() batch <- dataset[sample(batchIndex, estimatorSettings$batchSize)] - batch <- batchToDevice(batch, device=estimatorSettings$device) + batch <- batchToDevice(batch, device=device) output <- model(batch$batch) diff --git a/tests/testthat/test-LRFinder.R b/tests/testthat/test-LRFinder.R index 30bdc8d..f6c263e 100644 --- a/tests/testthat/test-LRFinder.R +++ b/tests/testthat/test-LRFinder.R @@ -39,6 +39,28 @@ test_that("LR finder works", { expect_true(lr<=10.0) expect_true(lr>=3e-4) +}) + +test_that("LR finder works with device specified by a function", { + + deviceFun <- function(){ + dev = "cpu" + } + lr <- lrFinder(dataset, modelType = ResNet, modelParams = list(catFeatures=dataset$numCatFeatures(), + numFeatures=dataset$numNumFeatures(), + sizeEmbedding=8, + 
sizeHidden=16, + numLayers=1, + hiddenFactor=1), + estimatorSettings = setEstimator(batchSize=32, + seed = 42, + device = deviceFun), + minLR = 3e-4, + maxLR = 10.0, + numLR = 20, + divergenceThreshold = 1.1) + expect_true(lr<=10.0) + expect_true(lr>=3e-4) }) \ No newline at end of file From 0a6f9de0e10819459173e512692f976669ffd282 Mon Sep 17 00:00:00 2001 From: egillax Date: Mon, 24 Apr 2023 18:29:05 +0200 Subject: [PATCH 43/58] update version and news --- DESCRIPTION | 2 +- NEWS.md | 4 ++++ 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/DESCRIPTION b/DESCRIPTION index 59bc0c4..f76e3f4 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,7 +1,7 @@ Package: DeepPatientLevelPrediction Type: Package Title: Deep Learning For Patient Level Prediction Using Data In The OMOP Common Data Model -Version: 1.1.4.9000 +Version: 1.1.5 Date: 18-04-2023 Authors@R: c( person("Egill", "Fridgeirsson", email = "e.fridgeirsson@erasmusmc.nl", role = c("aut", "cre")), diff --git a/NEWS.md b/NEWS.md index 8202096..fa7b721 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,3 +1,7 @@ +DeepPatientLevelPrediction 1.1.5 +====================== + - Fix bug where device function was not working for LRFinder + DeepPatientLevelPrediction 1.1.4 ====================== - Remove torchopt dependancy since adamw is now in torch From 2883b5ebe42137e43991e0f541dd748f82686f5b Mon Sep 17 00:00:00 2001 From: egillax Date: Tue, 25 Apr 2023 11:52:48 +0200 Subject: [PATCH 44/58] update version --- DESCRIPTION | 2 +- NEWS.md | 3 +++ 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/DESCRIPTION b/DESCRIPTION index f76e3f4..3e11544 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,7 +1,7 @@ Package: DeepPatientLevelPrediction Type: Package Title: Deep Learning For Patient Level Prediction Using Data In The OMOP Common Data Model -Version: 1.1.5 +Version: 1.1.5.9000 Date: 18-04-2023 Authors@R: c( person("Egill", "Fridgeirsson", email = "e.fridgeirsson@erasmusmc.nl", role = c("aut", "cre")), diff --git a/NEWS.md b/NEWS.md index fa7b721..3c786d8 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,3 +1,6 @@ +DeepPatientLevelPrediction 1.1.5.9000 +====================== + DeepPatientLevelPrediction 1.1.5 ====================== - Fix bug where device function was not working for LRFinder From 8a01ed7f1a223c5c19eb20f76052fb1e8a89ffa9 Mon Sep 17 00:00:00 2001 From: egillax Date: Sun, 18 Jun 2023 12:39:21 +0200 Subject: [PATCH 45/58] fix bug when test subject has no features --- R/Dataset.R | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/R/Dataset.R b/R/Dataset.R index d2c51a7..3b0dd8d 100644 --- a/R/Dataset.R +++ b/R/Dataset.R @@ -23,8 +23,8 @@ Dataset <- torch::dataset( if (!is.null(labels)) { self$target <- torch::torch_tensor(labels) } else { - self$target <- torch::torch_tensor(rep(0, data %>% dplyr::distinct(rowId) - %>% dplyr::collect() %>% nrow())) + self$target <- torch::torch_tensor(rep(0, data %>% dplyr::summarize(m=max(rowId)) %>% + dplyr::collect() %>% dplyr::pull())) } # Weight to add in loss function to positive class self$posWeight <- (self$target == 0)$sum() / self$target$sum() From e55571082fb007a6f63ac2ddfd5c7269a9d21be6 Mon Sep 17 00:00:00 2001 From: Henrik Date: Sun, 18 Jun 2023 13:17:17 +0200 Subject: [PATCH 46/58] Add parameter caching for training persistence and continuity (#63) * Add training cache + tests --------- Co-authored-by: Egill Axfjord Fridgeirsson --- DESCRIPTION | 2 +- NAMESPACE | 1 + R/Estimator.R | 27 +++++- R/TrainingCache-class.R | 90 +++++++++++++++++ man/TrainingCache.Rd | 145 
++++++++++++++++++++++++++++ man/fitEstimator.Rd | 4 +- man/gridCvDeep.Rd | 4 +- tests/testthat/test-Estimator.R | 4 +- tests/testthat/test-TrainingCache.R | 87 +++++++++++++++++ tests/testthat/test-Transformer.R | 2 +- 10 files changed, 355 insertions(+), 11 deletions(-) create mode 100644 R/TrainingCache-class.R create mode 100644 man/TrainingCache.Rd create mode 100644 tests/testthat/test-TrainingCache.R diff --git a/DESCRIPTION b/DESCRIPTION index 3e11544..824bdfb 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -37,7 +37,7 @@ Suggests: ResultModelManager (>= 0.2.0), DatabaseConnector (>= 6.0.0) Remotes: - ohdsi/PatientLevelPrediction, + ohdsi/PatientLevelPrediction@develop, ohdsi/FeatureExtraction, ohdsi/Eunomia, ohdsi/ResultModelManager diff --git a/NAMESPACE b/NAMESPACE index 38bb05b..bea6721 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -2,6 +2,7 @@ export(Dataset) export(Estimator) +export(TrainingCache) export(fitEstimator) export(gridCvDeep) export(lrFinder) diff --git a/R/Estimator.R b/R/Estimator.R index 3b9a000..5fb23bf 100644 --- a/R/Estimator.R +++ b/R/Estimator.R @@ -97,12 +97,14 @@ setEstimator <- function(learningRate='auto', #' @param trainData the data to use #' @param modelSettings modelSettings object #' @param analysisId Id of the analysis +#' @param analysisPath Path of the analysis #' @param ... Extra inputs #' #' @export fitEstimator <- function(trainData, modelSettings, analysisId, + analysisPath, ...) { start <- Sys.time() @@ -128,7 +130,8 @@ fitEstimator <- function(trainData, mappedData = mappedCovariateData, labels = trainData$labels, modelSettings = modelSettings, - modelLocation = outLoc + modelLocation = outLoc, + analysisPath = analysisPath ) ) @@ -251,26 +254,37 @@ predictDeepEstimator <- function(plpModel, #' @param labels Dataframe with the outcomes #' @param modelSettings Settings of the model #' @param modelLocation Where to save the model +#' @param analysisPath Path of the analysis #' #' @export gridCvDeep <- function(mappedData, labels, modelSettings, - modelLocation) { + modelLocation, + analysisPath) { ParallelLogger::logInfo(paste0("Running hyperparameter search for ", modelSettings$modelType, " model")) ########################################################################### paramSearch <- modelSettings$param - gridSearchPredictons <- list() - length(gridSearchPredictons) <- length(paramSearch) + trainCache <- TrainingCache$new(analysisPath) + + if (trainCache$isParamGridIdentical(paramSearch)) { + gridSearchPredictons <- trainCache$getGridSearchPredictions() + } else { + gridSearchPredictons <- list() + length(gridSearchPredictons) <- length(paramSearch) + trainCache$saveGridSearchPredictions(gridSearchPredictons) + trainCache$saveModelParams(paramSearch) + } + dataset <- Dataset(mappedData$covariates, labels$outcomeCount) estimatorSettings <- modelSettings$estimatorSettings fitParams <- names(paramSearch[[1]])[grepl("^estimator", names(paramSearch[[1]]))] - for (gridId in 1:length(paramSearch)) { + for (gridId in trainCache$getLastGridSearchIndex():length(paramSearch)) { ParallelLogger::logInfo(paste0("Running hyperparameter combination no ", gridId)) ParallelLogger::logInfo(paste0("HyperParameters: ")) ParallelLogger::logInfo(paste(names(paramSearch[[gridId]]), paramSearch[[gridId]], collapse = " | ")) @@ -336,7 +350,10 @@ gridCvDeep <- function(mappedData, prediction = prediction, param = paramSearch[[gridId]] ) + + trainCache$saveGridSearchPredictions(gridSearchPredictons) } + # get best para (this could be modified to enable any metric 
instead of AUC, just need metric input in function) paramGridSearch <- lapply(gridSearchPredictons, function(x) { do.call(PatientLevelPrediction::computeGridPerformance, x) diff --git a/R/TrainingCache-class.R b/R/TrainingCache-class.R new file mode 100644 index 0000000..fac668e --- /dev/null +++ b/R/TrainingCache-class.R @@ -0,0 +1,90 @@ +#' TrainingCache +#' @description +#' Parameter caching for training persistence and continuity +#' @export +TrainingCache <- R6::R6Class( + "TrainingCache", + + private = list( + .paramPersistence = list( + gridSearchPredictions = NULL, + modelParams = NULL + ), + .paramContinuity = list(), + .saveDir = NULL, + + writeToFile = function() { + saveRDS(private$.paramPersistence, file.path(private$.saveDir)) + }, + + readFromFile = function() { + private$.paramPersistence <- readRDS(file.path(private$.saveDir)) + } + ), + + public = list( + #' @description + #' Creates a new training cache + #' @param inDir Path to the analysis directory + initialize = function(inDir) { + private$.saveDir <- file.path(inDir, "paramPersistence.rds") + + if (file.exists(private$.saveDir)) { + private$readFromFile() + } else { + private$writeToFile() + } + }, + + #' @description + #' Checks whether the parameter grid in the model settings is identical to + #' the cached parameters. + #' @param inModelParams Parameter grid from the model settings + #' @returns Whether the provided and cached parameter grid is identical + isParamGridIdentical = function(inModelParams) { + return(identical(inModelParams, private$.paramPersistence$modelParams)) + }, + + #' @description + #' Saves the grid search results to the training cache + #' @param inGridSearchPredictions Grid search predictions + saveGridSearchPredictions = function(inGridSearchPredictions) { + private$.paramPersistence$gridSearchPredictions <- + inGridSearchPredictions + private$writeToFile() + }, + + #' @description + #' Saves the parameter grid to the training cache + #' @param inModelParams Parameter grid from the model settings + saveModelParams = function(inModelParams) { + private$.paramPersistence$modelParams <- inModelParams + private$writeToFile() + }, + + #' @description + #' Gets the grid search results from the training cache + #' @returns Grid search results from the training cache + getGridSearchPredictions = function() { + return(private$.paramPersistence$gridSearchPredictions) + }, + + #' @description + #' Gets the last index from the cached grid search + #' @returns Last grid search index + getLastGridSearchIndex = function() { + if (is.null(private$.paramPersistence$gridSearchPredictions)) { + return(1) + } else { + return(which(sapply(private$.paramPersistence$gridSearchPredictions, + is.null))[1]) + } + }, + + #' @description + #' Remove the training cache from the analysis path + dropCache = function() { + # TODO + } + ) +) diff --git a/man/TrainingCache.Rd b/man/TrainingCache.Rd new file mode 100644 index 0000000..0a7ec7b --- /dev/null +++ b/man/TrainingCache.Rd @@ -0,0 +1,145 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/TrainingCache-class.R +\name{TrainingCache} +\alias{TrainingCache} +\title{TrainingCache} +\value{ +Whether the provided and cached parameter grid is identical + +Grid search results from the training cache + +Last grid search index +} +\description{ +Parameter caching for training persistence and continuity +} +\section{Methods}{ +\subsection{Public methods}{ +\itemize{ +\item \href{#method-TrainingCache-new}{\code{TrainingCache$new()}} +\item 
\href{#method-TrainingCache-isParamGridIdentical}{\code{TrainingCache$isParamGridIdentical()}}
+\item \href{#method-TrainingCache-saveGridSearchPredictions}{\code{TrainingCache$saveGridSearchPredictions()}}
+\item \href{#method-TrainingCache-saveModelParams}{\code{TrainingCache$saveModelParams()}}
+\item \href{#method-TrainingCache-getGridSearchPredictions}{\code{TrainingCache$getGridSearchPredictions()}}
+\item \href{#method-TrainingCache-getLastGridSearchIndex}{\code{TrainingCache$getLastGridSearchIndex()}}
+\item \href{#method-TrainingCache-dropCache}{\code{TrainingCache$dropCache()}}
+\item \href{#method-TrainingCache-clone}{\code{TrainingCache$clone()}}
+}
+}
+\if{html}{\out{<hr>}}
+\if{html}{\out{<a id="method-TrainingCache-new"></a>}}
+\if{latex}{\out{\hypertarget{method-TrainingCache-new}{}}}
+\subsection{Method \code{new()}}{
+Creates a new training cache
+\subsection{Usage}{
+\if{html}{\out{<div class="r">}}\preformatted{TrainingCache$new(inDir)}\if{html}{\out{</div>}}
+}
+
+\subsection{Arguments}{
+\if{html}{\out{<div class="arguments">}}
+\describe{
+\item{\code{inDir}}{Path to the analysis directory}
+}
+\if{html}{\out{</div>}}
+}
+}
+\if{html}{\out{<hr>}}
+\if{html}{\out{<a id="method-TrainingCache-isParamGridIdentical"></a>}}
+\if{latex}{\out{\hypertarget{method-TrainingCache-isParamGridIdentical}{}}}
+\subsection{Method \code{isParamGridIdentical()}}{
+Checks whether the parameter grid in the model settings is identical to
+the cached parameters.
+\subsection{Usage}{
+\if{html}{\out{<div class="r">}}\preformatted{TrainingCache$isParamGridIdentical(inModelParams)}\if{html}{\out{</div>}}
+}
+
+\subsection{Arguments}{
+\if{html}{\out{<div class="arguments">}}
+\describe{
+\item{\code{inModelParams}}{Parameter grid from the model settings}
+}
+\if{html}{\out{</div>}}
+}
+}
+\if{html}{\out{<hr>}}
+\if{html}{\out{<a id="method-TrainingCache-saveGridSearchPredictions"></a>}}
+\if{latex}{\out{\hypertarget{method-TrainingCache-saveGridSearchPredictions}{}}}
+\subsection{Method \code{saveGridSearchPredictions()}}{
+Saves the grid search results to the training cache
+\subsection{Usage}{
+\if{html}{\out{<div class="r">}}\preformatted{TrainingCache$saveGridSearchPredictions(inGridSearchPredictions)}\if{html}{\out{</div>}}
+}
+
+\subsection{Arguments}{
+\if{html}{\out{<div class="arguments">}}
+\describe{
+\item{\code{inGridSearchPredictions}}{Grid search predictions}
+}
+\if{html}{\out{</div>}}
+}
+}
+\if{html}{\out{<hr>}}
+\if{html}{\out{<a id="method-TrainingCache-saveModelParams"></a>}}
+\if{latex}{\out{\hypertarget{method-TrainingCache-saveModelParams}{}}}
+\subsection{Method \code{saveModelParams()}}{
+Saves the parameter grid to the training cache
+\subsection{Usage}{
+\if{html}{\out{<div class="r">}}\preformatted{TrainingCache$saveModelParams(inModelParams)}\if{html}{\out{</div>}}
+}
+
+\subsection{Arguments}{
+\if{html}{\out{<div class="arguments">}}
+\describe{
+\item{\code{inModelParams}}{Parameter grid from the model settings}
+}
+\if{html}{\out{</div>}}
+}
+}
+\if{html}{\out{<hr>}}
+\if{html}{\out{<a id="method-TrainingCache-getGridSearchPredictions"></a>}}
+\if{latex}{\out{\hypertarget{method-TrainingCache-getGridSearchPredictions}{}}}
+\subsection{Method \code{getGridSearchPredictions()}}{
+Gets the grid search results from the training cache
+\subsection{Usage}{
+\if{html}{\out{<div class="r">}}\preformatted{TrainingCache$getGridSearchPredictions()}\if{html}{\out{</div>}}
+}
+
+}
+\if{html}{\out{<hr>}}
+\if{html}{\out{<a id="method-TrainingCache-getLastGridSearchIndex"></a>}}
+\if{latex}{\out{\hypertarget{method-TrainingCache-getLastGridSearchIndex}{}}}
+\subsection{Method \code{getLastGridSearchIndex()}}{
+Gets the last index from the cached grid search
+\subsection{Usage}{
+\if{html}{\out{<div class="r">}}\preformatted{TrainingCache$getLastGridSearchIndex()}\if{html}{\out{</div>}}
+}
+
+}
+\if{html}{\out{<hr>}}
+\if{html}{\out{<a id="method-TrainingCache-dropCache"></a>}}
+\if{latex}{\out{\hypertarget{method-TrainingCache-dropCache}{}}}
+\subsection{Method \code{dropCache()}}{
+Remove the training cache from the analysis path
+\subsection{Usage}{
+\if{html}{\out{<div class="r">}}\preformatted{TrainingCache$dropCache()}\if{html}{\out{</div>}}
+}
+
+}
+\if{html}{\out{<hr>}}
+\if{html}{\out{<a id="method-TrainingCache-clone"></a>}}
+\if{latex}{\out{\hypertarget{method-TrainingCache-clone}{}}}
+\subsection{Method \code{clone()}}{
+The objects of this class are cloneable with this method.
+\subsection{Usage}{
+\if{html}{\out{<div class="r">}}\preformatted{TrainingCache$clone(deep = FALSE)}\if{html}{\out{</div>}}
+}
+
+\subsection{Arguments}{
+\if{html}{\out{<div class="arguments">}}
+\describe{
+\item{\code{deep}}{Whether to make a deep clone.}
+}
+\if{html}{\out{
    }} +} +} +} diff --git a/man/fitEstimator.Rd b/man/fitEstimator.Rd index 87b6e39..a99bb74 100644 --- a/man/fitEstimator.Rd +++ b/man/fitEstimator.Rd @@ -4,7 +4,7 @@ \alias{fitEstimator} \title{fitEstimator} \usage{ -fitEstimator(trainData, modelSettings, analysisId, ...) +fitEstimator(trainData, modelSettings, analysisId, analysisPath, ...) } \arguments{ \item{trainData}{the data to use} @@ -13,6 +13,8 @@ fitEstimator(trainData, modelSettings, analysisId, ...) \item{analysisId}{Id of the analysis} +\item{analysisPath}{Path of the analysis} + \item{...}{Extra inputs} } \description{ diff --git a/man/gridCvDeep.Rd b/man/gridCvDeep.Rd index 1b304a4..3dc8ac8 100644 --- a/man/gridCvDeep.Rd +++ b/man/gridCvDeep.Rd @@ -4,7 +4,7 @@ \alias{gridCvDeep} \title{gridCvDeep} \usage{ -gridCvDeep(mappedData, labels, modelSettings, modelLocation) +gridCvDeep(mappedData, labels, modelSettings, modelLocation, analysisPath) } \arguments{ \item{mappedData}{Mapped data with covariates} @@ -14,6 +14,8 @@ gridCvDeep(mappedData, labels, modelSettings, modelLocation) \item{modelSettings}{Settings of the model} \item{modelLocation}{Where to save the model} + +\item{analysisPath}{Path of the analysis} } \description{ Performs grid search for a deep learning estimator diff --git a/tests/testthat/test-Estimator.R b/tests/testthat/test-Estimator.R index faf95db..a803003 100644 --- a/tests/testthat/test-Estimator.R +++ b/tests/testthat/test-Estimator.R @@ -138,7 +138,7 @@ modelSettings <- setResNet( ) sink(nullfile()) -results <- fitEstimator(trainData$Train, modelSettings = modelSettings, analysisId = 1) +results <- fitEstimator(trainData$Train, modelSettings = modelSettings, analysisId = 1, analysisPath = testLoc) sink() test_that("Estimator fit function works", { @@ -149,7 +149,7 @@ test_that("Estimator fit function works", { expect_equal(attr(results, "saveType"), "file") fakeTrainData <- trainData fakeTrainData$train$covariateData <- list(fakeCovData <- c("Fake")) - expect_error(fitEstimator(fakeTrainData$train, modelSettings, analysisId = 1)) + expect_error(fitEstimator(fakeTrainData$train, modelSettings, analysisId = 1, analysisPath = testLoc)) }) test_that("predictDeepEstimator works", { diff --git a/tests/testthat/test-TrainingCache.R b/tests/testthat/test-TrainingCache.R new file mode 100644 index 0000000..feca8a4 --- /dev/null +++ b/tests/testthat/test-TrainingCache.R @@ -0,0 +1,87 @@ +resNetSettings <- setResNet(numLayers = c(1, 2, 4), + sizeHidden = 2^6, + hiddenFactor = 1, + residualDropout = 0.5, + hiddenDropout = 0.5, + sizeEmbedding = 2^6, + estimatorSettings = setEstimator(learningRate='auto', + weightDecay=1e-3, + device='cpu', + batchSize=1024, + epochs=30, + seed=NULL), + hyperParamSearch = "random", + randomSample = 2, + randomSampleSeed = NULL) + +trainCache <- TrainingCache$new(testLoc) +paramSearch <- resNetSettings$param + +test_that("Training cache exists on disk", { + testthat::expect_true( + file.exists(file.path(testLoc, "paramPersistence.rds"))) +}) + +test_that("Grid search can be cached", { + gridSearchPredictons <- list() + length(gridSearchPredictons) <- length(paramSearch) + trainCache$saveGridSearchPredictions(gridSearchPredictons) + + index <- 1 + + gridSearchPredictons[[index]] <- list( + prediction = list(NULL), + param = paramSearch[[index]] + ) + trainCache$saveGridSearchPredictions(gridSearchPredictons) + + testthat::expect_identical(trainCache$getGridSearchPredictions(), gridSearchPredictons) + testthat::expect_equal(trainCache$getLastGridSearchIndex(), index+1) +}) + 
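As an illustration of the resume behaviour these tests exercise, the following sketch (not part of the patch; it only uses the TrainingCache methods defined above, with analysisPath and paramSearch as in the tests) shows how a caller picks up an interrupted hyperparameter search:

    trainCache <- TrainingCache$new(analysisPath)        # cache file: <analysisPath>/paramPersistence.rds
    if (!trainCache$isParamGridIdentical(paramSearch)) {
      trainCache$saveModelParams(paramSearch)            # new search: store the parameter grid
    }
    startIndex <- trainCache$getLastGridSearchIndex()    # 1 for a fresh search, otherwise the first
                                                         # combination without saved predictions

gridCvDeep in the earlier hunk of this patch follows the same sequence, so rerunning the same analysis continues at the first hyperparameter combination that has no cached predictions.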
+test_that("Param grid predictions can be cached", { + testthat::expect_false(trainCache$isParamGridIdentical(paramSearch)) + + trainCache$saveModelParams(paramSearch) + testthat::expect_true(trainCache$isParamGridIdentical(paramSearch)) +}) + +test_that("Estimator can resume training from cache", { + modelPath <- tempdir() + analysisPath <- file.path(modelPath, "Analysis_TrainCacheResNet") + dir.create(analysisPath) + trainCache <- TrainingCache$new(analysisPath) + trainCache$saveModelParams(paramSearch) + + sink(nullfile()) + res2 <- tryCatch( + { + PatientLevelPrediction::runPlp( + plpData = plpData, + outcomeId = 3, + modelSettings = resNetSettings, + analysisId = "Analysis_TrainCacheResNet", + analysisName = "Testing Training Cache", + populationSettings = populationSet, + splitSettings = PatientLevelPrediction::createDefaultSplitSetting(), + sampleSettings = PatientLevelPrediction::createSampleSettings(), # none + featureEngineeringSettings = PatientLevelPrediction::createFeatureEngineeringSettings(), # none + preprocessSettings = PatientLevelPrediction::createPreprocessSettings(), + executeSettings = PatientLevelPrediction::createExecuteSettings( + runSplitData = T, + runSampleData = F, + runfeatureEngineering = F, + runPreprocessData = T, + runModelDevelopment = T, + runCovariateSummary = F + ), + saveDirectory = modelPath + ) + }, + error = function(e) { + print(e) + return(NULL) + } + ) + sink() +}) diff --git a/tests/testthat/test-Transformer.R b/tests/testthat/test-Transformer.R index 8409baa..94c2e48 100644 --- a/tests/testthat/test-Transformer.R +++ b/tests/testthat/test-Transformer.R @@ -27,7 +27,7 @@ test_that("Transformer settings work", { }) test_that("fitEstimator with Transformer works", { - results <- fitEstimator(trainData$Train, settings, analysisId = 1) + results <- fitEstimator(trainData$Train, settings, analysisId = 1, analysisPath = testLoc) expect_equal(class(results), "plpModel") expect_equal(attr(results, "modelType"), "binary") From 1e640eec4938c009b4de92e793a5440988fe30b4 Mon Sep 17 00:00:00 2001 From: egillax Date: Sun, 18 Jun 2023 13:39:34 +0200 Subject: [PATCH 47/58] fix incs issue --- R/Estimator.R | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/R/Estimator.R b/R/Estimator.R index 3b9a000..cf0708e 100644 --- a/R/Estimator.R +++ b/R/Estimator.R @@ -135,8 +135,8 @@ fitEstimator <- function(trainData, hyperSummary <- do.call(rbind, lapply(cvResult$paramGridSearch, function(x) x$hyperSummary)) prediction <- cvResult$prediction incs <- rep(1, covariateRef %>% dplyr::tally() %>% - dplyr::collect () - %>% dplyr::pull()) + dplyr::collect () %>% + as.integer) covariateRef <- covariateRef %>% dplyr::collect() %>% dplyr::mutate( From 5d9dc59765c395d73141e0582f2da302eea532ae Mon Sep 17 00:00:00 2001 From: egillax Date: Sun, 18 Jun 2023 13:42:08 +0200 Subject: [PATCH 48/58] Release version and news updated --- DESCRIPTION | 2 +- NEWS.md | 3 ++- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index 824bdfb..c2a17c7 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,7 +1,7 @@ Package: DeepPatientLevelPrediction Type: Package Title: Deep Learning For Patient Level Prediction Using Data In The OMOP Common Data Model -Version: 1.1.5.9000 +Version: 1.1.6 Date: 18-04-2023 Authors@R: c( person("Egill", "Fridgeirsson", email = "e.fridgeirsson@erasmusmc.nl", role = c("aut", "cre")), diff --git a/NEWS.md b/NEWS.md index 3c786d8..eb6a59b 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,5 +1,6 @@ -DeepPatientLevelPrediction 
1.1.5.9000 +DeepPatientLevelPrediction 1.1.6 ====================== + - Caching and resuming of hyperparameter iterations DeepPatientLevelPrediction 1.1.5 ====================== From 506b940db4f301c104b03a4e28168e5aea783553 Mon Sep 17 00:00:00 2001 From: egillax Date: Sun, 18 Jun 2023 14:02:47 +0200 Subject: [PATCH 49/58] Release and NEWS --- DESCRIPTION | 2 +- NEWS.md | 4 ++++ 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/DESCRIPTION b/DESCRIPTION index c2a17c7..cfcb374 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,7 +1,7 @@ Package: DeepPatientLevelPrediction Type: Package Title: Deep Learning For Patient Level Prediction Using Data In The OMOP Common Data Model -Version: 1.1.6 +Version: 1.1.6.9000 Date: 18-04-2023 Authors@R: c( person("Egill", "Fridgeirsson", email = "e.fridgeirsson@erasmusmc.nl", role = c("aut", "cre")), diff --git a/NEWS.md b/NEWS.md index eb6a59b..f1fdc06 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,3 +1,7 @@ +DeepPatientLevelPrediction 1.1.6.9000 +====================== + + DeepPatientLevelPrediction 1.1.6 ====================== - Caching and resuming of hyperparameter iterations From 74608ffb7543371f11863d41c8beb3ca28abc6dd Mon Sep 17 00:00:00 2001 From: Henrik Date: Thu, 22 Jun 2023 10:55:44 +0200 Subject: [PATCH 50/58] Resolve an issue with hidden dimension ratio (#74) * Resolve an issue with hidden dimension ratio * Optimize solution * add test case --------- Co-authored-by: egillax --- R/Estimator.R | 4 ++-- R/Transformer.R | 9 ++++++++- extras/example.R | 8 +++----- tests/testthat/test-Transformer.R | 14 ++++++++++++++ 4 files changed, 27 insertions(+), 8 deletions(-) diff --git a/R/Estimator.R b/R/Estimator.R index 529c315..1452503 100644 --- a/R/Estimator.R +++ b/R/Estimator.R @@ -138,8 +138,8 @@ fitEstimator <- function(trainData, hyperSummary <- do.call(rbind, lapply(cvResult$paramGridSearch, function(x) x$hyperSummary)) prediction <- cvResult$prediction incs <- rep(1, covariateRef %>% dplyr::tally() %>% - dplyr::collect () %>% - as.integer) + dplyr::collect() %>% + as.integer()) covariateRef <- covariateRef %>% dplyr::collect() %>% dplyr::mutate( diff --git a/R/Transformer.R b/R/Transformer.R index 7e25ff7..04212fd 100644 --- a/R/Transformer.R +++ b/R/Transformer.R @@ -91,7 +91,7 @@ setTransformer <- function(numBlocks = 3, dimToken = 96, dimOut = 1, )) } else { if (!is.null(dimHiddenRatio)) { - dimHidden <- round(dimToken*dimHiddenRatio, digits = 0) + dimHidden <- dimHiddenRatio } } @@ -110,6 +110,13 @@ setTransformer <- function(numBlocks = 3, dimToken = 96, dimOut = 1, param <- PatientLevelPrediction::listCartesian(paramGrid) + if (!is.null(dimHiddenRatio)) { + param <- lapply(param, function(x) { + x$dimHidden <- round(x$dimToken*x$dimHidden, digits = 0) + return(x) + }) + } + if (hyperParamSearch == "random" && randomSample>length(param)) { stop(paste("\n Chosen amount of randomSamples is higher than the amount of possible hyperparameter combinations.", "\n randomSample:", randomSample,"\n Possible hyperparameter combinations:", length(param), diff --git a/extras/example.R b/extras/example.R index fa989b1..49ead98 100644 --- a/extras/example.R +++ b/extras/example.R @@ -16,11 +16,9 @@ populationSet <- PatientLevelPrediction::createStudyPopulationSettings( riskWindowEnd = 365) -modelSettings <- setResNet(numLayers = 2, sizeHidden = 64, hiddenFactor = 1, - residualDropout = 0, hiddenDropout = 0.2, normalization = 'BatchNorm', - activation = 'RelU', sizeEmbedding = 512, weightDecay = 1e-6, - learningRate = 3e-4, seed = 42, hyperParamSearch 
= 'random', - randomSample = 1, device = 'cuda:0',batchSize = 32,epochs = 10) +modelSettings <- setDefaultResNet(estimatorSettings = setEstimator(epochs=1L, + device='cuda:0', + batchSize=128L)) # modelSettings <- setTransformer(numBlocks=1, dimToken = 33, dimOut = 1, numHeads = 3, # attDropout = 0.2, ffnDropout = 0.2, resDropout = 0, diff --git a/tests/testthat/test-Transformer.R b/tests/testthat/test-Transformer.R index 94c2e48..52139ad 100644 --- a/tests/testthat/test-Transformer.R +++ b/tests/testthat/test-Transformer.R @@ -90,3 +90,17 @@ test_that("Errors are produced by settings function", { expect_error(setTransformer(randomSample = randomSample)) }) + +test_that("dimHidden ratio works as expected", { + randomSample <- 4 + dimToken <- c(64, 128, 256, 512) + dimHiddenRatio <- 2 + modelSettings <- setTransformer(dimToken = dimToken, + dimHiddenRatio = dimHiddenRatio, + dimHidden = NULL, + randomSample = randomSample) + dimHidden <- unlist(lapply(modelSettings$param, function(x) x$dimHidden)) + tokens <- unlist(lapply(modelSettings$param, function(x) x$dimToken)) + expect_true(all(dimHidden == dimHiddenRatio * tokens)) + +}) From ba60c28f70c64d754861946ff33c11b2fb48458e Mon Sep 17 00:00:00 2001 From: Henrik Date: Thu, 20 Jul 2023 15:30:49 +0200 Subject: [PATCH 51/58] Cache single hyperparameter combination (#78) --- R/TrainingCache-class.R | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/R/TrainingCache-class.R b/R/TrainingCache-class.R index fac668e..8577f31 100644 --- a/R/TrainingCache-class.R +++ b/R/TrainingCache-class.R @@ -76,8 +76,13 @@ TrainingCache <- R6::R6Class( if (is.null(private$.paramPersistence$gridSearchPredictions)) { return(1) } else { - return(which(sapply(private$.paramPersistence$gridSearchPredictions, + # if only a single hyperparameter combination is assessed return 1 + if (length(private$.paramPersistence$gridSearchPredictions) == 1) { + return(1) + } else { + return(which(sapply(private$.paramPersistence$gridSearchPredictions, is.null))[1]) + } } }, From bd9b357ce4f2e649e21f9eaed35daca6b875beb9 Mon Sep 17 00:00:00 2001 From: Egill Axfjord Fridgeirsson Date: Mon, 28 Aug 2023 12:47:32 +0200 Subject: [PATCH 52/58] Change backend to pytorch (#80) --- .Rbuildignore | 1 + .github/workflows/R_CDM_check_hades.yaml | 24 +- .gitignore | 3 + DESCRIPTION | 19 +- NAMESPACE | 5 +- R/Dataset.R | 157 ++------ R/DeepPatientLevelPrediction.R | 9 + R/Estimator-class.R | 468 ----------------------- R/Estimator.R | 186 +++++---- R/HelperFunctions.R | 41 ++ R/LRFinder.R | 156 ++------ R/MLP.R | 107 +----- R/ResNet.R | 120 +----- R/Transformer.R | 253 +----------- extras/example.R | 89 +++-- inst/python/Dataset.py | 102 +++++ inst/python/Estimator.py | 355 +++++++++++++++++ inst/python/LrFinder.py | 105 +++++ inst/python/MLP.py | 76 ++++ inst/python/ResNet.py | 130 +++++++ inst/python/Transformer.py | 188 +++++++++ inst/python/__init__.py | 0 man/Dataset.Rd | 18 - man/EarlyStopping.Rd | 78 ---- man/Estimator.Rd | 278 -------------- man/batchToDevice.Rd | 20 - man/camelCaseToSnakeCase.Rd | 17 + man/camelCaseToSnakeCaseNames.Rd | 17 + man/lrFinder.Rd | 42 -- man/setDefaultResNet.Rd | 2 +- man/setDefaultTransformer.Rd | 2 +- man/setEstimator.Rd | 13 +- man/setMultiLayerPerceptron.Rd | 14 +- man/setResNet.Rd | 10 +- tests/testthat/setup.R | 23 +- tests/testthat/test-Dataset.R | 45 +-- tests/testthat/test-Estimator.R | 223 ++++++----- tests/testthat/test-LRFinder.R | 75 ++-- tests/testthat/test-MLP.R | 64 ++-- tests/testthat/test-ResNet.R | 93 +++-- 
tests/testthat/test-TrainingCache.R | 17 +- tests/testthat/test-Transformer.R | 72 ++-- vignettes/BuildingDeepModels.Rmd | 42 +- vignettes/FirstModel.Rmd | 4 +- vignettes/Installing.Rmd | 43 ++- 45 files changed, 1762 insertions(+), 2044 deletions(-) delete mode 100644 R/Estimator-class.R create mode 100644 R/HelperFunctions.R create mode 100644 inst/python/Dataset.py create mode 100644 inst/python/Estimator.py create mode 100644 inst/python/LrFinder.py create mode 100644 inst/python/MLP.py create mode 100644 inst/python/ResNet.py create mode 100644 inst/python/Transformer.py create mode 100644 inst/python/__init__.py delete mode 100644 man/Dataset.Rd delete mode 100644 man/EarlyStopping.Rd delete mode 100644 man/Estimator.Rd delete mode 100644 man/batchToDevice.Rd create mode 100644 man/camelCaseToSnakeCase.Rd create mode 100644 man/camelCaseToSnakeCaseNames.Rd delete mode 100644 man/lrFinder.Rd diff --git a/.Rbuildignore b/.Rbuildignore index d003b0a..ac33fdc 100644 --- a/.Rbuildignore +++ b/.Rbuildignore @@ -1,6 +1,7 @@ ^.*\.Rproj$ ^\.Rproj\.user$ ^\.github$ +^\.idea$ ^_pkgdown\.yml$ ^docs$ ^pkgdown$ diff --git a/.github/workflows/R_CDM_check_hades.yaml b/.github/workflows/R_CDM_check_hades.yaml index f064053..669e780 100644 --- a/.github/workflows/R_CDM_check_hades.yaml +++ b/.github/workflows/R_CDM_check_hades.yaml @@ -43,7 +43,6 @@ jobs: CDM5_SQL_SERVER_PASSWORD: ${{ secrets.CDM5_SQL_SERVER_PASSWORD }} CDM5_SQL_SERVER_SERVER: ${{ secrets.CDM5_SQL_SERVER_SERVER }} CDM5_SQL_SERVER_USER: ${{ secrets.CDM5_SQL_SERVER_USER }} - TORCH_INSTALL: '1' steps: - uses: actions/checkout@v2 @@ -52,9 +51,13 @@ jobs: with: r-version: ${{ matrix.config.r }} + - uses: actions/setup-python@v4 + with: + python-version: '3.10' + - uses: r-lib/actions/setup-tinytex@v2 - - uses: r-lib/actions/setup-pandoc@v1 + - uses: r-lib/actions/setup-pandoc@v2 - name: Install system requirements if: runner.os == 'Linux' @@ -70,7 +73,22 @@ jobs: with: extra-packages: any::rcmdcheck needs: check - + + - name: setup r-reticulate venv + shell: Rscript {0} + run: | + python_packages <- + c("polars", "tqdm", "connectorx", "pyarrow", "scikit-learn") + + library(reticulate) + virtualenv_create("r-reticulate", Sys.which("python")) + virtualenv_install("r-reticulate", python_packages) + virtualenv_install("r-reticulate", "torch", pip_options = c("--index-url https://download.pytorch.org/whl/cpu")) + + path_to_python <- virtualenv_python("r-reticulate") + writeLines(sprintf("RETICULATE_PYTHON=%s", path_to_python), + Sys.getenv("GITHUB_ENV")) + - uses: r-lib/actions/check-r-package@v2 with: args: 'c("--no-manual", "--as-cran")' diff --git a/.gitignore b/.gitignore index c567877..56db315 100644 --- a/.gitignore +++ b/.gitignore @@ -5,3 +5,6 @@ results config.yml docs +.idea/ +renv.lock +extras/ \ No newline at end of file diff --git a/DESCRIPTION b/DESCRIPTION index cfcb374..08d955d 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,7 +1,7 @@ Package: DeepPatientLevelPrediction Type: Package Title: Deep Learning For Patient Level Prediction Using Data In The OMOP Common Data Model -Version: 1.1.6.9000 +Version: 1.1.6.9999 Date: 18-04-2023 Authors@R: c( person("Egill", "Fridgeirsson", email = "e.fridgeirsson@erasmusmc.nl", role = c("aut", "cre")), @@ -24,8 +24,8 @@ Imports: ParallelLogger (>= 2.0.0), PatientLevelPrediction (>= 6.0.4), rlang, - torch (>= 0.10.0), - withr + withr, + reticulate (>= 1.31) Suggests: devtools, Eunomia, @@ -37,10 +37,21 @@ Suggests: ResultModelManager (>= 0.2.0), DatabaseConnector (>= 6.0.0) Remotes: - 
ohdsi/PatientLevelPrediction@develop, + ohdsi/PatientLevelPrediction, ohdsi/FeatureExtraction, ohdsi/Eunomia, ohdsi/ResultModelManager RoxygenNote: 7.2.3 Encoding: UTF-8 Config/testthat/edition: 3 +Config/reticulate: + list( + packages = list( + list(package = "torch"), + list(package = "polars"), + list(package = "tqdm"), + list(package = "connectorx"), + list(package = "scikit-learn"), + list(package = "pyarrow") + ) + ) diff --git a/NAMESPACE b/NAMESPACE index bea6721..4806f0d 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -1,11 +1,8 @@ # Generated by roxygen2: do not edit by hand -export(Dataset) -export(Estimator) export(TrainingCache) export(fitEstimator) export(gridCvDeep) -export(lrFinder) export(predictDeepEstimator) export(setDefaultResNet) export(setDefaultTransformer) @@ -14,4 +11,6 @@ export(setMultiLayerPerceptron) export(setResNet) export(setTransformer) importFrom(dplyr,"%>%") +importFrom(reticulate,py_to_r) +importFrom(reticulate,r_to_py) importFrom(rlang,.data) diff --git a/R/Dataset.R b/R/Dataset.R index 3b0dd8d..f4b5170 100644 --- a/R/Dataset.R +++ b/R/Dataset.R @@ -1,124 +1,35 @@ -#' A torch dataset -#' @export -Dataset <- torch::dataset( - name = "myDataset", - #' @param data a dataframe like object with the covariates - #' @param labels a dataframe with the labels - #' @param numericalIndex in what column numeric data is in (if any) - initialize = function(data, labels = NULL, numericalIndex = NULL) { - # determine numeric - if (is.null(numericalIndex)) { - numericalIndex <- data %>% - dplyr::arrange(columnId) %>% - dplyr::group_by(columnId) %>% - dplyr::summarise(n = dplyr::n_distinct(.data$covariateValue)) %>% - dplyr::collect() %>% - dplyr::pull(n) > 1 - self$numericalIndex <- numericalIndex - } else { - self$numericalIndex <- NULL - } - - # add labels if training (make 0 vector for prediction) - if (!is.null(labels)) { - self$target <- torch::torch_tensor(labels) - } else { - self$target <- torch::torch_tensor(rep(0, data %>% dplyr::summarize(m=max(rowId)) %>% - dplyr::collect() %>% dplyr::pull())) - } - # Weight to add in loss function to positive class - self$posWeight <- (self$target == 0)$sum() / self$target$sum() - - # add features - catColumns <- which(!numericalIndex) - dataCat <- dplyr::filter(data, columnId %in% catColumns) %>% - dplyr::arrange(columnId) %>% - dplyr::group_by(columnId) %>% - dplyr::collect() %>% - dplyr::mutate(newColumnId = dplyr::cur_group_id()) %>% - dplyr::ungroup() %>% - dplyr::select(c("rowId", "newColumnId")) %>% - dplyr::rename(columnId = newColumnId) %>% - dplyr::arrange(rowId) - start <- Sys.time() - catTensor <- torch::torch_tensor(cbind(dataCat$rowId, dataCat$columnId)) - catTensor <- catTensor[catTensor[,1]$argsort(),] - tensorList <- torch::torch_split(catTensor[,2], - as.numeric(torch::torch_unique_consecutive(catTensor[,1], - return_counts = TRUE)[[3]])) - - # because of subjects without cat features, I need to create a list with all zeroes and then insert - # my tensorList. That way I can still index the dataset correctly. 
- totalList <- as.list(integer(length(self$target))) - totalList[unique(dataCat$rowId)] <- tensorList - self$lengths <- lengths - self$cat <- torch::nn_utils_rnn_pad_sequence(totalList, batch_first = T) - delta <- Sys.time() - start - ParallelLogger::logInfo("Data conversion for dataset took ", signif(delta, 3), " ", attr(delta, "units")) - if (sum(numericalIndex) == 0) { - self$num <- NULL - } else { - numericalData <- data %>% - dplyr::filter(columnId %in% !!which(numericalIndex)) %>% - dplyr::collect() - numericalData <- numericalData %>% - dplyr::group_by(columnId) %>% - dplyr::mutate(newId = dplyr::cur_group_id()) - indices <- torch::torch_tensor(cbind(numericalData$rowId, numericalData$newId), dtype = torch::torch_long())$t_() - values <- torch::torch_tensor(numericalData$covariateValue, dtype = torch::torch_float32()) - self$num <- torch::torch_sparse_coo_tensor( - indices = indices, - values = values, - size = c(self$target$shape, sum(numericalIndex)) - )$to_dense() - } - }, - getNumericalIndex = function() { - return( - self$numericalIndex - ) - }, - numCatFeatures = function() { - return( - sum(!self$numericalIndex) - ) - }, - numNumFeatures = function() { - if (!is.null(self$num)) { - return(self$num$shape[2]) - } else { - return(0) - } - }, - .getbatch = function(item) { - if (length(item) == 1) { - return(self$.getBatchSingle(item)) - } else { - return(self$.getBatchRegular(item)) - } - }, - .getBatchSingle = function(item) { - # add leading singleton dimension since models expects 2d tensors - batch <- list( - cat = self$cat[item]$unsqueeze(1), - num = self$num[item]$unsqueeze(1) - ) - return(list( - batch = batch, - target = self$target[item]$unsqueeze(1) - )) - }, - .getBatchRegular = function(item) { - batch <- list( - cat = self$cat[item], - num = self$num[item] - ) - return(list( - batch = batch, - target = self$target[item] - )) - }, - .length = function() { - self$target$size()[[1]] # shape[1] +# @file Dataset.R +# +# Copyright 2023 Observational Health Data Sciences and Informatics +# +# This file is part of DeepPatientLevelPrediction +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
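createDataset() below no longer builds torch tensors in R: it hands the path of the file backing the covariate data (the Andromeda/sqlite file) and, when fitting, the outcome labels to the Data class in inst/python/Dataset.py through reticulate. From this patch on, torch inside the package code refers to the Python module imported with delay_load in .onLoad (see R/DeepPatientLevelPrediction.R further down), not to the R torch package. A minimal sketch of that calling convention, assuming reticulate and a Python environment with torch installed:

    library(reticulate)
    torch <- import("torch", delay_load = TRUE)   # Python torch, proxied into R
    optimizer <- torch$optim$AdamW                # torch.optim.AdamW on the Python side
    criterion <- torch$nn$BCEWithLogitsLoss      # torch.nn.BCEWithLogitsLoss
    batchSize <- 512L                             # integer literal so reticulate passes a Python int

This is also why setEstimator() later in the patch swaps torch::optim_adamw for torch$optim$AdamW and uses integer defaults such as 512L and 30L.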
+createDataset <- function(data, labels, plpModel=NULL) { + path <- system.file("python", package = "DeepPatientLevelPrediction") + Dataset <- reticulate::import_from_path("Dataset", path = path)$Data + if (is.null(attributes(data)$path)) { + # sqlite object + attributes(data)$path <- attributes(data)$dbname } -) + if (is.null(plpModel)) { + data <- Dataset(r_to_py(normalizePath(attributes(data)$path)), + r_to_py(labels$outcomeCount)) + } + else { + data <- Dataset(r_to_py(normalizePath(attributes(data)$path)), + numerical_features = r_to_py(as.array(which(plpModel$covariateImportance$isNumeric))) ) + } + + return(data) +} \ No newline at end of file diff --git a/R/DeepPatientLevelPrediction.R b/R/DeepPatientLevelPrediction.R index 6070709..e77812e 100644 --- a/R/DeepPatientLevelPrediction.R +++ b/R/DeepPatientLevelPrediction.R @@ -23,5 +23,14 @@ #' @docType package #' @name DeepPatientLevelPrediction #' @importFrom dplyr %>% +#' @importFrom reticulate r_to_py py_to_r #' @importFrom rlang .data NULL + +.onLoad <- function(libname, pkgname) { + # use superassignment to update global reference + reticulate::configure_environment(pkgname) + torch <<- reticulate::import("torch", delay_load = TRUE) +} + + diff --git a/R/Estimator-class.R b/R/Estimator-class.R deleted file mode 100644 index 5d2fe76..0000000 --- a/R/Estimator-class.R +++ /dev/null @@ -1,468 +0,0 @@ -# @file Estimator-class.R -# -# Copyright 2022 Observational Health Data Sciences and Informatics -# -# This file is part of DeepPatientLevelPrediction -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -#' Estimator -#' @description -#' A generic R6 class that wraps around a torch nn module and can be used to -#' fit and predict the model defined in that module. 
-#' @export -Estimator <- R6::R6Class( - classname = "Estimator", - lock_objects = FALSE, - public = list( - #' @description - #' Creates a new estimator - #' @param modelType The torch nn module to use as model - #' @param modelParameters Parameters to initialize the model - #' @param estimatorSettings Parameters required for the estimator fitting - initialize = function(modelType, - modelParameters, - estimatorSettings) { - self$seed <- estimatorSettings$seed - if (is.function(estimatorSettings$device)) { - device <- estimatorSettings$device() - } else { - device <- estimatorSettings$device - } - self$device <- device - torch::torch_manual_seed(seed=self$seed) - self$model <- do.call(modelType, modelParameters) - self$modelParameters <- modelParameters - self$estimatorSettings <- estimatorSettings - self$epochs <- self$itemOrDefaults(estimatorSettings, "epochs", 10) - self$learningRate <- self$itemOrDefaults(estimatorSettings, "learningRate", 1e-3) - self$l2Norm <- self$itemOrDefaults(estimatorSettings, "weightDecay", 1e-5) - self$batchSize <- self$itemOrDefaults(estimatorSettings, "batchSize", 1024) - self$prefix <- self$itemOrDefaults(estimatorSettings, "prefix", self$model$name) - - self$previousEpochs <- self$itemOrDefaults(estimatorSettings, "previousEpochs", 0) - self$model$to(device = self$device) - - self$optimizer <- estimatorSettings$optimizer( - params = self$model$parameters, - lr = self$learningRate, - weight_decay = self$l2Norm - ) - self$criterion <- estimatorSettings$criterion() - - if (!is.null(estimatorSettings$metric)) { - self$metric <- estimatorSettings$metric - if (is.character(self$metric)) { - if (self$metric == "auc") { - self$metric <- list(name="auc", - mode="max") - } else if (self$metric == "loss") { - self$metric <- list(name="loss", - mode="min") - } - } - if (!is.null(estimatorSettings$scheduler)) { - estimatorSettings$scheduler$params$mode <- self$metrix$mode - } - if (!is.null(estimatorSettings$earlyStopping)) { - estimatorSettings$earlyStopping$params$mode <- self$metric$mode - } - } - - if (!is.null(estimatorSettings$scheduler)) { - self$scheduler <- do.call(estimatorSettings$scheduler$fun, - c(self$optimizer, estimatorSettings$scheduler$params)) - } - - # gradient accumulation is useful when training large numbers where - # you can only fit few samples on the GPU in each batch. 
- self$gradAccumulationIter <- 1 - - if (!is.null(estimatorSettings$earlyStopping) && estimatorSettings$earlyStopping$useEarlyStopping) { - self$earlyStopper <- do.call(EarlyStopping$new, estimatorSettings$earlyStopping$params) - } else { - self$earlyStopper <- NULL - } - - self$bestScore <- NULL - self$bestEpoch <- NULL - }, - - #' @description fits the estimator - #' @param dataset a torch dataset to use for model fitting - #' @param testDataset a torch dataset to use for early stopping - fit = function(dataset, testDataset) { - allScores <- list() - batchIndex <- torch::torch_randperm(length(dataset)) + 1L - batchIndex <- split(batchIndex, ceiling(seq_along(batchIndex) / self$batchSize)) - - testBatchIndex <- 1:length(testDataset) - testBatchIndex <- split(testBatchIndex, ceiling(seq_along(testBatchIndex) / self$batchSize)) - - modelStateDict <- list() - epoch <- list() - times <- list() - learnRates <- list() - for (epochI in 1:self$epochs) { - startTime <- Sys.time() - trainLoss <- self$fitEpoch(dataset, batchIndex) - endTime <- Sys.time() - - # predict on test data - scores <- self$score(testDataset, testBatchIndex) - delta <- endTime - startTime - currentEpoch <- epochI + self$previousEpochs - lr <- self$optimizer$param_groups[[1]]$lr - self$printProgress(scores, trainLoss, delta, currentEpoch) - self$scheduler$step(scores$metric) - allScores[[epochI]] <- scores - learnRates <- c(learnRates, lr) - times <- c(times, round(delta, 3)) - if (!is.null(self$earlyStopper)) { - self$earlyStopper$call(scores$metric) - if (self$earlyStopper$improved) { - # here it saves the results to lists rather than files - modelStateDict[[epochI]] <- lapply(self$model$state_dict(), function(x) x$detach()$cpu()) - epoch[[epochI]] <- currentEpoch - } - if (self$earlyStopper$earlyStop) { - ParallelLogger::logInfo("Early stopping, validation metric stopped improving") - ParallelLogger::logInfo("Average time per epoch was: ", round(mean(as.numeric(times)), 3), " ", units(delta)) - self$finishFit(allScores, modelStateDict, epoch, learnRates) - return(invisible(self)) - } - } else { - modelStateDict[[epochI]] <- lapply(self$model$state_dict(), function(x) x$detach()$cpu()) - epoch[[epochI]] <- currentEpoch - } - } - ParallelLogger::logInfo("Average time per epoch was: ", round(mean(as.numeric(times)), 3), " ", units(delta)) - self$finishFit(allScores, modelStateDict, epoch, learnRates) - invisible(self) - }, - - #' @description - #' fits estimator for one epoch (one round through the data) - #' @param dataset torch dataset to use for fitting - #' @param batchIndex indices of batches - fitEpoch = function(dataset, batchIndex) { - trainLosses <- torch::torch_empty(length(batchIndex)) - ix <- 1 - self$model$train() - progressBar <- utils::txtProgressBar(style = 3) - for (b in batchIndex) { - self$optimizer$zero_grad() - batch <- batchToDevice(dataset[b], device=self$device) - out <- self$model(batch[[1]]) - loss <- self$criterion(out, batch[[2]]) - loss$backward() - - self$optimizer$step() - trainLosses[ix] <- loss$detach() - utils::setTxtProgressBar(progressBar, ix / length(batchIndex)) - ix <- ix + 1 - } - close(progressBar) - trainLosses$mean()$item() - }, - - #' @description - #' calculates loss and auc after training for one epoch - #' @param dataset The torch dataset to use to evaluate loss and auc - #' @param batchIndex Indices of batches in the dataset - #' @return list with average loss and auc in the dataset - score = function(dataset, batchIndex) { - torch::with_no_grad({ - loss <- 
torch::torch_empty(c(length(batchIndex))) - predictions <- list() - targets <- list() - self$model$eval() - ix <- 1 - for (b in batchIndex) { - batch <- batchToDevice(dataset[b], device=self$device) - pred <- self$model(batch[[1]]) - predictions <- c(predictions, pred) - targets <- c(targets, batch[[2]]) - loss[ix] <- self$criterion(pred, batch[[2]]) - ix <- ix + 1 - } - mean_loss <- loss$mean()$item() - predictionsClass <- data.frame( - value = as.matrix(torch::torch_sigmoid(torch::torch_cat(predictions)$cpu())), - outcomeCount = as.matrix(torch::torch_cat(targets)$cpu()) - ) - attr(predictionsClass, "metaData")$modelType <- "binary" - auc <- PatientLevelPrediction::computeAuc(predictionsClass) - scores <- list() - if (!is.null(self$metric)) { - if (self$metric$name == "auc") { - scores$metric <- auc - } else if (self$metric$name == "loss") { - scores$metric <- mean_loss - } else { - metric <- self$metric$fun(predictionsClass$value, predictionsClass$outcomeCount) - scores$metric <- metric - } - } - scores$auc <- auc - scores$loss <- mean_loss - }) - return(scores) - }, - - #' @description - #' operations that run when fitting is finished - #' @param scores validation scores - #' @param modelStateDict fitted model parameters - #' @param epoch list of epochs fit - #' @param learnRates learning rate sequence used so far - finishFit = function(scores, modelStateDict, epoch, learnRates) { - if (self$metric$mode=="max") { - bestEpochInd <- which.max(unlist(lapply(scores, function(x) x$metric))) - } - else if (self$metric$mode=="min") { - bestEpochInd <- which.min(unlist(lapply(scores, function(x) x$metric))) - } - - bestModelStateDict <- lapply(modelStateDict[[bestEpochInd]], function(x) x$to(device = self$device)) - self$model$load_state_dict(bestModelStateDict) - - bestEpoch <- epoch[[bestEpochInd]] - self$bestEpoch <- bestEpoch - self$bestScore <- list( - loss = scores[[bestEpochInd]]$loss, - auc = scores[[bestEpochInd]]$auc - ) - self$learnRateSchedule <- learnRates[1:bestEpochInd] - - ParallelLogger::logInfo("Loaded best model (based on AUC) from epoch ", bestEpoch) - ParallelLogger::logInfo("ValLoss: ", self$bestScore$loss) - ParallelLogger::logInfo("valAUC: ", self$bestScore$auc) - if (!is.null(self$metric) && (!self$metric$name=='auc') && (!self$metric$name=='loss')) { - self$bestScore[[self$metric$name]] <- scores[[bestEpochInd]]$metric - ParallelLogger::logInfo(self$metric$name,": ", self$bestScore[[self$metric$name]]) - } - }, - - #' @description Print out training progress per epoch - #' @param scores scores returned by `self$score` - #' @param trainLoss training loss - #' @param delta how long did the epoch take - #' @param currentEpoch the current epoch number - printProgress = function(scores, trainLoss, delta, currentEpoch) { - if (!is.null(self$metric) && (!self$metric$name=='auc') && (!self$metric$name=='loss')) { - ParallelLogger::logInfo( - "Epochs: ", currentEpoch, - " | Val ", self$metric$name, ": ", round(scores$metric, 3), - " | Val AUC: ", round(scores$auc, 3), - " | Val Loss: ", round(scores$loss, 3), - " | Train Loss: ", round(trainLoss, 3), - " | Time: ", round(delta, 3), " ", - units(delta), - " | LR: ", self$optimizer$param_groups[[1]]$lr - ) - } else { - ParallelLogger::logInfo( - "Epochs: ", currentEpoch, - " | Val AUC: ", round(scores$auc, 3), - " | Val Loss: ", round(scores$loss, 3), - " | Train Loss: ", round(trainLoss, 3), - " | Time: ", round(delta, 3), " ", - units(delta), - " | LR: ", self$optimizer$param_groups[[1]]$lr - ) - } - }, - - #' @description - 
#' Fits whole training set on a specific number of epochs - #' @param dataset torch dataset - #' @param learnRates learnRateSchedule from CV - fitWholeTrainingSet = function(dataset, learnRates = NULL) { - if (length(learnRates) > 1) { - self$bestEpoch <- length(learnRates) - } else if (is.null(self$bestEpoch)) { - self$bestEpoch <- self$epochs - } - # TODO constant LR - - batchIndex <- torch::torch_randperm(length(dataset)) + 1L - batchIndex <- split(batchIndex, ceiling(seq_along(batchIndex) / self$batchSize)) - for (epoch in 1:self$bestEpoch) { - self$optimizer$param_groups[[1]]$lr <- learnRates[[epoch]] - self$fitEpoch(dataset, batchIndex) - } - }, - - #' @description - #' save model and those parameters needed to reconstruct it - #' @param path where to save the model - #' @param name name of file - #' @return the path to saved model - save = function(path, name) { - savePath <- file.path(path, name) - torch::torch_save( - list( - modelStateDict = self$model$state_dict(), - modelParameters = self$modelParameters, - estimatorSettings = self$estimatorSettings, - epoch = self$epochs - ), - savePath - ) - return(savePath) - }, - - - #' @description - #' predicts and outputs the probabilities - #' @param dataset Torch dataset to create predictions for - #' @return predictions as probabilities - predictProba = function(dataset) { - batchIndex <- 1:length(dataset) - batchIndex <- split(batchIndex, ceiling(seq_along(batchIndex) / self$batchSize)) - torch::with_no_grad({ - predictions <- torch::torch_empty(length(dataset), device=self$device) - self$model$eval() - progressBar <- utils::txtProgressBar(style = 3) - ix <- 1 - coro::loop(for (b in batchIndex) { - batch <- batchToDevice(dataset[b], self$device) - target <- batch$target - pred <- self$model(batch$batch) - predictions[b] <- torch::torch_sigmoid(pred) - utils::setTxtProgressBar(progressBar, ix / length(batchIndex)) - ix <- ix + 1 - }) - predictions <- as.array(predictions$cpu()) - close(progressBar) - }) - return(predictions) - }, - - #' @description - #' predicts and outputs the class - #' @param dataset A torch dataset to create predictions for - #' @param threshold Which threshold to use for predictions - #' @return The predicted class for the data in the dataset - predict = function(dataset, threshold = NULL) { - predictions <- self$predictProba(dataset) - - if (is.null(threshold)) { - # use outcome rate - threshold <- dataset$target$sum()$item() / length(dataset) - } - predicted_class <- as.integer(predictions > threshold) - return(predicted_class) - }, - - #' @description - #' select item from list, and if it's null sets a default - #' @param list A list with items - #' @param item Which list item to retrieve - #' @param default The value to return if list doesn't have item - #' @return the list item or default - itemOrDefaults = function(list, item, default = NULL) { - value <- list[[item]] - if (is.null(value)) default else value - } - ) -) - -#' Earlystopping class -#' @description -#' Stops training if a loss or metric has stopped improving -EarlyStopping <- R6::R6Class( - classname = "EarlyStopping", - lock_objects = FALSE, - public = list( - #' @description - #' Creates a new earlyStopping object - #' @param patience Stop after this number of epochs if loss doesn't improve - #' @param delta How much does the loss need to improve to count as improvement - #' @param verbose If information should be printed out - #' @param mode either `min` or `max` depending on metric to be used for earlyStopping - #' @return a new 
earlystopping object - initialize = function(patience = 3, delta = 0, verbose = TRUE, - mode='max') { - self$patience <- patience - self$counter <- 0 - self$verbose <- verbose - self$bestScore <- NULL - self$earlyStop <- FALSE - self$improved <- FALSE - self$delta <- delta - self$previousScore <- 0 - self$mode <- mode - }, - #' @description - #' call the earlystopping object and increment a counter if loss is not - #' improving - #' @param metric the current metric value - call = function(metric) { - if (self$mode=='max') { - score <- metric - } else { - score <- -1 * metric - } - if (is.null(self$bestScore)) { - self$bestScore <- score - self$improved <- TRUE - } else if (score < self$bestScore + self$delta) { - self$counter <- self$counter + 1 - self$improved <- FALSE - if (self$verbose) { - ParallelLogger::logInfo( - "EarlyStopping counter: ", self$counter, - " out of ", self$patience - ) - } - if (self$counter >= self$patience) { - self$earlyStop <- TRUE - } - } else { - self$bestScore <- score - self$counter <- 0 - self$improved <- TRUE - } - self$previousScore <- score - } - ) -) - -#' sends a batch of data to device -#' @description -#' sends a batch of data to device -#' assumes batch includes lists of tensors to arbitrary nested depths -#' @param batch the batch to send, usually a list of torch tensors -#' @param device which device to send batch to -#' @return the batch on the required device -batchToDevice = function(batch, device) { - if (class(batch)[1] == "torch_tensor") { - batch <- batch$to(device = device) - } else { - ix <- 1 - for (b in batch) { - if (class(b)[1] == "torch_tensor") { - b <- b$to(device = device) - } else { - b <- batchToDevice(b, device) - } - if (!is.null(b)) { - batch[[ix]] <- b - } - ix <- ix + 1 - } - } - return(batch) -} diff --git a/R/Estimator.R b/R/Estimator.R index 1452503..cb684f9 100644 --- a/R/Estimator.R +++ b/R/Estimator.R @@ -39,15 +39,15 @@ #' @export setEstimator <- function(learningRate='auto', weightDecay = 0.0, - batchSize = 512, - epochs = 30, + batchSize = 512L, + epochs = 30L, device='cpu', - optimizer = torch::optim_adamw, - scheduler = list(fun=torch::lr_reduce_on_plateau, - params=list(patience=1)), - criterion = torch::nn_bce_with_logits_loss, + optimizer = torch$optim$AdamW, + scheduler = list(fun=torch$optim$lr_scheduler$ReduceLROnPlateau, + params=list(patience=1L)), + criterion = torch$nn$BCEWithLogitsLoss, earlyStopping = list(useEarlyStopping=TRUE, - params = list(patience=4)), + params = list(patience=4L)), metric = "auc", seed = NULL ) { @@ -55,21 +55,32 @@ setEstimator <- function(learningRate='auto', if (is.null(seed)) { seed <- as.integer(sample(1e5, 1)) } - - estimatorSettings <- list(learningRate=learningRate, weightDecay=weightDecay, batchSize=batchSize, epochs=epochs, device=device, - optimizer=optimizer, - scheduler=scheduler, - criterion=criterion, earlyStopping=earlyStopping, findLR=findLR, metric=metric, - seed=seed[1] - ) + seed=seed[1]) + + optimizer <- rlang::enquo(optimizer) + estimatorSettings$optimizer <- function() rlang::eval_tidy(optimizer) + class(estimatorSettings$optimizer) <- c("delayed", class(estimatorSettings$optimizer)) + + criterion <- rlang::enquo(criterion) + estimatorSettings$criterion <- function() rlang::eval_tidy(criterion) + class(estimatorSettings$criterion) <- c("delayed", class(estimatorSettings$criterion)) + + scheduler <- rlang::enquo(scheduler) + estimatorSettings$scheduler <- function() rlang::eval_tidy(scheduler) + class(estimatorSettings$scheduler) <-c("delayed", 
class(estimatorSettings$scheduler)) + + if (is.function(device)) { + class(estimatorSettings$device) <- c("delayed", class(estimatorSettings$device)) + } + paramsToTune <- list() for (name in names(estimatorSettings)) { param <- estimatorSettings[[name]] @@ -77,7 +88,7 @@ setEstimator <- function(learningRate='auto', paramsToTune[[paste0('estimator.',name)]] <- param } if ("params" %in% names(param)) { - for (name2 in names(param[["params"]])) { + for (name2 in names(param[["params"]])) { param2 <- param[["params"]][[name2]] if (length(param2) > 1) { paramsToTune[[paste0('estimator.',name,'.',name2)]] <- param2 @@ -86,9 +97,10 @@ setEstimator <- function(learningRate='auto', } } estimatorSettings$paramsToTune <- paramsToTune + return(estimatorSettings) } - + #' fitEstimator #' #' @description @@ -141,11 +153,12 @@ fitEstimator <- function(trainData, dplyr::collect() %>% as.integer()) covariateRef <- covariateRef %>% + dplyr::arrange("columnId") %>% dplyr::collect() %>% dplyr::mutate( included = incs, covariateValue = 0, - isNumeric = cvResult$numericalIndex + isNumeric = .data$columnId %in% cvResult$numericalIndex ) comp <- start - Sys.time() @@ -218,27 +231,23 @@ predictDeepEstimator <- function(plpModel, "covariateId" ) ) - data <- Dataset(mappedData$covariates, - numericalIndex = plpModel$covariateImportance$isNumeric - ) + data <- createDataset(mappedData, plpModel=plpModel) } # get predictions prediction <- cohort if (is.character(plpModel$model)) { - model <- torch::torch_load(file.path(plpModel$model, "DeepEstimatorModel.pt"), device = "cpu") - estimator <- Estimator$new( - modelType = plpModel$modelDesign$modelSettings$modelType, - modelParameters = model$modelParameters, - estimatorSettings = model$estimatorSettings - ) - estimator$model$load_state_dict(model$modelStateDict) - prediction$value <- estimator$predictProba(data) + modelSettings <- plpModel$modelDesign$modelSettings + model <- torch$load(file.path(plpModel$model, "DeepEstimatorModel.pt"), map_location = "cpu") + estimator <- createEstimator(modelType=modelSettings$modelType, + modelParameters=model$model_parameters, + estimatorSettings=model$estimator_settings) + estimator$model$load_state_dict(model$model_state_dict) + prediction$value <- estimator$predict_proba(data) } else { - prediction$value <- plpModel$model$predictProba(data) + prediction$value <- plpModel$model$predict_proba(data) } - attr(prediction, "metaData")$modelType <- attr(plpModel, "modelType") return(prediction) @@ -267,9 +276,10 @@ gridCvDeep <- function(mappedData, ########################################################################### paramSearch <- modelSettings$param - trainCache <- TrainingCache$new(analysisPath) - if (trainCache$isParamGridIdentical(paramSearch)) { + # TODO below chunk should be in a setupCache function + trainCache <- TrainingCache$new(analysisPath) + if (trainCache$isParamGridIdentical(paramSearch)) { gridSearchPredictons <- trainCache$getGridSearchPredictions() } else { gridSearchPredictons <- list() @@ -278,55 +288,46 @@ gridCvDeep <- function(mappedData, trainCache$saveModelParams(paramSearch) } - dataset <- Dataset(mappedData$covariates, labels$outcomeCount) - - estimatorSettings <- modelSettings$estimatorSettings + dataset <- createDataset(data=mappedData, labels=labels) fitParams <- names(paramSearch[[1]])[grepl("^estimator", names(paramSearch[[1]]))] - + findLR <- modelSettings$estimatorSettings$findLR for (gridId in trainCache$getLastGridSearchIndex():length(paramSearch)) { ParallelLogger::logInfo(paste0("Running 
hyperparameter combination no ", gridId)) ParallelLogger::logInfo(paste0("HyperParameters: ")) ParallelLogger::logInfo(paste(names(paramSearch[[gridId]]), paramSearch[[gridId]], collapse = " | ")) - modelParams <- paramSearch[[gridId]][modelSettings$modelParamNames] - + currentModelParams <- paramSearch[[gridId]][modelSettings$modelParamNames] - estimatorSettings <- fillEstimatorSettings(estimatorSettings, fitParams, + currentEstimatorSettings <- fillEstimatorSettings(modelSettings$estimatorSettings, fitParams, paramSearch[[gridId]]) - + # initiate prediction - prediction <- c() + prediction <- NULL fold <- labels$index ParallelLogger::logInfo(paste0("Max fold: ", max(fold))) - modelParams$catFeatures <- dataset$numCatFeatures() - modelParams$numFeatures <- dataset$numNumFeatures() - - if (estimatorSettings$findLR) { - lr <- lrFinder(dataset=dataset, - modelType = modelSettings$modelType, - modelParams = modelParams, - estimatorSettings = estimatorSettings) + currentModelParams$catFeatures <- dataset$get_cat_features()$shape[[1]] + currentModelParams$numFeatures <- dataset$get_numerical_features()$shape[[1]] + if (findLR) { + LrFinder <- createLRFinder(modelType = modelSettings$modelType, + modelParameters = currentModelParams, + estimatorSettings = currentEstimatorSettings + ) + lr <- LrFinder$get_lr(dataset) ParallelLogger::logInfo(paste0("Auto learning rate selected as: ", lr)) - estimatorSettings$learningRate <- lr + currentEstimatorSettings$learningRate <- lr } - learnRates <- list() for (i in 1:max(fold)) { ParallelLogger::logInfo(paste0("Fold ", i)) - trainDataset <- torch::dataset_subset(dataset, indices = which(fold != i)) - testDataset <- torch::dataset_subset(dataset, indices = which(fold == i)) - estimator <- Estimator$new( - modelType = modelSettings$modelType, - modelParameters = modelParams, - estimatorSettings = estimatorSettings - ) - - estimator$fit( - trainDataset, - testDataset - ) + trainDataset <- torch$utils$data$Subset(dataset, indices = as.integer(which(fold != i) - 1)) # -1 for python 0-based indexing + testDataset <- torch$utils$data$Subset(dataset, indices = as.integer(which(fold == i) -1)) # -1 for python 0-based indexing + + estimator <- createEstimator(modelType=modelSettings$modelType, + modelParameters=currentModelParams, + estimatorSettings=currentEstimatorSettings) + estimator$fit(trainDataset, testDataset) ParallelLogger::logInfo("Calculating predictions on left out fold set...") @@ -339,8 +340,8 @@ gridCvDeep <- function(mappedData, ) ) learnRates[[i]] <- list( - LRs = estimator$learnRateSchedule, - bestEpoch = estimator$bestEpoch + LRs = estimator$learn_rate_schedule, + bestEpoch = estimator$best_epoch ) } maxIndex <- which.max(unlist(sapply(learnRates, `[`, 2))) @@ -374,20 +375,20 @@ gridCvDeep <- function(mappedData, if (!dir.exists(file.path(modelLocation))) { dir.create(file.path(modelLocation), recursive = T) } - modelParams$catFeatures <- dataset$numCatFeatures() - modelParams$numFeatures <- dataset$numNumFeatures() - estimatorSettings <- fillEstimatorSettings(estimatorSettings, fitParams, - finalParam) + modelParams$catFeatures <- dataset$get_cat_features()$shape[[1]] + modelParams$numFeatures <- dataset$get_numerical_features()$shape[[1]] - estimator <- Estimator$new( - modelType = modelSettings$modelType, - modelParameters = modelParams, - estimatorSettings = estimatorSettings - ) - numericalIndex <- dataset$getNumericalIndex() - estimator$fitWholeTrainingSet(dataset, finalParam$learnSchedule$LRs) + estimatorSettings <- 
fillEstimatorSettings(modelSettings$estimatorSettings, fitParams, + finalParam) + estimatorSettings$learningRate <- finalParam$learnSchedule$LRs[[1]] + estimator <- createEstimator(modelType = modelSettings$modelType, + modelParameters = modelParams, + estimatorSettings = estimatorSettings) + + numericalIndex <- dataset$get_numerical_features() + estimator$fit_whole_training_set(dataset, finalParam$learnSchedule$LRs) ParallelLogger::logInfo("Calculating predictions on all train data...") prediction <- predictDeepEstimator( @@ -410,14 +411,13 @@ gridCvDeep <- function(mappedData, # save torch code here estimator$save(modelLocation, "DeepEstimatorModel.pt") - return( list( estimator = modelLocation, prediction = prediction, finalParam = finalParam, paramGridSearch = paramGridSearch, - numericalIndex = numericalIndex + numericalIndex = numericalIndex$to_list() ) ) } @@ -434,4 +434,34 @@ fillEstimatorSettings <- function(estimatorSettings, fitParams, paramSearch) { } } return(estimatorSettings) +} + +# utility function to evaluate any expressions or call functions passed as settings +evalEstimatorSettings <- function(estimatorSettings) { + + for (set in names(estimatorSettings)) { + if (inherits(estimatorSettings[[set]], "delayed")) { + estimatorSettings[[set]] <- estimatorSettings[[set]]() + } + } + estimatorSettings +} + +createEstimator <- function(modelType, + modelParameters, + estimatorSettings) { + path <- system.file("python", package = "DeepPatientLevelPrediction") + + Model <- reticulate::import_from_path(modelType, path=path)[[modelType]] + + Estimator <- reticulate::import_from_path("Estimator", path=path)$Estimator + + modelParameters <- camelCaseToSnakeCaseNames(modelParameters) + estimatorSettings <- camelCaseToSnakeCaseNames(estimatorSettings) + estimatorSettings <- evalEstimatorSettings(estimatorSettings) + + estimator <- Estimator(model = Model, + model_parameters = modelParameters, + estimator_settings = estimatorSettings) + return(estimator) } \ No newline at end of file diff --git a/R/HelperFunctions.R b/R/HelperFunctions.R new file mode 100644 index 0000000..83f2f7e --- /dev/null +++ b/R/HelperFunctions.R @@ -0,0 +1,41 @@ +# @file HelperFunctions.R +# +# Copyright 2023 Observational Health Data Sciences and Informatics +# +# This file is part of DeepPatientLevelPrediction +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +#' Convert a camel case string to snake case +#' +#' @param string The string to be converted +#' +#' @return +#' A string +#' +camelCaseToSnakeCase <- function(string) { + string <- gsub("([A-Z])", "_\\1", string) + string <- tolower(string) + string <- gsub("([a-z])([0-9])", "\\1_\\2", string) + return(string) +} + +#' Convert the names of an object from camel case to snake case +#' +#' @param object The object of which the names should be converted +#' +#' @return +#' The same object, but with converted names. 
+camelCaseToSnakeCaseNames <- function(object) { + names(object) <- camelCaseToSnakeCase(names(object)) + return(object) +} \ No newline at end of file diff --git a/R/LRFinder.R b/R/LRFinder.R index 9f670e4..dfe8ee3 100644 --- a/R/LRFinder.R +++ b/R/LRFinder.R @@ -1,122 +1,42 @@ -lrPerBatch <- torch::lr_scheduler( - "lrPerBatch", - initialize = function( - optimizer, - startLR = 1e-7, - endLR = 1.0, - nIters = 100, - lastEpoch = -1, - verbose = FALSE) { - - self$optimizer <- optimizer - self$endLR <- endLR - self$base_lrs <- startLR - self$iterations <- nIters - self$last_epoch <- lastEpoch - self$multiplier <- (endLR/startLR)^(1/nIters) - - super$initialize(optimizer, last_epoch=lastEpoch, verbose) - - }, +# @file LRFinder.R +# +# Copyright 2023 Observational Health Data Sciences and Informatics +# +# This file is part of DeepPatientLevelPrediction +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +createLRFinder <- function(modelType, + modelParameters, + estimatorSettings, + lrSettings=NULL) { + path <- system.file("python", package = "DeepPatientLevelPrediction") + LRFinderClass <- reticulate::import_from_path("LrFinder", path = path)$LrFinder - get_lr = function() { - if (self$last_epoch > 0) { - lrs <- numeric(length(self$optimizer$param_groups)) - for (i in seq_along(self$optimizer$param_groups)) { - lrs[i] <- self$base_lrs[[i]] * (self$endLR / self$base_lrs[[i]]) ^ (self$last_epoch/(self$iterations-1)) - } - } else { - lrs <- as.numeric(self$base_lrs) - } - lrs + model <- reticulate::import_from_path(modelType, path=path)[[modelType]] + + modelParameters <- camelCaseToSnakeCaseNames(modelParameters) + estimatorSettings <- camelCaseToSnakeCaseNames(estimatorSettings) + if (!is.null(lrSettings)) { + lrSettings <- camelCaseToSnakeCaseNames(lrSettings) } -) + estimatorSettings <- evalEstimatorSettings(estimatorSettings) + + + lrFinder <- LRFinderClass(model=model, + model_parameters = modelParameters, + estimator_settings = estimatorSettings, + lr_settings = lrSettings) -#' Find learning rate that decreases loss the most -#' @description Method originated from https://arxiv.org/abs/1506.01186 but this -#' implementation draws inspiration from various other implementations such as -#' pytorch lightning, fastai, luz and pytorch-lr-finder. -#' @param dataset torch dataset, training dataset -#' @param modelType the function used to initialize the model -#' @param modelParams parameters used to initialize model -#' @param estimatorSettings settings for estimator to fit model -#' @param minLR lower bound of learning rates to search through -#' @param maxLR upper bound of learning rates to search through -#' @param numLR number of learning rates to go through -#' @param smooth smoothing to use on losses -#' @param divergenceThreshold if loss increases this amount above the minimum, stop. 
-#' @export -lrFinder <- function(dataset, modelType, modelParams, estimatorSettings, - minLR=1e-7, maxLR=1, numLR=100, smooth=0.05, - divergenceThreshold=4) { - torch::torch_manual_seed(seed=estimatorSettings$seed) - model <- do.call(modelType, modelParams) - if (is.function(estimatorSettings$device)) { - device = estimatorSettings$device() - } else {device = estimatorSettings$device} - model$to(device=device) - - optimizer <- estimatorSettings$optimizer(model$parameters, lr=minLR) - - # made a special lr scheduler for this task - scheduler <- lrPerBatch(optimizer = optimizer, - startLR = minLR, - endLR = maxLR, - nIters = numLR) - - criterion <- estimatorSettings$criterion() - - batchIndex <- seq(length(dataset)) - set.seed(estimatorSettings$seed) - - losses <- numeric(numLR) - lrs <- numeric(numLR) - ParallelLogger::logInfo('\nSearching for best learning rate') - progressBar <- utils::txtProgressBar(style = 3) - for (i in seq(numLR)) { - optimizer$zero_grad() - - batch <- dataset[sample(batchIndex, estimatorSettings$batchSize)] - batch <- batchToDevice(batch, device=device) - - output <- model(batch$batch) - - loss <- criterion(output, batch$target) - if (!is.null(smooth) && i != 1) { - losses[i] <- smooth * loss$item() + (1 - smooth) * losses[i-1] - } else { - losses[i] <- loss$item() - } - lrs[i] <- optimizer$param_groups[[1]]$lr - - loss$backward() - optimizer$step() - scheduler$step() - utils::setTxtProgressBar(progressBar, i / numLR) - - if (i == 1) { - bestLoss <- losses[i] - } else { - if (losses[i] < bestLoss) { - bestLoss <- losses[i] - } - } - - if (losses[i] > (divergenceThreshold * bestLoss)) { - ParallelLogger::logInfo("\nLoss diverged - stopped early") - break - } - - } - - # find LR where gradient is highest but before global minimum is reached - # I added -5 to make sure it is not still in the minimum - globalMinimum <- which.min(losses) - grad <- as.numeric(torch::torch_diff(torch::torch_tensor(losses[1:(globalMinimum-5)]))) - smallestGrad <- which.min(grad) - - suggestedLR <- lrs[smallestGrad] - - return(suggestedLR) -} \ No newline at end of file + return(lrFinder) +} diff --git a/R/MLP.R b/R/MLP.R index 2b9c8b1..175cc46 100644 --- a/R/MLP.R +++ b/R/MLP.R @@ -25,8 +25,8 @@ #' Model architecture #' #' -#' @param numLayers Number of layers in network, default: 1:16 -#' @param sizeHidden Amount of neurons in each default layer, default: 2^(6:10) (64 to 1024) +#' @param numLayers Number of layers in network, default: 1:8 +#' @param sizeHidden Amount of neurons in each default layer, default: 2^(6:9) (64 to 512) #' @param dropout How much dropout to apply after first linear, default: seq(0, 0.3, 0.05) #' @param sizeEmbedding Size of embedding layer, default: 2^(6:9) (64 to 512) #' @param estimatorSettings settings of Estimator created with `setEstimator` @@ -35,16 +35,16 @@ #' @param randomSampleSeed Random seed to sample hyperparameter combinations #' #' @export -setMultiLayerPerceptron <- function(numLayers = c(1:8), - sizeHidden = c(2^(6:9)), - dropout = c(seq(0, 0.5, 0.05)), - sizeEmbedding = c(2^(6:9)), +setMultiLayerPerceptron <- function(numLayers = as.integer(1:8), + sizeHidden = as.integer(2^(6:9)), + dropout = c(seq(0, 0.3, 0.05)), + sizeEmbedding = as.integer(2^(6:9)), estimatorSettings = setEstimator( learningRate = 'auto', weightDecay = c(1e-6, 1e-3), - batchSize = 1024, - epochs = 30, - device="cpu"), + batchSize = 1024L, + epochs = 30L, + device="cpu"), hyperParamSearch = "random", randomSample = 100, randomSampleSeed = NULL) { @@ -55,20 +55,20 @@ 
setMultiLayerPerceptron <- function(numLayers = c(1:8), dropout = dropout, sizeEmbedding = sizeEmbedding ) - + paramGrid <- c(paramGrid, estimatorSettings$paramsToTune) - + param <- PatientLevelPrediction::listCartesian(paramGrid) if (hyperParamSearch == "random" && randomSample>length(param)) { - stop(paste("\n Chosen amount of randomSamples is higher than the amount of possible hyperparameter combinations.", + stop(paste("\n Chosen amount of randomSamples is higher than the amount of possible hyperparameter combinations.", "\n randomSample:", randomSample,"\n Possible hyperparameter combinations:", length(param), "\n Please lower the amount of randomSamples")) } - + if (hyperParamSearch == "random") { suppressWarnings(withr::with_seed(randomSampleSeed, {param <- param[sample(length(param), randomSample)]})) } - attr(param, 'settings')$modelType <- "MLP" + attr(param, 'settings')$modelType <- "MLP" results <- list( fitFunction = "fitEstimator", @@ -86,82 +86,3 @@ setMultiLayerPerceptron <- function(numLayers = c(1:8), return(results) } - - -MLP <- torch::nn_module( - name = "MLP", - initialize = function(catFeatures, numFeatures = 0, sizeEmbedding, sizeHidden, numLayers, - activation = torch::nn_relu, - normalization = torch::nn_batch_norm1d, dropout = NULL, - d_out = 1) { - self$embedding <- torch::nn_embedding_bag( - num_embeddings = catFeatures + 1, - embedding_dim = sizeEmbedding, - padding_idx = 1 - ) - if (numFeatures != 0) { - self$numEmbedding <- numericalEmbedding(numFeatures, sizeEmbedding) - } - - self$first_layer <- torch::nn_linear(sizeEmbedding, sizeHidden) - - - self$layers <- torch::nn_module_list(lapply( - 1:numLayers, - function(x) { - MLPLayer( - sizeHidden, - normalization, activation, - dropout - ) - } - )) - self$lastNorm <- normalization(sizeHidden) - self$head <- torch::nn_linear(sizeHidden, d_out) - - self$lastAct <- activation() - }, - forward = function(x) { - x_cat <- x$cat - x_num <- x$num - x_cat <- self$embedding(x_cat + 1L) # padding_idx is 1 - if (!is.null(x_num)) { - x <- (x_cat + self$numEmbedding(x_num)$mean(dim = 2)) / 2 - } else { - x <- x_cat - } - x <- self$first_layer(x) - - for (i in 1:length(self$layers)) { - x <- self$layers[[i]](x) - } - x <- self$lastNorm(x) - x <- self$lastAct(x) - x <- self$head(x) - x <- x$squeeze(-1) - return(x) - } -) - -MLPLayer <- torch::nn_module( - name = "MLPLayer", - initialize = function(sizeHidden = 64, - normalization = torch::nn_batch_norm1d, - activation = torch::nn_relu, - dropout = 0.0, bias = TRUE) { - self$norm <- normalization(sizeHidden) - self$activation <- activation() - self$linear <- torch::nn_linear(sizeHidden, sizeHidden, bias = bias) - - if (!is.null(dropout) | !dropout == 0.0) { - self$dropout <- torch::nn_dropout(p = dropout) - } - }, - forward = function(x) { - x <- self$linear(self$norm(x)) - if (!is.null(self$dropout)) { - x <- self$dropout(x) - } - return(self$activation(x)) - } -) diff --git a/R/ResNet.R b/R/ResNet.R index dc610b5..8620687 100644 --- a/R/ResNet.R +++ b/R/ResNet.R @@ -31,15 +31,15 @@ setDefaultResNet <- function(estimatorSettings=setEstimator(learningRate='auto', weightDecay=1e-6, device='cpu', - batchSize=1024, - epochs=50, + batchSize=1024L, + epochs=50L, seed=NULL)) { - resnetSettings <- setResNet(numLayers = 6, - sizeHidden = 512, - hiddenFactor = 2, + resnetSettings <- setResNet(numLayers = 6L, + sizeHidden = 512L, + hiddenFactor = 2L, residualDropout = 0.1, hiddenDropout = 0.4, - sizeEmbedding = 256, + sizeEmbedding = 256L, estimatorSettings = estimatorSettings, 
hyperParamSearch = 'random', randomSample = 1) @@ -68,17 +68,17 @@ setDefaultResNet <- function(estimatorSettings=setEstimator(learningRate='auto', #' @param randomSample How many random samples from hyperparameter space to use #' @param randomSampleSeed Random seed to sample hyperparameter combinations #' @export -setResNet <- function(numLayers = c(1:8), - sizeHidden = c(2^(6:10)), - hiddenFactor = c(1:4), +setResNet <- function(numLayers = as.integer(1:8), + sizeHidden = as.integer(2^(6:10)), + hiddenFactor = as.integer(1:4), residualDropout = c(seq(0, 0.5, 0.05)), hiddenDropout = c(seq(0, 0.5, 0.05)), - sizeEmbedding = c(2^(6:9)), + sizeEmbedding = as.integer(2^(6:9)), estimatorSettings = setEstimator(learningRate='auto', weightDecay=c(1e-6, 1e-3), device='cpu', - batchSize=1024, - epochs=30, + batchSize=1024L, + epochs=30L, seed=NULL), hyperParamSearch = "random", randomSample = 100, @@ -121,99 +121,3 @@ setResNet <- function(numLayers = c(1:8), return(results) } - -ResNet <- torch::nn_module( - name = "ResNet", - initialize = function(catFeatures, numFeatures = 0, sizeEmbedding, sizeHidden, numLayers, - hiddenFactor, activation = torch::nn_relu, - normalization = torch::nn_batch_norm1d, hiddenDropout = NULL, - residualDropout = NULL, d_out = 1, concatNum=TRUE) { - self$embedding <- torch::nn_embedding_bag( - num_embeddings = catFeatures + 1, - embedding_dim = sizeEmbedding, - padding_idx = 1 - ) - if (numFeatures != 0 & concatNum != TRUE) { - self$numEmbedding <- numericalEmbedding(numFeatures, sizeEmbedding) - } else { - self$numEmbedding <- NULL - sizeEmbedding <- sizeEmbedding + numFeatures - } - - self$first_layer <- torch::nn_linear(sizeEmbedding, sizeHidden) - - resHidden <- sizeHidden * hiddenFactor - - self$layers <- torch::nn_module_list(lapply( - 1:numLayers, - function(x) { - ResLayer( - sizeHidden, resHidden, - normalization, activation, - hiddenDropout, - residualDropout - ) - } - )) - self$lastNorm <- normalization(sizeHidden) - self$head <- torch::nn_linear(sizeHidden, d_out) - - self$lastAct <- activation() - }, - forward = function(x) { - x_cat <- x$cat - x_num <- x$num - x_cat <- self$embedding(x_cat + 1L) # padding_idx is 1 - if (!is.null(x_num) & (!is.null(self$numEmbedding))) { - x <- (x_cat + self$numEmbedding(x_num)$mean(dim = 2)) / 2 - } else if (!is.null(x_num) & is.null(self$numEmbedding)) { - x <- torch::torch_cat(list(x_cat, x_num), dim = 2L) - } else { - x <- x_cat - } - x <- self$first_layer(x) - - for (i in 1:length(self$layers)) { - x <- self$layers[[i]](x) - } - x <- self$lastNorm(x) - x <- self$lastAct(x) - x <- self$head(x) - x <- x$squeeze(-1) - return(x) - } -) - -ResLayer <- torch::nn_module( - name = "ResLayer", - initialize = function(sizeHidden, resHidden, normalization, - activation, hiddenDropout = NULL, residualDropout = NULL) { - self$norm <- normalization(sizeHidden) - self$linear0 <- torch::nn_linear(sizeHidden, resHidden) - self$linear1 <- torch::nn_linear(resHidden, sizeHidden) - - if (!is.null(hiddenDropout)) { - self$hiddenDropout <- torch::nn_dropout(p = hiddenDropout) - } - if (!is.null(residualDropout)) { - self$residualDropout <- torch::nn_dropout(p = residualDropout) - } - - self$activation <- activation() - }, - forward = function(x) { - z <- x - z <- self$norm(z) - z <- self$linear0(z) - z <- self$activation(z) - if (!is.null(self$hiddenDropout)) { - z <- self$hiddenDropout(z) - } - z <- self$linear1(z) - if (!is.null(self$residualDropout)) { - z <- self$residualDropout(z) - } - x <- z + x - return(x) - } -) diff --git 
a/R/Transformer.R b/R/Transformer.R index 04212fd..b520b7c 100644 --- a/R/Transformer.R +++ b/R/Transformer.R @@ -17,29 +17,29 @@ # limitations under the License. #' Create default settings for a non-temporal transformer -#' +#' #' @description A transformer model with default hyperparameters #' @details from https://arxiv.org/abs/2106.11959 #' Default hyperparameters from paper #' @param estimatorSettings created with `setEstimator` -#' +#' #' @export setDefaultTransformer <- function(estimatorSettings=setEstimator( learningRate = 'auto', weightDecay = 1e-4, - batchSize=512, - epochs=10, + batchSize=512L, + epochs=10L, seed=NULL, device='cpu') ) { - transformerSettings <- setTransformer(numBlocks = 3, - dimToken = 192, - dimOut = 1, - numHeads = 8, + transformerSettings <- setTransformer(numBlocks = 3L, + dimToken = 192L, + dimOut = 1L, + numHeads = 8L, attDropout = 0.2, ffnDropout = 0.1, resDropout = 0.0, - dimHidden = 256, + dimHidden = 256L, estimatorSettings=estimatorSettings, hyperParamSearch = 'random', randomSample = 1) @@ -94,7 +94,7 @@ setTransformer <- function(numBlocks = 3, dimToken = 96, dimOut = 1, dimHidden <- dimHiddenRatio } } - + paramGrid <- list( numBlocks = numBlocks, dimToken = dimToken, @@ -105,9 +105,9 @@ setTransformer <- function(numBlocks = 3, dimToken = 96, dimOut = 1, ffnDropout = ffnDropout, resDropout = resDropout ) - + paramGrid <- c(paramGrid, estimatorSettings$paramsToTune) - + param <- PatientLevelPrediction::listCartesian(paramGrid) if (!is.null(dimHiddenRatio)) { @@ -118,15 +118,15 @@ setTransformer <- function(numBlocks = 3, dimToken = 96, dimOut = 1, } if (hyperParamSearch == "random" && randomSample>length(param)) { - stop(paste("\n Chosen amount of randomSamples is higher than the amount of possible hyperparameter combinations.", + stop(paste("\n Chosen amount of randomSamples is higher than the amount of possible hyperparameter combinations.", "\n randomSample:", randomSample,"\n Possible hyperparameter combinations:", length(param), "\n Please lower the amount of randomSample")) } - + if (hyperParamSearch == "random") { suppressWarnings(withr::with_seed(randomSampleSeed, {param <- param[sample(length(param), randomSample)]})) } - attr(param, 'settings')$modelType <- "Transformer" + attr(param, 'settings')$modelType <- "Transformer" results <- list( fitFunction = "fitEstimator", param = param, @@ -141,225 +141,4 @@ setTransformer <- function(numBlocks = 3, dimToken = 96, dimOut = 1, class(results) <- "modelSettings" return(results) -} - - -Transformer <- torch::nn_module( - name = "Transformer", - initialize = function(catFeatures, numFeatures, numBlocks, dimToken, dimOut = 1, - numHeads, attDropout, ffnDropout, resDropout, - headActivation = torch::nn_relu, - activation = NULL, - ffnNorm = torch::nn_layer_norm, - headNorm = torch::nn_layer_norm, - attNorm = torch::nn_layer_norm, - dimHidden) { - activation <- nn_reglu - self$Categoricalembedding <- Embedding(catFeatures + 1, dimToken) # + 1 for padding idx - self$numericalEmbedding <- numericalEmbedding(numFeatures, dimToken) - self$classToken <- ClassToken(dimToken) - - self$layers <- torch::nn_module_list(lapply( - 1:numBlocks, - function(x) { - layer <- torch::nn_module_list() - layer$add_module("attention", torch::nn_multihead_attention(dimToken, numHeads, - dropout = attDropout, - bias = TRUE - )) - layer$add_module("ffn", FeedForwardBlock(dimToken, dimHidden, - biasFirst = TRUE, - biasSecond = TRUE, - dropout = ffnDropout, - activation = activation - )) - 
layer$add_module("attentionResDropout", torch::nn_dropout(resDropout)) - layer$add_module("ffnResDropout", torch::nn_dropout(resDropout)) - layer$add_module("ffnNorm", ffnNorm(dimToken)) - - if (x != 1) { - layer$add_module("attentionNorm", attNorm(dimToken)) - } - return(layer) - } - )) - self$head <- Head(dimToken, - bias = TRUE, activation = headActivation, - headNorm, dimOut - ) - }, - forward = function(x) { - mask <- torch::torch_where(x$cat == 0, TRUE, FALSE) - input <- x - cat <- self$Categoricalembedding(x$cat) - if (!is.null(input$num)) { - num <- self$numericalEmbedding(input$num) - x <- torch::torch_cat(list(cat, num), dim = 2L) - mask <- torch::torch_cat(list(mask, torch::torch_zeros(c( - x$shape[1], - num$shape[2] - ), - device = mask$device, - dtype = mask$dtype - )), - dim = 2L - ) - } else { - x <- cat - } - x <- self$classToken(x) - mask <- torch::torch_cat(list(mask, torch::torch_zeros(c(x$shape[1], 1), - device = mask$device, - dtype = mask$dtype - )), - dim = 2L - ) - for (i in 1:length(self$layers)) { - layer <- self$layers[[i]] - xResidual <- self$startResidual(layer, "attention", x) - - if (i == length(self$layers)) { - dims <- xResidual$shape - # in final layer take only attention on CLS token - xResidual <- layer$attention( - xResidual[, -1]$view(c(dims[1], 1, dims[3]))$transpose(1, 2), - xResidual$transpose(1, 2), - xResidual$transpose(1, 2), mask - ) - attnWeights <- xResidual[[2]] - xResidual <- xResidual[[1]] - x <- x[, -1]$view(c(dims[1], 1, dims[3])) - } else { - # attention input is seq_length x batch_size x embedding_dim - xResidual <- layer$attention( - xResidual$transpose(1, 2), - xResidual$transpose(1, 2), - xResidual$transpose(1, 2), - mask, - )[[1]] - } - x <- self$endResidual(layer, "attention", x, xResidual$transpose(1, 2)) - - xResidual <- self$startResidual(layer, "ffn", x) - xResidual <- layer$ffn(xResidual) - x <- self$endResidual(layer, "ffn", x, xResidual) - } - x <- self$head(x)[, 1] # remove singleton dimension - return(x) - }, - startResidual = function(layer, stage, x) { - xResidual <- x - normKey <- paste0(stage, "Norm") - if (normKey %in% names(as.list(layer))) { - xResidual <- layer[[normKey]](xResidual) - } - return(xResidual) - }, - endResidual = function(layer, stage, x, xResidual) { - dropoutKey <- paste0(stage, "ResDropout") - xResidual <- layer[[dropoutKey]](xResidual) - x <- x + xResidual - return(x) - } -) - - -FeedForwardBlock <- torch::nn_module( - name = "FeedForwardBlock", - initialize = function(dimToken, dimHidden, biasFirst, biasSecond, - dropout, activation) { - self$linearFirst <- torch::nn_linear(dimToken, dimHidden * 2, biasFirst) - self$activation <- activation() - self$dropout <- torch::nn_dropout(dropout) - self$linearSecond <- torch::nn_linear(dimHidden, dimToken, biasSecond) - }, - forward = function(x) { - x <- self$linearFirst(x) - x <- self$activation(x) - x <- self$dropout(x) - x <- self$linearSecond(x) - return(x) - } -) - -Head <- torch::nn_module( - name = "Head", - initialize = function(dimIn, bias, activation, normalization, dimOut) { - self$normalization <- normalization(dimIn) - self$activation <- activation() - self$linear <- torch::nn_linear(dimIn, dimOut, bias) - }, - forward = function(x) { - x <- x[, -1] # ? 
- x <- self$normalization(x) - x <- self$activation(x) - x <- self$linear(x) - return(x) - } -) - -Embedding <- torch::nn_module( - name = "Embedding", - initialize = function(numEmbeddings, embeddingDim) { - self$embedding <- torch::nn_embedding(numEmbeddings, embeddingDim, padding_idx = 1) - }, - forward = function(x_cat) { - x <- self$embedding(x_cat + 1L) # padding idx is 1L - return(x) - } -) - -numericalEmbedding <- torch::nn_module( - name = "numericalEmbedding", - initialize = function(numEmbeddings, embeddingDim, bias = TRUE) { - self$weight <- torch::nn_parameter(torch::torch_empty(numEmbeddings, embeddingDim)) - if (bias) { - self$bias <- torch::nn_parameter(torch::torch_empty(numEmbeddings, embeddingDim)) - } else { - self$bias <- NULL - } - - for (parameter in list(self$weight, self$bias)) { - if (!is.null(parameter)) { - torch::nn_init_kaiming_uniform_(parameter, a = sqrt(5)) - } - } - }, - forward = function(x) { - x <- self$weight$unsqueeze(1) * x$unsqueeze(-1) - if (!is.null(self$bias)) { - x <- x + self$bias$unsqueeze(1) - } - return(x) - } -) - -# adds a class token embedding to embeddings -ClassToken <- torch::nn_module( - name = "ClassToken", - initialize = function(dimToken) { - self$weight <- torch::nn_parameter(torch::torch_empty(dimToken, 1)) - torch::nn_init_kaiming_uniform_(self$weight, a = sqrt(5)) - }, - expand = function(dims) { - newDims <- vector("integer", length(dims) - 1) + 1 - return(self$weight$view(c(newDims, -1))$expand(c(dims, -1))) - }, - forward = function(x) { - return(torch::torch_cat(c(x, self$expand(c(dim(x)[[1]], 1))), dim = 2)) - } -) - -nn_reglu <- torch::nn_module( - name = "ReGlu", - forward = function(x) { - return(reglu(x)) - } -) - - -reglu <- function(x) { - chunks <- x$chunk(2, dim = -1) - - return(chunks[[1]] * torch::nnf_relu(chunks[[2]])) -} +} \ No newline at end of file diff --git a/extras/example.R b/extras/example.R index 49ead98..99a6c08 100644 --- a/extras/example.R +++ b/extras/example.R @@ -4,49 +4,70 @@ library(PatientLevelPrediction) library(DeepPatientLevelPrediction) data(plpDataSimulationProfile) -sampleSize <- 1e4 +sampleSize <- 1e3 plpData <- simulatePlpData( - plpDataSimulationProfile, - n = sampleSize -) + plpDataSimulationProfile, + n = sampleSize + ) + populationSet <- PatientLevelPrediction::createStudyPopulationSettings( requireTimeAtRisk = F, riskWindowStart = 1, - riskWindowEnd = 365) + riskWindowEnd = 365*5) + +# +# modelSettings <- setDefaultTransformer(estimatorSettings = setEstimator( +# learningRate = "auto", +# batchSize=64L, +# epochs = 10L +# )) - -modelSettings <- setDefaultResNet(estimatorSettings = setEstimator(epochs=1L, - device='cuda:0', - batchSize=128L)) +modelSettings <- setDefaultResNet(estimatorSettings = setEstimator( + learningRate = "auto", + weightDecay = 1e-06, + device="cuda:0", + batchSize=128L, + epochs=50L, + seed=42 +)) -# modelSettings <- setTransformer(numBlocks=1, dimToken = 33, dimOut = 1, numHeads = 3, -# attDropout = 0.2, ffnDropout = 0.2, resDropout = 0, -# dimHidden = 8, batchSize = 32, hyperParamSearch = 'random', -# weightDecay = 1e-6, learningRate = 3e-4, epochs = 10, -# device = 'cuda:0', randomSamples = 1, seed = 42) +modelSettings <- setResNet(numLayers = c(1L, 2L), + sizeHidden = 72L, + hiddenFactor = 1L, + residualDropout = 0.0, + hiddenDropout = 0.0, + sizeEmbedding = 64L, + estimatorSettings = setEstimator( + learningRate = 3e-4, + batchSize = 128L, + epochs = 10L, + device = "cpu", + seed = 42 + ), + randomSample = 2) res2 <- PatientLevelPrediction::runPlp( 
-plpData = plpData, -outcomeId = 3, -modelSettings = modelSettings, -analysisId = 'Test', -analysisName = 'Testing DeepPlp', -populationSettings = populationSet, -splitSettings = createDefaultSplitSetting(), -sampleSettings = createSampleSettings(), # none -featureEngineeringSettings = createFeatureEngineeringSettings(), # none -preprocessSettings = createPreprocessSettings(), -logSettings = createLogSettings(verbosity='TRACE'), -executeSettings = createExecuteSettings( - runSplitData = T, - runSampleData = F, - runfeatureEngineering = F, - runPreprocessData = T, - runModelDevelopment = T, - runCovariateSummary = T -), -saveDirectory = '~/test/new_plp/' + plpData = plpData, + outcomeId = unique(plpData$outcomes$outcomeId)[[1]], + modelSettings = modelSettings, + analysisId = 'Test', + analysisName = 'Testing DeepPlp', + populationSettings = populationSet, + splitSettings = createDefaultSplitSetting(splitSeed = 123), + sampleSettings = createSampleSettings("underSample"), # none + featureEngineeringSettings = createFeatureEngineeringSettings(), # none + preprocessSettings = createPreprocessSettings(normalize = F), + logSettings = createLogSettings(verbosity='TRACE'), + executeSettings = createExecuteSettings( + runSplitData = T, + runSampleData = T, + runfeatureEngineering = F, + runPreprocessData = T, + runModelDevelopment = T, + runCovariateSummary = F + ), + saveDirectory = '~/test/resnet/' ) diff --git a/inst/python/Dataset.py b/inst/python/Dataset.py new file mode 100644 index 0000000..15d749b --- /dev/null +++ b/inst/python/Dataset.py @@ -0,0 +1,102 @@ +import time +import pathlib +import urllib + +import polars as pl +import torch +from torch.utils.data import Dataset + + +class Data(Dataset): + def __init__(self, + data, + labels=None, + numerical_features=None): + """ + data: path to a covariates dataframe either arrow dataset or sqlite object + labels: a list of either 0 or 1, 1 if the patient got the outcome + numerical_features: list of indices where the numerical features are + """ + start = time.time() + if pathlib.Path(data).suffix == '.sqlite': + data = urllib.parse.quote(data) + data = pl.read_database("SELECT * from covariates", + connection_uri=f"sqlite://{data}").lazy() + else: + data = pl.scan_ipc(pathlib.Path(data).joinpath('covariates/*.arrow')) + observations = data.select(pl.col('rowId').max()).collect()[0, 0] + # detect features are numeric + if numerical_features is None: + self.numerical_features = data.groupby(by='columnId') \ + .n_unique().filter(pl.col('covariateValue') > 1).select('columnId').collect()['columnId'] + else: + self.numerical_features = pl.Series('num', numerical_features) + + if labels: + self.target = torch.as_tensor(labels) + else: + self.target = torch.zeros(size=(observations,)) + + # filter by categorical columns, + # sort and group_by columnId + # create newColumnId from 1 (or zero?) until # catColumns + # select rowId and newColumnId + # rename newColumnId to columnId and sort by it + data_cat = data.filter(~pl.col('columnId') + .is_in(self.numerical_features)) \ + .sort(by='columnId').with_row_count('newColumnId'). \ + with_columns(pl.col('newColumnId').first().over('columnId') + .rank(method="dense")) \ + .select(pl.col('rowId'), pl.col('newColumnId').alias('columnId')).sort('rowId') \ + .with_columns(pl.col('rowId') - 1).collect() + cat_tensor = torch.as_tensor(data_cat.to_numpy()) + tensor_list = torch.split(cat_tensor[:, 1], torch.unique_consecutive(cat_tensor[:, 0], return_counts=True)[1]. 
+ tolist()) + + # because of subjects without cat features, I need to create a list with all zeroes and then insert + # my tensorList. That way I can still index the dataset correctly. + total_list = [torch.as_tensor((0,))] * observations + idx = data_cat['rowId'].unique().to_list() + for i, i2 in enumerate(idx): + total_list[i2] = tensor_list[i] + self.cat = torch.nn.utils.rnn.pad_sequence(total_list, batch_first=True) + self.cat_features = data_cat['columnId'].unique() + + # numerical data, + # N x C, dense matrix with values for N patients/visits for C numerical features + if pl.count(self.numerical_features) == 0: + self.num = None + else: + numerical_data = data.filter(pl.col('columnId').is_in(self.numerical_features)). \ + with_row_count('newColumnId').with_columns(pl.col('newColumnId').first().over('columnId'). + rank(method="dense") - 1, pl.col('rowId') - 1) \ + .select(pl.col('rowId'), pl.col('newColumnId').alias('columnId'), pl.col('covariateValue')).collect() + indices = torch.as_tensor(numerical_data.select(['rowId', 'columnId']).to_numpy(), dtype=torch.long) + values = torch.as_tensor(numerical_data.select('covariateValue').to_numpy(), dtype=torch.float) + self.num = torch.sparse_coo_tensor(indices=indices.T, + values=values.squeeze(), + size=(observations, pl.count(self.numerical_features))).to_dense() + delta = time.time() - start + print(f'Processed data in {delta:.2f} seconds') + + def get_numerical_features(self): + return self.numerical_features + + def get_cat_features(self): + return self.cat_features + + def __len__(self): + return self.target.size()[0] + + def __getitem__(self, item): + if self.num is not None: + batch = {"cat": self.cat[item, :], + "num": self.num[item, :]} + else: + batch = {"cat": self.cat[item, :].squeeze(), + "num": None} + if batch["cat"].dim() == 1 and not isinstance(item, list): + batch["cat"] = batch["cat"].unsqueeze(0) + if batch["num"] is not None and batch["num"].dim() == 1 and not isinstance(item, list): + batch["num"] = batch["num"].unsqueeze(0) + return [batch, self.target[item].squeeze()] diff --git a/inst/python/Estimator.py b/inst/python/Estimator.py new file mode 100644 index 0000000..5997817 --- /dev/null +++ b/inst/python/Estimator.py @@ -0,0 +1,355 @@ +import time +import pathlib + +import torch +from torch.utils.data import DataLoader, BatchSampler, RandomSampler, SequentialSampler +import torch.nn.functional as F +from tqdm import tqdm +from sklearn.metrics import roc_auc_score + + +class Estimator: + """ + A class that wraps around pytorch models. 
+ """ + + def __init__(self, + model, + model_parameters, + estimator_settings): + self.seed = estimator_settings["seed"] + if callable(estimator_settings["device"]): + self.device = estimator_settings["device"]() + else: + self.device = estimator_settings["device"] + + torch.manual_seed(seed=self.seed) + self.model = model(**model_parameters) + self.model_parameters = model_parameters + self.estimator_settings = estimator_settings + + self.epochs = estimator_settings.get("epochs", 5) + self.learning_rate = estimator_settings.get("learning_rate", 3e-4) + self.weight_decay = estimator_settings.get("weight_decay", 1e-5) + self.batch_size = estimator_settings.get("batch_size", 1024) + self.prefix = estimator_settings.get("prefix", self.model.name) + + self.previous_epochs = estimator_settings.get("previous_epochs", 0) + self.model.to(device=self.device) + + self.optimizer = estimator_settings["optimizer"](params=self.model.parameters(), + lr=self.learning_rate, + weight_decay=self.weight_decay) + self.criterion = estimator_settings["criterion"]() + + if "metric" in estimator_settings.keys() and estimator_settings["metric"] is not None: + self.metric = estimator_settings["metric"] + if type(self.metric) == str: + if self.metric == "auc": + self.metric = {"name": "auc", + "mode": "max"} + elif self.metric == "loss": + self.metric = {"name": "loss", + "mode": "min"} + if "scheduler" in estimator_settings.keys() and estimator_settings["scheduler"] is not None: + estimator_settings["scheduler"]["params"]["mode"] = self.metric["mode"] + if "early_stopping" in estimator_settings.keys() and estimator_settings["early_stopping"] is not None: + estimator_settings["early_stopping"]["params"]["mode"] = self.metric["mode"] + + if "scheduler" in estimator_settings.keys() and estimator_settings["scheduler"] is not None: + self.scheduler = estimator_settings["scheduler"]["fun"](self.optimizer, + **estimator_settings["scheduler"]["params"]) + + if "early_stopping" in estimator_settings.keys() and estimator_settings["early_stopping"] is not None: + self.early_stopper = EarlyStopping(**estimator_settings["early_stopping"]["params"]) + else: + self.early_stopper = None + + self.best_score = None + self.best_epoch = None + self.learn_rate_schedule = None + + def fit(self, dataset, test_dataset): + + train_dataloader = DataLoader(dataset=dataset, + batch_size=None, + sampler=BatchSampler( + sampler=RandomSampler(dataset), + batch_size=self.batch_size, + drop_last=True + )) + test_dataloader = DataLoader(dataset=test_dataset, + batch_size=None, + sampler=BatchSampler( + sampler=SequentialSampler(test_dataset), + batch_size=self.batch_size, + drop_last=False + )) + + trained_epochs = dict() + times = list() + learning_rates = list() + all_scores = list() + model_state_dict = dict() + for epoch in range(self.epochs): + start_time = time.time() + training_loss = self.fit_epoch(train_dataloader) + scores = self.score(test_dataloader) + end_time = time.time() + delta_time = end_time - start_time + current_epoch = epoch + self.previous_epochs + lr = self.optimizer.param_groups[0]["lr"] + self.print_progress(scores, training_loss, delta_time, current_epoch) + self.scheduler.step(scores["metric"]) + all_scores.append(scores) + learning_rates.append(lr) + times.append(round(delta_time, 3)) + + if self.early_stopper: + self.early_stopper(scores['metric']) + if self.early_stopper.improved: + model_state_dict[epoch] = self.model.state_dict() + trained_epochs[epoch] = current_epoch + if self.early_stopper.early_stop: + 
print("Early stopping, validation metric stopped improving") + print(f'Average time per epoch was: {torch.mean(torch.as_tensor(times)).item():.2f} seconds') + self.finish_fit(all_scores, model_state_dict, trained_epochs, learning_rates) + return + else: + model_state_dict[epoch] = self.model.state_dict() + trained_epochs[epoch] = current_epoch + print(f'Average time per epoch was: {torch.mean(torch.as_tensor(times)).item()} seconds') + self.finish_fit(all_scores, model_state_dict, trained_epochs, learning_rates) + return + + def fit_epoch(self, dataloader): + training_losses = torch.empty(len(dataloader)) + self.model.train() + index = 0 + for batch in tqdm(dataloader): + self.optimizer.zero_grad() + batch = batch_to_device(batch, device=self.device) + out = self.model(batch[0]) + loss = self.criterion(out, batch[1]) + loss.backward() + + self.optimizer.step() + training_losses[index] = loss.detach() + index += 1 + return training_losses.mean().item() + + def score(self, dataloader): + with torch.no_grad(): + loss = torch.empty(len(dataloader)) + predictions = list() + targets = list() + self.model.eval() + index = 0 + for batch in tqdm(dataloader): + batch = batch_to_device(batch, device=self.device) + pred = self.model(batch[0]) + predictions.append(pred) + targets.append(batch[1]) + loss[index] = self.criterion(pred, batch[1]) + index += 1 + mean_loss = loss.mean().item() + predictions = torch.concat(predictions) + targets = torch.concat(targets) + auc = roc_auc_score(targets.cpu(), predictions.cpu()) + # auc = compute_auc(predictions, targets) + scores = dict() + if self.metric: + if self.metric["name"] == "auc": + scores["metric"] = auc + elif self.metric["name"] == "loss": + scores["metric"] = mean_loss + else: + metric = self.metric["fun"](predictions, targets) + scores["metric"] = metric + scores["auc"] = auc + scores["loss"] = mean_loss + return scores + + def finish_fit(self, scores, model_state_dict, epoch, learning_rates): + if self.metric["mode"] == "max": + best_epoch_index = torch.argmax(torch.as_tensor([x["metric"] for x in scores])).item() + elif self.metric["mode"] == "min": + best_epoch_index = torch.argmin(torch.as_tensor([x["metric"] for x in scores])).item() + + best_model_state_dict = model_state_dict[best_epoch_index] + self.model.load_state_dict(best_model_state_dict) + + self.best_epoch = epoch[best_epoch_index] + self.best_score = {"loss": scores[best_epoch_index]["loss"], + "auc": scores[best_epoch_index]["auc"]} + self.learn_rate_schedule = learning_rates[:(best_epoch_index+1)] + print(f"Loaded best model (based on AUC) from epoch {self.best_epoch}") + print(f"ValLoss: {self.best_score['loss']}") + print(f"valAUC: {self.best_score['auc']}") + if self.metric and self.metric["name"] != "auc" and self.metric["name"] != "loss": + self.best_score[self.metric["name"]] = scores[best_epoch_index]["metric"] + print(f"{self.metric['name']}: {self.best_score[self.metric['name']]}") + return + + def print_progress(self, scores, training_loss, delta_time, current_epoch): + if self.metric and self.metric["name"] != "auc" and self.metric["name"] != "loss": + print(f"Epochs: {current_epoch} | Val {self.metric['name']}: {scores['metric']:.3f} " + f"| Val AUC: {scores['auc']:.3f} | Val Loss: {scores['loss']:.3f} " + f"| Train Loss: {training_loss:.3f} | Time: {delta_time:.3f} seconds " + f"| LR: {self.optimizer.param_groups[0]['lr']}") + else: + print(f"Epochs: {current_epoch} " + f"| Val AUC: {scores['auc']:.3f} " + f"| Val Loss: {scores['loss']:.3f} " + f"| Train Loss: 
{training_loss:.3f} " + f"| Time: {delta_time:.3f} seconds " + f"| LR: {self.optimizer.param_groups[0]['lr']}") + return + + def fit_whole_training_set(self, dataset, learning_rates=None): + dataloader = DataLoader(dataset=dataset, + batch_size=None, + sampler=BatchSampler( + sampler=RandomSampler(dataset), + batch_size=self.batch_size, + drop_last=True + )) + if isinstance(learning_rates, list): + self.best_epoch = len(learning_rates) + elif ~isinstance(learning_rates, list): + learning_rates = [learning_rates] + self.best_epoch = len(learning_rates) + else: + self.best_epoch = self.epochs + + for epoch in range(self.best_epoch): + self.optimizer.param_groups[0]['lr'] = learning_rates[epoch] + self.fit_epoch(dataloader) + return + + def save(self, path, name): + save_path = pathlib.Path(path).joinpath(name) + out = dict( + model_state_dict=self.model.state_dict(), + model_parameters=self.model_parameters, + estimator_settings=self.estimator_settings, + epoch=self.epochs) + torch.save(out, + f=save_path) + return save_path + + def predict_proba(self, dataset): + dataloader = DataLoader(dataset=dataset, + batch_size=None, + sampler=BatchSampler( + sampler=SequentialSampler(dataset), + batch_size=self.batch_size, + drop_last=False + )) + with torch.no_grad(): + predictions = list() + self.model.eval() + for batch in tqdm(dataloader): + batch = batch_to_device(batch, device=self.device) + pred = self.model(batch[0]) + predictions.append(torch.sigmoid(pred)) + predictions = torch.concat(predictions).cpu().numpy() + return predictions + + def predict(self, dataset, threshold=None): + predictions = self.predict_proba(dataset) + + if threshold is None: + # use outcome rate + threshold = dataset.target.sum().item() / len(dataset) + predicted_class = predictions > threshold + + +class EarlyStopping: + + def __init__(self, + patience=3, + delta=0, + verbose=True, + mode='max'): + self.patience = patience + self.counter = 0 + self.verbose = verbose + self.best_score = None + self.early_stop = False + self.improved = False + self.delta = delta + self.previous_score = 0 + self.mode = mode + + def __call__(self, + metric): + if self.mode == 'max': + score = metric + else: + score = -1 * metric + if self.best_score is None: + self.best_score = score + self.improved = True + elif score < (self.best_score + self.delta): + self.counter += 1 + self.improved = False + if self.verbose: + print(f"Early stopping counter: {self.counter}" + f" out of {self.patience}") + if self.counter >= self.patience: + self.early_stop = True + else: + self.best_score = score + self.counter = 0 + self.improved = True + self.previous_score = score + + +def batch_to_device(batch, device='cpu'): + if torch.is_tensor(batch): + batch = batch.to(device=device) + else: + for ix, b in enumerate(batch): + if type(b) is str: + key = b + b = batch[b] + else: + key = None + if b is None: + continue + if torch.is_tensor(b): + b_out = b.to(device=device) + else: + b_out = batch_to_device(b, device) + if b_out is not None: + if key is not None: + batch[key] = b_out + else: + batch[ix] = b_out + return batch + + +def compute_auc(input, target, n_threshold=1000): + threshold = torch.linspace(0, 1.0, n_threshold).to(device=input.device) + pred_label = input >= threshold[:, None, None] + input_target = pred_label * target + + cum_tp = F.pad(input_target.sum(dim=-1).rot90(1, [1, 0]), (1, 0), value=0.0) + cum_fp = F.pad( + (pred_label.sum(dim=-1) - input_target.sum(dim=-1)).rot90(1, [1, 0]), + (1, 0), + value=0.0, + ) + + if len(cum_tp.shape) > 1: + 
factor = cum_tp[:, -1] * cum_fp[:, -1] + else: + factor = cum_tp[-1] * cum_fp[-1] + # Set AUROC to 0.5 when the target contains all ones or all zeros. + auroc = torch.where( + factor == 0, + 0.5, + torch.trapz(cum_tp, cum_fp).double() / factor, + ) + return auroc.item() diff --git a/inst/python/LrFinder.py b/inst/python/LrFinder.py new file mode 100644 index 0000000..4c24a38 --- /dev/null +++ b/inst/python/LrFinder.py @@ -0,0 +1,105 @@ +import random + +import torch +from torch.optim.lr_scheduler import _LRScheduler +from torch.utils.data import DataLoader, BatchSampler, RandomSampler +from tqdm import tqdm + +from Estimator import batch_to_device + + +class ExponentialSchedulerPerBatch(_LRScheduler): + + def __init__(self, optimizer, + end_lr, + num_iter): + self.end_lr = end_lr + self.num_iter = num_iter + super(ExponentialSchedulerPerBatch, self).__init__(optimizer, last_epoch=-1) + + def get_lr(self): + r = self.last_epoch / (self.num_iter - 1) + return [base_lr * (self.end_lr / base_lr) ** r for base_lr in self.base_lrs] + + +class LrFinder: + + def __init__(self, + model, + model_parameters, + estimator_settings, + lr_settings): + if lr_settings is None: + lr_settings = {} + min_lr = lr_settings.get("min_lr", 1e-7) + max_lr = lr_settings.get("max_lr", 1) + num_lr = lr_settings.get("num_lr", 100) + smooth = lr_settings.get("smooth", 0.05) + divergence_threshold = lr_settings.get("divergence_threshold", 4) + torch.manual_seed(seed=estimator_settings["seed"]) + self.model = model(**model_parameters) + if callable(estimator_settings["device"]): + self.device = estimator_settings["device"]() + else: + self.device = estimator_settings["device"] + self.model.to(device=self.device) + self.min_lr = min_lr + self.max_lr = max_lr + self.num_lr = num_lr + self.smooth = smooth + self.divergence_threshold = divergence_threshold + + self.optimizer = estimator_settings['optimizer'](params=self.model.parameters(), + lr=self.min_lr) + + self.scheduler = ExponentialSchedulerPerBatch(self.optimizer, self.max_lr, self.num_lr) + + self.criterion = estimator_settings["criterion"]() + self.batch_size = estimator_settings['batch_size'] + self.losses = None + self.loss_index = None + + def get_lr(self, dataset): + batch_index = torch.arange(0, len(dataset), 1).tolist() + + losses = torch.empty(size=(self.num_lr,), dtype=torch.float) + lrs = torch.empty(size=(self.num_lr,), dtype=torch.float) + for i in tqdm(range(self.num_lr)): + self.optimizer.zero_grad() + random_batch = random.sample(batch_index, 32) + batch = dataset[random_batch] + batch = batch_to_device(batch, self.device) + + out = self.model(batch[0]) + loss = self.criterion(out, batch[1]) + if self.smooth is not None and i != 0: + losses[i] = self.smooth * loss.item() + (1 - self.smooth) * losses[i - 1] + else: + losses[i] = loss.item() + lrs[i] = self.optimizer.param_groups[0]["lr"] + + loss.backward() + self.optimizer.step() + self.scheduler.step() + + if i == 0: + best_loss = losses[i] + else: + if losses[i] < best_loss: + best_loss = losses[i] + + if losses[i] > (self.divergence_threshold * best_loss): + print(f"Loss diverged - stopped early - iteration {i} out of {self.num_lr}") + break + + # find LR where gradient is highest but before global minimum is reached + # I added -5 to make sure it is not still in the minimum + global_minimum = torch.argmin(losses) + gradient = torch.diff(losses[:(global_minimum - 5)+1]) + smallest_gradient = torch.argmin(gradient) + + suggested_lr = lrs[smallest_gradient] + self.losses = losses + self.loss_index = 
smallest_gradient + self.lrs = lrs + return suggested_lr.item() diff --git a/inst/python/MLP.py b/inst/python/MLP.py new file mode 100644 index 0000000..278e324 --- /dev/null +++ b/inst/python/MLP.py @@ -0,0 +1,76 @@ +from torch import nn + +from ResNet import NumericalEmbedding + + +class MLP(nn.Module): + + def __init__(self, + cat_features, + num_features, + size_embedding, + size_hidden, + num_layers, + activation=nn.ReLU, + normalization=nn.BatchNorm1d, + dropout=None, + d_out=1): + super(MLP, self).__init__() + self.name = "MLP" + self.embedding = nn.EmbeddingBag(cat_features + 1, + size_embedding, + padding_idx=0) + + if num_features != 0 and num_features is not None: + self.num_embedding = NumericalEmbedding(num_features, size_embedding) + + self.first_layer = nn.Linear(size_embedding, size_hidden) + + self.layers = nn.ModuleList(MLPLayer(size_hidden=size_hidden, + normalization=normalization, + activation=activation, + dropout=dropout) + for _ in range(num_layers)) + self.last_norm = normalization(size_hidden) + self.head = nn.Linear(size_hidden, d_out) + + self.last_act = activation() + + def forward(self, input): + x_cat = input["cat"] + x_cat = self.embedding(x_cat) + if "num" in input.keys() and self.num_embedding is not None: + x_num = input["num"] + x = (x_cat + self.num_embedding(x_num).mean(dim=1)) / 2 + else: + x = x_cat + x = self.first_layer(x) + for layer in self.layers: + x = layer(x) + x = self.last_norm(x) + x = self.last_act(x) + x = self.head(x) + x = x.squeeze(-1) + return x + + +class MLPLayer(nn.Module): + def __init__(self, + size_hidden=64, + normalization=nn.BatchNorm1d, + activation=nn.ReLU, + dropout=0.0, + bias=True): + super(MLPLayer, self).__init__() + self.norm = normalization(size_hidden) + self.activation = activation() + self.linear = nn.Linear(size_hidden, size_hidden, bias=bias) + + if dropout != 0.0 or dropout is not None: + self.dropout = nn.Dropout(p=dropout) + + def forward(self, x): + x = self.linear(x) + if self.dropout: + x = self.dropout(x) + return self.activation(x) diff --git a/inst/python/ResNet.py b/inst/python/ResNet.py new file mode 100644 index 0000000..2ec6232 --- /dev/null +++ b/inst/python/ResNet.py @@ -0,0 +1,130 @@ +import math + +import torch +from torch import nn + + +class ResNet(nn.Module): + + def __init__(self, + cat_features, + num_features=0, + size_embedding=256, + size_hidden=256, + num_layers=2, + hidden_factor=1, + activation=nn.ReLU, + normalization=nn.BatchNorm1d, + hidden_dropout=0, + residual_dropout=0, + dim_out=1, + concat_num=True): + super(ResNet, self).__init__() + self.name = 'ResNet' + self.embedding = nn.EmbeddingBag( + num_embeddings=cat_features + 1, + embedding_dim=size_embedding, + padding_idx=0 + ) + if num_features != 0 and not concat_num: + self.num_embedding = NumericalEmbedding(num_features, size_embedding) + else: + self.num_embedding = None + size_embedding = size_embedding + num_features + + self.first_layer = nn.Linear(size_embedding, size_hidden) + + res_hidden = size_hidden * hidden_factor + + self.layers = nn.ModuleList(ResLayer(size_hidden, res_hidden, normalization, + activation, hidden_dropout, residual_dropout) + for _ in range(num_layers)) + + self.last_norm = normalization(size_hidden) + + self.head = nn.Linear(size_hidden, dim_out) + + self.last_act = activation() + + def forward(self, x): + x_cat = x["cat"] + x_cat = self.embedding(x_cat) + if "num" in x.keys() and x["num"] is not None and self.num_embedding is not None: + x_num = x["num"] + # take the average af numerical and 
categorical embeddings + x = (x_cat + self.num_embedding(x_num).mean(dim=1)) / 2 + elif "num" in x.keys() and x["num"] is not None and self.num_embedding is None: + x_num = x["num"] + # concatenate numerical to categorical embedding + x = torch.cat([x_cat, x_num], dim=1) + else: + x = x_cat + x = self.first_layer(x) + for layer in self.layers: + x = layer(x) + x = self.last_norm(x) + x = self.last_act(x) + x = self.head(x) + x = x.squeeze(-1) + return x + + +class ResLayer(nn.Module): + + def __init__(self, + size_hidden, + res_hidden, + normalization, + activation, + hidden_dropout=None, + residual_dropout=None): + super(ResLayer, self).__init__() + + self.norm = normalization(size_hidden) + self.linear0 = nn.Linear(size_hidden, res_hidden) + self.linear1 = nn.Linear(res_hidden, size_hidden) + + if hidden_dropout is not None: + self.hidden_dropout = nn.Dropout(p=hidden_dropout) + if residual_dropout is not None: + self.residual_dropout = nn.Dropout(p=residual_dropout) + self.activation = activation() + + def forward(self, input): + z = input + z = self.norm(z) + z = self.linear0(z) + z = self.activation(z) + if self.hidden_dropout is not None: + z = self.hidden_dropout(z) + z = self.linear1(z) + if self.residual_dropout is not None: + z = self.residual_dropout(z) + z = z + input + return z + + +class NumericalEmbedding(nn.Module): + def __init__(self, + num_embeddings, + embedding_dim, + bias=True): + super(NumericalEmbedding, self).__init__() + self.weight = nn.Parameter(torch.empty(num_embeddings, embedding_dim)) + if bias: + self.bias = nn.Parameter(torch.empty(num_embeddings, embedding_dim)) + else: + self.bias = None + + for parameter in [self.weight, self.bias]: + if parameter is not None: + nn.init.kaiming_uniform_(parameter, a=math.sqrt(5)) + + def forward(self, input): + x = self.weight.unsqueeze(0) * input.unsqueeze(-1) + if self.bias is not None: + x = x + self.bias.unsqueeze(-1) + return x + + + diff --git a/inst/python/Transformer.py b/inst/python/Transformer.py new file mode 100644 index 0000000..395026a --- /dev/null +++ b/inst/python/Transformer.py @@ -0,0 +1,188 @@ +import math + +import torch +from torch import nn +import torch.nn.functional as F + +from ResNet import NumericalEmbedding + + +def reglu(x): + a, b = x.chunk(2, dim=-1) + return a * F.relu(b) + + +class ReGLU(nn.Module): + def forward(self, x): + return reglu(x) + + +class Transformer(nn.Module): + + def __init__(self, + cat_features, + num_features, + num_blocks, + dim_token, + num_heads, + att_dropout, + ffn_dropout, + res_dropout, + dim_hidden, + dim_out=1, + head_activation=nn.ReLU, + activation=ReGLU, + ffn_norm=nn.LayerNorm, + head_norm=nn.LayerNorm, + att_norm=nn.LayerNorm): + super(Transformer, self).__init__() + self.name = "Transformer" + self.categorical_embedding = nn.Embedding(cat_features + 1, dim_token, padding_idx=0) + + if num_features != 0 and num_features is not None: + self.numerical_embedding = NumericalEmbedding(num_features, dim_token) + self.class_token = ClassToken(dim_token) + + self.layers = nn.ModuleList([]) + for layer_idx in range(num_blocks): + layer = nn.ModuleDict({ + "attention": nn.MultiheadAttention(dim_token, num_heads, + dropout=att_dropout), + "ffn": FeedForwardBlock(dim_token, dim_hidden, + bias_first=True, + bias_second=True, + dropout=ffn_dropout, + activation=activation), + "attention_res_dropout": nn.Dropout(res_dropout), + "ffn_res_dropout": nn.Dropout(res_dropout), + "ffn_norm": ffn_norm(dim_token) + }) + if layer_idx != 0: + layer["attention_norm"] = 
att_norm(dim_token) + self.layers.append(layer) + + self.head = Head(dim_token, + bias=True, + activation=head_activation, + normalization=head_norm, + dim_out=dim_out) + + def forward(self, x): + mask = torch.where(x["cat"] == 0, True, False) + cat = self.categorical_embedding(x["cat"]) + if "num" in x.keys() and self.numerical_embedding is not None: + num = self.numerical_embedding(x["num"]) + x = torch.cat([cat, num], dim=1) + mask = torch.cat([mask, torch.zeros([x.shape[0], + num.shape[1]], + device=mask.device, + dtype=mask.dtype)], + dim=1) + else: + x = cat + x = self.class_token(x) + mask = torch.cat([mask, torch.zeros([x.shape[0], 1], + device=mask.device, + dtype=mask.dtype)], + dim=1) + + for i, layer in enumerate(self.layers): + x_residual = self.start_residual(layer, "attention", x) + + if i == len(self.layers)-1: + dims = x_residual.shape + x_residual = layer["attention"]( + x_residual[:, -1].view([dims[0], 1, dims[2]]).transpose(0, 1), + x_residual.transpose(0, 1), + x_residual.transpose(0, 1), + mask + ) + attn_weights = x_residual[1] + x_residual = x_residual[0] + x = x[:, -1].view([dims[0], 1, dims[2]]) + else: + x_residual = layer["attention"]( + x_residual.transpose(0, 1), + x_residual.transpose(0, 1), + x_residual.transpose(0, 1), + mask + )[0] + x = self.end_residual(layer, "attention", x, x_residual.transpose(0, 1)) + + x_residual = self.start_residual(layer, "ffn", x) + x_residual = layer["ffn"](x_residual) + x = self.end_residual(layer, "ffn", x, x_residual) + + x = self.head(x)[:, 0] + return x + + @staticmethod + def start_residual(layer, stage, x): + norm = f"{stage}_norm" + if norm in layer.keys(): + x = layer[stage + "_norm"](x) + return x + + @staticmethod + def end_residual(layer, stage, x, x_residual): + x_residual = layer[f"{stage}_res_dropout"](x_residual) + return x + x_residual + + +class FeedForwardBlock(nn.Module): + + def __init__(self, + dim_token, + dim_hidden, + bias_first=True, + bias_second=True, + dropout=0.0, + activation=ReGLU): + super(FeedForwardBlock, self).__init__() + self.linear0 = nn.Linear(dim_token, int(dim_hidden * 2), bias=bias_first) + self.activation = activation() + self.dropout = nn.Dropout(p=dropout) + self.linear1 = nn.Linear(dim_hidden, dim_token, bias=bias_second) + + def forward(self, x): + x = self.linear0(x) + x = self.activation(x) + x = self.dropout(x) + x = self.linear1(x) + return x + + +class Head(nn.Module): + + def __init__(self, + dim_in, + bias, + activation, + normalization, + dim_out): + super(Head, self).__init__() + self.normalization = normalization(dim_in) + self.activation = activation() + self.linear = nn.Linear(dim_in, dim_out, bias=bias) + + def forward(self, x): + x = x[:, -1] + x = self.normalization(x) + x = self.activation(x) + x = self.linear(x) + return x + +class ClassToken(nn.Module): + + def __init__(self, + dim_token): + super(ClassToken, self).__init__() + self.weight = nn.Parameter(torch.empty(dim_token, 1)) + nn.init.kaiming_uniform_(self.weight, a=math.sqrt(5)) + + def expand(self, dims): + new_dims = [1] * (len(dims) - 1) + return self.weight.view(new_dims + [-1]).expand(dims +[-1]) + + def forward(self, x): + return torch.cat([x, self.expand([x.shape[0], 1])], dim=1) diff --git a/inst/python/__init__.py b/inst/python/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/man/Dataset.Rd b/man/Dataset.Rd deleted file mode 100644 index eb12468..0000000 --- a/man/Dataset.Rd +++ /dev/null @@ -1,18 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit 
documentation in R/Dataset.R -\name{Dataset} -\alias{Dataset} -\title{A torch dataset} -\usage{ -Dataset(data, labels = NULL, numericalIndex = NULL) -} -\arguments{ -\item{data}{a dataframe like object with the covariates} - -\item{labels}{a dataframe with the labels} - -\item{numericalIndex}{in what column numeric data is in (if any)} -} -\description{ -A torch dataset -} diff --git a/man/EarlyStopping.Rd b/man/EarlyStopping.Rd deleted file mode 100644 index 53581b2..0000000 --- a/man/EarlyStopping.Rd +++ /dev/null @@ -1,78 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/Estimator-class.R -\name{EarlyStopping} -\alias{EarlyStopping} -\title{Earlystopping class} -\description{ -Stops training if a loss or metric has stopped improving -} -\section{Methods}{ -\subsection{Public methods}{ -\itemize{ -\item \href{#method-EarlyStopping-new}{\code{EarlyStopping$new()}} -\item \href{#method-EarlyStopping-call}{\code{EarlyStopping$call()}} -\item \href{#method-EarlyStopping-clone}{\code{EarlyStopping$clone()}} -} -} -\if{html}{\out{
    }} -\if{html}{\out{}} -\if{latex}{\out{\hypertarget{method-EarlyStopping-new}{}}} -\subsection{Method \code{new()}}{ -Creates a new earlyStopping object -\subsection{Usage}{ -\if{html}{\out{
    }}\preformatted{EarlyStopping$new(patience = 3, delta = 0, verbose = TRUE, mode = "max")}\if{html}{\out{
    }} -} - -\subsection{Arguments}{ -\if{html}{\out{
    }} -\describe{ -\item{\code{patience}}{Stop after this number of epochs if loss doesn't improve} - -\item{\code{delta}}{How much does the loss need to improve to count as improvement} - -\item{\code{verbose}}{If information should be printed out} - -\item{\code{mode}}{either `min` or `max` depending on metric to be used for earlyStopping} -} -\if{html}{\out{
    }} -} -\subsection{Returns}{ -a new earlystopping object -} -} -\if{html}{\out{
    }} -\if{html}{\out{}} -\if{latex}{\out{\hypertarget{method-EarlyStopping-call}{}}} -\subsection{Method \code{call()}}{ -call the earlystopping object and increment a counter if loss is not -improving -\subsection{Usage}{ -\if{html}{\out{
    }}\preformatted{EarlyStopping$call(metric)}\if{html}{\out{
    }} -} - -\subsection{Arguments}{ -\if{html}{\out{
    }} -\describe{ -\item{\code{metric}}{the current metric value} -} -\if{html}{\out{
    }} -} -} -\if{html}{\out{
    }} -\if{html}{\out{}} -\if{latex}{\out{\hypertarget{method-EarlyStopping-clone}{}}} -\subsection{Method \code{clone()}}{ -The objects of this class are cloneable with this method. -\subsection{Usage}{ -\if{html}{\out{
    }}\preformatted{EarlyStopping$clone(deep = FALSE)}\if{html}{\out{
    }} -} - -\subsection{Arguments}{ -\if{html}{\out{
    }} -\describe{ -\item{\code{deep}}{Whether to make a deep clone.} -} -\if{html}{\out{
    }} -} -} -} diff --git a/man/Estimator.Rd b/man/Estimator.Rd deleted file mode 100644 index 96692f0..0000000 --- a/man/Estimator.Rd +++ /dev/null @@ -1,278 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/Estimator-class.R -\name{Estimator} -\alias{Estimator} -\title{Estimator} -\description{ -A generic R6 class that wraps around a torch nn module and can be used to -fit and predict the model defined in that module. -} -\section{Methods}{ -\subsection{Public methods}{ -\itemize{ -\item \href{#method-Estimator-new}{\code{Estimator$new()}} -\item \href{#method-Estimator-fit}{\code{Estimator$fit()}} -\item \href{#method-Estimator-fitEpoch}{\code{Estimator$fitEpoch()}} -\item \href{#method-Estimator-score}{\code{Estimator$score()}} -\item \href{#method-Estimator-finishFit}{\code{Estimator$finishFit()}} -\item \href{#method-Estimator-printProgress}{\code{Estimator$printProgress()}} -\item \href{#method-Estimator-fitWholeTrainingSet}{\code{Estimator$fitWholeTrainingSet()}} -\item \href{#method-Estimator-save}{\code{Estimator$save()}} -\item \href{#method-Estimator-predictProba}{\code{Estimator$predictProba()}} -\item \href{#method-Estimator-predict}{\code{Estimator$predict()}} -\item \href{#method-Estimator-itemOrDefaults}{\code{Estimator$itemOrDefaults()}} -\item \href{#method-Estimator-clone}{\code{Estimator$clone()}} -} -} -\if{html}{\out{
    }} -\if{html}{\out{}} -\if{latex}{\out{\hypertarget{method-Estimator-new}{}}} -\subsection{Method \code{new()}}{ -Creates a new estimator -\subsection{Usage}{ -\if{html}{\out{
    }}\preformatted{Estimator$new(modelType, modelParameters, estimatorSettings)}\if{html}{\out{
    }} -} - -\subsection{Arguments}{ -\if{html}{\out{
    }} -\describe{ -\item{\code{modelType}}{The torch nn module to use as model} - -\item{\code{modelParameters}}{Parameters to initialize the model} - -\item{\code{estimatorSettings}}{Parameters required for the estimator fitting} -} -\if{html}{\out{
    }} -} -} -\if{html}{\out{
    }} -\if{html}{\out{}} -\if{latex}{\out{\hypertarget{method-Estimator-fit}{}}} -\subsection{Method \code{fit()}}{ -fits the estimator -\subsection{Usage}{ -\if{html}{\out{
    }}\preformatted{Estimator$fit(dataset, testDataset)}\if{html}{\out{
    }} -} - -\subsection{Arguments}{ -\if{html}{\out{
    }} -\describe{ -\item{\code{dataset}}{a torch dataset to use for model fitting} - -\item{\code{testDataset}}{a torch dataset to use for early stopping} -} -\if{html}{\out{
    }} -} -} -\if{html}{\out{
    }} -\if{html}{\out{}} -\if{latex}{\out{\hypertarget{method-Estimator-fitEpoch}{}}} -\subsection{Method \code{fitEpoch()}}{ -fits estimator for one epoch (one round through the data) -\subsection{Usage}{ -\if{html}{\out{
    }}\preformatted{Estimator$fitEpoch(dataset, batchIndex)}\if{html}{\out{
    }} -} - -\subsection{Arguments}{ -\if{html}{\out{
    }} -\describe{ -\item{\code{dataset}}{torch dataset to use for fitting} - -\item{\code{batchIndex}}{indices of batches} -} -\if{html}{\out{
    }} -} -} -\if{html}{\out{
    }} -\if{html}{\out{}} -\if{latex}{\out{\hypertarget{method-Estimator-score}{}}} -\subsection{Method \code{score()}}{ -calculates loss and auc after training for one epoch -\subsection{Usage}{ -\if{html}{\out{
    }}\preformatted{Estimator$score(dataset, batchIndex)}\if{html}{\out{
    }} -} - -\subsection{Arguments}{ -\if{html}{\out{
    }} -\describe{ -\item{\code{dataset}}{The torch dataset to use to evaluate loss and auc} - -\item{\code{batchIndex}}{Indices of batches in the dataset} -} -\if{html}{\out{
    }} -} -\subsection{Returns}{ -list with average loss and auc in the dataset -} -} -\if{html}{\out{
    }} -\if{html}{\out{}} -\if{latex}{\out{\hypertarget{method-Estimator-finishFit}{}}} -\subsection{Method \code{finishFit()}}{ -operations that run when fitting is finished -\subsection{Usage}{ -\if{html}{\out{
    }}\preformatted{Estimator$finishFit(scores, modelStateDict, epoch, learnRates)}\if{html}{\out{
    }} -} - -\subsection{Arguments}{ -\if{html}{\out{
    }} -\describe{ -\item{\code{scores}}{validation scores} - -\item{\code{modelStateDict}}{fitted model parameters} - -\item{\code{epoch}}{list of epochs fit} - -\item{\code{learnRates}}{learning rate sequence used so far} -} -\if{html}{\out{
    }} -} -} -\if{html}{\out{
    }} -\if{html}{\out{}} -\if{latex}{\out{\hypertarget{method-Estimator-printProgress}{}}} -\subsection{Method \code{printProgress()}}{ -Print out training progress per epoch -\subsection{Usage}{ -\if{html}{\out{
    }}\preformatted{Estimator$printProgress(scores, trainLoss, delta, currentEpoch)}\if{html}{\out{
    }} -} - -\subsection{Arguments}{ -\if{html}{\out{
    }} -\describe{ -\item{\code{scores}}{scores returned by `self$score`} - -\item{\code{trainLoss}}{training loss} - -\item{\code{delta}}{how long did the epoch take} - -\item{\code{currentEpoch}}{the current epoch number} -} -\if{html}{\out{
    }} -} -} -\if{html}{\out{
    }} -\if{html}{\out{}} -\if{latex}{\out{\hypertarget{method-Estimator-fitWholeTrainingSet}{}}} -\subsection{Method \code{fitWholeTrainingSet()}}{ -Fits whole training set on a specific number of epochs -\subsection{Usage}{ -\if{html}{\out{
    }}\preformatted{Estimator$fitWholeTrainingSet(dataset, learnRates = NULL)}\if{html}{\out{
    }} -} - -\subsection{Arguments}{ -\if{html}{\out{
    }} -\describe{ -\item{\code{dataset}}{torch dataset} - -\item{\code{learnRates}}{learnRateSchedule from CV} -} -\if{html}{\out{
    }} -} -} -\if{html}{\out{
    }} -\if{html}{\out{}} -\if{latex}{\out{\hypertarget{method-Estimator-save}{}}} -\subsection{Method \code{save()}}{ -save model and those parameters needed to reconstruct it -\subsection{Usage}{ -\if{html}{\out{
    }}\preformatted{Estimator$save(path, name)}\if{html}{\out{
    }} -} - -\subsection{Arguments}{ -\if{html}{\out{
    }} -\describe{ -\item{\code{path}}{where to save the model} - -\item{\code{name}}{name of file} -} -\if{html}{\out{
    }} -} -\subsection{Returns}{ -the path to saved model -} -} -\if{html}{\out{
    }} -\if{html}{\out{}} -\if{latex}{\out{\hypertarget{method-Estimator-predictProba}{}}} -\subsection{Method \code{predictProba()}}{ -predicts and outputs the probabilities -\subsection{Usage}{ -\if{html}{\out{
    }}\preformatted{Estimator$predictProba(dataset)}\if{html}{\out{
    }} -} - -\subsection{Arguments}{ -\if{html}{\out{
    }} -\describe{ -\item{\code{dataset}}{Torch dataset to create predictions for} -} -\if{html}{\out{
    }} -} -\subsection{Returns}{ -predictions as probabilities -} -} -\if{html}{\out{
    }} -\if{html}{\out{}} -\if{latex}{\out{\hypertarget{method-Estimator-predict}{}}} -\subsection{Method \code{predict()}}{ -predicts and outputs the class -\subsection{Usage}{ -\if{html}{\out{
    }}\preformatted{Estimator$predict(dataset, threshold = NULL)}\if{html}{\out{
    }} -} - -\subsection{Arguments}{ -\if{html}{\out{
    }} -\describe{ -\item{\code{dataset}}{A torch dataset to create predictions for} - -\item{\code{threshold}}{Which threshold to use for predictions} -} -\if{html}{\out{
    }} -} -\subsection{Returns}{ -The predicted class for the data in the dataset -} -} -\if{html}{\out{
    }} -\if{html}{\out{}} -\if{latex}{\out{\hypertarget{method-Estimator-itemOrDefaults}{}}} -\subsection{Method \code{itemOrDefaults()}}{ -select item from list, and if it's null sets a default -\subsection{Usage}{ -\if{html}{\out{
    }}\preformatted{Estimator$itemOrDefaults(list, item, default = NULL)}\if{html}{\out{
    }} -} - -\subsection{Arguments}{ -\if{html}{\out{
    }} -\describe{ -\item{\code{list}}{A list with items} - -\item{\code{item}}{Which list item to retrieve} - -\item{\code{default}}{The value to return if list doesn't have item} -} -\if{html}{\out{
    }} -} -\subsection{Returns}{ -the list item or default -} -} -\if{html}{\out{
    }} -\if{html}{\out{}} -\if{latex}{\out{\hypertarget{method-Estimator-clone}{}}} -\subsection{Method \code{clone()}}{ -The objects of this class are cloneable with this method. -\subsection{Usage}{ -\if{html}{\out{
    }}\preformatted{Estimator$clone(deep = FALSE)}\if{html}{\out{
    }} -} - -\subsection{Arguments}{ -\if{html}{\out{
    }} -\describe{ -\item{\code{deep}}{Whether to make a deep clone.} -} -\if{html}{\out{
    }} -} -} -} diff --git a/man/batchToDevice.Rd b/man/batchToDevice.Rd deleted file mode 100644 index a19e34c..0000000 --- a/man/batchToDevice.Rd +++ /dev/null @@ -1,20 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/Estimator-class.R -\name{batchToDevice} -\alias{batchToDevice} -\title{sends a batch of data to device} -\usage{ -batchToDevice(batch, device) -} -\arguments{ -\item{batch}{the batch to send, usually a list of torch tensors} - -\item{device}{which device to send batch to} -} -\value{ -the batch on the required device -} -\description{ -sends a batch of data to device -assumes batch includes lists of tensors to arbitrary nested depths -} diff --git a/man/camelCaseToSnakeCase.Rd b/man/camelCaseToSnakeCase.Rd new file mode 100644 index 0000000..224e795 --- /dev/null +++ b/man/camelCaseToSnakeCase.Rd @@ -0,0 +1,17 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/HelperFunctions.R +\name{camelCaseToSnakeCase} +\alias{camelCaseToSnakeCase} +\title{Convert a camel case string to snake case} +\usage{ +camelCaseToSnakeCase(string) +} +\arguments{ +\item{string}{The string to be converted} +} +\value{ +A string +} +\description{ +Convert a camel case string to snake case +} diff --git a/man/camelCaseToSnakeCaseNames.Rd b/man/camelCaseToSnakeCaseNames.Rd new file mode 100644 index 0000000..a4f27ff --- /dev/null +++ b/man/camelCaseToSnakeCaseNames.Rd @@ -0,0 +1,17 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/HelperFunctions.R +\name{camelCaseToSnakeCaseNames} +\alias{camelCaseToSnakeCaseNames} +\title{Convert the names of an object from camel case to snake case} +\usage{ +camelCaseToSnakeCaseNames(object) +} +\arguments{ +\item{object}{The object of which the names should be converted} +} +\value{ +The same object, but with converted names. +} +\description{ +Convert the names of an object from camel case to snake case +} diff --git a/man/lrFinder.Rd b/man/lrFinder.Rd deleted file mode 100644 index 74f31d2..0000000 --- a/man/lrFinder.Rd +++ /dev/null @@ -1,42 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/LRFinder.R -\name{lrFinder} -\alias{lrFinder} -\title{Find learning rate that decreases loss the most} -\usage{ -lrFinder( - dataset, - modelType, - modelParams, - estimatorSettings, - minLR = 1e-07, - maxLR = 1, - numLR = 100, - smooth = 0.05, - divergenceThreshold = 4 -) -} -\arguments{ -\item{dataset}{torch dataset, training dataset} - -\item{modelType}{the function used to initialize the model} - -\item{modelParams}{parameters used to initialize model} - -\item{estimatorSettings}{settings for estimator to fit model} - -\item{minLR}{lower bound of learning rates to search through} - -\item{maxLR}{upper bound of learning rates to search through} - -\item{numLR}{number of learning rates to go through} - -\item{smooth}{smoothing to use on losses} - -\item{divergenceThreshold}{if loss increases this amount above the minimum, stop.} -} -\description{ -Method originated from https://arxiv.org/abs/1506.01186 but this -implementation draws inspiration from various other implementations such as -pytorch lightning, fastai, luz and pytorch-lr-finder. 
-} diff --git a/man/setDefaultResNet.Rd b/man/setDefaultResNet.Rd index c26f354..2ff34c1 100644 --- a/man/setDefaultResNet.Rd +++ b/man/setDefaultResNet.Rd @@ -6,7 +6,7 @@ \usage{ setDefaultResNet( estimatorSettings = setEstimator(learningRate = "auto", weightDecay = 1e-06, device = - "cpu", batchSize = 1024, epochs = 50, seed = NULL) + "cpu", batchSize = 1024L, epochs = 50L, seed = NULL) ) } \arguments{ diff --git a/man/setDefaultTransformer.Rd b/man/setDefaultTransformer.Rd index e0a3a32..049b121 100644 --- a/man/setDefaultTransformer.Rd +++ b/man/setDefaultTransformer.Rd @@ -6,7 +6,7 @@ \usage{ setDefaultTransformer( estimatorSettings = setEstimator(learningRate = "auto", weightDecay = 1e-04, batchSize - = 512, epochs = 10, seed = NULL, device = "cpu") + = 512L, epochs = 10L, seed = NULL, device = "cpu") ) } \arguments{ diff --git a/man/setEstimator.Rd b/man/setEstimator.Rd index 8454c55..18ed8fa 100644 --- a/man/setEstimator.Rd +++ b/man/setEstimator.Rd @@ -7,13 +7,14 @@ setEstimator( learningRate = "auto", weightDecay = 0, - batchSize = 512, - epochs = 30, + batchSize = 512L, + epochs = 30L, device = "cpu", - optimizer = torch::optim_adamw, - scheduler = list(fun = torch::lr_reduce_on_plateau, params = list(patience = 1)), - criterion = torch::nn_bce_with_logits_loss, - earlyStopping = list(useEarlyStopping = TRUE, params = list(patience = 4)), + optimizer = torch$optim$AdamW, + scheduler = list(fun = torch$optim$lr_scheduler$ReduceLROnPlateau, params = + list(patience = 1L)), + criterion = torch$nn$BCEWithLogitsLoss, + earlyStopping = list(useEarlyStopping = TRUE, params = list(patience = 4L)), metric = "auc", seed = NULL ) diff --git a/man/setMultiLayerPerceptron.Rd b/man/setMultiLayerPerceptron.Rd index d6ab36c..01418fe 100644 --- a/man/setMultiLayerPerceptron.Rd +++ b/man/setMultiLayerPerceptron.Rd @@ -5,21 +5,21 @@ \title{setMultiLayerPerceptron} \usage{ setMultiLayerPerceptron( - numLayers = c(1:8), - sizeHidden = c(2^(6:9)), - dropout = c(seq(0, 0.5, 0.05)), - sizeEmbedding = c(2^(6:9)), + numLayers = as.integer(1:8), + sizeHidden = as.integer(2^(6:9)), + dropout = c(seq(0, 0.3, 0.05)), + sizeEmbedding = as.integer(2^(6:9)), estimatorSettings = setEstimator(learningRate = "auto", weightDecay = c(1e-06, 0.001), - batchSize = 1024, epochs = 30, device = "cpu"), + batchSize = 1024L, epochs = 30L, device = "cpu"), hyperParamSearch = "random", randomSample = 100, randomSampleSeed = NULL ) } \arguments{ -\item{numLayers}{Number of layers in network, default: 1:16} +\item{numLayers}{Number of layers in network, default: 1:8} -\item{sizeHidden}{Amount of neurons in each default layer, default: 2^(6:10) (64 to 1024)} +\item{sizeHidden}{Amount of neurons in each default layer, default: 2^(6:9) (64 to 512)} \item{dropout}{How much dropout to apply after first linear, default: seq(0, 0.3, 0.05)} diff --git a/man/setResNet.Rd b/man/setResNet.Rd index fbe3d77..9dcffa7 100644 --- a/man/setResNet.Rd +++ b/man/setResNet.Rd @@ -5,14 +5,14 @@ \title{setResNet} \usage{ setResNet( - numLayers = c(1:8), - sizeHidden = c(2^(6:10)), - hiddenFactor = c(1:4), + numLayers = as.integer(1:8), + sizeHidden = as.integer(2^(6:10)), + hiddenFactor = as.integer(1:4), residualDropout = c(seq(0, 0.5, 0.05)), hiddenDropout = c(seq(0, 0.5, 0.05)), - sizeEmbedding = c(2^(6:9)), + sizeEmbedding = as.integer(2^(6:9)), estimatorSettings = setEstimator(learningRate = "auto", weightDecay = c(1e-06, 0.001), - device = "cpu", batchSize = 1024, epochs = 30, seed = NULL), + device = "cpu", batchSize = 1024L, epochs = 30L, 
seed = NULL), hyperParamSearch = "random", randomSample = 100, randomSampleSeed = NULL diff --git a/tests/testthat/setup.R b/tests/testthat/setup.R index fde852b..adf0dcb 100644 --- a/tests/testthat/setup.R +++ b/tests/testthat/setup.R @@ -1,11 +1,7 @@ library(PatientLevelPrediction) -if(Sys.getenv('GITHUB_ACTIONS') == 'true' & torch::torch_is_installed() != FALSE) { - torch::install_torch() -} - testLoc <- tempdir() - +torch <- reticulate::import("torch") # get connection and data from Eunomia connectionDetails <- Eunomia::getEunomiaConnectionDetails() Eunomia::createCohorts(connectionDetails) @@ -69,13 +65,16 @@ mappedData <- PatientLevelPrediction::MapIds( cohort = trainData$Train$labels ) +path <- system.file("python", package = "DeepPatientLevelPrediction") +Dataset <- reticulate::import_from_path("Dataset", path = path) +if (is.null(attributes(mappedData)$path)) { + # sqlite object + attributes(mappedData)$path <- attributes(mappedData)$dbname +} - -dataset <- Dataset( - data = mappedData$covariates, - labels = trainData$Train$labels$outcomeCount, - numericalIndex = NULL +dataset <- Dataset$Data( + data = reticulate::r_to_py(normalizePath(attributes(mappedData)$path)), + labels = reticulate::r_to_py(trainData$Train$labels$outcomeCount), ) - -small_dataset <- torch::dataset_subset(dataset, (1:round(length(dataset)/3))) +small_dataset <- torch$utils$data$Subset(dataset, (1:round(length(dataset)/3))) diff --git a/tests/testthat/test-Dataset.R b/tests/testthat/test-Dataset.R index b949394..940f9d1 100644 --- a/tests/testthat/test-Dataset.R +++ b/tests/testthat/test-Dataset.R @@ -1,19 +1,6 @@ -test_that("dataset correct class", { - testthat::expect_true("myDataset" %in% class(dataset)) -}) - -test_that("length of index correct", { - testthat::expect_equal( - length(dataset$getNumericalIndex()), - dplyr::n_distinct(mappedData$covariates %>% - dplyr::collect() %>% - dplyr::pull(covariateId)) - ) -}) - test_that("number of num and cat features sum correctly", { testthat::expect_equal( - dataset$numNumFeatures() + dataset$numCatFeatures(), + length(dataset$get_numerical_features()) + length(dataset$get_cat_features()), dplyr::n_distinct(mappedData$covariates %>% dplyr::collect() %>% dplyr::pull(covariateId)) ) @@ -21,10 +8,10 @@ test_that("number of num and cat features sum correctly", { test_that("length of dataset correct", { - expect_equal(length(dataset), dataset$cat$shape[1]) - expect_equal(length(dataset), dataset$num$shape[1]) + expect_equal(length(dataset), dataset$cat$shape[0]) + expect_equal(length(dataset), dataset$num$shape[0]) expect_equal( - dataset$.length(), + length(dataset), dplyr::n_distinct(mappedData$covariates %>% dplyr::collect() %>% dplyr::pull(rowId)) ) @@ -35,30 +22,30 @@ test_that(".getbatch works", { # get one sample out <- dataset[10] - # output should be a list of two items, the batch and targets, + # output should be a list of two items, the batch in pos 1 and targets in pos 2, # the batch is what goes to the model expect_equal(length(out), 2) # targets should be binary - expect_true(out$target$item() %in% c(0, 1)) + expect_true(out[[2]]$item() %in% c(0, 1)) # shape of batch is correct - expect_equal(length(out$batch), 2) - expect_equal(out$batch$cat$shape[1], 1) - expect_equal(out$batch$num$shape[1], 1) + expect_equal(length(out[[1]]), 2) + expect_equal(out[[1]]$cat$shape[0], 1) + expect_equal(out[[1]]$num$shape[0], 1) # shape of target - expect_equal(out$target$shape[1], 1) + expect_equal(out[[2]]$shape$numel(), 1) # get a whole batch - out <- dataset[10:(10 + 
batch_size - 1)] + out <- dataset[10:(10 + batch_size)] expect_equal(length(out), 2) - expect_true(all(torch::as_array(out$target) %in% c(0, 1))) + expect_true(all(out[[2]]$numpy() %in% c(0, 1))) - expect_equal(length(out$batch), 2) - expect_equal(out$batch$cat$shape[1], 16) - expect_equal(out$batch$num$shape[1], 16) + expect_equal(length(out[[1]]), 2) + expect_equal(out[[1]]$cat$shape[0], 16) + expect_equal(out[[1]]$num$shape[0], 16) - expect_equal(out$target$shape[1], 16) + expect_equal(out[[2]]$shape[0], 16) }) diff --git a/tests/testthat/test-Estimator.R b/tests/testthat/test-Estimator.R index a803003..a8711e8 100644 --- a/tests/testthat/test-Estimator.R +++ b/tests/testthat/test-Estimator.R @@ -1,47 +1,50 @@ -catFeatures <- small_dataset$dataset$numCatFeatures() -numFeatures <- small_dataset$dataset$numNumFeatures() - -modelType <- ResNet +catFeatures <- small_dataset$dataset$get_cat_features()$shape[[1]] +numFeatures <- small_dataset$dataset$get_numerical_features()$shape[[1]] modelParameters <- list( - catFeatures = catFeatures, - numFeatures = numFeatures, - sizeEmbedding = 16, - sizeHidden = 16, - numLayers = 2, - hiddenFactor = 2 + cat_features = catFeatures, + num_features = numFeatures, + size_embedding = 16L, + size_hidden = 16L, + num_layers = 2L, + hidden_factor = 2L ) estimatorSettings <- setEstimator(learningRate = 3e-4, weightDecay = 0.0, - batchSize = 128, - epochs = 5, - device = 'cpu') - -estimator <- Estimator$new( - modelType = modelType, - modelParameters = modelParameters, - estimatorSettings = estimatorSettings -) + batchSize = 128L, + epochs = 5L, + device = 'cpu', + seed=42, + optimizer=torch$optim$AdamW, + criterion=torch$nn$BCEWithLogitsLoss, + metric='loss', + scheduler= list(fun=torch$optim$lr_scheduler$ReduceLROnPlateau, + params=list(patience=1)), + earlyStopping=NULL) + +modelType <- "ResNet" +estimator <- createEstimator(modelType = modelType, + modelParameters = modelParameters, + estimatorSettings = estimatorSettings) test_that("Estimator initialization works", { # count parameters in both instances + path <- system.file("python", package = "DeepPatientLevelPrediction") + ResNet <- reticulate::import_from_path(modelType, path=path)[[modelType]] + testthat::expect_equal( - sum(sapply(estimator$model$parameters, function(x) prod(x$shape))), - sum(sapply(do.call(modelType, modelParameters)$parameters, function(x) prod(x$shape))) + sum(reticulate::iterate(estimator$model$parameters(), function(x) x$numel())), + sum(reticulate::iterate(do.call(ResNet, modelParameters)$parameters(), + function(x) x$numel())) ) testthat::expect_equal( - estimator$modelParameters, + estimator$model_parameters, modelParameters ) - # check the function that results the value from a list - val <- estimator$itemOrDefaults(list(param = 1, test = 3), "param", default = NULL) - expect_equal(val, 1) - val <- estimator$itemOrDefaults(list(param = 1, test = 3), "paramater", default = NULL) - expect_true(is.null(val)) }) sink(nullfile()) @@ -50,17 +53,17 @@ sink() test_that("estimator fitting works", { - expect_true(!is.null(estimator$bestEpoch)) - expect_true(!is.null(estimator$bestScore$loss)) - expect_true(!is.null(estimator$bestScore$auc)) + expect_true(!is.null(estimator$best_epoch)) + expect_true(!is.null(estimator$best_score$loss)) + expect_true(!is.null(estimator$best_score$auc)) old_weights <- estimator$model$head$weight$mean()$item() sink(nullfile()) - estimator$fitWholeTrainingSet(small_dataset, estimator$learnRateSchedule) + estimator$fit_whole_training_set(small_dataset, 
estimator$learn_rate_schedule) sink() - expect_equal(estimator$optimizer$param_groups[[1]]$lr, tail(estimator$learnRateSchedule, 1)[[1]]) + expect_equal(estimator$optimizer$param_groups[[1]]$lr, tail(estimator$learn_rate_schedule, 1)[[1]]) new_weights <- estimator$model$head$weight$mean()$item() @@ -72,7 +75,7 @@ test_that("estimator fitting works", { expect_true(file.exists(file.path(testLoc, "estimator.pt"))) sink(nullfile()) - preds <- estimator$predictProba(dataset) + preds <- estimator$predict_proba(dataset) sink() expect_lt(max(preds), 1) @@ -90,16 +93,15 @@ test_that("estimator fitting works", { estimatorSettings <- setEstimator(learningRate = 3e-4, weightDecay = 0.0, - batchSize = 128, - epochs = 5, + batchSize = 128L, + epochs = 5L, device = 'cpu', metric= "loss") - estimator <- Estimator$new( - modelType = modelType, - modelParameters = modelParameters, - estimatorSettings = estimatorSettings - ) + estimator <- createEstimator(modelType=modelType, + modelParameters=modelParameters, + estimatorSettings=estimatorSettings) + sink(nullfile()) estimator$fit(small_dataset, small_dataset) sink() @@ -108,32 +110,35 @@ test_that("estimator fitting works", { expect_equal(estimator$metric$name, "loss") sink(nullfile()) - estimator$fitWholeTrainingSet(small_dataset, estimator$learnRateSchedule) + estimator$fit_whole_training_set(small_dataset, estimator$learn_rate_schedule) sink() - expect_equal(estimator$learnRateSchedule[[estimator$bestEpoch]], + expect_equal(estimator$learn_rate_schedule[[estimator$best_epoch]], estimator$optimizer$param_groups[[1]]$lr) }) test_that("early stopping works", { - earlyStop <- EarlyStopping$new(patience = 3, delta = 0, verbose = FALSE) - testthat::expect_true(is.null(earlyStop$bestScore)) - testthat::expect_false(earlyStop$earlyStop) - earlyStop$call(0.5) - testthat::expect_equal(earlyStop$bestScore, 0.5) - earlyStop$call(0.4) - earlyStop$call(0.4) - earlyStop$call(0.4) - testthat::expect_true(earlyStop$earlyStop) + EarlyStopping <- reticulate::import_from_path("Estimator", path=path)$EarlyStopping + earlyStop <- EarlyStopping(patience = 3, delta = 0, verbose = FALSE) + + + testthat::expect_true(is.null(earlyStop$best_score)) + testthat::expect_false(earlyStop$early_stop) + earlyStop(0.5) + testthat::expect_equal(earlyStop$best_score, 0.5) + earlyStop(0.4) + earlyStop(0.4) + earlyStop(0.4) + testthat::expect_true(earlyStop$early_stop) }) modelSettings <- setResNet( - numLayers = 1, sizeHidden = 16, hiddenFactor = 1, + numLayers = 1L, sizeHidden = 16L, hiddenFactor = 1L, residualDropout = 0, hiddenDropout = 0, - sizeEmbedding = 16, hyperParamSearch = "random", - randomSample = 1, - setEstimator(epochs=1, + sizeEmbedding = 16L, hyperParamSearch = "random", + randomSample = 1L, + setEstimator(epochs=1L, learningRate = 3e-4) ) @@ -176,91 +181,88 @@ test_that("predictDeepEstimator works", { }) test_that("batchToDevice works", { + batch_to_device <- reticulate::import_from_path("Estimator", path=path)$batch_to_device # since we can't test moving to gpu there is a fake device called meta # which we can use to test of the device is updated estimator$device <- "meta" b <- 1:10 - batch <- batchToDevice(dataset[b], device=estimator$device) - + batch <- batch_to_device(dataset[b], device=estimator$device) + devices <- lapply( lapply(unlist(batch, recursive = TRUE), function(x) x$device), - function(x) x == torch::torch_device(type = "meta") + function(x) x == torch$device(type = "meta") ) # test that all are meta expect_true(all(devices == TRUE)) - numDevice <- 
batchToDevice(dataset[b]$batch$num, device=estimator$device) - expect_true(numDevice$device==torch::torch_device(type="meta")) + numDevice <- batch_to_device(dataset[b][[1]]$num, device=estimator$device) + expect_true(numDevice$device==torch$device(type="meta")) }) test_that("Estimator without earlyStopping works", { # estimator without earlyStopping estimatorSettings <- setEstimator(learningRate = 3e-4, weightDecay = 0.0, - batchSize = 128, - epochs = 1, + batchSize = 128L, + epochs = 1L, device = 'cpu', earlyStopping = NULL) - estimator2 <- Estimator$new( - modelType = modelType, - modelParameters = modelParameters, - estimatorSettings = estimatorSettings - ) + + estimator2 <- createEstimator(modelType = modelType, + modelParameters = modelParameters, + estimatorSettings=estimatorSettings) sink(nullfile()) estimator2$fit(small_dataset, small_dataset) sink() - expect_null(estimator2$earlyStopper) - expect_true(!is.null(estimator2$bestEpoch)) + expect_null(estimator2$early_stopper) + expect_true(!is.null(estimator2$best_epoch)) }) test_that("Early stopper can use loss and stops early", { estimatorSettings <- setEstimator(learningRate = 3e-2, weightDecay = 0.0, - batchSize = 128, - epochs = 10, + batchSize = 128L, + epochs = 10L, device = 'cpu', earlyStopping =list(useEarlyStopping=TRUE, params = list(mode=c('min'), - patience=1)), + patience=1L)), metric = 'loss', seed=42) - estimator <- Estimator$new( - modelType=modelType, - modelParameters = modelParameters, - estimatorSettings = estimatorSettings - ) + estimator <- createEstimator(modelType = modelType, + modelParameters = modelParameters, + estimatorSettings = estimatorSettings) sink(nullfile()) estimator$fit(small_dataset, small_dataset) sink() - expect_true(estimator$bestEpoch < estimator$epochs) + expect_true(estimator$best_epoch < estimator$epochs) }) test_that('Custom metric in estimator works', { metric_fun <- function(predictions, labels) { - positive <- predictions[labels == 1] - negative <- predictions[labels == 0] - pr <- PRROC::pr.curve(scores.class0 = positive, scores.class1 = negative) + pr <- PRROC::pr.curve(scores.class0 = torch$sigmoid(predictions)$numpy(), + weights.class0 = labels$numpy()) auprc <- pr$auc.integral + reticulate::r_to_py(auprc) } estimatorSettings <- setEstimator(learningRate = 3e-4, weightDecay = 0.0, - batchSize = 128, + batchSize = 128L, device = "cpu", - epochs = 1, + epochs = 1L, metric=list(fun=metric_fun, name="auprc", mode="max")) - - estimator <- Estimator$new(modelType = modelType, - modelParameters = modelParameters, - estimatorSettings = estimatorSettings) + estimator <- createEstimator(modelType = modelType, + modelParameters = modelParameters, + estimatorSettings = estimatorSettings) expect_true(is.function(estimator$metric$fun)) expect_true(is.character(estimator$metric$name)) expect_true(estimator$metric$mode %in% c("min", "max")) @@ -269,21 +271,21 @@ test_that('Custom metric in estimator works', { estimator$fit(small_dataset, small_dataset) sink() - expect_true(estimator$bestScore[["auprc"]]>0) + expect_true(estimator$best_score[["auprc"]]>0) }) test_that("setEstimator with paramsToTune is correctly added to hyperparameters", { estimatorSettings <- setEstimator(learningRate=c(3e-4,1e-3), - batchSize=128, - epochs=1, + batchSize=128L, + epochs=1L, device="cpu", metric=c("auc", "auprc"), earlyStopping = list(useEarlyStopping=TRUE, - params=list(patience=c(4,10)))) - modelSettings <- setResNet(numLayers = 1, sizeHidden = 64, - hiddenFactor = 1, residualDropout = 0.2, - hiddenDropout = 
0.2,sizeEmbedding = 128, + params=list(patience=c(4L,10L)))) + modelSettings <- setResNet(numLayers = 1L, sizeHidden = 64L, + hiddenFactor = 1L, residualDropout = 0.2, + hiddenDropout = 0.2,sizeEmbedding = 128L, estimatorSettings = estimatorSettings, hyperParamSearch = "grid") @@ -309,31 +311,40 @@ test_that("device as a function argument works", { dev } } - - estimatorSettings <- setEstimator(device=getDevice) + + estimatorSettings <- setEstimator(device=getDevice, + learningRate = 3e-4) model <- setDefaultResNet(estimatorSettings = estimatorSettings) - model$param[[1]]$catFeatures <- 10 - - estimator <- Estimator$new(modelType="ResNet", - modelParameters = model$param[[1]], - estimatorSettings = estimatorSettings) + model$param[[1]]$catFeatures <- 10L + + estimator <- createEstimator(modelType = modelType, + modelParameters = model$param[[1]], + estimatorSettings = estimatorSettings) expect_equal(estimator$device, "cpu") Sys.setenv("testDeepPLPDevice" = "meta") - estimatorSettings <- setEstimator(device=getDevice) + estimatorSettings <- setEstimator(device=getDevice, + learningRate = 3e-4) model <- setDefaultResNet(estimatorSettings = estimatorSettings) - model$param[[1]]$catFeatures <- 10 + model$param[[1]]$catFeatures <- 10L - estimator <- Estimator$new(modelType="ResNet", - modelParameters = model$param[[1]], - estimatorSettings = estimatorSettings) + estimator <- createEstimator(modelType = modelType, + modelParameters = model$param[[1]], + estimatorSettings = estimatorSettings) expect_equal(estimator$device, "meta") Sys.unsetenv("testDeepPLPDevice") }) + +# test_that("estimatorSettings can be saved and loaded with correct objects", { +# settings <- setEstimator() +# +# saveRDS(settings,file=file.path(testLoc, 'settings.RDS')) +# +# }) \ No newline at end of file diff --git a/tests/testthat/test-LRFinder.R b/tests/testthat/test-LRFinder.R index f6c263e..0a5d700 100644 --- a/tests/testthat/test-LRFinder.R +++ b/tests/testthat/test-LRFinder.R @@ -1,18 +1,23 @@ +ResNet <- reticulate::import_from_path("ResNet", path)$ResNet +lrFinderClass <- reticulate::import_from_path("LrFinder", path=path)$LrFinder + test_that("LR scheduler that changes per batch works", { - model <- ResNet(catFeatures = 10, numFeatures = 1, - sizeEmbedding = 32, sizeHidden = 64, - numLayers = 1, hiddenFactor = 1) - optimizer <- torch::optim_adamw(model$parameters, lr=1e-7) + model <- ResNet(cat_features = 10L, num_features = 1L, + size_embedding = 32L, size_hidden = 64L, + num_layers = 1L, hidden_factor = 1L) + optimizer <- torch$optim$AdamW(model$parameters(), lr=1e-7) - scheduler <- lrPerBatch(optimizer, - startLR = 1e-7, - endLR = 1e-2, - nIters = 5) + ExponentialSchedulerPerBatch <- reticulate::import_from_path("LrFinder", + path=path)$ExponentialSchedulerPerBatch + scheduler <- ExponentialSchedulerPerBatch(optimizer, + end_lr = 1e-2, + num_iter = 5) expect_equal(scheduler$last_epoch, 0) expect_equal(scheduler$optimizer$param_groups[[1]]$lr, 1e-7) for (i in 1:5) { + optimizer$step() scheduler$step() } @@ -23,19 +28,21 @@ test_that("LR scheduler that changes per batch works", { test_that("LR finder works", { - - lr <- lrFinder(dataset, modelType = ResNet, modelParams = list(catFeatures=dataset$numCatFeatures(), - numFeatures=dataset$numNumFeatures(), - sizeEmbedding=32, - sizeHidden=64, - numLayers=1, - hiddenFactor=1), - estimatorSettings = setEstimator(batchSize=32, - seed = 42), - minLR = 3e-4, - maxLR = 10.0, - numLR = 20, - divergenceThreshold = 1.1) + lrFinder <- createLRFinder(modelType="ResNet", + 
modelParameters = list(cat_features=length(dataset$get_cat_features()), + num_features=length(dataset$get_numerical_features()), + size_embedding=32L, + size_hidden=64L, + num_layers=1L, + hidden_factor=1L), + estimatorSettings = setEstimator(batchSize = 32L, + seed=42), + lrSettings = list(minLr=3e-4, + maxLr=10.0, + numLr=20L, + divergenceThreshold=1.1)) + + lr <- lrFinder$get_lr(dataset) expect_true(lr<=10.0) expect_true(lr>=3e-4) @@ -46,19 +53,23 @@ test_that("LR finder works with device specified by a function", { deviceFun <- function(){ dev = "cpu" } - lr <- lrFinder(dataset, modelType = ResNet, modelParams = list(catFeatures=dataset$numCatFeatures(), - numFeatures=dataset$numNumFeatures(), - sizeEmbedding=8, - sizeHidden=16, - numLayers=1, - hiddenFactor=1), - estimatorSettings = setEstimator(batchSize=32, + lrFinder <- createLRFinder(model = "ResNet", + modelParameters = list(cat_features=length(dataset$get_cat_features()), + num_features=length(dataset$get_numerical_features()), + size_embedding=8L, + size_hidden=16L, + num_layers=1L, + hidden_factor=1L), + estimatorSettings = setEstimator(batchSize=32L, seed = 42, device = deviceFun), - minLR = 3e-4, - maxLR = 10.0, - numLR = 20, - divergenceThreshold = 1.1) + lrSettings = list(minLr=3e-4, + maxLr=10.0, + numLr=20L, + divergenceThreshold=1.1) + ) + lr <- lrFinder$get_lr(dataset) + expect_true(lr<=10.0) expect_true(lr>=3e-4) diff --git a/tests/testthat/test-MLP.R b/tests/testthat/test-MLP.R index 4daa6aa..cb6f2c9 100644 --- a/tests/testthat/test-MLP.R +++ b/tests/testthat/test-MLP.R @@ -1,15 +1,15 @@ modelSettings <- setMultiLayerPerceptron( - numLayers = c(2), - sizeHidden = c(32), + numLayers = 2L, + sizeHidden = 32L, dropout = c(0.1), - sizeEmbedding = c(32), + sizeEmbedding = 32L, estimatorSettings = setEstimator( learningRate=c(3e-4), weightDecay = c(1e-6), seed=42, - batchSize=128, - epochs=1 + batchSize=128L, + epochs=1L ), hyperParamSearch = "random", randomSample = 1 @@ -82,40 +82,48 @@ test_that("MLP with runPlp working checks", { test_that("MLP nn-module works ", { + MLP <- reticulate::import_from_path("MLP", path=path)$MLP model <- MLP( - catFeatures = 5, numFeatures = 1, sizeEmbedding = 5, - sizeHidden = 16, numLayers = 1, - activation = torch::nn_relu, - normalization = torch::nn_batch_norm1d, dropout = 0.3, - d_out = 1 + cat_features = 5L, + num_features = 1L, + size_embedding = 5L, + size_hidden = 16L, + num_layers = 1L, + activation = torch$nn$ReLU, + normalization = torch$nn$BatchNorm1d, + dropout = 0.3 ) - pars <- sum(sapply(model$parameters, function(x) prod(x$shape))) + pars <- sum(reticulate::iterate(model$parameters(), function(x) x$numel())) # expected number of parameters expect_equal(pars, 489) input <- list() - input$cat <- torch::torch_randint(0, 5, c(10, 5), dtype = torch::torch_long()) - input$num <- torch::torch_randn(10, 1, dtype = torch::torch_float32()) + input$cat <- torch$randint(0L, 5L, c(10L, 5L), dtype=torch$long) + input$num <- torch$randn(10L, 1L, dtype=torch$float32) output <- model(input) # output is correct shape - expect_equal(output$shape, 10) + expect_equal(output$shape[0], 10L) input$num <- NULL model <- MLP( - catFeatures = 5, numFeatures = 0, sizeEmbedding = 5, - sizeHidden = 16, numLayers = 1, - activation = torch::nn_relu, - normalization = torch::nn_batch_norm1d, dropout = 0.3, - d_out = 1 + cat_features = 5L, + num_features = 0, + size_embedding = 5L, + size_hidden = 16L, + num_layers = 1L, + activation = torch$nn$ReLU, + normalization = torch$nn$BatchNorm1d, + dropout = 0.3, 
+ d_out = 1L ) output <- model(input) # model works without numeric variables - expect_equal(output$shape, 10) + expect_equal(output$shape[0], 10L) }) @@ -123,15 +131,17 @@ test_that("Errors are produced by settings function", { randomSample <- 2 expect_error(setMultiLayerPerceptron( - numLayers = 1, - sizeHidden = 128, + numLayers = 1L, + sizeHidden = 128L, dropout = 0.0, - sizeEmbedding = 128, - weightDecay = 1e-6, - learningRate = 0.01, - seed = 42, + sizeEmbedding = 128L, hyperParamSearch = 'random', - randomSample = randomSample)) + estimatorSettings = setEstimator( + learningRate = 'auto', + weightDecay = c(1e-3), + batchSize = 1024L, + epochs = 30L, + device="cpu"))) }) diff --git a/tests/testthat/test-ResNet.R b/tests/testthat/test-ResNet.R index 7a380a4..81b1ac7 100644 --- a/tests/testthat/test-ResNet.R +++ b/tests/testthat/test-ResNet.R @@ -1,16 +1,16 @@ resSet <- setResNet( - numLayers = c(2), - sizeHidden = c(32), - hiddenFactor = c(2), - residualDropout = c(0.1), - hiddenDropout = c(0.1), - sizeEmbedding = c(32), + numLayers = 2L, + sizeHidden = 32L, + hiddenFactor = 2L, + residualDropout = 0.1, + hiddenDropout = 0.1, + sizeEmbedding = 32L, estimatorSettings = setEstimator(learningRate="auto", weightDecay = c(1e-6), seed=42, - batchSize = 128, - epochs=1), + batchSize = 128L, + epochs=1L), hyperParamSearch = "random", randomSample = 1, ) @@ -22,17 +22,17 @@ test_that("setResNet works", { testthat::expect_true(length(resSet$param) > 0) - expect_error(setResNet(numLayers = c(2), - sizeHidden = c(32), - hiddenFactor = c(2), - residualDropout = c(0.1), - hiddenDropout = c(0.1), - sizeEmbedding = c(32), + expect_error(setResNet(numLayers = 2L, + sizeHidden = 32L, + hiddenFactor = 2L, + residualDropout = 0.1, + hiddenDropout = 0.1, + sizeEmbedding = 32L, estimatorSettings = setEstimator(learningRate=c(3e-4), weightDecay = c(1e-6), seed=42, - batchSize = 128, - epochs=1), + batchSize = 128L, + epochs=1L), hyperParamSearch = "random", randomSample = 2)) }) @@ -89,52 +89,63 @@ test_that("ResNet with runPlp working checks", { test_that("ResNet nn-module works ", { + ResNet <- reticulate::import_from_path("ResNet", path=path)$ResNet model <- ResNet( - catFeatures = 5, numFeatures = 1, sizeEmbedding = 5, - sizeHidden = 16, numLayers = 1, hiddenFactor = 2, - activation = torch::nn_relu, - normalization = torch::nn_batch_norm1d, hiddenDropout = 0.3, - residualDropout = 0.3, d_out = 1 + cat_features = 5L, + num_features = 1L, + size_embedding = 5L, + size_hidden = 16L, + num_layers = 1L, + hidden_factor = 2L, + activation = torch$nn$ReLU, + normalization = torch$nn$BatchNorm1d, + hidden_dropout = 0.3, + residual_dropout = 0.3 ) - pars <- sum(sapply(model$parameters, function(x) prod(x$shape))) + pars <- sum(reticulate::iterate(model$parameters(), function(x) x$numel())) # expected number of parameters expect_equal(pars, 1295) input <- list() - input$cat <- torch::torch_randint(0, 5, c(10, 5), dtype = torch::torch_long()) - input$num <- torch::torch_randn(10, 1, dtype = torch::torch_float32()) + input$cat <- torch$randint(0L, 5L, c(10L, 5L), dtype = torch$long) + input$num <- torch$randn(10L, 1L, dtype = torch$float32) output <- model(input) # output is correct shape - expect_equal(output$shape, 10) + expect_equal(output$shape[0], 10L) input$num <- NULL model <- ResNet( - catFeatures = 5, numFeatures = 0, sizeEmbedding = 5, - sizeHidden = 16, numLayers = 1, hiddenFactor = 2, - activation = torch::nn_relu, - normalization = torch::nn_batch_norm1d, hiddenDropout = 0.3, - residualDropout = 0.3, 
d_out = 1 + cat_features = 5L, + num_features = 0L, + size_embedding = 5L, + size_hidden = 16L, + num_layers = 1L, + hidden_factor = 2L, + activation = torch$nn$ReLU, + normalization = torch$nn$BatchNorm1d, + hidden_dropout = 0.3, + residual_dropout = 0.3 ) output <- model(input) # model works without numeric variables - expect_equal(output$shape, 10) + expect_equal(output$shape[0], 10L) }) test_that("Default Resnet works", { defaultResNet <- setDefaultResNet() params <- defaultResNet$param[[1]] - expect_equal(params$numLayers, 6) - expect_equal(params$sizeHidden, 512) - expect_equal(params$hiddenFactor, 2) + expect_equal(params$numLayers, 6L) + expect_equal(params$sizeHidden, 512L) + expect_equal(params$hiddenFactor, 2L) expect_equal(params$residualDropout, 0.1) expect_equal(params$hiddenDropout, 0.4) - expect_equal(params$sizeEmbedding, 256) + expect_equal(params$sizeEmbedding, 256L) }) @@ -142,15 +153,15 @@ test_that("Errors are produced by settings function", { randomSample <- 2 expect_error(setResNet( - numLayers = 1, - sizeHidden = 128, + numLayers = 1L, + sizeHidden = 128L, hiddenFactor = 1, residualDropout = 0.0, hiddenDropout = 0.0, - sizeEmbedding = 128, - weightDecay = 1e-6, - learningRate = 0.01, - seed = 42, + sizeEmbedding = 128L, + estimatorSettings = setEstimator(weightDecay = 1e-6, + learningRate = 0.01, + seed = 42), hyperParamSearch = 'random', randomSample = randomSample)) }) diff --git a/tests/testthat/test-TrainingCache.R b/tests/testthat/test-TrainingCache.R index feca8a4..757804c 100644 --- a/tests/testthat/test-TrainingCache.R +++ b/tests/testthat/test-TrainingCache.R @@ -1,17 +1,17 @@ -resNetSettings <- setResNet(numLayers = c(1, 2, 4), - sizeHidden = 2^6, - hiddenFactor = 1, +resNetSettings <- setResNet(numLayers = c(1L, 2L, 4L), + sizeHidden = 64L, + hiddenFactor = 1L, residualDropout = 0.5, hiddenDropout = 0.5, - sizeEmbedding = 2^6, - estimatorSettings = setEstimator(learningRate='auto', + sizeEmbedding = 64L, + estimatorSettings = setEstimator(learningRate=3e-4, weightDecay=1e-3, device='cpu', - batchSize=1024, - epochs=30, + batchSize=64L, + epochs=1L, seed=NULL), hyperParamSearch = "random", - randomSample = 2, + randomSample = 3, randomSampleSeed = NULL) trainCache <- TrainingCache$new(testLoc) @@ -84,4 +84,5 @@ test_that("Estimator can resume training from cache", { } ) sink() + }) diff --git a/tests/testthat/test-Transformer.R b/tests/testthat/test-Transformer.R index 52139ad..7e233e0 100644 --- a/tests/testthat/test-Transformer.R +++ b/tests/testthat/test-Transformer.R @@ -1,10 +1,15 @@ settings <- setTransformer( - numBlocks = 1, dimToken = 8, dimOut = 1, - numHeads = 2, attDropout = 0.0, ffnDropout = 0.2, - resDropout = 0.0, dimHidden = 32, + numBlocks = 1L, + dimToken = 8L, + dimOut = 1L, + numHeads = 2L, + attDropout = 0.0, + ffnDropout = 0.2, + resDropout = 0.0, + dimHidden = 32L, estimatorSettings = setEstimator(learningRate = 3e-4, - batchSize=64, - epochs=1), + batchSize=64L, + epochs=1L), randomSample = 1 ) @@ -13,16 +18,16 @@ test_that("Transformer settings work", { testthat::expect_equal(settings$fitFunction, "fitEstimator") testthat::expect_true(length(settings$param) > 0) testthat::expect_error(setTransformer( - numBlocks = 1, dimToken = 50, - numHeads = 7 + numBlocks = 1L, dimToken = 50L, + numHeads = 7L )) testthat::expect_error(setTransformer( - numBlocks = 1, dimToken = c(2, 4), - numHeads = c(2, 4) + numBlocks = 1L, dimToken = c(2L, 4L), + numHeads = c(2L, 4L) )) testthat::expect_error(setTransformer( - numBlocks = 1, dimToken = c(4, 
6), - numHeads = c(2, 4) + numBlocks = 1L, dimToken = c(4L, 6L), + numHeads = c(2L, 4L) )) }) @@ -39,44 +44,57 @@ test_that("fitEstimator with Transformer works", { }) test_that("transformer nn-module works", { + Transformer <- reticulate::import_from_path("Transformer", path=path)$Transformer model <- Transformer( - catFeatures = 5, numFeatures = 1, numBlocks = 2, - dimToken = 16, numHeads = 2, attDropout = 0, ffnDropout = 0, - resDropout = 0, dimHidden = 32 + cat_features = 5L, + num_features = 1L, + num_blocks = 2L, + dim_token = 16L, + num_heads = 2L, + att_dropout = 0, + ffn_dropout = 0, + res_dropout = 0, + dim_hidden = 32L ) - pars <- sum(sapply(model$parameters, function(x) prod(x$shape))) + pars <- sum(reticulate::iterate(model$parameters(), function(x) x$numel())) # expected number of parameters expect_equal(pars, 5697) input <- list() - input$cat <- torch::torch_randint(0, 5, c(10, 5), dtype = torch::torch_long()) - input$num <- torch::torch_randn(10, 1, dtype = torch::torch_float32()) + input$cat <- torch$randint(0L, 5L, c(10L, 5L), dtype = torch$long) + input$num <- torch$randn(10L, 1L, dtype = torch$float32) output <- model(input) # output is correct shape, size of batch - expect_equal(output$shape, 10) + expect_equal(output$shape[0], 10L) input$num <- NULL model <- Transformer( - catFeatures = 5, numFeatures = 0, numBlocks = 2, - dimToken = 16, numHeads = 2, attDropout = 0, ffnDropout = 0, - resDropout = 0, dimHidden = 32 + cat_features = 5L, + num_features = 0L, + num_blocks = 2L, + dim_token = 16L, + num_heads = 2L, + att_dropout = 0, + ffn_dropout = 0, + res_dropout = 0, + dim_hidden = 32L ) output <- model(input) - expect_equal(output$shape, 10) + expect_equal(output$shape[0], 10L) }) test_that("Default Transformer works", { defaultTransformer <- setDefaultTransformer() params <- defaultTransformer$param[[1]] - expect_equal(params$numBlocks, 3) - expect_equal(params$dimToken, 192) - expect_equal(params$numHeads, 8) + expect_equal(params$numBlocks, 3L) + expect_equal(params$dimToken, 192L) + expect_equal(params$numHeads, 8L) expect_equal(params$resDropout, 0.0) expect_equal(params$attDropout, 0.2) @@ -93,8 +111,8 @@ test_that("Errors are produced by settings function", { test_that("dimHidden ratio works as expected", { randomSample <- 4 - dimToken <- c(64, 128, 256, 512) - dimHiddenRatio <- 2 + dimToken <- c(64L, 128L, 256L, 512L) + dimHiddenRatio <- 2L modelSettings <- setTransformer(dimToken = dimToken, dimHiddenRatio = dimHiddenRatio, dimHidden = NULL, diff --git a/vignettes/BuildingDeepModels.Rmd b/vignettes/BuildingDeepModels.Rmd index adbfd98..e95e052 100644 --- a/vignettes/BuildingDeepModels.Rmd +++ b/vignettes/BuildingDeepModels.Rmd @@ -228,18 +228,18 @@ combinations are 2*2*2*2 or 16 but specify ```randomSample=10``` to only try ```{r, eval=FALSE} modelSettings <- setMultiLayerPerceptron( - numLayers = c(3, 5), - sizeHidden = c(64, 128), + numLayers = c(3L, 5L), + sizeHidden = c(64L, 128L), dropout = c(0.2), - sizeEmbedding = c(32, 64), + sizeEmbedding = c(32L, 64L), estimatorSettings = setEstimator( learningRate = c(1e-3, 1e-4), weightDecay = c(1e-5), - batchSize = c(128), - epochs=c(5), - seed=12 + batchSize = c(128L), + epochs=c(5L), + seed=12L ), - randomSample=10 + randomSample=10L ) mlpResult <- PatientLevelPrediction::runPlp( @@ -347,18 +347,18 @@ input only includes one option. 
```{r, eval=FALSE}
resset <- setResNet(
-  numLayers = c(2), 
-  sizeHidden = c(32),
-  hiddenFactor = c(2),
+  numLayers = c(2L), 
+  sizeHidden = c(32L),
+  hiddenFactor = c(2L),
   residualDropout = c(0.1),
   hiddenDropout = c(0.1),
-  sizeEmbedding = c(32),
+  sizeEmbedding = c(32L),
   estimatorSettings = setEstimator(learningRate = c(3e-4),
                                    weightDecay = c(1e-6),
                                    #device='cuda:0', # uncomment to use GPU
-                                   batchSize = 128,
-                                   epochs = 3,
-                                   seed = 42),
+                                   batchSize = 128L,
+                                   epochs = 3L,
+                                   seed = 42L),
   hyperParamSearch = 'random',
   randomSample = 1
 )
@@ -438,22 +438,22 @@ block
 
 ```{r, eval=FALSE}
-modelSettings <- setTransformer(numBlocks = 3,
-                                dimToken = 32,
+modelSettings <- setTransformer(numBlocks = 3L,
+                                dimToken = 32L,
                                 dimOut = 1,
-                                numHeads = 4,
+                                numHeads = 4L,
                                 attDropout = 0.25,
                                 ffnDropout = 0.25,
                                 resDropout = 0,
-                                dimHidden = 128,
+                                dimHidden = 128L,
                                 estimatorSettings = setEstimator(
                                   learningRate = 3e-4,
                                   weightDecay = 1e-6,
-                                  batchSize = 128,
-                                  epochs = 10,
+                                  batchSize = 128L,
+                                  epochs = 10L,
                                   device = 'cpu'
                                 ),
-                                randomSample=1)
+                                randomSample=1L)
 
 
diff --git a/vignettes/FirstModel.Rmd b/vignettes/FirstModel.Rmd
index 4aaeee3..789ec26 100644
--- a/vignettes/FirstModel.Rmd
+++ b/vignettes/FirstModel.Rmd
@@ -120,8 +120,8 @@ library(DeepPatientLevelPrediction)
 modelSettings <- setDefaultResNet(
   estimatorSettings = setEstimator(learningRate=3e-4,
                                    device="cpu",
-                                   batchSize=256,
-                                   epochs=3)
+                                   batchSize=256L,
+                                   epochs=3L)
 )
 ```
diff --git a/vignettes/Installing.Rmd b/vignettes/Installing.Rmd
index 8d77599..5b88419 100644
--- a/vignettes/Installing.Rmd
+++ b/vignettes/Installing.Rmd
@@ -45,6 +45,7 @@ This vignette describes how you need to install the Observational Health Data Sc
 Under Windows the OHDSI Deep Patient Level Prediction (DeepPLP) package requires installing:
 
 - R ( ) - (R \>= 4.0.0, but latest is recommended)
+- Python - The package is tested with python 3.10, but \>= 3.8 should work
 - Rstudio ( )
 - Java ( )
 - RTools ()
@@ -54,6 +55,7 @@ Under Windows the OHDSI Deep Patient Level Prediction (DeepPLP) package requires
 Under Mac and Linux the OHDSI deepPLP package requires installing:
 
 - R ( ) - (R \>= 4.0.0, but latest is recommended)
+- Python - The package is tested with python 3.10, but \>= 3.8 should work
 - Rstudio ( )
 - Java ( )
 - Xcode command line tools(run in terminal: xcode-select --install) [MAC USERS ONLY]
@@ -66,6 +68,25 @@ If you do not want the official release you could install the bleeding edge vers
 
 Note that the latest develop branch could contain bugs, please report them to us if you experience problems.
 
+## Installing Python environment
+
+Since the package uses `pytorch` through `reticulate`, a working python installation is required. The package is tested with python 3.10. An easy way to install python is to use miniconda through `reticulate`:
+
+```{r, echo = TRUE, message = FALSE, warning = FALSE,tidy=FALSE,eval=FALSE}
+install.packages('reticulate')
+reticulate::install_miniconda()
+```
+
+By default `install_miniconda()` creates an environment `r-reticulate` with `python 3.9`. To use `python 3.10` instead, we can do:
+
+```{r, echo = TRUE, message = FALSE, warning = FALSE,tidy=FALSE,eval=FALSE}
+reticulate::conda_install(envname = 'r-reticulate', packages=c('python=3.10'))
+```
+
+Then, when we install `DeepPatientLevelPrediction`, it should install the required python packages in this environment.
+ +If instead you want to use a specific python environment you can set the environment variable `RETICULATE_PYTHON` to point to the python executable of that environment in your `.Renviron` file. You need to do this before installing `DeepPatientLevelPrediction`. + ## Installing DeepPatientLevelPrediction using remotes To install using `remotes` run: @@ -77,12 +98,6 @@ remotes::install_github("OHDSI/PatientLevelPrediction") remotes::install_github("OHDSI/DeepPatientLevelPrediction") ``` -DeepPLP relies on [torch for R](https://torch.mlverse.org/). When torch is installed the user -will be prompted if libtorch and lantern binaries should be downloaded. These binaries are necessary -for the package to run. - -If you are using DeepPLP in an offline environment the function `torch::install_torch_from_file()` can be used. This will first require to download and move the correct binaries to the offline environment. See [torch installation guide](https://torch.mlverse.org/docs/articles/installation.html) for more detailed instructions. - When installing make sure to close any other Rstudio sessions that are using `DeepPatientLevelPrediction` or any dependency. Keeping Rstudio sessions open can cause locks on windows that prevent the package installing. # Testing Installation @@ -103,20 +118,20 @@ populationSettings <- PatientLevelPrediction::createStudyPopulationSettings( riskWindowStart = 1, riskWindowEnd = 365) # a very simple resnet -modelSettings <- setResNet(numLayers = 2, - sizeHidden = 64, - hiddenFactor = 1, +modelSettings <- setResNet(numLayers = 2L, + sizeHidden = 64L, + hiddenFactor = 1L, residualDropout = 0, hiddenDropout = 0.2, - sizeEmbedding = 64, + sizeEmbedding = 64L, estimatorSettings = setEstimator(learningRate = 3e-4, weightDecay = 1e-6, device='cpu', - batchSize=128, - epochs=3, + batchSize=128L, + epochs=3L, seed = 42), hyperParamSearch = 'random', - randomSample = 1) + randomSample = 1L) plpResults <- PatientLevelPrediction::runPlp(plpData = plpData, outcomeId = 3, @@ -148,4 +163,4 @@ citation("DeepPatientLevelPrediction") **Please reference this paper if you use the PLP Package in your work:** -[Reps JM, Schuemie MJ, Suchard MA, Ryan PB, Rijnbeek PR. Design and implementation of a standardized framework to generate and evaluate patient-level prediction models using observational healthcare data. J Am Med Inform Assoc. 2018;25(8):969-975.](http://dx.doi.org/10.1093/jamia/ocy032) \ No newline at end of file +[Reps JM, Schuemie MJ, Suchard MA, Ryan PB, Rijnbeek PR. Design and implementation of a standardized framework to generate and evaluate patient-level prediction models using observational healthcare data. J Am Med Inform Assoc. 
2018;25(8):969-975.](http://dx.doi.org/10.1093/jamia/ocy032) From e501124ea2dc874f09dcb7ba08e09f8c4e9f2377 Mon Sep 17 00:00:00 2001 From: egillax Date: Mon, 28 Aug 2023 13:30:48 +0200 Subject: [PATCH 53/58] fix dataset --- R/Dataset.R | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/R/Dataset.R b/R/Dataset.R index 47bd0c6..f4b5170 100644 --- a/R/Dataset.R +++ b/R/Dataset.R @@ -21,3 +21,15 @@ createDataset <- function(data, labels, plpModel=NULL) { if (is.null(attributes(data)$path)) { # sqlite object attributes(data)$path <- attributes(data)$dbname + } + if (is.null(plpModel)) { + data <- Dataset(r_to_py(normalizePath(attributes(data)$path)), + r_to_py(labels$outcomeCount)) + } + else { + data <- Dataset(r_to_py(normalizePath(attributes(data)$path)), + numerical_features = r_to_py(as.array(which(plpModel$covariateImportance$isNumeric))) ) + } + + return(data) +} \ No newline at end of file From 31ef8323650ae1c6ed3548b59a220a1d11698b84 Mon Sep 17 00:00:00 2001 From: egillax Date: Thu, 7 Sep 2023 10:16:28 +0200 Subject: [PATCH 54/58] update PLP version in DESCRIPTION --- DESCRIPTION | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/DESCRIPTION b/DESCRIPTION index b17cc67..496abc8 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -22,7 +22,7 @@ Imports: dplyr, FeatureExtraction (>= 3.0.0), ParallelLogger (>= 2.0.0), - PatientLevelPrediction (>= 6.0.4), + PatientLevelPrediction (>= 6.3.2), rlang, withr, reticulate (>= 1.31) From 66b8c842491cdb086d30355442afd6ebd84f7e36 Mon Sep 17 00:00:00 2001 From: Egill Axfjord Fridgeirsson Date: Thu, 7 Sep 2023 17:10:21 +0200 Subject: [PATCH 55/58] integer handling in python and input checks (#83) * integer handling in python * add input checks * add tests for wrong inputs --- R/Estimator.R | 28 ++++++++++-- R/HelperFunctions.R | 41 ++++++++++++++++- R/MLP.R | 29 ++++++++++-- R/ResNet.R | 46 ++++++++++++++----- R/Transformer.R | 67 ++++++++++++++++++++++----- inst/python/Estimator.py | 6 +-- inst/python/MLP.py | 19 +++++--- inst/python/ResNet.py | 23 +++++++--- inst/python/Transformer.py | 22 ++++++--- man/checkHigher.Rd | 16 +++++++ man/checkHigherEqual.Rd | 16 +++++++ man/checkIsClass.Rd | 16 +++++++ man/setDefaultResNet.Rd | 2 +- man/setDefaultTransformer.Rd | 2 +- man/setEstimator.Rd | 8 ++-- man/setMultiLayerPerceptron.Rd | 8 ++-- man/setResNet.Rd | 10 ++-- tests/testthat/test-Estimator.R | 71 +++++++++++++++++------------ tests/testthat/test-MLP.R | 30 ++++++------ tests/testthat/test-ResNet.R | 62 ++++++++++++------------- tests/testthat/test-TrainingCache.R | 12 ++--- tests/testthat/test-Transformer.R | 66 ++++++++++++++------------- 22 files changed, 418 insertions(+), 182 deletions(-) create mode 100644 man/checkHigher.Rd create mode 100644 man/checkHigherEqual.Rd create mode 100644 man/checkIsClass.Rd diff --git a/R/Estimator.R b/R/Estimator.R index cb684f9..573c6a6 100644 --- a/R/Estimator.R +++ b/R/Estimator.R @@ -39,18 +39,38 @@ #' @export setEstimator <- function(learningRate='auto', weightDecay = 0.0, - batchSize = 512L, - epochs = 30L, + batchSize = 512, + epochs = 30, device='cpu', optimizer = torch$optim$AdamW, scheduler = list(fun=torch$optim$lr_scheduler$ReduceLROnPlateau, - params=list(patience=1L)), + params=list(patience=1)), criterion = torch$nn$BCEWithLogitsLoss, earlyStopping = list(useEarlyStopping=TRUE, - params = list(patience=4L)), + params = list(patience=4)), metric = "auc", seed = NULL ) { + + checkIsClass(learningRate, c("numeric", "character")) + if (inherits(learningRate, "character")) { + if 
(learningRate != "auto"){
+      stop(paste0('Learning rate should be either a numeric or "auto", you provided: ', learningRate))
+    }
+  }
+  checkIsClass(weightDecay, "numeric")
+  checkHigherEqual(weightDecay, 0.0)
+  checkIsClass(batchSize, c("numeric", "integer"))
+  checkHigher(batchSize, 0)
+  checkIsClass(epochs, c("numeric", "integer"))
+  checkHigher(epochs, 0)
+  checkIsClass(device, c("character", "function"))
+  checkIsClass(scheduler, "list")
+  checkIsClass(earlyStopping, c("list", "NULL"))
+  checkIsClass(metric, c("character", "list"))
+  checkIsClass(seed, c("numeric", "integer", "NULL"))
+
+
   if (length(learningRate)==1 && learningRate=='auto') {findLR <- TRUE} else {findLR <- FALSE}
   if (is.null(seed)) {
     seed <- as.integer(sample(1e5, 1))
diff --git a/R/HelperFunctions.R b/R/HelperFunctions.R
index 83f2f7e..350baf9 100644
--- a/R/HelperFunctions.R
+++ b/R/HelperFunctions.R
@@ -38,4 +38,43 @@ camelCaseToSnakeCase <- function(string) {
 camelCaseToSnakeCaseNames <- function(object) {
   names(object) <- camelCaseToSnakeCase(names(object))
   return(object)
-}
\ No newline at end of file
+}
+
+#' helper function to check class of input
+#'
+#' @param parameter the input parameter to check
+#' @param classes which classes it should belong to (one or more)
+checkIsClass<- function(parameter,classes) {
+  name = deparse(substitute(parameter))
+  if (!inherits(x = parameter, what = classes)) {
+    ParallelLogger::logError(paste0(name, ' should be of class:', classes, ' '))
+    stop(paste0(name, ' is wrong class'))
+  }
+  return(TRUE)
+}
+
+#' helper function to check that input is higher than a certain value
+#'
+#' @param parameter the input parameter to check, can be a vector
+#' @param value which value it should be higher than
+checkHigher <- function(parameter,value) {
+  name = deparse(substitute(parameter))
+  if (!is.numeric(parameter) | all(parameter<=value)) {
+    ParallelLogger::logError(paste0(name, ' needs to be > ',value))
+    stop(paste0(name, ' needs to be > ', value))
+  }
+  return(TRUE)
+}
+
+#' helper function to check that input is higher or equal than a certain value
+#'
+#' @param parameter the input parameter to check, can be a vector
+#' @param value which value it should be higher or equal than
+checkHigherEqual <- function(parameter,value) {
+  name = deparse(substitute(parameter))
+  if (!is.numeric(parameter) | all(parameter<value)) {
+    ParallelLogger::logError(paste0(name, ' needs to be >= ',value))
+    stop(paste0(name, ' needs to be >= ', value))
+  }
+  return(TRUE)
+}
\ No newline at end of file
diff --git a/R/MLP.R b/R/MLP.R
index 175cc46..6ad35e6 100644
--- a/R/MLP.R
+++ b/R/MLP.R
@@ -35,20 +35,39 @@
 #' @param randomSampleSeed Random seed to sample hyperparameter combinations
 #'
 #' @export
-setMultiLayerPerceptron <- function(numLayers = as.integer(1:8),
-                                    sizeHidden = as.integer(2^(6:9)),
+setMultiLayerPerceptron <- function(numLayers = c(1:8),
+                                    sizeHidden = c(2^(6:9)),
                                     dropout = c(seq(0, 0.3, 0.05)),
-                                    sizeEmbedding = as.integer(2^(6:9)),
+                                    sizeEmbedding = c(2^(6:9)),
                                     estimatorSettings = setEstimator(
                                       learningRate = 'auto',
                                       weightDecay = c(1e-6, 1e-3),
-                                      batchSize = 1024L,
-                                      epochs = 30L,
+                                      batchSize = 1024,
+                                      epochs = 30,
                                       device="cpu"),
                                     hyperParamSearch = "random",
                                     randomSample = 100,
                                     randomSampleSeed = NULL) {
+
+  checkIsClass(numLayers, c("integer", "numeric"))
+  checkHigherEqual(numLayers, 1)
+  checkIsClass(sizeHidden, c("integer", "numeric"))
+  checkHigherEqual(sizeHidden, 1)
+
+  checkIsClass(dropout, c("numeric"))
+  checkHigherEqual(dropout, 0)
+
+  checkIsClass(sizeEmbedding, c("numeric", "integer"))
+  checkHigherEqual(sizeEmbedding, 1)
+
+  
checkIsClass(hyperParamSearch, "character") + + checkIsClass(randomSample, c("numeric", "integer")) + checkHigherEqual(randomSample, 1) + + checkIsClass(randomSampleSeed, c("numeric", "integer", "NULL")) + paramGrid <- list( numLayers = numLayers, sizeHidden = sizeHidden, diff --git a/R/ResNet.R b/R/ResNet.R index 8620687..d20aa1d 100644 --- a/R/ResNet.R +++ b/R/ResNet.R @@ -31,15 +31,15 @@ setDefaultResNet <- function(estimatorSettings=setEstimator(learningRate='auto', weightDecay=1e-6, device='cpu', - batchSize=1024L, - epochs=50L, + batchSize=1024, + epochs=50, seed=NULL)) { - resnetSettings <- setResNet(numLayers = 6L, - sizeHidden = 512L, - hiddenFactor = 2L, + resnetSettings <- setResNet(numLayers = 6, + sizeHidden = 512, + hiddenFactor = 2, residualDropout = 0.1, hiddenDropout = 0.4, - sizeEmbedding = 256L, + sizeEmbedding = 256, estimatorSettings = estimatorSettings, hyperParamSearch = 'random', randomSample = 1) @@ -68,22 +68,44 @@ setDefaultResNet <- function(estimatorSettings=setEstimator(learningRate='auto', #' @param randomSample How many random samples from hyperparameter space to use #' @param randomSampleSeed Random seed to sample hyperparameter combinations #' @export -setResNet <- function(numLayers = as.integer(1:8), - sizeHidden = as.integer(2^(6:10)), - hiddenFactor = as.integer(1:4), +setResNet <- function(numLayers = c(1:8), + sizeHidden = c(2^(6:10)), + hiddenFactor = c(1:4), residualDropout = c(seq(0, 0.5, 0.05)), hiddenDropout = c(seq(0, 0.5, 0.05)), - sizeEmbedding = as.integer(2^(6:9)), + sizeEmbedding = c(2^(6:9)), estimatorSettings = setEstimator(learningRate='auto', weightDecay=c(1e-6, 1e-3), device='cpu', - batchSize=1024L, - epochs=30L, + batchSize=1024, + epochs=30, seed=NULL), hyperParamSearch = "random", randomSample = 100, randomSampleSeed = NULL) { + checkIsClass(numLayers, c("integer", "numeric")) + checkHigherEqual(numLayers, 1) + + checkIsClass(sizeHidden, c("integer", "numeric")) + checkHigherEqual(sizeHidden, 1) + + checkIsClass(residualDropout, "numeric") + checkHigherEqual(residualDropout, 0) + + checkIsClass(hiddenDropout, "numeric") + checkHigherEqual(hiddenDropout, 0) + + checkIsClass(sizeEmbedding, c("integer", "numeric")) + checkHigherEqual(sizeEmbedding, 1) + + checkIsClass(hyperParamSearch, "character") + + checkIsClass(randomSample, c("numeric", "integer")) + checkHigherEqual(randomSample, 1) + + checkIsClass(randomSampleSeed, c("numeric", "integer", "NULL")) + paramGrid <- list( numLayers = numLayers, sizeHidden = sizeHidden, diff --git a/R/Transformer.R b/R/Transformer.R index b520b7c..8b0d0c4 100644 --- a/R/Transformer.R +++ b/R/Transformer.R @@ -27,19 +27,19 @@ setDefaultTransformer <- function(estimatorSettings=setEstimator( learningRate = 'auto', weightDecay = 1e-4, - batchSize=512L, - epochs=10L, + batchSize=512, + epochs=10, seed=NULL, device='cpu') ) { - transformerSettings <- setTransformer(numBlocks = 3L, - dimToken = 192L, - dimOut = 1L, - numHeads = 8L, + transformerSettings <- setTransformer(numBlocks = 3, + dimToken = 192, + dimOut = 1, + numHeads = 8, attDropout = 0.2, ffnDropout = 0.1, resDropout = 0.0, - dimHidden = 256L, + dimHidden = 256, estimatorSettings=estimatorSettings, hyperParamSearch = 'random', randomSample = 1) @@ -67,16 +67,61 @@ setDefaultTransformer <- function(estimatorSettings=setEstimator( #' @param randomSampleSeed Random seed to sample hyperparameter combinations #' #' @export -setTransformer <- function(numBlocks = 3, dimToken = 96, dimOut = 1, - numHeads = 8, attDropout = 0.25, ffnDropout = 0.25, - 
resDropout = 0, dimHidden = 512, dimHiddenRatio = NULL, +setTransformer <- function(numBlocks = 3, + dimToken = 96, + dimOut = 1, + numHeads = 8, + attDropout = 0.25, + ffnDropout = 0.25, + resDropout = 0, + dimHidden = 512, + dimHiddenRatio = NULL, estimatorSettings=setEstimator(weightDecay = 1e-6, batchSize=1024, epochs=10, seed=NULL), hyperParamSearch = "random", - randomSample = 1, randomSampleSeed = NULL) { + randomSample = 1, + randomSampleSeed = NULL) { + + checkIsClass(numBlocks, c("integer", "numeric")) + checkHigherEqual(numBlocks, 1) + + checkIsClass(dimToken, c("integer", "numeric")) + checkHigherEqual(dimToken, 1) + + checkIsClass(dimOut, c("integer", "numeric")) + checkHigherEqual(dimOut, 1) + + checkIsClass(numHeads, c("integer", "numeric")) + checkHigherEqual(numHeads, 1) + checkIsClass(attDropout, c("numeric")) + checkHigherEqual(attDropout, 0) + + checkIsClass(ffnDropout, c("numeric")) + checkHigherEqual(ffnDropout, 0) + + checkIsClass(resDropout, c("numeric")) + checkHigherEqual(resDropout, 0) + + checkIsClass(dimHidden, c("integer", "numeric", "NULL")) + if (!is.null(dimHidden)) { + checkHigherEqual(dimHidden, 1) + } + + checkIsClass(dimHiddenRatio, c("numeric", "NULL")) + if (!is.null(dimHiddenRatio)) { + checkHigher(dimHiddenRatio, 0) + } + + checkIsClass(hyperParamSearch, "character") + + checkIsClass(randomSample, c("numeric", "integer")) + checkHigherEqual(randomSample, 1) + + checkIsClass(randomSampleSeed, c("numeric", "integer", "NULL")) + if (any(with(expand.grid(dimToken = dimToken, numHeads = numHeads), dimToken %% numHeads != 0))) { stop(paste( "dimToken needs to divisible by numHeads. dimToken =", dimToken, diff --git a/inst/python/Estimator.py b/inst/python/Estimator.py index 5997817..f8a182f 100644 --- a/inst/python/Estimator.py +++ b/inst/python/Estimator.py @@ -28,13 +28,13 @@ def __init__(self, self.model_parameters = model_parameters self.estimator_settings = estimator_settings - self.epochs = estimator_settings.get("epochs", 5) + self.epochs = int(estimator_settings.get("epochs", 5)) self.learning_rate = estimator_settings.get("learning_rate", 3e-4) self.weight_decay = estimator_settings.get("weight_decay", 1e-5) - self.batch_size = estimator_settings.get("batch_size", 1024) + self.batch_size = int(estimator_settings.get("batch_size", 1024)) self.prefix = estimator_settings.get("prefix", self.model.name) - self.previous_epochs = estimator_settings.get("previous_epochs", 0) + self.previous_epochs = int(estimator_settings.get("previous_epochs", 0)) self.model.to(device=self.device) self.optimizer = estimator_settings["optimizer"](params=self.model.parameters(), diff --git a/inst/python/MLP.py b/inst/python/MLP.py index 278e324..3b8b03b 100644 --- a/inst/python/MLP.py +++ b/inst/python/MLP.py @@ -6,17 +6,24 @@ class MLP(nn.Module): def __init__(self, - cat_features, - num_features, - size_embedding, - size_hidden, - num_layers, + cat_features: int, + num_features: int, + size_embedding: int, + size_hidden: int, + num_layers: int, activation=nn.ReLU, normalization=nn.BatchNorm1d, dropout=None, - d_out=1): + d_out: int = 1): super(MLP, self).__init__() self.name = "MLP" + cat_features = int(cat_features) + num_features = int(num_features) + size_embedding = int(size_embedding) + size_hidden = int(size_hidden) + num_layers = int(num_layers) + d_out = int(d_out) + self.embedding = nn.EmbeddingBag(cat_features + 1, size_embedding, padding_idx=0) diff --git a/inst/python/ResNet.py b/inst/python/ResNet.py index 2ec6232..f680eb2 100644 --- 
a/inst/python/ResNet.py +++ b/inst/python/ResNet.py @@ -7,20 +7,29 @@ class ResNet(nn.Module): def __init__(self, - cat_features, - num_features=0, - size_embedding=256, - size_hidden=256, - num_layers=2, - hidden_factor=1, + cat_features: int, + num_features: int = 0, + size_embedding: int = 256, + size_hidden: int = 256, + num_layers: int = 2, + hidden_factor: int = 1, activation=nn.ReLU, normalization=nn.BatchNorm1d, hidden_dropout=0, residual_dropout=0, - dim_out=1, + dim_out: int = 1, concat_num=True): super(ResNet, self).__init__() self.name = 'ResNet' + cat_features = int(cat_features) + num_features = int(num_features) + size_embedding = int(size_embedding) + size_hidden = int(size_hidden) + num_layers = int(num_layers) + hidden_factor = int(hidden_factor) + dim_out = int(dim_out) + + self.embedding = nn.EmbeddingBag( num_embeddings=cat_features + 1, embedding_dim=size_embedding, diff --git a/inst/python/Transformer.py b/inst/python/Transformer.py index 395026a..5944e1b 100644 --- a/inst/python/Transformer.py +++ b/inst/python/Transformer.py @@ -20,16 +20,16 @@ def forward(self, x): class Transformer(nn.Module): def __init__(self, - cat_features, - num_features, - num_blocks, - dim_token, - num_heads, + cat_features: int, + num_features: int, + num_blocks: int, + dim_token: int, + num_heads: int, att_dropout, ffn_dropout, res_dropout, - dim_hidden, - dim_out=1, + dim_hidden: int, + dim_out: int = 1, head_activation=nn.ReLU, activation=ReGLU, ffn_norm=nn.LayerNorm, @@ -37,6 +37,14 @@ def __init__(self, att_norm=nn.LayerNorm): super(Transformer, self).__init__() self.name = "Transformer" + cat_features = int(cat_features) + num_features = int(num_features) + num_blocks = int(num_blocks) + dim_token = int(dim_token) + num_heads = int(num_heads) + dim_hidden = int(dim_hidden) + dim_out = int(dim_out) + self.categorical_embedding = nn.Embedding(cat_features + 1, dim_token, padding_idx=0) if num_features != 0 and num_features is not None: diff --git a/man/checkHigher.Rd b/man/checkHigher.Rd new file mode 100644 index 0000000..bab0d76 --- /dev/null +++ b/man/checkHigher.Rd @@ -0,0 +1,16 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/HelperFunctions.R +\name{checkHigher} +\alias{checkHigher} +\title{helper function to check that input is higher than a certain value} +\usage{ +checkHigher(parameter, value) +} +\arguments{ +\item{parameter}{the input parameter to check, can be a vector} + +\item{value}{which value it should be higher than} +} +\description{ +helper function to check that input is higher than a certain value +} diff --git a/man/checkHigherEqual.Rd b/man/checkHigherEqual.Rd new file mode 100644 index 0000000..60230e6 --- /dev/null +++ b/man/checkHigherEqual.Rd @@ -0,0 +1,16 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/HelperFunctions.R +\name{checkHigherEqual} +\alias{checkHigherEqual} +\title{helper function to check that input is higher or equal than a certain value} +\usage{ +checkHigherEqual(parameter, value) +} +\arguments{ +\item{parameter}{the input parameter to check, can be a vector} + +\item{value}{which value it should be higher or equal than} +} +\description{ +helper function to check that input is higher or equal than a certain value +} diff --git a/man/checkIsClass.Rd b/man/checkIsClass.Rd new file mode 100644 index 0000000..5af3ad8 --- /dev/null +++ b/man/checkIsClass.Rd @@ -0,0 +1,16 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/HelperFunctions.R 
+\name{checkIsClass} +\alias{checkIsClass} +\title{helper function to check class of input} +\usage{ +checkIsClass(parameter, classes) +} +\arguments{ +\item{parameter}{the input parameter to check} + +\item{classes}{which classes it should belong to (one or more)} +} +\description{ +helper function to check class of input +} diff --git a/man/setDefaultResNet.Rd b/man/setDefaultResNet.Rd index 2ff34c1..c26f354 100644 --- a/man/setDefaultResNet.Rd +++ b/man/setDefaultResNet.Rd @@ -6,7 +6,7 @@ \usage{ setDefaultResNet( estimatorSettings = setEstimator(learningRate = "auto", weightDecay = 1e-06, device = - "cpu", batchSize = 1024L, epochs = 50L, seed = NULL) + "cpu", batchSize = 1024, epochs = 50, seed = NULL) ) } \arguments{ diff --git a/man/setDefaultTransformer.Rd b/man/setDefaultTransformer.Rd index 049b121..e0a3a32 100644 --- a/man/setDefaultTransformer.Rd +++ b/man/setDefaultTransformer.Rd @@ -6,7 +6,7 @@ \usage{ setDefaultTransformer( estimatorSettings = setEstimator(learningRate = "auto", weightDecay = 1e-04, batchSize - = 512L, epochs = 10L, seed = NULL, device = "cpu") + = 512, epochs = 10, seed = NULL, device = "cpu") ) } \arguments{ diff --git a/man/setEstimator.Rd b/man/setEstimator.Rd index 18ed8fa..fcb33ab 100644 --- a/man/setEstimator.Rd +++ b/man/setEstimator.Rd @@ -7,14 +7,14 @@ setEstimator( learningRate = "auto", weightDecay = 0, - batchSize = 512L, - epochs = 30L, + batchSize = 512, + epochs = 30, device = "cpu", optimizer = torch$optim$AdamW, scheduler = list(fun = torch$optim$lr_scheduler$ReduceLROnPlateau, params = - list(patience = 1L)), + list(patience = 1)), criterion = torch$nn$BCEWithLogitsLoss, - earlyStopping = list(useEarlyStopping = TRUE, params = list(patience = 4L)), + earlyStopping = list(useEarlyStopping = TRUE, params = list(patience = 4)), metric = "auc", seed = NULL ) diff --git a/man/setMultiLayerPerceptron.Rd b/man/setMultiLayerPerceptron.Rd index 01418fe..f3676cd 100644 --- a/man/setMultiLayerPerceptron.Rd +++ b/man/setMultiLayerPerceptron.Rd @@ -5,12 +5,12 @@ \title{setMultiLayerPerceptron} \usage{ setMultiLayerPerceptron( - numLayers = as.integer(1:8), - sizeHidden = as.integer(2^(6:9)), + numLayers = c(1:8), + sizeHidden = c(2^(6:9)), dropout = c(seq(0, 0.3, 0.05)), - sizeEmbedding = as.integer(2^(6:9)), + sizeEmbedding = c(2^(6:9)), estimatorSettings = setEstimator(learningRate = "auto", weightDecay = c(1e-06, 0.001), - batchSize = 1024L, epochs = 30L, device = "cpu"), + batchSize = 1024, epochs = 30, device = "cpu"), hyperParamSearch = "random", randomSample = 100, randomSampleSeed = NULL diff --git a/man/setResNet.Rd b/man/setResNet.Rd index 9dcffa7..fbe3d77 100644 --- a/man/setResNet.Rd +++ b/man/setResNet.Rd @@ -5,14 +5,14 @@ \title{setResNet} \usage{ setResNet( - numLayers = as.integer(1:8), - sizeHidden = as.integer(2^(6:10)), - hiddenFactor = as.integer(1:4), + numLayers = c(1:8), + sizeHidden = c(2^(6:10)), + hiddenFactor = c(1:4), residualDropout = c(seq(0, 0.5, 0.05)), hiddenDropout = c(seq(0, 0.5, 0.05)), - sizeEmbedding = as.integer(2^(6:9)), + sizeEmbedding = c(2^(6:9)), estimatorSettings = setEstimator(learningRate = "auto", weightDecay = c(1e-06, 0.001), - device = "cpu", batchSize = 1024L, epochs = 30L, seed = NULL), + device = "cpu", batchSize = 1024, epochs = 30, seed = NULL), hyperParamSearch = "random", randomSample = 100, randomSampleSeed = NULL diff --git a/tests/testthat/test-Estimator.R b/tests/testthat/test-Estimator.R index a8711e8..b9ecadd 100644 --- a/tests/testthat/test-Estimator.R +++ 
b/tests/testthat/test-Estimator.R @@ -4,16 +4,16 @@ numFeatures <- small_dataset$dataset$get_numerical_features()$shape[[1]] modelParameters <- list( cat_features = catFeatures, num_features = numFeatures, - size_embedding = 16L, - size_hidden = 16L, - num_layers = 2L, - hidden_factor = 2L + size_embedding = 16, + size_hidden = 16, + num_layers = 2, + hidden_factor = 2 ) estimatorSettings <- setEstimator(learningRate = 3e-4, weightDecay = 0.0, - batchSize = 128L, - epochs = 5L, + batchSize = 128, + epochs = 5, device = 'cpu', seed=42, optimizer=torch$optim$AdamW, @@ -44,9 +44,24 @@ test_that("Estimator initialization works", { estimator$model_parameters, modelParameters ) +}) - +test_that("Estimator detects wrong inputs", { + + testthat::expect_error(setEstimator(learningRate='notAuto')) + testthat::expect_error(setEstimator(weightDecay = -1)) + testthat::expect_error(setEstimator(weightDecay = "text")) + testthat::expect_error(setEstimator(batchSize = 0)) + testthat::expect_error(setEstimator(batchSize = "text")) + testthat::expect_error(setEstimator(epochs = 0)) + testthat::expect_error(setEstimator(epochs = "test")) + testthat::expect_error(setEstimator(device = 1)) + testthat::expect_error(setEstimator(scheduler = "notList")) + testthat::expect_error(setEstimator(earlyStopping = "notListorNull")) + testthat::expect_error(setEstimator(metric = 1)) + testthat::expect_error(setEstimator(seed = "32")) }) + sink(nullfile()) estimator$fit(small_dataset, small_dataset) sink() @@ -93,8 +108,8 @@ test_that("estimator fitting works", { estimatorSettings <- setEstimator(learningRate = 3e-4, weightDecay = 0.0, - batchSize = 128L, - epochs = 5L, + batchSize = 128, + epochs = 5, device = 'cpu', metric= "loss") @@ -134,11 +149,11 @@ test_that("early stopping works", { }) modelSettings <- setResNet( - numLayers = 1L, sizeHidden = 16L, hiddenFactor = 1L, + numLayers = 1, sizeHidden = 16, hiddenFactor = 1, residualDropout = 0, hiddenDropout = 0, - sizeEmbedding = 16L, hyperParamSearch = "random", - randomSample = 1L, - setEstimator(epochs=1L, + sizeEmbedding = 16, hyperParamSearch = "random", + randomSample = 1, + setEstimator(epochs=1, learningRate = 3e-4) ) @@ -203,8 +218,8 @@ test_that("Estimator without earlyStopping works", { # estimator without earlyStopping estimatorSettings <- setEstimator(learningRate = 3e-4, weightDecay = 0.0, - batchSize = 128L, - epochs = 1L, + batchSize = 128, + epochs = 1, device = 'cpu', earlyStopping = NULL) @@ -223,12 +238,12 @@ test_that("Estimator without earlyStopping works", { test_that("Early stopper can use loss and stops early", { estimatorSettings <- setEstimator(learningRate = 3e-2, weightDecay = 0.0, - batchSize = 128L, - epochs = 10L, + batchSize = 128, + epochs = 10, device = 'cpu', earlyStopping =list(useEarlyStopping=TRUE, params = list(mode=c('min'), - patience=1L)), + patience=1)), metric = 'loss', seed=42) @@ -254,9 +269,9 @@ test_that('Custom metric in estimator works', { estimatorSettings <- setEstimator(learningRate = 3e-4, weightDecay = 0.0, - batchSize = 128L, + batchSize = 128, device = "cpu", - epochs = 1L, + epochs = 1, metric=list(fun=metric_fun, name="auprc", mode="max")) @@ -277,15 +292,15 @@ test_that('Custom metric in estimator works', { test_that("setEstimator with paramsToTune is correctly added to hyperparameters", { estimatorSettings <- setEstimator(learningRate=c(3e-4,1e-3), - batchSize=128L, - epochs=1L, + batchSize=128, + epochs=1, device="cpu", metric=c("auc", "auprc"), earlyStopping = list(useEarlyStopping=TRUE, - 
params=list(patience=c(4L,10L)))) - modelSettings <- setResNet(numLayers = 1L, sizeHidden = 64L, - hiddenFactor = 1L, residualDropout = 0.2, - hiddenDropout = 0.2,sizeEmbedding = 128L, + params=list(patience=c(4,10)))) + modelSettings <- setResNet(numLayers = 1, sizeHidden = 64, + hiddenFactor = 1, residualDropout = 0.2, + hiddenDropout = 0.2,sizeEmbedding = 128, estimatorSettings = estimatorSettings, hyperParamSearch = "grid") @@ -316,7 +331,7 @@ test_that("device as a function argument works", { learningRate = 3e-4) model <- setDefaultResNet(estimatorSettings = estimatorSettings) - model$param[[1]]$catFeatures <- 10L + model$param[[1]]$catFeatures <- 10 estimator <- createEstimator(modelType = modelType, modelParameters = model$param[[1]], @@ -330,7 +345,7 @@ test_that("device as a function argument works", { learningRate = 3e-4) model <- setDefaultResNet(estimatorSettings = estimatorSettings) - model$param[[1]]$catFeatures <- 10L + model$param[[1]]$catFeatures <- 10 estimator <- createEstimator(modelType = modelType, modelParameters = model$param[[1]], diff --git a/tests/testthat/test-MLP.R b/tests/testthat/test-MLP.R index cb6f2c9..5cbd7a5 100644 --- a/tests/testthat/test-MLP.R +++ b/tests/testthat/test-MLP.R @@ -1,15 +1,15 @@ modelSettings <- setMultiLayerPerceptron( - numLayers = 2L, - sizeHidden = 32L, + numLayers = 2, + sizeHidden = 32, dropout = c(0.1), - sizeEmbedding = 32L, + sizeEmbedding = 32, estimatorSettings = setEstimator( learningRate=c(3e-4), weightDecay = c(1e-6), seed=42, - batchSize=128L, - epochs=1L + batchSize=128, + epochs=1 ), hyperParamSearch = "random", randomSample = 1 @@ -84,11 +84,11 @@ test_that("MLP with runPlp working checks", { test_that("MLP nn-module works ", { MLP <- reticulate::import_from_path("MLP", path=path)$MLP model <- MLP( - cat_features = 5L, - num_features = 1L, - size_embedding = 5L, - size_hidden = 16L, - num_layers = 1L, + cat_features = 5, + num_features = 1, + size_embedding = 5, + size_hidden = 16, + num_layers = 1, activation = torch$nn$ReLU, normalization = torch$nn$BatchNorm1d, dropout = 0.3 @@ -131,16 +131,16 @@ test_that("Errors are produced by settings function", { randomSample <- 2 expect_error(setMultiLayerPerceptron( - numLayers = 1L, - sizeHidden = 128L, + numLayers = 1, + sizeHidden = 128, dropout = 0.0, - sizeEmbedding = 128L, + sizeEmbedding = 128, hyperParamSearch = 'random', estimatorSettings = setEstimator( learningRate = 'auto', weightDecay = c(1e-3), - batchSize = 1024L, - epochs = 30L, + batchSize = 1024, + epochs = 30, device="cpu"))) }) diff --git a/tests/testthat/test-ResNet.R b/tests/testthat/test-ResNet.R index 81b1ac7..29dfb8e 100644 --- a/tests/testthat/test-ResNet.R +++ b/tests/testthat/test-ResNet.R @@ -1,16 +1,16 @@ resSet <- setResNet( - numLayers = 2L, - sizeHidden = 32L, - hiddenFactor = 2L, + numLayers = 2, + sizeHidden = 32, + hiddenFactor = 2, residualDropout = 0.1, hiddenDropout = 0.1, - sizeEmbedding = 32L, + sizeEmbedding = 32, estimatorSettings = setEstimator(learningRate="auto", weightDecay = c(1e-6), seed=42, - batchSize = 128L, - epochs=1L), + batchSize = 128, + epochs=1), hyperParamSearch = "random", randomSample = 1, ) @@ -22,17 +22,17 @@ test_that("setResNet works", { testthat::expect_true(length(resSet$param) > 0) - expect_error(setResNet(numLayers = 2L, - sizeHidden = 32L, - hiddenFactor = 2L, + expect_error(setResNet(numLayers = 2, + sizeHidden = 32, + hiddenFactor = 2, residualDropout = 0.1, hiddenDropout = 0.1, - sizeEmbedding = 32L, + sizeEmbedding = 32, estimatorSettings = 
setEstimator(learningRate=c(3e-4), weightDecay = c(1e-6), seed=42, - batchSize = 128L, - epochs=1L), + batchSize = 128, + epochs=1), hyperParamSearch = "random", randomSample = 2)) }) @@ -91,12 +91,12 @@ test_that("ResNet with runPlp working checks", { test_that("ResNet nn-module works ", { ResNet <- reticulate::import_from_path("ResNet", path=path)$ResNet model <- ResNet( - cat_features = 5L, - num_features = 1L, - size_embedding = 5L, - size_hidden = 16L, - num_layers = 1L, - hidden_factor = 2L, + cat_features = 5, + num_features = 1, + size_embedding = 5, + size_hidden = 16, + num_layers = 1, + hidden_factor = 2, activation = torch$nn$ReLU, normalization = torch$nn$BatchNorm1d, hidden_dropout = 0.3, @@ -120,12 +120,12 @@ test_that("ResNet nn-module works ", { input$num <- NULL model <- ResNet( - cat_features = 5L, - num_features = 0L, - size_embedding = 5L, - size_hidden = 16L, - num_layers = 1L, - hidden_factor = 2L, + cat_features = 5, + num_features = 0, + size_embedding = 5, + size_hidden = 16, + num_layers = 1, + hidden_factor = 2, activation = torch$nn$ReLU, normalization = torch$nn$BatchNorm1d, hidden_dropout = 0.3, @@ -140,12 +140,12 @@ test_that("Default Resnet works", { defaultResNet <- setDefaultResNet() params <- defaultResNet$param[[1]] - expect_equal(params$numLayers, 6L) - expect_equal(params$sizeHidden, 512L) - expect_equal(params$hiddenFactor, 2L) + expect_equal(params$numLayers, 6) + expect_equal(params$sizeHidden, 512) + expect_equal(params$hiddenFactor, 2) expect_equal(params$residualDropout, 0.1) expect_equal(params$hiddenDropout, 0.4) - expect_equal(params$sizeEmbedding, 256L) + expect_equal(params$sizeEmbedding, 256) }) @@ -153,12 +153,12 @@ test_that("Errors are produced by settings function", { randomSample <- 2 expect_error(setResNet( - numLayers = 1L, - sizeHidden = 128L, + numLayers = 1, + sizeHidden = 128, hiddenFactor = 1, residualDropout = 0.0, hiddenDropout = 0.0, - sizeEmbedding = 128L, + sizeEmbedding = 128, estimatorSettings = setEstimator(weightDecay = 1e-6, learningRate = 0.01, seed = 42), diff --git a/tests/testthat/test-TrainingCache.R b/tests/testthat/test-TrainingCache.R index e4ba6b2..8b5d546 100644 --- a/tests/testthat/test-TrainingCache.R +++ b/tests/testthat/test-TrainingCache.R @@ -1,14 +1,14 @@ -resNetSettings <- setResNet(numLayers = c(1L, 2L, 4L), - sizeHidden = 64L, - hiddenFactor = 1L, +resNetSettings <- setResNet(numLayers = c(1, 2, 4), + sizeHidden = 64, + hiddenFactor = 1, residualDropout = 0.5, hiddenDropout = 0.5, - sizeEmbedding = 64L, + sizeEmbedding = 64, estimatorSettings = setEstimator(learningRate=3e-4, weightDecay=1e-3, device='cpu', - batchSize=64L, - epochs=1L, + batchSize=64, + epochs=1, seed=NULL), hyperParamSearch = "random", randomSample = 3, diff --git a/tests/testthat/test-Transformer.R b/tests/testthat/test-Transformer.R index 7e233e0..b3e421f 100644 --- a/tests/testthat/test-Transformer.R +++ b/tests/testthat/test-Transformer.R @@ -1,15 +1,15 @@ settings <- setTransformer( - numBlocks = 1L, - dimToken = 8L, - dimOut = 1L, - numHeads = 2L, + numBlocks = 1, + dimToken = 8, + dimOut = 1, + numHeads = 2, attDropout = 0.0, ffnDropout = 0.2, resDropout = 0.0, - dimHidden = 32L, + dimHidden = 32, estimatorSettings = setEstimator(learningRate = 3e-4, - batchSize=64L, - epochs=1L), + batchSize=64, + epochs=1), randomSample = 1 ) @@ -18,16 +18,16 @@ test_that("Transformer settings work", { testthat::expect_equal(settings$fitFunction, "fitEstimator") testthat::expect_true(length(settings$param) > 0) 
testthat::expect_error(setTransformer( - numBlocks = 1L, dimToken = 50L, - numHeads = 7L + numBlocks = 1, dimToken = 50, + numHeads = 7 )) testthat::expect_error(setTransformer( - numBlocks = 1L, dimToken = c(2L, 4L), - numHeads = c(2L, 4L) + numBlocks = 1, dimToken = c(2, 4), + numHeads = c(2, 4) )) testthat::expect_error(setTransformer( - numBlocks = 1L, dimToken = c(4L, 6L), - numHeads = c(2L, 4L) + numBlocks = 1, dimToken = c(4, 6), + numHeads = c(2, 4) )) }) @@ -46,15 +46,15 @@ test_that("fitEstimator with Transformer works", { test_that("transformer nn-module works", { Transformer <- reticulate::import_from_path("Transformer", path=path)$Transformer model <- Transformer( - cat_features = 5L, - num_features = 1L, - num_blocks = 2L, - dim_token = 16L, - num_heads = 2L, + cat_features = 5, + num_features = 1, + num_blocks = 2, + dim_token = 16, + num_heads = 2, att_dropout = 0, ffn_dropout = 0, res_dropout = 0, - dim_hidden = 32L + dim_hidden = 32 ) pars <- sum(reticulate::iterate(model$parameters(), function(x) x$numel())) @@ -74,15 +74,15 @@ test_that("transformer nn-module works", { input$num <- NULL model <- Transformer( - cat_features = 5L, - num_features = 0L, - num_blocks = 2L, - dim_token = 16L, - num_heads = 2L, + cat_features = 5, + num_features = 0, + num_blocks = 2, + dim_token = 16, + num_heads = 2, att_dropout = 0, ffn_dropout = 0, res_dropout = 0, - dim_hidden = 32L + dim_hidden = 32 ) output <- model(input) expect_equal(output$shape[0], 10L) @@ -92,9 +92,9 @@ test_that("Default Transformer works", { defaultTransformer <- setDefaultTransformer() params <- defaultTransformer$param[[1]] - expect_equal(params$numBlocks, 3L) - expect_equal(params$dimToken, 192L) - expect_equal(params$numHeads, 8L) + expect_equal(params$numBlocks, 3) + expect_equal(params$dimToken, 192) + expect_equal(params$numHeads, 8) expect_equal(params$resDropout, 0.0) expect_equal(params$attDropout, 0.2) @@ -111,14 +111,18 @@ test_that("Errors are produced by settings function", { test_that("dimHidden ratio works as expected", { randomSample <- 4 - dimToken <- c(64L, 128L, 256L, 512L) - dimHiddenRatio <- 2L + dimToken <- c(64, 128, 256, 512) + dimHiddenRatio <- 2 modelSettings <- setTransformer(dimToken = dimToken, dimHiddenRatio = dimHiddenRatio, dimHidden = NULL, randomSample = randomSample) dimHidden <- unlist(lapply(modelSettings$param, function(x) x$dimHidden)) tokens <- unlist(lapply(modelSettings$param, function(x) x$dimToken)) - expect_true(all(dimHidden == dimHiddenRatio * tokens)) + testthat::expect_true(all(dimHidden == dimHiddenRatio * tokens)) + testthat::expect_error(setTransformer(dimHidden = NULL, + dimHiddenRatio = NULL)) + testthat::expect_error(setTransformer(dimHidden = 256, + dimHiddenRatio = 4/3)) }) From 85be68991ca1daa756db9846c9d4124f25ffd66a Mon Sep 17 00:00:00 2001 From: Henrik Date: Thu, 7 Sep 2023 17:23:20 +0200 Subject: [PATCH 56/58] Ensure that param search is completed in empty cache test (#84) --- tests/testthat/test-TrainingCache.R | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tests/testthat/test-TrainingCache.R b/tests/testthat/test-TrainingCache.R index 8b5d546..eb4ab17 100644 --- a/tests/testthat/test-TrainingCache.R +++ b/tests/testthat/test-TrainingCache.R @@ -84,4 +84,6 @@ test_that("Estimator can resume training from cache", { } ) sink() + trainCache <- TrainingCache$new(analysisPath) + testthat::expect_equal(is.na(trainCache$getLastGridSearchIndex()), TRUE) }) From 4c83897622e0ff871b82a4ab0f6f237866a69ed6 Mon Sep 17 00:00:00 2001 From: Egill Axfjord 
Fridgeirsson Date: Thu, 7 Sep 2023 20:00:21 +0200 Subject: [PATCH 57/58] use ubuntu 22.04 in CI (#85) --- .github/workflows/R_CDM_check_hades.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/R_CDM_check_hades.yaml b/.github/workflows/R_CDM_check_hades.yaml index 669e780..fa5072b 100644 --- a/.github/workflows/R_CDM_check_hades.yaml +++ b/.github/workflows/R_CDM_check_hades.yaml @@ -22,7 +22,7 @@ jobs: config: - {os: windows-latest, r: 'release'} - {os: macOS-latest, r: 'release'} - - {os: ubuntu-20.04, r: 'release', rspm: "https://packagemanager.rstudio.com/cran/__linux__/focal/latest"} + - {os: ubuntu-22.04, r: 'release', rspm: "https://packagemanager.rstudio.com/cran/__linux__/jammy/latest"} env: GITHUB_PAT: ${{ secrets.GH_TOKEN }} From 904e926e1b8a0819fe1e5bef24f094b27961234d Mon Sep 17 00:00:00 2001 From: Egill Axfjord Fridgeirsson Date: Fri, 8 Sep 2023 10:36:33 +0200 Subject: [PATCH 58/58] Update NEWS.md --- NEWS.md | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/NEWS.md b/NEWS.md index b47f33d..3da704b 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,6 +1,13 @@ DeepPatientLevelPrediction 2.0.0 ====================== - - New backend which uses pytorch through reticulate insteada of torch in R + - New backend which uses pytorch through reticulate instead of torch in R + - All models ported over to python + - Dataset class now in python + - Estimator class in python + - Learning rate finder in python + - Added input checks and tests for wrong inputs + - Training-cache for single hyperparameter combination added + - Fixed empty test for training-cache DeepPatientLevelPrediction 1.1.6 ======================