diff --git a/R/Dataset.R b/R/Dataset.R
index bd3a8b5..9d10343 100644
--- a/R/Dataset.R
+++ b/R/Dataset.R
@@ -1,115 +1,127 @@
-#' A torch dataset 
+#' A torch dataset
 #' @import data.table
 #' @export
 Dataset <- torch::dataset(
-  name = 'myDataset',
+  name = "myDataset",
   #' @param data a dataframe like object with the covariates
   #' @param labels a dataframe with the labels
   #' @param numericalIndex in what column numeric data is in (if any)
   #' @param all if TRUE, returns all features instead of splitting num/cat
-  initialize = function(data, labels = NULL, numericalIndex = NULL, all=FALSE) {
+  initialize = function(data, labels = NULL, numericalIndex = NULL, all = FALSE) {
     # determine numeric
-    if (is.null(numericalIndex) && all==FALSE) {
-      numericalIndex <- data %>% dplyr::group_by(columnId) %>% dplyr::collect() %>%
-        dplyr::summarise(n=dplyr::n_distinct(.data$covariateValue)) %>% dplyr::pull(n)>1
-      self$numericalIndex <- numericalIndex
-    }
-    else {
+    if (is.null(numericalIndex) && all == FALSE) {
+      numericalIndex <- data %>%
+        dplyr::group_by(columnId) %>%
+        dplyr::collect() %>%
+        dplyr::summarise(n = dplyr::n_distinct(.data$covariateValue)) %>%
+        dplyr::pull(n) > 1
+      self$numericalIndex <- numericalIndex
+    } else {
       self$numericalIndex <- NULL
     }
-
-
+
+
     # add labels if training (make 0 vector for prediction)
-    if(!is.null(labels)){
+    if (!is.null(labels)) {
       self$target <- torch::torch_tensor(labels)
-    } else{
-      if (all==FALSE) {
-        self$target <- torch::torch_tensor(rep(0, data %>% dplyr::distinct(rowId)
-                                               %>% dplyr::collect() %>% nrow()))
-      } else{
+    } else {
+      if (all == FALSE) {
+        self$target <- torch::torch_tensor(rep(0, data %>% dplyr::distinct(rowId)
+        %>% dplyr::collect() %>% nrow()))
+      } else {
         self$target <- torch::torch_tensor(rep(0, dim(data)[[1]]))
       }
     }
     # Weight to add in loss function to positive class
-    self$posWeight <- (self$target==0)$sum()/self$target$sum()
+    self$posWeight <- (self$target == 0)$sum() / self$target$sum()
     # for DeepNNTorch
     if (all) {
-      self$all <- torch::torch_tensor(as.matrix(data), dtype = torch::torch_float32())
-      self$cat <- NULL
-      self$num <- NULL
-      return()
+      self$all <- torch::torch_tensor(as.matrix(data), dtype = torch::torch_float32())
+      self$cat <- NULL
+      self$num <- NULL
+      return()
     }
     # add features
-    catColumns <- which(!numericalIndex)
-    dataCat <- dplyr::filter(data,columnId %in% catColumns) %>%
+    catColumns <- which(!numericalIndex)
+    dataCat <- dplyr::filter(data, columnId %in% catColumns) %>%
       dplyr::arrange(columnId) %>%
-      dplyr::group_by(columnId) %>%
-      dplyr::collect() %>%
-      dplyr::mutate(newColumnId=dplyr::cur_group_id()) %>% dplyr::ungroup() %>%
-      dplyr::select(c('rowId', 'newColumnId')) %>% dplyr::rename(columnId=newColumnId)
+      dplyr::group_by(columnId) %>%
+      dplyr::collect() %>%
+      dplyr::mutate(newColumnId = dplyr::cur_group_id()) %>%
+      dplyr::ungroup() %>%
+      dplyr::select(c("rowId", "newColumnId")) %>%
+      dplyr::rename(columnId = newColumnId)
     # the fastest way I found so far to convert data using data.table
     # 1.5 min for 100k rows :(
-    dt <- data.table::data.table(rows=dataCat$rowId, cols=dataCat$columnId)
-    maxFeatures <- max(dt[, .N, by=rows][,N])
+    dt <- data.table::data.table(rows = dataCat$rowId, cols = dataCat$columnId)
+    maxFeatures <- max(dt[, .N, by = rows][, N])
     start <- Sys.time()
     tensorList <- lapply(1:max(data %>% dplyr::pull(rowId)), function(x) {
-      torch::torch_tensor(dt[rows==x, cols])
-    })
+      torch::torch_tensor(dt[rows == x, cols])
+    })
     self$lengths <- lengths
     self$cat <- torch::nn_utils_rnn_pad_sequence(tensorList, batch_first = T)
     delta <- Sys.time() - start
     ParallelLogger::logInfo("Data conversion for dataset took ", signif(delta, 3), " ", attr(delta, "units"))
     if (sum(numericalIndex) == 0) {
       self$num <- NULL
-    } else {
-      numericalData <- data %>% dplyr::filter(columnId %in% !! which(numericalIndex)) %>% dplyr::collect()
-      numericalData <-numericalData %>% dplyr::group_by(columnId) %>% dplyr::mutate(newId = dplyr::cur_group_id())
-      indices <- torch::torch_tensor(cbind(numericalData$rowId, numericalData$newId), dtype=torch::torch_long())$t_()
-      values <- torch::torch_tensor(numericalData$covariateValue,dtype=torch::torch_float32())
-      self$num <- torch::torch_sparse_coo_tensor(indices=indices,
-                                                 values=values,
-                                                 size=c(self$target$shape,sum(numericalIndex)))$to_dense()
+    } else {
+      numericalData <- data %>%
+        dplyr::filter(columnId %in% !!which(numericalIndex)) %>%
+        dplyr::collect()
+      numericalData <- numericalData %>%
+        dplyr::group_by(columnId) %>%
+        dplyr::mutate(newId = dplyr::cur_group_id())
+      indices <- torch::torch_tensor(cbind(numericalData$rowId, numericalData$newId), dtype = torch::torch_long())$t_()
+      values <- torch::torch_tensor(numericalData$covariateValue, dtype = torch::torch_float32())
+      self$num <- torch::torch_sparse_coo_tensor(
+        indices = indices,
+        values = values,
+        size = c(self$target$shape, sum(numericalIndex))
+      )$to_dense()
     }
-    if (self$cat$shape[1] != self$num$shape[1])
+    if (self$cat$shape[1] != self$num$shape[1]) {
       browser()
+    }
   },
-
   getNumericalIndex = function() {
     return(
       self$numericalIndex
     )
   },
-
   numCatFeatures = function() {
-    return (
+    return(
       sum(!self$numericalIndex)
     )
   },
-
   numNumFeatures = function() {
-    if (!is.null(self$num)) {
+    if (!is.null(self$num)) {
       return(self$num$shape[2])
-    } else {
+    } else {
       return(0)
-    }
+    }
   },
-
   .getbatch = function(item) {
-    if (length(item)==1) {
+    if (length(item) == 1) {
       # add leading singleton dimension since the model expects 2d tensors
-      return(list(batch = list(cat = self$cat[item]$unsqueeze(1),
-                               num = self$num[item]$unsqueeze(1)),
-                  target = self$target[item]$unsqueeze(1)))
-    }
-    else {
-      return(list(batch = list(cat = self$cat[item],
-                               num = self$num[item]),
-                  target = self$target[item]))}
+      return(list(
+        batch = list(
+          cat = self$cat[item]$unsqueeze(1),
+          num = self$num[item]$unsqueeze(1)
+        ),
+        target = self$target[item]$unsqueeze(1)
+      ))
+    } else {
+      return(list(
+        batch = list(
+          cat = self$cat[item],
+          num = self$num[item]
+        ),
+        target = self$target[item]
+      ))
+    }
   },
   .length = function() {
     self$target$size()[[1]] # shape[1]
   }
 )
-
-
-
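Note on the conversion above: the per-row lapply over 1:max(rowId) is the step the inline comment flags as slow (about 1.5 minutes per 100k rows). A possible single-pass alternative is sketched below; it assumes `dataCat` holds rowId/columnId pairs as in initialize(), and unlike the original loop it drops rows with no categorical features, which would need re-inserting before padding.

# Sketch only, not part of the patch: group column indices by row in one pass.
dt <- data.table::data.table(rows = dataCat$rowId, cols = dataCat$columnId)
grouped <- split(dt$cols, dt$rows) # named list, one element per observed row
tensorList <- lapply(grouped, torch::torch_tensor)
padded <- torch::nn_utils_rnn_pad_sequence(tensorList, batch_first = TRUE)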
diff --git a/R/DeepNNTorch.R b/R/DeepNNTorch.R
index 0127278..f85771e 100644
--- a/R/DeepNNTorch.R
+++ b/R/DeepNNTorch.R
@@ -9,33 +9,34 @@
 #' @param device Which device to use
 #' @param seed A seed to make experiments more reproducible
 #' @export
-setDeepNNTorch <- function(
-  units=list(c(128, 64), 128),
-  layer_dropout=c(0.2),
-  lr =c(1e-4),
-  decay=c(1e-5),
-  outcome_weight = c(1.0),
-  batch_size = c(10000),
-  epochs= c(100),
-  device = 'cpu',
-  seed=NULL){
-
+setDeepNNTorch <- function(units = list(c(128, 64), 128),
+                           layer_dropout = c(0.2),
+                           lr = c(1e-4),
+                           decay = c(1e-5),
+                           outcome_weight = c(1.0),
+                           batch_size = c(10000),
+                           epochs = c(100),
+                           device = "cpu",
+                           seed = NULL) {
+
   # ensure_installed("torch")
-
-  param <- expand.grid(units=units,
-                       layer_dropout=layer_dropout,
-                       lr =lr, decay=decay, outcome_weight=outcome_weight, epochs= epochs,
-                       seed=ifelse(is.null(seed),'NULL', seed))
-
-  param$units1=unlist(lapply(param$units, function(x) x[1]))
-  param$units2=unlist(lapply(param$units, function(x) x[2]))
-  param$units3=unlist(lapply(param$units, function(x) x[3]))
+
+  param <- expand.grid(
+    units = units,
+    layer_dropout = layer_dropout,
+    lr = lr, decay = decay, outcome_weight = outcome_weight, epochs = epochs,
+    seed = ifelse(is.null(seed), "NULL", seed)
+  )
+
+  param$units1 <- unlist(lapply(param$units, function(x) x[1]))
+  param$units2 <- unlist(lapply(param$units, function(x) x[2]))
+  param$units3 <- unlist(lapply(param$units, function(x) x[3]))
   param$units <- NULL
-
-  attr(param, 'settings') <- list(
-    selectorType = "byPid", # is this correct?
+
+  attr(param, "settings") <- list(
+    selectorType = "byPid", # is this correct?
     crossValidationInPrior = T,
-    modelType = 'DeepNN',
+    modelType = "DeepNN",
     seed = seed[1],
     name = "DeepNNTorch",
     units = units,
@@ -47,17 +48,18 @@ setDeepNNTorch <- function(
     device = device,
     epochs = epochs
   )
-
-  attr(param, 'modelType') <- 'binary'
-  attr(param, 'settings')$saveType <- 'file'
-
-  result <- list(fitFunction='fitDeepNNTorch',
-                 param=param)
-
-  class(result) <- 'modelSettings'
-
+
+  attr(param, "modelType") <- "binary"
+  attr(param, "settings")$saveType <- "file"
+
+  result <- list(
+    fitFunction = "fitDeepNNTorch",
+    param = param
+  )
+
+  class(result) <- "modelSettings"
+
   return(result)
-
 }
 
 #' Fits a deep neural network
 #'
 #' @param trainData The data to use
 #' @param modelSettings Settings of the model
@@ -66,40 +68,37 @@ setDeepNNTorch <- function(
 #' @param search Which kind of search strategy to use
 #' @param analysisId Analysis Id
 #' @export
-fitDeepNNTorch <- function(
-  trainData,
-  modelSettings,
-  search='grid',
-  analysisId)
-  {
-
+fitDeepNNTorch <- function(trainData,
+                           modelSettings,
+                           search = "grid",
+                           analysisId) {
   start <- Sys.time()
-
+
   # check covariateData
-  if (!FeatureExtraction::isCovariateData(trainData$covariateData)){
-    stop('DeepNNTorch requires correct covariateData')
+  if (!FeatureExtraction::isCovariateData(trainData$covariateData)) {
+    stop("DeepNNTorch requires correct covariateData")
   }
-
+
   param <- modelSettings$param
 
   # get the settings from the param
-  settings <- attr(param, 'settings')
-
-  if(!is.null(trainData$folds)){
-    trainData$labels <- merge(trainData$labels, trainData$fold, by = 'rowId')
+  settings <- attr(param, "settings")
+
+  if (!is.null(trainData$folds)) {
+    trainData$labels <- merge(trainData$labels, trainData$fold, by = "rowId")
   }
-
+
   mappedData <- PatientLevelPrediction::toSparseM(
     plpData = trainData,
     map = NULL
   )
-
+
   matrixData <- mappedData$dataMatrix
   labels <- mappedData$labels
   covariateRef <- mappedData$covariateRef
-
-  outLoc <- PatientLevelPrediction:::createTempModelLoc() #export
-
-  cvResult <- do.call(
+
+  outLoc <- PatientLevelPrediction:::createTempModelLoc() # export
+
+  cvResult <- do.call(
     what = gridCvDeepNN,
     args = list(
       matrixData = matrixData,
@@ -113,58 +112,55 @@ fitDeepNNTorch <- function(
       paramSearch = param
     )
   )
-
+
   hyperSummary <- do.call(rbind, lapply(cvResult$paramGridSearch, function(x) x$hyperSummary))
-
+
   prediction <- cvResult$prediction
-
   incs <- rep(1, nrow(covariateRef))
   covariateRef$included <- incs
   covariateRef$covariateValue <- 0
-
+
   comp <- start - Sys.time()
-
+
   result <- list(
-    model = cvResult$estimator, #file.path(outLoc),
-
+    model = cvResult$estimator, # file.path(outLoc),
+
     prediction = prediction,
-
     settings = list(
       plpDataSettings = attr(trainData, "metaData")$plpDataSettings,
       covariateSettings = attr(trainData, "metaData")$covariateSettings,
       populationSettings = attr(trainData, "metaData")$populationSettings,
       featureEngineering = attr(trainData$covariateData, "metaData")$featureEngineering,
-      tidyCovariates = attr(trainData$covariateData, "metaData")$tidyCovariateDataSettings, 
+      tidyCovariates = attr(trainData$covariateData, "metaData")$tidyCovariateDataSettings,
       requireDenseMatrix = F,
       modelSettings = list(
-        model = settings$name, 
+        model = settings$name,
         param = param,
         finalModelParameters = cvResult$finalParam,
-        extraSettings = attr(param, 'settings')
+        extraSettings = attr(param, "settings")
       ),
       splitSettings = attr(trainData, "metaData")$splitSettings,
       sampleSettings = attr(trainData, "metaData")$sampleSettings
     ),
-
     trainDetails = list(
       analysisId = analysisId,
       cdmDatabaseSchema = attr(trainData, "metaData")$cdmDatabaseSchema,
       outcomeId = attr(trainData, "metaData")$outcomeId,
       cohortId = attr(trainData, "metaData")$cohortId,
-      attrition = attr(trainData, "metaData")$attrition, 
+      attrition = attr(trainData, "metaData")$attrition,
       trainingTime = comp,
       trainingDate = Sys.Date(),
       hyperParamSearch = hyperSummary
     ),
-
     covariateImportance = covariateRef
   )
-
+
   class(result) <- "plpModel"
   attr(result, "predictionFunction") <- "predictDeepNN"
   attr(result, "modelType") <- "binary"
-  attr(result, "saveType") <- attr(param, 'settings')$saveType
-
+  attr(result, "saveType") <- attr(param, "settings")$saveType
+
   return(result)
 }
 
@@ -173,173 +169,170 @@ fitDeepNNTorch <- function(
 #' @param data The data to make predictions for
 #' @param cohort The cohort to use
 #' @export
-predictDeepNN <- function(
-  plpModel,
-  data,
-  cohort
-){
-
-  if(!'plpModel' %in% class(plpModel)){
+predictDeepNN <- function(plpModel,
+                          data,
+                          cohort) {
+  if (!"plpModel" %in% class(plpModel)) {
     plpModel <- list(model = plpModel)
-    attr(plpModel, 'modelType') <- 'binary'
+    attr(plpModel, "modelType") <- "binary"
   }
-
-  if("plpData" %in% class(data)){
-
+
+  if ("plpData" %in% class(data)) {
     dataMat <- PatientLevelPrediction::toSparseM(
-      plpData = data, 
-      cohort = cohort, 
-      map = plpModel$covariateImportance %>% 
+      plpData = data,
+      cohort = cohort,
+      map = plpModel$covariateImportance %>%
         dplyr::select(.data$columnId, .data$covariateId)
     )
-
-    data <- Dataset(dataMat$dataMatrix, all=TRUE) # add numeric details..
+
+    data <- Dataset(dataMat$dataMatrix, all = TRUE) # add numeric details..
   }
-
+
   # get predictions
   prediction <- cohort
-
-  if(is.character(plpModel$model)) {
-    model <- torch::torch_load(file.path(plpModel$model, 'DeepNNTorchModel.pt'), device='cpu')
+
+  if (is.character(plpModel$model)) {
+    model <- torch::torch_load(file.path(plpModel$model, "DeepNNTorchModel.pt"), device = "cpu")
   }
-  y_pred = model(data$all)
-  prediction$value <- as.array(y_pred$to())[,1]
-
-  attr(prediction, "metaData")$modelType <- attr(plpModel, 'modelType')
-
+  y_pred <- model(data$all)
+  prediction$value <- as.array(y_pred$to())[, 1]
+
+  attr(prediction, "metaData")$modelType <- attr(plpModel, "modelType")
+
   return(prediction)
 }
 
-gridCvDeepNN <- function(
-  matrixData,
-  labels,
-  seed,
-  modelName,
-  device,
-  batchSize,
-  epochs,
-  modelLocation,
-  paramSearch
-){
-
-
-  ParallelLogger::logInfo(paste0("Running CV for ",modelName," model"))
-
+gridCvDeepNN <- function(matrixData,
+                         labels,
+                         seed,
+                         modelName,
+                         device,
+                         batchSize,
+                         epochs,
+                         modelLocation,
+                         paramSearch) {
+  ParallelLogger::logInfo(paste0("Running CV for ", modelName, " model"))
+
   ###########################################################################
-
-
+
+
   gridSearchPredictons <- list()
   length(gridSearchPredictons) <- nrow(paramSearch)
-
-  for(gridId in 1:nrow(paramSearch)){
-
+
+  for (gridId in 1:nrow(paramSearch)) {
+
     # get the params
     modelParamNames <- c("layer_dropout", "lr", "decay", "outcome_weight", "epochs", "units1", "units2", "units3")
     modelParams <- paramSearch[gridId, modelParamNames]
-
+
     fitParams <- paramSearch[gridId, c("lr", "decay")]
     fitParams$epochs <- epochs
     fitParams$batchSize <- batchSize
-
-
+
+
     # initiate prediction
     prediction <- c()
-
+
     fold <- labels$index
-    ParallelLogger::logInfo(paste0('Max fold: ', max(fold)))
-
-    dataset <- Dataset(matrixData, labels$outcomeCount, all=TRUE)
+    ParallelLogger::logInfo(paste0("Max fold: ", max(fold)))
+
+    dataset <- Dataset(matrixData, labels$outcomeCount, all = TRUE)
     # modelParams$cat_features <- dataset$cat$shape[2]
     # modelParams$num_features <- dataset$num$shape[2]
-
-    for(i in 1:max(fold)){
-
-      if(is.na(modelParams$units2)){
-        model <- singleLayerNN(inputN = ncol(matrixData),
-                               layer1 = modelParams$units1,
-                               outputN = 2,
-                               layer_dropout = modelParams$layer_dropout)
-
-      } else if(is.na(modelParams$units3)){
-        model <- doubleLayerNN(inputN = ncol(matrixData),
-                               layer1 = modelParams$units1,
-                               layer2 = modelParams$units2,
-                               outputN = 2,
-                               layer_dropout = modelParams$layer_dropout)
-      } else{
-        model <- tripleLayerNN(inputN = ncol(matrixData),
-                               layer1 = modelParams$units1,
-                               layer2 = modelParams$units2,
-                               layer3 = modelParams$units3,
-                               outputN = 2,
-                               layer_dropout = modelParams$layer_dropout)
-      }
-
-      criterion = torch::nn_bce_loss() #Binary crossentropy only
-      optimizer = torch::optim_adam(model$parameters, lr = fitParams$lr)
-
-      # Need earlyStopping
-      # Need setting decay
-
-      ParallelLogger::logInfo(paste0('Fold ',i))
-      trainDataset <- torch::dataset_subset(dataset, indices=which(fold!=i))
-      testDataset <- torch::dataset_subset(dataset, indices=which(fold==i))
-
+
+    for (i in 1:max(fold)) {
+      if (is.na(modelParams$units2)) {
+        model <- singleLayerNN(
+          inputN = ncol(matrixData),
+          layer1 = modelParams$units1,
+          outputN = 2,
+          layer_dropout = modelParams$layer_dropout
+        )
+      } else if (is.na(modelParams$units3)) {
+        model <- doubleLayerNN(
+          inputN = ncol(matrixData),
+          layer1 = modelParams$units1,
+          layer2 = modelParams$units2,
+          outputN = 2,
+          layer_dropout = modelParams$layer_dropout
+        )
+      } else {
+        model <- tripleLayerNN(
+          inputN = ncol(matrixData),
+          layer1 = modelParams$units1,
+          layer2 = modelParams$units2,
+          layer3 = modelParams$units3,
+          outputN = 2,
+          layer_dropout = modelParams$layer_dropout
+        )
+      }
+
+      criterion <- torch::nn_bce_loss() # Binary crossentropy only
+      optimizer <- torch::optim_adam(model$parameters, lr = fitParams$lr)
+
+      # Need earlyStopping
+      # Need setting decay
+
+      ParallelLogger::logInfo(paste0("Fold ", i))
+      trainDataset <- torch::dataset_subset(dataset, indices = which(fold != i))
+      testDataset <- torch::dataset_subset(dataset, indices = which(fold == i))
+
       # batches <- split(trainDataset, ceiling(seq_along(trainDataset)/batch_size))
-
-      for(j in 1:epochs){
+
+      for (j in 1:epochs) {
         # for(batchRowIds in batches){
-        optimizer$zero_grad()
-
-        # this is full batch training, won't work on real data
-        y_pred = model(trainDataset$dataset$all[trainDataset$indices])
-        loss = criterion(y_pred[,1], trainDataset$dataset$target[trainDataset$indices])
-        loss$backward()
-        optimizer$step()
-
-        if(j%%1 == 0){
-          cat("Epoch:", j, "out of ", epochs , ": Loss:", loss$item(), "\n")
-        }
+        optimizer$zero_grad()
+
+        # this is full batch training, won't work on real data
+        y_pred <- model(trainDataset$dataset$all[trainDataset$indices])
+        loss <- criterion(y_pred[, 1], trainDataset$dataset$target[trainDataset$indices])
+        loss$backward()
+        optimizer$step()
+
+        if (j %% 1 == 0) {
+          cat("Epoch:", j, "out of ", epochs, ": Loss:", loss$item(), "\n")
+        }
         # }
       }
-      model$eval()
-
-      ParallelLogger::logInfo("Calculating predictions on left out fold set...")
-
-      pred <- model(testDataset$dataset$all[testDataset$indices])
-      predictionTable <- labels[labels$index == i,]
-      predictionTable$value <- as.array(pred$to())[,1]
-
-      if(!'plpModel' %in% class(model)){
-        model <- list(model = model)
-        attr(model, 'modelType') <- 'binary'
-      }
-      attr(predictionTable, "metaData")$modelType <- attr(model, 'modelType')
-
-      prediction <- rbind(prediction, predictionTable)
-    }
+      model$eval()
+
+      ParallelLogger::logInfo("Calculating predictions on left out fold set...")
+
+      pred <- model(testDataset$dataset$all[testDataset$indices])
+      predictionTable <- labels[labels$index == i, ]
+      predictionTable$value <- as.array(pred$to())[, 1]
+
+      if (!"plpModel" %in% class(model)) {
+        model <- list(model = model)
+        attr(model, "modelType") <- "binary"
+      }
+      attr(predictionTable, "metaData")$modelType <- attr(model, "modelType")
+
+      prediction <- rbind(prediction, predictionTable)
+    }
     gridSearchPredictons[[gridId]] <- list(
       prediction = prediction,
-      param = paramSearch[gridId,]
+      param = paramSearch[gridId, ]
     )
-  }
-
+  }
+
   # get best para (this could be modified to enable any metric instead of AUC, just need metric input in function)
-
-  paramGridSearch <- lapply(gridSearchPredictons, function(x){do.call(PatientLevelPrediction:::computeGridPerformance, x)}) # cvAUCmean, cvAUC, param
-
+
+  paramGridSearch <- lapply(gridSearchPredictons, function(x) {
+    do.call(PatientLevelPrediction:::computeGridPerformance, x)
+  }) # cvAUCmean, cvAUC, param
+
   optimalParamInd <- which.max(unlist(lapply(paramGridSearch, function(x) x$cvPerformance)))
-
+
   finalParam <- paramGridSearch[[optimalParamInd]]$param
-
+
   cvPrediction <- gridSearchPredictons[[optimalParamInd]]$prediction
-  cvPrediction$evaluationType <- 'CV'
-
-  ParallelLogger::logInfo('Training final model using optimal parameters')
-
+  cvPrediction$evaluationType <- "CV"
+
+  ParallelLogger::logInfo("Training final model using optimal parameters")
+
   # get the params
   modelParamNames <- c("layer_dropout", "lr", "decay", "outcome_weight", "epochs", "units1", "units2", "units3")
   modelParams <- finalParam[modelParamNames]
@@ -347,82 +340,84 @@ gridCvDeepNN <- function(
   fitParams$epochs <- epochs
   fitParams$batchSize <- batchSize
   # create the dir
-  if(!dir.exists(file.path(modelLocation))){
+  if (!dir.exists(file.path(modelLocation))) {
     dir.create(file.path(modelLocation), recursive = T)
   }
-
+
   trainDataset <- Dataset(
-    matrixData, 
+    matrixData,
     labels$outcomeCount,
-    all=TRUE
+    all = TRUE
   )
-
+
   # modelParams$cat_features <- trainDataset$cat$shape[2]
   # modelParams$num_features <- trainDataset$num$shape[2]
-
-  # trainDataset <- torch::dataset_subset(dataset, indices=which(fold!=i))
-
-  if(is.na(modelParams$units2)){
-    model <- singleLayerNN(inputN = ncol(matrixData),
-                           layer1 = modelParams$units1,
-                           outputN = 2,
-                           layer_dropout = modelParams$layer_dropout)
-
-  } else if(is.na(modelParams$units3)){
-    model <- doubleLayerNN(inputN = ncol(matrixData),
-                           layer1 = modelParams$units1,
-                           layer2 = modelParams$units2,
-                           outputN = 2,
-                           layer_dropout = modelParams$layer_dropout)
-  } else{
-    model <- tripleLayerNN(inputN = ncol(matrixData),
-                           layer1 = modelParams$units1,
-                           layer2 = modelParams$units2,
-                           layer3 = modelParams$units3,
-                           outputN = 2,
-                           layer_dropout = modelParams$layer_dropout)
+
+  # trainDataset <- torch::dataset_subset(dataset, indices=which(fold!=i))
+
+  if (is.na(modelParams$units2)) {
+    model <- singleLayerNN(
+      inputN = ncol(matrixData),
+      layer1 = modelParams$units1,
+      outputN = 2,
+      layer_dropout = modelParams$layer_dropout
+    )
+  } else if (is.na(modelParams$units3)) {
+    model <- doubleLayerNN(
+      inputN = ncol(matrixData),
+      layer1 = modelParams$units1,
+      layer2 = modelParams$units2,
+      outputN = 2,
+      layer_dropout = modelParams$layer_dropout
+    )
+  } else {
+    model <- tripleLayerNN(
+      inputN = ncol(matrixData),
+      layer1 = modelParams$units1,
+      layer2 = modelParams$units2,
+      layer3 = modelParams$units3,
+      outputN = 2,
+      layer_dropout = modelParams$layer_dropout
+    )
   }
-
-  criterion = torch::nn_bce_loss() #Binary crossentropy only
-  optimizer = torch::optim_adam(model$parameters, lr = fitParams$lr)
+
+  criterion <- torch::nn_bce_loss() # Binary crossentropy only
+  optimizer <- torch::optim_adam(model$parameters, lr = fitParams$lr)
   optimizer$zero_grad()
-  y_pred = model(trainDataset$all)
-  loss = criterion(y_pred[,1], trainDataset$target)
+  y_pred <- model(trainDataset$all)
+  loss <- criterion(y_pred[, 1], trainDataset$target)
   loss$backward()
   optimizer$step()
   model$eval()
-
+
   ParallelLogger::logInfo("Calculating predictions on all train data...")
-
+
   prediction <- labels
-  prediction$value <- as.array(y_pred$to())[,1]
-  prediction$evaluationType <- 'Train'
-
+  prediction$value <- as.array(y_pred$to())[, 1]
+  prediction$evaluationType <- "Train"
+
   prediction <- rbind(
     prediction,
     cvPrediction
   )
-
-  # modify prediction 
-  prediction <- prediction %>% 
+
+  # modify prediction
+  prediction <- prediction %>%
     dplyr::select(-.data$rowId, -.data$index) %>%
     dplyr::rename(rowId = .data$originalRowId)
-
-  prediction$cohortStartDate <- as.Date(prediction$cohortStartDate, origin = '1970-01-01')
-
-
+
+  prediction$cohortStartDate <- as.Date(prediction$cohortStartDate, origin = "1970-01-01")
+
+
   # save torch code here
-  torch::torch_save(model, file.path(modelLocation, 'DeepNNTorchModel.pt'))
-
+  torch::torch_save(model, file.path(modelLocation, "DeepNNTorchModel.pt"))
+
   return(
-    list( 
+    list(
       estimator = modelLocation,
       prediction = prediction,
       finalParam = finalParam,
       paramGridSearch = paramGridSearch
     )
   )
-
-}
-
-
+}
diff --git a/R/DeepPatientLevelPrediction.R b/R/DeepPatientLevelPrediction.R
index ddb96a4..f6f5a19 100644
--- a/R/DeepPatientLevelPrediction.R
+++ b/R/DeepPatientLevelPrediction.R
@@ -3,13 +3,13 @@
 # Copyright 2022 Observational Health Data Sciences and Informatics
 #
 # This file is part of DeepPatientLevelPrediction
-# 
+#
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
 # You may obtain a copy of the License at
-# 
+#
 #     http://www.apache.org/licenses/LICENSE-2.0
-# 
+#
 # Unless required by applicable law or agreed to in writing, software
 # distributed under the License is distributed on an "AS IS" BASIS,
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
@@ -17,7 +17,7 @@
 # limitations under the License.
 
 #' DeepPatientLevelPrediction
-#' 
+#'
 #' @description A package containing deep learning extensions for developing prediction models using data in the OMOP CDM
 #'
 #' @docType package
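The posWeight computed in Dataset.R above feeds the weighted loss used by the Estimator in the next file. A small worked example of the weighting, with illustrative values:

# Sketch only: a positive-class weight of 4 up-weights errors on the rarer class.
posWeight <- torch::torch_tensor(4.0) # e.g. roughly an 80/20 negative/positive split
criterion <- torch::nn_bce_with_logits_loss(pos_weight = posWeight)
logits <- torch::torch_tensor(c(0.2, -1.3, 2.1))
targets <- torch::torch_tensor(c(1, 0, 1))
criterion(logits, targets) # weighted mean binary cross-entropy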
"metaData")$populationSettings, featureEngineering = attr(trainData$covariateData, "metaData")$featureEngineering, - tidyCovariates = attr(trainData$covariateData, "metaData")$tidyCovariateDataSettings, + tidyCovariates = attr(trainData$covariateData, "metaData")$tidyCovariateDataSettings, requireDenseMatrix = F, modelSettings = list( - model = settings$name, + model = settings$name, param = param, finalModelParameters = cvResult$finalParam, numericalIndex = cvResult$numericalIndex, - extraSettings = attr(param, 'settings') + extraSettings = attr(param, "settings") ), splitSettings = attr(trainData, "metaData")$splitSettings, sampleSettings = attr(trainData, "metaData")$sampleSettings ), - trainDetails = list( analysisId = analysisId, cdmDatabaseSchema = attr(trainData, "metaData")$cdmDatabaseSchema, outcomeId = attr(trainData, "metaData")$outcomeId, cohortId = attr(trainData, "metaData")$cohortId, - attrition = attr(trainData, "metaData")$attrition, + attrition = attr(trainData, "metaData")$attrition, trainingTime = comp, trainingDate = Sys.Date(), hyperParamSearch = hyperSummary ), - covariateImportance = covariateRef ) - + class(result) <- "plpModel" attr(result, "predictionFunction") <- "predictDeepEstimator" attr(result, "modelType") <- "binary" - attr(result, "saveType") <- attr(param, 'settings')$saveType - + attr(result, "saveType") <- attr(param, "settings")$saveType + return(result) } #' predictDeepEstimator #' -#' @description +#' @description #' the prediction function for the estimator -#' +#' #' @param plpModel the plpModel #' @param data plp data object or a torch dataset #' @param cohort data.frame with the rowIds of the people #' -#' @export -predictDeepEstimator <- function( - plpModel, - data, - cohort -){ - if(!'plpModel' %in% class(plpModel)){ +#' @export +predictDeepEstimator <- function(plpModel, + data, + cohort) { + if (!"plpModel" %in% class(plpModel)) { plpModel <- list(model = plpModel) - attr(plpModel, 'modelType') <- 'binary' + attr(plpModel, "modelType") <- "binary" } - if("plpData" %in% class(data)){ + if ("plpData" %in% class(data)) { mappedData <- PatientLevelPrediction:::MapIds(data$covariateData, - cohort=cohort, - mapping=plpModel$covariateImportance %>% - dplyr::select(.data$columnId, - .data$covariateId)) + cohort = cohort, + mapping = plpModel$covariateImportance %>% + dplyr::select( + .data$columnId, + .data$covariateId + ) + ) data <- Dataset(mappedData$covariates, - numericalIndex = plpModel$settings$modelSettings$numericalIndex) + numericalIndex = plpModel$settings$modelSettings$numericalIndex + ) } - + # get predictions prediction <- cohort - - if(is.character(plpModel$model)){ - model <- torch::torch_load(file.path(plpModel$model, 'DeepEstimatorModel.pt'), device='cpu') + + if (is.character(plpModel$model)) { + model <- torch::torch_load(file.path(plpModel$model, "DeepEstimatorModel.pt"), device = "cpu") estimator <- Estimator$new( baseModel = plpModel$settings$modelSettings$model, modelParameters = model$modelParameters, - fitParameters = model$fitParameters, + fitParameters = model$fitParameters, device = plpModel$settings$modelSettings$extraSettings$device ) estimator$model$load_state_dict(model$modelStateDict) @@ -162,34 +165,31 @@ predictDeepEstimator <- function( } else { prediction$value <- plpModel$model$predictProba(data) } - - - attr(prediction, "metaData")$modelType <- attr(plpModel, 'modelType') - + + + attr(prediction, "metaData")$modelType <- attr(plpModel, "modelType") + return(prediction) } -#' gridCvDeep -#' -#' @description 
-#' gridCvDeep
-#' 
-#' @description 
+#' gridCvDeep
+#'
+#' @description
 #' Performs grid search for a deep learning estimator
-#' 
-#' 
+#'
+#'
 #' @param mappedData Mapped data with covariates
 #' @param labels Dataframe with the outcomes
 #' @param settings Settings of the model
 #' @param modelLocation Where to save the model
 #' @param paramSearch model parameters to perform search over
-#' 
-#' @export 
-gridCvDeep <- function(
-  mappedData,
-  labels,
-  settings,
-  modelLocation,
-  paramSearch
-){
-
+#'
+#' @export
+gridCvDeep <- function(mappedData,
+                       labels,
+                       settings,
+                       modelLocation,
+                       paramSearch) {
   modelName <- settings$modelName
   modelParamNames <- settings$modelParamNames
   fitParamNames <- c("weightDecay", "learningRate")
@@ -197,82 +197,86 @@ gridCvDeep <- function(
   batchSize <- settings$batchSize
   baseModel <- settings$baseModel
   device <- settings$device
-
-  ParallelLogger::logInfo(paste0("Running CV for ",modelName," model"))
+
+  ParallelLogger::logInfo(paste0("Running CV for ", modelName, " model"))
   ###########################################################################
-
+
   gridSearchPredictons <- list()
   length(gridSearchPredictons) <- length(paramSearch)
   dataset <- Dataset(mappedData$covariates, labels$outcomeCount)
-  for(gridId in 1:length(paramSearch)){
-    ParallelLogger::logInfo(paste0("Running hyperparameter combination no ",gridId))
+  for (gridId in 1:length(paramSearch)) {
+    ParallelLogger::logInfo(paste0("Running hyperparameter combination no ", gridId))
     ParallelLogger::logInfo(paste0("HyperParameters: "))
-    ParallelLogger::logInfo(paste(names(paramSearch[[gridId]]), paramSearch[[gridId]], collapse=' | '))
+    ParallelLogger::logInfo(paste(names(paramSearch[[gridId]]), paramSearch[[gridId]], collapse = " | "))
     modelParams <- paramSearch[[gridId]][modelParamNames]
-
+
     fitParams <- paramSearch[[gridId]][fitParamNames]
     fitParams$epochs <- epochs
     fitParams$batchSize <- batchSize
-
-
+
+
     # initiate prediction
     prediction <- c()
-
+
     fold <- labels$index
-    ParallelLogger::logInfo(paste0('Max fold: ', max(fold)))
+    ParallelLogger::logInfo(paste0("Max fold: ", max(fold)))
     modelParams$catFeatures <- dataset$numCatFeatures()
     modelParams$numFeatures <- dataset$numNumFeatures()
     learnRates <- list()
-    for( i in 1:max(fold)){
-      ParallelLogger::logInfo(paste0('Fold ',i))
-      trainDataset <- torch::dataset_subset(dataset, indices=which(fold!=i))
-      testDataset <- torch::dataset_subset(dataset, indices=which(fold==i))
+    for (i in 1:max(fold)) {
+      ParallelLogger::logInfo(paste0("Fold ", i))
+      trainDataset <- torch::dataset_subset(dataset, indices = which(fold != i))
+      testDataset <- torch::dataset_subset(dataset, indices = which(fold == i))
       fitParams$posWeight <- trainDataset$dataset$posWeight
       estimator <- Estimator$new(
-        baseModel = baseModel, 
+        baseModel = baseModel,
         modelParameters = modelParams,
-        fitParameters = fitParams, 
+        fitParameters = fitParams,
         device = device
       )
-
+
       estimator$fit(
-        trainDataset, 
+        trainDataset,
         testDataset
       )
-
+
       ParallelLogger::logInfo("Calculating predictions on left out fold set...")
-
+
       prediction <- rbind(
-        prediction, 
+        prediction,
         predictDeepEstimator(
-          plpModel = estimator, 
-          data = testDataset, 
-          cohort = labels[fold == i,]
+          plpModel = estimator,
+          data = testDataset,
+          cohort = labels[fold == i, ]
         )
       )
-      learnRates[[i]] <- list(LRs=estimator$learnRateSchedule, 
-                              bestEpoch=estimator$bestEpoch)
+      learnRates[[i]] <- list(
+        LRs = estimator$learnRateSchedule,
+        bestEpoch = estimator$bestEpoch
+      )
     }
     maxIndex <- which.max(unlist(sapply(learnRates, `[`, 2)))
     paramSearch[[gridId]]$learnSchedule <- learnRates[[maxIndex]]
-
+
     gridSearchPredictons[[gridId]] <- list(
       prediction = prediction,
       param = paramSearch[[gridId]]
-    ) 
+    )
   }
   # get best para (this could be modified to enable any metric instead of AUC, just need metric input in function)
-  paramGridSearch <- lapply(gridSearchPredictons, function(x){do.call(computeGridPerformance, x)}) # cvAUCmean, cvAUC, param
-
+  paramGridSearch <- lapply(gridSearchPredictons, function(x) {
+    do.call(computeGridPerformance, x)
+  }) # cvAUCmean, cvAUC, param
+
   optimalParamInd <- which.max(unlist(lapply(paramGridSearch, function(x) x$cvPerformance)))
   finalParam <- paramGridSearch[[optimalParamInd]]$param
-
+
   cvPrediction <- gridSearchPredictons[[optimalParamInd]]$prediction
-  cvPrediction$evaluationType <- 'CV'
-
-  ParallelLogger::logInfo('Training final model using optimal parameters')
-
+  cvPrediction$evaluationType <- "CV"
+
+  ParallelLogger::logInfo("Training final model using optimal parameters")
+
   # get the params
   modelParams <- finalParam[modelParamNames]
   fitParams <- finalParam[fitParamNames]
@@ -280,46 +284,46 @@ gridCvDeep <- function(
   fitParams$batchSize <- batchSize
   fitParams$posWeight <- dataset$posWeight
   # create the dir
-  if(!dir.exists(file.path(modelLocation))){
+  if (!dir.exists(file.path(modelLocation))) {
     dir.create(file.path(modelLocation), recursive = T)
   }
   modelParams$catFeatures <- dataset$numCatFeatures()
   modelParams$numFeatures <- dataset$numNumFeatures()
-
+
   estimator <- Estimator$new(
     baseModel = baseModel,
     modelParameters = modelParams,
-    fitParameters = fitParams, 
+    fitParameters = fitParams,
     device = device
   )
   numericalIndex <- dataset$getNumericalIndex()
-
+
   estimator$fitWholeTrainingSet(dataset, finalParam$learnSchedule$LRs)
-
+
   ParallelLogger::logInfo("Calculating predictions on all train data...")
   prediction <- predictDeepEstimator(
-    plpModel = estimator, 
-    data = dataset, 
+    plpModel = estimator,
+    data = dataset,
     cohort = labels
   )
-  prediction$evaluationType <- 'Train'
-
+  prediction$evaluationType <- "Train"
+
   prediction <- rbind(
     prediction,
     cvPrediction
   )
-  # modify prediction 
-  prediction <- prediction %>% 
+  # modify prediction
+  prediction <- prediction %>%
     dplyr::select(-.data$index)
-  prediction$cohortStartDate <- as.Date(prediction$cohortStartDate, origin = '1970-01-01')
-
-
+  prediction$cohortStartDate <- as.Date(prediction$cohortStartDate, origin = "1970-01-01")
+
+
   # save torch code here
-  estimatorFile <- estimator$save(modelLocation, 'DeepEstimatorModel.pt')
-
+  estimatorFile <- estimator$save(modelLocation, "DeepEstimatorModel.pt")
+
   return(
-    list( 
+    list(
       estimator = modelLocation,
       prediction = prediction,
       finalParam = finalParam,
@@ -327,74 +331,78 @@ gridCvDeep <- function(
       numericalIndex = numericalIndex
     )
   )
-
 }
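The bestEpoch selection in gridCvDeep above is terse; a worked example of what which.max(unlist(sapply(learnRates, `[`, 2))) evaluates to:

# Illustrative values: two folds with their LR schedules and best epochs.
learnRates <- list(
  list(LRs = c(1e-3, 1e-3, 5e-4), bestEpoch = 5),
  list(LRs = c(1e-3, 5e-4), bestEpoch = 8)
)
unlist(sapply(learnRates, `[`, 2)) # c(bestEpoch = 5, bestEpoch = 8)
which.max(unlist(sapply(learnRates, `[`, 2))) # 2: the fold that trained longest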
 #' Estimator
-#' @description 
-#' A generic R6 class that wraps around a torch nn module and can be used to 
+#' @description
+#' A generic R6 class that wraps around a torch nn module and can be used to
 #' fit and predict the model defined in that module.
 #' @export
 Estimator <- R6::R6Class(
-  classname = 'Estimator',
+  classname = "Estimator",
   lock_objects = FALSE,
   public = list(
-    #' @description 
+    #' @description
     #' Creates a new estimator
     #' @param baseModel The torch nn module to use as model
     #' @param modelParameters Parameters to initialize the baseModel
     #' @param fitParameters Parameters required for the estimator fitting
     #' @param optimizer A torch optimizer to use, default is Adam
-    #' @param criterion The torch loss function to use, defaults to 
+    #' @param criterion The torch loss function to use, defaults to
     #' binary cross entropy with logits
-    #' @param scheduler learning rate scheduler to use 
+    #' @param scheduler learning rate scheduler to use
     #' @param device Which device to use for fitting, default is cpu
-    #' @param patience Patience to use for early stopping 
-    initialize = function(baseModel, 
-                          modelParameters, 
+    #' @param patience Patience to use for early stopping
+    initialize = function(baseModel,
+                          modelParameters,
                           fitParameters,
-                          optimizer=torch::optim_adam,
-                          criterion=torch::nn_bce_with_logits_loss,
-                          scheduler=torch::lr_reduce_on_plateau,
-                          device='cpu',
-                          patience=4) {
+                          optimizer = torch::optim_adam,
+                          criterion = torch::nn_bce_with_logits_loss,
+                          scheduler = torch::lr_reduce_on_plateau,
+                          device = "cpu",
+                          patience = 4) {
       self$device <- device
       self$model <- do.call(baseModel, modelParameters)
       self$modelParameters <- modelParameters
       self$fitParameters <- fitParameters
-      self$epochs <- self$itemOrDefaults(fitParameters, 'epochs', 10)
-      self$learningRate <- self$itemOrDefaults(fitParameters,'learningRate', 1e-3)
-      self$l2Norm <- self$itemOrDefaults(fitParameters, 'weightDecay', 1e-5)
-      self$batchSize <- self$itemOrDefaults(fitParameters, 'batchSize', 1024)
-      self$posWeight <- self$itemOrDefaults(fitParameters, 'posWeight', 1)
-      self$prefix <- self$itemOrDefaults(fitParameters, 'prefix', self$model$name)
-
-      self$previousEpochs <- self$itemOrDefaults(fitParameters, 'previousEpochs', 0)
-      self$model$to(device=self$device)
-
-      self$optimizer <- optimizer(params=self$model$parameters,
-                                  lr=self$learningRate,
-                                  weight_decay=self$l2Norm)
-      self$criterion <- criterion(torch::torch_tensor(self$posWeight,
-                                                      device=self$device))
-
-      self$scheduler <- scheduler(self$optimizer, patience=1,
-                                  verbose=FALSE, mode='max')
-
+      self$epochs <- self$itemOrDefaults(fitParameters, "epochs", 10)
+      self$learningRate <- self$itemOrDefaults(fitParameters, "learningRate", 1e-3)
+      self$l2Norm <- self$itemOrDefaults(fitParameters, "weightDecay", 1e-5)
+      self$batchSize <- self$itemOrDefaults(fitParameters, "batchSize", 1024)
+      self$posWeight <- self$itemOrDefaults(fitParameters, "posWeight", 1)
+      self$prefix <- self$itemOrDefaults(fitParameters, "prefix", self$model$name)
+
+      self$previousEpochs <- self$itemOrDefaults(fitParameters, "previousEpochs", 0)
+      self$model$to(device = self$device)
+
+      self$optimizer <- optimizer(
+        params = self$model$parameters,
+        lr = self$learningRate,
+        weight_decay = self$l2Norm
+      )
+      self$criterion <- criterion(torch::torch_tensor(self$posWeight,
+        device = self$device
+      ))
+
+      self$scheduler <- scheduler(self$optimizer,
+        patience = 1,
+        verbose = FALSE, mode = "max"
+      )
+
       # gradient accumulation is useful when training large models where
       # you can only fit a few samples on the GPU in each batch.
       self$gradAccumulationIter <- 1
-
+
       if (!is.null(patience)) {
-        self$earlyStopper <- EarlyStopping$new(patience=patience)
+        self$earlyStopper <- EarlyStopping$new(patience = patience)
       } else {
         self$earlyStopper <- NULL
       }
-
-      self$bestScore <- NULL 
+
+      self$bestScore <- NULL
       self$bestEpoch <- NULL
     },
-
+
     #' @description fits the estimator
     #' @param dataset a torch dataset to use for model fitting
     #' @param testDataset a torch dataset to use for early stopping
@@ -402,33 +410,35 @@ Estimator <- R6::R6Class(
       valLosses <- c()
       valAUCs <- c()
       batchIndex <- torch::torch_randperm(length(dataset)) + 1L
-      batchIndex <- split(batchIndex, ceiling(seq_along(batchIndex)/self$batchSize))
+      batchIndex <- split(batchIndex, ceiling(seq_along(batchIndex) / self$batchSize))
       testBatchIndex <- 1:length(testDataset)
-      testBatchIndex <- split(testBatchIndex, ceiling(seq_along(testBatchIndex)/self$batchSize))
+      testBatchIndex <- split(testBatchIndex, ceiling(seq_along(testBatchIndex) / self$batchSize))
       modelStateDict <- list()
       epoch <- list()
       times <- list()
-      learnRates <-list()
+      learnRates <- list()
       for (epochI in 1:self$epochs) {
         startTime <- Sys.time()
         trainLoss <- self$fitEpoch(dataset, batchIndex)
         endTime <- Sys.time()
-
+
         # predict on test data
         scores <- self$score(testDataset, testBatchIndex)
         delta <- endTime - startTime
         currentEpoch <- epochI + self$previousEpochs
         lr <- self$optimizer$param_groups[[1]]$lr
-        ParallelLogger::logInfo('Epochs: ', currentEpoch,
-                                ' | Val AUC: ', round(scores$auc,3),
-                                ' | Val Loss: ', round(scores$loss,3),
-                                ' | Train Loss: ', round(trainLoss,3),
-                                ' | Time: ', round(delta, 3), ' ',
-                                units(delta),
-                                ' | LR: ', lr)
-        self$scheduler$step(scores$auc)
+        ParallelLogger::logInfo(
+          "Epochs: ", currentEpoch,
+          " | Val AUC: ", round(scores$auc, 3),
+          " | Val Loss: ", round(scores$loss, 3),
+          " | Train Loss: ", round(trainLoss, 3),
+          " | Time: ", round(delta, 3), " ",
+          units(delta),
+          " | LR: ", lr
+        )
+        self$scheduler$step(scores$auc)
         valLosses <- c(valLosses, scores$loss)
         valAUCs <- c(valAUCs, scores$auc)
         learnRates <- c(learnRates, lr)
@@ -437,60 +447,60 @@ Estimator <- R6::R6Class(
           self$earlyStopper$call(scores$auc)
           if (self$earlyStopper$improved) {
             # here it saves the results to lists rather than files
-            modelStateDict[[epochI]] <- lapply(self$model$state_dict(), function(x) x$detach()$cpu()) 
+            modelStateDict[[epochI]] <- lapply(self$model$state_dict(), function(x) x$detach()$cpu())
             epoch[[epochI]] <- currentEpoch
           }
           if (self$earlyStopper$earlyStop) {
-            ParallelLogger::logInfo('Early stopping, validation AUC stopped improving')
-            ParallelLogger::logInfo('Average time per epoch was: ', round(mean(as.numeric(times)),3), ' ' , units(delta))
+            ParallelLogger::logInfo("Early stopping, validation AUC stopped improving")
+            ParallelLogger::logInfo("Average time per epoch was: ", round(mean(as.numeric(times)), 3), " ", units(delta))
             self$finishFit(valAUCs, modelStateDict, valLosses, epoch, learnRates)
             return(invisible(self))
           }
         } else {
-          modelStateDict[[epochI]] <- lapply(self$model$state_dict(), function(x) x$detach()$cpu()) 
+          modelStateDict[[epochI]] <- lapply(self$model$state_dict(), function(x) x$detach()$cpu())
           epoch[[epochI]] <- currentEpoch
-        }  
+        }
       }
-      ParallelLogger::logInfo('Average time per epoch was: ', round(mean(as.numeric(times)),3), ' ' , units(delta))
+      ParallelLogger::logInfo("Average time per epoch was: ", round(mean(as.numeric(times)), 3), " ", units(delta))
       self$finishFit(valAUCs, modelStateDict, valLosses, epoch, learnRates)
       invisible(self)
     },
-
-    #' @description 
+
+    #' @description
     #' fits estimator for one epoch (one round through the data)
     #' @param dataset torch dataset to use for fitting
-    #' @param batchIndex indices of batches 
-    fitEpoch = function(dataset, batchIndex){
+    #' @param batchIndex indices of batches
+    fitEpoch = function(dataset, batchIndex) {
       trainLosses <- torch::torch_empty(length(batchIndex))
       ix <- 1
       self$model$train()
-      progressBar <- utils::txtProgressBar(style=3)
+      progressBar <- utils::txtProgressBar(style = 3)
       coro::loop(for (b in batchIndex) {
         self$optimizer$zero_grad()
         batch <- self$batchToDevice(dataset[b])
         out <- self$model(batch[[1]])
         loss <- self$criterion(out, batch[[2]])
         loss$backward()
-
+
         self$optimizer$step()
         trainLosses[ix] <- loss$detach()
-        utils::setTxtProgressBar(progressBar, ix/length(batchIndex))
+        utils::setTxtProgressBar(progressBar, ix / length(batchIndex))
         ix <- ix + 1
-      })  
+      })
       close(progressBar)
       trainLosses$mean()$item()
     },
-
-    #' @description 
+
+    #' @description
     #' calculates loss and auc after training for one epoch
     #' @param dataset The torch dataset to use to evaluate loss and auc
     #' @param batchIndex Indices of batches in the dataset
     #' @return list with average loss and auc in the dataset
-    score = function(dataset, batchIndex){
+    score = function(dataset, batchIndex) {
       torch::with_no_grad({
         loss <- torch::torch_empty(c(length(batchIndex)))
-        predictions = list()
-        targets = list()
+        predictions <- list()
+        targets <- list()
         self$model$eval()
         ix <- 1
         coro::loop(for (b in batchIndex) {
@@ -502,15 +512,17 @@ Estimator <- R6::R6Class(
           ix <- ix + 1
         })
         mean_loss <- loss$mean()$item()
-        predictionsClass <- data.frame(value=as.matrix(torch::torch_sigmoid(torch::torch_cat(predictions)$cpu())),
-                                       outcomeCount=as.matrix(torch::torch_cat(targets)$cpu()))
-        attr(predictionsClass, 'metaData')$modelType <-'binary'
+        predictionsClass <- data.frame(
+          value = as.matrix(torch::torch_sigmoid(torch::torch_cat(predictions)$cpu())),
+          outcomeCount = as.matrix(torch::torch_cat(targets)$cpu())
+        )
+        attr(predictionsClass, "metaData")$modelType <- "binary"
         auc <- PatientLevelPrediction::computeAuc(predictionsClass)
       })
-      return(list(loss=mean_loss, auc=auc))
+      return(list(loss = mean_loss, auc = auc))
     },
-
-    #' @description 
+
+    #' @description
     #' operations that run when fitting is finished
     #' @param valAUCs validation AUC values
     #' @param modelStateDict fitted model parameters
@@ -518,72 +530,75 @@ Estimator <- R6::R6Class(
     #' @param epoch list of epochs fit
     #' @param learnRates learning rate sequence used so far
     finishFit = function(valAUCs, modelStateDict, valLosses, epoch, learnRates) {
-      bestEpochInd <- which.max(valAUCs) # change this if a different metric is used 
-
-      bestModelStateDict <- lapply(modelStateDict[[bestEpochInd]], function(x) x$to(device=self$device))
+      bestEpochInd <- which.max(valAUCs) # change this if a different metric is used
+
+      bestModelStateDict <- lapply(modelStateDict[[bestEpochInd]], function(x) x$to(device = self$device))
       self$model$load_state_dict(bestModelStateDict)
-
+
       bestEpoch <- epoch[[bestEpochInd]]
       self$bestEpoch <- bestEpoch
-      self$bestScore <- list(loss = valLosses[bestEpochInd],
-                             auc = valAUCs[bestEpochInd])
+      self$bestScore <- list(
+        loss = valLosses[bestEpochInd],
+        auc = valAUCs[bestEpochInd]
+      )
       self$learnRateSchedule <- learnRates[1:bestEpochInd]
-
-      ParallelLogger::logInfo('Loaded best model (based on AUC) from epoch ', bestEpoch)
-      ParallelLogger::logInfo('ValLoss: ', self$bestScore$loss)
-      ParallelLogger::logInfo('valAUC: ', self$bestScore$auc)
+
+      ParallelLogger::logInfo("Loaded best model (based on AUC) from epoch ", bestEpoch)
+      ParallelLogger::logInfo("ValLoss: ", self$bestScore$loss)
+      ParallelLogger::logInfo("valAUC: ", self$bestScore$auc)
     },
-
-    #' @description 
+
+    #' @description
     #' Fits whole training set on a specific number of epochs
     #' TODO What happens when learning rate changes per epochs?
     #' Ideally I would copy the learning rate strategy from before
     #' and adjust for different sizes ie more iterations/updates???
     #' @param dataset torch dataset
     #' @param learnRates learnRateSchedule from CV
-    fitWholeTrainingSet = function(dataset, learnRates=NULL) {
-      if(is.null(self$bestEpoch)) {
+    fitWholeTrainingSet = function(dataset, learnRates = NULL) {
+      if (is.null(self$bestEpoch)) {
        self$bestEpoch <- self$epochs
      }
-
+
      batchIndex <- torch::torch_randperm(length(dataset)) + 1L
-      batchIndex <- split(batchIndex, ceiling(seq_along(batchIndex)/self$batchSize))
+      batchIndex <- split(batchIndex, ceiling(seq_along(batchIndex) / self$batchSize))
      for (epoch in 1:self$bestEpoch) {
        self$optimizer$param_groups[[1]]$lr <- learnRates[[epoch]]
        self$fitEpoch(dataset, batchIndex)
      }
-
-    },
-
-    #' @description 
+    },
+
+    #' @description
    #' save model and those parameters needed to reconstruct it
    #' @param path where to save the model
    #' @param name name of file
    #' @return the path to saved model
    save = function(path, name) {
      savePath <- file.path(path, name)
-      torch::torch_save(list(modelStateDict=self$model$state_dict(),
-                             modelParameters=self$modelParameters,
-                             fitParameters=self$fitParameters,
-                             epoch=self$epochs),
-                        savePath
-      )
+      torch::torch_save(
+        list(
+          modelStateDict = self$model$state_dict(),
+          modelParameters = self$modelParameters,
+          fitParameters = self$fitParameters,
+          epoch = self$epochs
+        ),
+        savePath
+      )
      return(savePath)
    },
-
-
-    #' @description 
+
+
+    #' @description
    #' predicts and outputs the probabilities
    #' @param dataset Torch dataset to create predictions for
    #' @return predictions as probabilities
    predictProba = function(dataset) {
      batchIndex <- 1:length(dataset)
-      batchIndex <- split(batchIndex, ceiling(seq_along(batchIndex)/self$batchSize))
+      batchIndex <- split(batchIndex, ceiling(seq_along(batchIndex) / self$batchSize))
      torch::with_no_grad({
        predictions <- c()
        self$model$eval()
-        coro::loop(for (b in batchIndex){
+        coro::loop(for (b in batchIndex) {
          batch <- self$batchToDevice(dataset[b])
          target <- batch$target
          pred <- self$model(batch$batch)
@@ -592,36 +607,36 @@ Estimator <- R6::R6Class(
        })
        return(predictions)
    },
-
-    #' @description 
+
+    #' @description
    #' predicts and outputs the class
    #' @param dataset A torch dataset to create predictions for
    #' @param threshold Which threshold to use for predictions
    #' @return The predicted class for the data in the dataset
-    predict = function(dataset, threshold=NULL){
+    predict = function(dataset, threshold = NULL) {
      predictions <- self$predictProba(dataset)
-
+
      if (is.null(threshold)) {
        # use outcome rate
-        threshold <- dataset$target$sum()$item()/length(dataset)
+        threshold <- dataset$target$sum()$item() / length(dataset)
      }
      predicted_class <- as.integer(predictions > threshold)
      return(predicted_class)
    },
-
-    #' @description 
+
+    #' @description
    #' sends a batch of data to device
    #' assumes batch includes lists of tensors to arbitrary nested depths
    #' @param batch the batch to send, usually a list of torch tensors
    #' @return the batch on the required device
    batchToDevice = function(batch) {
-      if (class(batch)[1] == 'torch_tensor') {
-        batch <- batch$to(device=self$device)
+      if (class(batch)[1] == "torch_tensor") {
+        batch <- batch$to(device = self$device)
      } else {
        ix <- 1
        for (b in batch) {
-          if (class(b)[1] == 'torch_tensor') {
-            b <- b$to(device=self$device)
+          if (class(b)[1] == "torch_tensor") {
+            b <- b$to(device = self$device)
          } else {
            b <- self$batchToDevice(b)
          }
@@ -633,14 +648,14 @@ Estimator <- R6::R6Class(
        }
        return(batch)
    },
-
+
    #' @description
    #' select item from list, and if it's null sets a default
    #' @param list A list with items
    #' @param item Which list item to retrieve
    #' @param default The value to return if list doesn't have item
-    #' @return the list item or default 
-    itemOrDefaults = function (list, item, default = NULL) {
+    #' @return the list item or default
+    itemOrDefaults = function(list, item, default = NULL) {
      value <- list[[item]]
      if (is.null(value)) default else value
    }
@@ -648,57 +663,55 @@ Estimator <- R6::R6Class(
   )
 )
 
 #' Earlystopping class
-#' @description 
+#' @description
 #' Stops training if a loss or metric has stopped improving
 EarlyStopping <- R6::R6Class(
-  classname = 'EarlyStopping',
-  lock_objects = FALSE,
-  public = list(
-    #' @description
-    #' Creates a new earlystopping object
-    #' @param patience Stop after this number of epochs if loss doesn't improve
-    #' @param delta How much does the loss need to improve to count as improvement
-    #' @param verbose
-    #' @return a new earlystopping object
-    initialize = function(patience=3, delta=0, verbose=TRUE) {
-      self$patience <- patience
-      self$counter <- 0
-      self$verbose <- verbose
-      self$bestScore <- NULL
-      self$earlyStop <- FALSE
-      self$improved <- FALSE
-      self$delta <- delta
-      self$previousScore <- 0
-    },
-    #' @description
-    #' call the earlystopping object and increment a counter if loss is not
-    #' improving
-    #' @param metric the current metric value
-    call = function(metric){
-      score <- metric
-      if (is.null(self$bestScore)) {
-        self$bestScore <- score
-        self$improved <- TRUE
-      }
-      else if (score < self$bestScore + self$delta) {
-        self$counter <- self$counter + 1
-        self$improved <- FALSE
-        if (self$verbose) {
-          ParallelLogger::logInfo('EarlyStopping counter: ', self$counter,
-                                  ' out of ', self$patience)
-        }
-        if (self$counter >= self$patience) {
-          self$earlyStop <- TRUE
-        }
-      }
-      else {
-        self$bestScore <- score
-        self$counter <- 0
-        self$improved <- TRUE
-      }
-      self$previousScore <- score
-    }
-  )
+  classname = "EarlyStopping",
+  lock_objects = FALSE,
+  public = list(
+    #' @description
+    #' Creates a new earlystopping object
+    #' @param patience Stop after this number of epochs if loss doesn't improve
+    #' @param delta How much does the loss need to improve to count as improvement
+    #' @param verbose if TRUE, log the early stopping counter when it increments
+    #' @return a new earlystopping object
+    initialize = function(patience = 3, delta = 0, verbose = TRUE) {
+      self$patience <- patience
+      self$counter <- 0
+      self$verbose <- verbose
+      self$bestScore <- NULL
+      self$earlyStop <- FALSE
+      self$improved <- FALSE
+      self$delta <- delta
+      self$previousScore <- 0
+    },
+    #' @description
+    #' call the earlystopping object and increment a counter if loss is not
+    #' improving
+    #' @param metric the current metric value
+    call = function(metric) {
+      score <- metric
+      if (is.null(self$bestScore)) {
+        self$bestScore <- score
+        self$improved <- TRUE
+      } else if (score < self$bestScore + self$delta) {
+        self$counter <- self$counter + 1
+        self$improved <- FALSE
+        if (self$verbose) {
+          ParallelLogger::logInfo(
+            "EarlyStopping counter: ", self$counter,
+            " out of ", self$patience
+          )
+        }
+        if (self$counter >= self$patience) {
+          self$earlyStop <- TRUE
+        }
+      } else {
+        self$bestScore <- score
+        self$counter <- 0
+        self$improved <- TRUE
+      }
+      self$previousScore <- score
+    }
+  )
 )
-
-
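Illustrative use of the EarlyStopping class above with a higher-is-better metric:

# Sketch only: stops once the metric fails to improve for `patience` epochs.
stopper <- EarlyStopping$new(patience = 3, delta = 0, verbose = TRUE)
for (epoch in 1:20) {
  valAuc <- 0.70 + 0.05 * exp(-epoch) # stand-in for a real validation AUC
  stopper$call(valAuc)
  if (stopper$earlyStop) break
}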
diff --git a/R/ResNet.R b/R/ResNet.R
index ab1dec8..86bb140 100644
--- a/R/ResNet.R
+++ b/R/ResNet.R
@@ -18,13 +18,13 @@
 #' setResNet
 #'
-#' @description 
+#' @description
 #' Creates settings for a ResNet model
-#' 
+#'
 #' @details
-#' Model architecture from by https://arxiv.org/abs/2106.11959 
-#' 
-#' 
+#' Model architecture from https://arxiv.org/abs/2106.11959
+#'
+#'
 #' @param numLayers Number of layers in network, default: 1:16
 #' @param sizeHidden Amount of neurons in each default layer, default: 2^(6:10) (64 to 1024)
 #' @param hiddenFactor How much to grow the amount of neurons in each ResLayer, default: 1:4
@@ -43,111 +43,115 @@
 #' @param epochs Number of epochs to run, default: 10
 #'
 #' @export
-setResNet <- function(
-  numLayers = c(1:8),
-  sizeHidden = c(2^(6:9)),
-  hiddenFactor = c(1:4),
-  residualDropout = c(seq(0,0.5,0.05)),
-  hiddenDropout = c(seq(0,0.5,0.05)),
-  normalization = c('BatchNorm'),
-  activation = c('RelU'),
-  sizeEmbedding = c(2^(6:9)),
-  weightDecay = c(1e-6, 1e-3),
-  learningRate = c(1e-2, 3e-4, 1e-5),
-  seed = NULL,
-  hyperParamSearch = 'random',
-  randomSample = 100,
-  device = 'cpu',
-  batchSize = 1024,
-  epochs = 30
-  ) {
-
+setResNet <- function(numLayers = c(1:8),
+                      sizeHidden = c(2^(6:9)),
+                      hiddenFactor = c(1:4),
+                      residualDropout = c(seq(0, 0.5, 0.05)),
+                      hiddenDropout = c(seq(0, 0.5, 0.05)),
+                      normalization = c("BatchNorm"),
+                      activation = c("RelU"),
+                      sizeEmbedding = c(2^(6:9)),
+                      weightDecay = c(1e-6, 1e-3),
+                      learningRate = c(1e-2, 3e-4, 1e-5),
+                      seed = NULL,
+                      hyperParamSearch = "random",
+                      randomSample = 100,
+                      device = "cpu",
+                      batchSize = 1024,
+                      epochs = 30) {
   if (is.null(seed)) {
     seed <- as.integer(sample(1e5, 1))
   }
-
+
   paramGrid <- list(
-    numLayers = numLayers, 
+    numLayers = numLayers,
     sizeHidden = sizeHidden,
     hiddenFactor = hiddenFactor,
     residualDropout = residualDropout,
     hiddenDropout = hiddenDropout,
-    sizeEmbedding = sizeEmbedding, 
+    sizeEmbedding = sizeEmbedding,
     weightDecay = weightDecay,
     learningRate = learningRate,
     seed = list(as.integer(seed[[1]]))
   )
-
+
   param <- listCartesian(paramGrid)
-
-  if (hyperParamSearch=='random'){
+
+  if (hyperParamSearch == "random") {
    param <- param[sample(length(param), randomSample)]
  }
-  attr(param, 'settings') <- list(
+  attr(param, "settings") <- list(
    seed = seed[1],
    device = device,
    batchSize = batchSize,
    epochs = epochs,
    name = "ResNet",
-    saveType = 'file',
-    modelParamNames = c("numLayers", "sizeHidden", "hiddenFactor",
-                        "residualDropout", "hiddenDropout", "sizeEmbedding"),
-    baseModel = 'ResNet'
+    saveType = "file",
+    modelParamNames = c(
+      "numLayers", "sizeHidden", "hiddenFactor",
+      "residualDropout", "hiddenDropout", "sizeEmbedding"
+    ),
+    baseModel = "ResNet"
  )

  results <- list(
-    fitFunction = 'fitEstimator',
+    fitFunction = "fitEstimator",
    param = param
  )
-  class(results) <- 'modelSettings'
+  class(results) <- "modelSettings"

  return(results)
-
 }

 ResNet <- torch::nn_module(
-  name='ResNet',
-  initialize=function(catFeatures, numFeatures=0, sizeEmbedding, sizeHidden, numLayers,
-                      hiddenFactor, activation=torch::nn_relu,
-                      normalization=torch::nn_batch_norm1d, hiddenDropout=NULL,
-                      residualDropout=NULL, d_out=1) {
-    self$embedding <- torch::nn_embedding_bag(num_embeddings = catFeatures + 1,
-                                              embedding_dim = sizeEmbedding,
-                                              padding_idx = 1)
+  name = "ResNet",
+  initialize = function(catFeatures, numFeatures = 0, sizeEmbedding, sizeHidden, numLayers,
+                        hiddenFactor, activation = torch::nn_relu,
+                        normalization = torch::nn_batch_norm1d, hiddenDropout = NULL,
+                        residualDropout = NULL, d_out = 1) {
+    self$embedding <- torch::nn_embedding_bag(
+      num_embeddings = catFeatures + 1,
+      embedding_dim = sizeEmbedding,
+      padding_idx = 1
+    )
     self$first_layer <- torch::nn_linear(sizeEmbedding + numFeatures, sizeHidden)
-
+
     resHidden <- sizeHidden * hiddenFactor
-
-    self$layers <- torch::nn_module_list(lapply(1:numLayers,
-                                                function (x) ResLayer(sizeHidden, resHidden,
-                                                                      normalization, activation,
-                                                                      hiddenDropout,
-                                                                      residualDropout)))
+
+    self$layers <- torch::nn_module_list(lapply(
+      1:numLayers,
+      function(x) {
+        ResLayer(
+          sizeHidden, resHidden,
+          normalization, activation,
+          hiddenDropout,
+          residualDropout
+        )
+      }
+    ))
     self$lastNorm <- normalization(sizeHidden)
     self$head <- torch::nn_linear(sizeHidden, d_out)
-
+
     self$lastAct <- activation()
-
   },
-
-  forward=function(x) {
+  forward = function(x) {
     x_cat <- x$cat
     x_num <- x$num
     x_cat <- self$embedding(x_cat + 1L) # padding_idx is 1
     if (!is.null(x_num)) {
-      x <- torch::torch_cat(list(x_cat, x_num), dim=2L)
+      x <- torch::torch_cat(list(x_cat, x_num), dim = 2L)
     } else {
       x <- x_cat
     }
     x <- self$first_layer(x)
-
+
     for (i in 1:length(self$layers)) {
       x <- self$layers[[i]](x)
     }
     x <- self$lastNorm(x)
-    x <- self$lastAct(x) 
+    x <- self$lastAct(x)
     x <- self$head(x)
     x <- x$squeeze(-1)
     return(x)
@@ -155,28 +159,24 @@ ResNet <- torch::nn_module(
 )

 ResLayer <- torch::nn_module(
-  name='ResLayer',
-
-  initialize=function(sizeHidden, resHidden, normalization,
-                      activation, hiddenDropout=NULL, residualDropout=NULL){
+  name = "ResLayer",
+  initialize = function(sizeHidden, resHidden, normalization,
+                        activation, hiddenDropout = NULL, residualDropout = NULL) {
     self$norm <- normalization(sizeHidden)
     self$linear0 <- torch::nn_linear(sizeHidden, resHidden)
     self$linear1 <- torch::nn_linear(resHidden, sizeHidden)
-
+
     self$activation <- activation
-    if (!is.null(hiddenDropout)){
-      self$hiddenDropout <- torch::nn_dropout(p=hiddenDropout)
+    if (!is.null(hiddenDropout)) {
+      self$hiddenDropout <- torch::nn_dropout(p = hiddenDropout)
     }
-    if (!is.null(residualDropout))
-    {
-      self$residualDropout <- torch::nn_dropout(p=residualDropout)
+    if (!is.null(residualDropout)) {
+      self$residualDropout <- torch::nn_dropout(p = residualDropout)
     }
-
+
     self$activation <- activation()
-
   },
-
-  forward=function(x) {
+  forward = function(x) {
     z <- x
     z <- self$norm(z)
     z <- self$linear0(z)
@@ -188,33 +188,31 @@ ResLayer <- torch::nn_module(
     if (!is.null(self$residualDropout)) {
       z <- self$residualDropout(z)
     }
-    x <- z + x 
+    x <- z + x
     return(x)
   }
 )

-listCartesian <- function(allList){
-
+listCartesian <- function(allList) {
   sizes <- lapply(allList, function(x) 1:length(x))
   combinations <- expand.grid(sizes)
-
+
   result <- list()
   length(result) <- nrow(combinations)
-
-  for(i in 1:nrow(combinations)){
+
+  for (i in 1:nrow(combinations)) {
     tempList <- list()
-    for(j in 1:ncol(combinations)){
-      tempList <- c(tempList, list(allList[[j]][combinations[[i,j]]]))
+    for (j in 1:ncol(combinations)) {
+      tempList <- c(tempList, list(allList[[j]][combinations[[i, j]]]))
     }
     names(tempList) <- names(allList)
     result[[i]] <- tempList
   }
-
   return(result)
 }

 # export this in PLP
 computeGridPerformance <- PatientLevelPrediction:::computeGridPerformance
-
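listCartesian() above is the small grid helper that setResNet() (and setTransformer() later in this patch) use to expand hyperparameter lists. A toy call, with invented names and values, shows its output shape:

    grid <- listCartesian(list(a = c(1, 2), b = c("x", "y")))
    length(grid) # 4 combinations
    grid[[1]]    # list(a = 1, b = "x"); expand.grid varies the first element fastest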
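And a hedged sketch of the settings object these reformatted defaults produce; the tiny values below are illustrative only, not recommendations:

    resNetSettings <- setResNet(
      numLayers = 2, sizeHidden = 64, hiddenFactor = 2,
      residualDropout = 0.1, hiddenDropout = 0.1,
      sizeEmbedding = 64, weightDecay = 1e-6, learningRate = 3e-4,
      hyperParamSearch = "random", randomSample = 1,
      device = "cpu", batchSize = 512, epochs = 3
    )
    class(resNetSettings)                        # "modelSettings"
    attr(resNetSettings$param, "settings")$name # "ResNet"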
diff --git a/R/Topologies.R b/R/Topologies.R
index 58e4cb8..b3ca9c7 100644
--- a/R/Topologies.R
+++ b/R/Topologies.R
@@ -4,20 +4,17 @@
 #' @param outputN Output neurons
 #' @param layer_dropout Layer dropout to use
 #' @export
-singleLayerNN <- function(inputN, layer1, outputN = 2, layer_dropout){
-
+singleLayerNN <- function(inputN, layer1, outputN = 2, layer_dropout) {
   self <- NA # fixing R check
-
+
   net <- torch::nn_module(
     "classic_net",
-
-    initialize = function(){
-      self$linear1 = torch::nn_linear(inputN, layer1)
-      self$linear2 = torch::nn_linear(layer1, outputN)
-      self$softmax = torch::nn_softmax(outputN)
+    initialize = function() {
+      self$linear1 <- torch::nn_linear(inputN, layer1)
+      self$linear2 <- torch::nn_linear(layer1, outputN)
+      self$softmax <- torch::nn_softmax(outputN)
     },
-
-    forward = function(x){
+    forward = function(x) {
       x %>%
         self$linear1() %>%
         torch::nnf_dropout(p = layer_dropout) %>%
@@ -36,23 +33,20 @@ singleLayerNN <- function(inputN, layer1, outputN = 2, layer_dropout){
 #' @param outputN output neurons
 #' @param layer_dropout layer_dropout to use
 #' @export
-doubleLayerNN <- function(inputN, layer1, 
+doubleLayerNN <- function(inputN, layer1,
                           layer2, outputN,
-                          layer_dropout){
-
+                          layer_dropout) {
   self <- NA # fixing R check
-
+
   net <- torch::nn_module(
     "classic_net",
-
-    initialize = function(){
-      self$linear1 = torch::nn_linear(inputN, layer1)
-      self$linear2 = torch::nn_linear(layer1, layer2)
-      self$linear3 = torch::nn_linear(layer2, outputN)
-      self$softmax = torch::nn_softmax(outputN)
+    initialize = function() {
+      self$linear1 <- torch::nn_linear(inputN, layer1)
+      self$linear2 <- torch::nn_linear(layer1, layer2)
+      self$linear3 <- torch::nn_linear(layer2, outputN)
+      self$softmax <- torch::nn_softmax(outputN)
     },
-
-    forward = function(x){
+    forward = function(x) {
       x %>%
         self$linear1() %>%
         torch::nnf_dropout(p = layer_dropout) %>%
@@ -73,24 +67,21 @@ doubleLayerNN <- function(inputN, layer1,
 #' @param outputN Number of output neurons
 #' @param layer_dropout The dropout to use in layer
 #' @export
-tripleLayerNN <- function(inputN, layer1, 
+tripleLayerNN <- function(inputN, layer1,
                           layer2, layer3,
-                          outputN, layer_dropout){
-
+                          outputN, layer_dropout) {
   self <- NA # fixing R check
-
+
   net <- torch::nn_module(
     "classic_net",
-
-    initialize = function(){
-      self$linear1 = torch::nn_linear(inputN, layer1)
-      self$linear2 = torch::nn_linear(layer1, layer2)
-      self$linear3 = torch::nn_linear(layer2, layer3)
-      self$linear4 = torch::nn_linear(layer3, outputN)
-      self$softmax = torch::nn_softmax(outputN)
+    initialize = function() {
+      self$linear1 <- torch::nn_linear(inputN, layer1)
+      self$linear2 <- torch::nn_linear(layer1, layer2)
+      self$linear3 <- torch::nn_linear(layer2, layer3)
+      self$linear4 <- torch::nn_linear(layer3, outputN)
+      self$softmax <- torch::nn_softmax(outputN)
     },
-
-    forward = function(x){
+    forward = function(x) {
       x %>%
         self$linear1() %>%
         torch::nnf_dropout(p = layer_dropout) %>%
@@ -100,8 +91,7 @@ tripleLayerNN <- function(inputN, layer1,
       torch::nnf_dropout(p = layer_dropout) %>%
         self$linear4() %>%
         self$softmax()
-
     }
   )
   model <- net()
-}
\ No newline at end of file
+}
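Each of these builders returns an instantiated torch module, so a forward pass is a single call. A minimal sketch with made-up sizes:

    net <- singleLayerNN(inputN = 10, layer1 = 8, outputN = 2, layer_dropout = 0.2)
    out <- net(torch::torch_randn(4, 10)) # 4 rows, 10 covariates
    out$shape                             # 4 x 2 softmax scores per class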
diff --git a/R/Transformer.R b/R/Transformer.R
index 8108d28..545290b 100644
--- a/R/Transformer.R
+++ b/R/Transformer.R
@@ -1,8 +1,8 @@
 #' create settings for training a non-temporal transformer
 #'
 #' @description A transformer model
-#' @details from https://arxiv.org/abs/2106.11959 
-#' 
+#' @details from https://arxiv.org/abs/2106.11959
+#'
 #' @param numBlocks number of transformer blocks
 #' @param dimToken dimension of each token (embedding size)
 #' @param dimOut dimension of output, usually 1 for binary problems
@@ -19,23 +19,23 @@
 #' @param hyperParamSearch what kind of hyperparameter search to do, default 'random'
 #' @param randomSamples How many samples to use in hyperparameter search if random
 #' @param seed Random seed to use
-#' 
+#'
 #' @export
-setTransformer <- function(
-  numBlocks=3, dimToken=96, dimOut=1,
-  numHeads=8, attDropout=0.25, ffnDropout=0.25,
-  resDropout=0,dimHidden=512, weightDecay=1e-6,
-  learningRate=3e-4, batchSize=1024,
-  epochs=10, device='cpu', hyperParamSearch='random',
-  randomSamples=100, seed=NULL
-) {
+setTransformer <- function(numBlocks = 3, dimToken = 96, dimOut = 1,
+                           numHeads = 8, attDropout = 0.25, ffnDropout = 0.25,
+                           resDropout = 0, dimHidden = 512, weightDecay = 1e-6,
+                           learningRate = 3e-4, batchSize = 1024,
+                           epochs = 10, device = "cpu", hyperParamSearch = "random",
+                           randomSamples = 100, seed = NULL) {
   if (!is.null(seed)) {
     seed <- as.integer(sample(1e5, 1))
   }
-
+
   if (dimToken %% numHeads != 0) {
-    stop(paste('dimToken needs to divisble by numHeads. dimToken =', dimToken,
-               'is not divisible by numHeads =', numHeads))
+    stop(paste(
+      "dimToken needs to be divisible by numHeads. dimToken =", dimToken,
+      "is not divisible by numHeads =", numHeads
+    ))
   }

   paramGrid <- list(
@@ -54,131 +54,148 @@ setTransformer <- function(
   param <- listCartesian(paramGrid)

-  if (hyperParamSearch=='random'){
+  if (hyperParamSearch == "random") {
     param <- param[sample(length(param), randomSamples)]
   }
-  attr(param, 'settings') <- list(
+  attr(param, "settings") <- list(
     seed = seed[1],
     device = device,
     batchSize = batchSize,
     epochs = epochs,
     name = "Transformer",
-    saveType = 'file',
-    modelParamNames = c('numBlocks', 'dimToken', 'dimOut', 'numHeads',
-                        'attDropout', 'ffnDropout', 'resDropout', 'dimHidden'),
-    baseModel = 'Transformer'
+    saveType = "file",
+    modelParamNames = c(
+      "numBlocks", "dimToken", "dimOut", "numHeads",
+      "attDropout", "ffnDropout", "resDropout", "dimHidden"
+    ),
+    baseModel = "Transformer"
   )

   results <- list(
-    fitFunction = 'fitEstimator',
+    fitFunction = "fitEstimator",
     param = param
   )
-  class(results) <- 'modelSettings'
+  class(results) <- "modelSettings"

   return(results)
 }

 Transformer <- torch::nn_module(
-  name='Transformer',
-  initialize = function(catFeatures, numFeatures, numBlocks, dimToken, dimOut=1,
-                        numHeads, attDropout, ffnDropout, resDropout,
-                        headActivation=torch::nn_relu,
-                        activation=NULL,
-                        ffnNorm=torch::nn_layer_norm,
-                        headNorm=torch::nn_layer_norm,
-                        attNorm=torch::nn_layer_norm,
-                        dimHidden){
-    activation = nn_reglu
+  name = "Transformer",
+  initialize = function(catFeatures, numFeatures, numBlocks, dimToken, dimOut = 1,
+                        numHeads, attDropout, ffnDropout, resDropout,
+                        headActivation = torch::nn_relu,
+                        activation = NULL,
+                        ffnNorm = torch::nn_layer_norm,
+                        headNorm = torch::nn_layer_norm,
+                        attNorm = torch::nn_layer_norm,
+                        dimHidden) {
+    activation <- nn_reglu
     self$Categoricalembedding <- Embedding(catFeatures + 1, dimToken) # + 1 for padding idx
     self$numericalEmbedding <- numericalEmbedding(numFeatures, dimToken)
     self$classToken <- ClassToken(dimToken)
-
-    self$layers <- torch::nn_module_list(lapply(1:numBlocks,
-                                                function(x) {
-                                                  layer <- torch::nn_module_list()
-                                                  layer$add_module('attention', torch::nn_multihead_attention(dimToken,numHeads,
-                                                                                                              dropout=attDropout,
-                                                                                                              bias=TRUE))
-                                                  layer$add_module('ffn', FeedForwardBlock(dimToken, dimHidden,
-                                                                                           biasFirst=TRUE,
-                                                                                           biasSecond=TRUE,
-                                                                                           dropout=ffnDropout,
-                                                                                           activation=activation))
-                                                  layer$add_module('attentionResDropout', torch::nn_dropout(resDropout))
-                                                  layer$add_module('ffnResDropout', torch::nn_dropout(resDropout))
-                                                  layer$add_module('ffnNorm', ffnNorm(dimToken))
-
-                                                  if (x!=1) {
-                                                    layer$add_module('attentionNorm', attNorm(dimToken))
-                                                  }
-                                                  return(layer)
-                                                }))
-    self$head <- Head(dimToken, bias=TRUE, activation=headActivation,
-                      headNorm, dimOut)
+
+    self$layers <- torch::nn_module_list(lapply(
+      1:numBlocks,
+      function(x) {
+        layer <- torch::nn_module_list()
+        layer$add_module("attention", torch::nn_multihead_attention(dimToken, numHeads,
+          dropout = attDropout,
+          bias = TRUE
+        ))
+        layer$add_module("ffn", FeedForwardBlock(dimToken, dimHidden,
+          biasFirst = TRUE,
+          biasSecond = TRUE,
+          dropout = ffnDropout,
+          activation = activation
+        ))
+        layer$add_module("attentionResDropout", torch::nn_dropout(resDropout))
+        layer$add_module("ffnResDropout", torch::nn_dropout(resDropout))
+        layer$add_module("ffnNorm", ffnNorm(dimToken))
+
+        if (x != 1) {
+          layer$add_module("attentionNorm", attNorm(dimToken))
+        }
+        return(layer)
+      }
+    ))
+    self$head <- Head(dimToken,
+      bias = TRUE, activation = headActivation,
+      headNorm, dimOut
+    )
   },
-  forward = function(x){
-    mask <- torch::torch_where(x$cat ==0, TRUE, FALSE)
+  forward = function(x) {
+    mask <- torch::torch_where(x$cat == 0, TRUE, FALSE)
     input <- x
     cat <- self$Categoricalembedding(x$cat)

     if (!is.null(input$num)) {
       num <- self$numericalEmbedding(input$num)
-      x <- torch::torch_cat(list(cat, num), dim=2L)
-      mask <- torch::torch_cat(list(mask, torch::torch_zeros(c(x$shape[1],
-                                                               num$shape[2]),
-                                                             device=mask$device,
-                                                             dtype=mask$dtype)),
-                               dim=2L)
+      x <- torch::torch_cat(list(cat, num), dim = 2L)
+      mask <- torch::torch_cat(list(mask, torch::torch_zeros(c(
+        x$shape[1],
+        num$shape[2]
+      ),
+      device = mask$device,
+      dtype = mask$dtype
+      )),
+      dim = 2L
+      )
     } else {
       x <- cat
     }
     x <- self$classToken(x)
-    mask <- torch::torch_cat(list(mask, torch::torch_zeros(c(x$shape[1], 1),
-                                                           device=mask$device,
-                                                           dtype=mask$dtype)),
-                             dim=2L)
+    mask <- torch::torch_cat(list(mask, torch::torch_zeros(c(x$shape[1], 1),
+      device = mask$device,
+      dtype = mask$dtype
+    )),
+    dim = 2L
+    )

     for (i in 1:length(self$layers)) {
       layer <- self$layers[[i]]
-      xResidual <- self$startResidual(layer, 'attention', x)
+      xResidual <- self$startResidual(layer, "attention", x)

-      if (i==length(self$layers)) {
+      if (i == length(self$layers)) {
         dims <- xResidual$shape
         # in final layer take only attention on CLS token
-        xResidual <- layer$attention(xResidual[,-1]$view(c(dims[1], 1, dims[3]))$transpose(1,2),
-                                     xResidual$transpose(1,2),
-                                     xResidual$transpose(1,2), mask)
+        xResidual <- layer$attention(
+          xResidual[, -1]$view(c(dims[1], 1, dims[3]))$transpose(1, 2),
+          xResidual$transpose(1, 2),
+          xResidual$transpose(1, 2), mask
+        )
         attnWeights <- xResidual[[2]]
         xResidual <- xResidual[[1]]
-        x <- x[,-1]$view(c(dims[1], 1, dims[3]))
-      } else {
+        x <- x[, -1]$view(c(dims[1], 1, dims[3]))
+      } else {
         # attention input is seq_length x batch_size x embedding_dim
-        xResidual <- layer$attention(xResidual$transpose(1,2),
-                                     xResidual$transpose(1,2),
-                                     xResidual$transpose(1,2),
-                                     mask,
-        )[[1]]
-      }
-      x <- self$endResidual(layer, 'attention', x, xResidual$transpose(1,2))
+        xResidual <- layer$attention(
+          xResidual$transpose(1, 2),
+          xResidual$transpose(1, 2),
+          xResidual$transpose(1, 2),
+          mask,
+        )[[1]]
+      }
+      x <- self$endResidual(layer, "attention", x, xResidual$transpose(1, 2))

-      xResidual <- self$startResidual(layer, 'ffn', x)
+      xResidual <- self$startResidual(layer, "ffn", x)
       xResidual <- layer$ffn(xResidual)
-      x <- self$endResidual(layer, 'ffn', x, xResidual)
+      x <- self$endResidual(layer, "ffn", x, xResidual)
     }
-    x <- self$head(x)[,1] # remove singleton dimension
+    x <- self$head(x)[, 1] # remove singleton dimension
     return(x)
   },
   startResidual = function(layer, stage, x) {
     xResidual <- x
-    normKey <- paste0(stage, 'Norm')
+    normKey <- paste0(stage, "Norm")
     if (normKey %in% names(as.list(layer))) {
       xResidual <- layer[[normKey]](xResidual)
     }
     return(xResidual)
   },
   endResidual = function(layer, stage, x, xResidual) {
-    dropoutKey <- paste0(stage, 'ResDropout')
-    xResidual <-layer[[dropoutKey]](xResidual)
+    dropoutKey <- paste0(stage, "ResDropout")
+    xResidual <- layer[[dropoutKey]](xResidual)
     x <- x + xResidual
     return(x)
   }
@@ -186,10 +203,10 @@ Transformer <- torch::nn_module(

 FeedForwardBlock <- torch::nn_module(
-  name='FeedForwardBlock',
+  name = "FeedForwardBlock",
   initialize = function(dimToken, dimHidden, biasFirst, biasSecond, dropout, activation) {
-    self$linearFirst <- torch::nn_linear(dimToken, dimHidden*2, biasFirst)
+    self$linearFirst <- torch::nn_linear(dimToken, dimHidden * 2, biasFirst)
     self$activation <- activation()
     self$dropout <- torch::nn_dropout(dropout)
     self$linearSecond <- torch::nn_linear(dimHidden, dimToken, biasSecond)
@@ -204,14 +221,14 @@ FeedForwardBlock <- torch::nn_module(
 )

 Head <- torch::nn_module(
-  name='Head',
+  name = "Head",
   initialize = function(dimIn, bias, activation, normalization, dimOut) {
     self$normalization <- normalization(dimIn)
     self$activation <- activation()
-    self$linear <- torch::nn_linear(dimIn,dimOut, bias)
+    self$linear <- torch::nn_linear(dimIn, dimOut, bias)
   },
   forward = function(x) {
-    x <- x[,-1] # ?
+    x <- x[, -1] # take the class token (the last position)
     x <- self$normalization(x)
     x <- self$activation(x)
     x <- self$linear(x)
@@ -220,29 +237,29 @@ Head <- torch::nn_module(
 )

 Embedding <- torch::nn_module(
-  name='Embedding',
+  name = "Embedding",
   initialize = function(numEmbeddings, embeddingDim) {
     self$embedding <- torch::nn_embedding(numEmbeddings, embeddingDim, padding_idx = 1)
   },
   forward = function(x_cat) {
     x <- self$embedding(x_cat + 1L) # padding idx is 1L
     return(x)
-  } 
+  }
 )

 numericalEmbedding <- torch::nn_module(
-  name='numericalEmbedding',
-  initialize = function(numEmbeddings, embeddingDim, bias=TRUE) {
-    self$weight <- torch::nn_parameter(torch::torch_empty(numEmbeddings,embeddingDim))
+  name = "numericalEmbedding",
+  initialize = function(numEmbeddings, embeddingDim, bias = TRUE) {
+    self$weight <- torch::nn_parameter(torch::torch_empty(numEmbeddings, embeddingDim))
     if (bias) {
-      self$bias <- torch::nn_parameter(torch::torch_empty(numEmbeddings, embeddingDim)) 
+      self$bias <- torch::nn_parameter(torch::torch_empty(numEmbeddings, embeddingDim))
     } else {
       self$bias <- NULL
     }
-
+
     for (parameter in list(self$weight, self$bias)) {
       if (!is.null(parameter)) {
-        torch::nn_init_kaiming_uniform_(parameter, a=sqrt(5))
+        torch::nn_init_kaiming_uniform_(parameter, a = sqrt(5))
       }
     }
   },
@@ -253,28 +270,26 @@ numericalEmbedding <- torch::nn_module(
     }
     return(x)
   }
-
 )

 # adds a class token embedding to embeddings
 ClassToken <- torch::nn_module(
-  name='ClassToken',
+  name = "ClassToken",
   initialize = function(dimToken) {
-    self$weight <- torch::nn_parameter(torch::torch_empty(dimToken,1))
-    torch::nn_init_kaiming_uniform_(self$weight, a=sqrt(5))
+    self$weight <- torch::nn_parameter(torch::torch_empty(dimToken, 1))
+    torch::nn_init_kaiming_uniform_(self$weight, a = sqrt(5))
   },
   expand = function(dims) {
     newDims <- vector("integer", length(dims) - 1) + 1
-    return (self$weight$view(c(newDims,-1))$expand(c(dims, -1)))
-
+    return(self$weight$view(c(newDims, -1))$expand(c(dims, -1)))
   },
   forward = function(x) {
-    return(torch::torch_cat(c(x, self$expand(c(dim(x)[[1]], 1))), dim=2))
+    return(torch::torch_cat(c(x, self$expand(c(dim(x)[[1]], 1))), dim = 2))
   }
 )

 nn_reglu <- torch::nn_module(
-  name='ReGlu',
+  name = "ReGlu",
   forward = function(x) {
     return(reglu(x))
   }
@@ -282,8 +297,7 @@ nn_reglu <- torch::nn_module(

 reglu <- function(x) {
-  chunks <- x$chunk(2, dim=-1)
-
-  return(chunks[[1]]* torch::nnf_relu(chunks[[2]]))
-
+  chunks <- x$chunk(2, dim = -1)
+
+  return(chunks[[1]] * torch::nnf_relu(chunks[[2]]))
 }
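reglu() above implements ReGLU gating: the last dimension is split in half and one half gates the ReLU of the other, which is why FeedForwardBlock's first linear layer has dimHidden * 2 outputs. A tiny self-contained check with invented values:

    x <- torch::torch_tensor(matrix(c(1, -2, 3, 4), nrow = 1)) # last dim = 4
    reglu(x) # 1 * relu(3) = 3 and -2 * relu(4) = -8, so tensor [3, -8]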
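And a hedged sketch of configuring the transformer settings; the values mirror the test file further below and are illustrative only. Note the guard in setTransformer(): dimToken must be divisible by numHeads.

    transformerSettings <- setTransformer(
      numBlocks = 1, dimToken = 16, dimOut = 1,
      numHeads = 2, # 16 %% 2 == 0, so the divisibility check passes
      attDropout = 0, ffnDropout = 0, resDropout = 0,
      dimHidden = 32, batchSize = 64, epochs = 1,
      randomSamples = 1
    )
    class(transformerSettings) # "modelSettings"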
diff --git a/tests/testthat/setup.R b/tests/testthat/setup.R
index 608a217..ff3cf9d 100644
--- a/tests/testthat/setup.R
+++ b/tests/testthat/setup.R
@@ -6,77 +6,79 @@ testLoc <- tempdir()
 connectionDetails <- Eunomia::getEunomiaConnectionDetails()
 Eunomia::createCohorts(connectionDetails)

-covSet <- FeatureExtraction::createCovariateSettings(useDemographicsGender = T,
-                                                     useDemographicsAge = T,
-                                                     useDemographicsRace = T,
-                                                     useDemographicsEthnicity = T,
-                                                     useDemographicsAgeGroup = T,
-                                                     useConditionGroupEraLongTerm = T,
-                                                     useDrugEraStartLongTerm = T,
-                                                     endDays = -1
+covSet <- FeatureExtraction::createCovariateSettings(
+  useDemographicsGender = T,
+  useDemographicsAge = T,
+  useDemographicsRace = T,
+  useDemographicsEthnicity = T,
+  useDemographicsAgeGroup = T,
+  useConditionGroupEraLongTerm = T,
+  useDrugEraStartLongTerm = T,
+  endDays = -1
 )

-covSetT <- FeatureExtraction::createTemporalSequenceCovariateSettings(useDemographicsGender = T,
-                                                                      useDemographicsAge = T,
-                                                                      useDemographicsRace = T,
-                                                                      useDemographicsEthnicity = T,
-                                                                      useDemographicsAgeGroup = T,
-                                                                      useConditionEraGroupStart = T,
-                                                                      useDrugEraStart = T,
-                                                                      timePart = 'month',
-                                                                      timeInterval = 1,
-                                                                      sequenceEndDay = -1,
-                                                                      sequenceStartDay = -365*5
-                                                                      )
+covSetT <- FeatureExtraction::createTemporalSequenceCovariateSettings(
+  useDemographicsGender = T,
+  useDemographicsAge = T,
+  useDemographicsRace = T,
+  useDemographicsEthnicity = T,
+  useDemographicsAgeGroup = T,
+  useConditionEraGroupStart = T,
+  useDrugEraStart = T,
+  timePart = "month",
+  timeInterval = 1,
+  sequenceEndDay = -1,
+  sequenceStartDay = -365 * 5
+)

 databaseDetails <- PatientLevelPrediction::createDatabaseDetails(
-  connectionDetails = connectionDetails, 
+  connectionDetails = connectionDetails,
   cdmDatabaseSchema = "main",
-  cohortDatabaseSchema = "main", 
-  cohortTable = "cohort", 
-  targetId = 4, 
+  cohortDatabaseSchema = "main",
+  cohortTable = "cohort",
+  targetId = 4,
   outcomeIds = 3,
-  outcomeDatabaseSchema = "main", 
-  outcomeTable = "cohort", 
-  cdmDatabaseName = 'eunomia'
+  outcomeDatabaseSchema = "main",
+  outcomeTable = "cohort",
+  cdmDatabaseName = "eunomia"
 )

 restrictPlpDataSettings <- PatientLevelPrediction::createRestrictPlpDataSettings(
-  firstExposureOnly = T, 
+  firstExposureOnly = T,
   washoutPeriod = 365
 )

 plpData <- PatientLevelPrediction::getPlpData(
-  databaseDetails = databaseDetails, 
+  databaseDetails = databaseDetails,
   restrictPlpDataSettings = restrictPlpDataSettings,
   covariateSettings = covSet
 )

-plpDataT <- PatientLevelPrediction::getPlpData(
-  databaseDetails = databaseDetails, 
-  restrictPlpDataSettings = restrictPlpDataSettings, 
-  covariateSettings = covSetT 
+plpDataT <- PatientLevelPrediction::getPlpData(
+  databaseDetails = databaseDetails,
+  restrictPlpDataSettings = restrictPlpDataSettings,
+  covariateSettings = covSetT
 )

 populationSet <- PatientLevelPrediction::createStudyPopulationSettings(
-  requireTimeAtRisk = F, 
-  riskWindowStart = 1, 
+  requireTimeAtRisk = F,
+  riskWindowStart = 1,
   riskWindowEnd = 365
 )

 population <- PatientLevelPrediction::createStudyPopulation(
-  plpData = plpData, 
-  outcomeId = 3, 
+  plpData = plpData,
+  outcomeId = 3,
   populationSettings = populationSet
 )

 trainData <- PatientLevelPrediction::splitData(
-  plpData, 
-  population = population, 
+  plpData,
+  population = population,
   splitSettings = PatientLevelPrediction::createDefaultSplitSetting()
 )

@@ -86,8 +88,7 @@ mappedData <- PatientLevelPrediction:::MapIds(
 )

 dataset <- Dataset(
-  data = mappedData$covariates, 
-  labels = trainData$Train$labels$outcomeCount, 
+  data = mappedData$covariates,
+  labels = trainData$Train$labels$outcomeCount,
   numericalIndex = NULL
-)
-
+)
diff --git a/tests/testthat/test-DeepNNTorch.R b/tests/testthat/test-DeepNNTorch.R
index af6492a..1a6e6ca 100644
--- a/tests/testthat/test-DeepNNTorch.R
+++ b/tests/testthat/test-DeepNNTorch.R
@@ -1,79 +1,86 @@
 # code to train models
-deepset <- setDeepNNTorch(units=list(c(128, 64), 128), layer_dropout=c(0.2),
-                          lr =c(1e-4), decay=c(1e-5), outcome_weight = c(1.0), batch_size = c(100),
-                          epochs= c(5), seed=NULL )
+deepset <- setDeepNNTorch(
+  units = list(c(128, 64), 128), layer_dropout = c(0.2),
+  lr = c(1e-4), decay = c(1e-5), outcome_weight = c(1.0), batch_size = c(100),
+  epochs = c(5), seed = NULL
+)

 test_that("setDeepNNTorch works", {
-
-  testthat::expect_s3_class(object = deepset, class = 'modelSettings')
-
-  testthat::expect_equal(deepset$fitFunction, 'fitDeepNNTorch')
-
-  testthat::expect_true(nrow(deepset$param) > 0 )
-
+  testthat::expect_s3_class(object = deepset, class = "modelSettings")
+
+  testthat::expect_equal(deepset$fitFunction, "fitDeepNNTorch")
+
+  testthat::expect_true(nrow(deepset$param) > 0)
 })

 sink(nullfile())
-res <- tryCatch({
-  PatientLevelPrediction::runPlp(
-    plpData = plpData,
-    outcomeId = 3,
-    modelSettings = deepset,
-    analysisId = 'DeepNNTorch',
-    analysisName = 'Testing Deep Learning',
-    populationSettings = populationSet,
-    splitSettings = PatientLevelPrediction::createDefaultSplitSetting(),
-    sampleSettings = PatientLevelPrediction::createSampleSettings(), # none
-    featureEngineeringSettings = PatientLevelPrediction::createFeatureEngineeringSettings(), # none
-    preprocessSettings = PatientLevelPrediction::createPreprocessSettings(),
-    executeSettings = PatientLevelPrediction::createExecuteSettings(
-      runSplitData = T,
-      runSampleData = F,
-      runfeatureEngineering = F,
-      runPreprocessData = T,
-      runModelDevelopment = T,
-      runCovariateSummary = F
-    ),
-    saveDirectory = file.path(testLoc, 'DeepNNTorch')
-  )
-}, error = function(e){print(e); return(NULL)}
+res <- tryCatch(
+  {
+    PatientLevelPrediction::runPlp(
+      plpData = plpData,
+      outcomeId = 3,
+      modelSettings = deepset,
+      analysisId = "DeepNNTorch",
+      analysisName = "Testing Deep Learning",
+      populationSettings = populationSet,
+      splitSettings = PatientLevelPrediction::createDefaultSplitSetting(),
+      sampleSettings = PatientLevelPrediction::createSampleSettings(), # none
+      featureEngineeringSettings = PatientLevelPrediction::createFeatureEngineeringSettings(), # none
+      preprocessSettings = PatientLevelPrediction::createPreprocessSettings(),
+      executeSettings = PatientLevelPrediction::createExecuteSettings(
+        runSplitData = T,
+        runSampleData = F,
+        runfeatureEngineering = F,
+        runPreprocessData = T,
+        runModelDevelopment = T,
+        runCovariateSummary = F
+      ),
+      saveDirectory = file.path(testLoc, "DeepNNTorch")
+    )
+  },
+  error = function(e) {
+    print(e)
+    return(NULL)
+  }
 )
 sink()

 test_that("setDeepNNTorch with runPlp working checks", {
-
   testthat::expect_false(is.null(res))
-
+
   # check structure
-  testthat::expect_true('prediction' %in% names(res))
-  testthat::expect_true('model' %in% names(res))
-  testthat::expect_true('covariateSummary' %in% names(res))
-  testthat::expect_true('performanceEvaluation' %in% names(res))
-
+  testthat::expect_true("prediction" %in% names(res))
+  testthat::expect_true("model" %in% names(res))
+  testthat::expect_true("covariateSummary" %in% names(res))
+  testthat::expect_true("performanceEvaluation" %in% names(res))
+
   # check prediction same size as pop
-  testthat::expect_equal(nrow(res$prediction %>% dplyr::filter(evaluationType %in% c('Train', 'Test'))),
-                         nrow(population))
-
+  testthat::expect_equal(
+    nrow(res$prediction %>% dplyr::filter(evaluationType %in% c("Train", "Test"))),
+    nrow(population)
+  )
+
   # check prediction between 0 and 1
   testthat::expect_gte(min(res$prediction$value), 0)
   testthat::expect_lte(max(res$prediction$value), 1)
-
 })

 test_that("Triple layer-nn works", {
-  deepset <- setDeepNNTorch(units=list(c(64,64,32), c(64,32,16), c(32,16,8)), layer_dropout=c(0.2),
-                            lr =c(1e-4), decay=c(1e-5), outcome_weight = c(1.0), batch_size = c(100),
-                            epochs= c(5), seed=NULL)
-
+  deepset <- setDeepNNTorch(
+    units = list(c(64, 64, 32), c(64, 32, 16), c(32, 16, 8)), layer_dropout = c(0.2),
+    lr = c(1e-4), decay = c(1e-5), outcome_weight = c(1.0), batch_size = c(100),
+    epochs = c(5), seed = NULL
+  )
+
   sink(nullfile())
-  results <- fitDeepNNTorch(trainData$Train, deepset, analysisId=1)
+  results <- fitDeepNNTorch(trainData$Train, deepset, analysisId = 1)
   sink()
-
-  expect_equal(class(results), 'plpModel')
-  expect_equal(attr(results, 'modelType'), 'binary')
-  expect_equal(attr(results, 'saveType'), 'file')
-
+
+  expect_equal(class(results), "plpModel")
+  expect_equal(attr(results, "modelType"), "binary")
+  expect_equal(attr(results, "saveType"), "file")
+
   # check prediction between 0 and 1
   testthat::expect_gt(min(results$prediction$value), 0)
   testthat::expect_lt(max(results$prediction$value), 1)
diff --git a/tests/testthat/test-Estimator.R b/tests/testthat/test-Estimator.R
index af153e1..2f110d3 100644
--- a/tests/testthat/test-Estimator.R
+++ b/tests/testthat/test-Estimator.R
@@ -4,153 +4,152 @@ numFeatures <- dataset$numNumFeatures()

 fitParams <- list()
 baseModel <- ResNet

-modelParameters <- list(catFeatures=catFeatures,
-                        numFeatures=numFeatures,
-                        sizeEmbedding=16,
-                        sizeHidden=16,
-                        numLayers=2,
-                        hiddenFactor=2)
+modelParameters <- list(
+  catFeatures = catFeatures,
+  numFeatures = numFeatures,
+  sizeEmbedding = 16,
+  sizeHidden = 16,
+  numLayers = 2,
+  hiddenFactor = 2
+)

 estimator <- Estimator$new(
   baseModel = baseModel,
   modelParameters = modelParameters,
-  fitParameters = fitParams, 
-  device = 'cpu'
+  fitParameters = fitParams,
+  device = "cpu"
 )

 test_that("Estimator initialization works", {
-
   # count parameters in both instances
   testthat::expect_equal(
-    sum(sapply(estimator$model$parameters,function(x) prod(x$shape))),
+    sum(sapply(estimator$model$parameters, function(x) prod(x$shape))),
     sum(sapply(do.call(baseModel, modelParameters)$parameters, function(x) prod(x$shape)))
   )
-
+
   testthat::expect_equal(
     estimator$modelParameters,
     modelParameters
   )

   # check the function that results the value from a list
-  val <- estimator$itemOrDefaults(list(param=1, test=3), 'param', default = NULL)
+  val <- estimator$itemOrDefaults(list(param = 1, test = 3), "param", default = NULL)
   expect_equal(val, 1)
-  val <- estimator$itemOrDefaults(list(param=1, test=3), 'paramater', default = NULL)
+  val <- estimator$itemOrDefaults(list(param = 1, test = 3), "paramater", default = NULL)
   expect_true(is.null(val))
-
-}
-)
+})

 sink(nullfile())
 estimator$fit(dataset, dataset)
 sink()

 test_that("estimator fitting works", {
-
-
-  # check the fitting
-  # estimator$fitEpoch(dataset, batchIndex)
-  # estimator$finishFit(valAUCs, modelStateDict, valLosses, epoch)
-  # estimator$score(dataset, batchIndex)
-  expect_true(!is.null(estimator$bestEpoch))
-  expect_true(!is.null(estimator$bestScore$loss))
-  expect_true(!is.null(estimator$bestScore$auc))
-
-  old_weights <- estimator$model$head$weight$mean()$item()
-
-  sink(nullfile())
-  estimator$fitWholeTrainingSet(dataset, estimator$learnRateSchedule)
-  sink()
-
-  expect_equal(estimator$optimizer$param_groups[[1]]$lr, tail(estimator$learnRateSchedule, 1)[[1]])
-
-  new_weights <- estimator$model$head$weight$mean()$item()
-
-  # model should be updated when refitting
-  expect_true(old_weights != new_weights)
-
-  estimator$save(testLoc, 'estimator.pt')
-
-  expect_true(file.exists(file.path(testLoc, 'estimator.pt')))
-
-  preds <- estimator$predictProba(dataset)
-
-  expect_lt(max(preds), 1)
-  expect_gt(min(preds), 0)
-
-  classes <- estimator$predict(dataset, threshold=0.5)
-  expect_equal(all(unique(classes) %in% c(0,1)), TRUE)
-
-  # not sure how to test: batchToDevice(batch)
-  })
-test_that("early stopping works", {
-  earlyStop <- EarlyStopping$new(patience=3, delta=0, verbose=FALSE)
+  # check the fitting
+  # estimator$fitEpoch(dataset, batchIndex)
+  # estimator$finishFit(valAUCs, modelStateDict, valLosses, epoch)
+  # estimator$score(dataset, batchIndex)
+  expect_true(!is.null(estimator$bestEpoch))
+  expect_true(!is.null(estimator$bestScore$loss))
+  expect_true(!is.null(estimator$bestScore$auc))
+
+  old_weights <- estimator$model$head$weight$mean()$item()
+
+  sink(nullfile())
+  estimator$fitWholeTrainingSet(dataset, estimator$learnRateSchedule)
+  sink()
+
+  expect_equal(estimator$optimizer$param_groups[[1]]$lr, tail(estimator$learnRateSchedule, 1)[[1]])
+
+  new_weights <- estimator$model$head$weight$mean()$item()
+
+  # model should be updated when refitting
+  expect_true(old_weights != new_weights)
+
+  estimator$save(testLoc, "estimator.pt")
+
+  expect_true(file.exists(file.path(testLoc, "estimator.pt")))
+
+  preds <- estimator$predictProba(dataset)
+
+  expect_lt(max(preds), 1)
+  expect_gt(min(preds), 0)
+
+  classes <- estimator$predict(dataset, threshold = 0.5)
+  expect_equal(all(unique(classes) %in% c(0, 1)), TRUE)
+
+  # not sure how to test: batchToDevice(batch)
+})
+
+test_that("early stopping works", {
+  earlyStop <- EarlyStopping$new(patience = 3, delta = 0, verbose = FALSE)
   testthat::expect_true(is.null(earlyStop$bestScore))
   testthat::expect_false(earlyStop$earlyStop)
   earlyStop$call(0.5)
-  testthat::expect_equal(earlyStop$bestScore,0.5)
+  testthat::expect_equal(earlyStop$bestScore, 0.5)
   earlyStop$call(0.4)
   earlyStop$call(0.4)
   earlyStop$call(0.4)
   testthat::expect_true(earlyStop$earlyStop)
-
 })

-modelSettings <- setResNet(numLayers=1, sizeHidden=16, hiddenFactor=1,
-                           residualDropout=0, hiddenDropout=0,
-                           sizeEmbedding = 16, hyperParamSearch = 'random',
-                           randomSample = 1, epochs=1)
+modelSettings <- setResNet(
+  numLayers = 1, sizeHidden = 16, hiddenFactor = 1,
+  residualDropout = 0, hiddenDropout = 0,
+  sizeEmbedding = 16, hyperParamSearch = "random",
+  randomSample = 1, epochs = 1
+)

-sink(nullfile()) 
+sink(nullfile())
 results <- fitEstimator(trainData$Train, modelSettings = modelSettings, analysisId = 1)
 sink()

-test_that('Estimator fit function works', {
-
+test_that("Estimator fit function works", {
   expect_true(!is.null(results$trainDetails$trainingTime))
-
-  expect_equal(class(results), 'plpModel')
-  expect_equal(attr(results, 'modelType'), 'binary')
-  expect_equal(attr(results, 'saveType'), 'file')
-
+
+  expect_equal(class(results), "plpModel")
+  expect_equal(attr(results, "modelType"), "binary")
+  expect_equal(attr(results, "saveType"), "file")
 })

-test_that('predictDeepEstimator works', {
-
+test_that("predictDeepEstimator works", {
   # input is an estimator and a dataset
   sink(nullfile())
-  predictions <- predictDeepEstimator(estimator, dataset, cohort=trainData$Train$labels)
+  predictions <- predictDeepEstimator(estimator, dataset, cohort = trainData$Train$labels)
   sink()
-
+
   expect_lt(max(predictions$value), 1)
   expect_gt(min(predictions$value), 0)
   expect_equal(nrow(predictions), nrow(trainData$Train$labels))
-
-  #input is a plpModel and data
+
+  # input is a plpModel and data
   sink(nullfile())
-  predictions <- predictDeepEstimator(plpModel = results, data=trainData$Test,
-                                      trainData$Test$labels)
+  predictions <- predictDeepEstimator(
+    plpModel = results, data = trainData$Test,
+    trainData$Test$labels
+  )
   sink()
   expect_lt(max(predictions$value), 1)
   expect_gt(min(predictions$value), 0)
   expect_equal(nrow(predictions), nrow(trainData$Test$labels))
-
-}) 
+})

-test_that('batchToDevice works', {
+test_that("batchToDevice works", {
   # since we can't test moving to gpu there is a fake device called meta
   # which we can use to test of the device is updated
-  estimator$device <- 'meta'
+  estimator$device <- "meta"
   b <- 1:10
   batch <- estimator$batchToDevice(dataset[b])
-
-  devices <- lapply(lapply(unlist(batch, recursive=TRUE), function(x) x$device),
-                    function(x) x==torch::torch_device(type='meta'))
-  # test that all are meta
-  expect_true(all(devices==TRUE))
-  }
-)

+  devices <- lapply(
+    lapply(unlist(batch, recursive = TRUE), function(x) x$device),
+    function(x) x == torch::torch_device(type = "meta")
+  )
+  # test that all are meta
+  expect_true(all(devices == TRUE))
+})

-#cases to add, estimator with early stopping that stops, and estimator without earlystopping
+# cases to add, estimator with early stopping that stops, and estimator without earlystopping
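The closing comment above flags two missing cases. A sketch (not in the package) of the first, exercising only the EarlyStopping behaviour the estimator wraps; patience = 1 makes the stop immediate:

    test_that("early stopping fires when the score stalls", {
      earlyStop <- EarlyStopping$new(patience = 1, delta = 0, verbose = FALSE)
      earlyStop$call(0.5)
      earlyStop$call(0.5) # no improvement and patience exhausted
      expect_true(earlyStop$earlyStop)
    })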
diff --git a/tests/testthat/test-ResNet.R b/tests/testthat/test-ResNet.R
index 41e54d9..270150d 100644
--- a/tests/testthat/test-ResNet.R
+++ b/tests/testthat/test-ResNet.R
@@ -1,113 +1,115 @@
 resSet <- setResNet(
-  numLayers = c(2), 
+  numLayers = c(2),
   sizeHidden = c(32),
   hiddenFactor = c(2),
-  residualDropout = c(0.1), 
+  residualDropout = c(0.1),
   hiddenDropout = c(0.1),
-  normalization = c('BatchNorm'), 
-  activation = c('RelU'),
-  sizeEmbedding = c(32), 
+  normalization = c("BatchNorm"),
+  activation = c("RelU"),
+  sizeEmbedding = c(32),
   weightDecay = c(1e-6),
-  learningRate = c(3e-4), 
-  seed = 42, 
-  hyperParamSearch = 'random', 
-  randomSample = 1, 
-  #device='cuda:0', 
-  batchSize = 128, 
+  learningRate = c(3e-4),
+  seed = 42,
+  hyperParamSearch = "random",
+  randomSample = 1,
+  # device='cuda:0',
+  batchSize = 128,
   epochs = 3
 )

 test_that("setResNet works", {
-
-  testthat::expect_s3_class(object = resSet, class = 'modelSettings')
-
-  testthat::expect_equal(resSet$fitFunction, 'fitEstimator')
-
-  testthat::expect_true(length(resSet$param) > 0 )
-
+  testthat::expect_s3_class(object = resSet, class = "modelSettings")
+
+  testthat::expect_equal(resSet$fitFunction, "fitEstimator")
+
+  testthat::expect_true(length(resSet$param) > 0)
 })

 sink(nullfile())
-res2 <- tryCatch({
-  PatientLevelPrediction::runPlp(
-    plpData = plpData,
-    outcomeId = 3,
-    modelSettings = resSet,
-    analysisId = 'ResNet',
-    analysisName = 'Testing Deep Learning',
-    populationSettings = populationSet,
-    splitSettings = PatientLevelPrediction::createDefaultSplitSetting(),
-    sampleSettings = PatientLevelPrediction::createSampleSettings(), # none
-    featureEngineeringSettings = PatientLevelPrediction::createFeatureEngineeringSettings(), # none
-    preprocessSettings = PatientLevelPrediction::createPreprocessSettings(),
-    executeSettings = PatientLevelPrediction::createExecuteSettings(
-      runSplitData = T,
-      runSampleData = F,
-      runfeatureEngineering = F,
-      runPreprocessData = T,
-      runModelDevelopment = T,
-      runCovariateSummary = F
-    ),
-    saveDirectory = file.path(testLoc, 'Deep')
-  )
-}, error = function(e){print(e); return(NULL)}
+res2 <- tryCatch(
+  {
+    PatientLevelPrediction::runPlp(
+      plpData = plpData,
+      outcomeId = 3,
+      modelSettings = resSet,
+      analysisId = "ResNet",
+      analysisName = "Testing Deep Learning",
+      populationSettings = populationSet,
+      splitSettings = PatientLevelPrediction::createDefaultSplitSetting(),
+      sampleSettings = PatientLevelPrediction::createSampleSettings(), # none
+      featureEngineeringSettings = PatientLevelPrediction::createFeatureEngineeringSettings(), # none
+      preprocessSettings = PatientLevelPrediction::createPreprocessSettings(),
+      executeSettings = PatientLevelPrediction::createExecuteSettings(
+        runSplitData = T,
+        runSampleData = F,
+        runfeatureEngineering = F,
+        runPreprocessData = T,
+        runModelDevelopment = T,
+        runCovariateSummary = F
+      ),
+      saveDirectory = file.path(testLoc, "Deep")
+    )
+  },
+  error = function(e) {
+    print(e)
+    return(NULL)
+  }
 )
 sink()

 test_that("ResNet with runPlp working checks", {
-
   testthat::expect_false(is.null(res2))
-
+
   # check structure
-  testthat::expect_true('prediction' %in% names(res2))
-  testthat::expect_true('model' %in% names(res2))
-  testthat::expect_true('covariateSummary' %in% names(res2))
-  testthat::expect_true('performanceEvaluation' %in% names(res2))
-
+  testthat::expect_true("prediction" %in% names(res2))
+  testthat::expect_true("model" %in% names(res2))
+  testthat::expect_true("covariateSummary" %in% names(res2))
+  testthat::expect_true("performanceEvaluation" %in% names(res2))
+
   # check prediction same size as pop
-  testthat::expect_equal(nrow(res2$prediction %>% 
-                                dplyr::filter(evaluationType %in% c('Train', 'Test'))), nrow(population))
-
+  testthat::expect_equal(nrow(res2$prediction %>%
+    dplyr::filter(evaluationType %in% c("Train", "Test"))), nrow(population))
+
   # check prediction between 0 and 1
   testthat::expect_gte(min(res2$prediction$value), 0)
   testthat::expect_lte(max(res2$prediction$value), 1)
-
 })

 test_that("ResNet nn-module works ", {
+  model <- ResNet(
+    catFeatures = 5, numFeatures = 1, sizeEmbedding = 5,
+    sizeHidden = 16, numLayers = 1, hiddenFactor = 2,
+    activation = torch::nn_relu,
+    normalization = torch::nn_batch_norm1d, hiddenDropout = 0.3,
+    residualDropout = 0.3, d_out = 1
+  )

-  model <- ResNet(catFeatures=5, numFeatures=1, sizeEmbedding=5,
-                  sizeHidden=16, numLayers=1, hiddenFactor=2,
-                  activation=torch::nn_relu,
-                  normalization=torch::nn_batch_norm1d, hiddenDropout=0.3,
-                  residualDropout=0.3, d_out=1)
-
   pars <- sum(sapply(model$parameters, function(x) prod(x$shape)))
-
+
   # expected number of parameters
   expect_equal(pars, 1295)
-
+
   input <- list()
-  input$cat <- torch::torch_randint(0, 5, c(10,5), dtype = torch::torch_long())
-  input$num <- torch::torch_randn(10,1, dtype = torch::torch_float32())
-
-
+  input$cat <- torch::torch_randint(0, 5, c(10, 5), dtype = torch::torch_long())
+  input$num <- torch::torch_randn(10, 1, dtype = torch::torch_float32())
+
   output <- model(input)
-
+
   # output is correct shape
   expect_equal(output$shape, 10)
-
+
   input$num <- NULL
-  model <- ResNet(catFeatures=5, numFeatures=0, sizeEmbedding=5,
-                  sizeHidden=16, numLayers=1, hiddenFactor=2,
-                  activation=torch::nn_relu,
-                  normalization=torch::nn_batch_norm1d, hiddenDropout=0.3,
-                  residualDropout=0.3, d_out=1)
+  model <- ResNet(
+    catFeatures = 5, numFeatures = 0, sizeEmbedding = 5,
+    sizeHidden = 16, numLayers = 1, hiddenFactor = 2,
+    activation = torch::nn_relu,
+    normalization = torch::nn_batch_norm1d, hiddenDropout = 0.3,
+    residualDropout = 0.3, d_out = 1
+  )
   output <- model(input)
   # model works without numeric variables
   expect_equal(output$shape, 10)
-
-}
-)
+})
diff --git a/tests/testthat/test-Transformer.R b/tests/testthat/test-Transformer.R
index 209faca..8b5a479 100644
--- a/tests/testthat/test-Transformer.R
+++ b/tests/testthat/test-Transformer.R
@@ -1,54 +1,57 @@
-settings <- setTransformer(numBlocks=1, dimToken=8, dimOut = 1,
-                           numHeads = 2, attDropout = 0.0, ffnDropout = 0.2,
-                           resDropout = 0.0,dimHidden = 32, batchSize = 64,
-                           epochs = 1, randomSamples = 1)
-
-test_that('Transformer settings work', {
-
-  testthat::expect_s3_class(object = settings, class = 'modelSettings')
-  testthat::expect_equal(settings$fitFunction, 'fitEstimator')
-  testthat::expect_true(length(settings$param) > 0 )
+settings <- setTransformer(
+  numBlocks = 1, dimToken = 8, dimOut = 1,
+  numHeads = 2, attDropout = 0.0, ffnDropout = 0.2,
+  resDropout = 0.0, dimHidden = 32, batchSize = 64,
+  epochs = 1, randomSamples = 1
+)
+
+test_that("Transformer settings work", {
+  testthat::expect_s3_class(object = settings, class = "modelSettings")
+  testthat::expect_equal(settings$fitFunction, "fitEstimator")
+  testthat::expect_true(length(settings$param) > 0)
 })

-test_that('fitEstimator with Transformer works', {
-
-  results <- fitEstimator(trainData$Train, settings, analysisId=1)
-
-  expect_equal(class(results), 'plpModel')
-  expect_equal(attr(results, 'modelType'), 'binary')
-  expect_equal(attr(results, 'saveType'), 'file')
-
+test_that("fitEstimator with Transformer works", {
+  results <- fitEstimator(trainData$Train, settings, analysisId = 1)
+
+  expect_equal(class(results), "plpModel")
+  expect_equal(attr(results, "modelType"), "binary")
+  expect_equal(attr(results, "saveType"), "file")
+
   # check prediction between 0 and 1
   expect_gt(min(results$prediction$value), 0)
   expect_lt(max(results$prediction$value), 1)
-
 })

-test_that('transformer nn-module works', {
-  model <- Transformer(catFeatures=5, numFeatures=1, numBlocks=2,
-                       dimToken=16, numHeads=2, attDropout=0, ffnDropout=0,
-                       resDropout=0, dimHidden=32)
-
+test_that("transformer nn-module works", {
+  model <- Transformer(
+    catFeatures = 5, numFeatures = 1, numBlocks = 2,
+    dimToken = 16, numHeads = 2, attDropout = 0, ffnDropout = 0,
+    resDropout = 0, dimHidden = 32
+  )
+
   pars <- sum(sapply(model$parameters, function(x) prod(x$shape)))
-
+
   # expected number of parameters
   expect_equal(pars, 5697)
-
+
   input <- list()
-  input$cat <- torch::torch_randint(0, 5, c(10,5), dtype = torch::torch_long())
-  input$num <- torch::torch_randn(10,1, dtype = torch::torch_float32())
-
-
+  input$cat <- torch::torch_randint(0, 5, c(10, 5), dtype = torch::torch_long())
+  input$num <- torch::torch_randn(10, 1, dtype = torch::torch_float32())
+
   output <- model(input)
-
+
   # output is correct shape, size of batch
   expect_equal(output$shape, 10)
-
+
   input$num <- NULL
-
-  model <- Transformer(catFeatures=5, numFeatures=0, numBlocks=2,
-                       dimToken=16, numHeads=2, attDropout=0, ffnDropout=0,
-                       resDropout=0, dimHidden=32)
+
+  model <- Transformer(
+    catFeatures = 5, numFeatures = 0, numBlocks = 2,
+    dimToken = 16, numHeads = 2, attDropout = 0, ffnDropout = 0,
+    resDropout = 0, dimHidden = 32
+  )
   output <- model(input)
   expect_equal(output$shape, 10)
-})
\ No newline at end of file
+})
diff --git a/tests/testthat/test-dataset.R b/tests/testthat/test-dataset.R
index e692ec8..989f886 100644
--- a/tests/testthat/test-dataset.R
+++ b/tests/testthat/test-dataset.R
@@ -1,69 +1,60 @@
 test_that("dataset correct class", {
-testthat::expect_true("myDataset" %in%class(dataset))
+  testthat::expect_true("myDataset" %in% class(dataset))
 })

 test_that("length of index correct", {
-
-testthat::expect_equal(
-length(dataset$getNumericalIndex()),
-dplyr::n_distinct(mappedData$covariates %>% dplyr::pull(covariateId)))
-
+  testthat::expect_equal(
+    length(dataset$getNumericalIndex()),
+    dplyr::n_distinct(mappedData$covariates %>% dplyr::pull(covariateId))
+  )
 })

 test_that("number of num and cat features sum correctly", {
-
-testthat::expect_equal(
-  dataset$numNumFeatures()+dataset$numCatFeatures(),
-  dplyr::n_distinct(mappedData$covariates %>% dplyr::pull(covariateId))
-)
-
+  testthat::expect_equal(
+    dataset$numNumFeatures() + dataset$numCatFeatures(),
+    dplyr::n_distinct(mappedData$covariates %>% dplyr::pull(covariateId))
+  )
 })

 test_that("length of dataset correct", {
-
   expect_equal(length(dataset), dataset$cat$shape[1])
   expect_equal(length(dataset), dataset$num$shape[1])
   expect_equal(
-    dataset$.length(), 
+    dataset$.length(),
     dplyr::n_distinct(mappedData$covariates %>% dplyr::pull(rowId))
   )
-
 })

 test_that(".getbatch works", {
-
-  batch_size = 16
-  #get one sample
+  batch_size <- 16
+  # get one sample
   out <- dataset[10]
-
-  # output should be a list of two items, the batch and targets, 
+
+  # output should be a list of two items, the batch and targets,
   # the batch is what goes to the model
   expect_equal(length(out), 2)
-
-  #targets should be binary
+
+  # targets should be binary
   expect_true(out$target$item() %in% c(0, 1))
-
-  #shape of batch is correct
+
+  # shape of batch is correct
   expect_equal(length(out$batch), 2)
   expect_equal(out$batch$cat$shape[1], 1)
   expect_equal(out$batch$num$shape[1], 1)
-
-  #shape of target
+
+  # shape of target
   expect_equal(out$target$shape[1], 1)
-
-  #get a whole batch
-  out <-dataset[10:(10 + batch_size - 1)]
+
+  # get a whole batch
+  out <- dataset[10:(10 + batch_size - 1)]
   expect_equal(length(out), 2)
-  expect_true(all(torch::as_array(out$target) %in% c(0,1)))
-
+  expect_true(all(torch::as_array(out$target) %in% c(0, 1)))
+
   expect_equal(length(out$batch), 2)
   expect_equal(out$batch$cat$shape[1], 16)
   expect_equal(out$batch$num$shape[1], 16)
-
+
   expect_equal(out$target$shape[1], 16)
 })
-
-