From d1c02934c8d7fe03221e3a0d6260c4430f202d0c Mon Sep 17 00:00:00 2001 From: Egill Fridgeirsson Date: Sat, 16 Apr 2022 16:05:12 +0200 Subject: [PATCH] fix example and saving of modified estimator --- NAMESPACE | 28 ++++++++++++++ R/Estimator.R | 5 +-- R/ResNet.R | 5 ++- R/Transformer.R | 2 +- extras/example.R | 90 +++++++++++++++---------------------------- man/setDeepNNTorch.Rd | 43 --------------------- man/setResNet.Rd | 20 +++++----- 7 files changed, 75 insertions(+), 118 deletions(-) create mode 100644 NAMESPACE delete mode 100644 man/setDeepNNTorch.Rd diff --git a/NAMESPACE b/NAMESPACE new file mode 100644 index 0000000..7d3bc60 --- /dev/null +++ b/NAMESPACE @@ -0,0 +1,28 @@ +# Generated by roxygen2: do not edit by hand + +export(Dataset) +export(Estimator) +export(doubleLayerNN) +export(fitDeepNNTorch) +export(fitEstimator) +export(fitTabNetTorch) +export(gridCvDeep) +export(predictDeepEstimator) +export(predictDeepNN) +export(predictTabNetTorch) +export(setCIReNN) +export(setCNNTorch) +export(setCovNN) +export(setCovNN2) +export(setDeepNN) +export(setDeepNNTorch) +export(setRNNTorch) +export(setResNet) +export(setTabNetTorch) +export(setTransformer) +export(singleLayerNN) +export(toSparseMDeep) +export(toSparseRTorch) +export(transferLearning) +export(tripleLayerNN) +importFrom(zeallot,"%<-%") diff --git a/R/Estimator.R b/R/Estimator.R index 9c3af73..ee6a3e3 100644 --- a/R/Estimator.R +++ b/R/Estimator.R @@ -46,7 +46,7 @@ fitEstimator <- function( trainData$labels <- merge(trainData$labels, trainData$fold, by = 'rowId') } - mappedData <- PatientLevelPredictionArrow::toSparseM( + mappedData <- PatientLevelPrediction::toSparseM( plpData = trainData, map = NULL ) @@ -55,7 +55,7 @@ fitEstimator <- function( labels <- mappedData$labels covariateRef <- mappedData$covariateRef - outLoc <- PatientLevelPredictionArrow:::createTempModelLoc() # export + outLoc <- PatientLevelPrediction:::createTempModelLoc() # export cvResult <- do.call( what = gridCvDeep, @@ -245,7 
+245,6 @@ gridCvDeep <- function( device = device ) - browser() estimator$fit( trainDataset, testDataset diff --git a/R/ResNet.R b/R/ResNet.R index febe3ee..89d3062 100644 --- a/R/ResNet.R +++ b/R/ResNet.R @@ -93,7 +93,7 @@ setResNet <- function( saveType = 'file', modelParamNames = c("numLayers", "sizeHidden", "hiddenFactor", "residualDropout", "hiddenDropout", "sizeEmbedding"), - baseModel = ResNet + baseModel = 'ResNet' ) results <- list( @@ -129,6 +129,9 @@ sparseLinearLayer <- torch::nn_module( } }, forward = function(input) { + if (input$dtype != torch::torch_float32()) { + input <- input$type_as(self$weight) + } torch::nnf_linear(input, self$weight, self$bias) } ) diff --git a/R/Transformer.R b/R/Transformer.R index 2a5b6b6..46f50b5 100644 --- a/R/Transformer.R +++ b/R/Transformer.R @@ -42,7 +42,7 @@ setTransformer <- function(numBlocks=3, dimToken=96, dimOut=1, saveType = 'file', modelParamNames = c('numBlocks', 'dimToken', 'dimOut', 'numHeads', 'attDropout', 'ffnDropout', 'resDropout', 'dimHidden'), - baseModel = Transformer + baseModel = 'Transformer' ) results <- list( diff --git a/extras/example.R b/extras/example.R index 82c7676..3503e88 100644 --- a/extras/example.R +++ b/extras/example.R @@ -1,10 +1,9 @@ # testing code (requires sequential branch of FeatureExtraction): # rm(list = ls()) library(FeatureExtraction) -library(PatientLevelPredictionArrow) +library(PatientLevelPrediction) library(DeepPatientLevelPrediction) -arrow <- T data(plpDataSimulationProfile) sampleSize <- 1e4 plpData <- simulatePlpData( @@ -13,68 +12,39 @@ plpData <- simulatePlpData( ) -populationSet <- PatientLevelPredictionArrow::createStudyPopulationSettings( +populationSet <- PatientLevelPrediction::createStudyPopulationSettings( requireTimeAtRisk = F, riskWindowStart = 1, riskWindowEnd = 365) -# modelSettings <- PatientLevelPrediction::setGradientBoostingMachine(ntrees = 100, nthread = 16, -# earlyStopRound = 25, maxDepth = 6, -# minChildWeight = 1, learnRate = 0.3, -# seed = 
42) - -# modelSettings <- PatientLevelPredictionArrow::setLassoLogisticRegression() - -modelSettings <- DeepPatientLevelPrediction::setTabNetTorch(device='cuda:0', randomSamples = 1, - batchSize = 32) - -if (arrow) { - res2 <- runPlp( - plpData = plpData, - outcomeId = 3, - modelSettings = modelSettings, - analysisId = 'Test', - analysisName = 'Testing ARrow', - populationSettings = populationSet, - splitSettings = createDefaultSplitSetting(), - sampleSettings = createSampleSettings(), # none - featureEngineeringSettings = createFeatureEngineeringSettings(), # none - preprocessSettings = createPreprocessSettings(), - logSettings = createLogSettings(verbosity='TRACE'), - executeSettings = createExecuteSettings( - runSplitData = T, - runSampleData = F, - runfeatureEngineering = F, - runPreprocessData = T, - runModelDevelopment = T, - runCovariateSummary = T - ), - saveDirectory = '~/test/arrow_new_plp/' +modelSettings <- setResNet(numLayers = 2, sizeHidden = 64, hiddenFactor = 1, + residualDropout = 0, hiddenDropout = 0.2, normalization = 'BatchNorm', + activation = 'RelU', sizeEmbedding = 64, weightDecay = 1e-6, + learningRate = 3e-4, seed = 42, hyperParamSearch = 'random', + randomSample = 1, device = 'cuda:0',batchSize = 32,epochs = 1) + +res2 <- PatientLevelPrediction::runPlp( +plpData = plpData, +outcomeId = 3, +modelSettings = modelSettings, +analysisId = 'Test', +analysisName = 'Testing DeepPlp', +populationSettings = populationSet, +splitSettings = createDefaultSplitSetting(), +sampleSettings = createSampleSettings(), # none +featureEngineeringSettings = createFeatureEngineeringSettings(), # none +preprocessSettings = createPreprocessSettings(), +logSettings = createLogSettings(verbosity='TRACE'), +executeSettings = createExecuteSettings( + runSplitData = T, + runSampleData = F, + runfeatureEngineering = F, + runPreprocessData = T, + runModelDevelopment = T, + runCovariateSummary = T +), +saveDirectory = '~/test/new_plp/' ) -} else { - 
library(PatientLevelPrediction) - res2 <- PatientLevelPrediction::runPlp( - plpData = plpData, - outcomeId = 3, - modelSettings = modelSettings, - analysisId = 'Test', - analysisName = 'Testing Original', - populationSettings = populationSet, - splitSettings = createDefaultSplitSetting(), - sampleSettings = createSampleSettings(), # none - featureEngineeringSettings = createFeatureEngineeringSettings(), # none - preprocessSettings = createPreprocessSettings(), - logSettings = createLogSettings(verbosity='TRACE'), - executeSettings = createExecuteSettings( - runSplitData = T, - runSampleData = F, - runfeatureEngineering = F, - runPreprocessData = T, - runModelDevelopment = T, - runCovariateSummary = T - ), - saveDirectory = '~/test/new_plp/' - ) -} + diff --git a/man/setDeepNNTorch.Rd b/man/setDeepNNTorch.Rd deleted file mode 100644 index 6de3f9a..0000000 --- a/man/setDeepNNTorch.Rd +++ /dev/null @@ -1,43 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/DeepNNTorch.R -\name{setDeepNNTorch} -\alias{setDeepNNTorch} -\title{Create setting for DeepNN model using Torch for R} -\usage{ -setDeepNNTorch( - units = list(c(128, 64), 128), - layer_dropout = c(0.2), - lr = c(1e-04), - decay = c(1e-05), - outcome_weight = c(1), - batch_size = c(10000), - epochs = c(100), - device = "cpu", - seed = NULL -) -} -\arguments{ -\item{units}{The number of units of the deep network - as a list of vectors} - -\item{layer_dropout}{The layer dropout rate (regularisation)} - -\item{lr}{Learning rate} - -\item{decay}{Learning rate decay over each update.} - -\item{outcome_weight}{The weight of the outcome class in the loss function} - -\item{batch_size}{The number of data points to use per training batch} - -\item{epochs}{Number of times to iterate over dataset} - -\item{seed}{Random seed used by deep learning model} -} -\description{ -Create setting for DeepNN model using Torch for R -} -\examples{ -\dontrun{ -model <- setDeepNN() -} -} diff --git 
a/man/setResNet.Rd b/man/setResNet.Rd index b197828..bf2347b 100644 --- a/man/setResNet.Rd +++ b/man/setResNet.Rd @@ -5,16 +5,16 @@ \title{setResNet} \usage{ setResNet( - numLayers = list(1:16), - sizeHidden = list(2^(6:10)), - hiddenFactor = list(1:4), - residualDropout = list(seq(0, 0.3, 0.05)), - hiddenDropout = list(seq(0, 0.3, 0.05)), - normalization = list("BatchNorm"), - activation = list("RelU"), - sizeEmbedding = list(2^(6:9)), - weightDecay = list(c(1e-06, 0.001)), - learningRate = list(c(0.01, 1e-05)), + numLayers = c(1:16), + sizeHidden = c(2^(6:10)), + hiddenFactor = c(1:4), + residualDropout = c(seq(0, 0.3, 0.05)), + hiddenDropout = c(seq(0, 0.3, 0.05)), + normalization = c("BatchNorm"), + activation = c("RelU"), + sizeEmbedding = c(2^(6:9)), + weightDecay = c(1e-06, 0.001), + learningRate = c(0.01, 1e-05), seed = NULL, hyperParamSearch = "random", randomSample = 100,