From d1c02934c8d7fe03221e3a0d6260c4430f202d0c Mon Sep 17 00:00:00 2001 From: Egill Fridgeirsson Date: Sat, 16 Apr 2022 16:05:12 +0200 Subject: [PATCH] fix example and saving of modified estimator --- NAMESPACE | 28 ++++++++++++++ R/Estimator.R | 5 +-- R/ResNet.R | 5 ++- R/Transformer.R | 2 +- extras/example.R | 90 +++++++++++++++---------------------------- man/setDeepNNTorch.Rd | 43 --------------------- man/setResNet.Rd | 20 +++++----- 7 files changed, 75 insertions(+), 118 deletions(-) create mode 100644 NAMESPACE delete mode 100644 man/setDeepNNTorch.Rd diff --git a/NAMESPACE b/NAMESPACE new file mode 100644 index 0000000..7d3bc60 --- /dev/null +++ b/NAMESPACE @@ -0,0 +1,28 @@ +# Generated by roxygen2: do not edit by hand + +export(Dataset) +export(Estimator) +export(doubleLayerNN) +export(fitDeepNNTorch) +export(fitEstimator) +export(fitTabNetTorch) +export(gridCvDeep) +export(predictDeepEstimator) +export(predictDeepNN) +export(predictTabNetTorch) +export(setCIReNN) +export(setCNNTorch) +export(setCovNN) +export(setCovNN2) +export(setDeepNN) +export(setDeepNNTorch) +export(setRNNTorch) +export(setResNet) +export(setTabNetTorch) +export(setTransformer) +export(singleLayerNN) +export(toSparseMDeep) +export(toSparseRTorch) +export(transferLearning) +export(tripleLayerNN) +importFrom(zeallot,"%<-%") diff --git a/R/Estimator.R b/R/Estimator.R index 9c3af73..ee6a3e3 100644 --- a/R/Estimator.R +++ b/R/Estimator.R @@ -46,7 +46,7 @@ fitEstimator <- function( trainData$labels <- merge(trainData$labels, trainData$fold, by = 'rowId') } - mappedData <- PatientLevelPredictionArrow::toSparseM( + mappedData <- PatientLevelPrediction::toSparseM( plpData = trainData, map = NULL ) @@ -55,7 +55,7 @@ fitEstimator <- function( labels <- mappedData$labels covariateRef <- mappedData$covariateRef - outLoc <- PatientLevelPredictionArrow:::createTempModelLoc() # export + outLoc <- PatientLevelPrediction:::createTempModelLoc() # export cvResult <- do.call( what = gridCvDeep, @@ -245,7 
+245,6 @@ gridCvDeep <- function( device = device ) - browser() estimator$fit( trainDataset, testDataset diff --git a/R/ResNet.R b/R/ResNet.R index febe3ee..89d3062 100644 --- a/R/ResNet.R +++ b/R/ResNet.R @@ -93,7 +93,7 @@ setResNet <- function( saveType = 'file', modelParamNames = c("numLayers", "sizeHidden", "hiddenFactor", "residualDropout", "hiddenDropout", "sizeEmbedding"), - baseModel = ResNet + baseModel = 'ResNet' ) results <- list( @@ -129,6 +129,9 @@ sparseLinearLayer <- torch::nn_module( } }, forward = function(input) { + if (input$dtype != torch::torch_float32()) { + input <- input$type_as(self$weight) + } torch::nnf_linear(input, self$weight, self$bias) } ) diff --git a/R/Transformer.R b/R/Transformer.R index 2a5b6b6..46f50b5 100644 --- a/R/Transformer.R +++ b/R/Transformer.R @@ -42,7 +42,7 @@ setTransformer <- function(numBlocks=3, dimToken=96, dimOut=1, saveType = 'file', modelParamNames = c('numBlocks', 'dimToken', 'dimOut', 'numHeads', 'attDropout', 'ffnDropout', 'resDropout', 'dimHidden'), - baseModel = Transformer + baseModel = 'Transformer' ) results <- list( diff --git a/extras/example.R b/extras/example.R index 82c7676..3503e88 100644 --- a/extras/example.R +++ b/extras/example.R @@ -1,10 +1,9 @@ # testing code (requires sequential branch of FeatureExtraction): # rm(list = ls()) library(FeatureExtraction) -library(PatientLevelPredictionArrow) +library(PatientLevelPrediction) library(DeepPatientLevelPrediction) -arrow <- T data(plpDataSimulationProfile) sampleSize <- 1e4 plpData <- simulatePlpData( @@ -13,68 +12,39 @@ plpData <- simulatePlpData( ) -populationSet <- PatientLevelPredictionArrow::createStudyPopulationSettings( +populationSet <- PatientLevelPrediction::createStudyPopulationSettings( requireTimeAtRisk = F, riskWindowStart = 1, riskWindowEnd = 365) -# modelSettings <- PatientLevelPrediction::setGradientBoostingMachine(ntrees = 100, nthread = 16, -# earlyStopRound = 25, maxDepth = 6, -# minChildWeight = 1, learnRate = 0.3, -# seed = 
42) - -# modelSettings <- PatientLevelPredictionArrow::setLassoLogisticRegression() - -modelSettings <- DeepPatientLevelPrediction::setTabNetTorch(device='cuda:0', randomSamples = 1, - batchSize = 32) - -if (arrow) { - res2 <- runPlp( - plpData = plpData, - outcomeId = 3, - modelSettings = modelSettings, - analysisId = 'Test', - analysisName = 'Testing ARrow', - populationSettings = populationSet, - splitSettings = createDefaultSplitSetting(), - sampleSettings = createSampleSettings(), # none - featureEngineeringSettings = createFeatureEngineeringSettings(), # none - preprocessSettings = createPreprocessSettings(), - logSettings = createLogSettings(verbosity='TRACE'), - executeSettings = createExecuteSettings( - runSplitData = T, - runSampleData = F, - runfeatureEngineering = F, - runPreprocessData = T, - runModelDevelopment = T, - runCovariateSummary = T - ), - saveDirectory = '~/test/arrow_new_plp/' +modelSettings <- setResNet(numLayers = 2, sizeHidden = 64, hiddenFactor = 1, + residualDropout = 0, hiddenDropout = 0.2, normalization = 'BatchNorm', + activation = 'RelU', sizeEmbedding = 64, weightDecay = 1e-6, + learningRate = 3e-4, seed = 42, hyperParamSearch = 'random', + randomSample = 1, device = 'cuda:0',batchSize = 32,epochs = 1) + +res2 <- PatientLevelPrediction::runPlp( +plpData = plpData, +outcomeId = 3, +modelSettings = modelSettings, +analysisId = 'Test', +analysisName = 'Testing DeepPlp', +populationSettings = populationSet, +splitSettings = createDefaultSplitSetting(), +sampleSettings = createSampleSettings(), # none +featureEngineeringSettings = createFeatureEngineeringSettings(), # none +preprocessSettings = createPreprocessSettings(), +logSettings = createLogSettings(verbosity='TRACE'), +executeSettings = createExecuteSettings( + runSplitData = T, + runSampleData = F, + runfeatureEngineering = F, + runPreprocessData = T, + runModelDevelopment = T, + runCovariateSummary = T +), +saveDirectory = '~/test/new_plp/' ) -} else { - 
library(PatientLevelPrediction) - res2 <- PatientLevelPrediction::runPlp( - plpData = plpData, - outcomeId = 3, - modelSettings = modelSettings, - analysisId = 'Test', - analysisName = 'Testing Original', - populationSettings = populationSet, - splitSettings = createDefaultSplitSetting(), - sampleSettings = createSampleSettings(), # none - featureEngineeringSettings = createFeatureEngineeringSettings(), # none - preprocessSettings = createPreprocessSettings(), - logSettings = createLogSettings(verbosity='TRACE'), - executeSettings = createExecuteSettings( - runSplitData = T, - runSampleData = F, - runfeatureEngineering = F, - runPreprocessData = T, - runModelDevelopment = T, - runCovariateSummary = T - ), - saveDirectory = '~/test/new_plp/' - ) -} + diff --git a/man/setDeepNNTorch.Rd b/man/setDeepNNTorch.Rd deleted file mode 100644 index 6de3f9a..0000000 --- a/man/setDeepNNTorch.Rd +++ /dev/null @@ -1,43 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/DeepNNTorch.R -\name{setDeepNNTorch} -\alias{setDeepNNTorch} -\title{Create setting for DeepNN model using Torch for R} -\usage{ -setDeepNNTorch( - units = list(c(128, 64), 128), - layer_dropout = c(0.2), - lr = c(1e-04), - decay = c(1e-05), - outcome_weight = c(1), - batch_size = c(10000), - epochs = c(100), - device = "cpu", - seed = NULL -) -} -\arguments{ -\item{units}{The number of units of the deep network - as a list of vectors} - -\item{layer_dropout}{The layer dropout rate (regularisation)} - -\item{lr}{Learning rate} - -\item{decay}{Learning rate decay over each update.} - -\item{outcome_weight}{The weight of the outcome class in the loss function} - -\item{batch_size}{The number of data points to use per training batch} - -\item{epochs}{Number of times to iterate over dataset} - -\item{seed}{Random seed used by deep learning model} -} -\description{ -Create setting for DeepNN model using Torch for R -} -\examples{ -\dontrun{ -model <- setDeepNN() -} -} diff --git 
a/man/setResNet.Rd b/man/setResNet.Rd index b197828..bf2347b 100644 --- a/man/setResNet.Rd +++ b/man/setResNet.Rd @@ -5,16 +5,16 @@ \title{setResNet} \usage{ setResNet( - numLayers = list(1:16), - sizeHidden = list(2^(6:10)), - hiddenFactor = list(1:4), - residualDropout = list(seq(0, 0.3, 0.05)), - hiddenDropout = list(seq(0, 0.3, 0.05)), - normalization = list("BatchNorm"), - activation = list("RelU"), - sizeEmbedding = list(2^(6:9)), - weightDecay = list(c(1e-06, 0.001)), - learningRate = list(c(0.01, 1e-05)), + numLayers = c(1:16), + sizeHidden = c(2^(6:10)), + hiddenFactor = c(1:4), + residualDropout = c(seq(0, 0.3, 0.05)), + hiddenDropout = c(seq(0, 0.3, 0.05)), + normalization = c("BatchNorm"), + activation = c("RelU"), + sizeEmbedding = c(2^(6:9)), + weightDecay = c(1e-06, 0.001), + learningRate = c(0.01, 1e-05), seed = NULL, hyperParamSearch = "random", randomSample = 100,