integer handling in python and input checks (#83)
* integer handling in python

* add input checks

* add tests for wrong inputs
egillax authored Sep 7, 2023
1 parent 31ef832 commit 66b8c84
Showing 22 changed files with 418 additions and 182 deletions.
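The motivation, in brief: R numerics cross the reticulate bridge as Python floats, so a value like 512 arrives as 512.0 and breaks Python code that expects an int. A minimal sketch of the problem (assuming reticulate and a Python runtime are available; this snippet is illustrative, not part of the commit):

library(reticulate)
r_to_py(512)   # 512.0 -- a Python float; e.g. range(512.0) raises TypeError
r_to_py(512L)  # 512   -- a Python int

Rather than forcing callers to write integer literals such as 512L everywhere, the commit casts with int() on the Python side and validates inputs on the R side.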
28 changes: 24 additions & 4 deletions R/Estimator.R
@@ -39,18 +39,38 @@
#' @export
setEstimator <- function(learningRate='auto',
weightDecay = 0.0,
-batchSize = 512L,
-epochs = 30L,
+batchSize = 512,
+epochs = 30,
device='cpu',
optimizer = torch$optim$AdamW,
scheduler = list(fun=torch$optim$lr_scheduler$ReduceLROnPlateau,
-params=list(patience=1L)),
+params=list(patience=1)),
criterion = torch$nn$BCEWithLogitsLoss,
earlyStopping = list(useEarlyStopping=TRUE,
-params = list(patience=4L)),
+params = list(patience=4)),
metric = "auc",
seed = NULL
) {

+checkIsClass(learningRate, c("numeric", "character"))
+if (inherits(learningRate, "character")) {
+  if (learningRate != "auto") {
+    stop(paste0('Learning rate should be either a numeric or "auto", you provided: ', learningRate))
+  }
+}
+checkIsClass(weightDecay, "numeric")
+checkHigherEqual(weightDecay, 0.0)
+checkIsClass(batchSize, c("numeric", "integer"))
+checkHigher(batchSize, 0)
+checkIsClass(epochs, c("numeric", "integer"))
+checkHigher(epochs, 0)
+checkIsClass(device, c("character", "function"))
+checkIsClass(scheduler, "list")
+checkIsClass(earlyStopping, c("list", "NULL"))
+checkIsClass(metric, c("character", "list"))
+checkIsClass(seed, c("numeric", "integer", "NULL"))


findLR <- length(learningRate) == 1 && learningRate == 'auto'
if (is.null(seed)) {
seed <- as.integer(sample(1e5, 1))
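A usage sketch of the new validation (assuming the package exporting setEstimator is attached; the error wording follows the stop() call above):

settings <- setEstimator(learningRate = "auto", batchSize = 512, epochs = 30)  # plain numerics now accepted
tryCatch(setEstimator(learningRate = "fast"), error = function(e) message(conditionMessage(e)))
# Learning rate should be either a numeric or "auto", you provided: fast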
41 changes: 40 additions & 1 deletion R/HelperFunctions.R
@@ -38,4 +38,43 @@ camelCaseToSnakeCase <- function(string) {
camelCaseToSnakeCaseNames <- function(object) {
names(object) <- camelCaseToSnakeCase(names(object))
return(object)
-}
+}

+#' helper function to check class of input
+#'
+#' @param parameter the input parameter to check
+#' @param classes which classes it should belong to (one or more)
+checkIsClass <- function(parameter, classes) {
+  name <- deparse(substitute(parameter))
+  if (!inherits(x = parameter, what = classes)) {
+    ParallelLogger::logError(paste0(name, ' should be of class: ', paste(classes, collapse = ' or ')))
+    stop(paste0(name, ' is wrong class'))
+  }
+  return(TRUE)
+}
+
+#' helper function to check that input is higher than a certain value
+#'
+#' @param parameter the input parameter to check, can be a vector
+#' @param value the value it should be higher than
+checkHigher <- function(parameter, value) {
+  name <- deparse(substitute(parameter))
+  if (!is.numeric(parameter) || any(parameter <= value)) {
+    ParallelLogger::logError(paste0(name, ' needs to be > ', value))
+    stop(paste0(name, ' needs to be > ', value))
+  }
+  return(TRUE)
+}
+
+#' helper function to check that input is higher than or equal to a certain value
+#'
+#' @param parameter the input parameter to check, can be a vector
+#' @param value the value it should be higher than or equal to
+checkHigherEqual <- function(parameter, value) {
+  name <- deparse(substitute(parameter))
+  if (!is.numeric(parameter) || any(parameter < value)) {
+    ParallelLogger::logError(paste0(name, ' needs to be >= ', value))
+    stop(paste0(name, ' needs to be >= ', value))
+  }
+  return(TRUE)
+}
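A standalone sketch of how these helpers behave once HelperFunctions.R is sourced (ParallelLogger must be installed; the failing vector case relies on the element-wise any() check above):

checkIsClass(512, c("numeric", "integer"))  # TRUE
checkHigher(c(1, 2, 3), 0)                  # TRUE: every element is > 0
tryCatch(checkHigherEqual(c(0.1, -0.2), 0), error = function(e) message(conditionMessage(e)))
# c(0.1, -0.2) needs to be >= 0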
29 changes: 24 additions & 5 deletions R/MLP.R
@@ -35,20 +35,39 @@
#' @param randomSampleSeed Random seed to sample hyperparameter combinations
#'
#' @export
-setMultiLayerPerceptron <- function(numLayers = as.integer(1:8),
-sizeHidden = as.integer(2^(6:9)),
+setMultiLayerPerceptron <- function(numLayers = c(1:8),
+sizeHidden = c(2^(6:9)),
dropout = c(seq(0, 0.3, 0.05)),
-sizeEmbedding = as.integer(2^(6:9)),
+sizeEmbedding = c(2^(6:9)),
estimatorSettings = setEstimator(
learningRate = 'auto',
weightDecay = c(1e-6, 1e-3),
-batchSize = 1024L,
-epochs = 30L,
+batchSize = 1024,
+epochs = 30,
device="cpu"),
hyperParamSearch = "random",
randomSample = 100,
randomSampleSeed = NULL) {

+checkIsClass(numLayers, c("integer", "numeric"))
+checkHigherEqual(numLayers, 1)
+
+checkIsClass(sizeHidden, c("integer", "numeric"))
+checkHigherEqual(sizeHidden, 1)
+
+checkIsClass(dropout, c("numeric"))
+checkHigherEqual(dropout, 0)
+
+checkIsClass(sizeEmbedding, c("numeric", "integer"))
+checkHigherEqual(sizeEmbedding, 1)
+
+checkIsClass(hyperParamSearch, "character")
+
+checkIsClass(randomSample, c("numeric", "integer"))
+checkHigherEqual(randomSample, 1)
+
+checkIsClass(randomSampleSeed, c("numeric", "integer", "NULL"))

paramGrid <- list(
numLayers = numLayers,
sizeHidden = sizeHidden,
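A call sketch (assuming the package is attached): integer-suffixed defaults are gone, and out-of-range hyperparameters now fail before any Python code runs.

mlpSettings <- setMultiLayerPerceptron(numLayers = 1:4, randomSample = 20)
tryCatch(setMultiLayerPerceptron(numLayers = 0), error = function(e) message(conditionMessage(e)))
# numLayers needs to be >= 1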
46 changes: 34 additions & 12 deletions R/ResNet.R
@@ -31,15 +31,15 @@
setDefaultResNet <- function(estimatorSettings=setEstimator(learningRate='auto',
weightDecay=1e-6,
device='cpu',
-batchSize=1024L,
-epochs=50L,
+batchSize=1024,
+epochs=50,
seed=NULL)) {
-resnetSettings <- setResNet(numLayers = 6L,
-sizeHidden = 512L,
-hiddenFactor = 2L,
+resnetSettings <- setResNet(numLayers = 6,
+sizeHidden = 512,
+hiddenFactor = 2,
residualDropout = 0.1,
hiddenDropout = 0.4,
-sizeEmbedding = 256L,
+sizeEmbedding = 256,
estimatorSettings = estimatorSettings,
hyperParamSearch = 'random',
randomSample = 1)
@@ -68,22 +68,44 @@ setDefaultResNet <- function(estimatorSettings=setEstimator(learningRate='auto',
#' @param randomSample How many random samples from hyperparameter space to use
#' @param randomSampleSeed Random seed to sample hyperparameter combinations
#' @export
-setResNet <- function(numLayers = as.integer(1:8),
-sizeHidden = as.integer(2^(6:10)),
-hiddenFactor = as.integer(1:4),
+setResNet <- function(numLayers = c(1:8),
+sizeHidden = c(2^(6:10)),
+hiddenFactor = c(1:4),
residualDropout = c(seq(0, 0.5, 0.05)),
hiddenDropout = c(seq(0, 0.5, 0.05)),
-sizeEmbedding = as.integer(2^(6:9)),
+sizeEmbedding = c(2^(6:9)),
estimatorSettings = setEstimator(learningRate='auto',
weightDecay=c(1e-6, 1e-3),
device='cpu',
-batchSize=1024L,
-epochs=30L,
+batchSize=1024,
+epochs=30,
seed=NULL),
hyperParamSearch = "random",
randomSample = 100,
randomSampleSeed = NULL)
{
+checkIsClass(numLayers, c("integer", "numeric"))
+checkHigherEqual(numLayers, 1)
+
+checkIsClass(sizeHidden, c("integer", "numeric"))
+checkHigherEqual(sizeHidden, 1)
+
+checkIsClass(residualDropout, "numeric")
+checkHigherEqual(residualDropout, 0)
+
+checkIsClass(hiddenDropout, "numeric")
+checkHigherEqual(hiddenDropout, 0)
+
+checkIsClass(sizeEmbedding, c("integer", "numeric"))
+checkHigherEqual(sizeEmbedding, 1)
+
+checkIsClass(hyperParamSearch, "character")
+
+checkIsClass(randomSample, c("numeric", "integer"))
+checkHigherEqual(randomSample, 1)
+
+checkIsClass(randomSampleSeed, c("numeric", "integer", "NULL"))

paramGrid <- list(
numLayers = numLayers,
sizeHidden = sizeHidden,
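The default wrapper follows the same pattern; a sketch (assuming the package is attached) showing that setDefaultResNet() forwards plain-numeric estimator settings through setResNet() and its input checks:

resnet <- setDefaultResNet(estimatorSettings = setEstimator(learningRate = "auto", batchSize = 1024, epochs = 50))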
67 changes: 56 additions & 11 deletions R/Transformer.R
@@ -27,19 +27,19 @@
setDefaultTransformer <- function(estimatorSettings=setEstimator(
learningRate = 'auto',
weightDecay = 1e-4,
-batchSize=512L,
-epochs=10L,
+batchSize=512,
+epochs=10,
seed=NULL,
device='cpu')
) {
-transformerSettings <- setTransformer(numBlocks = 3L,
-dimToken = 192L,
-dimOut = 1L,
-numHeads = 8L,
+transformerSettings <- setTransformer(numBlocks = 3,
+dimToken = 192,
+dimOut = 1,
+numHeads = 8,
attDropout = 0.2,
ffnDropout = 0.1,
resDropout = 0.0,
-dimHidden = 256L,
+dimHidden = 256,
estimatorSettings=estimatorSettings,
hyperParamSearch = 'random',
randomSample = 1)
@@ -67,16 +67,61 @@ setDefaultTransformer <- function(estimatorSettings=setEstimator(
#' @param randomSampleSeed Random seed to sample hyperparameter combinations
#'
#' @export
-setTransformer <- function(numBlocks = 3, dimToken = 96, dimOut = 1,
-numHeads = 8, attDropout = 0.25, ffnDropout = 0.25,
-resDropout = 0, dimHidden = 512, dimHiddenRatio = NULL,
+setTransformer <- function(numBlocks = 3,
+dimToken = 96,
+dimOut = 1,
+numHeads = 8,
+attDropout = 0.25,
+ffnDropout = 0.25,
+resDropout = 0,
+dimHidden = 512,
+dimHiddenRatio = NULL,
estimatorSettings=setEstimator(weightDecay = 1e-6,
batchSize=1024,
epochs=10,
seed=NULL),
hyperParamSearch = "random",
-randomSample = 1, randomSampleSeed = NULL) {
+randomSample = 1,
+randomSampleSeed = NULL) {

+checkIsClass(numBlocks, c("integer", "numeric"))
+checkHigherEqual(numBlocks, 1)
+
+checkIsClass(dimToken, c("integer", "numeric"))
+checkHigherEqual(dimToken, 1)
+
+checkIsClass(dimOut, c("integer", "numeric"))
+checkHigherEqual(dimOut, 1)
+
+checkIsClass(numHeads, c("integer", "numeric"))
+checkHigherEqual(numHeads, 1)
+
+checkIsClass(attDropout, c("numeric"))
+checkHigherEqual(attDropout, 0)
+
+checkIsClass(ffnDropout, c("numeric"))
+checkHigherEqual(ffnDropout, 0)
+
+checkIsClass(resDropout, c("numeric"))
+checkHigherEqual(resDropout, 0)
+
+checkIsClass(dimHidden, c("integer", "numeric", "NULL"))
+if (!is.null(dimHidden)) {
+  checkHigherEqual(dimHidden, 1)
+}
+
+checkIsClass(dimHiddenRatio, c("numeric", "NULL"))
+if (!is.null(dimHiddenRatio)) {
+  checkHigher(dimHiddenRatio, 0)
+}
+
+checkIsClass(hyperParamSearch, "character")
+
+checkIsClass(randomSample, c("numeric", "integer"))
+checkHigherEqual(randomSample, 1)
+
+checkIsClass(randomSampleSeed, c("numeric", "integer", "NULL"))

if (any(with(expand.grid(dimToken = dimToken, numHeads = numHeads), dimToken %% numHeads != 0))) {
  stop(paste(
    "dimToken needs to be divisible by numHeads. dimToken =", dimToken,
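The pre-existing divisibility guard still runs after the new checks: every candidate dimToken must be a multiple of every candidate numHeads. A self-contained sketch of the condition it tests:

grid <- expand.grid(dimToken = c(96, 192), numHeads = c(8, 10))
any(with(grid, dimToken %% numHeads != 0))  # TRUE: 96 %% 10 == 6, so setTransformer() would stop()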
6 changes: 3 additions & 3 deletions inst/python/Estimator.py
@@ -28,13 +28,13 @@ def __init__(self,
self.model_parameters = model_parameters
self.estimator_settings = estimator_settings

-self.epochs = estimator_settings.get("epochs", 5)
+self.epochs = int(estimator_settings.get("epochs", 5))
self.learning_rate = estimator_settings.get("learning_rate", 3e-4)
self.weight_decay = estimator_settings.get("weight_decay", 1e-5)
-self.batch_size = estimator_settings.get("batch_size", 1024)
+self.batch_size = int(estimator_settings.get("batch_size", 1024))
self.prefix = estimator_settings.get("prefix", self.model.name)

-self.previous_epochs = estimator_settings.get("previous_epochs", 0)
+self.previous_epochs = int(estimator_settings.get("previous_epochs", 0))
self.model.to(device=self.device)

self.optimizer = estimator_settings["optimizer"](params=self.model.parameters(),
19 changes: 13 additions & 6 deletions inst/python/MLP.py
@@ -6,17 +6,24 @@
class MLP(nn.Module):

def __init__(self,
-cat_features,
-num_features,
-size_embedding,
-size_hidden,
-num_layers,
+cat_features: int,
+num_features: int,
+size_embedding: int,
+size_hidden: int,
+num_layers: int,
activation=nn.ReLU,
normalization=nn.BatchNorm1d,
dropout=None,
-d_out=1):
+d_out: int = 1):
super(MLP, self).__init__()
self.name = "MLP"
+cat_features = int(cat_features)
+num_features = int(num_features)
+size_embedding = int(size_embedding)
+size_hidden = int(size_hidden)
+num_layers = int(num_layers)
+d_out = int(d_out)

self.embedding = nn.EmbeddingBag(cat_features + 1,
size_embedding,
padding_idx=0)
23 changes: 16 additions & 7 deletions inst/python/ResNet.py
@@ -7,20 +7,29 @@
class ResNet(nn.Module):

def __init__(self,
-cat_features,
-num_features=0,
-size_embedding=256,
-size_hidden=256,
-num_layers=2,
-hidden_factor=1,
+cat_features: int,
+num_features: int = 0,
+size_embedding: int = 256,
+size_hidden: int = 256,
+num_layers: int = 2,
+hidden_factor: int = 1,
activation=nn.ReLU,
normalization=nn.BatchNorm1d,
hidden_dropout=0,
residual_dropout=0,
-dim_out=1,
+dim_out: int = 1,
concat_num=True):
super(ResNet, self).__init__()
self.name = 'ResNet'
+cat_features = int(cat_features)
+num_features = int(num_features)
+size_embedding = int(size_embedding)
+size_hidden = int(size_hidden)
+num_layers = int(num_layers)
+hidden_factor = int(hidden_factor)
+dim_out = int(dim_out)


self.embedding = nn.EmbeddingBag(
num_embeddings=cat_features + 1,
embedding_dim=size_embedding,