integer handling in python and input checks #83

Merged · 3 commits · Sep 7, 2023
28 changes: 24 additions & 4 deletions R/Estimator.R
@@ -39,18 +39,38 @@
#' @export
setEstimator <- function(learningRate='auto',
weightDecay = 0.0,
-batchSize = 512L,
-epochs = 30L,
+batchSize = 512,
+epochs = 30,
device='cpu',
optimizer = torch$optim$AdamW,
scheduler = list(fun=torch$optim$lr_scheduler$ReduceLROnPlateau,
-params=list(patience=1L)),
+params=list(patience=1)),
criterion = torch$nn$BCEWithLogitsLoss,
earlyStopping = list(useEarlyStopping=TRUE,
-params = list(patience=4L)),
+params = list(patience=4)),
metric = "auc",
seed = NULL
) {

+checkIsClass(learningRate, c("numeric", "character"))
+if (inherits(learningRate, "character")) {
+  if (learningRate != "auto") {
+    stop(paste0('Learning rate should be either a numeric or "auto", you provided: ', learningRate))
+  }
+}
+checkIsClass(weightDecay, "numeric")
+checkHigherEqual(weightDecay, 0.0)
+checkIsClass(batchSize, c("numeric", "integer"))
+checkHigher(batchSize, 0)
+checkIsClass(epochs, c("numeric", "integer"))
+checkHigher(epochs, 0)
+checkIsClass(device, c("character", "function"))
+checkIsClass(scheduler, "list")
+checkIsClass(earlyStopping, c("list", "NULL"))
+checkIsClass(metric, c("character", "list"))
+checkIsClass(seed, c("numeric", "integer", "NULL"))


if (length(learningRate)==1 && learningRate=='auto') {findLR <- TRUE} else {findLR <- FALSE}
if (is.null(seed)) {
seed <- as.integer(sample(1e5, 1))
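With these checks, `setEstimator` now accepts plain numerics where integer literals were previously required, and rejects malformed input early. A minimal sketch of the intended behavior, assuming the package is attached (the `tryCatch` wrapper is only for illustration):

```r
# Plain numerics now pass where 512L / 30L used to be required
est <- setEstimator(learningRate = "auto", batchSize = 512, epochs = 30)

# A character learning rate other than "auto" fails fast
tryCatch(
  setEstimator(learningRate = "fast"),
  error = function(e) message(conditionMessage(e))
)
#> Learning rate should be either a numeric or "auto", you provided: fast
```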
41 changes: 40 additions & 1 deletion R/HelperFunctions.R
@@ -38,4 +38,43 @@ camelCaseToSnakeCase <- function(string) {
camelCaseToSnakeCaseNames <- function(object) {
names(object) <- camelCaseToSnakeCase(names(object))
return(object)
-}
+}
+
+#' Helper function to check the class of an input
+#'
+#' @param parameter the input parameter to check
+#' @param classes which classes it should belong to (one or more)
+checkIsClass <- function(parameter, classes) {
+  name <- deparse(substitute(parameter))
+  if (!inherits(x = parameter, what = classes)) {
+    ParallelLogger::logError(paste0(name, ' should be of class: ',
+                                    paste(classes, collapse = ' or ')))
+    stop(paste0(name, ' is wrong class'))
+  }
+  return(TRUE)
+}
+
+#' Helper function to check that an input is higher than a certain value
+#'
+#' @param parameter the input parameter to check, can be a vector
+#' @param value the value it should be higher than
+checkHigher <- function(parameter, value) {
+  name <- deparse(substitute(parameter))
+  if (!is.numeric(parameter) || any(parameter <= value)) {
+    ParallelLogger::logError(paste0(name, ' needs to be > ', value))
+    stop(paste0(name, ' needs to be > ', value))
+  }
+  return(TRUE)
+}
+
+#' Helper function to check that an input is higher than or equal to a certain value
+#'
+#' @param parameter the input parameter to check, can be a vector
+#' @param value the value it should be higher than or equal to
+checkHigherEqual <- function(parameter, value) {
+  name <- deparse(substitute(parameter))
+  if (!is.numeric(parameter) || any(parameter < value)) {
+    ParallelLogger::logError(paste0(name, ' needs to be >= ', value))
+    stop(paste0(name, ' needs to be >= ', value))
+  }
+  return(TRUE)
+}
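Because `deparse(substitute(parameter))` recovers the argument name at the call site, the log and error messages name the offending variable rather than the formal `parameter`. A hypothetical usage sketch, assuming the three helpers above are in scope:

```r
batchSize <- 512
checkIsClass(batchSize, c("numeric", "integer"))  # TRUE
checkHigher(batchSize, 0)                         # TRUE

dropout <- -0.1
checkHigherEqual(dropout, 0)
#> Error: dropout needs to be >= 0
```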
29 changes: 24 additions & 5 deletions R/MLP.R
@@ -35,20 +35,39 @@
#' @param randomSampleSeed Random seed to sample hyperparameter combinations
#'
#' @export
-setMultiLayerPerceptron <- function(numLayers = as.integer(1:8),
-sizeHidden = as.integer(2^(6:9)),
+setMultiLayerPerceptron <- function(numLayers = c(1:8),
+sizeHidden = c(2^(6:9)),
dropout = c(seq(0, 0.3, 0.05)),
-sizeEmbedding = as.integer(2^(6:9)),
+sizeEmbedding = c(2^(6:9)),
estimatorSettings = setEstimator(
learningRate = 'auto',
weightDecay = c(1e-6, 1e-3),
-batchSize = 1024L,
-epochs = 30L,
+batchSize = 1024,
+epochs = 30,
device="cpu"),
hyperParamSearch = "random",
randomSample = 100,
randomSampleSeed = NULL) {

+checkIsClass(numLayers, c("integer", "numeric"))
+checkHigherEqual(numLayers, 1)
+
+checkIsClass(sizeHidden, c("integer", "numeric"))
+checkHigherEqual(sizeHidden, 1)
+
+checkIsClass(dropout, c("numeric"))
+checkHigherEqual(dropout, 0)
+
+checkIsClass(sizeEmbedding, c("numeric", "integer"))
+checkHigherEqual(sizeEmbedding, 1)
+
+checkIsClass(hyperParamSearch, "character")
+
+checkIsClass(randomSample, c("numeric", "integer"))
+checkHigherEqual(randomSample, 1)
+
+checkIsClass(randomSampleSeed, c("numeric", "integer", "NULL"))

paramGrid <- list(
numLayers = numLayers,
sizeHidden = sizeHidden,
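The net effect is that hyperparameter grids can be written as ordinary R numerics; the Python layer (see `inst/python/*.py` below) coerces them to integers where needed. A hypothetical call with non-default grids:

```r
mlpSettings <- setMultiLayerPerceptron(
  numLayers = 1:4,              # no as.integer() needed anymore
  sizeHidden = 2^(6:8),
  dropout = seq(0, 0.3, 0.05),
  sizeEmbedding = 2^(6:8),
  randomSample = 10
)
```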
46 changes: 34 additions & 12 deletions R/ResNet.R
@@ -31,15 +31,15 @@
setDefaultResNet <- function(estimatorSettings=setEstimator(learningRate='auto',
weightDecay=1e-6,
device='cpu',
-batchSize=1024L,
-epochs=50L,
+batchSize=1024,
+epochs=50,
seed=NULL)) {
-resnetSettings <- setResNet(numLayers = 6L,
-sizeHidden = 512L,
-hiddenFactor = 2L,
+resnetSettings <- setResNet(numLayers = 6,
+sizeHidden = 512,
+hiddenFactor = 2,
residualDropout = 0.1,
hiddenDropout = 0.4,
-sizeEmbedding = 256L,
+sizeEmbedding = 256,
estimatorSettings = estimatorSettings,
hyperParamSearch = 'random',
randomSample = 1)
@@ -68,22 +68,44 @@ setDefaultResNet <- function(estimatorSettings=setEstimator(learningRate='auto',
#' @param randomSample How many random samples from hyperparameter space to use
#' @param randomSampleSeed Random seed to sample hyperparameter combinations
#' @export
-setResNet <- function(numLayers = as.integer(1:8),
-sizeHidden = as.integer(2^(6:10)),
-hiddenFactor = as.integer(1:4),
+setResNet <- function(numLayers = c(1:8),
+sizeHidden = c(2^(6:10)),
+hiddenFactor = c(1:4),
residualDropout = c(seq(0, 0.5, 0.05)),
hiddenDropout = c(seq(0, 0.5, 0.05)),
-sizeEmbedding = as.integer(2^(6:9)),
+sizeEmbedding = c(2^(6:9)),
estimatorSettings = setEstimator(learningRate='auto',
weightDecay=c(1e-6, 1e-3),
device='cpu',
-batchSize=1024L,
-epochs=30L,
+batchSize=1024,
+epochs=30,
seed=NULL),
hyperParamSearch = "random",
randomSample = 100,
randomSampleSeed = NULL)
{
+checkIsClass(numLayers, c("integer", "numeric"))
+checkHigherEqual(numLayers, 1)
+
+checkIsClass(sizeHidden, c("integer", "numeric"))
+checkHigherEqual(sizeHidden, 1)
+
+checkIsClass(residualDropout, "numeric")
+checkHigherEqual(residualDropout, 0)
+
+checkIsClass(hiddenDropout, "numeric")
+checkHigherEqual(hiddenDropout, 0)
+
+checkIsClass(sizeEmbedding, c("integer", "numeric"))
+checkHigherEqual(sizeEmbedding, 1)
+
+checkIsClass(hyperParamSearch, "character")
+
+checkIsClass(randomSample, c("numeric", "integer"))
+checkHigherEqual(randomSample, 1)
+
+checkIsClass(randomSampleSeed, c("numeric", "integer", "NULL"))

paramGrid <- list(
numLayers = numLayers,
sizeHidden = sizeHidden,
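As with the MLP, invalid grids are now rejected before any model is built. A sketch of a failing call (hypothetical value):

```r
tryCatch(
  setResNet(numLayers = 0),  # violates checkHigherEqual(numLayers, 1)
  error = function(e) message(conditionMessage(e))
)
#> numLayers needs to be >= 1
```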
67 changes: 56 additions & 11 deletions R/Transformer.R
@@ -27,19 +27,19 @@
setDefaultTransformer <- function(estimatorSettings=setEstimator(
learningRate = 'auto',
weightDecay = 1e-4,
-batchSize=512L,
-epochs=10L,
+batchSize=512,
+epochs=10,
seed=NULL,
device='cpu')
) {
-transformerSettings <- setTransformer(numBlocks = 3L,
-dimToken = 192L,
-dimOut = 1L,
-numHeads = 8L,
+transformerSettings <- setTransformer(numBlocks = 3,
+dimToken = 192,
+dimOut = 1,
+numHeads = 8,
attDropout = 0.2,
ffnDropout = 0.1,
resDropout = 0.0,
-dimHidden = 256L,
+dimHidden = 256,
estimatorSettings=estimatorSettings,
hyperParamSearch = 'random',
randomSample = 1)
@@ -67,16 +67,61 @@ setDefaultTransformer <- function(estimatorSettings=setEstimator(
#' @param randomSampleSeed Random seed to sample hyperparameter combinations
#'
#' @export
-setTransformer <- function(numBlocks = 3, dimToken = 96, dimOut = 1,
-numHeads = 8, attDropout = 0.25, ffnDropout = 0.25,
-resDropout = 0, dimHidden = 512, dimHiddenRatio = NULL,
+setTransformer <- function(numBlocks = 3,
+dimToken = 96,
+dimOut = 1,
+numHeads = 8,
+attDropout = 0.25,
+ffnDropout = 0.25,
+resDropout = 0,
+dimHidden = 512,
+dimHiddenRatio = NULL,
estimatorSettings=setEstimator(weightDecay = 1e-6,
batchSize=1024,
epochs=10,
seed=NULL),
hyperParamSearch = "random",
-randomSample = 1, randomSampleSeed = NULL) {
+randomSample = 1,
+randomSampleSeed = NULL) {

+checkIsClass(numBlocks, c("integer", "numeric"))
+checkHigherEqual(numBlocks, 1)
+
+checkIsClass(dimToken, c("integer", "numeric"))
+checkHigherEqual(dimToken, 1)
+
+checkIsClass(dimOut, c("integer", "numeric"))
+checkHigherEqual(dimOut, 1)
+
+checkIsClass(numHeads, c("integer", "numeric"))
+checkHigherEqual(numHeads, 1)
+
+checkIsClass(attDropout, c("numeric"))
+checkHigherEqual(attDropout, 0)
+
+checkIsClass(ffnDropout, c("numeric"))
+checkHigherEqual(ffnDropout, 0)
+
+checkIsClass(resDropout, c("numeric"))
+checkHigherEqual(resDropout, 0)
+
+checkIsClass(dimHidden, c("integer", "numeric", "NULL"))
+if (!is.null(dimHidden)) {
+  checkHigherEqual(dimHidden, 1)
+}
+
+checkIsClass(dimHiddenRatio, c("numeric", "NULL"))
+if (!is.null(dimHiddenRatio)) {
+  checkHigher(dimHiddenRatio, 0)
+}
+
+checkIsClass(hyperParamSearch, "character")
+
+checkIsClass(randomSample, c("numeric", "integer"))
+checkHigherEqual(randomSample, 1)
+
+checkIsClass(randomSampleSeed, c("numeric", "integer", "NULL"))

if (any(with(expand.grid(dimToken = dimToken, numHeads = numHeads), dimToken %% numHeads != 0))) {
stop(paste(
"dimToken needs to divisible by numHeads. dimToken =", dimToken,
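The divisibility constraint exists because multi-head attention splits the token embedding evenly across heads: each head works on `dimToken / numHeads` dimensions. A sketch of how the check plays out (hypothetical values):

```r
ok  <- setTransformer(dimToken = 192, numHeads = 8)      # 192 / 8 = 24 dims per head
bad <- try(setTransformer(dimToken = 100, numHeads = 8)) # 100 %% 8 != 0 -> error
```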
6 changes: 3 additions & 3 deletions inst/python/Estimator.py
@@ -28,13 +28,13 @@ def __init__(self,
self.model_parameters = model_parameters
self.estimator_settings = estimator_settings

-self.epochs = estimator_settings.get("epochs", 5)
+self.epochs = int(estimator_settings.get("epochs", 5))
self.learning_rate = estimator_settings.get("learning_rate", 3e-4)
self.weight_decay = estimator_settings.get("weight_decay", 1e-5)
-self.batch_size = estimator_settings.get("batch_size", 1024)
+self.batch_size = int(estimator_settings.get("batch_size", 1024))
self.prefix = estimator_settings.get("prefix", self.model.name)

-self.previous_epochs = estimator_settings.get("previous_epochs", 0)
+self.previous_epochs = int(estimator_settings.get("previous_epochs", 0))
self.model.to(device=self.device)

self.optimizer = estimator_settings["optimizer"](params=self.model.parameters(),
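The `int()` coercions guard against R's default numeric type: reticulate converts an R double to a Python float, and floats break integer-only contexts such as `range()`. The same pattern recurs in `MLP.py` and `ResNet.py` below. A minimal sketch of the type mismatch, assuming reticulate is installed:

```r
library(reticulate)
builtins <- import_builtins()
builtins$type(30)   # <class 'float'> -- an R double crosses over as a float
builtins$type(30L)  # <class 'int'>   -- only the L suffix gives an int
```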
19 changes: 13 additions & 6 deletions inst/python/MLP.py
@@ -6,17 +6,24 @@
class MLP(nn.Module):

def __init__(self,
-cat_features,
-num_features,
-size_embedding,
-size_hidden,
-num_layers,
+cat_features: int,
+num_features: int,
+size_embedding: int,
+size_hidden: int,
+num_layers: int,
activation=nn.ReLU,
normalization=nn.BatchNorm1d,
dropout=None,
-d_out=1):
+d_out: int = 1):
super(MLP, self).__init__()
self.name = "MLP"
+# coerce sizes that may arrive from R as floats
+cat_features = int(cat_features)
+num_features = int(num_features)
+size_embedding = int(size_embedding)
+size_hidden = int(size_hidden)
+num_layers = int(num_layers)
+d_out = int(d_out)

self.embedding = nn.EmbeddingBag(cat_features + 1,
size_embedding,
padding_idx=0)
23 changes: 16 additions & 7 deletions inst/python/ResNet.py
@@ -7,20 +7,29 @@
class ResNet(nn.Module):

def __init__(self,
-cat_features,
-num_features=0,
-size_embedding=256,
-size_hidden=256,
-num_layers=2,
-hidden_factor=1,
+cat_features: int,
+num_features: int = 0,
+size_embedding: int = 256,
+size_hidden: int = 256,
+num_layers: int = 2,
+hidden_factor: int = 1,
activation=nn.ReLU,
normalization=nn.BatchNorm1d,
hidden_dropout=0,
residual_dropout=0,
-dim_out=1,
+dim_out: int = 1,
concat_num=True):
super(ResNet, self).__init__()
self.name = 'ResNet'
+# coerce sizes that may arrive from R as floats
+cat_features = int(cat_features)
+num_features = int(num_features)
+size_embedding = int(size_embedding)
+size_hidden = int(size_hidden)
+num_layers = int(num_layers)
+hidden_factor = int(hidden_factor)
+dim_out = int(dim_out)
self.embedding = nn.EmbeddingBag(
num_embeddings=cat_features + 1,
embedding_dim=size_embedding,