From 3fb855a9d00b78c9eb023ad732d7386788049060 Mon Sep 17 00:00:00 2001 From: egillax Date: Thu, 21 Nov 2024 15:28:28 +0100 Subject: [PATCH 1/6] torch compile and slightly more efficient conversions to torch from polars --- R/Estimator.R | 4 ++++ inst/python/Dataset.py | 11 +++-------- inst/python/Estimator.py | 2 ++ 3 files changed, 9 insertions(+), 8 deletions(-) diff --git a/R/Estimator.R b/R/Estimator.R index 19f414e..d4b8bc5 100644 --- a/R/Estimator.R +++ b/R/Estimator.R @@ -33,6 +33,7 @@ #' @param criterion loss function to use #' @param earlyStopping If earlyStopping should be used which stops the #' training of your metric is not improving +#' @param compile if the model should be compiled before training, default FALSE #' @param metric either `auc` or `loss` or a custom metric to use. This is the #' metric used for scheduler and earlyStopping. #' Needs to be a list with function `fun`, mode either `min` or `max` and a @@ -59,6 +60,7 @@ setEstimator <- function( useEarlyStopping = TRUE, params = list(patience = 4) ), + compile = FALSE, metric = "auc", accumulationSteps = NULL, seed = NULL) { @@ -74,6 +76,7 @@ setEstimator <- function( checkIsClass(epochs, c("numeric", "integer")) checkHigher(epochs, 0) checkIsClass(earlyStopping, c("list", "NULL")) + checkIsClass(compile, "logical") checkIsClass(metric, c("character", "list")) checkIsClass(seed, c("numeric", "integer", "NULL")) @@ -100,6 +103,7 @@ setEstimator <- function( epochs = epochs, device = device, earlyStopping = earlyStopping, + compile = compile, findLR = findLR, metric = metric, accumulationSteps = accumulationSteps, diff --git a/inst/python/Dataset.py b/inst/python/Dataset.py index ed3c3bd..dfe48a6 100644 --- a/inst/python/Dataset.py +++ b/inst/python/Dataset.py @@ -50,7 +50,7 @@ def __init__(self, data, labels=None, numerical_features=None): .with_columns(pl.col("rowId") - 1) .collect() ) - cat_tensor = torch.tensor(data_cat.to_numpy()) + cat_tensor = data_cat.to_torch() tensor_list = 
torch.split( cat_tensor[:, 1], torch.unique_consecutive(cat_tensor[:, 0], return_counts=True)[1].tolist(), @@ -90,13 +90,8 @@ def __init__(self, data, labels=None, numerical_features=None): ) .collect() ) - indices = torch.as_tensor( - numerical_data.select(["rowId", "columnId"]).to_numpy(), - dtype=torch.long, - ) - values = torch.tensor( - numerical_data.select("covariateValue").to_numpy(), dtype=torch.float - ) + indices = numerical_data.select(["rowId", "columnId"]).to_torch(dtype=pl.Int64) + values = numerical_data.select("covariateValue").to_torch(dtype=pl.Float32) self.num = torch.sparse_coo_tensor( indices=indices.T, values=values.squeeze(), diff --git a/inst/python/Estimator.py b/inst/python/Estimator.py index 1b6ac18..d9a92ee 100644 --- a/inst/python/Estimator.py +++ b/inst/python/Estimator.py @@ -99,6 +99,8 @@ def __init__(self, model, model_parameters, estimator_settings): self.best_score = None self.best_epoch = None self.learn_rate_schedule = None + if parameters["estimator_settings"]["compile"]: + self.model = torch.compile(self.model, dynamic=False) def fit(self, dataset, test_dataset): train_dataloader = DataLoader( From bfaa6c5dc87abcee9a9660984ea790feeb5a61f3 Mon Sep 17 00:00:00 2001 From: egillax Date: Thu, 21 Nov 2024 15:36:41 +0100 Subject: [PATCH 2/6] docs --- DESCRIPTION | 2 +- man/setEstimator.Rd | 3 +++ man/setMultiLayerPerceptron.Rd | 2 +- 3 files changed, 5 insertions(+), 2 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index 94e0aed..1119a68 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -38,7 +38,7 @@ Suggests: Remotes: ohdsi/PatientLevelPrediction, ohdsi/ResultModelManager -RoxygenNote: 7.3.1 +RoxygenNote: 7.3.2 Encoding: UTF-8 Config/testthat/edition: 3 Config/testthat/parallel: TRUE diff --git a/man/setEstimator.Rd b/man/setEstimator.Rd index d9dd3d8..3557211 100644 --- a/man/setEstimator.Rd +++ b/man/setEstimator.Rd @@ -15,6 +15,7 @@ setEstimator( list(patience = 1)), criterion = torch$nn$BCEWithLogitsLoss, earlyStopping = 
list(useEarlyStopping = TRUE, params = list(patience = 4)), + compile = FALSE, metric = "auc", accumulationSteps = NULL, seed = NULL @@ -41,6 +42,8 @@ that evaluates to the device during runtime} \item{earlyStopping}{If earlyStopping should be used which stops the training of your metric is not improving} +\item{compile}{if the model should be compiled before training, default FALSE} + \item{metric}{either `auc` or `loss` or a custom metric to use. This is the metric used for scheduler and earlyStopping. Needs to be a list with function `fun`, mode either `min` or `max` and a diff --git a/man/setMultiLayerPerceptron.Rd b/man/setMultiLayerPerceptron.Rd index a5f96d7..fd78ef1 100644 --- a/man/setMultiLayerPerceptron.Rd +++ b/man/setMultiLayerPerceptron.Rd @@ -1,5 +1,5 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/MLP.R +% Please edit documentation in R/MultiLayerPerceptron.R \name{setMultiLayerPerceptron} \alias{setMultiLayerPerceptron} \title{setMultiLayerPerceptron} From 2b414b8ee30e438f43c5323eb3d1efb29e8d7756 Mon Sep 17 00:00:00 2001 From: egillax Date: Thu, 21 Nov 2024 15:50:32 +0100 Subject: [PATCH 3/6] fix for change in branch --- inst/python/Estimator.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/inst/python/Estimator.py b/inst/python/Estimator.py index d9a92ee..69ee153 100644 --- a/inst/python/Estimator.py +++ b/inst/python/Estimator.py @@ -99,7 +99,7 @@ def __init__(self, model, model_parameters, estimator_settings): self.best_score = None self.best_epoch = None self.learn_rate_schedule = None - if parameters["estimator_settings"]["compile"]: + if estimator_settings["compile"]: self.model = torch.compile(self.model, dynamic=False) def fit(self, dataset, test_dataset): From ba568ee4cbb5eaacf00fb8195dbdf078b40b45a5 Mon Sep 17 00:00:00 2001 From: egillax Date: Thu, 21 Nov 2024 16:03:42 +0100 Subject: [PATCH 4/6] get default of false for compile in estimator init --- inst/python/Estimator.py | 3 ++- 1 
file changed, 2 insertions(+), 1 deletion(-) diff --git a/inst/python/Estimator.py b/inst/python/Estimator.py index 69ee153..c9ad8b3 100644 --- a/inst/python/Estimator.py +++ b/inst/python/Estimator.py @@ -99,7 +99,8 @@ def __init__(self, model, model_parameters, estimator_settings): self.best_score = None self.best_epoch = None self.learn_rate_schedule = None - if estimator_settings["compile"]: + torch_compile = estimator_settings.get("compile", False) + if torch_compile: self.model = torch.compile(self.model, dynamic=False) def fit(self, dataset, test_dataset): From 87c33145082441797ecbc08245886525ec008b2e Mon Sep 17 00:00:00 2001 From: egillax Date: Thu, 21 Nov 2024 16:42:47 +0100 Subject: [PATCH 5/6] fix actions --- .github/workflows/R_CDM_check_hades.yaml | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/.github/workflows/R_CDM_check_hades.yaml b/.github/workflows/R_CDM_check_hades.yaml index 2addc69..9cddc01 100644 --- a/.github/workflows/R_CDM_check_hades.yaml +++ b/.github/workflows/R_CDM_check_hades.yaml @@ -71,6 +71,10 @@ jobs: extra-packages: any::rcmdcheck needs: check + - uses: actions/setup-python@v5 + with: + python-version: '3.11' + - name: setup r-reticulate venv shell: Rscript {0} run: | From 3110a809bbec8f262eba4ce77f9d71d36976c8b0 Mon Sep 17 00:00:00 2001 From: egillax Date: Thu, 21 Nov 2024 17:04:25 +0100 Subject: [PATCH 6/6] update news --- NEWS.md | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/NEWS.md b/NEWS.md index 3b95d9b..85d10ba 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,6 +1,10 @@ DeepPatientLevelPrediction 2.1.0.999 ====================== + - Add an option to use torch compile + - More efficient conversions from polars to torch in dataset processing + - Automatically detect broken links in docs using github actions + - Model initialization made more flexible with classes DeepPatientLevelPrediction 2.1.0 ======================