From 4b99d90698a45de053ac4e53c2198d7ef70a9ee6 Mon Sep 17 00:00:00 2001 From: Egill Axfjord Fridgeirsson Date: Thu, 21 Nov 2024 14:27:04 -0500 Subject: [PATCH] torch compile and slightly more efficient conversions to torch from polars (#133) * torch compile and slightly more efficient conversions to torch from polars --- .github/workflows/R_CDM_check_hades.yaml | 4 ++++ DESCRIPTION | 2 +- NEWS.md | 4 ++++ R/Estimator.R | 4 ++++ inst/python/Dataset.py | 11 +++-------- inst/python/Estimator.py | 3 +++ man/setEstimator.Rd | 3 +++ man/setMultiLayerPerceptron.Rd | 2 +- 8 files changed, 23 insertions(+), 10 deletions(-) diff --git a/.github/workflows/R_CDM_check_hades.yaml b/.github/workflows/R_CDM_check_hades.yaml index 83e28bc..c348c7a 100644 --- a/.github/workflows/R_CDM_check_hades.yaml +++ b/.github/workflows/R_CDM_check_hades.yaml @@ -75,6 +75,10 @@ jobs: extra-packages: any::rcmdcheck needs: check + - uses: actions/setup-python@v5 + with: + python-version: '3.11' + - name: setup r-reticulate venv shell: Rscript {0} run: | diff --git a/DESCRIPTION b/DESCRIPTION index 94e0aed..1119a68 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -38,7 +38,7 @@ Suggests: Remotes: ohdsi/PatientLevelPrediction, ohdsi/ResultModelManager -RoxygenNote: 7.3.1 +RoxygenNote: 7.3.2 Encoding: UTF-8 Config/testthat/edition: 3 Config/testthat/parallel: TRUE diff --git a/NEWS.md b/NEWS.md index 3b95d9b..85d10ba 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,6 +1,10 @@ DeepPatientLevelPrediction 2.1.0.999 ====================== + - Add an option to use torch compile + - More efficient conversions from polars to torch in dataset processing + - Automatically detect broken links in docs using github actions + - Model initialization made more flexible with classes DeepPatientLevelPrediction 2.1.0 ====================== diff --git a/R/Estimator.R b/R/Estimator.R index 19f414e..d4b8bc5 100644 --- a/R/Estimator.R +++ b/R/Estimator.R @@ -33,6 +33,7 @@ #' @param criterion loss function to use #' @param 
earlyStopping If earlyStopping should be used which stops the #' training of your metric is not improving +#' @param compile if the model should be compiled before training, default FALSE #' @param metric either `auc` or `loss` or a custom metric to use. This is the #' metric used for scheduler and earlyStopping. #' Needs to be a list with function `fun`, mode either `min` or `max` and a @@ -59,6 +60,7 @@ setEstimator <- function( useEarlyStopping = TRUE, params = list(patience = 4) ), + compile = FALSE, metric = "auc", accumulationSteps = NULL, seed = NULL) { @@ -74,6 +76,7 @@ setEstimator <- function( checkIsClass(epochs, c("numeric", "integer")) checkHigher(epochs, 0) checkIsClass(earlyStopping, c("list", "NULL")) + checkIsClass(compile, "logical") checkIsClass(metric, c("character", "list")) checkIsClass(seed, c("numeric", "integer", "NULL")) @@ -100,6 +103,7 @@ setEstimator <- function( epochs = epochs, device = device, earlyStopping = earlyStopping, + compile = compile, findLR = findLR, metric = metric, accumulationSteps = accumulationSteps, diff --git a/inst/python/Dataset.py b/inst/python/Dataset.py index ed3c3bd..dfe48a6 100644 --- a/inst/python/Dataset.py +++ b/inst/python/Dataset.py @@ -50,7 +50,7 @@ def __init__(self, data, labels=None, numerical_features=None): .with_columns(pl.col("rowId") - 1) .collect() ) - cat_tensor = torch.tensor(data_cat.to_numpy()) + cat_tensor = data_cat.to_torch() tensor_list = torch.split( cat_tensor[:, 1], torch.unique_consecutive(cat_tensor[:, 0], return_counts=True)[1].tolist(), @@ -90,13 +90,8 @@ def __init__(self, data, labels=None, numerical_features=None): ) .collect() ) - indices = torch.as_tensor( - numerical_data.select(["rowId", "columnId"]).to_numpy(), - dtype=torch.long, - ) - values = torch.tensor( - numerical_data.select("covariateValue").to_numpy(), dtype=torch.float - ) + indices = numerical_data.select(["rowId", "columnId"]).to_torch(dtype=pl.Int64) + values = 
numerical_data.select("covariateValue").to_torch(dtype=pl.Float32) self.num = torch.sparse_coo_tensor( indices=indices.T, values=values.squeeze(), diff --git a/inst/python/Estimator.py b/inst/python/Estimator.py index 1b6ac18..c9ad8b3 100644 --- a/inst/python/Estimator.py +++ b/inst/python/Estimator.py @@ -99,6 +99,9 @@ def __init__(self, model, model_parameters, estimator_settings): self.best_score = None self.best_epoch = None self.learn_rate_schedule = None + torch_compile = estimator_settings.get("compile", False) + if torch_compile: + self.model = torch.compile(self.model, dynamic=False) def fit(self, dataset, test_dataset): train_dataloader = DataLoader( diff --git a/man/setEstimator.Rd b/man/setEstimator.Rd index d9dd3d8..3557211 100644 --- a/man/setEstimator.Rd +++ b/man/setEstimator.Rd @@ -15,6 +15,7 @@ setEstimator( list(patience = 1)), criterion = torch$nn$BCEWithLogitsLoss, earlyStopping = list(useEarlyStopping = TRUE, params = list(patience = 4)), + compile = FALSE, metric = "auc", accumulationSteps = NULL, seed = NULL @@ -41,6 +42,8 @@ that evaluates to the device during runtime} \item{earlyStopping}{If earlyStopping should be used which stops the training of your metric is not improving} +\item{compile}{if the model should be compiled before training, default FALSE} + \item{metric}{either `auc` or `loss` or a custom metric to use. This is the metric used for scheduler and earlyStopping. Needs to be a list with function `fun`, mode either `min` or `max` and a diff --git a/man/setMultiLayerPerceptron.Rd b/man/setMultiLayerPerceptron.Rd index a5f96d7..fd78ef1 100644 --- a/man/setMultiLayerPerceptron.Rd +++ b/man/setMultiLayerPerceptron.Rd @@ -1,5 +1,5 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/MLP.R +% Please edit documentation in R/MultiLayerPerceptron.R \name{setMultiLayerPerceptron} \alias{setMultiLayerPerceptron} \title{setMultiLayerPerceptron}