From 3fb855a9d00b78c9eb023ad732d7386788049060 Mon Sep 17 00:00:00 2001
From: egillax <egillax@gmail.com>
Date: Thu, 21 Nov 2024 15:28:28 +0100
Subject: [PATCH 1/6] torch compile and slightly more efficient conversions to
 torch from polars

---
 R/Estimator.R            |  4 ++++
 inst/python/Dataset.py   | 11 +++--------
 inst/python/Estimator.py |  2 ++
 3 files changed, 9 insertions(+), 8 deletions(-)

diff --git a/R/Estimator.R b/R/Estimator.R
index 19f414e..d4b8bc5 100644
--- a/R/Estimator.R
+++ b/R/Estimator.R
@@ -33,6 +33,7 @@
 #' @param criterion loss function to use
 #' @param earlyStopping If earlyStopping should be used which stops the
 #' training of your metric is not improving
+#' @param compile if the model should be compiled before training, default FALSE
 #' @param metric either `auc` or `loss` or a custom metric to use. This is the
 #' metric used for scheduler and earlyStopping.
 #' Needs to be a list with function `fun`, mode either `min` or `max` and a
@@ -59,6 +60,7 @@ setEstimator <- function(
       useEarlyStopping = TRUE,
       params = list(patience = 4)
     ),
+    compile = FALSE,
     metric = "auc",
     accumulationSteps = NULL,
     seed = NULL) {
@@ -74,6 +76,7 @@ setEstimator <- function(
   checkIsClass(epochs, c("numeric", "integer"))
   checkHigher(epochs, 0)
   checkIsClass(earlyStopping, c("list", "NULL"))
+  checkIsClass(compile, "logical")
   checkIsClass(metric, c("character", "list"))
   checkIsClass(seed, c("numeric", "integer", "NULL"))
   
@@ -100,6 +103,7 @@ setEstimator <- function(
     epochs = epochs,
     device = device,
     earlyStopping = earlyStopping,
+    compile = compile,
     findLR = findLR,
     metric = metric,
     accumulationSteps = accumulationSteps,
diff --git a/inst/python/Dataset.py b/inst/python/Dataset.py
index ed3c3bd..dfe48a6 100644
--- a/inst/python/Dataset.py
+++ b/inst/python/Dataset.py
@@ -50,7 +50,7 @@ def __init__(self, data, labels=None, numerical_features=None):
             .with_columns(pl.col("rowId") - 1)
             .collect()
         )
-        cat_tensor = torch.tensor(data_cat.to_numpy())
+        cat_tensor = data_cat.to_torch()
         tensor_list = torch.split(
             cat_tensor[:, 1],
             torch.unique_consecutive(cat_tensor[:, 0], return_counts=True)[1].tolist(),
@@ -90,13 +90,8 @@ def __init__(self, data, labels=None, numerical_features=None):
                 )
                 .collect()
             )
-            indices = torch.as_tensor(
-                numerical_data.select(["rowId", "columnId"]).to_numpy(),
-                dtype=torch.long,
-            )
-            values = torch.tensor(
-                numerical_data.select("covariateValue").to_numpy(), dtype=torch.float
-            )
+            indices = numerical_data.select(["rowId", "columnId"]).to_torch(dtype=pl.Int64)
+            values = numerical_data.select("covariateValue").to_torch(dtype=pl.Float32)
             self.num = torch.sparse_coo_tensor(
                 indices=indices.T,
                 values=values.squeeze(),
diff --git a/inst/python/Estimator.py b/inst/python/Estimator.py
index 1b6ac18..d9a92ee 100644
--- a/inst/python/Estimator.py
+++ b/inst/python/Estimator.py
@@ -99,6 +99,8 @@ def __init__(self, model, model_parameters, estimator_settings):
         self.best_score = None
         self.best_epoch = None
         self.learn_rate_schedule = None
+        if parameters["estimator_settings"]["compile"]:
+            self.model = torch.compile(self.model, dynamic=False)
 
     def fit(self, dataset, test_dataset):
         train_dataloader = DataLoader(

From bfaa6c5dc87abcee9a9660984ea790feeb5a61f3 Mon Sep 17 00:00:00 2001
From: egillax <egillax@gmail.com>
Date: Thu, 21 Nov 2024 15:36:41 +0100
Subject: [PATCH 2/6] docs

---
 DESCRIPTION                    | 2 +-
 man/setEstimator.Rd            | 3 +++
 man/setMultiLayerPerceptron.Rd | 2 +-
 3 files changed, 5 insertions(+), 2 deletions(-)

diff --git a/DESCRIPTION b/DESCRIPTION
index 94e0aed..1119a68 100644
--- a/DESCRIPTION
+++ b/DESCRIPTION
@@ -38,7 +38,7 @@ Suggests:
 Remotes:
     ohdsi/PatientLevelPrediction,
     ohdsi/ResultModelManager
-RoxygenNote: 7.3.1
+RoxygenNote: 7.3.2
 Encoding: UTF-8
 Config/testthat/edition: 3
 Config/testthat/parallel: TRUE
diff --git a/man/setEstimator.Rd b/man/setEstimator.Rd
index d9dd3d8..3557211 100644
--- a/man/setEstimator.Rd
+++ b/man/setEstimator.Rd
@@ -15,6 +15,7 @@ setEstimator(
     list(patience = 1)),
   criterion = torch$nn$BCEWithLogitsLoss,
   earlyStopping = list(useEarlyStopping = TRUE, params = list(patience = 4)),
+  compile = FALSE,
   metric = "auc",
   accumulationSteps = NULL,
   seed = NULL
@@ -41,6 +42,8 @@ that evaluates to the device during runtime}
 \item{earlyStopping}{If earlyStopping should be used which stops the
 training of your metric is not improving}
 
+\item{compile}{if the model should be compiled before training, default FALSE}
+
 \item{metric}{either `auc` or `loss` or a custom metric to use. This is the
 metric used for scheduler and earlyStopping.
 Needs to be a list with function `fun`, mode either `min` or `max` and a
diff --git a/man/setMultiLayerPerceptron.Rd b/man/setMultiLayerPerceptron.Rd
index a5f96d7..fd78ef1 100644
--- a/man/setMultiLayerPerceptron.Rd
+++ b/man/setMultiLayerPerceptron.Rd
@@ -1,5 +1,5 @@
 % Generated by roxygen2: do not edit by hand
-% Please edit documentation in R/MLP.R
+% Please edit documentation in R/MultiLayerPerceptron.R
 \name{setMultiLayerPerceptron}
 \alias{setMultiLayerPerceptron}
 \title{setMultiLayerPerceptron}

From 2b414b8ee30e438f43c5323eb3d1efb29e8d7756 Mon Sep 17 00:00:00 2001
From: egillax <egillax@gmail.com>
Date: Thu, 21 Nov 2024 15:50:32 +0100
Subject: [PATCH 3/6] fix for change in branch

---
 inst/python/Estimator.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/inst/python/Estimator.py b/inst/python/Estimator.py
index d9a92ee..69ee153 100644
--- a/inst/python/Estimator.py
+++ b/inst/python/Estimator.py
@@ -99,7 +99,7 @@ def __init__(self, model, model_parameters, estimator_settings):
         self.best_score = None
         self.best_epoch = None
         self.learn_rate_schedule = None
-        if parameters["estimator_settings"]["compile"]:
+        if estimator_settings["compile"]:
             self.model = torch.compile(self.model, dynamic=False)
 
     def fit(self, dataset, test_dataset):

From ba568ee4cbb5eaacf00fb8195dbdf078b40b45a5 Mon Sep 17 00:00:00 2001
From: egillax <egillax@gmail.com>
Date: Thu, 21 Nov 2024 16:03:42 +0100
Subject: [PATCH 4/6] get default of false for compile in estimator init

---
 inst/python/Estimator.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/inst/python/Estimator.py b/inst/python/Estimator.py
index 69ee153..c9ad8b3 100644
--- a/inst/python/Estimator.py
+++ b/inst/python/Estimator.py
@@ -99,7 +99,8 @@ def __init__(self, model, model_parameters, estimator_settings):
         self.best_score = None
         self.best_epoch = None
         self.learn_rate_schedule = None
-        if estimator_settings["compile"]:
+        torch_compile = estimator_settings.get("compile", False)
+        if torch_compile:
             self.model = torch.compile(self.model, dynamic=False)
 
     def fit(self, dataset, test_dataset):

From 87c33145082441797ecbc08245886525ec008b2e Mon Sep 17 00:00:00 2001
From: egillax <egillax@gmail.com>
Date: Thu, 21 Nov 2024 16:42:47 +0100
Subject: [PATCH 5/6] fix actions

---
 .github/workflows/R_CDM_check_hades.yaml | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/.github/workflows/R_CDM_check_hades.yaml b/.github/workflows/R_CDM_check_hades.yaml
index 2addc69..9cddc01 100644
--- a/.github/workflows/R_CDM_check_hades.yaml
+++ b/.github/workflows/R_CDM_check_hades.yaml
@@ -71,6 +71,10 @@ jobs:
           extra-packages: any::rcmdcheck
           needs: check
           
+      - uses: actions/setup-python@v5
+        with:
+          python-version: '3.11'
+          
       - name: setup r-reticulate venv
         shell: Rscript {0}
         run: |

From 3110a809bbec8f262eba4ce77f9d71d36976c8b0 Mon Sep 17 00:00:00 2001
From: egillax <egillax@gmail.com>
Date: Thu, 21 Nov 2024 17:04:25 +0100
Subject: [PATCH 6/6] update news

---
 NEWS.md | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/NEWS.md b/NEWS.md
index 3b95d9b..85d10ba 100644
--- a/NEWS.md
+++ b/NEWS.md
@@ -1,6 +1,10 @@
 
 DeepPatientLevelPrediction 2.1.0.999
 ======================
+  - Add an option to use torch compile
+  - More efficient conversions from polars to torch in dataset processing
+  - Automatically detect broken links in docs using GitHub Actions
+  - Model initialization made more flexible with classes
 
 DeepPatientLevelPrediction 2.1.0
 ======================