Do not pass batch_size to cudnnGetRNNParamsSize().

Mikhail Balakhno · drahnr · commit 847d185785bb · 2024-02-12T13:54:39.000+01:00
diff --git a/coaster-nn/src/frameworks/cuda/mod.rs b/coaster-nn/src/frameworks/cuda/mod.rs
@@ -844,14 +844,17 @@ where
     fn generate_rnn_weight_description(
         &self,
         rnn_config: &Self::CRNN,
-        batch_size: i32,
         input_size: i32,
     ) -> Result<Vec<usize>, Error> {
         let cudnn_framework = self.framework().cudnn();
         let data_type = <T as DataTypeInfo>::cudnn_data_type();
 
-        // MiniBatch, LayerSize, 1
-        let dim_x = vec![batch_size, input_size, 1];
+        // According to cuDNN API reference and examples, xDesc should have at
+        // least 3 dimensions with batch_size being the first. However, weights
+        // size does not depend on batch size and we'd like to avoid having to
+        // specify batch size in advance (as it can change during execution).
+        // So we use batch_size = 1 as it appears to work well.
+        let dim_x = vec![1, input_size, 1];
         let stride_x = vec![dim_x[2] * dim_x[1], dim_x[2], 1];
 
         // dummy desc to get the param size
diff --git a/coaster-nn/src/frameworks/native/mod.rs b/coaster-nn/src/frameworks/native/mod.rs
@@ -890,7 +890,6 @@ where
     fn generate_rnn_weight_description(
         &self,
         rnn_config: &Self::CRNN,
-        batch_size: i32,
         input_size: i32,
     ) -> Result<Vec<usize>, Error> {
         // This will end up being the tensor descriptor for the weights associated with the RNN pass
diff --git a/coaster-nn/src/plugin.rs b/coaster-nn/src/plugin.rs
@@ -335,7 +335,6 @@ pub trait Rnn<F>: NN<F> {
     fn generate_rnn_weight_description(
         &self,
         rnn_config: &Self::CRNN,
-        batch_size: i32,
         input_size: i32,
     ) -> Result<Vec<usize>, crate::co::error::Error>;
 
diff --git a/coaster-nn/src/tests/rnn.rs b/coaster-nn/src/tests/rnn.rs
@@ -57,7 +57,7 @@ where
         .unwrap();
 
     let filter_dimensions = backend
-        .generate_rnn_weight_description(&rnn_config, BATCH_SIZE as i32, INPUT_SIZE as i32)
+        .generate_rnn_weight_description(&rnn_config, INPUT_SIZE as i32)
         .unwrap();
 
     let w = uniformly_random_tensor::<T, F>(
diff --git a/juice-examples/mackey-glass-rnn-regression/README.md b/juice-examples/mackey-glass-rnn-regression/README.md
@@ -39,13 +39,14 @@ Rustflags must be set to link natively to `cuda.lib` and `cudnn.h` in the patter
 A generated version of Mackey-Glass is packaged with Juice, and packaged in a way suitable for RNN networks.
 
 ```bash
+cd juice-examples/mackey-glass-rnn-regression
 # Train a RNN Network (*nix)
-./target/release/example-rnn-regression train --file=SavedRNNNetwork.juice --learningRate=0.01 --batchSize=40
+../../target/release/example-rnn-regression train --learning-rate=0.01 --batch-size=40 SavedRNNNetwork.juice 
 # Train a RNN Network (Windows)
-.\target\release\example-rnn-regression.exe train --file=SavedRNNNetwork.juice --learningRate=0.01 --batchSize=40
+..\..\target\release\example-rnn-regression.exe train --learning-rate=0.01 --batch-size=40 SavedRNNNetwork.juice 
 
 # Test the RNN Network (*nix)
-../target/release/example-rnn-regression test --file=SavedRNNNetwork.juice
+../../target/release/example-rnn-regression test --batch-size=40 SavedRNNNetwork.juice
 # Test the RNN Network (Windows)
-cd ../target/release/ && example-rnn-regression.exe test --file=SavedRNNNetwork.juice
+..\..\target\release\example-rnn-regression.exe test --batch-size=40 SavedRNNNetwork.juice
 ```
diff --git a/juice-examples/mackey-glass-rnn-regression/rnn.juice b/juice-examples/mackey-glass-rnn-regression/rnn.juice
diff --git a/juice/src/layers/common/rnn.rs b/juice/src/layers/common/rnn.rs
@@ -137,7 +137,7 @@ impl<B: IBackend + conn::Rnn<f32>> ILayer<B> for Rnn<B> {
             .unwrap();
 
         let filter_dimensions: TensorDesc = backend
-            .generate_rnn_weight_description(&config, batch_size as i32, input_size as i32)
+            .generate_rnn_weight_description(&config, input_size as i32)
             .unwrap();
 
         // weights
@@ -492,7 +492,6 @@ mod tests {
         let filter_dimensions = <Backend<Cuda> as conn::Rnn<f32>>::generate_rnn_weight_description(
             &backend,
             &config,
-            BATCH_SIZE as i32,
             INPUT_SIZE as i32,
         )
         .unwrap();
diff --git a/rcudnn/cudnn/src/api/rnn.rs b/rcudnn/cudnn/src/api/rnn.rs
@@ -166,7 +166,7 @@ impl API {
     ) -> Result<::libc::size_t, Error> {
         let mut size: ::libc::size_t = 0;
         let size_ptr: *mut ::libc::size_t = &mut size;
-        match cudnnGetRNNParamsSize(handle, rnn_desc,x_desc, size_ptr, data_type) {
+        match cudnnGetRNNParamsSize(handle, rnn_desc, x_desc, size_ptr, data_type) {
             cudnnStatus_t::CUDNN_STATUS_SUCCESS => Ok(size),
             cudnnStatus_t::CUDNN_STATUS_BAD_PARAM => Err(Error::BadParam("One of the following; rnnDesc is invalid, x_desc is invalid, x_desc isn't fully packed, dataType & tensor Description type don't match")),
             cudnnStatus_t::CUDNN_STATUS_NOT_SUPPORTED => Err(Error::NotSupported("The data type used in `rnn_desc` is not supported for RNN.")),