Update README + fix main changes
laggui committed Feb 3, 2025
1 parent 7ecc9c4 commit 9bdc8e9
Showing 5 changed files with 50 additions and 43 deletions.
11 changes: 11 additions & 0 deletions Cargo.lock

Some generated files are not rendered by default.

2 changes: 1 addition & 1 deletion examples/modern-lstm/Cargo.toml
@@ -11,7 +11,7 @@ ndarray-blas-openblas = ["burn/ndarray", "burn/openblas"]
tch-cpu = ["burn/tch"]
tch-gpu = ["burn/tch"]
wgpu = ["burn/wgpu"]
cuda-jit = ["burn/cuda-jit"]
cuda = ["burn/cuda"]

[dependencies]
burn = { path = "../../crates/burn", features=["train"] }
38 changes: 21 additions & 17 deletions examples/modern-lstm/README.md
@@ -1,42 +1,46 @@
# Advanced LSTM Implementation with Burn
A sophisticated implementation of Long Short-Term Memory (LSTM) networks in Burn, featuring state-of-the-art architectural enhancements and optimizations. This implementation includes bidirectional processing capabilities and advanced regularization techniques. More details can be found at the [PyTorch implementation](https://github.com/shiv08/Advanced-LSTM-Implementation-with-PyTorch).

`LstmNetwork` is the top-level module with bidirectional support and output projection. It supports multiple LSTM variants through the `bidirectional` and `num_layers` settings:
* LSTM: `num_layers = 1` and `bidirectional = false`
* Stacked LSTM: `num_layers > 1` and `bidirectional = false`
* Bidirectional LSTM: `num_layers = 1` and `bidirectional = true`
* Bidirectional Stacked LSTM: `num_layers > 1` and `bidirectional = true`
A more advanced implementation of Long Short-Term Memory (LSTM) networks in Burn with combined
weight matrices for the input and hidden states, based on the
[PyTorch implementation](https://github.com/shiv08/Advanced-LSTM-Implementation-with-PyTorch).
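
Concretely, combining the weight matrices is taken here to mean computing the pre-activations of all
four gates (input, forget, cell candidate, output) with one matrix product for the input and one for
the hidden state, then splitting the result. A minimal sketch of a single step, with illustrative
names and shapes rather than the example's actual module API:

```rust
use burn::tensor::{activation, backend::Backend, Tensor};

// Minimal sketch of one LSTM step with combined weight matrices.
// Names and shapes are illustrative, not the example's actual API.
fn lstm_step<B: Backend>(
    x: Tensor<B, 2>,    // input at this timestep: [batch, input_size]
    h: Tensor<B, 2>,    // previous hidden state:  [batch, hidden_size]
    c: Tensor<B, 2>,    // previous cell state:    [batch, hidden_size]
    w_ih: Tensor<B, 2>, // combined input weights:  [input_size, 4 * hidden_size]
    w_hh: Tensor<B, 2>, // combined hidden weights: [hidden_size, 4 * hidden_size]
    bias: Tensor<B, 2>, // combined bias: [1, 4 * hidden_size], broadcast over the batch
) -> (Tensor<B, 2>, Tensor<B, 2>) {
    // One matmul per source yields the pre-activations of all four gates at once.
    let gates = x.matmul(w_ih) + h.matmul(w_hh) + bias;
    // Split along the feature dimension into input, forget, cell-candidate and output gates.
    let parts = gates.chunk(4, 1);
    let i = activation::sigmoid(parts[0].clone());
    let f = activation::sigmoid(parts[1].clone());
    let g = parts[2].clone().tanh();
    let o = activation::sigmoid(parts[3].clone());
    let c_next = f * c + i * g;
    let h_next = o * c_next.clone().tanh();
    (h_next, c_next)
}
```

Compared with four separate projections per source, this keeps each timestep to two matrix products,
which is the main point of the combined layout.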

This implementation is complementary to Burn's official LSTM; users can choose either one depending on the project's specific needs.
`LstmNetwork` is the top-level module with bidirectional and regularization support. The LSTM
variants differ by `bidirectional` and `num_layers` settings:

## Usage
- LSTM: `num_layers = 1` and `bidirectional = false`
- Stacked LSTM: `num_layers > 1` and `bidirectional = false`
- Bidirectional LSTM: `num_layers = 1` and `bidirectional = true`
- Bidirectional Stacked LSTM: `num_layers > 1` and `bidirectional = true`

This implementation is complementary to Burn's official LSTM; users can choose either one depending
on the project's specific needs.
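
As a rough illustration of how `num_layers` and `bidirectional` select a variant, here is a stand-in
config built with Burn's `Config` derive; the type and field names are assumptions for the sketch,
not necessarily those of the example's real `LstmNetworkConfig`:

```rust
use burn::config::Config;

// Stand-in config used only to illustrate the variant selection described above.
#[derive(Config)]
pub struct VariantConfig {
    #[config(default = 1)]
    pub num_layers: usize,
    #[config(default = false)]
    pub bidirectional: bool,
}

fn main() {
    let _lstm = VariantConfig::new(); // LSTM: defaults (1 layer, unidirectional)
    let stacked = VariantConfig::new().with_num_layers(2); // Stacked LSTM
    let bidir = VariantConfig::new().with_bidirectional(true); // Bidirectional LSTM
    let bidir_stacked = VariantConfig::new() // Bidirectional stacked LSTM
        .with_num_layers(2)
        .with_bidirectional(true);

    assert_eq!(stacked.num_layers, 2);
    assert!(bidir.bidirectional && bidir_stacked.bidirectional);
}
```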

## Usage

### Training

```sh
# Cuda backend
cargo run --example train --release --features cuda-jit
cargo run --example lstm-train --release --features cuda

# Wgpu backend
cargo run --example train --release --features wgpu
cargo run --example lstm-train --release --features wgpu

# Tch GPU backend
export TORCH_CUDA_VERSION=cu121 # Set the cuda version
cargo run --example train --release --features tch-gpu
cargo run --example lstm-train --release --features tch-gpu

# Tch CPU backend
cargo run --example train --release --features tch-cpu
cargo run --example lstm-train --release --features tch-cpu

# NdArray backend (CPU)
cargo run --example train --release --features ndarray
cargo run --example train --release --features ndarray-blas-openblas
cargo run --example train --release --features ndarray-blas-netlib
cargo run --example lstm-train --release --features ndarray
cargo run --example lstm-train --release --features ndarray-blas-openblas
cargo run --example lstm-train --release --features ndarray-blas-netlib
```


### Inference

```sh
cargo run --example infer --release --features cuda-jit
cargo run --example lstm-infer --release --features cuda
```
@@ -11,9 +11,7 @@ pub fn launch<B: Backend>(device: B::Device) {
feature = "ndarray-blas-accelerate",
))]
mod ndarray {
use burn::backend::{
ndarray::{NdArray, NdArrayDevice}
};
use burn::backend::ndarray::{NdArray, NdArrayDevice};

use crate::launch;

@@ -24,9 +22,7 @@ mod ndarray {

#[cfg(feature = "tch-gpu")]
mod tch_gpu {
use burn::backend::{
libtorch::{LibTorch, LibTorchDevice}
};
use burn::backend::libtorch::{LibTorch, LibTorchDevice};

use crate::launch;

@@ -42,9 +38,7 @@ mod tch_gpu {

#[cfg(feature = "tch-cpu")]
mod tch_cpu {
use burn::backend::{
libtorch::{LibTorch, LibTorchDevice}
};
use burn::backend::libtorch::{LibTorch, LibTorchDevice};

use crate::launch;

@@ -63,13 +57,13 @@ mod wgpu {
}
}

#[cfg(feature = "cuda-jit")]
mod cuda_jit {
#[cfg(feature = "cuda")]
mod cuda {
use crate::launch;
use burn::backend::CudaJit;
use burn::backend::Cuda;

pub fn run() {
launch::<CudaJit>(Default::default());
launch::<Cuda>(Default::default());
}
}

@@ -87,6 +81,6 @@ fn main() {
tch_cpu::run();
#[cfg(feature = "wgpu")]
wgpu::run();
#[cfg(feature = "cuda-jit")]
cuda_jit::run();
#[cfg(feature = "cuda")]
cuda::run();
}
@@ -1,15 +1,13 @@
use burn::{
grad_clipping::GradientClippingConfig,
optim::AdamConfig,
tensor::backend::AutodiffBackend
grad_clipping::GradientClippingConfig, optim::AdamConfig, tensor::backend::AutodiffBackend,
};
use modern_lstm::{model::LstmNetworkConfig, training::TrainingConfig};

pub fn launch<B: AutodiffBackend>(device: B::Device) {
let config = TrainingConfig::new(
LstmNetworkConfig::new(),
// Gradient clipping via optimizer config
AdamConfig::new().with_grad_clipping(Some(GradientClippingConfig::Norm(1.0)))
AdamConfig::new().with_grad_clipping(Some(GradientClippingConfig::Norm(1.0))),
);

modern_lstm::training::train::<B>("/tmp/modern-lstm", config, device);
@@ -77,13 +75,13 @@ mod wgpu {
}
}

#[cfg(feature = "cuda-jit")]
mod cuda_jit {
#[cfg(feature = "cuda")]
mod cuda {
use crate::launch;
use burn::backend::{cuda_jit::CudaDevice, Autodiff, CudaJit};
use burn::backend::{cuda::CudaDevice, Autodiff, Cuda};

pub fn run() {
launch::<Autodiff<CudaJit>>(CudaDevice::default());
launch::<Autodiff<Cuda>>(CudaDevice::default());
}
}

@@ -101,6 +99,6 @@ fn main() {
tch_cpu::run();
#[cfg(feature = "wgpu")]
wgpu::run();
#[cfg(feature = "cuda-jit")]
cuda_jit::run();
#[cfg(feature = "cuda")]
cuda::run();
}
