-
Notifications
You must be signed in to change notification settings - Fork 491
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Showing
3 changed files
with
97 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,94 @@ | ||
use crate as burn; | ||
|
||
use crate::config::Config; | ||
use crate::module::Module; | ||
use crate::module::Param; | ||
use crate::tensor::backend::Backend; | ||
use crate::tensor::Tensor; | ||
|
||
/// Configuration to create a [RMS Norm](RmsNorm) layer.
///
/// The `#[derive(Config)]` macro generates a `new(d_model)` constructor and a
/// `with_epsilon(..)` builder method for the defaulted field.
#[derive(Config)]
pub struct RmsNormConfig {
    /// The size of the input features (the length of the last dimension).
    d_model: usize,
    /// A value added to the mean square before the square root, required for
    /// numerical stability (avoids division by zero). Default: 1e-5
    #[config(default = 1e-5)]
    epsilon: f64,
}
|
||
impl RmsNormConfig { | ||
/// Initialize a new [RMS Norm](RmsNorm) module. | ||
pub fn init<B: Backend>(&self, device: &B::Device) -> RmsNorm<B> { | ||
assert!(self.epsilon > 0.0, "epsilon must be positive."); | ||
|
||
let gamma = Tensor::ones([self.d_model], device); | ||
|
||
RmsNorm { | ||
gamma: Param::from(gamma), | ||
epsilon: self.epsilon, | ||
} | ||
} | ||
|
||
/// Initialize a new [RMS Norm](RmsNorm) module with a [record](RmsNormRecord). | ||
pub fn init_with<B: Backend>(&self, record: RmsNormRecord<B>) -> RmsNorm<B> { | ||
RmsNorm { | ||
gamma: record.gamma, | ||
epsilon: self.epsilon, | ||
} | ||
} | ||
} | ||
|
||
/// Applies RMS Normalization over an input tensor along the last dimension.
///
/// `Y = X / sqrt(mean(X^2) + eps) * gamma`
///
/// where `eps` is a small value to avoid division by zero.
#[derive(Module, Debug)]
pub struct RmsNorm<B: Backend> {
    /// The learnable parameter to scale the normalized tensor; has shape
    /// `[d_model]` and is broadcast over all leading dimensions.
    gamma: Param<Tensor<B, 1>>,
    /// A value required for numerical stability (added before the square root).
    epsilon: f64,
}
|
||
impl<B: Backend> RmsNorm<B> { | ||
/// Applies the forward pass on the input tensor. | ||
/// | ||
/// # Shapes | ||
/// | ||
/// - input: `[..., any, d_model]` | ||
/// - output: `[..., any, d_model]` | ||
pub fn forward<const D: usize>(&self, x: Tensor<B, D>) -> Tensor<B, D> { | ||
// Calculate the root-mean-square norm of the input tensor along the last dimension | ||
let rms = (x.clone().powf_scalar(2.0).mean_dim(D - 1) + self.epsilon).sqrt(); | ||
(x / rms) * self.gamma.val().unsqueeze() | ||
} | ||
} | ||
|
||
#[cfg(test)]
mod tests {
    use super::*;
    use crate::TestBackend;
    use burn_tensor::Data;

    /// Checks the forward pass against precomputed RMS-normalized values
    /// for a small 3x3 input with all-ones gamma.
    #[test]
    fn rms_norm_forward() {
        let device = Default::default();
        let layer = RmsNormConfig::new(3)
            .with_epsilon(1e-5)
            .init::<TestBackend>(&device);

        // Rows are [0,1,2], [3,4,5], [6,7,8].
        let x = Tensor::arange(0..9, &device).float().reshape([3, 3]);

        let y = layer.forward(x);

        let expected = Data::from([
            [0.0000, 0.7746, 1.5492],
            [0.7348, 0.9798, 1.2247],
            [0.8514, 0.9933, 1.1352],
        ]);
        y.to_data().assert_approx_eq(&expected, 4);
    }
}