diff --git a/README.md b/README.md
index d0ccbcf411..951b2a9f24 100644
--- a/README.md
+++ b/README.md
@@ -621,19 +621,20 @@ leads to more reliable, bug-free solutions built faster (after some practice
 > **Deprecation Note**
 > Since `0.14.0`, the internal structure for tensor data has changed. The
-> previous `Data` struct is being deprecated in favor of the new `TensorData` struct, which allows
-> for more flexibility by storing the underlying data as bytes and keeping the data type as a field.
-> If you are using `Data` in your code, make sure to switch to `TensorData`.
+> previous `Data` struct was deprecated and officially removed in `0.17.0` in favor of the new
+> `TensorData` struct, which allows for more flexibility by storing the underlying data as bytes and
+> keeping the data type as a field. If you are using `Data` in your code, make sure to switch to
+> `TensorData`.
@@ -642,8 +643,9 @@ Loading Model Records From Previous Versions ⚠️
-In the event that you are trying to load a model record saved in a previous version, make sure to
-enable the `record-backward-compat` feature flag.
+In the event that you are trying to load a model record saved in a version older than `0.14.0`, make
+sure to use a compatible version (`0.14`, `0.15` or `0.16`) with the `record-backward-compat`
+feature flag.

 ```
 features = [..., "record-backward-compat"]
 ```
@@ -652,13 +654,14 @@ features = [..., "record-backward-compat"]

 Otherwise, the record won't be deserialized correctly and you will get an error message. This error
 will also point you to the backward compatible feature flag.

-The backward compatibility is maintained for deserialization when loading records. Therefore, as
-soon as you have saved the record again it will be saved according to the new structure and you
-won't need the backward compatible feature flag anymore.
+The backward compatibility was maintained for deserialization when loading records. Therefore, as
+soon as you have saved the record again, it will be saved according to the new structure and you can
+upgrade back to the current version.

 Please note that binary formats are not backward compatible. Thus, you will need to load your record
 in a previous version and save it in any of the other self-describing record format (e.g., using the
-`NamedMpkFileRecorder`) before using the new version with the `record-backward-compat` feature flag.
+`NamedMpkFileRecorder`) before using a compatible version (as described) with the
+`record-backward-compat` feature flag.
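To make the migration path above concrete, here is a minimal sketch, assuming a model that
implements `burn::module::Module`, placeholder file names, and burn `0.16` with the
`record-backward-compat` feature enabled (`load_file`/`save_file` are the `Module` trait's
file helpers):

```rust
use burn::module::Module;
use burn::record::{FullPrecisionSettings, NamedMpkFileRecorder, RecorderError};
use burn::tensor::backend::Backend;

/// Load a legacy (pre-`0.14.0`) record and re-save it in the new `TensorData` format.
/// Run this on burn `0.16` with `record-backward-compat` enabled, then upgrade.
fn migrate<B: Backend, M: Module<B>>(model: M, device: &B::Device) -> Result<(), RecorderError> {
    // A self-describing recorder (named MessagePack), as the note above requires.
    let recorder = NamedMpkFileRecorder::<FullPrecisionSettings>::new();

    // 1. Deserialize the old record; the compat flag handles the legacy format.
    let model = model.load_file("legacy_model", &recorder, device)?;

    // 2. Re-save: the record is now written in the new format. After this step,
    //    the `burn` dependency can be upgraded to the current version.
    model.save_file("migrated_model", &recorder)
}
```

Any other self-describing recorder (e.g., `PrettyJsonFileRecorder`) works as well; only binary
recorders such as `BinFileRecorder` cannot be migrated directly, as noted above.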
diff --git a/crates/burn-core/Cargo.toml b/crates/burn-core/Cargo.toml
index b968e28a68..e895cc4572 100644
--- a/crates/burn-core/Cargo.toml
+++ b/crates/burn-core/Cargo.toml
@@ -113,9 +113,6 @@ record-item-custom-serde = ["thiserror", "regex"]
 # Serialization formats
 experimental-named-tensor = ["burn-tensor/experimental-named-tensor"]

-# Backwards compatibility with previous serialized data format.
-record-backward-compat = []
-
 test-cuda = ["cuda-jit"] # To use cuda during testing, default uses ndarray.
 test-hip = ["hip-jit"] # To use hip during testing, default uses ndarray.
 test-tch = ["tch"] # To use tch during testing, default uses ndarray.
diff --git a/crates/burn-core/src/record/primitive.rs b/crates/burn-core/src/record/primitive.rs
index 9dd921e824..2f9fa3e83c 100644
--- a/crates/burn-core/src/record/primitive.rs
+++ b/crates/burn-core/src/record/primitive.rs
@@ -5,9 +5,7 @@ use super::tensor::{BoolTensorSerde, FloatTensorSerde, IntTensorSerde};
 use super::{PrecisionSettings, Record};
 use crate::module::{Param, ParamId};

-#[allow(deprecated)]
-use burn_tensor::DataSerialize;
-use burn_tensor::{backend::Backend, Bool, Element, Int, Tensor};
+use burn_tensor::{backend::Backend, Bool, Int, Tensor};
 use hashbrown::HashMap;

 use serde::{
@@ -143,23 +141,6 @@ where
     }
 }

-#[allow(deprecated)]
-impl<E, B> Record<B> for DataSerialize<E>
-where
-    E: Element,
-    B: Backend,
-{
-    type Item<S: PrecisionSettings> = DataSerialize<S::FloatElem>;
-
-    fn into_item<S: PrecisionSettings>(self) -> Self::Item<S> {
-        self.convert()
-    }
-
-    fn from_item<S: PrecisionSettings>(item: Self::Item<S>, _device: &B::Device) -> Self {
-        item.convert()
-    }
-}
-
 /// (De)serialize parameters into a clean format.
 #[derive(new, Debug, Clone, Serialize, Deserialize)]
 pub struct ParamSerde<T> {
diff --git a/crates/burn-core/src/record/tensor.rs b/crates/burn-core/src/record/tensor.rs
index ab6f448b7e..a07453bcba 100644
--- a/crates/burn-core/src/record/tensor.rs
+++ b/crates/burn-core/src/record/tensor.rs
@@ -4,20 +4,7 @@ use super::{PrecisionSettings, Record};
 use burn_tensor::{backend::Backend, Bool, DType, Element, Int, Tensor, TensorData};
 use serde::{Deserialize, Serialize};

-#[cfg(not(feature = "record-backward-compat"))]
 use alloc::format;
-#[cfg(feature = "record-backward-compat")]
-use burn_tensor::DataSerialize;
-
-/// Versioned serde data deserialization to maintain backward compatibility between formats.
-#[cfg(feature = "record-backward-compat")]
-#[allow(deprecated)]
-#[derive(Serialize, Deserialize)]
-#[serde(untagged)]
-enum TensorDataSerde<E> {
-    V1(DataSerialize<E>),
-    V2(TensorData),
-}

 /// Deserialize the value into [`TensorData`].
 fn deserialize_data<'de, E, De>(deserializer: De) -> Result<TensorData, De::Error>
@@ -25,31 +12,18 @@ where
     E: Element + Deserialize<'de>,
     De: serde::Deserializer<'de>,
 {
-    #[cfg(feature = "record-backward-compat")]
-    {
-        let data = match TensorDataSerde::<E>::deserialize(deserializer)? {
-            TensorDataSerde::V1(data) => data.into_tensor_data(),
-            // NOTE: loading f32 weights with f16 precision will deserialize the f32 weights (bytes) first and then convert to f16
-            TensorDataSerde::V2(data) => data.convert::<E>(),
-        };
-        Ok(data)
-    }
-
-    #[cfg(not(feature = "record-backward-compat"))]
-    {
-        let data = TensorData::deserialize(deserializer).map_err(|e| {
-            serde::de::Error::custom(format!(
-                "{:?}\nThe internal data format has changed since version 0.14.0. If you are trying to load a record saved in a previous version, use the `record-backward-compat` feature flag. Once you have saved the record in the new format, you can disable the feature flag.\n",
-                e
-            ))
-        })?;
-        let data = if let DType::QFloat(_) = data.dtype {
-            data // do not convert quantized tensors
-        } else {
-            data.convert::<E>()
-        };
-        Ok(data)
-    }
+    let data = TensorData::deserialize(deserializer).map_err(|e| {
+        serde::de::Error::custom(format!(
+            "{:?}\nThe internal data format has changed since version 0.14.0. If you are trying to load a record saved in a previous version, use the `record-backward-compat` feature flag with a previous version (<=0.16.0). Once you have saved the record in the new format, you can upgrade back to the current version.\n",
+            e
+        ))
+    })?;
+    let data = if let DType::QFloat(_) = data.dtype {
+        data // do not convert quantized tensors
+    } else {
+        data.convert::<E>()
+    };
+    Ok(data)
 }

 /// This struct implements serde to lazily serialize and deserialize a float tensor
diff --git a/crates/burn-tensor/src/lib.rs b/crates/burn-tensor/src/lib.rs
index d3cb280e90..0376da57a2 100644
--- a/crates/burn-tensor/src/lib.rs
+++ b/crates/burn-tensor/src/lib.rs
@@ -1,8 +1,6 @@
 #![cfg_attr(not(feature = "std"), no_std)]
 #![warn(missing_docs)]
 #![cfg_attr(docsrs, feature(doc_auto_cfg))]
-// Allow deprecated `Data` and `DataSerialize`
-#![allow(deprecated)]

 //! This library provides multiple tensor implementations hidden behind an easy to use API
 //! that supports reverse mode automatic differentiation.
@@ -59,6 +57,8 @@ mod cube_wgpu {
     use crate::backend::{DeviceId, DeviceOps};
     use cubecl::wgpu::WgpuDevice;

+    // Allow deprecated `WgpuDevice::BestAvailable`
+    #[allow(deprecated)]
     impl DeviceOps for WgpuDevice {
         fn id(&self) -> DeviceId {
             match self {
diff --git a/crates/burn-tensor/src/tensor/data.rs b/crates/burn-tensor/src/tensor/data.rs
index 5fa6f765fc..bd144e397f 100644
--- a/crates/burn-tensor/src/tensor/data.rs
+++ b/crates/burn-tensor/src/tensor/data.rs
@@ -1,7 +1,4 @@
-use core::{
-    any::{Any, TypeId},
-    f32,
-};
+use core::f32;

 use alloc::boxed::Box;
 use alloc::format;
@@ -14,7 +11,7 @@ use crate::{
     quantization::{
         Quantization, QuantizationScheme, QuantizationStrategy, QuantizationType, QuantizedBytes,
     },
-    tensor::{bytes::Bytes, Shape},
+    tensor::bytes::Bytes,
     DType, Distribution, Element, ElementConversion,
 };
@@ -777,396 +774,6 @@ impl core::fmt::Display for TensorData {
     }
 }

-/// Data structure for serializing and deserializing tensor data.
-#[derive(serde::Serialize, serde::Deserialize, Debug, PartialEq, Eq, Clone, new)]
-#[deprecated(
-    since = "0.14.0",
-    note = "the internal data format has changed, please use `TensorData` instead"
-)]
-pub struct DataSerialize<E> {
-    /// The values of the tensor.
-    pub value: Vec<E>,
-    /// The shape of the tensor.
-    pub shape: Vec<usize>,
-}
-
-/// Data structure for tensors.
-#[derive(new, Debug, Clone, PartialEq, Eq)]
-#[deprecated(
-    since = "0.14.0",
-    note = "the internal data format has changed, please use `TensorData` instead"
-)]
-pub struct Data<E, const D: usize> {
-    /// The values of the tensor.
-    pub value: Vec<E>,
-
-    /// The shape of the tensor.
-    pub shape: Shape,
-}
-
-#[allow(deprecated)]
-impl<E: Element, const D: usize> Data<E, D> {
-    /// Converts the data to a different element type.
-    pub fn convert<F: Element>(self) -> Data<F, D> {
-        let value: Vec<F> = self.value.into_iter().map(|a| a.elem()).collect();
-
-        Data {
-            value,
-            shape: self.shape,
-        }
-    }
-
-    /// Asserts each value is within a given range.
-    ///
-    /// # Arguments
-    ///
-    /// * `range` - The range.
-    ///
-    /// # Panics
-    ///
-    /// If any value is not within the half-open range bounded inclusively below
-    /// and exclusively above (`start..end`).
-    pub fn assert_within_range<EOther: Element>(&self, range: core::ops::Range<EOther>) {
-        let start = range.start.elem::<f32>();
-        let end = range.end.elem::<f32>();
-
-        for elem in self.value.iter() {
-            let elem = elem.elem::<f32>();
-            if elem < start || elem >= end {
-                panic!("Element ({elem:?}) is not within range {range:?}");
-            }
-        }
-    }
-}
-
-#[allow(deprecated)]
-impl<E: Element> DataSerialize<E> {
-    /// Converts the data to a different element type.
-    pub fn convert<F: Element>(self) -> DataSerialize<F> {
-        if TypeId::of::<E>() == TypeId::of::<F>() {
-            let cast: Box<dyn Any> = Box::new(self);
-            let cast: Box<DataSerialize<F>> = cast.downcast().unwrap();
-            return *cast;
-        }
-
-        let value: Vec<F> = self.value.into_iter().map(|a| a.elem()).collect();
-
-        DataSerialize {
-            value,
-            shape: self.shape,
-        }
-    }
-
-    /// Converts the data to the new [TensorData] format.
-    pub fn into_tensor_data(self) -> TensorData {
-        TensorData::new(self.value, self.shape)
-    }
-}
-
-#[allow(deprecated)]
-impl<E: Element, const D: usize> Data<E, D> {
-    /// Populates the data with random values.
-    pub fn random<R: RngCore>(shape: Shape, distribution: Distribution, rng: &mut R) -> Self {
-        let num_elements = shape.num_elements();
-        let mut data = Vec::with_capacity(num_elements);
-
-        for _ in 0..num_elements {
-            data.push(E::random(distribution, rng));
-        }
-
-        Data::new(data, shape)
-    }
-}
-
-#[allow(deprecated)]
-impl<E, const D: usize> Data<E, D>
-where
-    E: Element,
-{
-    /// Populates the data with zeros.
-    pub fn zeros<S: Into<Shape>>(shape: S) -> Data<E, D> {
-        let shape = shape.into();
-        let num_elements = shape.num_elements();
-        let mut data = Vec::with_capacity(num_elements);
-
-        for _ in 0..num_elements {
-            data.push(0.elem());
-        }
-
-        Data::new(data, shape)
-    }
-}
-
-#[allow(deprecated)]
-impl<E, const D: usize> Data<E, D>
-where
-    E: Element,
-{
-    /// Populates the data with ones.
-    pub fn ones(shape: Shape) -> Data<E, D> {
-        let num_elements = shape.num_elements();
-        let mut data = Vec::with_capacity(num_elements);
-
-        for _ in 0..num_elements {
-            data.push(1.elem());
-        }
-
-        Data::new(data, shape)
-    }
-}
-
-#[allow(deprecated)]
-impl<E, const D: usize> Data<E, D>
-where
-    E: Element,
-{
-    /// Populates the data with the given value
-    pub fn full(shape: Shape, fill_value: E) -> Data<E, D> {
-        let num_elements = shape.num_elements();
-        let mut data = Vec::with_capacity(num_elements);
-        for _ in 0..num_elements {
-            data.push(fill_value)
-        }
-
-        Data::new(data, shape)
-    }
-}
-
-#[allow(deprecated)]
-impl<E: Element, const D: usize> Data<E, D> {
-    /// Serializes the data.
-    ///
-    /// # Returns
-    ///
-    /// The serialized data.
-    pub fn serialize(&self) -> DataSerialize<E> {
-        DataSerialize {
-            value: self.value.clone(),
-            shape: self.shape.dims.to_vec(),
-        }
-    }
-}
-
-#[allow(deprecated)]
-impl<E: Into<f64> + Clone + core::fmt::Debug + PartialEq + Element, const D: usize> Data<E, D> {
-    /// Asserts the data is approximately equal to another data.
-    ///
-    /// # Arguments
-    ///
-    /// * `other` - The other data.
-    /// * `precision` - The precision of the comparison.
-    ///
-    /// # Panics
-    ///
-    /// Panics if the data is not approximately equal.
-    #[track_caller]
-    pub fn assert_approx_eq(&self, other: &Self, precision: usize) {
-        let tolerance = 0.1.pow(precision as f64);
-
-        self.assert_approx_eq_diff(other, tolerance)
-    }
-
-    /// Asserts the data is approximately equal to another data.
-    ///
-    /// # Arguments
-    ///
-    /// * `other` - The other data.
-    /// * `tolerance` - The tolerance of the comparison.
-    ///
-    /// # Panics
-    ///
-    /// Panics if the data is not approximately equal.
-    #[track_caller]
-    pub fn assert_approx_eq_diff(&self, other: &Self, tolerance: f64) {
-        let mut message = String::new();
-        if self.shape != other.shape {
-            message += format!(
-                "\n => Shape is different: {:?} != {:?}",
-                self.shape.dims, other.shape.dims
-            )
-            .as_str();
-        }
-
-        let iter = self.value.clone().into_iter().zip(other.value.clone());
-
-        let mut num_diff = 0;
-        let max_num_diff = 5;
-
-        for (i, (a, b)) in iter.enumerate() {
-            let a: f64 = a.into();
-            let b: f64 = b.into();
-
-            //if they are both nan, then they are equally nan
-            let both_nan = a.is_nan() && b.is_nan();
-            //this works for both infinities
-            let both_inf = a.is_infinite() && b.is_infinite() && ((a > 0.) == (b > 0.));
-
-            if both_nan || both_inf {
-                continue;
-            }
-
-            let err = (a - b).abs();
-
-            if E::dtype().is_float() {
-                if let Some((err, tolerance)) = compare_floats(a, b, E::dtype(), tolerance) {
-                    // Only print the first 5 different values.
-                    if num_diff < max_num_diff {
-                        message += format!(
-                            "\n => Position {i}: {a} != {b} | difference {err} > tolerance \
-                             {tolerance}"
-                        )
-                        .as_str();
-                    }
-                    num_diff += 1;
-                }
-            } else if err > tolerance || err.is_nan() {
-                // Only print the first 5 different values.
-                if num_diff < max_num_diff {
-                    message += format!(
-                        "\n => Position {i}: {a} != {b} | difference {err} > tolerance \
-                         {tolerance}"
-                    )
-                    .as_str();
-                }
-                num_diff += 1;
-            }
-        }
-
-        if num_diff >= max_num_diff {
-            message += format!("\n{} more errors...", num_diff - 5).as_str();
-        }
-
-        if !message.is_empty() {
-            panic!("Tensors are not approx eq:{}", message);
-        }
-    }
-}
-
-#[allow(deprecated)]
-impl<const D: usize> Data<usize, D> {
-    /// Converts the usize data to a different element type.
-    pub fn from_usize<O: num_traits::FromPrimitive>(self) -> Data<O, D> {
-        let value: Vec<O> = self
-            .value
-            .into_iter()
-            .map(|a| num_traits::FromPrimitive::from_usize(a).unwrap())
-            .collect();
-
-        Data {
-            value,
-            shape: self.shape,
-        }
-    }
-}
-
-#[allow(deprecated)]
-impl<E: Element, const D: usize> From<&DataSerialize<E>> for Data<E, D> {
-    fn from(data: &DataSerialize<E>) -> Self {
-        let mut dims = [0; D];
-        dims[..D].copy_from_slice(&data.shape[..D]);
-        Data::new(data.value.clone(), Shape::new(dims))
-    }
-}
-
-#[allow(deprecated)]
-impl<E: Element, const D: usize> From<DataSerialize<E>> for Data<E, D> {
-    fn from(data: DataSerialize<E>) -> Self {
-        let mut dims = [0; D];
-        dims[..D].copy_from_slice(&data.shape[..D]);
-        Data::new(data.value, Shape::new(dims))
-    }
-}
-
-#[allow(deprecated)]
-impl<E: core::fmt::Debug + Copy, const A: usize> From<[E; A]> for Data<E, 1> {
-    fn from(elems: [E; A]) -> Self {
-        let mut data = Vec::with_capacity(2 * A);
-        for elem in elems.into_iter() {
-            data.push(elem);
-        }
-
-        Data::new(data, Shape::new([A]))
-    }
-}
-
-#[allow(deprecated)]
-impl<E: core::fmt::Debug + Copy> From<&[E]> for Data<E, 1> {
-    fn from(elems: &[E]) -> Self {
-        let mut data = Vec::with_capacity(elems.len());
-        for elem in elems.iter() {
-            data.push(*elem);
-        }
-
-        Data::new(data, Shape::new([elems.len()]))
-    }
-}
-
-#[allow(deprecated)]
-impl<E: core::fmt::Debug + Copy, const A: usize, const B: usize> From<[[E; B]; A]> for Data<E, 2> {
-    fn from(elems: [[E; B]; A]) -> Self {
-        let mut data = Vec::with_capacity(A * B);
-        for elem in elems.into_iter().take(A) {
-            for elem in elem.into_iter().take(B) {
-                data.push(elem);
-            }
-        }
-
-        Data::new(data, Shape::new([A, B]))
-    }
-}
-
-#[allow(deprecated)]
-impl<E: core::fmt::Debug + Copy, const A: usize, const B: usize, const C: usize>
-    From<[[[E; C]; B]; A]> for Data<E, 3>
-{
-    fn from(elems: [[[E; C]; B]; A]) -> Self {
-        let mut data = Vec::with_capacity(A * B * C);
-
-        for elem in elems.into_iter().take(A) {
-            for elem in elem.into_iter().take(B) {
-                for elem in elem.into_iter().take(C) {
-                    data.push(elem);
-                }
-            }
-        }
-
-        Data::new(data, Shape::new([A, B, C]))
-    }
-}
-
-#[allow(deprecated)]
-impl<
-        E: core::fmt::Debug + Copy,
-        const A: usize,
-        const B: usize,
-        const C: usize,
-        const D: usize,
-    > From<[[[[E; D]; C]; B]; A]> for Data<E, 4>
-{
-    fn from(elems: [[[[E; D]; C]; B]; A]) -> Self {
-        let mut data = Vec::with_capacity(A * B * C * D);
-
-        for elem in elems.into_iter().take(A) {
-            for elem in elem.into_iter().take(B) {
-                for elem in elem.into_iter().take(C) {
-                    for elem in elem.into_iter().take(D) {
-                        data.push(elem);
-                    }
-                }
-            }
-        }
-
-        Data::new(data, Shape::new([A, B, C, D]))
-    }
-}
-
-#[allow(deprecated)]
-impl<E: core::fmt::Debug, const D: usize> core::fmt::Display for Data<E, D> {
-    fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
-        f.write_str(format!("{:?}", &self.value).as_str())
-    }
-}
-
 fn compare_floats(value: f64, other: f64, ty: DType, tolerance: f64) -> Option<(f64, f64)> {
     let epsilon_deviations = tolerance / f32::EPSILON as f64;
     let epsilon = match ty {
@@ -1192,9 +799,8 @@ fn compare_floats(value: f64, other: f64, ty: DType, tolerance: f64) -> Option<(
 }

 #[cfg(test)]
-#[allow(deprecated)]
 mod tests {
-    use crate::quantization::AffineQuantization;
+    use crate::{quantization::AffineQuantization, Shape};

     use super::*;
     use alloc::vec;
diff --git a/crates/burn/Cargo.toml b/crates/burn/Cargo.toml
index 0e7ff51e88..d54233f993 100644
--- a/crates/burn/Cargo.toml
+++ b/crates/burn/Cargo.toml
@@ -67,7 +67,6 @@ network = ["burn-core/network"]
 experimental-named-tensor = ["burn-core/experimental-named-tensor"]

 # Records
-record-backward-compat = ["burn-core/record-backward-compat"]
 record-item-custom-serde = ["burn-core/record-item-custom-serde"]

 [dependencies]
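With `Data` and `DataSerialize` removed, `TensorData` is the single data container: the shape is a
plain `Vec<usize>`, and values are stored as raw bytes alongside a `DType` field. A minimal sketch
of the replacement API (the values and shape are arbitrary; `convert` and `to_vec` are the public
`burn_tensor::TensorData` methods also used in the deserializer above):

```rust
use burn_tensor::TensorData;

fn main() {
    // Previously: Data::<f32, 2>::new(vec![1.0, 2.0, 3.0, 4.0], Shape::new([2, 2])).
    // TensorData accepts any shape convertible into Vec<usize> and erases the
    // element type into bytes, recording it in the `dtype` field.
    let data = TensorData::new(vec![1.0f32, 2.0, 3.0, 4.0], [2, 2]);
    assert_eq!(data.shape, vec![2, 2]);

    // Element type changes are explicit conversions, mirroring what the record
    // deserializer does for non-quantized tensors.
    let converted = data.convert::<f64>();
    let values: Vec<f64> = converted.to_vec().expect("dtype should match");
    assert_eq!(values, vec![1.0, 2.0, 3.0, 4.0]);
}
```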