Skip to content

Commit

Permalink
Feat/backend comparison/persistence (#979)
Browse files Browse the repository at this point in the history
* setting up

* wip

* persistence works

* cleanup

* clippy

* run checks

* Cleanup

* reverse json order

---------

Co-authored-by: nathaniel <[email protected]>
  • Loading branch information
louisfd and nathanielsimard authored Nov 22, 2023
1 parent 630044e commit 17f5905
Show file tree
Hide file tree
Showing 11 changed files with 184 additions and 33 deletions.
2 changes: 2 additions & 0 deletions backend-comparison/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,8 @@ burn = { path = "../burn" }
derive-new = { workspace = true }
rand = { workspace = true }
burn-common = { path = "../burn-common", version = "0.11.0" }
serde_json = { workspace = true }
dirs = "5.0.1"

[dev-dependencies]

Expand Down
7 changes: 5 additions & 2 deletions backend-comparison/benches/binary.rs
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
use backend_comparison::persistence::Persistence;
use burn::tensor::{backend::Backend, Distribution, Shape, Tensor};
use burn_common::benchmark::{run_benchmark, Benchmark};

Expand Down Expand Up @@ -35,11 +36,13 @@ impl<B: Backend, const D: usize> Benchmark for BinaryBenchmark<B, D> {

#[allow(dead_code)]
fn bench<B: Backend>(device: &B::Device) {
run_benchmark(BinaryBenchmark::<B, 3> {
let benchmark = BinaryBenchmark::<B, 3> {
shape: [32, 512, 1024].into(),
num_repeats: 10,
device: device.clone(),
})
};

Persistence::persist::<B>(vec![run_benchmark(benchmark)], device)
}

fn main() {
Expand Down
23 changes: 16 additions & 7 deletions backend-comparison/benches/custom_gelu.rs
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
use backend_comparison::persistence::Persistence;
use burn::tensor::{backend::Backend, Distribution, Shape, Tensor};
use burn_common::benchmark::{run_benchmark, Benchmark};
use core::f64::consts::SQRT_2;
Expand Down Expand Up @@ -88,25 +89,33 @@ fn bench<B: Backend>(device: &B::Device) {
let shape: Shape<D> = [32, 512, 2048].into();
let num_repeats = 1;

println!("Backend {}", B::name());
run_benchmark(CustomGeluBenchmark::<B, D>::new(
let reference_gelu = CustomGeluBenchmark::<B, D>::new(
shape.clone(),
num_repeats,
device.clone(),
GeluKind::Reference,
));
run_benchmark(CustomGeluBenchmark::<B, D>::new(
);
let reference_erf_gelu = CustomGeluBenchmark::<B, D>::new(
shape.clone(),
num_repeats,
device.clone(),
GeluKind::WithReferenceErf,
));
run_benchmark(CustomGeluBenchmark::<B, D>::new(
);
let custom_erf_gelu = CustomGeluBenchmark::<B, D>::new(
shape,
num_repeats,
device.clone(),
GeluKind::WithCustomErf,
));
);

Persistence::persist::<B>(
vec![
run_benchmark(reference_gelu),
run_benchmark(reference_erf_gelu),
run_benchmark(custom_erf_gelu),
],
device,
)
}

fn main() {
Expand Down
8 changes: 5 additions & 3 deletions backend-comparison/benches/data.rs
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
use backend_comparison::persistence::Persistence;
use burn::tensor::{backend::Backend, Data, Distribution, Shape, Tensor};
use burn_common::benchmark::{run_benchmark, Benchmark};
use derive_new::new;
Expand Down Expand Up @@ -76,9 +77,10 @@ fn bench<B: Backend>(device: &B::Device) {
let to_benchmark = ToDataBenchmark::<B, D>::new(shape.clone(), num_repeats, device.clone());
let from_benchmark = FromDataBenchmark::<B, D>::new(shape, num_repeats, device.clone());

println!("Backend {}", B::name());
run_benchmark(to_benchmark);
run_benchmark(from_benchmark)
Persistence::persist::<B>(
vec![run_benchmark(to_benchmark), run_benchmark(from_benchmark)],
device,
)
}

fn main() {
Expand Down
4 changes: 2 additions & 2 deletions backend-comparison/benches/matmul.rs
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
use backend_comparison::persistence::Persistence;
use burn::tensor::{backend::Backend, Distribution, Shape, Tensor};
use burn_common::benchmark::{run_benchmark, Benchmark};
use derive_new::new;
Expand Down Expand Up @@ -56,8 +57,7 @@ fn bench<B: Backend>(device: &B::Device) {
let shape_rhs = [batch_size, k, n].into();

let benchmark = MatmulBenchmark::<B, D>::new(shape_lhs, shape_rhs, num_repeats, device.clone());
println!("Backend {}", B::name());
run_benchmark(benchmark);
Persistence::persist::<B>(vec![run_benchmark(benchmark)], device)
}

fn main() {
Expand Down
4 changes: 2 additions & 2 deletions backend-comparison/benches/unary.rs
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
use backend_comparison::persistence::Persistence;
use burn::tensor::{backend::Backend, Distribution, Shape, Tensor};
use burn_common::benchmark::{run_benchmark, Benchmark};
use derive_new::new;
Expand Down Expand Up @@ -40,8 +41,7 @@ fn bench<B: Backend>(device: &B::Device) {

let benchmark = UnaryBenchmark::<B, D>::new(shape, num_repeats, device.clone());

println!("Backend {}", B::name());
run_benchmark(benchmark)
Persistence::persist::<B>(vec![run_benchmark(benchmark)], device)
}

fn main() {
Expand Down
2 changes: 2 additions & 0 deletions backend-comparison/src/lib.rs
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
pub mod persistence;

#[macro_export]
macro_rules! bench_on_backend {
() => {
Expand Down
100 changes: 100 additions & 0 deletions backend-comparison/src/persistence/base.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,100 @@
use std::{
collections::HashMap,
fs::{create_dir_all, File},
path::PathBuf,
time::Duration,
};

use burn::tensor::backend::Backend;
use burn_common::benchmark::BenchmarkResult;
use dirs;
use serde_json;

/// Top level of the cache: git commit hash -> all benchmark results recorded for that commit.
type BenchmarkCommitResults = HashMap<String, BenchmarkOpResults>;
/// Benchmark name (op + shape) -> results per backend.
type BenchmarkOpResults = HashMap<String, BenchmarkBackendResults>;
/// "BACKEND_NAME-DEVICE" key -> timestamped durations for that backend.
type BenchmarkBackendResults = HashMap<String, StampedBenchmarks>;
/// Timestamp of the run (milliseconds) -> durations of the individual repetitions.
type StampedBenchmarks = HashMap<u128, Vec<Duration>>;

/// In-memory view of the on-disk benchmark database (see [`Persistence::persist`]).
#[derive(Default)]
pub struct Persistence {
    // Full nested map, keyed by git commit hash at the top level.
    results: BenchmarkCommitResults,
}

impl Persistence {
    /// Updates the cached backend comparison json file with new benchmarks results.
    ///
    /// The file has the following structure:
    ///
    /// {
    ///   "GIT_COMMIT_HASH":
    ///     {
    ///       "BENCHMARK_NAME (OP + SHAPE)": {
    ///         "BACKEND_NAME-DEVICE": {
    ///           "TIMESTAMP": \[
    ///             DURATIONS
    ///           \]
    ///         }
    ///       }
    ///     }
    /// }
    pub fn persist<B: Backend>(benches: Vec<BenchmarkResult>, device: &B::Device) {
        // Echo every result to the console first, like the old per-bench printing did.
        for bench in benches.iter() {
            println!("{}", bench);
        }

        // The database lives under ~/.cache/backend-comparison/db.json.
        let cache_file = dirs::home_dir()
            .expect("Could not get home directory")
            .join(".cache")
            .join("backend-comparison")
            .join("db.json");

        let mut cache = Self::load(&cache_file);
        cache.update::<B>(device, benches);
        cache.save(&cache_file);
        println!("Persisting to {:?}", cache_file);
    }

    /// Load the cache from disk, or start from an empty one when the file
    /// does not exist (or cannot be opened).
    // NOTE(review): `&Path` would be the idiomatic parameter type here
    // (clippy::ptr_arg); kept as `&PathBuf` to match the callers above.
    fn load(path: &PathBuf) -> Self {
        let results = match File::open(path) {
            Ok(file) => serde_json::from_reader(file)
                .expect("Should have parsed to BenchmarkCommitResults struct"),
            // No readable cache file yet: begin with an empty database.
            Err(_) => HashMap::default(),
        };

        Self { results }
    }

    /// Save the cache on disk, creating the parent directory when needed.
    fn save(&self, path: &PathBuf) {
        if let Some(parent) = path.parent() {
            create_dir_all(parent).expect("Unable to create directory");
        }
        let file = File::create(path).expect("Unable to create backend comparison file");

        serde_json::to_writer_pretty(file, &self.results)
            .expect("Unable to write to backend comparison file");
    }

    /// Update the cache with the given [benchmark results](BenchmarkResult).
    ///
    /// Assumes only that benches share the same backend and device.
    /// It could run faster if we assumed they have the same git hash.
    fn update<B: Backend>(&mut self, device: &B::Device, benches: Vec<BenchmarkResult>) {
        let backend_key = format!("{}-{:?}", B::name(), device);

        for bench in benches {
            // The entry API performs one lookup per nesting level instead of
            // the remove + re-insert round trip at every level.
            self.results
                .entry(bench.git_hash)
                .or_default()
                .entry(bench.name)
                .or_default()
                .entry(backend_key.clone())
                .or_default()
                .insert(bench.timestamp, bench.durations.durations);
        }
    }
}
2 changes: 2 additions & 0 deletions backend-comparison/src/persistence/mod.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
// Private implementation module; its contents are re-exported below.
mod base;
// Flatten the API so callers write `persistence::Persistence` directly.
pub use base::*;
57 changes: 44 additions & 13 deletions burn-common/src/benchmark.rs
Original file line number Diff line number Diff line change
Expand Up @@ -9,11 +9,12 @@ use std::time::Instant;

/// Results of a benchmark run.
#[derive(new, Debug)]
pub struct BenchmarkResult {
durations: Vec<Duration>,
pub struct BenchmarkDurations {
/// All durations of the run, in the order they were benchmarked
pub durations: Vec<Duration>,
}

impl BenchmarkResult {
impl BenchmarkDurations {
/// Returns the median duration among all durations
pub fn median_duration(&self) -> Duration {
let mut sorted = self.durations.clone();
Expand All @@ -25,7 +26,7 @@ impl BenchmarkResult {
}
}

impl Display for BenchmarkResult {
impl Display for BenchmarkDurations {
fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
let mean = self.mean_duration();
let var = self
Expand Down Expand Up @@ -87,7 +88,7 @@ pub trait Benchmark {
/// Wait for computations to be over
fn sync(&self);
/// Run the benchmark a number of times.
fn run(&self) -> BenchmarkResult {
fn run(&self) -> BenchmarkDurations {
#[cfg(not(feature = "std"))]
panic!("Attempting to run benchmark in a no-std environment");

Expand All @@ -114,14 +115,42 @@ pub trait Benchmark {
durations.push(end - start);
}

BenchmarkResult { durations }
BenchmarkDurations { durations }
}
}
}

/// Result of a benchmark run, with metadata
// Debug derived for parity with `BenchmarkDurations`, which is `#[derive(new, Debug)]`;
// all fields (BenchmarkDurations, u128, String) implement Debug.
#[derive(Debug)]
pub struct BenchmarkResult {
    /// Individual results of the run
    pub durations: BenchmarkDurations,
    /// Time just before the run
    pub timestamp: u128,
    /// Git commit hash of the commit in which the run occurred
    pub git_hash: String,
    /// Name of the benchmark, normally with operation name and shapes
    pub name: String,
}

impl Display for BenchmarkResult {
    /// Prints the run metadata (timestamp, git hash, name) followed by the
    /// duration statistics rendered by `BenchmarkDurations`'s own `Display`.
    fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
        // `write!` formats straight into the formatter, avoiding the
        // intermediate String allocated by `format!(..).as_str()`.
        write!(
            f,
            "
Timestamp: {}
Git Hash: {}
Benchmarking - {}{}
",
            self.timestamp, self.git_hash, self.name, self.durations
        )
    }
}

#[cfg(feature = "std")]
/// Runs the given benchmark on the device and prints result and information.
pub fn run_benchmark<BM>(benchmark: BM)
pub fn run_benchmark<BM>(benchmark: BM) -> BenchmarkResult
where
BM: Benchmark,
{
Expand All @@ -130,12 +159,14 @@ where
.unwrap()
.as_millis();
let output = std::process::Command::new("git")
.args(["rev-porse", "HEAD"])
.args(["rev-parse", "HEAD"])
.output()
.unwrap();
let git_hash = String::from_utf8(output.stdout).unwrap();

println!("Timestamp: {}", timestamp);
println!("Git Hash: {}", str::trim(&git_hash));
println!("Benchmarking - {}{}", benchmark.name(), benchmark.run());
let git_hash = String::from_utf8(output.stdout).unwrap().trim().to_string();
BenchmarkResult {
timestamp,
git_hash,
name: benchmark.name(),
durations: benchmark.run(),
}
}
8 changes: 4 additions & 4 deletions burn-compute/src/tune/tuner.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ use core::time::Duration;
use alloc::boxed::Box;
use alloc::string::ToString;
use alloc::vec::Vec;
use burn_common::benchmark::{Benchmark, BenchmarkResult};
use burn_common::benchmark::{Benchmark, BenchmarkDurations};

use crate::channel::ComputeChannel;
use crate::client::ComputeClient;
Expand Down Expand Up @@ -50,7 +50,7 @@ impl<S: ComputeServer, C: ComputeChannel<S>> Tuner<S, C> {
let mut names = Vec::with_capacity(autotunables.len());

// Run all autotune benchmarks
let results: Vec<BenchmarkResult> = autotunables
let results: Vec<BenchmarkDurations> = autotunables
.into_iter()
.map(|op| {
names.push(op.name().to_string());
Expand Down Expand Up @@ -78,11 +78,11 @@ impl<S: ComputeServer, C: ComputeChannel<S>> Tuner<S, C> {
&mut self,
operation: Box<dyn AutotuneOperation>,
client: &ComputeClient<S, C>,
) -> BenchmarkResult {
) -> BenchmarkDurations {
TuneBenchmark::new(operation, client.clone()).run()
}

fn find_fastest(&self, results: Vec<BenchmarkResult>) -> usize {
fn find_fastest(&self, results: Vec<BenchmarkDurations>) -> usize {
let mut smallest_duration = Duration::MAX;
let mut fastest_tunable = None;

Expand Down

0 comments on commit 17f5905

Please sign in to comment.