Skip to content

Commit

Permalink
Feat/backend comparison/persistence (#979)
Browse files Browse the repository at this point in the history
* setting up

* wip

* persistence works

* cleanup

* clippy

* run checks

* Cleanup

* reverse json order

---------

Co-authored-by: nathaniel <[email protected]>
  • Loading branch information
louisfd and nathanielsimard authored Nov 22, 2023
1 parent 630044e commit 17f5905
Show file tree
Hide file tree
Showing 11 changed files with 184 additions and 33 deletions.
2 changes: 2 additions & 0 deletions backend-comparison/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,8 @@ burn = { path = "../burn" }
derive-new = { workspace = true }
rand = { workspace = true }
burn-common = { path = "../burn-common", version = "0.11.0" }
serde_json = { workspace = true }
dirs = "5.0.1"

[dev-dependencies]

Expand Down
7 changes: 5 additions & 2 deletions backend-comparison/benches/binary.rs
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
use backend_comparison::persistence::Persistence;
use burn::tensor::{backend::Backend, Distribution, Shape, Tensor};
use burn_common::benchmark::{run_benchmark, Benchmark};

Expand Down Expand Up @@ -35,11 +36,13 @@ impl<B: Backend, const D: usize> Benchmark for BinaryBenchmark<B, D> {

#[allow(dead_code)]
fn bench<B: Backend>(device: &B::Device) {
run_benchmark(BinaryBenchmark::<B, 3> {
let benchmark = BinaryBenchmark::<B, 3> {
shape: [32, 512, 1024].into(),
num_repeats: 10,
device: device.clone(),
})
};

Persistence::persist::<B>(vec![run_benchmark(benchmark)], device)
}

fn main() {
Expand Down
23 changes: 16 additions & 7 deletions backend-comparison/benches/custom_gelu.rs
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
use backend_comparison::persistence::Persistence;
use burn::tensor::{backend::Backend, Distribution, Shape, Tensor};
use burn_common::benchmark::{run_benchmark, Benchmark};
use core::f64::consts::SQRT_2;
Expand Down Expand Up @@ -88,25 +89,33 @@ fn bench<B: Backend>(device: &B::Device) {
let shape: Shape<D> = [32, 512, 2048].into();
let num_repeats = 1;

println!("Backend {}", B::name());
run_benchmark(CustomGeluBenchmark::<B, D>::new(
let reference_gelu = CustomGeluBenchmark::<B, D>::new(
shape.clone(),
num_repeats,
device.clone(),
GeluKind::Reference,
));
run_benchmark(CustomGeluBenchmark::<B, D>::new(
);
let reference_erf_gelu = CustomGeluBenchmark::<B, D>::new(
shape.clone(),
num_repeats,
device.clone(),
GeluKind::WithReferenceErf,
));
run_benchmark(CustomGeluBenchmark::<B, D>::new(
);
let custom_erf_gelu = CustomGeluBenchmark::<B, D>::new(
shape,
num_repeats,
device.clone(),
GeluKind::WithCustomErf,
));
);

Persistence::persist::<B>(
vec![
run_benchmark(reference_gelu),
run_benchmark(reference_erf_gelu),
run_benchmark(custom_erf_gelu),
],
device,
)
}

fn main() {
Expand Down
8 changes: 5 additions & 3 deletions backend-comparison/benches/data.rs
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
use backend_comparison::persistence::Persistence;
use burn::tensor::{backend::Backend, Data, Distribution, Shape, Tensor};
use burn_common::benchmark::{run_benchmark, Benchmark};
use derive_new::new;
Expand Down Expand Up @@ -76,9 +77,10 @@ fn bench<B: Backend>(device: &B::Device) {
let to_benchmark = ToDataBenchmark::<B, D>::new(shape.clone(), num_repeats, device.clone());
let from_benchmark = FromDataBenchmark::<B, D>::new(shape, num_repeats, device.clone());

println!("Backend {}", B::name());
run_benchmark(to_benchmark);
run_benchmark(from_benchmark)
Persistence::persist::<B>(
vec![run_benchmark(to_benchmark), run_benchmark(from_benchmark)],
device,
)
}

fn main() {
Expand Down
4 changes: 2 additions & 2 deletions backend-comparison/benches/matmul.rs
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
use backend_comparison::persistence::Persistence;
use burn::tensor::{backend::Backend, Distribution, Shape, Tensor};
use burn_common::benchmark::{run_benchmark, Benchmark};
use derive_new::new;
Expand Down Expand Up @@ -56,8 +57,7 @@ fn bench<B: Backend>(device: &B::Device) {
let shape_rhs = [batch_size, k, n].into();

let benchmark = MatmulBenchmark::<B, D>::new(shape_lhs, shape_rhs, num_repeats, device.clone());
println!("Backend {}", B::name());
run_benchmark(benchmark);
Persistence::persist::<B>(vec![run_benchmark(benchmark)], device)
}

fn main() {
Expand Down
4 changes: 2 additions & 2 deletions backend-comparison/benches/unary.rs
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
use backend_comparison::persistence::Persistence;
use burn::tensor::{backend::Backend, Distribution, Shape, Tensor};
use burn_common::benchmark::{run_benchmark, Benchmark};
use derive_new::new;
Expand Down Expand Up @@ -40,8 +41,7 @@ fn bench<B: Backend>(device: &B::Device) {

let benchmark = UnaryBenchmark::<B, D>::new(shape, num_repeats, device.clone());

println!("Backend {}", B::name());
run_benchmark(benchmark)
Persistence::persist::<B>(vec![run_benchmark(benchmark)], device)
}

fn main() {
Expand Down
2 changes: 2 additions & 0 deletions backend-comparison/src/lib.rs
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
pub mod persistence;

#[macro_export]
macro_rules! bench_on_backend {
() => {
Expand Down
100 changes: 100 additions & 0 deletions backend-comparison/src/persistence/base.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,100 @@
use std::{
collections::HashMap,
fs::{create_dir_all, File},
path::PathBuf,
time::Duration,
};

use burn::tensor::backend::Backend;
use burn_common::benchmark::BenchmarkResult;
use dirs;
use serde_json;

/// Top level of the cache: git commit hash -> all benchmark results recorded for that commit.
type BenchmarkCommitResults = HashMap<String, BenchmarkOpResults>;
/// Benchmark name (op + shape) -> results per backend.
type BenchmarkOpResults = HashMap<String, BenchmarkBackendResults>;
/// "BACKEND_NAME-DEVICE" key -> timestamped durations for that backend.
type BenchmarkBackendResults = HashMap<String, StampedBenchmarks>;
/// Timestamp of the run (milliseconds) -> durations of the individual repetitions.
type StampedBenchmarks = HashMap<u128, Vec<Duration>>;

/// In-memory view of the on-disk benchmark database (see [`Persistence::persist`]).
#[derive(Default)]
pub struct Persistence {
    // Full nested map, keyed by git commit hash at the top level.
    results: BenchmarkCommitResults,
}

impl Persistence {
    /// Updates the cached backend comparison json file with new benchmarks results.
    ///
    /// The file has the following structure:
    ///
    /// {
    ///   "GIT_COMMIT_HASH":
    ///     {
    ///       "BENCHMARK_NAME (OP + SHAPE)": {
    ///         "BACKEND_NAME-DEVICE": {
    ///           "TIMESTAMP": \[
    ///             DURATIONS
    ///           \]
    ///         }
    ///       }
    ///     }
    /// }
    pub fn persist<B: Backend>(benches: Vec<BenchmarkResult>, device: &B::Device) {
        // Echo every result to the console first, like the old per-bench printing did.
        for bench in benches.iter() {
            println!("{}", bench);
        }

        // The database lives under ~/.cache/backend-comparison/db.json.
        let cache_file = dirs::home_dir()
            .expect("Could not get home directory")
            .join(".cache")
            .join("backend-comparison")
            .join("db.json");

        let mut cache = Self::load(&cache_file);
        cache.update::<B>(device, benches);
        cache.save(&cache_file);
        println!("Persisting to {:?}", cache_file);
    }

    /// Load the cache from disk, or start from an empty one when the file
    /// does not exist (or cannot be opened).
    // NOTE(review): `&Path` would be the idiomatic parameter type here
    // (clippy::ptr_arg); kept as `&PathBuf` to match the callers above.
    fn load(path: &PathBuf) -> Self {
        let results = match File::open(path) {
            Ok(file) => serde_json::from_reader(file)
                .expect("Should have parsed to BenchmarkCommitResults struct"),
            // No readable cache file yet: begin with an empty database.
            Err(_) => HashMap::default(),
        };

        Self { results }
    }

    /// Save the cache on disk, creating the parent directory when needed.
    fn save(&self, path: &PathBuf) {
        if let Some(parent) = path.parent() {
            create_dir_all(parent).expect("Unable to create directory");
        }
        let file = File::create(path).expect("Unable to create backend comparison file");

        serde_json::to_writer_pretty(file, &self.results)
            .expect("Unable to write to backend comparison file");
    }

    /// Update the cache with the given [benchmark results](BenchmarkResult).
    ///
    /// Assumes only that benches share the same backend and device.
    /// It could run faster if we assumed they have the same git hash.
    fn update<B: Backend>(&mut self, device: &B::Device, benches: Vec<BenchmarkResult>) {
        let backend_key = format!("{}-{:?}", B::name(), device);

        for bench in benches {
            // The entry API performs one lookup per nesting level instead of
            // the remove + re-insert round trip at every level.
            self.results
                .entry(bench.git_hash)
                .or_default()
                .entry(bench.name)
                .or_default()
                .entry(backend_key.clone())
                .or_default()
                .insert(bench.timestamp, bench.durations.durations);
        }
    }
}
2 changes: 2 additions & 0 deletions backend-comparison/src/persistence/mod.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
// Private implementation module; its contents are re-exported below.
mod base;
// Flatten the API so callers write `persistence::Persistence` directly.
pub use base::*;
57 changes: 44 additions & 13 deletions burn-common/src/benchmark.rs
Original file line number Diff line number Diff line change
Expand Up @@ -9,11 +9,12 @@ use std::time::Instant;

/// Results of a benchmark run.
#[derive(new, Debug)]
pub struct BenchmarkResult {
durations: Vec<Duration>,
pub struct BenchmarkDurations {
/// All durations of the run, in the order they were benchmarked
pub durations: Vec<Duration>,
}

impl BenchmarkResult {
impl BenchmarkDurations {
/// Returns the median duration among all durations
pub fn median_duration(&self) -> Duration {
let mut sorted = self.durations.clone();
Expand All @@ -25,7 +26,7 @@ impl BenchmarkResult {
}
}

impl Display for BenchmarkResult {
impl Display for BenchmarkDurations {
fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
let mean = self.mean_duration();
let var = self
Expand Down Expand Up @@ -87,7 +88,7 @@ pub trait Benchmark {
/// Wait for computations to be over
fn sync(&self);
/// Run the benchmark a number of times.
fn run(&self) -> BenchmarkResult {
fn run(&self) -> BenchmarkDurations {
#[cfg(not(feature = "std"))]
panic!("Attempting to run benchmark in a no-std environment");

Expand All @@ -114,14 +115,42 @@ pub trait Benchmark {
durations.push(end - start);
}

BenchmarkResult { durations }
BenchmarkDurations { durations }
}
}
}

/// Result of a benchmark run, with metadata
// Debug derived for parity with `BenchmarkDurations`, which is `#[derive(new, Debug)]`;
// all fields (BenchmarkDurations, u128, String) implement Debug.
#[derive(Debug)]
pub struct BenchmarkResult {
    /// Individual results of the run
    pub durations: BenchmarkDurations,
    /// Time just before the run
    pub timestamp: u128,
    /// Git commit hash of the commit in which the run occurred
    pub git_hash: String,
    /// Name of the benchmark, normally with operation name and shapes
    pub name: String,
}

impl Display for BenchmarkResult {
    /// Prints the run metadata (timestamp, git hash, name) followed by the
    /// duration statistics rendered by `BenchmarkDurations`'s own `Display`.
    fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
        // `write!` formats straight into the formatter, avoiding the
        // intermediate String allocated by `format!(..).as_str()`.
        write!(
            f,
            "
Timestamp: {}
Git Hash: {}
Benchmarking - {}{}
",
            self.timestamp, self.git_hash, self.name, self.durations
        )
    }
}

#[cfg(feature = "std")]
/// Runs the given benchmark on the device and prints result and information.
pub fn run_benchmark<BM>(benchmark: BM)
pub fn run_benchmark<BM>(benchmark: BM) -> BenchmarkResult
where
BM: Benchmark,
{
Expand All @@ -130,12 +159,14 @@ where
.unwrap()
.as_millis();
let output = std::process::Command::new("git")
.args(["rev-porse", "HEAD"])
.args(["rev-parse", "HEAD"])
.output()
.unwrap();
let git_hash = String::from_utf8(output.stdout).unwrap();

println!("Timestamp: {}", timestamp);
println!("Git Hash: {}", str::trim(&git_hash));
println!("Benchmarking - {}{}", benchmark.name(), benchmark.run());
let git_hash = String::from_utf8(output.stdout).unwrap().trim().to_string();
BenchmarkResult {
timestamp,
git_hash,
name: benchmark.name(),
durations: benchmark.run(),
}
}
8 changes: 4 additions & 4 deletions burn-compute/src/tune/tuner.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ use core::time::Duration;
use alloc::boxed::Box;
use alloc::string::ToString;
use alloc::vec::Vec;
use burn_common::benchmark::{Benchmark, BenchmarkResult};
use burn_common::benchmark::{Benchmark, BenchmarkDurations};

use crate::channel::ComputeChannel;
use crate::client::ComputeClient;
Expand Down Expand Up @@ -50,7 +50,7 @@ impl<S: ComputeServer, C: ComputeChannel<S>> Tuner<S, C> {
let mut names = Vec::with_capacity(autotunables.len());

// Run all autotune benchmarks
let results: Vec<BenchmarkResult> = autotunables
let results: Vec<BenchmarkDurations> = autotunables
.into_iter()
.map(|op| {
names.push(op.name().to_string());
Expand Down Expand Up @@ -78,11 +78,11 @@ impl<S: ComputeServer, C: ComputeChannel<S>> Tuner<S, C> {
&mut self,
operation: Box<dyn AutotuneOperation>,
client: &ComputeClient<S, C>,
) -> BenchmarkResult {
) -> BenchmarkDurations {
TuneBenchmark::new(operation, client.clone()).run()
}

fn find_fastest(&self, results: Vec<BenchmarkResult>) -> usize {
fn find_fastest(&self, results: Vec<BenchmarkDurations>) -> usize {
let mut smallest_duration = Duration::MAX;
let mut fastest_tunable = None;

Expand Down

0 comments on commit 17f5905

Please sign in to comment.