diff --git a/benches/generate_data.py b/benches/generate_data.py
index 8653b9f..038c44e 100644
--- a/benches/generate_data.py
+++ b/benches/generate_data.py
@@ -2,9 +2,9 @@ import shapely
 
 
 
-gdf = gpd.read_file("Utah.geojson.zip", engine="pyogrio")
+gdf = gpd.read_file("./bench_data/Utah.geojson", engine="pyogrio")
 bounds = shapely.bounds(gdf.geometry)
 print(bounds.shape)
 buf = bounds.tobytes("C")
-with open("bounds.raw", "wb") as f:
+with open("./bench_data/bounds.raw", "wb") as f:
     f.write(buf)
diff --git a/benches/pyproject.toml b/benches/pyproject.toml
new file mode 100644
index 0000000..f8c72e1
--- /dev/null
+++ b/benches/pyproject.toml
@@ -0,0 +1,15 @@
+[project]
+name = "geo-index-benches"
+version = "0.1.0"
+description = "Add your description here"
+readme = "README.md"
+dependencies = [
+    "geoindex-rs",
+    "geopandas",
+    "pyogrio",
+    "pyarrow",
+    "numpy",
+    "requests",
+    "shapely",
+]
+
diff --git a/benches/requirements.txt b/benches/requirements.txt
new file mode 100644
index 0000000..e949e11
--- /dev/null
+++ b/benches/requirements.txt
@@ -0,0 +1,7 @@
+geoindex-rs
+geopandas
+pyogrio
+pyarrow
+numpy
+requests
+shapely
diff --git a/benches/rtree.py b/benches/rtree.py
new file mode 100644
index 0000000..5fc7f03
--- /dev/null
+++ b/benches/rtree.py
@@ -0,0 +1,59 @@
+"""Benchmark R-tree construction over the nz-building-outlines bounds."""
+import geopandas as gpd
+import numpy as np
+import shapely
+from geoindex_rs import rtree as rt
+import timeit
+import pyogrio
+import requests
+
+
+def load_data():
+    """Load the building outlines and return their WGS84 bounding boxes.
+
+    Returns:
+        The ``bounds`` frame of the reprojected GeoDataFrame, with ``minx``,
+        ``miny``, ``maxx`` and ``maxy`` columns (one row per geometry).
+    """
+    path = "./bench_data/nz-building-outlines.parquet"
+    gdf = gpd.read_parquet(path)
+    wgs84_gdf = gdf.to_crs("epsg:4326")
+    bounds = wgs84_gdf.bounds
+    print(bounds)
+    return bounds
+
+
+def construct_wsg84_tree(bounds):
+    """Build a geoindex-rs R-tree holding one box per row of ``bounds``."""
+    builder = rt.RTreeBuilder(bounds.shape[0])
+    min_x = np.array(bounds["minx"].values)
+    min_y = np.array(bounds["miny"].values)
+    max_x = np.array(bounds["maxx"].values)
+    max_y = np.array(bounds["maxy"].values)
+    builder.add(min_x, min_y, max_x, max_y)
+    return builder.finish()
+
+
+def construct_shapely_tree(bounds):
+    """Build a shapely STRtree holding one box per row of ``bounds``.
+
+    shapely's spatial index class is ``STRtree`` and it is constructed from
+    geometries rather than an item count, so every bounding box is first
+    turned into a polygon with the vectorized ``shapely.box``.
+    """
+    boxes = shapely.box(
+        bounds["minx"].values,
+        bounds["miny"].values,
+        bounds["maxx"].values,
+        bounds["maxy"].values,
+    )
+    return shapely.STRtree(boxes)
+
+
+if __name__ == "__main__":
+    bounds = load_data()
+
+    # globals() makes the function and ``bounds`` visible to the timed code.
+    time = timeit.timeit(stmt='construct_wsg84_tree(bounds)', number=100,
+                         globals=globals())
+    print(f"Rtree time: {time:.2f} seconds for 100 iterations")
diff --git a/benches/rtree.rs b/benches/rtree.rs
index e8c8061..1361f51 100644
--- a/benches/rtree.rs
+++ b/benches/rtree.rs
@@ -9,7 +9,7 @@ use rstar::AABB;
 use std::fs::read;
 
 fn load_data() -> Vec {
-    let buf = read("benches/bounds.raw").unwrap();
+    let buf = read("benches/bench_data/bounds.raw").unwrap();
     cast_slice(&buf).to_vec()
 }
 
diff --git a/scripts/bench.sh b/scripts/bench.sh
new file mode 100755
index 0000000..2c3d015
--- /dev/null
+++ b/scripts/bench.sh
@@ -0,0 +1,39 @@
+#!/bin/bash
+mkdir -p ./benches/bench_data
+cd ./benches/bench_data
+if [ ! -f "Utah.geojson.zip" ]
+  then
+    echo "Downloading geojson benchmark data..."
+    wget https://minedbuildings.z5.web.core.windows.net/legacy/usbuildings-v2/Utah.geojson.zip
+  else
+    echo "Benchmark data already downloaded"
+fi
+if [ ! -f "Utah.geojson" ]
+  then
+    echo "Unzipping Utah.geojson.zip.."
+    unzip Utah.geojson.zip
+  else
+    echo "Utah.geojson already unzipped "
+fi
+
+if [ ! -f "nz-building-outlines.parquet" ]
+  then
+    echo "Downloading parquet benchmark data..."
+    wget https://storage.googleapis.com/open-geodata/linz-examples/nz-building-outlines.parquet
+  else
+    echo "Parquet Benchmark data already downloaded"
+fi
+
+
+cd ../
+uv venv
+source .venv/bin/activate
+uv pip install -r pyproject.toml
+uv run generate_data.py
+cd ../
+echo "Running base benchmarks..."
+cargo bench --bench rtree
+echo "Running benchmarks with rayon feature..."
+cargo bench --bench rtree --features rayon
+cd ./benches
+uv run rtree.py