Skip to content

Commit c596410

Browse files
Various improvements for disk graph (#1866)
* move LayerAdditions to disk and LayerIds to api
* disable pom-storage
* fix edge_frame_builder progress bar
* fix the python node properties tests
* add masked graph
* just collect in parallel to vec when masking
* add test to verify masked graph equals graph view
* remove some useless checks
* disable progress bars in graph loading from default
* fix some issues from review
* rename to cache_view
* changes as per review
1 parent f37eca6 commit c596410

File tree

23 files changed

+664
-298
lines changed

23 files changed

+664
-298
lines changed

Cargo.lock

+12
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

Cargo.toml

+1
Original file line numberDiff line numberDiff line change
@@ -62,6 +62,7 @@ numpy = "0.22.1"
6262
itertools = "0.13.0"
6363
rand = "0.8.5"
6464
rayon = "1.8.1"
65+
roaring = "0.10.6"
6566
sorted_vector_map = "0.2.0"
6667
tokio = { version = "1.36.0", features = ["full"] }
6768
once_cell = "1.19.0"

raphtory-api/src/core/entities/mod.rs

+182
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,13 @@
11
use super::input::input_node::parse_u64_strict;
22
use bytemuck::{Pod, Zeroable};
3+
use edges::edge_ref::EdgeRef;
34
use num_traits::ToPrimitive;
5+
use rayon::prelude::*;
46
use serde::{Deserialize, Serialize};
57
use std::{
68
borrow::Cow,
79
fmt::{Display, Formatter},
10+
sync::Arc,
811
};
912

1013
pub mod edges;
@@ -311,3 +314,182 @@ impl<'a> GidRef<'a> {
311314
}
312315
}
313316
}
317+
318+
#[derive(Clone, Debug)]
319+
pub enum LayerIds {
320+
None,
321+
All,
322+
One(usize),
323+
Multiple(Multiple),
324+
}
325+
326+
/// A list of layer ids held in shared, reference-counted storage.
///
/// The default value is an empty list.
#[derive(Clone, Debug, Default)]
pub struct Multiple(pub Arc<[usize]>);
328+
329+
impl Multiple {
330+
#[inline]
331+
pub fn binary_search(&self, pos: &usize) -> Option<usize> {
332+
self.0.binary_search(pos).ok()
333+
}
334+
335+
#[inline]
336+
pub fn into_iter(&self) -> impl Iterator<Item = usize> {
337+
let ids = self.0.clone();
338+
(0..ids.len()).map(move |i| ids[i])
339+
}
340+
341+
#[inline]
342+
pub fn iter(&self) -> impl Iterator<Item = usize> + '_ {
343+
self.0.iter().copied()
344+
}
345+
346+
#[inline]
347+
pub fn find(&self, id: usize) -> Option<usize> {
348+
self.0.get(id).copied()
349+
}
350+
351+
#[inline]
352+
pub fn par_iter(&self) -> impl rayon::iter::ParallelIterator<Item = usize> {
353+
let bit_vec = self.0.clone();
354+
(0..bit_vec.len()).into_par_iter().map(move |i| bit_vec[i])
355+
}
356+
357+
#[inline]
358+
pub fn len(&self) -> usize {
359+
self.0.len()
360+
}
361+
}
362+
363+
impl FromIterator<usize> for Multiple {
364+
fn from_iter<I: IntoIterator<Item = usize>>(iter: I) -> Self {
365+
Multiple(iter.into_iter().collect())
366+
}
367+
}
368+
369+
impl From<Vec<usize>> for Multiple {
370+
fn from(v: Vec<usize>) -> Self {
371+
v.into_iter().collect()
372+
}
373+
}
374+
375+
#[cfg(test)]
mod test {
    use crate::core::entities::Multiple;

    /// A default-constructed `Multiple` yields no ids.
    #[test]
    fn empty_bit_multiple() {
        let ids = super::Multiple::default();
        let expected: Vec<usize> = Vec::new();
        let collected: Vec<usize> = ids.into_iter().collect();
        assert_eq!(collected, expected);
    }

    /// A single collected id is returned unchanged.
    #[test]
    fn set_one() {
        let ids: Multiple = [1].into_iter().collect();
        let collected: Vec<usize> = ids.into_iter().collect();
        assert_eq!(collected, vec![1usize]);
    }

    /// Two collected ids are returned in the order they were supplied.
    #[test]
    fn set_two() {
        let ids: Multiple = [1, 67].into_iter().collect();
        let collected: Vec<usize> = ids.into_iter().collect();
        assert_eq!(collected, vec![1usize, 67]);
    }
}
402+
403+
impl LayerIds {
404+
pub fn find(&self, layer_id: usize) -> Option<usize> {
405+
match self {
406+
LayerIds::All => Some(layer_id),
407+
LayerIds::One(id) => {
408+
if *id == layer_id {
409+
Some(layer_id)
410+
} else {
411+
None
412+
}
413+
}
414+
LayerIds::Multiple(ids) => ids.binary_search(&layer_id).map(|_| layer_id),
415+
LayerIds::None => None,
416+
}
417+
}
418+
419+
pub fn intersect(&self, other: &LayerIds) -> LayerIds {
420+
match (self, other) {
421+
(LayerIds::None, _) => LayerIds::None,
422+
(_, LayerIds::None) => LayerIds::None,
423+
(LayerIds::All, other) => other.clone(),
424+
(this, LayerIds::All) => this.clone(),
425+
(LayerIds::One(id), other) => {
426+
if other.contains(id) {
427+
LayerIds::One(*id)
428+
} else {
429+
LayerIds::None
430+
}
431+
}
432+
(LayerIds::Multiple(ids), other) => {
433+
let ids: Vec<usize> = ids.iter().filter(|id| other.contains(id)).collect();
434+
match ids.len() {
435+
0 => LayerIds::None,
436+
1 => LayerIds::One(ids[0]),
437+
_ => LayerIds::Multiple(ids.into()),
438+
}
439+
}
440+
}
441+
}
442+
443+
pub fn constrain_from_edge(&self, e: EdgeRef) -> Cow<LayerIds> {
444+
match e.layer() {
445+
None => Cow::Borrowed(self),
446+
Some(l) => self
447+
.find(l)
448+
.map(|id| Cow::Owned(LayerIds::One(id)))
449+
.unwrap_or(Cow::Owned(LayerIds::None)),
450+
}
451+
}
452+
453+
pub fn contains(&self, layer_id: &usize) -> bool {
454+
self.find(*layer_id).is_some()
455+
}
456+
457+
pub fn is_none(&self) -> bool {
458+
matches!(self, LayerIds::None)
459+
}
460+
}
461+
462+
impl From<Vec<usize>> for LayerIds {
463+
fn from(mut v: Vec<usize>) -> Self {
464+
match v.len() {
465+
0 => LayerIds::All,
466+
1 => LayerIds::One(v[0]),
467+
_ => {
468+
v.sort_unstable();
469+
v.dedup();
470+
LayerIds::Multiple(v.into())
471+
}
472+
}
473+
}
474+
}
475+
476+
impl<const N: usize> From<[usize; N]> for LayerIds {
477+
fn from(v: [usize; N]) -> Self {
478+
match v.len() {
479+
0 => LayerIds::All,
480+
1 => LayerIds::One(v[0]),
481+
_ => {
482+
let mut v = v.to_vec();
483+
v.sort_unstable();
484+
v.dedup();
485+
LayerIds::Multiple(v.into())
486+
}
487+
}
488+
}
489+
}
490+
491+
impl From<usize> for LayerIds {
492+
fn from(id: usize) -> Self {
493+
LayerIds::One(id)
494+
}
495+
}

raphtory-api/src/core/utils/logging.rs

+8
Original file line numberDiff line numberDiff line change
@@ -39,3 +39,11 @@ pub fn global_debug_logger() {
3939
pub fn global_trace_logger() {
4040
init_global_logger("TRACE".to_string())
4141
}
42+
43+
pub fn sysout_debug() {
44+
tracing_subscriber::fmt::fmt()
45+
.with_max_level(tracing::Level::DEBUG)
46+
.with_target(false)
47+
.with_span_events(FmtSpan::ENTER | FmtSpan::CLOSE)
48+
.init();
49+
}

raphtory-cypher/Cargo.toml

+1
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,7 @@ arrow.workspace = true
2020
arrow-buffer.workspace = true
2121
arrow-schema.workspace = true
2222
arrow-array.workspace = true
23+
tracing-subscriber.workspace = true
2324

2425
pest.workspace = true
2526
pest_derive.workspace = true

raphtory-cypher/examples/raphtory_cypher.rs

+3-3
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,7 @@ mod cypher {
1414
use futures::{stream, StreamExt};
1515
use raphtory::{
1616
disk_graph::{graph_impl::ParquetLayerCols, DiskGraphStorage},
17-
logging::global_info_logger,
17+
logging::{global_info_logger, sysout_debug},
1818
};
1919
use raphtory_cypher::{run_cypher, run_cypher_to_streams, run_sql};
2020
use serde::{de::DeserializeOwned, Deserialize};
@@ -126,11 +126,11 @@ mod cypher {
126126

127127
// #[tokio::main]
128128
pub async fn main() {
129-
global_info_logger();
130129
let args = Args::parse();
131130

132131
match args {
133132
Args::Query(args) => {
133+
global_info_logger();
134134
let graph =
135135
DiskGraphStorage::load_from_dir(&args.graph_dir).expect("Failed to load graph");
136136

@@ -145,7 +145,6 @@ mod cypher {
145145

146146
let now = std::time::Instant::now();
147147
let batches = df.collect().await.unwrap();
148-
global_info_logger();
149148
info!("Query execution time: {:?}", now.elapsed());
150149
print_batches(&batches).expect("Failed to print batches");
151150
} else {
@@ -161,6 +160,7 @@ mod cypher {
161160
}
162161

163162
Args::Load(args) => {
163+
sysout_debug();
164164
let layers = args.layers;
165165
let layer_parquet_cols = (0..layers.len())
166166
.map(|layer_id| {

raphtory/Cargo.toml

+1
Original file line numberDiff line numberDiff line change
@@ -77,6 +77,7 @@ pometry-storage = { workspace = true, optional = true }
7777

7878
prost = { workspace = true, optional = true }
7979
prost-types = { workspace = true, optional = true }
80+
roaring ={ workspace = true }
8081

8182
[dev-dependencies]
8283
csv = { workspace = true }

raphtory/src/algorithms/community_detection/louvain.rs

+1
Original file line numberDiff line numberDiff line change
@@ -70,6 +70,7 @@ mod test {
7070
test_storage,
7171
};
7272
use proptest::prelude::*;
73+
#[cfg(feature = "io")]
7374
use tracing::info;
7475

7576
#[cfg(feature = "io")]

0 commit comments

Comments
 (0)