Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Impl index based filtering for Raphtory APIs alongside Search APIs #1899

Draft
wants to merge 54 commits into
base: master
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
54 commits
Select commit Hold shift + click to select a range
d07e1a4
move index to graph storage
shivamka1 Dec 12, 2024
6fbad8e
fix python, graphql
shivamka1 Dec 12, 2024
ee77cc3
fix feature falg
shivamka1 Dec 12, 2024
e9d7f2d
fix docs
shivamka1 Dec 12, 2024
98c44d4
fmt
shivamka1 Dec 12, 2024
e78c468
add feature dep
shivamka1 Dec 13, 2024
587b5e9
impl filter collector
shivamka1 Dec 13, 2024
138328f
impl custom node filter and node history filter
shivamka1 Dec 17, 2024
e829f21
add missing node history filter impl
shivamka1 Dec 17, 2024
b4504a7
fix AND node search queries
shivamka1 Dec 18, 2024
edb4db9
ref
shivamka1 Dec 18, 2024
d204a5c
prop indexes
shivamka1 Jan 9, 2025
ef3aad9
ref property filter and impl composite filter
shivamka1 Jan 14, 2025
ce62726
intro propertyfitervalue
shivamka1 Jan 15, 2025
fce047b
impl nodefilter and node query builder
shivamka1 Jan 15, 2025
e95da50
impl node filter executor, add tokenizer to properties, fix tests
shivamka1 Jan 16, 2025
286d766
ref
shivamka1 Jan 17, 2025
ab34649
ref composite filter and add search api tests
shivamka1 Jan 17, 2025
e290ce2
introduce composite edge filter and add tests
shivamka1 Jan 17, 2025
9a0994c
simplify query builder
shivamka1 Jan 17, 2025
89dd9a2
impl edge query builder, executor, impl tests, ref
shivamka1 Jan 20, 2025
0ff83e7
add new to searcher
shivamka1 Jan 20, 2025
0b7eeda
edge query executor, searcher ref
shivamka1 Jan 20, 2025
c0bbcc8
rename query executors to filter executor and similar apis
shivamka1 Jan 20, 2025
d62fa25
impl filter counts and add tests
shivamka1 Jan 20, 2025
ce17018
ref
shivamka1 Jan 21, 2025
5ce1f68
create test mods per api for easier testing/debuging
shivamka1 Jan 21, 2025
eda1d21
add tests
shivamka1 Jan 21, 2025
bb79366
fix tests and filtering
shivamka1 Jan 21, 2025
3f1d23d
merge from master
shivamka1 Jan 21, 2025
898faa8
update pometry-storage-private
shivamka1 Jan 21, 2025
348198f
update pometry-storage-private
shivamka1 Jan 21, 2025
de4f6c4
impl basic benchmark code
shivamka1 Jan 21, 2025
3643582
fix bench
shivamka1 Jan 21, 2025
5c1d6fe
fix issue with property indexes creation
shivamka1 Jan 22, 2025
9e6491b
create property indexes earlier
shivamka1 Jan 23, 2025
2e37a54
fix index creation, ref
shivamka1 Jan 23, 2025
c6aa23b
improve benches
shivamka1 Jan 24, 2025
cb7d756
add more benches
shivamka1 Jan 24, 2025
08b0675
more benches
shivamka1 Jan 24, 2025
bcbb532
ref schemas
shivamka1 Jan 24, 2025
aef4cf9
fix edge index metadata
ljeub-pometry Jan 24, 2025
373c924
fix the raphtory search bench
ljeub-pometry Jan 24, 2025
e063502
Merge branch 'master' into feature/storage_index
shivamka1 Jan 27, 2025
a231c45
rid unnecessary mutex, fix benches
shivamka1 Jan 27, 2025
70868e7
add raphtory social graph data generator and graph loader, add more b…
shivamka1 Jan 29, 2025
ab9bac5
fix CArgo
shivamka1 Jan 29, 2025
c4d6a36
Merge branch 'feature/storage_index' of github.com:Pometry/Raphtory i…
shivamka1 Jan 29, 2025
e095c96
add more benches
shivamka1 Jan 29, 2025
4e2ad9d
improve accuracy of benches by moving setup to iter_batched setup fun…
shivamka1 Jan 29, 2025
729c131
ref benches
shivamka1 Jan 30, 2025
2b1ae03
feed random data into benches from random graph, phew! 🥵
shivamka1 Jan 31, 2025
df80e1c
ref loader
shivamka1 Jan 31, 2025
1f9280e
load graph while generation
shivamka1 Jan 31, 2025
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 1 addition & 4 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

3 changes: 2 additions & 1 deletion Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -94,7 +94,7 @@ quad-rand = "0.2.1"
zip = "2.1.3"
neo4rs = "0.8.0"
bzip2 = "0.4.4"
tantivy = "0.22"
tantivy = "0.22.0"
async-trait = "0.1.77"
async-openai = "0.26.0"
oauth2 = "4.0"
Expand Down Expand Up @@ -156,3 +156,4 @@ arrow-data = { version = "53.2.0" }
arrow-ipc = { version = "53.2.0" }
moka = { version = "0.12.7", features = ["sync"] }
indexmap = { version = "2.7.0", features = ["rayon"] }
fake = { version = "3.1.0", features = ["chrono"] }
22 changes: 22 additions & 0 deletions js-raphtory/src/graph/graph_view_impl.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
use super::{Graph, UnderGraph};
use raphtory::db::api::view::{
internal::{InheritIndexSearch, InheritNodeHistoryFilter},
Base, BoxableGraphView, InheritViewOps,
};

/// Lets the wrapper `Graph` be viewed through the graph it wraps, as a
/// type-erased `BoxableGraphView` trait object.
impl Base for Graph {
    type Base = dyn BoxableGraphView + Send + Sync + 'static;

    /// Borrows the graph held by whichever `UnderGraph` variant is stored in
    /// this wrapper's first field.
    fn base(&self) -> &Self::Base {
        let inner = &self.0;
        match inner {
            UnderGraph::TGraph(graph) => graph.as_ref(),
            UnderGraph::WindowedGraph(windowed) => windowed.as_ref(),
        }
    }
}

// Empty impl: `Graph` overrides nothing from `InheritViewOps`.
// NOTE(review): presumably this forwards view operations to the graph
// returned by `Base::base` — confirm against the trait definition.
impl InheritViewOps for Graph {}

// Empty impl: `Graph` overrides nothing from `InheritIndexSearch`.
// NOTE(review): presumably this makes index-backed search delegate to the
// wrapped graph — confirm against the trait definition.
impl InheritIndexSearch for Graph {}

// Empty impl: `Graph` overrides nothing from `InheritNodeHistoryFilter`.
// NOTE(review): presumably this makes node-history filtering delegate to the
// wrapped graph — confirm against the trait definition.
impl InheritNodeHistoryFilter for Graph {}
16 changes: 7 additions & 9 deletions python/tests/test_graphdb/test_graphdb.py
Original file line number Diff line number Diff line change
Expand Up @@ -2956,44 +2956,42 @@ def test_fuzzy_search():

# @with_disk_graph # FIXME: Indexing doesn't seem to return layers or pass the layer id to the storage, which then blows up in the storage
def check(g):
index = g.index()

assert len(index.fuzzy_search_nodes("name:habza", levenshtein_distance=1)) == 1
assert len(g.fuzzy_search_nodes("name:habza", levenshtein_distance=1)) == 1
assert (
len(
index.fuzzy_search_nodes(
g.fuzzy_search_nodes(
"name:haa", levenshtein_distance=1, prefix=True
)
)
== 2
)
assert (
len(
index.fuzzy_search_nodes(
g.fuzzy_search_nodes(
"value_str:abc123", levenshtein_distance=2, prefix=True
)
)
== 2
)
assert (
len(
index.fuzzy_search_nodes(
g.fuzzy_search_nodes(
"value_str:dsss312", levenshtein_distance=2, prefix=False
)
)
== 1
)

assert len(index.fuzzy_search_edges("from:bon", levenshtein_distance=1)) == 2
assert len(g.fuzzy_search_edges("from:bon", levenshtein_distance=1)) == 2
assert (
len(
index.fuzzy_search_edges("from:bo", levenshtein_distance=1, prefix=True)
g.fuzzy_search_edges("from:bo", levenshtein_distance=1, prefix=True)
)
== 2
)
assert (
len(
index.fuzzy_search_edges(
g.fuzzy_search_edges(
"from:eon", levenshtein_distance=2, prefix=True
)
)
Expand Down
62 changes: 27 additions & 35 deletions python/tests/test_index.py
Original file line number Diff line number Diff line change
Expand Up @@ -47,32 +47,30 @@ def test_search_in_python():
},
)

index = g.index()

# Name tests
assert len(index.search_nodes("name:ben")) == 1
assert len(index.search_nodes("name:ben OR name:hamza")) == 2
assert len(index.search_nodes("name:ben AND name:hamza")) == 0
assert len(index.search_nodes("name: IN [ben, hamza]")) == 2
assert len(g.search_nodes("name:ben")) == 1
assert len(g.search_nodes("name:ben OR name:hamza")) == 2
assert len(g.search_nodes("name:ben AND name:hamza")) == 0
assert len(g.search_nodes("name: IN [ben, hamza]")) == 2

# Property tests
# assert len(index.search_nodes("value:<120 OR value_f:>30")) == 3
assert len(index.search_nodes("value:199")) == 1
# assert len(index.search_nodes("value:[0 TO 60]")) == 2
# assert len(index.search_nodes("value:[0 TO 60}")) == 1 # } == exclusive
# assert len(index.search_nodes("value:>59 AND value_str:abc123")) == 1
# assert len(g.search_nodes("value:<120 OR value_f:>30")) == 3
assert len(g.search_nodes("value:199")) == 1
# assert len(g.search_nodes("value:[0 TO 60]")) == 2
# assert len(g.search_nodes("value:[0 TO 60}")) == 1 # } == exclusive
# assert len(g.search_nodes("value:>59 AND value_str:abc123")) == 1

# edge tests
assert len(index.search_edges("from:ben")) == 2
assert len(index.search_edges("from:ben OR from:haaroon")) == 3
assert len(index.search_edges("to:haaroon AND from:ben")) == 1
assert len(index.search_edges("to: IN [ben, hamza]")) == 2
assert len(g.search_edges("from:ben")) == 2
assert len(g.search_edges("from:ben OR from:haaroon")) == 3
assert len(g.search_edges("to:haaroon AND from:ben")) == 1
assert len(g.search_edges("to: IN [ben, hamza]")) == 2

# edge prop tests
# assert len(index.search_edges("value:<120 OR value_f:>30")) == 3
# assert len(index.search_edges("value:[0 TO 60]")) == 2
# assert len(index.search_edges("value:[0 TO 60}")) == 1 # } == exclusive
# assert len(index.search_edges("value:>59 AND value_str:abc123")) == 1
# assert len(g.search_edges("value:<120 OR value_f:>30")) == 3
# assert len(g.search_edges("value:[0 TO 60]")) == 2
# assert len(g.search_edges("value:[0 TO 60}")) == 1 # } == exclusive
# assert len(g.search_edges("value:>59 AND value_str:abc123")) == 1

# Multiple history points test
g = Graph()
Expand All @@ -86,25 +84,22 @@ def test_search_in_python():
3, "hamza", properties={"value": 80, "value_f": 11.3, "value_str": "dsc2312"}
)

index = g.index()

# The semantics here are that each expression independently needs to evaluate to true at ANY point in the lifetime of the node - hence hamza is returned even though at no point does he have both these values at the same time
# assert len(index.search_nodes("value:<70 AND value_f:<19.2")) == 1
# assert len(g.search_nodes("value:<70 AND value_f:<19.2")) == 1

g.add_node(
4, "hamza", properties={"value": 100, "value_f": 11.3, "value_str": "dsc2312"}
)
# the graph isn't currently reindexed so this will not return hamza even though he now has a value which fits the bill
# assert len(index.search_nodes("value:>99")) == 0
# assert len(g.search_nodes("value:>99")) == 0


def test_type_search():
g = Graph()
ben = g.add_node(1, "ben", node_type="type_1")
hamza = g.add_node(2, "hamza", node_type="type_2")
indexed = g.index()
assert indexed.search_nodes("node_type:type_1") == [ben]
assert set(indexed.search_nodes("node_type:type_1 OR node_type:type_2")) == {
assert g.search_nodes("node_type:type_1") == [ben]
assert set(g.search_nodes("node_type:type_1 OR node_type:type_2")) == {
hamza,
ben,
}
Expand Down Expand Up @@ -150,16 +145,15 @@ def test_search_with_windows():

w_g = g.window(1, 3)

w_index = w_g.index()

# Testing if windowing works - ben shouldn't be included and Hamza should only have max value of 70
assert len(w_index.search_nodes("name:ben")) == 0
assert len(w_index.search_nodes("value:70")) == 1
assert len(w_g.search_nodes("name:ben")) == 0
assert len(w_g.search_nodes("value:70")) == 1
# assert len(w_g.search_nodes("value:>80")) == 0

assert len(w_index.search_edges("from:ben")) == 0
assert len(w_g.search_edges("from:ben")) == 0
# assert len(w_g.search_edges("from:haaroon AND value:>70")) == 0
assert len(w_index.search_edges("from:haaroon AND to:hamza")) == 1
assert len(w_g.search_edges("from:haaroon AND to:hamza")) == 1


def test_search_with_subgraphs():
Expand Down Expand Up @@ -188,10 +182,8 @@ def test_search_with_subgraphs():
)
g.add_edge(4, "hamza", "naomi")

index = g.index()
assert len(index.search_edges("from:hamza OR to:hamza")) == 3
assert len(g.search_edges("from:hamza OR to:hamza")) == 3

subgraph = g.subgraph([g.node("ben"), g.node("hamza"), g.node("haaroon")])
index = subgraph.index()

assert len(index.search_edges("from:hamza OR to:hamza")) == 2
assert len(subgraph.search_edges("from:hamza OR to:hamza")) == 2
12 changes: 8 additions & 4 deletions raphtory-benchmark/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -9,15 +9,12 @@ edition = "2021"
criterion = { workspace = true }
raphtory = { path = "../raphtory", features = ["io", "proto"], version = "0.14.0" }
raphtory-api = { path = "../raphtory-api", version = "0.14.0" }
pometry-storage.workspace = true
sorted_vector_map = { workspace = true }
rand = { workspace = true }
rayon = { workspace = true }
tempfile = { workspace = true }
clap = { workspace = true }
csv = { workspace = true }
flate2 = { workspace = true }
tracing = {workspace = true}
once_cell = { workspace = true }

[[bench]]
name = "tgraph_benchmarks"
Expand Down Expand Up @@ -58,3 +55,10 @@ harness = false
# [[bench]]
# name = "arrow_algobench"
# harness = false

[[bench]]
name = "search_bench"
harness = false

[features]
search = ["raphtory/search"]
Loading