@@ -1,6 +1,9 @@
-from raphtory import DiskGraph, Query, State, PyDirection
+from raphtory import PyDirection, DiskGraphStorage
+from raphtory import algorithms
 import pandas as pd
 import tempfile
+from utils import measure
+import os
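+# Note: `measure` comes from the local test utils; judging by the call sites
+# below, measure(label, fn, *args, print_result=...) times fn(*args) and
+# returns its result.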
 
 edges = pd.DataFrame(
     {
@@ -33,7 +36,7 @@
 
 
 def create_graph(edges, dir):
-    return DiskGraph.load_from_pandas(dir, edges, "src", "dst", "time")
+    return DiskGraphStorage.load_from_pandas(dir, edges, "src", "dst", "time")
 
 
 # in every test, use `with` to create a temporary directory that will be deleted automatically
@@ -42,113 +45,159 @@ def create_graph(edges, dir):
 
 def test_counts():
     dir = tempfile.TemporaryDirectory()
-    graph = create_graph(edges, dir.name)
+    graph = create_graph(edges, dir.name).to_events()
     assert graph.count_nodes() == 5
     assert graph.count_edges() == 20
 
 
-def test_simple_hop():
-    dir = tempfile.TemporaryDirectory()
-    graph = create_graph(edges, dir.name)
-    q = Query.from_node_ids([1]).hop(dir=PyDirection("OUT"), layer=None, limit=100)
-    state = State.path()
-    actual = q.run_to_vec(graph, state)
-
-    actual = [([n2.name, n1.name], n2.name) for ([n2, n1], n2) in actual]
-
-    expected = [
-        (["2", "1"], "2"),
-        (["3", "1"], "3"),
-        (["4", "1"], "4"),
-        (["5", "1"], "5"),
-    ]
-
-    actual.sort()
-    expected.sort()
-
-    assert actual == expected
-
-
-def test_simple_hop_from_node():
-    dir = tempfile.TemporaryDirectory()
-    graph = create_graph(edges, dir.name)
-    node = graph.node(1)
-    q = Query.from_node_ids([node]).out()
-    state = State.path()
-    actual = q.run_to_vec(graph, state)
-
-    actual = [([n2.name, n1.name], n2.name) for ([n2, n1], n2) in actual]
-
-    expected = [
-        (["2", "1"], "2"),
-        (["3", "1"], "3"),
-        (["4", "1"], "4"),
-        (["5", "1"], "5"),
-    ]
-
-    actual.sort()
-    expected.sort()
-
-    assert actual == expected
+def test_disk_graph():
+    curr_dir = os.path.dirname(os.path.abspath(__file__))
+    rsc_dir = os.path.join(curr_dir, "..", "..", "pometry-storage-private", "resources")
+    rsc_dir = os.path.normpath(rsc_dir)
+    print("rsc_dir:", rsc_dir + "/netflowsorted/nft_sorted")
 
-
-def test_double_hop():
-    dir = tempfile.TemporaryDirectory()
-    graph = create_graph(edges, dir.name)
-    q = Query.from_node_ids([1]).out().out()
-    state = State.path()
-    actual = q.run_to_vec(graph, state)
-
-    actual = [([n3.name, n2.name, n1.name], n3.name) for ([n3, n2, n1], n3) in actual]
-
-    expected = [
-        (["1", "5", "1"], "1"),
-        (["2", "4", "1"], "2"),
-        (["5", "3", "1"], "5"),
-        (["2", "5", "1"], "2"),
-        (["4", "2", "1"], "4"),
-        (["4", "3", "1"], "4"),
-        (["1", "4", "1"], "1"),
-        (["3", "2", "1"], "3"),
-        (["3", "4", "1"], "3"),
-        (["5", "2", "1"], "5"),
-        (["1", "2", "1"], "1"),
-        (["5", "4", "1"], "5"),
-        (["2", "3", "1"], "2"),
-        (["1", "3", "1"], "1"),
-        (["3", "5", "1"], "3"),
-        (["4", "5", "1"], "4"),
+    graph_dir = tempfile.TemporaryDirectory()
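+    # One entry per edge layer: each dict points at a directory of sorted
+    # parquet files and names the layer plus its src/dst/time columns.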
+    layer_parquet_cols = [
+        {
+            "parquet_dir": rsc_dir + "/netflowsorted/nft_sorted",
+            "layer": "netflow",
+            "src_col": "src",
+            "dst_col": "dst",
+            "time_col": "epoch_time",
+        },
+        {
+            "parquet_dir": rsc_dir + "/netflowsorted/v1_sorted",
+            "layer": "events_1v",
+            "src_col": "src",
+            "dst_col": "dst",
+            "time_col": "epoch_time",
+        },
+        {
+            "parquet_dir": rsc_dir + "/netflowsorted/v2_sorted",
+            "layer": "events_2v",
+            "src_col": "src",
+            "dst_col": "dst",
+            "time_col": "epoch_time",
+        },
     ]
 
-    actual.sort()
-    expected.sort()
-
-    assert actual == expected
-
-
-def test_hop_twice_forward():
-    dir = tempfile.TemporaryDirectory()
-    edges = pd.DataFrame(
+    # # Read the Parquet file
+    # table = pq.read_table(parquet_dir + '/part-00000-8b31eaa4-2bd9-4f07-b61c-a353aed2af22-c000.snappy.parquet')
+    # print(table.schema)
+
+    print()
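+    # graph_dir is a fresh temporary directory, so the load below is expected
+    # to fail on a first run; the except branch then builds the graph from the
+    # parquet layers instead.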
+    try:
+        g = measure(
+            "Graph load from dir",
+            DiskGraphStorage.load_from_dir,
+            graph_dir,
+            print_result=False,
+        )
+    except Exception as e:
+        chunk_size = 268_435_456
+        num_threads = 4
+        t_props_chunk_size = int(chunk_size / 8)
+        read_chunk_size = 4_000_000
+        concurrent_files = 1
+
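+        # The two `None`s are the optional node-property parquet path and the
+        # node-type column, both unused here (test_disk_graph_type_filter below
+        # fills in both).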
+        g = measure(
+            "Graph load from parquets",
+            DiskGraphStorage.load_from_parquets,
+            graph_dir.name,
+            layer_parquet_cols,
+            None,
+            chunk_size,
+            t_props_chunk_size,
+            read_chunk_size,
+            concurrent_files,
+            num_threads,
+            None,
+            print_result=False,
+        )
+
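+    # to_events() exposes the on-disk storage as an event graph for querying.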
+    g = g.to_events()
+
+    assert g.count_nodes() == 1624
+    assert g.layer("netflow").count_edges() == 2018
+    assert g.earliest_time == 7257601
+    assert g.latest_time == 7343985
+
+    actual = measure(
+        "Weakly CC Layer",
+        algorithms.weakly_connected_components,
+        g.layer("netflow"),
+        20,
+        print_result=False,
+    )
+    assert len(list(actual.get_all_with_names())) == 1624
+
+    # Doesn't work yet: this used to run silently on only the first layer, and
+    # now panics because multilayer edge views are not implemented yet.
+    # actual = measure("Weakly CC", algorithms.weakly_connected_components, g, 20, print_result=False)
+    # assert len(list(actual.get_all_with_names())) == 1624
+
+    actual = measure(
+        "Page Rank", algorithms.pagerank, g.layer("netflow"), 100, print_result=False
+    )
+    assert len(list(actual.get_all_with_names())) == 1624
+
+def test_disk_graph_type_filter():
+    curr_dir = os.path.dirname(os.path.abspath(__file__))
+    rsc_dir = os.path.join(curr_dir, "..", "..", "pometry-storage-private", "resources")
+    rsc_dir = os.path.normpath(rsc_dir)
+    print("rsc_dir:", rsc_dir + "/netflowsorted/nft_sorted")
+
+    graph_dir = tempfile.TemporaryDirectory()
+    layer_parquet_cols = [
         {
-            "src": [0, 0, 1, 1, 3, 3, 3, 4, 4, 4],
-            "dst": [1, 2, 3, 4, 5, 6, 6, 3, 4, 7],
-            "time": [11, 10, 12, 13, 5, 10, 15, 14, 14, 10],
+            "parquet_dir": rsc_dir + "/netflowsorted/nft_sorted",
+            "layer": "netflow",
+            "src_col": "src",
+            "dst_col": "dst",
+            "time_col": "epoch_time",
         }
-    ).sort_values(["src", "dst", "time"])
-    graph = create_graph(edges, dir.name)
-    q = Query.from_node_ids([0, 1]).out().out()
-    state = State.path_window(keep_path=True, start_t=10, duration=100)
-    actual = q.run_to_vec(graph, state)
-
-    actual = [([n3.name, n2.name, n1.name], n3.name) for ([n3, n2, n1], n3) in actual]
-
-    expected = [
-        (["6", "3", "1"], "6"),
-        (["3", "1", "0"], "3"),
-        (["3", "4", "1"], "3"),
-        (["4", "4", "1"], "4"),
-        (["4", "1", "0"], "4"),
     ]
-    actual.sort()
-    expected.sort()
-    assert actual == expected
+
+    chunk_size = 268_435_456
+    num_threads = 4
+    t_props_chunk_size = int(chunk_size / 8)
+    read_chunk_size = 4_000_000
+    concurrent_files = 1
+
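+    # Unlike test_disk_graph, this load also reads props.parquet and its
+    # "node_type" column, giving nodes the "A"/"B" types that the type_filter
+    # assertions below rely on.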
+    g = DiskGraphStorage.load_from_parquets(
+        graph_dir.name,
+        layer_parquet_cols,
+        rsc_dir + "/netflowsorted/props/props.parquet",
+        chunk_size,
+        t_props_chunk_size,
+        read_chunk_size,
+        concurrent_files,
+        num_threads,
+        "node_type",
+    ).to_events()
+
+    assert g.count_nodes() == 1619
+    assert g.layer("netflow").count_edges() == 2018
+    assert g.earliest_time == 7257619
+    assert g.latest_time == 7343970
+
+    assert len(g.nodes.type_filter(["A"]).name.collect()) == 785
+    assert len(g.nodes.type_filter([""]).name.collect()) == 0
+    assert len(g.nodes.type_filter(["A", "B"]).name.collect()) == 1619
+
+    neighbor_names = g.nodes.type_filter(["A"]).neighbours.name.collect()
+    total_length = sum(len(names) for names in neighbor_names)
+    assert total_length == 2056
+
+    assert g.nodes.type_filter([]).name.collect() == []
+
+    neighbor_names = g.nodes.type_filter(["A"]).neighbours.type_filter(["B"]).name.collect()
+    total_length = sum(len(names) for names in neighbor_names)
+    assert total_length == 1023
+
+    assert g.node("Comp175846").neighbours.type_filter(["A"]).name.collect() == ["Comp844043"]
+    assert g.node("Comp175846").neighbours.type_filter(["B"]).name.collect() == []
+    assert g.node("Comp175846").neighbours.type_filter([]).name.collect() == []
+    assert g.node("Comp175846").neighbours.type_filter(["A", "B"]).name.collect() == ["Comp844043"]
+
+    neighbor_names = g.node("Comp175846").neighbours.neighbours.name.collect()
+    assert len(neighbor_names) == 193