@@ -2101,6 +2101,113 @@ CREATE INDEX ON CycleTest (vec) LSM_VECTOR
21012101 deleteDirectory (dbDir );
21022102 }
21032103
2104+ @ Test
2105+ void filteredSearchByRID () {
2106+ database .transaction (() -> {
2107+ // Create the schema
2108+ database .command ("sql" , "CREATE DOCUMENT TYPE FilteredDoc" );
2109+ database .command ("sql" , "CREATE PROPERTY FilteredDoc.id STRING" );
2110+ database .command ("sql" , "CREATE PROPERTY FilteredDoc.category STRING" );
2111+ database .command ("sql" , "CREATE PROPERTY FilteredDoc.embedding ARRAY_OF_FLOATS" );
2112+
2113+ // Create the LSM_VECTOR index
2114+ database .command ("sql" , """
2115+ CREATE INDEX ON FilteredDoc (embedding) LSM_VECTOR
2116+ METADATA {
2117+ "dimensions": 3,
2118+ "similarity": "COSINE",
2119+ "maxConnections": 8,
2120+ "beamWidth": 50
2121+ }""" );
2122+ });
2123+
2124+ // Create test data with different categories
2125+ final List <com .arcadedb .database .RID > categoryARIDs = new ArrayList <>();
2126+ final List <com .arcadedb .database .RID > categoryBRIDs = new ArrayList <>();
2127+
2128+ database .transaction (() -> {
2129+ for (int i = 0 ; i < 20 ; i ++) {
2130+ final var doc = database .newDocument ("FilteredDoc" );
2131+ doc .set ("id" , "doc" + i );
2132+ doc .set ("category" , i < 10 ? "A" : "B" );
2133+
2134+ // Create vectors with some pattern based on category
2135+ final float [] vector = new float [3 ];
2136+ if (i < 10 ) {
2137+ // Category A: vectors around [1, 1, 1]
2138+ vector [0 ] = 1.0f + (i * 0.1f );
2139+ vector [1 ] = 1.0f + (i * 0.1f );
2140+ vector [2 ] = 1.0f + (i * 0.1f );
2141+ } else {
2142+ // Category B: vectors around [10, 10, 10]
2143+ vector [0 ] = 10.0f + ((i - 10 ) * 0.1f );
2144+ vector [1 ] = 10.0f + ((i - 10 ) * 0.1f );
2145+ vector [2 ] = 10.0f + ((i - 10 ) * 0.1f );
2146+ }
2147+ doc .set ("embedding" , vector );
2148+
2149+ final com .arcadedb .database .RID rid = doc .save ().getIdentity ();
2150+ if (i < 10 ) {
2151+ categoryARIDs .add (rid );
2152+ } else {
2153+ categoryBRIDs .add (rid );
2154+ }
2155+ }
2156+ });
2157+
2158+ // Get the index
2159+ final com .arcadedb .index .TypeIndex typeIndex = (com .arcadedb .index .TypeIndex ) database .getSchema ()
2160+ .getIndexByName ("FilteredDoc[embedding]" );
2161+ final LSMVectorIndex index = (LSMVectorIndex ) typeIndex .getIndexesOnBuckets ()[0 ];
2162+
2163+ database .transaction (() -> {
2164+ // Query vector close to category A
2165+ final float [] queryVector = {1.5f , 1.5f , 1.5f };
2166+
2167+ // Test 1: Search without filter - should return results from both categories
2168+ final List <com .arcadedb .utility .Pair <com .arcadedb .database .RID , Float >> unfilteredResults =
2169+ index .findNeighborsFromVector (queryVector , 10 );
2170+ assertThat (unfilteredResults ).as ("Unfiltered search should return results" ).isNotEmpty ();
2171+ assertThat (unfilteredResults .size ()).as ("Should return up to 10 results" ).isLessThanOrEqualTo (10 );
2172+
2173+ // Test 2: Search with filter for category A only
2174+ final Set <com .arcadedb .database .RID > allowedRIDs = new HashSet <>(categoryARIDs );
2175+ final List <com .arcadedb .utility .Pair <com .arcadedb .database .RID , Float >> filteredResults =
2176+ index .findNeighborsFromVector (queryVector , 10 , allowedRIDs );
2177+
2178+ assertThat (filteredResults ).as ("Filtered search should return results" ).isNotEmpty ();
2179+ assertThat (filteredResults .size ()).as ("Should return at most 10 results" ).isLessThanOrEqualTo (10 );
2180+
2181+ // Verify all results are from the allowed set
2182+ for (final var result : filteredResults ) {
2183+ assertThat (allowedRIDs ).as ("Result RID should be in allowed set" ).contains (result .getFirst ());
2184+ }
2185+
2186+ // Test 3: Search with filter for category B only
2187+ final Set <com .arcadedb .database .RID > categoryBSet = new HashSet <>(categoryBRIDs );
2188+ final List <com .arcadedb .utility .Pair <com .arcadedb .database .RID , Float >> categoryBResults =
2189+ index .findNeighborsFromVector (queryVector , 10 , categoryBSet );
2190+
2191+ // Since query vector is close to category A, but we filter to category B,
2192+ // we should still get results (from category B), just with higher distances
2193+ assertThat (categoryBResults ).as ("Filtered search for category B should return results" ).isNotEmpty ();
2194+
2195+ for (final var result : categoryBResults ) {
2196+ assertThat (categoryBSet ).as ("Result RID should be from category B" ).contains (result .getFirst ());
2197+ }
2198+
2199+ // Test 4: Empty filter should work like unfiltered
2200+ final List <com .arcadedb .utility .Pair <com .arcadedb .database .RID , Float >> emptyFilterResults =
2201+ index .findNeighborsFromVector (queryVector , 10 , new HashSet <>());
2202+ assertThat (emptyFilterResults ).as ("Empty filter should return results like unfiltered" ).isNotEmpty ();
2203+
2204+ // Test 5: Null filter should work like unfiltered
2205+ final List <com .arcadedb .utility .Pair <com .arcadedb .database .RID , Float >> nullFilterResults =
2206+ index .findNeighborsFromVector (queryVector , 10 , null );
2207+ assertThat (nullFilterResults ).as ("Null filter should return results like unfiltered" ).isNotEmpty ();
2208+ });
2209+ }
2210+
21042211 /**
21052212 * Helper method to recursively delete a directory using Files.walk() API
21062213 */
0 commit comments