4242import com .arcadedb .schema .Schema ;
4343import com .arcadedb .schema .Type ;
4444import com .arcadedb .serializer .BinaryComparator ;
45- import com .arcadedb .serializer .json .JSONArray ;
4645import com .arcadedb .serializer .json .JSONObject ;
47- import com .arcadedb .utility .FileUtils ;
4846import com .arcadedb .utility .LockManager ;
4947import io .github .jbellis .jvector .graph .GraphIndexBuilder ;
5048import io .github .jbellis .jvector .graph .GraphSearcher ;
@@ -248,7 +246,7 @@ protected LSMVectorIndex(final LSMVectorIndexBuilder builder) throws IOException
248246 this .associatedBucketId = -1 ; // Will be set via setMetadata()
249247
250248 // Initialize compaction fields
251- this .currentMutablePages = new AtomicInteger (1 ); // Start with page 0
249+ this .currentMutablePages = new AtomicInteger (0 ); // No page0 - start with 0 pages
252250 this .minPagesToScheduleACompaction = builder .getDatabase ().getConfiguration ()
253251 .getValueAsInteger (com .arcadedb .GlobalConfiguration .INDEX_COMPACTION_MIN_PAGES_SCHEDULE );
254252 this .compactedSubIndex = null ;
@@ -258,6 +256,9 @@ protected LSMVectorIndex(final LSMVectorIndexBuilder builder) throws IOException
258256 ComponentFile .MODE .READ_WRITE , DEF_PAGE_SIZE );
259257 this .component .setMainIndex (this );
260258
259+ // Metadata is stored only in schema JSON (via toJSON()), not in pages
260+ // No page0 initialization needed - all pages contain only vector data
261+
261262 initializeGraphIndex ();
262263 }
263264
@@ -285,58 +286,6 @@ protected LSMVectorIndex(final DatabaseInternal database, final String name, fin
285286 .getValueAsInteger (com .arcadedb .GlobalConfiguration .INDEX_COMPACTION_MIN_PAGES_SCHEDULE );
286287 this .compactedSubIndex = null ;
287288
288- // Load configuration from metadata file or use sensible defaults
289- // Metadata will be applied from schema later via applyMetadataFromSchema() if available
290- JSONObject json = null ;
291-
292- // Try to read from metadata file if it exists (backward compatibility with non-replicated indexes)
293- String originalFilePath = filePath .replaceAll ("\\ .[0-9]+\\ .[0-9]+\\ .v[0-9]+\\ ." + FILE_EXT + "$" , "" );
294- originalFilePath = originalFilePath .replaceAll ("\\ ." + FILE_EXT + "$" , "" );
295- final String metadataPath = originalFilePath + ".metadata.json" ;
296- final File metadataFile = new File (metadataPath );
297-
298- if (metadataFile .exists ()) {
299- try {
300- final String fileContent = FileUtils .readFileAsString (metadataFile );
301- json = new JSONObject (fileContent );
302- LogManager .instance ().log (this , Level .FINE , "Loaded vector index metadata from file: %s" , metadataPath );
303- } catch (final Exception e ) {
304- LogManager .instance ()
305- .log (this , Level .WARNING , "Failed to read metadata file %s, using defaults: %s" , e , metadataPath , e .getMessage ());
306- }
307- }
308-
309- // Use sensible defaults if metadata file doesn't exist
310- // This is normal during schema replication when metadata is embedded in the schema and
311- // will be applied after construction via applyMetadataFromSchema()
312- if (json == null ) {
313- LogManager .instance ().log (this , Level .FINE ,
314- "Metadata file not found for index %s. Using defaults (will be overridden by schema if available)." , name );
315- json = new JSONObject ();
316- json .put ("dimensions" , 10 ); // Default dimensions
317- json .put ("similarityFunction" , "COSINE" );
318- json .put ("maxConnections" , 16 );
319- json .put ("beamWidth" , 100 );
320- json .put ("idPropertyName" , "id" );
321- json .put ("properties" , new JSONArray ());
322- }
323-
324- // indexName already set in constructor
325- this .typeName = json .getString ("typeName" , "" );
326- this .dimensions = json .getInt ("dimensions" );
327- this .similarityFunction = VectorSimilarityFunction .valueOf (json .getString ("similarityFunction" , "COSINE" ));
328- this .maxConnections = json .getInt ("maxConnections" , 16 );
329- this .beamWidth = json .getInt ("beamWidth" , 100 );
330- this .idPropertyName = json .getString ("idPropertyName" , "id" );
331-
332- // Load property names
333- this .propertyNames = new ArrayList <>();
334- if (json .has ("properties" )) {
335- final var jsonArray = json .getJSONArray ("properties" );
336- for (int i = 0 ; i < jsonArray .length (); i ++)
337- propertyNames .add (jsonArray .getString (i ));
338- }
339-
340289 // Load vectors from pages - only if this is an existing index file
341290 // During replication on replicas, the file may not exist yet and will be created/replicated later
342291 try {
@@ -437,42 +386,18 @@ private void loadVectorsFromPages() {
437386 com .arcadedb .log .LogManager .instance ().log (this , java .util .logging .Level .WARNING ,
438387 "DEBUG: loadVectorsFromPages STARTED: index=%s, totalPages=%d" , indexName , getTotalPages ());
439388 try {
440- // Read header from page 0
441- final BasePage page0 = getDatabase ().getTransaction ().getPage (new PageId (getDatabase (), getFileId (), 0 ), getPageSize ());
442- final ByteBuffer buffer0 = page0 .getContent ();
443- buffer0 .position (0 );
389+ // NOTE: All metadata (dimensions, similarityFunction, maxConnections, beamWidth) comes from schema JSON
390+ // via applyMetadataFromSchema(). Pages contain only vector data, no metadata.
444391
445- final int storedNextId = buffer0 .getInt ();
446- com .arcadedb .log .LogManager .instance ().log (this , java .util .logging .Level .WARNING ,
447- "DEBUG: loadVectorsFromPages - page0 storedNextId=%d, index=%s" , storedNextId , indexName );
448-
449- if (storedNextId == 0 ) {
450- LogManager .instance ().log (this , Level .FINE , "No vectors to load - empty index: " + indexName );
451- return ;
452- }
453-
454- // Read and validate metadata
455- final int storedDimensions = buffer0 .getInt ();
456- com .arcadedb .log .LogManager .instance ().log (this , java .util .logging .Level .WARNING ,
457- "DEBUG: loadVectorsFromPages - storedDimensions=%d, expectedDimensions=%d, index=%s" ,
458- storedDimensions , dimensions , indexName );
459-
460- if (storedDimensions != dimensions ) {
461- throw new IndexException ("Dimension mismatch: expected " + dimensions + " but found " + storedDimensions );
462- }
463-
464- // Skip similarity, maxConnections, beamWidth - already set from constructor
465- buffer0 .getInt ();
466- buffer0 .getInt ();
467- buffer0 .getInt ();
468-
469- nextId .set (storedNextId );
470-
471- // Read all data pages (1 onwards) in LSM style
392+ // Read all data pages (starting from page 0) in LSM style
472393 final int totalPages = getTotalPages ();
473394 int entriesRead = 0 ;
395+ int maxVectorId = -1 ; // Track max ID to compute nextId
474396
475- for (int pageNum = 1 ; pageNum < totalPages ; pageNum ++) {
397+ com .arcadedb .log .LogManager .instance ().log (this , java .util .logging .Level .WARNING ,
398+ "DEBUG: loadVectorsFromPages STARTED: index=%s, totalPages=%d" , indexName , totalPages );
399+
400+ for (int pageNum = 0 ; pageNum < totalPages ; pageNum ++) {
476401 final BasePage currentPage = getDatabase ().getTransaction ().getPage (
477402 new PageId (getDatabase (), getFileId (), pageNum ), getPageSize ());
478403 final ByteBuffer pageBuffer = currentPage .getContent ();
@@ -508,6 +433,10 @@ private void loadVectorsFromPages() {
508433
509434 final boolean deleted = pageBuffer .get () == 1 ;
510435
436+ // Track max vector ID to compute nextId
437+ if (id > maxVectorId )
438+ maxVectorId = id ;
439+
511440 // Add/update in registry (LSM style: later entries override earlier ones)
512441 final VectorEntry entry = new VectorEntry (id , rid , vector );
513442 entry .deleted = deleted ;
@@ -516,9 +445,12 @@ private void loadVectorsFromPages() {
516445 }
517446 }
518447
448+ // Compute nextId from the maximum vector ID found + 1
449+ nextId .set (maxVectorId + 1 );
450+
519451 LogManager .instance ().log (this , Level .INFO ,
520452 "Loaded " + vectorRegistry .size () + " unique vectors (" + entriesRead + " total entries) from " +
521- ( totalPages - 1 ) + " pages for index: " + indexName );
453+ totalPages + " pages for index: " + indexName + ", nextId=" + nextId . get () );
522454
523455 // Rebuild the graph index with loaded non-deleted vectors
524456 if (!vectorRegistry .isEmpty ()) {
@@ -543,27 +475,18 @@ private void persistVectorsDeltaIncremental(final List<Integer> changedVectorIds
543475 "DEBUG: persistVectorsDeltaIncremental called: index=%s, changedVectorIds=%d, totalPages=%d" ,
544476 indexName , changedVectorIds .size (), getTotalPages ());
545477
546- // Update metadata in page 0
547- final BasePage page0 = getDatabase ().getTransaction ().getPageToModify (
548- new PageId (getDatabase (), getFileId (), 0 ), getPageSize (), false );
549- final ByteBuffer buffer0 = page0 .getContent ();
550- buffer0 .position (0 );
551- buffer0 .putInt (nextId .get ()); // Update next ID
552- buffer0 .putInt (dimensions );
553- buffer0 .putInt (similarityFunction .ordinal ());
554- buffer0 .putInt (maxConnections );
555- buffer0 .putInt (beamWidth );
478+ // No page 0 writes are needed: metadata is stored in the schema JSON, and nextId is recomputed from the maximum vector ID during load
556479
557480 if (changedVectorIds .isEmpty ())
558481 return ;
559482
560483 // Calculate entry size: id(4) + position(8) + bucketId(4) + vector(dimensions*4) + deleted(1)
561484 final int entrySize = 4 + 8 + 4 + (dimensions * 4 ) + 1 ;
562485
563- // Get or create the last mutable page
486+ // Get or create the last mutable page (pages start from 0 now - no page0 metadata)
564487 int lastPageNum = getTotalPages () - 1 ;
565- if (lastPageNum < 1 ) {
566- lastPageNum = 1 ;
488+ if (lastPageNum < 0 ) {
489+ lastPageNum = 0 ;
567490 createNewVectorDataPage (lastPageNum );
568491 }
569492
0 commit comments