@@ -1281,23 +1281,29 @@ func TestKNNScoreBoosting(t *testing.T) {
12811281 searchRequest .AddKNN ("vector" , queryVec , 3 , 1.0 )
12821282 searchRequest .Fields = []string {"content" , "vector" }
12831283
1284- hits , _ := index .Search (searchRequest )
1284+ hits , err := index .Search (searchRequest )
1285+ if err != nil {
1286+ t .Fatal (err )
1287+ }
12851288 hitsMap := make (map [string ]float64 , 0 )
12861289 for _ , hit := range hits .Hits {
12871290 hitsMap [hit .ID ] = (hit .Score )
12881291 }
12891292
1290- searchRequest2 : = NewSearchRequest (NewMatchNoneQuery ())
1293+ searchRequest = NewSearchRequest (NewMatchNoneQuery ())
12911294 searchRequest .AddKNN ("vector" , queryVec , 3 , 10.0 )
12921295 searchRequest .Fields = []string {"content" , "vector" }
12931296
1294- hits2 , _ := index .Search (searchRequest2 )
1297+ hits , err = index .Search (searchRequest )
1298+ if err != nil {
1299+ t .Fatal (err )
1300+ }
12951301 hitsMap2 := make (map [string ]float64 , 0 )
1296- for _ , hit := range hits2 .Hits {
1302+ for _ , hit := range hits .Hits {
12971303 hitsMap2 [hit .ID ] = (hit .Score )
12981304 }
12991305
1300- for _ , hit := range hits2 .Hits {
1306+ for _ , hit := range hits .Hits {
13011307 if hitsMap [hit .ID ] != hitsMap2 [hit .ID ]/ 10 {
13021308 t .Errorf ("boosting not working: %v %v \n " , hitsMap [hit .ID ], hitsMap2 [hit .ID ])
13031309 }
@@ -1645,6 +1651,210 @@ func TestNestedVectors(t *testing.T) {
16451651 }
16461652}
16471653
1654+ // -----------------------------------------------------------------------------
1655+ // TestMultiVector tests the KNN functionality which handles duplicate
1656+ // vectors being matched within the same document. When a document has multiple vectors
1657+ // (via [[]] array of vectors or [{}] array of objects with vectors), the KNN
1658+ // searcher must pick the best scoring vector match for that document. This test covers these scenarios:
1659+ // - Single vector field (baseline)
1660+ // - [[]] style: array of vectors (same doc appears multiple times)
1661+ // - [{}] style: array of objects with vector field (chunks pattern)
1662+ func TestMultiVector (t * testing.T ) {
1663+ tmpIndexPath := createTmpIndexPath (t )
1664+ defer cleanupTmpIndexPath (t , tmpIndexPath )
1665+
1666+ // JSON documents covering merger scenarios:
1667+ // - Single vector (baseline)
1668+ // - [[]] style: array of vectors (same doc appears multiple times)
1669+ // - [{}] style: array of objects with vector field (chunks pattern)
1670+ docs := map [string ]string {
1671+ // Single vector - baseline
1672+ "doc1" : `{
1673+ "vec": [10, 10, 10],
1674+ "vecB": [100, 100, 100]
1675+ }` ,
1676+ // [[]] style - array of 2 vectors
1677+ "doc2" : `{
1678+ "vec": [[0, 0, 0], [500, 500, 500]],
1679+ "vecB": [[900, 900, 900], [950, 950, 950], [975, 975, 975], [990, 990, 990]]
1680+ }` ,
1681+ // [[]] style - array of 3 vectors
1682+ "doc3" : `{
1683+ "vec": [[50, 50, 50], [200, 200, 200], [400, 400, 400]],
1684+ "vecB": [[800, 800, 800], [850, 850, 850]]
1685+ }` ,
1686+ // Single vector - baseline
1687+ "doc4" : `{
1688+ "vec": [1000, 1000, 1000],
1689+ "vecB": [1, 1, 1]
1690+ }` ,
1691+ // [{}] style - array of objects with vector field (chunks pattern)
1692+ "doc5" : `{
1693+ "chunks": [
1694+ {"vec": [10, 10, 10], "text": "chunk1"},
1695+ {"vec": [20, 20, 20], "text": "chunk2"},
1696+ {"vec": [30, 30, 30], "text": "chunk3"},
1697+ {"vec": [40, 40, 40], "text": "chunk4"}
1698+ ]
1699+ }` ,
1700+ "doc6" : `{
1701+ "chunks": [
1702+ {"vec": [[10, 10, 10],[20, 20, 20]], "text": "chunk1"},
1703+ {"vec": [[30, 30, 30],[40, 40, 40]], "text": "chunk2"}
1704+ ]
1705+ }` ,
1706+ }
1707+
1708+ // Parse JSON documents
1709+ dataset := make (map [string ]map [string ]interface {})
1710+ for docID , jsonStr := range docs {
1711+ var doc map [string ]interface {}
1712+ if err := json .Unmarshal ([]byte (jsonStr ), & doc ); err != nil {
1713+ t .Fatalf ("failed to unmarshal %s: %v" , docID , err )
1714+ }
1715+ dataset [docID ] = doc
1716+ }
1717+
1718+ // Index mapping
1719+ indexMapping := NewIndexMapping ()
1720+
1721+ vecMapping := mapping .NewVectorFieldMapping ()
1722+ vecMapping .Dims = 3
1723+ vecMapping .Similarity = index .InnerProduct
1724+ indexMapping .DefaultMapping .AddFieldMappingsAt ("vec" , vecMapping )
1725+ indexMapping .DefaultMapping .AddFieldMappingsAt ("vecB" , vecMapping )
1726+
1727+ // Nested chunks mapping for [{}] style
1728+ chunksMapping := mapping .NewDocumentMapping ()
1729+ chunksMapping .AddFieldMappingsAt ("vec" , vecMapping )
1730+ indexMapping .DefaultMapping .AddSubDocumentMapping ("chunks" , chunksMapping )
1731+
1732+ // Create and populate index
1733+ idx , err := New (tmpIndexPath , indexMapping )
1734+ if err != nil {
1735+ t .Fatal (err )
1736+ }
1737+ defer func () {
1738+ if err := idx .Close (); err != nil {
1739+ t .Fatal (err )
1740+ }
1741+ }()
1742+
1743+ batch := idx .NewBatch ()
1744+ for docID , doc := range dataset {
1745+ if err := batch .Index (docID , doc ); err != nil {
1746+ t .Fatal (err )
1747+ }
1748+ }
1749+ if err := idx .Batch (batch ); err != nil {
1750+ t .Fatal (err )
1751+ }
1752+
1753+ // Test: Single KNN query - basic functionality
1754+ t .Run ("VecFieldSingle" , func (t * testing.T ) {
1755+ searchReq := NewSearchRequest (query .NewMatchNoneQuery ())
1756+ searchReq .AddKNN ("vec" , []float32 {1 , 1 , 1 }, 20 , 1.0 )
1757+ res , err := idx .Search (searchReq )
1758+ if err != nil {
1759+ t .Fatal (err )
1760+ }
1761+ // Inner product: score = sum(query_i * doc_i)
1762+ // doc1 vec=[10,10,10]: 1*10*3 = 30
1763+ // doc2 vec best is [500,500,500]: 1*500*3 = 1500
1764+ // doc3 vec best is [400,400,400]: 1*400*3 = 1200
1765+ // doc4 vec=[1000,1000,1000]: 1*1000*3 = 3000
1766+ expectedResult := []struct {
1767+ docID string
1768+ expectedScore float64
1769+ }{
1770+ {docID : "doc4" , expectedScore : 3000 },
1771+ {docID : "doc2" , expectedScore : 1500 },
1772+ {docID : "doc3" , expectedScore : 1200 },
1773+ {docID : "doc1" , expectedScore : 30 },
1774+ }
1775+
1776+ if len (res .Hits ) != len (expectedResult ) {
1777+ t .Fatalf ("expected %d hits, got %d" , len (expectedResult ), len (res .Hits ))
1778+ }
1779+
1780+ for i , expected := range expectedResult {
1781+ if res .Hits [i ].ID != expected .docID {
1782+ t .Fatalf ("at rank %d, expected docID %s, got %s" , i + 1 , expected .docID , res .Hits [i ].ID )
1783+ }
1784+ if res .Hits [i ].Score != expected .expectedScore {
1785+ t .Fatalf ("at rank %d, expected score %v, got %v" , i + 1 , expected .expectedScore , res .Hits [i ].Score )
1786+ }
1787+ }
1788+ })
1789+
1790+ // Test: Single KNN query on vecB field
1791+ t .Run ("VecBFieldSingle" , func (t * testing.T ) {
1792+ searchReq := NewSearchRequest (query .NewMatchNoneQuery ())
1793+ searchReq .AddKNN ("vecB" , []float32 {1000 , 1000 , 1000 }, 20 , 1.0 )
1794+ res , err := idx .Search (searchReq )
1795+ if err != nil {
1796+ t .Fatal (err )
1797+ }
1798+ // Inner product: score = sum(query_i * doc_i) for each dimension
1799+ // doc1: vecB=[100,100,100] -> 1000*100*3 = 300,000
1800+ // doc2: vecB best is [990,990,990] -> 1000*990*3 = 2,970,000
1801+ // doc3: vecB best is [850,850,850] -> 1000*850*3 = 2,550,000
1802+ // doc4: vecB=[1,1,1] -> 1000*1*3 = 3,000
1803+ expectedResult := []struct {
1804+ docID string
1805+ expectedScore float64
1806+ }{
1807+ {docID : "doc2" , expectedScore : 2970000 },
1808+ {docID : "doc3" , expectedScore : 2550000 },
1809+ {docID : "doc1" , expectedScore : 300000 },
1810+ {docID : "doc4" , expectedScore : 3000 },
1811+ }
1812+
1813+ if len (res .Hits ) != len (expectedResult ) {
1814+ t .Fatalf ("expected %d hits, got %d" , len (expectedResult ), len (res .Hits ))
1815+ }
1816+
1817+ for i , expected := range expectedResult {
1818+ if res .Hits [i ].ID != expected .docID {
1819+ t .Fatalf ("at rank %d, expected docID %s, got %s" , i + 1 , expected .docID , res .Hits [i ].ID )
1820+ }
1821+ if res .Hits [i ].Score != expected .expectedScore {
1822+ t .Fatalf ("at rank %d, expected score %v, got %v" , i + 1 , expected .expectedScore , res .Hits [i ].Score )
1823+ }
1824+ }
1825+ })
1826+
1827+ // Test: Single KNN query on nested chunks.vec field
1828+ t .Run ("ChunksVecFieldSingle" , func (t * testing.T ) {
1829+ searchReq := NewSearchRequest (query .NewMatchNoneQuery ())
1830+ searchReq .AddKNN ("chunks.vec" , []float32 {1 , 1 , 1 }, 20 , 1.0 )
1831+ searchReq .SortBy ([]string {"_score" , "docID" })
1832+ res , err := idx .Search (searchReq )
1833+ if err != nil {
1834+ t .Fatal (err )
1835+ }
1836+
1837+ // Only doc5 and doc6 have chunks.vec
1838+ // doc5 chunks: [10,10,10], [20,20,20], [30,30,30], [40,40,40]
1839+ // Best score: 1*40*3 = 120
1840+ // doc6 chunks: [[10,10,10],[20,20,20]], [[30,30,30],[40,40,40]]
1841+ // Best score: 1*40*3 = 120
1842+ if len (res .Hits ) != 2 {
1843+ t .Fatalf ("expected 2 hits, got %d" , len (res .Hits ))
1844+ }
1845+
1846+ // Both should have score 120
1847+ for _ , hit := range res .Hits {
1848+ if hit .ID != "doc5" && hit .ID != "doc6" {
1849+ t .Fatalf ("unexpected docID %s, expected doc5 or doc6" , hit .ID )
1850+ }
1851+ if hit .Score != 120 {
1852+ t .Fatalf ("for %s, expected score 120, got %v" , hit .ID , hit .Score )
1853+ }
1854+ }
1855+ })
1856+ }
1857+
16481858func TestNumVecsStat (t * testing.T ) {
16491859
16501860 dataset , _ , err := readDatasetAndQueries (testInputCompressedFile )
0 commit comments