Skip to content

Commit

Permalink
feat: add upsert for sparse indexes
Browse files Browse the repository at this point in the history
  • Loading branch information
heyjorgedev committed Jan 21, 2025
1 parent e7da1ca commit 5c45cdd
Show file tree
Hide file tree
Showing 12 changed files with 324 additions and 7 deletions.
2 changes: 2 additions & 0 deletions .github/workflows/run-tests.yml
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,8 @@ env:
DENSE_UPSTASH_VECTOR_REST_TOKEN: ${{ secrets.DENSE_UPSTASH_VECTOR_REST_TOKEN }}
DENSE_EMBEDDING_UPSTASH_VECTOR_REST_URL: ${{ secrets.DENSE_EMBEDDING_UPSTASH_VECTOR_REST_URL }}
DENSE_EMBEDDING_UPSTASH_VECTOR_REST_TOKEN: ${{ secrets.DENSE_EMBEDDING_UPSTASH_VECTOR_REST_TOKEN }}
SPARSE_UPSTASH_VECTOR_REST_URL: ${{ secrets.SPARSE_UPSTASH_VECTOR_REST_URL }}
SPARSE_UPSTASH_VECTOR_REST_TOKEN: ${{ secrets.SPARSE_UPSTASH_VECTOR_REST_TOKEN }}

jobs:
test:
Expand Down
3 changes: 3 additions & 0 deletions phpunit.xml.dist
Original file line number Diff line number Diff line change
Expand Up @@ -49,5 +49,8 @@
<!-- For testing dense index with embedding -->
<env name="DENSE_EMBEDDING_UPSTASH_VECTOR_REST_URL" value="https://vector.upstash.com" />
<env name="DENSE_EMBEDDING_UPSTASH_VECTOR_REST_TOKEN" value="test-token" />
<!-- For testing sparse index -->
<env name="SPARSE_UPSTASH_VECTOR_REST_URL" value="https://vector.upstash.com" />
<env name="SPARSE_UPSTASH_VECTOR_REST_TOKEN" value="test-token" />
</php>
</phpunit>
17 changes: 16 additions & 1 deletion src/DataQueryResult.php
Original file line number Diff line number Diff line change
Expand Up @@ -2,12 +2,13 @@

namespace Upstash\Vector;

use ArrayAccess;
use ArrayIterator;
use Countable;
use IteratorAggregate;
use Traversable;

final readonly class DataQueryResult implements Countable, IteratorAggregate
final readonly class DataQueryResult implements ArrayAccess, Countable, IteratorAggregate
{
/**
* @param array<QueryResult> $results
Expand All @@ -33,4 +34,18 @@ public function getResults(): array
{
return $this->results;
}

/**
 * ArrayAccess hook: tells whether the results array has an entry at $offset.
 *
 * The check is done with isset(), so an entry whose value is null is
 * reported as absent.
 */
public function offsetExists(mixed $offset): bool
{
    $entries = $this->results;

    return isset($entries[$offset]);
}

/**
 * ArrayAccess hook: returns the entry of the results array stored at $offset.
 *
 * No existence check is performed before the lookup.
 */
public function offsetGet(mixed $offset): mixed
{
    $entry = $this->results[$offset];

    return $entry;
}

/**
 * ArrayAccess hook for `$result[$offset] = $value`.
 *
 * The body is empty: the assignment is silently discarded and the stored
 * results are left untouched.
 */
public function offsetSet(mixed $offset, mixed $value): void {}

/**
 * ArrayAccess hook for `unset($result[$offset])`.
 *
 * The body is empty: the unset is silently ignored and the stored results
 * are left untouched.
 */
public function offsetUnset(mixed $offset): void {}
}
25 changes: 25 additions & 0 deletions src/SparseVector.php
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
<?php

namespace Upstash\Vector;

/**
 * Immutable value object describing a sparse vector as two parallel arrays.
 *
 * The n-th entry of $indices is paired with the n-th entry of $values, so
 * both arrays have to contain the same number of elements.
 */
final readonly class SparseVector
{
    /**
     * @param array<int> $indices
     * @param array<float> $values
     *
     * @throws \InvalidArgumentException when $indices and $values differ in length
     */
    public function __construct(public array $indices = [], public array $values = [])
    {
        $indexCount = count($indices);
        $valueCount = count($values);

        // Fail fast: a mismatched pair can never describe a valid sparse vector.
        if ($indexCount !== $valueCount) {
            throw new \InvalidArgumentException(sprintf(
                'Sparse vector indices and values must have the same length, got %d and %d.',
                $indexCount,
                $valueCount,
            ));
        }
    }

    /**
     * Named constructor; equivalent to `new SparseVector($indices, $values)`.
     *
     * @param array<int> $indices
     * @param array<float> $values
     *
     * @throws \InvalidArgumentException when $indices and $values differ in length
     */
    public static function of(array $indices = [], array $values = []): SparseVector
    {
        return new self($indices, $values);
    }

    /**
     * Returns the vector as an associative array with `indices` and `values` keys.
     *
     * @return array{indices: list<int>, values: list<float>}
     */
    public function toArray(): array
    {
        // Re-index both arrays so that input with non-sequential keys still
        // yields lists, which json_encode() renders as JSON arrays rather
        // than JSON objects.
        return [
            'indices' => array_values($this->indices),
            'values' => array_values($this->values),
        ];
    }
}
15 changes: 13 additions & 2 deletions src/VectorQuery.php
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,8 @@
* @param array<float> $vector
*/
public function __construct(
public array $vector,
public array $vector = [],
public ?SparseVector $sparseVector = null,
public int $topK = 10,
public bool $includeMetadata = false,
public bool $includeVectors = false,
Expand All @@ -23,8 +24,18 @@ public function __construct(

public function toArray(): array
{
$data = [];

if (! empty($this->vector)) {
$data['vector'] = $this->vector;
}

if ($this->sparseVector !== null) {
$data['sparseVector'] = $this->sparseVector->toArray();
}

$data = [
'vector' => $this->vector,
...$data,
'topK' => $this->topK,
'includeMetadata' => $this->includeMetadata,
'includeVectors' => $this->includeVectors,
Expand Down
16 changes: 12 additions & 4 deletions src/VectorUpsert.php
Original file line number Diff line number Diff line change
Expand Up @@ -7,19 +7,27 @@
/**
 * Promotes every argument to a public property; the constructor body is empty.
 *
 * Only $id is required. $vector and $metadata default to an empty array,
 * $sparseVector and $data default to null.
 *
 * NOTE(review): $sparseVector is declared before $metadata, so callers that
 * pass arguments positionally must follow this order.
 */
public function __construct(
public string $id,
public array $vector = [],
public ?SparseVector $sparseVector = null,
public array $metadata = [],
public ?string $data = null,
) {}

public function toArray(): array
{
// TODO: Improve transformation

return [
$result = [
'id' => $this->id,
'vector' => $this->vector,
'metadata' => (object) $this->metadata,
'data' => $this->data,
];

if ($this->sparseVector !== null) {
$result['sparseVector'] = $this->sparseVector->toArray();
}

if (! empty($this->vector)) {
$result['vector'] = $this->vector;
}

return $result;
}
}
33 changes: 33 additions & 0 deletions tests/Concerns/UsesSparseIndex.php
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
<?php

namespace Upstash\Vector\Tests\Concerns;

use Upstash\Vector\Contracts\IndexInterface;
use Upstash\Vector\Contracts\IndexNamespaceInterface;
use Upstash\Vector\Index;

/**
 * Test concern that builds an Index from the SPARSE_UPSTASH_VECTOR_REST_URL
 * and SPARSE_UPSTASH_VECTOR_REST_TOKEN environment variables and selects a
 * randomly named namespace on it for every test.
 */
trait UsesSparseIndex
{
    protected IndexInterface $index;

    protected IndexNamespaceInterface $namespace;

    /**
     * Creates the index client, then picks a namespace whose name is the
     * hex encoding of 32 random bytes.
     */
    public function setUp(): void
    {
        parent::setUp();

        $restUrl = getenv('SPARSE_UPSTASH_VECTOR_REST_URL');
        $restToken = getenv('SPARSE_UPSTASH_VECTOR_REST_TOKEN');
        $this->index = new Index(url: $restUrl, token: $restToken);

        $namespaceName = bin2hex(random_bytes(32));
        $this->namespace = $this->index->namespace($namespaceName);
    }

    /**
     * Calls delete() on the namespace chosen in setUp(), then runs the
     * parent tearDown().
     */
    public function tearDown(): void
    {
        $this->namespace->delete();

        parent::tearDown();
    }
}
43 changes: 43 additions & 0 deletions tests/Dense/Operations/QueryDataTest.php
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
<?php

namespace Upstash\Vector\Tests\Dense\Operations;

use PHPUnit\Framework\TestCase;
use Upstash\Vector\DataQuery;
use Upstash\Vector\DataUpsert;
use Upstash\Vector\Tests\Concerns\UsesDenseIndexWithEmbedding;
use Upstash\Vector\Tests\Concerns\WaitsForIndex;

class QueryDataTest extends TestCase
{
    use UsesDenseIndexWithEmbedding;
    use WaitsForIndex;

    /**
     * Upserts three sentences as data, queries with a question about France
     * and expects the single returned result to carry id '2'.
     */
    public function test_query_data(): void
    {
        $documents = [
            new DataUpsert(id: '1', data: 'The capital of Japan is Tokyo'),
            new DataUpsert(id: '2', data: 'The capital of France is Paris'),
            new DataUpsert(id: '3', data: 'The capital of Germany is Berlin'),
        ];
        $this->namespace->upsertDataMany($documents);

        $this->waitForIndex($this->namespace);

        $question = new DataQuery(
            data: 'What is the capital of France?',
            topK: 1,
        );
        $results = $this->namespace->queryData($question);

        $this->assertCount(1, $results);
        $this->assertEquals('2', $results[0]->id);
    }
}
43 changes: 43 additions & 0 deletions tests/Sparse/Operations/QueryDataTest.php
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
<?php

namespace Upstash\Vector\Tests\Sparse\Operations;

use PHPUnit\Framework\TestCase;
use Upstash\Vector\DataQuery;
use Upstash\Vector\DataUpsert;
use Upstash\Vector\Tests\Concerns\UsesSparseIndex;
use Upstash\Vector\Tests\Concerns\WaitsForIndex;

class QueryDataTest extends TestCase
{
    use UsesSparseIndex;
    use WaitsForIndex;

    /**
     * Upserts three sentences as data, queries with the phrase
     * 'capital of France' and expects the single returned result to carry
     * id '2'.
     */
    public function test_query_data(): void
    {
        $documents = [
            new DataUpsert(id: '1', data: 'The capital of Japan is Tokyo'),
            new DataUpsert(id: '2', data: 'The capital of France is Paris'),
            new DataUpsert(id: '3', data: 'The capital of Germany is Berlin'),
        ];
        $this->namespace->upsertDataMany($documents);

        $this->waitForIndex($this->namespace);

        $phrase = new DataQuery(
            data: 'capital of France',
            topK: 1,
        );
        $results = $this->namespace->queryData($phrase);

        $this->assertCount(1, $results);
        $this->assertEquals('2', $results[0]->id);
    }
}
43 changes: 43 additions & 0 deletions tests/Sparse/Operations/QueryVectorsTest.php
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
<?php

namespace Upstash\Vector\Tests\Sparse\Operations;

use PHPUnit\Framework\TestCase;
use Upstash\Vector\SparseVector;
use Upstash\Vector\Tests\Concerns\UsesSparseIndex;
use Upstash\Vector\Tests\Concerns\WaitsForIndex;
use Upstash\Vector\VectorQuery;
use Upstash\Vector\VectorUpsert;

class QueryVectorsTest extends TestCase
{
    use UsesSparseIndex;
    use WaitsForIndex;

    /**
     * Upserts one sparse vector under id '1', queries with a sparse vector
     * built from the same indices and values, and expects that single
     * record to come back.
     */
    public function test_query_vectors(): void
    {
        $stored = new SparseVector(
            indices: [1, 2, 3],
            values: [5, 6, 7],
        );
        $record = new VectorUpsert(
            id: '1',
            sparseVector: $stored,
            data: 'Test Data',
        );
        $this->namespace->upsertMany([$record]);

        $this->waitForIndex($this->namespace);

        $probe = new SparseVector(
            indices: [1, 2, 3],
            values: [5, 6, 7],
        );
        $results = $this->namespace->query(new VectorQuery(sparseVector: $probe, topK: 1));

        $this->assertCount(1, $results);
        $this->assertEquals('1', $results[0]->id);
    }
}
40 changes: 40 additions & 0 deletions tests/Sparse/Operations/UpsertDataTest.php
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
<?php

namespace Upstash\Vector\Tests\Sparse\Operations;

use PHPUnit\Framework\TestCase;
use Upstash\Vector\DataUpsert;
use Upstash\Vector\Tests\Concerns\UsesSparseIndex;
use Upstash\Vector\Tests\Concerns\WaitsForIndex;

class UpsertDataTest extends TestCase
{
    use UsesSparseIndex;
    use WaitsForIndex;

    /**
     * Upserts a single data record and expects the namespace info to report
     * a vector count of 1.
     */
    public function test_upsert_data(): void
    {
        $record = new DataUpsert('1', 'The capital of Japan is Tokyo');
        $this->namespace->upsertData($record);

        $this->waitForIndex($this->namespace);

        $vectorCount = $this->namespace->getNamespaceInfo()->vectorCount;

        $this->assertSame(1, $vectorCount);
    }

    /**
     * Upserts three data records in one call and expects the namespace info
     * to report a vector count of 3.
     */
    public function test_upsert_many_data(): void
    {
        $records = [
            new DataUpsert('1', 'The capital of Japan is Tokyo'),
            new DataUpsert('2', 'The capital of France is Paris'),
            new DataUpsert('3', 'The capital of Germany is Berlin'),
        ];
        $this->namespace->upsertDataMany($records);

        $this->waitForIndex($this->namespace);

        $vectorCount = $this->namespace->getNamespaceInfo()->vectorCount;

        $this->assertSame(3, $vectorCount);
    }
}
51 changes: 51 additions & 0 deletions tests/Sparse/Operations/UpsertVectorTest.php
Original file line number Diff line number Diff line change
@@ -0,0 +1,51 @@
<?php

namespace Upstash\Vector\Tests\Sparse\Operations;

use PHPUnit\Framework\TestCase;
use Upstash\Vector\SparseVector;
use Upstash\Vector\Tests\Concerns\UsesSparseIndex;
use Upstash\Vector\Tests\Concerns\WaitsForIndex;
use Upstash\Vector\VectorUpsert;

class UpsertVectorTest extends TestCase
{
    use UsesSparseIndex;
    use WaitsForIndex;

    /**
     * Upserts one sparse vector and expects the namespace info to report a
     * vector count of 1.
     */
    public function test_upsert_vector(): void
    {
        $sparse = new SparseVector(
            indices: [0, 1],
            values: [1, 2],
        );
        $this->namespace->upsert(new VectorUpsert(id: '1', sparseVector: $sparse));

        $this->waitForIndex($this->namespace);

        $vectorCount = $this->namespace->getNamespaceInfo()->vectorCount;

        $this->assertSame(1, $vectorCount);
    }

    /**
     * Upserts two sparse vectors in one call and expects the namespace info
     * to report a vector count of 2.
     */
    public function test_upsert_many_vectors(): void
    {
        $first = new SparseVector(indices: [0, 1], values: [1, 2]);
        $second = new SparseVector(indices: [2, 3], values: [4, 5]);

        $this->namespace->upsertMany([
            new VectorUpsert(id: '1', sparseVector: $first),
            new VectorUpsert(id: '2', sparseVector: $second),
        ]);

        $this->waitForIndex($this->namespace);

        $vectorCount = $this->namespace->getNamespaceInfo()->vectorCount;

        $this->assertSame(2, $vectorCount);
    }
}

0 comments on commit 5c45cdd

Please sign in to comment.