Skip to content

Commit 694d6e3

Browse files
committed
Merge tag 'v0.7.2' into update_pg_vector
2 parents 0416939 + 9b89bed commit 694d6e3

File tree

130 files changed

+19258
-1426
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

130 files changed

+19258
-1426
lines changed

.github/workflows/build.yml

+70-10
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,10 @@ jobs:
88
fail-fast: false
99
matrix:
1010
include:
11+
- postgres: 17
12+
os: ubuntu-22.04
13+
- postgres: 16
14+
os: ubuntu-22.04
1115
- postgres: 15
1216
os: ubuntu-22.04
1317
- postgres: 14
@@ -16,15 +20,15 @@ jobs:
1620
os: ubuntu-20.04
1721
- postgres: 12
1822
os: ubuntu-20.04
19-
- postgres: 11
20-
os: ubuntu-18.04
2123
steps:
22-
- uses: actions/checkout@v3
24+
- uses: actions/checkout@v4
2325
- uses: ankane/setup-postgres@v1
2426
with:
2527
postgres-version: ${{ matrix.postgres }}
2628
dev-files: true
2729
- run: make
30+
env:
31+
PG_CFLAGS: -DUSE_ASSERT_CHECKING -Wall -Wextra -Werror -Wno-unused-parameter -Wno-sign-compare
2832
- run: |
2933
export PG_CONFIG=`which pg_config`
3034
sudo --preserve-env=PG_CONFIG make install
@@ -36,29 +40,48 @@ jobs:
3640
sudo apt-get install libipc-run-perl
3741
- run: make prove_installcheck
3842
mac:
39-
runs-on: macos-latest
43+
runs-on: ${{ matrix.os }}
4044
if: ${{ !startsWith(github.ref_name, 'windows') }}
45+
strategy:
46+
fail-fast: false
47+
matrix:
48+
include:
49+
- postgres: 16
50+
os: macos-14
51+
- postgres: 14
52+
os: macos-12
4153
steps:
42-
- uses: actions/checkout@v3
54+
- uses: actions/checkout@v4
4355
- uses: ankane/setup-postgres@v1
4456
with:
45-
postgres-version: 14
57+
postgres-version: ${{ matrix.postgres }}
4658
- run: make
59+
env:
60+
PG_CFLAGS: -DUSE_ASSERT_CHECKING -Wall -Wextra -Werror -Wno-unused-parameter
4761
- run: make install
4862
- run: make installcheck
4963
- if: ${{ failure() }}
5064
run: cat regression.diffs
65+
# Homebrew Postgres does not enable TAP tests, so download the Postgres source to get its Perl test modules
5166
- run: |
5267
brew install cpanm
5368
cpanm --notest IPC::Run
54-
wget -q https://github.com/postgres/postgres/archive/refs/tags/REL_14_5.tar.gz
55-
tar xf REL_14_5.tar.gz
56-
- run: make prove_installcheck PROVE_FLAGS="-I ./postgres-REL_14_5/src/test/perl" PERL5LIB="/Users/runner/perl5/lib/perl5"
69+
wget -q https://github.com/postgres/postgres/archive/refs/tags/$TAG.tar.gz
70+
tar xf $TAG.tar.gz
71+
mv postgres-$TAG postgres
72+
env:
73+
TAG: ${{ matrix.postgres == 16 && 'REL_16_2' || 'REL_14_11' }}
74+
- run: make prove_installcheck PROVE_FLAGS="-I ./postgres/src/test/perl -I ./test/perl"
75+
env:
76+
PERL5LIB: /Users/runner/perl5/lib/perl5
77+
- run: make clean && $(brew --prefix llvm@15)/bin/scan-build --status-bugs make
78+
env:
79+
PG_CFLAGS: -DUSE_ASSERT_CHECKING
5780
windows:
5881
runs-on: windows-latest
5982
if: ${{ !startsWith(github.ref_name, 'mac') }}
6083
steps:
61-
- uses: actions/checkout@v3
84+
- uses: actions/checkout@v4
6285
- uses: ankane/setup-postgres@v1
6386
with:
6487
postgres-version: 14
@@ -70,3 +93,40 @@ jobs:
7093
nmake /NOLOGO /F Makefile.win clean && ^
7194
nmake /NOLOGO /F Makefile.win uninstall
7295
shell: cmd
96+
- if: ${{ failure() }}
97+
run: cat regression.diffs
98+
i386:
99+
if: ${{ !startsWith(github.ref_name, 'mac') && !startsWith(github.ref_name, 'windows') }}
100+
runs-on: ubuntu-latest
101+
container:
102+
image: debian:12
103+
options: --platform linux/386
104+
steps:
105+
- run: apt-get update && apt-get install -y build-essential git libipc-run-perl postgresql-15 postgresql-server-dev-15 sudo
106+
- run: service postgresql start
107+
- run: |
108+
git clone https://github.com/${{ github.repository }}.git pgvector
109+
cd pgvector
110+
git fetch origin ${{ github.ref }}
111+
git reset --hard FETCH_HEAD
112+
make
113+
make install
114+
chown -R postgres .
115+
sudo -u postgres make installcheck
116+
sudo -u postgres make prove_installcheck
117+
env:
118+
PG_CFLAGS: -DUSE_ASSERT_CHECKING -Wall -Wextra -Werror -Wno-unused-parameter -Wno-sign-compare
119+
- if: ${{ failure() }}
120+
run: cat pgvector/regression.diffs
121+
valgrind:
122+
if: ${{ !startsWith(github.ref_name, 'mac') && !startsWith(github.ref_name, 'windows') }}
123+
runs-on: ubuntu-latest
124+
steps:
125+
- uses: actions/checkout@v4
126+
- uses: ankane/setup-postgres-valgrind@v1
127+
with:
128+
postgres-version: 16
129+
check-ub: yes
130+
- run: make OPTFLAGS=""
131+
- run: sudo --preserve-env=PG_CONFIG make install
132+
- run: make installcheck

CHANGELOG.md

+95-3
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,102 @@
1-
## 0.4.1 (unreleased)
1+
## 0.7.2 (2024-06-11)
22

3+
- Fixed initialization fork for indexes on unlogged tables
4+
5+
## 0.7.1 (2024-06-03)
6+
7+
- Improved performance of on-disk HNSW index builds
8+
- Fixed `undefined symbol` error with GCC 8
9+
- Fixed compilation error with universal binaries on Mac
10+
- Fixed compilation warning with Clang < 14
11+
12+
## 0.7.0 (2024-04-29)
13+
14+
- Added `halfvec` type
15+
- Added `sparsevec` type
16+
- Added support for indexing `bit` type
17+
- Added support for indexing L1 distance with HNSW
18+
- Added `binary_quantize` function
19+
- Added `hamming_distance` function
20+
- Added `jaccard_distance` function
21+
- Added `l2_normalize` function
22+
- Added `subvector` function
23+
- Added concatenate operator for vectors
24+
- Added CPU dispatching for distance functions on Linux x86-64
25+
- Updated comparison operators to support vectors with different dimensions
26+
27+
## 0.6.2 (2024-03-18)
28+
29+
- Reduced lock contention with parallel HNSW index builds
30+
31+
## 0.6.1 (2024-03-04)
32+
33+
- Fixed error with `ANALYZE` and vectors with different dimensions
34+
- Fixed segmentation fault with `shared_preload_libraries`
35+
- Fixed vector subtraction being marked as commutative
36+
37+
## 0.6.0 (2024-01-29)
38+
39+
If upgrading with Postgres 12 or Docker, see [these notes](https://github.com/pgvector/pgvector#060).
40+
41+
- Added support for parallel index builds for HNSW
42+
- Added validation for GUC parameters
43+
- Changed storage for vector from `extended` to `external`
44+
- Improved performance of HNSW
45+
- Reduced memory usage for HNSW index builds
46+
- Reduced WAL generation for HNSW index builds
47+
- Fixed error with logical replication
48+
- Fixed `invalid memory alloc request size` error with HNSW index builds
49+
- Moved Docker image to `pgvector` org
50+
- Added Docker tags for each supported version of Postgres
51+
- Dropped support for Postgres 11
52+
53+
## 0.5.1 (2023-10-10)
54+
55+
- Improved performance of HNSW index builds
56+
- Added check for MVCC-compliant snapshot for index scans
57+
58+
## 0.5.0 (2023-08-28)
59+
60+
- Added HNSW index type
61+
- Added support for parallel index builds for IVFFlat
62+
- Added `l1_distance` function
63+
- Added element-wise multiplication for vectors
64+
- Added `sum` aggregate
65+
- Improved performance of distance functions
66+
- Fixed out of range results for cosine distance
67+
- Fixed results for NULL and NaN distances for IVFFlat
68+
69+
## 0.4.4 (2023-06-12)
70+
71+
- Improved error message for malformed vector literal
72+
- Fixed segmentation fault with text input
73+
- Fixed consecutive delimiters with text input
74+
75+
## 0.4.3 (2023-06-10)
76+
77+
- Improved cost estimation
78+
- Improved support for spaces with text input
79+
- Fixed infinite and NaN values with binary input
80+
- Fixed infinite values with vector addition and subtraction
81+
- Fixed infinite values with list centers
82+
- Fixed compilation error when `float8` is pass by reference
83+
- Fixed compilation error on PowerPC
84+
- Fixed segmentation fault with index creation on i386
85+
86+
## 0.4.2 (2023-05-13)
87+
88+
- Added notice when index created with little data
89+
- Fixed dimensions check for some direct function calls
90+
- Fixed installation error with Postgres 12.0-12.2
91+
92+
## 0.4.1 (2023-03-21)
93+
94+
- Improved performance of cosine distance
395
- Fixed index scan count
496

597
## 0.4.0 (2023-01-11)
698

7-
If upgrading with Postgres < 13, see [this note](https://github.com/pgvector/pgvector#040).
99+
If upgrading with Postgres < 13, see [this note](https://github.com/pgvector/pgvector/blob/v0.4.0/README.md#040).
8100

9101
- Changed text representation for vector elements to match `real`
10102
- Changed storage for vector from `plain` to `extended`
@@ -21,7 +113,7 @@ If upgrading with Postgres < 13, see [this note](https://github.com/pgvector/pgv
21113

22114
## 0.3.1 (2022-11-02)
23115

24-
If upgrading from 0.2.7 or 0.3.0, [recreate](https://github.com/pgvector/pgvector#031) all `ivfflat` indexes after upgrading to ensure all data is indexed.
116+
If upgrading from 0.2.7 or 0.3.0, [recreate](https://github.com/pgvector/pgvector/blob/v0.3.1/README.md#031) all `ivfflat` indexes after upgrading to ensure all data is indexed.
25117

26118
- Fixed issue with inserts silently corrupting `ivfflat` indexes (introduced in 0.2.7)
27119
- Fixed segmentation fault with index creation when lists > 6500

Dockerfile

+8-8
Original file line numberDiff line numberDiff line change
@@ -1,22 +1,21 @@
11
# syntax=docker/dockerfile:1
22

3-
ARG PG_MAJOR="13"
4-
ARG PG_TAG="13-13.3"
3+
ARG PG_MAJOR="15"
4+
# see https://hub.docker.com/r/postgis/postgis for valid images
5+
ARG PG_TAG="15-3.4"
56
6-
FROM postgis/postgis:13-3.3
7-
# FROM postgis/postgis:${PG_TAG}
7+
# Base image: the postgis image whose tag is given by the PG_TAG build arg
# declared above (an ARG placed before FROM may be referenced in FROM).
FROM postgis/postgis:${PG_TAG}
8+
9+
ARG PG_MAJOR
810
911
LABEL org.opencontainers.image.source "https://github.com/x-b-e/pgvector"
1012
LABEL org.opencontainers.image.description "XBE server postgres with postgis, pgvector"
1113
LABEL org.opencontainers.image.licenses "PostgreSQL License"
1214
13-
# ARG PG_MAJOR
14-
# ENV PG_MAJOR=${PG_MAJOR}
15-
ENV PG_MAJOR=13
16-
1715
COPY . /tmp/pgvector
1816
1917
RUN apt-get update && \
18+
apt-mark hold locales && \
2019
apt-get install -y --no-install-recommends build-essential postgresql-server-dev-${PG_MAJOR} && \
2120
cd /tmp/pgvector && \
2221
make clean && \
@@ -27,4 +26,5 @@ RUN apt-get update && \
2726
rm -r /tmp/pgvector && \
2827
apt-get remove -y build-essential postgresql-server-dev-${PG_MAJOR} && \
2928
apt-get autoremove -y && \
29+
apt-mark unhold locales && \
3030
rm -rf /var/lib/apt/lists/*

LICENSE

+1-1
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
Portions Copyright (c) 1996-2022, PostgreSQL Global Development Group
1+
Portions Copyright (c) 1996-2024, PostgreSQL Global Development Group
22

33
Portions Copyright (c) 1994, The Regents of the University of California
44

META.json

+3-3
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22
"name": "vector",
33
"abstract": "Open-source vector similarity search for Postgres",
44
"description": "Supports L2 distance, inner product, and cosine distance",
5-
"version": "0.4.0",
5+
"version": "0.7.2",
66
"maintainer": [
77
"Andrew Kane <[email protected]>"
88
],
@@ -12,15 +12,15 @@
1212
"prereqs": {
1313
"runtime": {
1414
"requires": {
15-
"PostgreSQL": "11.0.0"
15+
"PostgreSQL": "12.0.0"
1616
}
1717
}
1818
},
1919
"provides": {
2020
"vector": {
2121
"file": "sql/vector.sql",
2222
"docfile": "README.md",
23-
"version": "0.4.0",
23+
"version": "0.7.2",
2424
"abstract": "Open-source vector similarity search for Postgres"
2525
}
2626
},

Makefile

+21-5
Original file line numberDiff line numberDiff line change
@@ -1,23 +1,31 @@
11
EXTENSION = vector
2-
EXTVERSION = 0.4.0
2+
EXTVERSION = 0.7.2
33

44
MODULE_big = vector
55
DATA = $(wildcard sql/*--*.sql)
6-
OBJS = src/ivfbuild.o src/ivfflat.o src/ivfinsert.o src/ivfkmeans.o src/ivfscan.o src/ivfutils.o src/ivfvacuum.o src/vector.o
6+
OBJS = src/bitutils.o src/bitvec.o src/halfutils.o src/halfvec.o src/hnsw.o src/hnswbuild.o src/hnswinsert.o src/hnswscan.o src/hnswutils.o src/hnswvacuum.o src/ivfbuild.o src/ivfflat.o src/ivfinsert.o src/ivfkmeans.o src/ivfscan.o src/ivfutils.o src/ivfvacuum.o src/sparsevec.o src/vector.o
7+
HEADERS = src/halfvec.h src/sparsevec.h src/vector.h
78

89
TESTS = $(wildcard test/sql/*.sql)
910
REGRESS = $(patsubst test/sql/%.sql,%,$(TESTS))
10-
REGRESS_OPTS = --inputdir=test --load-extension=vector
11+
REGRESS_OPTS = --inputdir=test --load-extension=$(EXTENSION)
1112

13+
# To compile for portability, run: make OPTFLAGS=""
1214
OPTFLAGS = -march=native
1315

14-
# Mac ARM doesn't support -march=native
16+
# Mac ARM doesn't always support -march=native
1517
ifeq ($(shell uname -s), Darwin)
1618
ifeq ($(shell uname -p), arm)
19+
# no difference with -march=armv8.5-a
1720
OPTFLAGS =
1821
endif
1922
endif
2023

24+
# PowerPC doesn't support -march=native
25+
ifneq ($(filter ppc64%, $(shell uname -m)), )
26+
OPTFLAGS =
27+
endif
28+
2129
# For auto-vectorization:
2230
# - GCC (needs -ftree-vectorize OR -O3) - https://gcc.gnu.org/projects/tree-ssa/vectorization.html
2331
# - Clang (could use pragma instead) - https://llvm.org/docs/Vectorizers.html
@@ -58,7 +66,15 @@ dist:
5866
mkdir -p dist
5967
git archive --format zip --prefix=$(EXTENSION)-$(EXTVERSION)/ --output dist/$(EXTENSION)-$(EXTVERSION).zip master
6068

69+
# for Docker
70+
PG_MAJOR ?= 16
71+
6172
.PHONY: docker
6273

6374
docker:
64-
docker build --pull --no-cache --platform linux/amd64 -t ankane/pgvector:latest .
75+
docker build --pull --no-cache --build-arg PG_MAJOR=$(PG_MAJOR) -t pgvector/pgvector:pg$(PG_MAJOR) -t pgvector/pgvector:$(EXTVERSION)-pg$(PG_MAJOR) .
76+
77+
.PHONY: docker-release
78+
79+
docker-release:
80+
docker buildx build --push --pull --no-cache --platform linux/amd64,linux/arm64 --build-arg PG_MAJOR=$(PG_MAJOR) -t pgvector/pgvector:pg$(PG_MAJOR) -t pgvector/pgvector:$(EXTVERSION)-pg$(PG_MAJOR) .

Makefile.win

+10-5
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,11 @@
11
EXTENSION = vector
2-
EXTVERSION = 0.4.0
2+
EXTVERSION = 0.7.2
33

4-
OBJS = src\ivfbuild.obj src\ivfflat.obj src\ivfinsert.obj src\ivfkmeans.obj src\ivfscan.obj src\ivfutils.obj src\ivfvacuum.obj src\vector.obj
4+
OBJS = src\bitutils.obj src\bitvec.obj src\halfutils.obj src\halfvec.obj src\hnsw.obj src\hnswbuild.obj src\hnswinsert.obj src\hnswscan.obj src\hnswutils.obj src\hnswvacuum.obj src\ivfbuild.obj src\ivfflat.obj src\ivfinsert.obj src\ivfkmeans.obj src\ivfscan.obj src\ivfutils.obj src\ivfvacuum.obj src\sparsevec.obj src\vector.obj
5+
HEADERS = src\halfvec.h src\sparsevec.h src\vector.h
56

6-
REGRESS = btree cast copy functions input ivfflat_cosine ivfflat_ip ivfflat_l2 ivfflat_options ivfflat_unlogged
7-
REGRESS_OPTS = --inputdir=test --load-extension=vector
7+
REGRESS = bit btree cast copy halfvec hnsw_bit hnsw_halfvec hnsw_sparsevec hnsw_vector ivfflat_bit ivfflat_halfvec ivfflat_vector sparsevec vector_type
8+
REGRESS_OPTS = --inputdir=test --load-extension=$(EXTENSION)
89

910
# For /arch flags
1011
# https://learn.microsoft.com/en-us/cpp/build/reference/arch-minimum-cpu-architecture
@@ -54,14 +55,18 @@ install:
5455
copy $(SHLIB) "$(PKGLIBDIR)"
5556
copy $(EXTENSION).control "$(SHAREDIR)\extension"
5657
copy sql\$(EXTENSION)--*.sql "$(SHAREDIR)\extension"
58+
mkdir "$(INCLUDEDIR_SERVER)\extension\$(EXTENSION)"
59+
for %f in ($(HEADERS)) do copy %f "$(INCLUDEDIR_SERVER)\extension\$(EXTENSION)"
5760

5861
installcheck:
5962
"$(BINDIR)\pg_regress" --bindir="$(BINDIR)" $(REGRESS_OPTS) $(REGRESS)
6063

6164
uninstall:
6265
del /f "$(PKGLIBDIR)\$(SHLIB)"
6366
del /f "$(SHAREDIR)\extension\$(EXTENSION).control"
64-
del /f "$(SHAREDIR)\extension\vector--*.sql"
67+
del /f "$(SHAREDIR)\extension\$(EXTENSION)--*.sql"
68+
del /f "$(INCLUDEDIR_SERVER)\extension\$(EXTENSION)\*.h"
69+
rmdir "$(INCLUDEDIR_SERVER)\extension\$(EXTENSION)"
6570

6671
clean:
6772
del /f $(SHLIB) $(EXTENSION).lib $(EXTENSION).exp

0 commit comments

Comments
 (0)