diff --git a/.github/workflows/go-driver.yml b/.github/workflows/go-driver.yml index 19decec1b..5b6d15030 100644 --- a/.github/workflows/go-driver.yml +++ b/.github/workflows/go-driver.yml @@ -19,10 +19,10 @@ jobs: working-directory: drivers/golang/age/ steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@v4 - name: Run apache/age docker image - run: docker-compose up -d + run: docker compose up -d - name: Set up Go uses: actions/setup-go@v3 diff --git a/.github/workflows/installcheck.yaml b/.github/workflows/installcheck.yaml index c38245382..dddefa48a 100644 --- a/.github/workflows/installcheck.yaml +++ b/.github/workflows/installcheck.yaml @@ -22,25 +22,37 @@ jobs: path: ~/pg16 key: ${{ runner.os }}-v1-pg16-${{ env.PG_COMMIT_HASH }} - - name: Install PostgreSQL 16 + - name: Install PostgreSQL 16 and some extensions if: steps.pg16cache.outputs.cache-hit != 'true' run: | git clone --depth 1 --branch REL_16_STABLE git://git.postgresql.org/git/postgresql.git ~/pg16source cd ~/pg16source ./configure --prefix=$HOME/pg16 CFLAGS="-std=gnu99 -ggdb -O0" --enable-cassert make install -j$(nproc) > /dev/null + cd contrib + cd fuzzystrmatch + make PG_CONFIG=$HOME/pg16/bin/pg_config install -j$(nproc) > /dev/null + cd ../pg_trgm + make PG_CONFIG=$HOME/pg16/bin/pg_config install -j$(nproc) > /dev/null - uses: actions/checkout@v3 - - name: Build + - name: Build AGE id: build run: | make PG_CONFIG=$HOME/pg16/bin/pg_config install -j$(nproc) + + - name: Pull and build pgvector + id: pgvector + run: | + git clone https://github.com/pgvector/pgvector.git + cd pgvector + make PG_CONFIG=$HOME/pg16/bin/pg_config install -j$(nproc) > /dev/null - name: Regression tests id: regression_tests run: | - make PG_CONFIG=$HOME/pg16/bin/pg_config installcheck + make PG_CONFIG=$HOME/pg16/bin/pg_config installcheck EXTRA_TESTS="pgvector fuzzystrmatch pg_trgm" continue-on-error: true - name: Dump regression test errors diff --git a/.github/workflows/jdbc-driver.yaml 
b/.github/workflows/jdbc-driver.yaml index 085eb8085..29b368438 100644 --- a/.github/workflows/jdbc-driver.yaml +++ b/.github/workflows/jdbc-driver.yaml @@ -15,7 +15,7 @@ jobs: working-directory: drivers/jdbc steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@v4 - name: Set up Java uses: actions/setup-java@v3 diff --git a/.github/workflows/nodejs-driver.yaml b/.github/workflows/nodejs-driver.yaml index 235f7f502..3d9e07023 100644 --- a/.github/workflows/nodejs-driver.yaml +++ b/.github/workflows/nodejs-driver.yaml @@ -16,10 +16,10 @@ jobs: working-directory: drivers/nodejs/ steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@v4 - name: Run apache/age docker image - run: docker-compose up -d + run: docker compose up -d - name: Set up Node uses: actions/setup-node@v3 diff --git a/.github/workflows/python-driver.yaml b/.github/workflows/python-driver.yaml index 9a9754a6c..099b5c871 100644 --- a/.github/workflows/python-driver.yaml +++ b/.github/workflows/python-driver.yaml @@ -16,10 +16,10 @@ jobs: working-directory: drivers/python steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@v4 - name: Run apache/age docker image - run: docker-compose up -d + run: docker compose up -d - name: Set up python uses: actions/setup-python@v4 diff --git a/Makefile b/Makefile index 400d5a7a1..c0a847830 100644 --- a/Makefile +++ b/Makefile @@ -112,8 +112,13 @@ REGRESS = scan \ name_validation \ jsonb_operators \ list_comprehension \ - map_projection \ - drop + map_projection + +ifneq ($(EXTRA_TESTS),) + REGRESS += $(EXTRA_TESTS) +endif + +REGRESS += drop srcdir=`pwd` diff --git a/age--1.5.0--y.y.y.sql b/age--1.5.0--y.y.y.sql index 9461e3bf3..85c2db7a4 100644 --- a/age--1.5.0--y.y.y.sql +++ b/age--1.5.0--y.y.y.sql @@ -110,3 +110,31 @@ CREATE FUNCTION ag_catalog.graph_exists(graph_name name) RETURNS agtype LANGUAGE c AS 'MODULE_PATHNAME', 'age_graph_exists'; + +CREATE FUNCTION ag_catalog.age_is_valid_label_name(agtype) + RETURNS boolean + LANGUAGE c + 
IMMUTABLE +PARALLEL SAFE +AS 'MODULE_PATHNAME'; + +CREATE OR REPLACE FUNCTION ag_catalog.create_vlabel(graph_name cstring, label_name cstring) + RETURNS void + LANGUAGE c + AS 'MODULE_PATHNAME'; + +CREATE OR REPLACE FUNCTION ag_catalog.create_elabel(graph_name cstring, label_name cstring) + RETURNS void + LANGUAGE c + AS 'MODULE_PATHNAME'; + +CREATE FUNCTION ag_catalog.agtype_to_json(agtype) + RETURNS json + LANGUAGE c + IMMUTABLE +RETURNS NULL ON NULL INPUT +PARALLEL SAFE +AS 'MODULE_PATHNAME'; + +CREATE CAST (agtype AS json) + WITH FUNCTION ag_catalog.agtype_to_json(agtype); \ No newline at end of file diff --git a/docker/Dockerfile b/docker/Dockerfile index 0df81e4d3..336070589 100644 --- a/docker/Dockerfile +++ b/docker/Dockerfile @@ -16,23 +16,15 @@ # limitations under the License. # -FROM postgres:16 +# Build stage: Install necessary development tools for compilation and installation +FROM postgres:16 AS build RUN apt-get update \ && apt-get install -y --no-install-recommends --no-install-suggests \ bison \ build-essential \ flex \ - postgresql-server-dev-16 \ - locales - -ENV LANG=en_US.UTF-8 -ENV LC_COLLATE=en_US.UTF-8 -ENV LC_CTYPE=en_US.UTF-8 - -RUN echo "en_US.UTF-8 UTF-8" > /etc/locale.gen \ - && locale-gen \ - && update-locale LANG=en_US.UTF-8 + postgresql-server-dev-16 COPY . 
/age @@ -40,6 +32,25 @@ WORKDIR /age RUN make && make install + +# Final stage: Create a final image by copying the files created in the build stage +FROM postgres:16 + +RUN apt-get update \ + && apt-get install -y --no-install-recommends --no-install-suggests \ + locales + +RUN echo "en_US.UTF-8 UTF-8" > /etc/locale.gen \ + && locale-gen \ + && update-locale LANG=en_US.UTF-8 + +ENV LANG=en_US.UTF-8 +ENV LC_COLLATE=en_US.UTF-8 +ENV LC_CTYPE=en_US.UTF-8 + +COPY --from=build /usr/lib/postgresql/16/lib/age.so /usr/lib/postgresql/16/lib/ +COPY --from=build /usr/share/postgresql/16/extension/age--1.5.0.sql /usr/share/postgresql/16/extension/ +COPY --from=build /usr/share/postgresql/16/extension/age.control /usr/share/postgresql/16/extension/ COPY docker/docker-entrypoint-initdb.d/00-create-extension-age.sql /docker-entrypoint-initdb.d/00-create-extension-age.sql CMD ["postgres", "-c", "shared_preload_libraries=age"] diff --git a/regress/expected/age_load.out b/regress/expected/age_load.out index 8635a499b..b638e636b 100644 --- a/regress/expected/age_load.out +++ b/regress/expected/age_load.out @@ -19,6 +19,7 @@ \! 
cp -r regress/age_load/data regress/instance/data/age_load LOAD 'age'; SET search_path TO ag_catalog; +-- Create a country using CREATE clause SELECT create_graph('agload_test_graph'); NOTICE: graph "agload_test_graph" has been created create_graph @@ -26,34 +27,79 @@ NOTICE: graph "agload_test_graph" has been created (1 row) -SELECT create_vlabel('agload_test_graph','Country'); -NOTICE: VLabel "Country" has been created - create_vlabel ---------------- - +SELECT * FROM cypher('agload_test_graph', $$CREATE (n:Country {__id__:1}) RETURN n$$) as (n agtype); + n +---------------------------------------------------------------------------------- + {"id": 844424930131969, "label": "Country", "properties": {"__id__": 1}}::vertex (1 row) +-- +-- Load countries with id +-- SELECT load_labels_from_file('agload_test_graph', 'Country', - 'age_load/countries.csv'); + 'age_load/countries.csv', true); load_labels_from_file ----------------------- (1 row) -SELECT create_vlabel('agload_test_graph','City'); -NOTICE: VLabel "City" has been created - create_vlabel ---------------- - +-- A temporary table should have been created with 54 ids; 1 from CREATE and 53 from file +SELECT COUNT(*)=54 FROM "_agload_test_graph_ag_vertex_ids"; + ?column? +---------- + t +(1 row) + +-- Sequence should be equal to max entry id i.e. 248 +SELECT currval('agload_test_graph."Country_id_seq"')=248; + ?column? 
+---------- + t (1 row) +-- Should error out on loading the same file again due to duplicate id +SELECT load_labels_from_file('agload_test_graph', 'Country', + 'age_load/countries.csv', true); +ERROR: Cannot insert duplicate vertex id: 844424930131970 +HINT: Entry id 2 is already used +-- +-- Load cities with id +-- +-- Should create City label automatically and load cities SELECT load_labels_from_file('agload_test_graph', 'City', - 'age_load/cities.csv'); + 'age_load/cities.csv', true); +NOTICE: VLabel "City" has been created load_labels_from_file ----------------------- (1 row) +-- Temporary table should have 54+72485 rows now +SELECT COUNT(*)=54+72485 FROM "_agload_test_graph_ag_vertex_ids"; + ?column? +---------- + t +(1 row) + +-- Sequence should be equal to max entry id i.e. 146941 +SELECT currval('agload_test_graph."City_id_seq"')=146941; + ?column? +---------- + t +(1 row) + +-- Should error out on loading the same file again due to duplicate id +SELECT load_labels_from_file('agload_test_graph', 'City', + 'age_load/cities.csv', true); +ERROR: Cannot insert duplicate vertex id: 1125899906842777 +HINT: Entry id 153 is already used +-- +-- Load edges -- Connects cities to countries +-- +-- Should error out for using vertex label +SELECT load_edges_from_file('agload_test_graph', 'Country', + 'age_load/edges.csv'); +ERROR: label "Country" already exists as edge label SELECT create_elabel('agload_test_graph','has_city'); NOTICE: ELabel "has_city" has been created create_elabel @@ -68,6 +114,17 @@ SELECT load_edges_from_file('agload_test_graph', 'has_city', (1 row) +-- Sequence should be equal to number of edges loaded i.e. 72485 +SELECT currval('agload_test_graph."has_city_id_seq"')=72485; + ?column? 
+---------- + t +(1 row) + +-- Should error out for using edge label +SELECT load_labels_from_file('agload_test_graph', 'has_city', + 'age_load/cities.csv'); +ERROR: label "has_city" already exists as vertex label SELECT table_catalog, table_schema, lower(table_name) as table_name, table_type FROM information_schema.tables WHERE table_schema = 'agload_test_graph' ORDER BY table_name ASC; @@ -83,7 +140,7 @@ WHERE table_schema = 'agload_test_graph' ORDER BY table_name ASC; SELECT COUNT(*) FROM agload_test_graph."Country"; count ------- - 53 + 54 (1 row) SELECT COUNT(*) FROM agload_test_graph."City"; @@ -101,7 +158,7 @@ SELECT COUNT(*) FROM agload_test_graph."has_city"; SELECT COUNT(*) FROM cypher('agload_test_graph', $$MATCH(n) RETURN n$$) as (n agtype); count ------- - 72538 + 72539 (1 row) SELECT COUNT(*) FROM cypher('agload_test_graph', $$MATCH (a)-[e]->(b) RETURN e$$) as (n agtype); @@ -110,6 +167,17 @@ SELECT COUNT(*) FROM cypher('agload_test_graph', $$MATCH (a)-[e]->(b) RETURN e$$ 72485 (1 row) +-- +-- Load countries and cities without id +-- +-- Should load countries in Country label without error since it should use sequence now +SELECT load_labels_from_file('agload_test_graph', 'Country', + 'age_load/countries.csv', false); + load_labels_from_file +----------------------- + +(1 row) + SELECT create_vlabel('agload_test_graph','Country2'); NOTICE: VLabel "Country2" has been created create_vlabel @@ -153,6 +221,7 @@ SELECT COUNT(*) FROM agload_test_graph."City2"; SELECT id FROM agload_test_graph."Country" LIMIT 10; id ----------------- + 844424930131969 844424930131970 844424930131971 844424930131974 @@ -162,7 +231,6 @@ SELECT id FROM agload_test_graph."Country" LIMIT 10; 844424930131996 844424930132002 844424930132023 - 844424930132025 (10 rows) SELECT id FROM agload_test_graph."Country2" LIMIT 10; @@ -180,13 +248,16 @@ SELECT id FROM agload_test_graph."Country2" LIMIT 10; 1688849860263946 (10 rows) +-- Should return 2 rows for Country with same properties, 
but different ids SELECT * FROM cypher('agload_test_graph', $$MATCH(n:Country {iso2 : 'BE'}) RETURN id(n), n.name, n.iso2 $$) as ("id(n)" agtype, "n.name" agtype, "n.iso2" agtype); id(n) | n.name | n.iso2 -----------------+-----------+-------- 844424930131990 | "Belgium" | "BE" -(1 row) + 844424930132223 | "Belgium" | "BE" +(2 rows) +-- Should return 1 row SELECT * FROM cypher('agload_test_graph', $$MATCH(n:Country2 {iso2 : 'BE'}) RETURN id(n), n.name, n.iso2 $$) as ("id(n)" agtype, "n.name" agtype, "n.iso2" agtype); id(n) | n.name | n.iso2 @@ -194,13 +265,16 @@ SELECT * FROM cypher('agload_test_graph', $$MATCH(n:Country2 {iso2 : 'BE'}) 1688849860263942 | "Belgium" | "BE" (1 row) +-- Should return 2 rows for Country with same properties, but different ids SELECT * FROM cypher('agload_test_graph', $$MATCH(n:Country {iso2 : 'AT'}) RETURN id(n), n.name, n.iso2 $$) as ("id(n)" agtype, "n.name" agtype, "n.iso2" agtype); id(n) | n.name | n.iso2 -----------------+-----------+-------- 844424930131983 | "Austria" | "AT" -(1 row) + 844424930132221 | "Austria" | "AT" +(2 rows) +-- Should return 1 row SELECT * FROM cypher('agload_test_graph', $$MATCH(n:Country2 {iso2 : 'AT'}) RETURN id(n), n.name, n.iso2 $$) as ("id(n)" agtype, "n.name" agtype, "n.iso2" agtype); id(n) | n.name | n.iso2 @@ -208,14 +282,23 @@ SELECT * FROM cypher('agload_test_graph', $$MATCH(n:Country2 {iso2 : 'AT'}) 1688849860263940 | "Austria" | "AT" (1 row) +-- Should return 2 rows for Country with same properties, but different ids SELECT * FROM cypher('agload_test_graph', $$ MATCH (u:Country {region : "Europe"}) WHERE u.name =~ 'Cro.*' - RETURN u.name, u.region -$$) AS (result_1 agtype, result_2 agtype); - result_1 | result_2 ------------+---------- - "Croatia" | "Europe" + RETURN id(u), u.name, u.region +$$) AS ("id(u)" agtype, result_1 agtype, result_2 agtype); + id(u) | result_1 | result_2 +-----------------+-----------+---------- + 844424930132023 | "Croatia" | "Europe" + 844424930132226 | "Croatia" | 
"Europe" +(2 rows) + +-- There shouldn't be any duplicates +SELECT * FROM cypher('agload_test_graph', $$return graph_stats('agload_test_graph')$$) as (a agtype); + a +------------------------------------------------------------------------------------------ + {"graph": "agload_test_graph", "num_loaded_edges": 72485, "num_loaded_vertices": 145130} (1 row) SELECT drop_graph('agload_test_graph', true); @@ -236,22 +319,11 @@ NOTICE: graph "agload_test_graph" has been dropped -- -- Test property type conversion -- -SELECT create_graph('agload_conversion'); -NOTICE: graph "agload_conversion" has been created - create_graph --------------- - -(1 row) - -- vertex: load as agtype -SELECT create_vlabel('agload_conversion','Person1'); -NOTICE: VLabel "Person1" has been created - create_vlabel ---------------- - -(1 row) - +-- Should create graph and label automatically SELECT load_labels_from_file('agload_conversion', 'Person1', 'age_load/conversion_vertices.csv', true, true); +NOTICE: graph "agload_conversion" has been created +NOTICE: VLabel "Person1" has been created load_labels_from_file ----------------------- diff --git a/regress/expected/cypher_merge.out b/regress/expected/cypher_merge.out index af5bd9ea4..238a4c472 100644 --- a/regress/expected/cypher_merge.out +++ b/regress/expected/cypher_merge.out @@ -1649,6 +1649,74 @@ SELECT * FROM cypher('issue_1709', $$ MATCH (u) DELETE u $$) AS (a agtype); --- (0 rows) +-- +-- Fix issue 1907: SET on MERGE not storing edge properties +-- +-- setup +SELECT * FROM create_graph('issue_1907'); +NOTICE: graph "issue_1907" has been created + create_graph +-------------- + +(1 row) + +SELECT * from cypher('issue_1907', $$ CREATE (n:Testnode {name: 'Test Node A'}) + RETURN n $$) as (n agtype); + n +--------------------------------------------------------------------------------------------- + {"id": 844424930131969, "label": "Testnode", "properties": {"name": "Test Node A"}}::vertex +(1 row) + +SELECT * from cypher('issue_1907', $$ 
CREATE (n:Testnode {name: 'Test Node B'}) + RETURN n $$) as (n agtype); + n +--------------------------------------------------------------------------------------------- + {"id": 844424930131970, "label": "Testnode", "properties": {"name": "Test Node B"}}::vertex +(1 row) + +SELECT * FROM cypher('issue_1907', $$ MATCH ()-[r]->() RETURN r $$) AS (r agtype); + r +--- +(0 rows) + +-- should return properties added +SELECT * FROM cypher('issue_1907', $$ MERGE (a {name: 'Test Node A'})-[r:RELATED_TO]->(b {name: 'Test Node B'}) + SET r = {property1: 'something', property2: 'else'} + RETURN r $$) AS (r agtype); + r +------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ + {"id": 1125899906842625, "label": "RELATED_TO", "end_id": 281474976710658, "start_id": 281474976710657, "properties": {"property1": "something", "property2": "else"}}::edge +(1 row) + +-- should return properties added +SELECT * FROM cypher('issue_1907', $$ MATCH ()-[r]->() RETURN r $$) AS (r agtype); + r +------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ + {"id": 1125899906842625, "label": "RELATED_TO", "end_id": 281474976710658, "start_id": 281474976710657, "properties": {"property1": "something", "property2": "else"}}::edge +(1 row) + +-- cleanup +SELECT * FROM cypher('issue_1907', $$ MATCH ()-[r]->() DELETE r $$) AS (r agtype); + r +--- +(0 rows) + +-- do it again, but a different way +SELECT * FROM cypher('issue_1907', $$ MERGE (a {name: 'Test Node A'})-[r:RELATED_TO]->(b {name: 'Test Node B'}) + SET r.property1 = 'something', r.property2 = 'else' + RETURN r $$) AS (r agtype); + r +------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ + 
{"id": 1125899906842626, "label": "RELATED_TO", "end_id": 281474976710660, "start_id": 281474976710659, "properties": {"property1": "something", "property2": "else"}}::edge +(1 row) + +-- should return properties added +SELECT * FROM cypher('issue_1907', $$ MATCH ()-[r]->() RETURN r $$) AS (r agtype); + r +------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ + {"id": 1125899906842626, "label": "RELATED_TO", "end_id": 281474976710660, "start_id": 281474976710659, "properties": {"property1": "something", "property2": "else"}}::edge +(1 row) + -- -- clean up graphs -- @@ -1670,6 +1738,18 @@ SELECT * FROM cypher('issue_1709', $$ MATCH (n) DETACH DELETE n $$) AS (a agtype -- -- delete graphs -- +SELECT drop_graph('issue_1907', true); +NOTICE: drop cascades to 4 other objects +DETAIL: drop cascades to table issue_1907._ag_label_vertex +drop cascades to table issue_1907._ag_label_edge +drop cascades to table issue_1907."Testnode" +drop cascades to table issue_1907."RELATED_TO" +NOTICE: graph "issue_1907" has been dropped + drop_graph +------------ + +(1 row) + SELECT drop_graph('cypher_merge', true); NOTICE: drop cascades to 19 other objects DETAIL: drop cascades to table cypher_merge._ag_label_vertex diff --git a/regress/expected/expr.out b/regress/expected/expr.out index bdc3fd936..76eecbe0c 100644 --- a/regress/expected/expr.out +++ b/regress/expected/expr.out @@ -2426,6 +2426,185 @@ SELECT agtype_typecast_path(null); (1 row) +-- +-- Tests for explicit typecast to json +-- +-- Should pass +SELECT agtype_to_json('{}'::agtype); + agtype_to_json +---------------- + {} +(1 row) + +SELECT agtype_to_json('{ "hello": "world" }'::agtype); + agtype_to_json +-------------------- + {"hello": "world"} +(1 row) + +SELECT agtype_to_json('{ "hello": "world" }'::agtype)->>'hello'; + ?column? 
+---------- + world +(1 row) + +SELECT agtype_to_json('[]'::agtype); + agtype_to_json +---------------- + [] +(1 row) + +SELECT agtype_to_json('[1, 2, 3]'::agtype); + agtype_to_json +---------------- + [1, 2, 3] +(1 row) + +SELECT agtype_to_json(null::agtype); + agtype_to_json +---------------- + +(1 row) + +SELECT cast('{}'::agtype as json); + json +------ + {} +(1 row) + +SELECT cast('{ "hello": "world" }'::agtype as json); + json +-------------------- + {"hello": "world"} +(1 row) + +SELECT cast('{ "hello": "world" }'::agtype as json)->>'hello'; + ?column? +---------- + world +(1 row) + +SELECT cast('[]'::agtype as json); + json +------ + [] +(1 row) + +SELECT cast('[1, 2, 3]'::agtype as json); + json +----------- + [1, 2, 3] +(1 row) + +SELECT cast('[1, 2, 3]'::agtype as json)->1; + ?column? +---------- + 2 +(1 row) + +SELECT cast(null::agtype as json); + json +------ + +(1 row) + +SELECT vertex_in_json, vertex_in_json->'id' as id, pg_typeof(vertex_in_json) FROM cypher('type_coercion', $$ MATCH (a) RETURN a $$) AS (vertex_in_json json); + vertex_in_json | id | pg_typeof +--------------------------------------------------------+-----------------+----------- + {"id": 281474976710657, "label": "", "properties": {}} | 281474976710657 | json + {"id": 281474976710658, "label": "", "properties": {}} | 281474976710658 | json +(2 rows) + +SELECT edge_in_json, edge_in_json->'id' as id, pg_typeof(edge_in_json) FROM cypher('type_coercion', $$ MATCH ()-[e]->() RETURN e $$) AS (edge_in_json json); + edge_in_json | id | pg_typeof +--------------------------------------------------------------------------------------------------------------------+-----------------+----------- + {"id": 844424930131969, "label": "edge", "end_id": 281474976710658, "start_id": 281474976710657, "properties": {}} | 844424930131969 | json +(1 row) + +SELECT vle_in_json, vle_in_json->0 as first_edge, pg_typeof(vle_in_json) FROM cypher('type_coercion', $$ MATCH ()-[e *]->() RETURN e $$) AS (vle_in_json 
json); + vle_in_json | first_edge | pg_typeof +----------------------------------------------------------------------------------------------------------------------+--------------------------------------------------------------------------------------------------------------------+----------- + [{"id": 844424930131969, "label": "edge", "end_id": 281474976710658, "start_id": 281474976710657, "properties": {}}] | {"id": 844424930131969, "label": "edge", "end_id": 281474976710658, "start_id": 281474976710657, "properties": {}} | json +(1 row) + +SELECT *, pg_typeof(props_in_json) FROM cypher('type_coercion', $$ MATCH (a) RETURN properties(a) $$) AS (props_in_json json); + props_in_json | pg_typeof +---------------+----------- + {} | json + {} | json +(2 rows) + +SELECT path_in_json, path_in_json->0 as first_node FROM cypher('type_coercion', $$ MATCH p=()-[]->() RETURN p $$) AS (path_in_json json); + path_in_json | first_node +--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+-------------------------------------------------------- + [{"id": 281474976710657, "label": "", "properties": {}}, {"id": 844424930131969, "label": "edge", "end_id": 281474976710658, "start_id": 281474976710657, "properties": {}}, {"id": 281474976710658, "label": "", "properties": {}}] | {"id": 281474976710657, "label": "", "properties": {}} +(1 row) + +SELECT *, pg_typeof(nodes_in_json) FROM cypher('type_coercion', $$ MATCH p=()-[]->() RETURN nodes(p) $$) AS (nodes_in_json json); + nodes_in_json | pg_typeof +------------------------------------------------------------------------------------------------------------------+----------- + [{"id": 281474976710657, "label": "", "properties": {}}, {"id": 281474976710658, "label": "", "properties": {}}] | json +(1 row) + +SELECT *, pg_typeof(rels_in_json) FROM 
cypher('type_coercion', $$ MATCH p=()-[]->() RETURN relationships(p) $$) AS (rels_in_json json); + rels_in_json | pg_typeof +----------------------------------------------------------------------------------------------------------------------+----------- + [{"id": 844424930131969, "label": "edge", "end_id": 281474976710658, "start_id": 281474976710657, "properties": {}}] | json +(1 row) + +SELECT cast(result as json) FROM cypher('type_coercion', $$ MATCH (a) RETURN a $$) AS (result agtype); + result +-------------------------------------------------------- + {"id": 281474976710657, "label": "", "properties": {}} + {"id": 281474976710658, "label": "", "properties": {}} +(2 rows) + +SELECT cast(result as json) FROM cypher('type_coercion', $$ MATCH ()-[e]-() RETURN e $$) AS (result agtype); + result +-------------------------------------------------------------------------------------------------------------------- + {"id": 844424930131969, "label": "edge", "end_id": 281474976710658, "start_id": 281474976710657, "properties": {}} + {"id": 844424930131969, "label": "edge", "end_id": 281474976710658, "start_id": 281474976710657, "properties": {}} +(2 rows) + +SELECT cast(result as json) FROM cypher('type_coercion', $$ MATCH ()-[e *]->() RETURN e $$) AS (result agtype); + result +---------------------------------------------------------------------------------------------------------------------- + [{"id": 844424930131969, "label": "edge", "end_id": 281474976710658, "start_id": 281474976710657, "properties": {}}] +(1 row) + +SELECT cast(result as json) FROM cypher('type_coercion', $$ MATCH p=()-[]->() RETURN p $$) AS (result agtype); + result +-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- + [{"id": 281474976710657, "label": "", "properties": {}}, {"id": 844424930131969, "label": "edge", 
"end_id": 281474976710658, "start_id": 281474976710657, "properties": {}}, {"id": 281474976710658, "label": "", "properties": {}}] +(1 row) + +SELECT pg_typeof(cast(result as json)) FROM cypher('type_coercion', $$ MATCH p=()-[]->() RETURN p $$) AS (result agtype); + pg_typeof +----------- + json +(1 row) + +-- Should fail +SELECT agtype_to_json('1'::agtype); +ERROR: cannot cast agtype integer to json +SELECT agtype_to_json('1.111'::agtype); +ERROR: cannot cast agtype float to json +SELECT agtype_to_json('true'::agtype); +ERROR: cannot cast agtype boolean to json +SELECT agtype_to_json('false'::agtype); +ERROR: cannot cast agtype boolean to json +SELECT agtype_to_json('1::numeric'::agtype); +ERROR: cannot cast agtype numeric to json +SELECT cast(result as json) FROM cypher('type_coercion', $$ RETURN 1 $$) AS (result agtype); +ERROR: cannot cast agtype integer to json +SELECT cast(result as json) FROM cypher('type_coercion', $$ RETURN 1.111 $$) AS (result agtype); +ERROR: cannot cast agtype float to json +SELECT cast(result as json) FROM cypher('type_coercion', $$ RETURN true $$) AS (result agtype); +ERROR: cannot cast agtype boolean to json +SELECT cast(result as json) FROM cypher('type_coercion', $$ RETURN false $$) AS (result agtype); +ERROR: cannot cast agtype boolean to json +SELECT cast(result as json) FROM cypher('type_coercion', $$ RETURN 1::numeric $$) AS (result agtype); +ERROR: cannot cast agtype numeric to json -- test functions -- create some vertices and edges SELECT * FROM cypher('expr', $$CREATE (:v)$$) AS (a agtype); @@ -8463,9 +8642,161 @@ SELECT * FROM cypher('issue_1953', $$ RETURN is_valid_label_name('issue_1953')[0 ERROR: A_indirection could not convert type boolean to agtype LINE 1: ...cypher('issue_1953', $$ RETURN is_valid_label_name('issue_19... ^ +-- +-- Issue 1988: How to update a property which is a keyword. 
+-- +SELECT * FROM create_graph('issue_1988'); +NOTICE: graph "issue_1988" has been created + create_graph +-------------- + +(1 row) + +SELECT * from cypher('issue_1988', $$ + CREATE (p1:Part {part_num: 123}), + (p2:Part {part_num: 345}), + (p3:Part {part_num: 456}), + (p4:Part {part_num: 789}) $$) as (a agtype); + a +--- +(0 rows) + +SELECT * FROM cypher('issue_1988', $$ + MATCH (p) RETURN p $$) as (p agtype); + p +----------------------------------------------------------------------------------- + {"id": 844424930131969, "label": "Part", "properties": {"part_num": 123}}::vertex + {"id": 844424930131970, "label": "Part", "properties": {"part_num": 345}}::vertex + {"id": 844424930131971, "label": "Part", "properties": {"part_num": 456}}::vertex + {"id": 844424930131972, "label": "Part", "properties": {"part_num": 789}}::vertex +(4 rows) + +SELECT * from cypher('issue_1988', $$ + MATCH (p1:Part {part_num: 123}), (p2:Part {part_num: 345}) + CREATE (p1)-[u:used_by { quantity: 1 }]->(p2) RETURN p1, u, p2 $$) as (p1 agtype, u agtype, p2 agtype); + p1 | u | p2 +-----------------------------------------------------------------------------------+-------------------------------------------------------------------------------------------------------------------------------------------+----------------------------------------------------------------------------------- + {"id": 844424930131969, "label": "Part", "properties": {"part_num": 123}}::vertex | {"id": 1125899906842625, "label": "used_by", "end_id": 844424930131970, "start_id": 844424930131969, "properties": {"quantity": 1}}::edge | {"id": 844424930131970, "label": "Part", "properties": {"part_num": 345}}::vertex +(1 row) + +-- should fail +SELECT * FROM cypher('issue_1988', $$ + MATCH (p:Part { part_num: 123 }) SET p.match = 'xyz' RETURN p $$) as (p agtype); +ERROR: syntax error at or near "=" +LINE 2: MATCH (p:Part { part_num: 123 }) SET p.match = 'xyz' RET... 
+ ^ +-- should succeed +SELECT * FROM cypher('issue_1988', $$ + MATCH (p:Part { part_num: 123 }) SET p.`match` = 'xyz' RETURN p $$) as (p agtype); + p +--------------------------------------------------------------------------------------------------- + {"id": 844424930131969, "label": "Part", "properties": {"match": "xyz", "part_num": 123}}::vertex +(1 row) + +SELECT * FROM cypher('issue_1988', $$ + MATCH (p:Part { part_num: 123 }) SET p.`set` = 'xyz' RETURN p $$) as (p agtype); + p +----------------------------------------------------------------------------------------------------------------- + {"id": 844424930131969, "label": "Part", "properties": {"set": "xyz", "match": "xyz", "part_num": 123}}::vertex +(1 row) + +SELECT * FROM cypher('issue_1988', $$ + MATCH (p:Part { part_num: 123 }) SET p.`delete` = 'xyz' RETURN p $$) as (p agtype); + p +---------------------------------------------------------------------------------------------------------------------------------- + {"id": 844424930131969, "label": "Part", "properties": {"set": "xyz", "match": "xyz", "delete": "xyz", "part_num": 123}}::vertex +(1 row) + +SELECT * FROM cypher('issue_1988', $$ + MATCH (p:Part { part_num: 123 }) SET p.`merge` = 'xyz' RETURN p $$) as (p agtype); + p +-------------------------------------------------------------------------------------------------------------------------------------------------- + {"id": 844424930131969, "label": "Part", "properties": {"set": "xyz", "match": "xyz", "merge": "xyz", "delete": "xyz", "part_num": 123}}::vertex +(1 row) + +SELECT * FROM cypher('issue_1988', $$ + MATCH (p:Part { part_num: 123 }) SET p.`create` = 'xyz' RETURN p $$) as (p agtype); + p +------------------------------------------------------------------------------------------------------------------------------------------------------------------- + {"id": 844424930131969, "label": "Part", "properties": {"set": "xyz", "match": "xyz", "merge": "xyz", "create": "xyz", "delete": "xyz", 
"part_num": 123}}::vertex +(1 row) + +-- should succeed +SELECT * FROM cypher('issue_1988', $$ + MATCH (p:Part { part_num: 123 }) SET p.`match` = 'match' RETURN p $$) as (p agtype); + p +--------------------------------------------------------------------------------------------------------------------------------------------------------------------- + {"id": 844424930131969, "label": "Part", "properties": {"set": "xyz", "match": "match", "merge": "xyz", "create": "xyz", "delete": "xyz", "part_num": 123}}::vertex +(1 row) + +SELECT * FROM cypher('issue_1988', $$ + MATCH (p:Part { part_num: 123 }) SET p.`set` = 'set' RETURN p $$) as (p agtype); + p +--------------------------------------------------------------------------------------------------------------------------------------------------------------------- + {"id": 844424930131969, "label": "Part", "properties": {"set": "set", "match": "match", "merge": "xyz", "create": "xyz", "delete": "xyz", "part_num": 123}}::vertex +(1 row) + +SELECT * FROM cypher('issue_1988', $$ + MATCH (p:Part { part_num: 123 }) SET p.`delete` = 'delete' RETURN p $$) as (p agtype); + p +------------------------------------------------------------------------------------------------------------------------------------------------------------------------ + {"id": 844424930131969, "label": "Part", "properties": {"set": "set", "match": "match", "merge": "xyz", "create": "xyz", "delete": "delete", "part_num": 123}}::vertex +(1 row) + +SELECT * FROM cypher('issue_1988', $$ + MATCH (p:Part { part_num: 123 }) SET p.`merge` = 'merge' RETURN p $$) as (p agtype); + p +-------------------------------------------------------------------------------------------------------------------------------------------------------------------------- + {"id": 844424930131969, "label": "Part", "properties": {"set": "set", "match": "match", "merge": "merge", "create": "xyz", "delete": "delete", "part_num": 123}}::vertex +(1 row) + +SELECT * FROM 
cypher('issue_1988', $$ + MATCH (p:Part { part_num: 123 }) SET p.`create` = 'create' RETURN p $$) as (p agtype); + p +----------------------------------------------------------------------------------------------------------------------------------------------------------------------------- + {"id": 844424930131969, "label": "Part", "properties": {"set": "set", "match": "match", "merge": "merge", "create": "create", "delete": "delete", "part_num": 123}}::vertex +(1 row) + +SELECT * FROM cypher('issue_1988', $$ + MATCH (p) RETURN p $$) as (p agtype); + p +----------------------------------------------------------------------------------------------------------------------------------------------------------------------------- + {"id": 844424930131970, "label": "Part", "properties": {"part_num": 345}}::vertex + {"id": 844424930131971, "label": "Part", "properties": {"part_num": 456}}::vertex + {"id": 844424930131972, "label": "Part", "properties": {"part_num": 789}}::vertex + {"id": 844424930131969, "label": "Part", "properties": {"set": "set", "match": "match", "merge": "merge", "create": "create", "delete": "delete", "part_num": 123}}::vertex +(4 rows) + +-- +-- Issue 2093: Server crashes when executing SELECT agtype_hash_cmp(agtype_in('[null, null, null, null, null]')); +-- +SELECT agtype_access_operator(agtype_in('[null, null]')); + agtype_access_operator +------------------------ + +(1 row) + +SELECT agtype_hash_cmp(agtype_in('[null, null, null, null, null]')); + agtype_hash_cmp +----------------- + -505290721 +(1 row) + -- -- Cleanup -- +SELECT * FROM drop_graph('issue_1988', true); +NOTICE: drop cascades to 4 other objects +DETAIL: drop cascades to table issue_1988._ag_label_vertex +drop cascades to table issue_1988._ag_label_edge +drop cascades to table issue_1988."Part" +drop cascades to table issue_1988.used_by +NOTICE: graph "issue_1988" has been dropped + drop_graph +------------ + +(1 row) + SELECT * FROM drop_graph('issue_1953', true); NOTICE: drop 
cascades to 2 other objects DETAIL: drop cascades to table issue_1953._ag_label_vertex diff --git a/regress/expected/fuzzystrmatch.out b/regress/expected/fuzzystrmatch.out new file mode 100644 index 000000000..1d4613ee3 --- /dev/null +++ b/regress/expected/fuzzystrmatch.out @@ -0,0 +1,177 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +LOAD 'age'; +SET search_path=ag_catalog; +SELECT create_graph('graph'); +NOTICE: graph "graph" has been created + create_graph +-------------- + +(1 row) + +-- Should error out +SELECT * FROM cypher('graph', $$ RETURN soundex("hello") $$) AS (n agtype); +ERROR: function soundex does not exist +LINE 1: SELECT * FROM cypher('graph', $$ RETURN soundex("hello") $$)... + ^ +HINT: If the function is from an external extension, make sure the extension is installed and the function is in the search path. +-- Create the extension in the public schema +CREATE EXTENSION fuzzystrmatch SCHEMA public; +-- Should error out +SELECT * FROM cypher('graph', $$ RETURN soundex("hello") $$) AS (n agtype); +ERROR: function soundex does not exist +LINE 1: SELECT * FROM cypher('graph', $$ RETURN soundex("hello") $$)... 
+ ^ +HINT: If the function is from an external extension, make sure the extension is installed and the function is in the search path. +-- Should work +SET search_path=ag_catalog, public; +SELECT * FROM cypher('graph', $$ CREATE (:Person {name: "Jane"}), + (:Person {name: "John"}), + (:Person {name: "Jone"}), + (:Person {name: "Jack"}), + (:Person {name: "Jax"}), + (:Person {name: "Jake"}), + (:Person {name: "Julie"}), + (:Person {name: "Julius"}), + (:Person {name: "Jill"}), + (:Person {name: "Jillie"}), + (:Person {name: "Julian"}) +$$) AS (n agtype); + n +--- +(0 rows) + +SELECT * FROM cypher('graph', $$ MATCH (p) return soundex(p.name) $$) AS (n agtype); + n +-------- + "J500" + "J500" + "J500" + "J200" + "J200" + "J200" + "J400" + "J420" + "J400" + "J400" + "J450" +(11 rows) + +SELECT * FROM cypher('graph', $$ MATCH (p) return levenshtein(p.name, "John") $$) AS (n agtype); + n +--- + 3 + 0 + 2 + 3 + 3 + 3 + 4 + 5 + 3 + 5 + 4 +(11 rows) + +SELECT * FROM cypher('graph', $$ MATCH (p) return difference(p.name, "John") $$) AS (n agtype); + n +--- + 4 + 4 + 4 + 3 + 3 + 3 + 3 + 2 + 3 + 3 + 2 +(11 rows) + +SELECT * FROM cypher('graph', $$ MATCH (p) return metaphone(p.name, 4) $$) AS (n agtype); + n +------- + "JN" + "JN" + "JN" + "JK" + "JKS" + "JK" + "JL" + "JLS" + "JL" + "JL" + "JLN" +(11 rows) + +SELECT * FROM cypher('graph', $$ MATCH (p) return dmetaphone(p.name) $$) AS (n agtype); + n +------- + "JN" + "JN" + "JN" + "JK" + "JKS" + "JK" + "JL" + "JLS" + "JL" + "JL" + "JLN" +(11 rows) + +-- Difference is basically similarity using soundex, https://www.postgresql.org/docs/current/fuzzystrmatch.html +SELECT * FROM cypher('graph', $$ MATCH (p) return p ORDER BY difference(p.name, "Jon") DESC LIMIT 3$$) AS (n agtype); + n +------------------------------------------------------------------------------------ + {"id": 844424930131970, "label": "Person", "properties": {"name": "John"}}::vertex + {"id": 844424930131971, "label": "Person", "properties": {"name": 
"Jone"}}::vertex + {"id": 844424930131969, "label": "Person", "properties": {"name": "Jane"}}::vertex +(3 rows) + +SELECT * FROM cypher('graph', $$ MATCH (p) return p ORDER BY difference(p.name, "Jak") DESC LIMIT 3$$) AS (n agtype); + n +------------------------------------------------------------------------------------ + {"id": 844424930131972, "label": "Person", "properties": {"name": "Jack"}}::vertex + {"id": 844424930131973, "label": "Person", "properties": {"name": "Jax"}}::vertex + {"id": 844424930131974, "label": "Person", "properties": {"name": "Jake"}}::vertex +(3 rows) + +SELECT * FROM cypher('graph', $$ MATCH (p) return p ORDER BY difference(p.name, "Jil") DESC LIMIT 3$$) AS (n agtype); + n +-------------------------------------------------------------------------------------- + {"id": 844424930131975, "label": "Person", "properties": {"name": "Julie"}}::vertex + {"id": 844424930131977, "label": "Person", "properties": {"name": "Jill"}}::vertex + {"id": 844424930131978, "label": "Person", "properties": {"name": "Jillie"}}::vertex +(3 rows) + +-- Clean up +SELECT drop_graph('graph', true); +NOTICE: drop cascades to 3 other objects +DETAIL: drop cascades to table graph._ag_label_vertex +drop cascades to table graph._ag_label_edge +drop cascades to table graph."Person" +NOTICE: graph "graph" has been dropped + drop_graph +------------ + +(1 row) + +DROP EXTENSION fuzzystrmatch CASCADE; diff --git a/regress/expected/list_comprehension.out b/regress/expected/list_comprehension.out index 0260fa5ef..bf5731d2a 100644 --- a/regress/expected/list_comprehension.out +++ b/regress/expected/list_comprehension.out @@ -617,6 +617,53 @@ SELECT * FROM cypher('list_comprehension', $$ MATCH (u) WITH *, [i in [1,2,3]] a [1, 2, 3] (1 row) +-- Issue 1955 - variable reference in list comprehension +SELECT * FROM cypher('list_comprehension', $$ MATCH (u) WHERE u.list=[i IN u.list] RETURN u $$) AS (result agtype); + result 
+--------------------------------------------------------------------------------------------------------------------------------------------------------------- + {"id": 281474976710658, "label": "", "properties": {"list": [1, 3, 5, 7, 9, 11, 13]}}::vertex + {"id": 281474976710660, "label": "", "properties": {"list": [12, 14, 16, 18, 20, 22, 24]}}::vertex + {"id": 281474976710662, "label": "", "properties": {"list": [25.0, 49.0, 81.0, 121.0, 169.0]}}::vertex + {"id": 281474976710657, "label": "", "properties": {"a": [], "b": [0, 1, 2, 3, 4, 5], "c": [0, 2, 4, 6, 8, 10, 12], "list": [0, 2, 4, 6, 8, 10, 12]}}::vertex + {"id": 281474976710663, "label": "", "properties": {"list": [1, 2, 3]}}::vertex + {"id": 281474976710659, "label": "", "properties": {"list": []}}::vertex + {"id": 281474976710661, "label": "", "properties": {"list": [6, 8, 10, 12]}}::vertex + {"id": 844424930131969, "label": "csm_match", "properties": {"list": ["abc", "def", "ghi"]}}::vertex +(8 rows) + +SELECT * FROM cypher('list_comprehension', $$ MATCH (u) WHERE u.list=[i IN u.list WHERE i>0] RETURN u $$) AS (result agtype); + result +-------------------------------------------------------------------------------------------------------- + {"id": 281474976710658, "label": "", "properties": {"list": [1, 3, 5, 7, 9, 11, 13]}}::vertex + {"id": 281474976710660, "label": "", "properties": {"list": [12, 14, 16, 18, 20, 22, 24]}}::vertex + {"id": 281474976710662, "label": "", "properties": {"list": [25.0, 49.0, 81.0, 121.0, 169.0]}}::vertex + {"id": 281474976710663, "label": "", "properties": {"list": [1, 2, 3]}}::vertex + {"id": 281474976710661, "label": "", "properties": {"list": [6, 8, 10, 12]}}::vertex +(5 rows) + +SELECT * FROM cypher('list_comprehension', $$ MATCH (u) WHERE size([e in u.list where e starts with "a"])>0 RETURN u $$) AS (result agtype); + result +------------------------------------------------------------------------------------------------------ + {"id": 844424930131969, "label": 
"csm_match", "properties": {"list": ["abc", "def", "ghi"]}}::vertex +(1 row) + +SELECT * FROM cypher('list_comprehension', $$ MATCH (u ={list:[i IN u.list | i+1]}) RETURN u $$) AS (result agtype); + result +-------------------------------------------------------------------------- + {"id": 281474976710659, "label": "", "properties": {"list": []}}::vertex +(1 row) + +SELECT * FROM cypher('list_comprehension', $$ MATCH (u ={list:[i IN u.list WHERE i>0]}) RETURN u$$) AS (result agtype); + result +-------------------------------------------------------------------------------------------------------- + {"id": 281474976710658, "label": "", "properties": {"list": [1, 3, 5, 7, 9, 11, 13]}}::vertex + {"id": 281474976710660, "label": "", "properties": {"list": [12, 14, 16, 18, 20, 22, 24]}}::vertex + {"id": 281474976710662, "label": "", "properties": {"list": [25.0, 49.0, 81.0, 121.0, 169.0]}}::vertex + {"id": 281474976710663, "label": "", "properties": {"list": [1, 2, 3]}}::vertex + {"id": 281474976710661, "label": "", "properties": {"list": [6, 8, 10, 12]}}::vertex +(5 rows) + +-- Clean up SELECT * FROM drop_graph('list_comprehension', true); NOTICE: drop cascades to 4 other objects DETAIL: drop cascades to table list_comprehension._ag_label_vertex diff --git a/regress/expected/name_validation.out b/regress/expected/name_validation.out index 232582bc2..d0ba31ec1 100644 --- a/regress/expected/name_validation.out +++ b/regress/expected/name_validation.out @@ -233,8 +233,10 @@ NOTICE: graph "graph123" has been created -- length -- invalid SELECT create_vlabel('graph123', ''); +WARNING: label name length not in range (1 <= length <= 63) length = 0 ERROR: label name is invalid SELECT create_elabel('graph123', ''); +WARNING: label name length not in range (1 <= length <= 63) length = 0 ERROR: label name is invalid -- valid SELECT create_vlabel('graph123', 'labelx'); @@ -396,9 +398,57 @@ SELECT * from cypher('graph123', $$ return is_valid_label_name('label2') $$) as true (1 row) 
+-- issue 1986: label name validation of long names. +-- Label names are relation names which are restricted to NAMEDATALEN-1 in size. +-- However, we can't validate PG type Names due to namein() truncating anything +-- over NAMEDATALEN-1. To allow the label names to be checked over NAMEDATELEN-1 +-- we changed the input type from PG's Name to cstring. These checks are to +-- verify that these can now be caught. +-- +-- should return false and a warning. +SELECT * from cypher('graph123', $$ return is_valid_label_name('label01234567890123456789012345678901234567890123456789012345678') $$) as (result agtype); +WARNING: label name length not in range (1 <= length <= 63) length = 64 + result +-------- + false +(1 row) + +-- should be successful +SELECT * from cypher('graph123', $$ return is_valid_label_name('label0123456789012345678901234567890123456789012345678901234567') $$) as (result agtype); + result +-------- + true +(1 row) + +-- +-- now check vlabel creation, should fail +SELECT create_vlabel('graph123', 'vlabel01234567890123456789012345678901234567890123456789012345678901234567890123456789012345678'); +WARNING: label name length not in range (1 <= length <= 63) length = 95 +ERROR: label name is invalid +-- should be successful +SELECT create_vlabel('graph123', 'vlabel012345678901234567890123456789012345678901234567890123456'); +NOTICE: VLabel "vlabel012345678901234567890123456789012345678901234567890123456" has been created + create_vlabel +--------------- + +(1 row) + +-- +-- now check elabel creation, should fail +SELECT create_elabel('graph123', 'elabel0123456789012345678901234567890123456789012345678901234567'); +WARNING: label name length not in range (1 <= length <= 63) length = 64 +ERROR: label name is invalid +-- should be okay +SELECT create_elabel('graph123', 'elabel012345678901234567890123456789012345678901234567890123456'); +NOTICE: ELabel "elabel012345678901234567890123456789012345678901234567890123456" has been created + create_elabel 
+--------------- + +(1 row) + -- clean up SELECT drop_graph('graph123', true); -NOTICE: drop cascades to 18 other objects +NOTICE: drop cascades to 20 other objects DETAIL: drop cascades to table graph123._ag_label_vertex drop cascades to table graph123._ag_label_edge drop cascades to table graph123.labelx @@ -417,12 +467,22 @@ drop cascades to table graph123.mylabel drop cascades to table graph123."A" drop cascades to table graph123.mylabel2 drop cascades to table graph123."C" +drop cascades to table graph123.vlabel012345678901234567890123456789012345678901234567890123456 +drop cascades to table graph123.elabel012345678901234567890123456789012345678901234567890123456 NOTICE: graph "graph123" has been dropped drop_graph ------------ (1 row) +-- +-- Test GUC names +-- +SET age.enable_containment TO ON; +SET age.invalid_parameter TO ON; +ERROR: invalid configuration parameter name "age.invalid_parameter" +DETAIL: "age" is a reserved prefix. +SET any_placeholder.any_parameter TO ON; -- -- End of test -- diff --git a/regress/expected/pg_trgm.out b/regress/expected/pg_trgm.out new file mode 100644 index 000000000..5b2a2b9c8 --- /dev/null +++ b/regress/expected/pg_trgm.out @@ -0,0 +1,120 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. 
See the License for the + * specific language governing permissions and limitations + * under the License. + */ +LOAD 'age'; +SET search_path=ag_catalog; +SELECT create_graph('graph'); +NOTICE: graph "graph" has been created + create_graph +-------------- + +(1 row) + +-- Should error out +SELECT * FROM cypher('graph', $$ RETURN show_trgm("hello") $$) AS (n agtype); +ERROR: function show_trgm does not exist +LINE 1: SELECT * FROM cypher('graph', $$ RETURN show_trgm("hello") $... + ^ +HINT: If the function is from an external extension, make sure the extension is installed and the function is in the search path. +-- Create the extension in the public schema +CREATE EXTENSION pg_trgm SCHEMA public; +-- Should error out +SELECT * FROM cypher('graph', $$ RETURN show_trgm("hello") $$) AS (n agtype); +ERROR: function show_trgm does not exist +LINE 1: SELECT * FROM cypher('graph', $$ RETURN show_trgm("hello") $... + ^ +HINT: If the function is from an external extension, make sure the extension is installed and the function is in the search path. 
+-- Should work +SET search_path=ag_catalog, public; +SELECT * FROM cypher('graph', $$ CREATE (:Person {name: "Jane"}), + (:Person {name: "John"}), + (:Person {name: "Jone"}), + (:Person {name: "Jack"}), + (:Person {name: "Jax"}), + (:Person {name: "Jake"}), + (:Person {name: "Julie"}), + (:Person {name: "Julius"}), + (:Person {name: "Jill"}), + (:Person {name: "Jillie"}), + (:Person {name: "Julian"}) +$$) AS (n agtype); + n +--- +(0 rows) + +SELECT * FROM cypher('graph', $$ MATCH (p) return show_trgm(p.name) $$) AS (n text[]); + n +------------------------------------- + {" j"," ja",ane,jan,"ne "} + {" j"," jo","hn ",joh,ohn} + {" j"," jo",jon,"ne ",one} + {" j"," ja",ack,"ck ",jac} + {" j"," ja","ax ",jax} + {" j"," ja",ake,jak,"ke "} + {" j"," ju","ie ",jul,lie,uli} + {" j"," ju",ius,jul,liu,uli,"us "} + {" j"," ji",ill,jil,"ll "} + {" j"," ji","ie ",ill,jil,lie,lli} + {" j"," ju","an ",ian,jul,lia,uli} +(11 rows) + +SELECT * FROM cypher('graph', $$ MATCH (p) with p, similarity(p.name, "Jon") as sim return p.name, sim ORDER BY sim DESC $$) AS (n agtype, s real); + n | s +----------+------------ + "Jone" | 0.5 + "John" | 0.2857143 + "Jax" | 0.14285715 + "Jane" | 0.125 + "Jack" | 0.125 + "Jake" | 0.125 + "Jill" | 0.125 + "Julie" | 0.11111111 + "Julius" | 0.1 + "Julian" | 0.1 + "Jillie" | 0.1 +(11 rows) + +SELECT * FROM cypher('graph', $$ MATCH (p) with p, word_similarity(p.name, "Jon") as sim return p.name, sim ORDER BY sim DESC $$) AS (n agtype, s real); + n | s +----------+------------ + "Jone" | 0.6 + "John" | 0.4 + "Jax" | 0.25 + "Jane" | 0.2 + "Jack" | 0.2 + "Jake" | 0.2 + "Jill" | 0.2 + "Julie" | 0.16666667 + "Julius" | 0.14285715 + "Julian" | 0.14285715 + "Jillie" | 0.14285715 +(11 rows) + +-- Clean up +SELECT drop_graph('graph', true); +NOTICE: drop cascades to 3 other objects +DETAIL: drop cascades to table graph._ag_label_vertex +drop cascades to table graph._ag_label_edge +drop cascades to table graph."Person" +NOTICE: graph "graph" has been dropped + 
drop_graph +------------ + +(1 row) + +DROP EXTENSION pg_trgm CASCADE; diff --git a/regress/expected/pgvector.out b/regress/expected/pgvector.out new file mode 100644 index 000000000..f1bd53ed4 --- /dev/null +++ b/regress/expected/pgvector.out @@ -0,0 +1,285 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +LOAD 'age'; +SET search_path=ag_catalog; +SELECT create_graph('graph'); +NOTICE: graph "graph" has been created + create_graph +-------------- + +(1 row) + +-- Should error out +SELECT * FROM cypher('graph', $$ RETURN cosine_distance("[1,2,3]", "[1,2,3]") $$) AS (n agtype); +ERROR: function cosine_distance does not exist +LINE 1: SELECT * FROM cypher('graph', $$ RETURN cosine_distance("[1,... + ^ +HINT: If the function is from an external extension, make sure the extension is installed and the function is in the search path. +-- Create the extension in the public schema +CREATE EXTENSION vector SCHEMA public; +-- Should error out +SELECT * FROM cypher('graph', $$ RETURN cosine_distance("[1,2,3]", "[1,2,3]") $$) AS (n agtype); +ERROR: function cosine_distance does not exist +LINE 1: SELECT * FROM cypher('graph', $$ RETURN cosine_distance("[1,... 
+ ^ +HINT: If the function is from an external extension, make sure the extension is installed and the function is in the search path. +-- Should work +SET search_path=ag_catalog, public; +SELECT create_graph('graph'); +ERROR: graph "graph" already exists +SELECT * FROM cypher('graph', $$ RETURN "[1.22,2.22,3.33]"::vector $$) AS (n vector); + n +------------------ + [1.22,2.22,3.33] +(1 row) + +SELECT * FROM cypher('graph', $$ RETURN "[1.22,2.22,3.33]"::vector $$) AS (n halfvec); + n +--------------------------------- + [1.2197266,2.2207031,3.3300781] +(1 row) + +SELECT * FROM cypher('graph', $$ RETURN "[1.22,2.22,3.33]"::vector $$) AS (n sparsevec); + n +-------------------------- + {1:1.22,2:2.22,3:3.33}/3 +(1 row) + +SELECT * FROM cypher('graph', $$ RETURN l2_distance("[1,2,3]", "[1,2,4]") $$) AS (n agtype); + n +----- + 1.0 +(1 row) + +SELECT * FROM cypher('graph', $$ RETURN inner_product("[1,2,3]", "[1,2,4]") $$) AS (n agtype); + n +------ + 17.0 +(1 row) + +SELECT * FROM cypher('graph', $$ RETURN cosine_distance("[1,2,3]", "[1,2,4]") $$) AS (n agtype); + n +--------------------- + 0.00853986601633272 +(1 row) + +SELECT * FROM cypher('graph', $$ RETURN l1_distance("[1,2,3]", "[1,2,4]") $$) AS (n agtype); + n +----- + 1.0 +(1 row) + +SELECT * FROM cypher('graph', $$ RETURN vector_dims("[1,2,3]") $$) AS (n agtype); + n +--- + 3 +(1 row) + +SELECT * FROM cypher('graph', $$ RETURN vector_norm("[1,2,3]") $$) AS (n agtype); + n +-------------------- + 3.7416573867739413 +(1 row) + +SELECT * FROM cypher('graph', $$ RETURN l2_normalize("[1,2,3]") $$) AS (n vector); + n +----------------------------------- + [0.26726124,0.5345225,0.80178374] +(1 row) + +SELECT * FROM cypher('graph', $$ RETURN l2_normalize("[1,2,3]")::text $$) AS (n agtype); + n +------------------------------------- + [0.26726124, 0.5345225, 0.80178374] +(1 row) + +SELECT * FROM cypher('graph', $$ RETURN subvector("[1,2,3,4,5,6]", 2, 4) $$) AS (n vector); + n +----------- + [2,3,4,5] +(1 row) + +SELECT 
* FROM cypher('graph', $$ RETURN subvector("[1,2,3,4,5,6]", 2, 4)::text $$) AS (n agtype); + n +-------------- + [2, 3, 4, 5] +(1 row) + +SELECT * FROM cypher('graph', $$ RETURN binary_quantize("[1,2,4]") $$) AS (n bit); + n +----- + 111 +(1 row) + +-- An example usage +SELECT * FROM cypher('graph', $$ + CREATE (:Movie {title: "The Matrix", year: 1999, genre: "Action", plot: "A computer hacker learns about the true nature of reality and joins a rebellion to free humanity from a simulated world controlled by machines.", embedding: "[-0.07594558, 0.04081754, 0.29592122, -0.11921061]"}), + (:Movie {title: "The Matrix Reloaded", year: 2003, genre: "Action", plot: "The rebels continue their fight against the machines, uncovering deeper truths about the Matrix and the nature of their mission.", embedding: "[0.30228977, -0.22839354, 0.35070436, 0.01262819]"}), + (:Movie {title: "The Matrix Revolutions", year: 2003, genre: "Action", plot: "The final battle between humans and machines reaches its climax as the fate of both worlds hangs in the balance.", embedding: "[ 0.12240622, -0.29752459, 0.22620453, 0.24454723]"}), + (:Movie {title: "The Matrix Resurrections", year: 2021, genre: "Action", plot: "Neo returns to a new version of the Matrix and must once again fight to save the people from the control of the machines.", embedding: "[ 0.34717246, -0.13820869, 0.29214213, 0.08090488]"}), + (:Movie {title: "Inception", year: 2010, genre: "Sci-Fi", plot: "A skilled thief is given a chance at redemption if he can successfully perform an inception: planting an idea into someone’s subconscious.", embedding: "[ 0.03923657, 0.39284106, -0.20927092, -0.17770818]"}), + (:Movie {title: "Interstellar", year: 2014, genre: "Sci-Fi", plot: "A group of explorers travel through a wormhole in space in an attempt to ensure humanity’s survival.", embedding: "[-0.29302418, -0.39615033, -0.23393948, -0.09601383]"}), + (:Movie {title: "Avatar", year: 2009, genre: "Sci-Fi", plot: "A paraplegic 
Marine is sent to the moon Pandora, where he becomes torn between following orders and protecting the world he feels is his home.", embedding: "[-0.13663386, 0.00635589, -0.03038832, -0.08252723]"}), + (:Movie {title: "Blade Runner", year: 1982, genre: "Sci-Fi", plot: "A blade runner must pursue and terminate four replicants who have stolen a ship in space and returned to Earth.", embedding: "[ 0.27215557, -0.1479577, -0.09972772, -0.08234394]"}), + (:Movie {title: "Blade Runner 2049", year: 2017, genre: "Sci-Fi", plot: "A new blade runner unearths a long-buried secret that has the potential to plunge what’s left of society into chaos.", embedding: "[ 0.21560573, -0.07505179, -0.01331814, 0.13403069]"}), + (:Movie {title: "Minority Report", year: 2002, genre: "Sci-Fi", plot: "In a future where a special police unit can arrest murderers before they commit their crimes, a top officer is accused of a future murder.", embedding: "[ 0.24008012, 0.44954908, -0.30905488, 0.15195407]"}), + (:Movie {title: "Total Recall", year: 1990, genre: "Sci-Fi", plot: "A construction worker discovers that his memories have been implanted and becomes embroiled in a conspiracy on Mars.", embedding: "[-0.17471036, 0.14695261, -0.06272433, -0.21795064]"}), + (:Movie {title: "Elysium", year: 2013, genre: "Sci-Fi", plot: "In a future where the rich live on a luxurious space station while the rest of humanity lives in squalor, a man fights to bring equality.", embedding: "[-0.33280967, 0.07733926, 0.11015328, 0.53382836]"}), + (:Movie {title: "Gattaca", year: 1997, genre: "Sci-Fi", plot: "In a future where genetic engineering determines social class, a man defies his fate to achieve his dreams.", embedding: "[-0.21629286, 0.31114665, 0.08303899, 0.46199759]"}), + (:Movie {title: "The Fifth Element", year: 1997, genre: "Sci-Fi", plot: "In a futuristic world, a cab driver becomes the key to saving humanity from an impending cosmic threat.", embedding: "[-0.11528205, -0.0208782, -0.0735215, 
0.14327449]"}), + (:Movie {title: "The Terminator", year: 1984, genre: "Action", plot: "A cyborg assassin is sent back in time to kill the mother of the future resistance leader.", embedding: "[ 0.33666933, 0.18040994, -0.01075103, -0.11117851]"}), + (:Movie {title: "Terminator 2: Judgment Day", year: 1991, genre: "Action", plot: "A reprogrammed Terminator is sent to protect the future leader of the human resistance from a more advanced Terminator.", embedding: "[ 0.34698868, 0.06439331, 0.06232323, -0.19534876]"}), + (:Movie {title: "Jurassic Park", year: 1993, genre: "Adventure", plot: "Scientists clone dinosaurs to create a theme park, but things go awry when the creatures escape.", embedding: "[ 0.01794725, -0.11434246, -0.46831815, -0.01049593]"}), + (:Movie {title: "The Avengers", year: 2012, genre: "Action", plot: "Superheroes assemble to face a global threat from an alien invasion led by Loki.", embedding: "[ 0.00546514, -0.37005171, -0.42612838, 0.07968612]"}) +$$) AS (result agtype); + result +-------- +(0 rows) + +SELECT * FROM cypher('graph', $$ MATCH (m:Movie) RETURN m.title, (m.embedding)::vector $$) AS (title agtype, embedding vector); + title | embedding +------------------------------+--------------------------------------------------- + "The Matrix" | [-0.07594558,0.04081754,0.2959212,-0.11921061] + "The Matrix Reloaded" | [0.30228978,-0.22839354,0.35070437,0.01262819] + "The Matrix Revolutions" | [0.12240622,-0.2975246,0.22620453,0.24454723] + "The Matrix Resurrections" | [0.34717247,-0.13820869,0.29214212,0.08090488] + "Inception" | [0.03923657,0.39284107,-0.20927092,-0.17770818] + "Interstellar" | [-0.29302418,-0.39615032,-0.23393948,-0.09601383] + "Avatar" | [-0.13663386,0.00635589,-0.03038832,-0.08252723] + "Blade Runner" | [0.27215558,-0.1479577,-0.09972772,-0.08234394] + "Blade Runner 2049" | [0.21560574,-0.07505179,-0.01331814,0.13403068] + "Minority Report" | [0.24008012,0.44954908,-0.30905488,0.15195407] + "Total Recall" | 
[-0.17471036,0.14695261,-0.06272433,-0.21795064] + "Elysium" | [-0.33280966,0.07733926,0.11015328,0.5338284] + "Gattaca" | [-0.21629286,0.31114665,0.08303899,0.4619976] + "The Fifth Element" | [-0.11528205,-0.0208782,-0.0735215,0.14327449] + "The Terminator" | [0.33666933,0.18040994,-0.01075103,-0.11117851] + "Terminator 2: Judgment Day" | [0.34698868,0.06439331,0.06232323,-0.19534875] + "Jurassic Park" | [0.01794725,-0.11434246,-0.46831816,-0.01049593] + "The Avengers" | [0.00546514,-0.3700517,-0.4261284,0.07968612] +(18 rows) + +-- Check the dimension of the embedding +SELECT * FROM cypher('graph', $$ MATCH (m:Movie) RETURN m.title, vector_dims(m.embedding) $$) AS (title agtype, dimension int); + title | dimension +------------------------------+----------- + "The Matrix" | 4 + "The Matrix Reloaded" | 4 + "The Matrix Revolutions" | 4 + "The Matrix Resurrections" | 4 + "Inception" | 4 + "Interstellar" | 4 + "Avatar" | 4 + "Blade Runner" | 4 + "Blade Runner 2049" | 4 + "Minority Report" | 4 + "Total Recall" | 4 + "Elysium" | 4 + "Gattaca" | 4 + "The Fifth Element" | 4 + "The Terminator" | 4 + "Terminator 2: Judgment Day" | 4 + "Jurassic Park" | 4 + "The Avengers" | 4 +(18 rows) + +-- Get top 4 most similar movies to The Terminator using cosine distance +SELECT * FROM cypher('graph', $$ MATCH (m:Movie), (search:Movie {title: "The Terminator"}) + RETURN m.title ORDER BY cosine_distance(m.embedding, search.embedding) ASC LIMIT 4 +$$) AS (title agtype); + title +------------------------------ + "The Terminator" + "Terminator 2: Judgment Day" + "Minority Report" + "Blade Runner" +(4 rows) + +-- Get top 4 most similar movies to The Matrix using cosine distance +SELECT * FROM cypher('graph', $$ MATCH (m:Movie), (search:Movie {title: "The Matrix"}) + RETURN m.title ORDER BY cosine_distance(m.embedding, search.embedding) ASC LIMIT 4 +$$) AS (title agtype); + title +---------------------------- + "The Matrix" + "The Matrix Reloaded" + "The Matrix Resurrections" + "Total 
Recall" +(4 rows) + +-- l2 norm of the embedding +SELECT * FROM cypher('graph', $$ MATCH (m:Movie) set m.embedding=(l2_normalize(m.embedding))::text return m.title, m.embedding $$) AS (title agtype, embedding agtype); + title | embedding +------------------------------+---------------------------------------------------- + "The Matrix" | "[-0.22980669,0.12351139,0.89543957,-0.36072403]" + "The Matrix Reloaded" | "[0.58534974,-0.44225806,0.6790991,0.024453051]" + "The Matrix Revolutions" | "[0.26431033,-0.6424414,0.4884408,0.528048]" + "The Matrix Resurrections" | "[0.72151977,-0.28723562,0.60715157,0.16814256]" + "Inception" | "[0.08159459,0.81693435,-0.43519026,-0.3695538]" + "Interstellar" | "[-0.5290723,-0.71527255,-0.4223914,-0.17335857]" + "Avatar" | "[-0.84023285,0.039085682,-0.18687363,-0.507503]" + "Blade Runner" | "[0.81074023,-0.44075987,-0.29708475,-0.2452992]" + "Blade Runner 2049" | "[0.8134027,-0.28314334,-0.05024454,0.50564945]" + "Minority Report" | "[0.39031598,0.7308651,-0.5024533,0.24704295]" + "Total Recall" | "[-0.54291505,0.4566574,-0.19491677,-0.67728484]" + "Elysium" | "[-0.517338,0.12022049,0.17122844,0.82981277]" + "Gattaca" | "[-0.35853538,0.51576865,0.13764863,0.765825]" + "The Fifth Element" | "[-0.5788842,-0.10483904,-0.36918527,0.7194471]" + "The Terminator" | "[0.84599304,0.45333964,-0.02701552,-0.27937278]" + "Terminator 2: Judgment Day" | "[0.8501332,0.15776564,0.15269388,-0.4786106]" + "Jurassic Park" | "[0.037194606,-0.23696794,-0.9705615,-0.02175219]" + "The Avengers" | "[0.009587915,-0.6492101,-0.7475897,0.13979948]" +(18 rows) + +-- Get top 4 most similar movies to The Terminator using l2 distance +SELECT * FROM cypher('graph', $$ MATCH (m:Movie), (search:Movie {title: "The Terminator"}) + RETURN m.title ORDER BY l2_distance(m.embedding, search.embedding) ASC LIMIT 4 +$$) AS (title agtype); + title +------------------------------ + "The Terminator" + "Terminator 2: Judgment Day" + "Minority Report" + "Blade Runner" +(4 rows) + 
+-- Get top 4 most similar movies to The Matrix using l2 distance +SELECT * FROM cypher('graph', $$ MATCH (m:Movie), (search:Movie {title: "The Matrix"}) + RETURN m.title ORDER BY l2_distance(m.embedding, search.embedding) ASC LIMIT 4 +$$) AS (title agtype); + title +---------------------------- + "The Matrix" + "The Matrix Reloaded" + "The Matrix Resurrections" + "Total Recall" +(4 rows) + +SELECT drop_graph('graph', true); +NOTICE: drop cascades to 3 other objects +DETAIL: drop cascades to table graph._ag_label_vertex +drop cascades to table graph._ag_label_edge +drop cascades to table graph."Movie" +NOTICE: graph "graph" has been dropped + drop_graph +------------ + +(1 row) + +DROP EXTENSION vector CASCADE; diff --git a/regress/sql/age_load.sql b/regress/sql/age_load.sql index cee34f59c..425ca5417 100644 --- a/regress/sql/age_load.sql +++ b/regress/sql/age_load.sql @@ -22,20 +22,65 @@ LOAD 'age'; SET search_path TO ag_catalog; + +-- Create a country using CREATE clause SELECT create_graph('agload_test_graph'); -SELECT create_vlabel('agload_test_graph','Country'); +SELECT * FROM cypher('agload_test_graph', $$CREATE (n:Country {__id__:1}) RETURN n$$) as (n agtype); + +-- +-- Load countries with id +-- +SELECT load_labels_from_file('agload_test_graph', 'Country', + 'age_load/countries.csv', true); + +-- A temporary table should have been created with 54 ids; 1 from CREATE and 53 from file +SELECT COUNT(*)=54 FROM "_agload_test_graph_ag_vertex_ids"; + +-- Sequence should be equal to max entry id i.e. 
248 +SELECT currval('agload_test_graph."Country_id_seq"')=248; + +-- Should error out on loading the same file again due to duplicate id SELECT load_labels_from_file('agload_test_graph', 'Country', - 'age_load/countries.csv'); + 'age_load/countries.csv', true); + +-- +-- Load cities with id +-- + +-- Should create City label automatically and load cities +SELECT load_labels_from_file('agload_test_graph', 'City', + 'age_load/cities.csv', true); + +-- Temporary table should have 54+72485 rows now +SELECT COUNT(*)=54+72485 FROM "_agload_test_graph_ag_vertex_ids"; -SELECT create_vlabel('agload_test_graph','City'); +-- Sequence should be equal to max entry id i.e. 146941 +SELECT currval('agload_test_graph."City_id_seq"')=146941; + +-- Should error out on loading the same file again due to duplicate id SELECT load_labels_from_file('agload_test_graph', 'City', - 'age_load/cities.csv'); + 'age_load/cities.csv', true); + +-- +-- Load edges -- Connects cities to countries +-- + +-- Should error out for using vertex label +SELECT load_edges_from_file('agload_test_graph', 'Country', + 'age_load/edges.csv'); SELECT create_elabel('agload_test_graph','has_city'); SELECT load_edges_from_file('agload_test_graph', 'has_city', 'age_load/edges.csv'); +-- Sequence should be equal to number of edges loaded i.e. 
72485 +SELECT currval('agload_test_graph."has_city_id_seq"')=72485; + +-- Should error out for using edge label +SELECT load_labels_from_file('agload_test_graph', 'has_city', + 'age_load/cities.csv'); + SELECT table_catalog, table_schema, lower(table_name) as table_name, table_type FROM information_schema.tables WHERE table_schema = 'agload_test_graph' ORDER BY table_name ASC; @@ -48,6 +93,14 @@ SELECT COUNT(*) FROM cypher('agload_test_graph', $$MATCH(n) RETURN n$$) as (n ag SELECT COUNT(*) FROM cypher('agload_test_graph', $$MATCH (a)-[e]->(b) RETURN e$$) as (n agtype); +-- +-- Load countries and cities without id +-- + +-- Should load countries in Country label without error since it should use sequence now +SELECT load_labels_from_file('agload_test_graph', 'Country', + 'age_load/countries.csv', false); + SELECT create_vlabel('agload_test_graph','Country2'); SELECT load_labels_from_file('agload_test_graph', 'Country2', 'age_load/countries.csv', false); @@ -62,31 +115,39 @@ SELECT COUNT(*) FROM agload_test_graph."City2"; SELECT id FROM agload_test_graph."Country" LIMIT 10; SELECT id FROM agload_test_graph."Country2" LIMIT 10; +-- Should return 2 rows for Country with same properties, but different ids SELECT * FROM cypher('agload_test_graph', $$MATCH(n:Country {iso2 : 'BE'}) RETURN id(n), n.name, n.iso2 $$) as ("id(n)" agtype, "n.name" agtype, "n.iso2" agtype); +-- Should return 1 row SELECT * FROM cypher('agload_test_graph', $$MATCH(n:Country2 {iso2 : 'BE'}) RETURN id(n), n.name, n.iso2 $$) as ("id(n)" agtype, "n.name" agtype, "n.iso2" agtype); +-- Should return 2 rows for Country with same properties, but different ids SELECT * FROM cypher('agload_test_graph', $$MATCH(n:Country {iso2 : 'AT'}) RETURN id(n), n.name, n.iso2 $$) as ("id(n)" agtype, "n.name" agtype, "n.iso2" agtype); +-- Should return 1 row SELECT * FROM cypher('agload_test_graph', $$MATCH(n:Country2 {iso2 : 'AT'}) RETURN id(n), n.name, n.iso2 $$) as ("id(n)" agtype, "n.name" agtype, "n.iso2" agtype); 
+-- Should return 2 rows for Country with same properties, but different ids SELECT * FROM cypher('agload_test_graph', $$ MATCH (u:Country {region : "Europe"}) WHERE u.name =~ 'Cro.*' - RETURN u.name, u.region -$$) AS (result_1 agtype, result_2 agtype); + RETURN id(u), u.name, u.region +$$) AS ("id(u)" agtype, result_1 agtype, result_2 agtype); + +-- There shouldn't be any duplicates +SELECT * FROM cypher('agload_test_graph', $$return graph_stats('agload_test_graph')$$) as (a agtype); SELECT drop_graph('agload_test_graph', true); -- -- Test property type conversion -- -SELECT create_graph('agload_conversion'); -- vertex: load as agtype -SELECT create_vlabel('agload_conversion','Person1'); + +-- Should create graph and label automatically SELECT load_labels_from_file('agload_conversion', 'Person1', 'age_load/conversion_vertices.csv', true, true); SELECT * FROM cypher('agload_conversion', $$ MATCH (n:Person1) RETURN properties(n) $$) as (a agtype); diff --git a/regress/sql/cypher_merge.sql b/regress/sql/cypher_merge.sql index 59efad1b7..02c9d21c2 100644 --- a/regress/sql/cypher_merge.sql +++ b/regress/sql/cypher_merge.sql @@ -760,6 +760,31 @@ SELECT * FROM cypher('issue_1709', $$ MATCH ()-[e]->() DELETE e $$) AS (a agtype -- clean up SELECT * FROM cypher('issue_1709', $$ MATCH (u) DELETE u $$) AS (a agtype); +-- +-- Fix issue 1907: SET on MERGE not storing edge properties +-- +-- setup +SELECT * FROM create_graph('issue_1907'); +SELECT * from cypher('issue_1907', $$ CREATE (n:Testnode {name: 'Test Node A'}) + RETURN n $$) as (n agtype); +SELECT * from cypher('issue_1907', $$ CREATE (n:Testnode {name: 'Test Node B'}) + RETURN n $$) as (n agtype); +SELECT * FROM cypher('issue_1907', $$ MATCH ()-[r]->() RETURN r $$) AS (r agtype); +-- should return properties added +SELECT * FROM cypher('issue_1907', $$ MERGE (a {name: 'Test Node A'})-[r:RELATED_TO]->(b {name: 'Test Node B'}) + SET r = {property1: 'something', property2: 'else'} + RETURN r $$) AS (r agtype); +-- should 
return properties added +SELECT * FROM cypher('issue_1907', $$ MATCH ()-[r]->() RETURN r $$) AS (r agtype); +-- cleanup +SELECT * FROM cypher('issue_1907', $$ MATCH ()-[r]->() DELETE r $$) AS (r agtype); +-- do it again, but a different way +SELECT * FROM cypher('issue_1907', $$ MERGE (a {name: 'Test Node A'})-[r:RELATED_TO]->(b {name: 'Test Node B'}) + SET r.property1 = 'something', r.property2 = 'else' + RETURN r $$) AS (r agtype); +-- should return properties added +SELECT * FROM cypher('issue_1907', $$ MATCH ()-[r]->() RETURN r $$) AS (r agtype); + -- -- clean up graphs -- @@ -770,6 +795,7 @@ SELECT * FROM cypher('issue_1709', $$ MATCH (n) DETACH DELETE n $$) AS (a agtype -- -- delete graphs -- +SELECT drop_graph('issue_1907', true); SELECT drop_graph('cypher_merge', true); SELECT drop_graph('issue_1630', true); SELECT drop_graph('issue_1691', true); diff --git a/regress/sql/expr.sql b/regress/sql/expr.sql index 86e78c91c..a0cf1b024 100644 --- a/regress/sql/expr.sql +++ b/regress/sql/expr.sql @@ -1065,6 +1065,52 @@ SELECT agtype_in('null::path'); SELECT * FROM cypher('expr', $$ RETURN null::path $$) AS r(result agtype); SELECT agtype_typecast_path(agtype_in('null')); SELECT agtype_typecast_path(null); +-- +-- Tests for explicit typecast to json +-- + +-- Should pass +SELECT agtype_to_json('{}'::agtype); +SELECT agtype_to_json('{ "hello": "world" }'::agtype); +SELECT agtype_to_json('{ "hello": "world" }'::agtype)->>'hello'; +SELECT agtype_to_json('[]'::agtype); +SELECT agtype_to_json('[1, 2, 3]'::agtype); +SELECT agtype_to_json(null::agtype); + +SELECT cast('{}'::agtype as json); +SELECT cast('{ "hello": "world" }'::agtype as json); +SELECT cast('{ "hello": "world" }'::agtype as json)->>'hello'; +SELECT cast('[]'::agtype as json); +SELECT cast('[1, 2, 3]'::agtype as json); +SELECT cast('[1, 2, 3]'::agtype as json)->1; +SELECT cast(null::agtype as json); + +SELECT vertex_in_json, vertex_in_json->'id' as id, pg_typeof(vertex_in_json) FROM cypher('type_coercion', 
$$ MATCH (a) RETURN a $$) AS (vertex_in_json json); +SELECT edge_in_json, edge_in_json->'id' as id, pg_typeof(edge_in_json) FROM cypher('type_coercion', $$ MATCH ()-[e]->() RETURN e $$) AS (edge_in_json json); +SELECT vle_in_json, vle_in_json->0 as first_edge, pg_typeof(vle_in_json) FROM cypher('type_coercion', $$ MATCH ()-[e *]->() RETURN e $$) AS (vle_in_json json); +SELECT *, pg_typeof(props_in_json) FROM cypher('type_coercion', $$ MATCH (a) RETURN properties(a) $$) AS (props_in_json json); +SELECT path_in_json, path_in_json->0 as first_node FROM cypher('type_coercion', $$ MATCH p=()-[]->() RETURN p $$) AS (path_in_json json); +SELECT *, pg_typeof(nodes_in_json) FROM cypher('type_coercion', $$ MATCH p=()-[]->() RETURN nodes(p) $$) AS (nodes_in_json json); +SELECT *, pg_typeof(rels_in_json) FROM cypher('type_coercion', $$ MATCH p=()-[]->() RETURN relationships(p) $$) AS (rels_in_json json); + +SELECT cast(result as json) FROM cypher('type_coercion', $$ MATCH (a) RETURN a $$) AS (result agtype); +SELECT cast(result as json) FROM cypher('type_coercion', $$ MATCH ()-[e]-() RETURN e $$) AS (result agtype); +SELECT cast(result as json) FROM cypher('type_coercion', $$ MATCH ()-[e *]->() RETURN e $$) AS (result agtype); +SELECT cast(result as json) FROM cypher('type_coercion', $$ MATCH p=()-[]->() RETURN p $$) AS (result agtype); +SELECT pg_typeof(cast(result as json)) FROM cypher('type_coercion', $$ MATCH p=()-[]->() RETURN p $$) AS (result agtype); + +-- Should fail +SELECT agtype_to_json('1'::agtype); +SELECT agtype_to_json('1.111'::agtype); +SELECT agtype_to_json('true'::agtype); +SELECT agtype_to_json('false'::agtype); +SELECT agtype_to_json('1::numeric'::agtype); + +SELECT cast(result as json) FROM cypher('type_coercion', $$ RETURN 1 $$) AS (result agtype); +SELECT cast(result as json) FROM cypher('type_coercion', $$ RETURN 1.111 $$) AS (result agtype); +SELECT cast(result as json) FROM cypher('type_coercion', $$ RETURN true $$) AS (result agtype); +SELECT 
cast(result as json) FROM cypher('type_coercion', $$ RETURN false $$) AS (result agtype); +SELECT cast(result as json) FROM cypher('type_coercion', $$ RETURN 1::numeric $$) AS (result agtype); -- test functions -- create some vertices and edges @@ -3444,9 +3490,62 @@ SELECT * FROM cypher('issue_1953', $$ RETURN is_valid_label_name('issue_1953')[{ SELECT * FROM cypher('issue_1953', $$ RETURN is_valid_label_name('issue_1953')[0] $$) AS (result agtype); SELECT * FROM cypher('issue_1953', $$ RETURN is_valid_label_name('issue_1953')[0..1] $$) AS (result agtype); +-- +-- Issue 1988: How to update a property which is a keyword. +-- +SELECT * FROM create_graph('issue_1988'); +SELECT * from cypher('issue_1988', $$ + CREATE (p1:Part {part_num: 123}), + (p2:Part {part_num: 345}), + (p3:Part {part_num: 456}), + (p4:Part {part_num: 789}) $$) as (a agtype); +SELECT * FROM cypher('issue_1988', $$ + MATCH (p) RETURN p $$) as (p agtype); + +SELECT * from cypher('issue_1988', $$ + MATCH (p1:Part {part_num: 123}), (p2:Part {part_num: 345}) + CREATE (p1)-[u:used_by { quantity: 1 }]->(p2) RETURN p1, u, p2 $$) as (p1 agtype, u agtype, p2 agtype); + +-- should fail +SELECT * FROM cypher('issue_1988', $$ + MATCH (p:Part { part_num: 123 }) SET p.match = 'xyz' RETURN p $$) as (p agtype); + +-- should succeed +SELECT * FROM cypher('issue_1988', $$ + MATCH (p:Part { part_num: 123 }) SET p.`match` = 'xyz' RETURN p $$) as (p agtype); +SELECT * FROM cypher('issue_1988', $$ + MATCH (p:Part { part_num: 123 }) SET p.`set` = 'xyz' RETURN p $$) as (p agtype); +SELECT * FROM cypher('issue_1988', $$ + MATCH (p:Part { part_num: 123 }) SET p.`delete` = 'xyz' RETURN p $$) as (p agtype); +SELECT * FROM cypher('issue_1988', $$ + MATCH (p:Part { part_num: 123 }) SET p.`merge` = 'xyz' RETURN p $$) as (p agtype); +SELECT * FROM cypher('issue_1988', $$ + MATCH (p:Part { part_num: 123 }) SET p.`create` = 'xyz' RETURN p $$) as (p agtype); +-- should succeed +SELECT * FROM cypher('issue_1988', $$ + MATCH (p:Part { 
part_num: 123 }) SET p.`match` = 'match' RETURN p $$) as (p agtype); +SELECT * FROM cypher('issue_1988', $$ + MATCH (p:Part { part_num: 123 }) SET p.`set` = 'set' RETURN p $$) as (p agtype); +SELECT * FROM cypher('issue_1988', $$ + MATCH (p:Part { part_num: 123 }) SET p.`delete` = 'delete' RETURN p $$) as (p agtype); +SELECT * FROM cypher('issue_1988', $$ + MATCH (p:Part { part_num: 123 }) SET p.`merge` = 'merge' RETURN p $$) as (p agtype); +SELECT * FROM cypher('issue_1988', $$ + MATCH (p:Part { part_num: 123 }) SET p.`create` = 'create' RETURN p $$) as (p agtype); + +SELECT * FROM cypher('issue_1988', $$ + MATCH (p) RETURN p $$) as (p agtype); + +-- +-- Issue 2093: Server crashes when executing SELECT agtype_hash_cmp(agtype_in('[null, null, null, null, null]')); +-- +SELECT agtype_access_operator(agtype_in('[null, null]')); +SELECT agtype_hash_cmp(agtype_in('[null, null, null, null, null]')); + -- -- Cleanup -- +SELECT * FROM drop_graph('issue_1988', true); SELECT * FROM drop_graph('issue_1953', true); SELECT * FROM drop_graph('expanded_map', true); SELECT * FROM drop_graph('issue_1124', true); diff --git a/regress/sql/fuzzystrmatch.sql b/regress/sql/fuzzystrmatch.sql new file mode 100644 index 000000000..b850f46ed --- /dev/null +++ b/regress/sql/fuzzystrmatch.sql @@ -0,0 +1,61 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +LOAD 'age'; +SET search_path=ag_catalog; + +SELECT create_graph('graph'); + +-- Should error out +SELECT * FROM cypher('graph', $$ RETURN soundex("hello") $$) AS (n agtype); + +-- Create the extension in the public schema +CREATE EXTENSION fuzzystrmatch SCHEMA public; + +-- Should error out +SELECT * FROM cypher('graph', $$ RETURN soundex("hello") $$) AS (n agtype); + +-- Should work +SET search_path=ag_catalog, public; +SELECT * FROM cypher('graph', $$ CREATE (:Person {name: "Jane"}), + (:Person {name: "John"}), + (:Person {name: "Jone"}), + (:Person {name: "Jack"}), + (:Person {name: "Jax"}), + (:Person {name: "Jake"}), + (:Person {name: "Julie"}), + (:Person {name: "Julius"}), + (:Person {name: "Jill"}), + (:Person {name: "Jillie"}), + (:Person {name: "Julian"}) +$$) AS (n agtype); +SELECT * FROM cypher('graph', $$ MATCH (p) return soundex(p.name) $$) AS (n agtype); +SELECT * FROM cypher('graph', $$ MATCH (p) return levenshtein(p.name, "John") $$) AS (n agtype); +SELECT * FROM cypher('graph', $$ MATCH (p) return difference(p.name, "John") $$) AS (n agtype); +SELECT * FROM cypher('graph', $$ MATCH (p) return metaphone(p.name, 4) $$) AS (n agtype); +SELECT * FROM cypher('graph', $$ MATCH (p) return dmetaphone(p.name) $$) AS (n agtype); + +-- Difference is basically similarity using soundex, https://www.postgresql.org/docs/current/fuzzystrmatch.html +SELECT * FROM cypher('graph', $$ MATCH (p) return p ORDER BY difference(p.name, "Jon") DESC LIMIT 3$$) AS (n agtype); +SELECT * FROM cypher('graph', $$ MATCH (p) return p ORDER BY 
difference(p.name, "Jak") DESC LIMIT 3$$) AS (n agtype); +SELECT * FROM cypher('graph', $$ MATCH (p) return p ORDER BY difference(p.name, "Jil") DESC LIMIT 3$$) AS (n agtype); + +-- Clean up +SELECT drop_graph('graph', true); +DROP EXTENSION fuzzystrmatch CASCADE; \ No newline at end of file diff --git a/regress/sql/list_comprehension.sql b/regress/sql/list_comprehension.sql index cef92a22a..cb941a61b 100644 --- a/regress/sql/list_comprehension.sql +++ b/regress/sql/list_comprehension.sql @@ -157,4 +157,12 @@ SELECT * FROM cypher('list_comprehension', $$ MATCH (u) WITH * WHERE u.list=[i I SELECT * FROM cypher('list_comprehension', $$ MATCH (u) WITH * WITH *, [i in [1,2,3]] as list RETURN list LIMIT 1 $$) AS (result agtype); SELECT * FROM cypher('list_comprehension', $$ MATCH (u) WITH *, [i in [1,2,3]] as list WITH * RETURN list LIMIT 1 $$) AS (result agtype); +-- Issue 1955 - variable reference in list comprehension +SELECT * FROM cypher('list_comprehension', $$ MATCH (u) WHERE u.list=[i IN u.list] RETURN u $$) AS (result agtype); +SELECT * FROM cypher('list_comprehension', $$ MATCH (u) WHERE u.list=[i IN u.list WHERE i>0] RETURN u $$) AS (result agtype); +SELECT * FROM cypher('list_comprehension', $$ MATCH (u) WHERE size([e in u.list where e starts with "a"])>0 RETURN u $$) AS (result agtype); +SELECT * FROM cypher('list_comprehension', $$ MATCH (u ={list:[i IN u.list | i+1]}) RETURN u $$) AS (result agtype); +SELECT * FROM cypher('list_comprehension', $$ MATCH (u ={list:[i IN u.list WHERE i>0]}) RETURN u$$) AS (result agtype); + +-- Clean up SELECT * FROM drop_graph('list_comprehension', true); \ No newline at end of file diff --git a/regress/sql/name_validation.sql b/regress/sql/name_validation.sql index bfb5d886b..acf782677 100644 --- a/regress/sql/name_validation.sql +++ b/regress/sql/name_validation.sql @@ -153,9 +153,38 @@ SELECT * from cypher('graph123', $$ return is_valid_label_name('2label') $$) as SELECT * from cypher('graph123', $$ return 
is_valid_label_name('label1') $$) as (result agtype); SELECT * from cypher('graph123', $$ return is_valid_label_name('label2') $$) as (result agtype); +-- issue 1986: label name validation of long names. +-- Label names are relation names which are restricted to NAMEDATALEN-1 in size. +-- However, we can't validate PG type Names due to namein() truncating anything +-- over NAMEDATALEN-1. To allow the label names to be checked over NAMEDATELEN-1 +-- we changed the input type from PG's Name to cstring. These checks are to +-- verify that these can now be caught. +-- +-- should return false and a warning. +SELECT * from cypher('graph123', $$ return is_valid_label_name('label01234567890123456789012345678901234567890123456789012345678') $$) as (result agtype); +-- should be successful +SELECT * from cypher('graph123', $$ return is_valid_label_name('label0123456789012345678901234567890123456789012345678901234567') $$) as (result agtype); +-- +-- now check vlabel creation, should fail +SELECT create_vlabel('graph123', 'vlabel01234567890123456789012345678901234567890123456789012345678901234567890123456789012345678'); +-- should be successful +SELECT create_vlabel('graph123', 'vlabel012345678901234567890123456789012345678901234567890123456'); +-- +-- now check elabel creation, should fail +SELECT create_elabel('graph123', 'elabel0123456789012345678901234567890123456789012345678901234567'); +-- should be okay +SELECT create_elabel('graph123', 'elabel012345678901234567890123456789012345678901234567890123456'); + -- clean up SELECT drop_graph('graph123', true); +-- +-- Test GUC names +-- +SET age.enable_containment TO ON; +SET age.invalid_parameter TO ON; +SET any_placeholder.any_parameter TO ON; + -- -- End of test -- diff --git a/regress/sql/pg_trgm.sql b/regress/sql/pg_trgm.sql new file mode 100644 index 000000000..a276e5913 --- /dev/null +++ b/regress/sql/pg_trgm.sql @@ -0,0 +1,54 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor 
license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +LOAD 'age'; +SET search_path=ag_catalog; + +SELECT create_graph('graph'); + +-- Should error out +SELECT * FROM cypher('graph', $$ RETURN show_trgm("hello") $$) AS (n agtype); + +-- Create the extension in the public schema +CREATE EXTENSION pg_trgm SCHEMA public; + +-- Should error out +SELECT * FROM cypher('graph', $$ RETURN show_trgm("hello") $$) AS (n agtype); + +-- Should work +SET search_path=ag_catalog, public; +SELECT * FROM cypher('graph', $$ CREATE (:Person {name: "Jane"}), + (:Person {name: "John"}), + (:Person {name: "Jone"}), + (:Person {name: "Jack"}), + (:Person {name: "Jax"}), + (:Person {name: "Jake"}), + (:Person {name: "Julie"}), + (:Person {name: "Julius"}), + (:Person {name: "Jill"}), + (:Person {name: "Jillie"}), + (:Person {name: "Julian"}) +$$) AS (n agtype); +SELECT * FROM cypher('graph', $$ MATCH (p) return show_trgm(p.name) $$) AS (n text[]); +SELECT * FROM cypher('graph', $$ MATCH (p) with p, similarity(p.name, "Jon") as sim return p.name, sim ORDER BY sim DESC $$) AS (n agtype, s real); +SELECT * FROM cypher('graph', $$ MATCH (p) with p, word_similarity(p.name, "Jon") as sim return p.name, sim ORDER BY sim DESC $$) AS (n agtype, s real); + +-- Clean up +SELECT drop_graph('graph', true); +DROP EXTENSION pg_trgm CASCADE; 
\ No newline at end of file diff --git a/regress/sql/pgvector.sql b/regress/sql/pgvector.sql new file mode 100644 index 000000000..816d6eb9f --- /dev/null +++ b/regress/sql/pgvector.sql @@ -0,0 +1,101 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +LOAD 'age'; +SET search_path=ag_catalog; + +SELECT create_graph('graph'); + +-- Should error out +SELECT * FROM cypher('graph', $$ RETURN cosine_distance("[1,2,3]", "[1,2,3]") $$) AS (n agtype); + +-- Create the extension in the public schema +CREATE EXTENSION vector SCHEMA public; + +-- Should error out +SELECT * FROM cypher('graph', $$ RETURN cosine_distance("[1,2,3]", "[1,2,3]") $$) AS (n agtype); + +-- Should work +SET search_path=ag_catalog, public; + +SELECT create_graph('graph'); +SELECT * FROM cypher('graph', $$ RETURN "[1.22,2.22,3.33]"::vector $$) AS (n vector); +SELECT * FROM cypher('graph', $$ RETURN "[1.22,2.22,3.33]"::vector $$) AS (n halfvec); +SELECT * FROM cypher('graph', $$ RETURN "[1.22,2.22,3.33]"::vector $$) AS (n sparsevec); + +SELECT * FROM cypher('graph', $$ RETURN l2_distance("[1,2,3]", "[1,2,4]") $$) AS (n agtype); +SELECT * FROM cypher('graph', $$ RETURN inner_product("[1,2,3]", "[1,2,4]") $$) AS (n agtype); +SELECT * FROM cypher('graph', $$ RETURN cosine_distance("[1,2,3]", "[1,2,4]") $$) AS (n agtype); +SELECT * FROM cypher('graph', $$ RETURN l1_distance("[1,2,3]", "[1,2,4]") $$) AS (n agtype); +SELECT * FROM cypher('graph', $$ RETURN vector_dims("[1,2,3]") $$) AS (n agtype); +SELECT * FROM cypher('graph', $$ RETURN vector_norm("[1,2,3]") $$) AS (n agtype); +SELECT * FROM cypher('graph', $$ RETURN l2_normalize("[1,2,3]") $$) AS (n vector); +SELECT * FROM cypher('graph', $$ RETURN l2_normalize("[1,2,3]")::text $$) AS (n agtype); +SELECT * FROM cypher('graph', $$ RETURN subvector("[1,2,3,4,5,6]", 2, 4) $$) AS (n vector); +SELECT * FROM cypher('graph', $$ RETURN subvector("[1,2,3,4,5,6]", 2, 4)::text $$) AS (n agtype); +SELECT * FROM cypher('graph', $$ RETURN binary_quantize("[1,2,4]") $$) AS (n bit); + +-- An example usage +SELECT * FROM cypher('graph', $$ + CREATE (:Movie {title: "The Matrix", year: 1999, genre: "Action", plot: "A computer hacker learns about the true nature of reality and joins a rebellion to free humanity from 
a simulated world controlled by machines.", embedding: "[-0.07594558, 0.04081754, 0.29592122, -0.11921061]"}), + (:Movie {title: "The Matrix Reloaded", year: 2003, genre: "Action", plot: "The rebels continue their fight against the machines, uncovering deeper truths about the Matrix and the nature of their mission.", embedding: "[0.30228977, -0.22839354, 0.35070436, 0.01262819]"}), + (:Movie {title: "The Matrix Revolutions", year: 2003, genre: "Action", plot: "The final battle between humans and machines reaches its climax as the fate of both worlds hangs in the balance.", embedding: "[ 0.12240622, -0.29752459, 0.22620453, 0.24454723]"}), + (:Movie {title: "The Matrix Resurrections", year: 2021, genre: "Action", plot: "Neo returns to a new version of the Matrix and must once again fight to save the people from the control of the machines.", embedding: "[ 0.34717246, -0.13820869, 0.29214213, 0.08090488]"}), + (:Movie {title: "Inception", year: 2010, genre: "Sci-Fi", plot: "A skilled thief is given a chance at redemption if he can successfully perform an inception: planting an idea into someone’s subconscious.", embedding: "[ 0.03923657, 0.39284106, -0.20927092, -0.17770818]"}), + (:Movie {title: "Interstellar", year: 2014, genre: "Sci-Fi", plot: "A group of explorers travel through a wormhole in space in an attempt to ensure humanity’s survival.", embedding: "[-0.29302418, -0.39615033, -0.23393948, -0.09601383]"}), + (:Movie {title: "Avatar", year: 2009, genre: "Sci-Fi", plot: "A paraplegic Marine is sent to the moon Pandora, where he becomes torn between following orders and protecting the world he feels is his home.", embedding: "[-0.13663386, 0.00635589, -0.03038832, -0.08252723]"}), + (:Movie {title: "Blade Runner", year: 1982, genre: "Sci-Fi", plot: "A blade runner must pursue and terminate four replicants who have stolen a ship in space and returned to Earth.", embedding: "[ 0.27215557, -0.1479577, -0.09972772, -0.08234394]"}), + (:Movie {title: "Blade Runner 
2049", year: 2017, genre: "Sci-Fi", plot: "A new blade runner unearths a long-buried secret that has the potential to plunge what’s left of society into chaos.", embedding: "[ 0.21560573, -0.07505179, -0.01331814, 0.13403069]"}), + (:Movie {title: "Minority Report", year: 2002, genre: "Sci-Fi", plot: "In a future where a special police unit can arrest murderers before they commit their crimes, a top officer is accused of a future murder.", embedding: "[ 0.24008012, 0.44954908, -0.30905488, 0.15195407]"}), + (:Movie {title: "Total Recall", year: 1990, genre: "Sci-Fi", plot: "A construction worker discovers that his memories have been implanted and becomes embroiled in a conspiracy on Mars.", embedding: "[-0.17471036, 0.14695261, -0.06272433, -0.21795064]"}), + (:Movie {title: "Elysium", year: 2013, genre: "Sci-Fi", plot: "In a future where the rich live on a luxurious space station while the rest of humanity lives in squalor, a man fights to bring equality.", embedding: "[-0.33280967, 0.07733926, 0.11015328, 0.53382836]"}), + (:Movie {title: "Gattaca", year: 1997, genre: "Sci-Fi", plot: "In a future where genetic engineering determines social class, a man defies his fate to achieve his dreams.", embedding: "[-0.21629286, 0.31114665, 0.08303899, 0.46199759]"}), + (:Movie {title: "The Fifth Element", year: 1997, genre: "Sci-Fi", plot: "In a futuristic world, a cab driver becomes the key to saving humanity from an impending cosmic threat.", embedding: "[-0.11528205, -0.0208782, -0.0735215, 0.14327449]"}), + (:Movie {title: "The Terminator", year: 1984, genre: "Action", plot: "A cyborg assassin is sent back in time to kill the mother of the future resistance leader.", embedding: "[ 0.33666933, 0.18040994, -0.01075103, -0.11117851]"}), + (:Movie {title: "Terminator 2: Judgment Day", year: 1991, genre: "Action", plot: "A reprogrammed Terminator is sent to protect the future leader of the human resistance from a more advanced Terminator.", embedding: "[ 0.34698868, 
0.06439331, 0.06232323, -0.19534876]"}), + (:Movie {title: "Jurassic Park", year: 1993, genre: "Adventure", plot: "Scientists clone dinosaurs to create a theme park, but things go awry when the creatures escape.", embedding: "[ 0.01794725, -0.11434246, -0.46831815, -0.01049593]"}), + (:Movie {title: "The Avengers", year: 2012, genre: "Action", plot: "Superheroes assemble to face a global threat from an alien invasion led by Loki.", embedding: "[ 0.00546514, -0.37005171, -0.42612838, 0.07968612]"}) +$$) AS (result agtype); +SELECT * FROM cypher('graph', $$ MATCH (m:Movie) RETURN m.title, (m.embedding)::vector $$) AS (title agtype, embedding vector); + +-- Check the dimension of the embedding +SELECT * FROM cypher('graph', $$ MATCH (m:Movie) RETURN m.title, vector_dims(m.embedding) $$) AS (title agtype, dimension int); + +-- Get top 4 most similar movies to The Terminator using cosine distance +SELECT * FROM cypher('graph', $$ MATCH (m:Movie), (search:Movie {title: "The Terminator"}) + RETURN m.title ORDER BY cosine_distance(m.embedding, search.embedding) ASC LIMIT 4 +$$) AS (title agtype); +-- Get top 4 most similar movies to The Matrix using cosine distance +SELECT * FROM cypher('graph', $$ MATCH (m:Movie), (search:Movie {title: "The Matrix"}) + RETURN m.title ORDER BY cosine_distance(m.embedding, search.embedding) ASC LIMIT 4 +$$) AS (title agtype); +-- l2 norm of the embedding +SELECT * FROM cypher('graph', $$ MATCH (m:Movie) set m.embedding=(l2_normalize(m.embedding))::text return m.title, m.embedding $$) AS (title agtype, embedding agtype); + +-- Get top 4 most similar movies to The Terminator using l2 distance +SELECT * FROM cypher('graph', $$ MATCH (m:Movie), (search:Movie {title: "The Terminator"}) + RETURN m.title ORDER BY l2_distance(m.embedding, search.embedding) ASC LIMIT 4 +$$) AS (title agtype); +-- Get top 4 most similar movies to The Matrix using l2 distance +SELECT * FROM cypher('graph', $$ MATCH (m:Movie), (search:Movie {title: "The Matrix"}) + 
RETURN m.title ORDER BY l2_distance(m.embedding, search.embedding) ASC LIMIT 4 +$$) AS (title agtype); + +SELECT drop_graph('graph', true); +DROP EXTENSION vector CASCADE; \ No newline at end of file diff --git a/sql/age_main.sql b/sql/age_main.sql index 3bea3daca..59ada0f9f 100644 --- a/sql/age_main.sql +++ b/sql/age_main.sql @@ -100,12 +100,12 @@ CREATE FUNCTION ag_catalog.drop_graph(graph_name name, cascade boolean = false) LANGUAGE c AS 'MODULE_PATHNAME'; -CREATE FUNCTION ag_catalog.create_vlabel(graph_name name, label_name name) +CREATE FUNCTION ag_catalog.create_vlabel(graph_name cstring, label_name cstring) RETURNS void LANGUAGE c AS 'MODULE_PATHNAME'; -CREATE FUNCTION ag_catalog.create_elabel(graph_name name, label_name name) +CREATE FUNCTION ag_catalog.create_elabel(graph_name cstring, label_name cstring) RETURNS void LANGUAGE c AS 'MODULE_PATHNAME'; diff --git a/sql/agtype_coercions.sql b/sql/agtype_coercions.sql index cdf5f6f8c..bdc33af80 100644 --- a/sql/agtype_coercions.sql +++ b/sql/agtype_coercions.sql @@ -32,6 +32,15 @@ AS 'MODULE_PATHNAME'; CREATE CAST (agtype AS text) WITH FUNCTION ag_catalog.agtype_to_text(agtype); +-- text -> agtype +CREATE FUNCTION ag_catalog.text_to_agtype(text) + RETURNS agtype + LANGUAGE c + IMMUTABLE +RETURNS NULL ON NULL INPUT +PARALLEL SAFE +AS 'MODULE_PATHNAME'; + -- agtype -> boolean (implicit) CREATE FUNCTION ag_catalog.agtype_to_bool(agtype) RETURNS boolean @@ -69,7 +78,7 @@ AS 'MODULE_PATHNAME'; CREATE CAST (float8 AS agtype) WITH FUNCTION ag_catalog.float8_to_agtype(float8); --- agtype -> float8 (implicit) +-- agtype -> float8 (explicit) CREATE FUNCTION ag_catalog.agtype_to_float8(agtype) RETURNS float8 LANGUAGE c @@ -106,6 +115,18 @@ CREATE CAST (agtype AS bigint) WITH FUNCTION ag_catalog.agtype_to_int8(variadic "any") AS ASSIGNMENT; +-- int4 -> agtype (explicit) +CREATE FUNCTION ag_catalog.int4_to_agtype(int4) + RETURNS agtype + LANGUAGE c + IMMUTABLE +RETURNS NULL ON NULL INPUT +PARALLEL SAFE +AS
'MODULE_PATHNAME'; + +CREATE CAST (int4 AS agtype) + WITH FUNCTION ag_catalog.int4_to_agtype(int4); + -- agtype -> int4 CREATE FUNCTION ag_catalog.agtype_to_int4(variadic "any") RETURNS int @@ -141,3 +162,14 @@ AS 'MODULE_PATHNAME'; CREATE CAST (agtype AS int[]) WITH FUNCTION ag_catalog.agtype_to_int4_array(variadic "any"); + +CREATE FUNCTION ag_catalog.agtype_to_json(agtype) + RETURNS json + LANGUAGE c + IMMUTABLE +RETURNS NULL ON NULL INPUT +PARALLEL SAFE +AS 'MODULE_PATHNAME'; + +CREATE CAST (agtype AS json) + WITH FUNCTION ag_catalog.agtype_to_json(agtype); diff --git a/src/backend/catalog/ag_label.c b/src/backend/catalog/ag_label.c index 1b8eaa5cc..3c242a000 100644 --- a/src/backend/catalog/ag_label.c +++ b/src/backend/catalog/ag_label.c @@ -161,6 +161,11 @@ char get_label_kind(const char *label_name, Oid label_graph) } } +char *get_label_seq_relation_name(const char *label_name) +{ + return psprintf("%s_id_seq", label_name); +} + PG_FUNCTION_INFO_V1(_label_name); /* diff --git a/src/backend/commands/graph_commands.c b/src/backend/commands/graph_commands.c index dbcb60a10..f7e8d070b 100644 --- a/src/backend/commands/graph_commands.c +++ b/src/backend/commands/graph_commands.c @@ -30,6 +30,7 @@ #include "catalog/ag_graph.h" #include "catalog/ag_label.h" #include "commands/label_commands.h" +#include "commands/graph_commands.h" #include "utils/name_validation.h" /* @@ -48,10 +49,7 @@ PG_FUNCTION_INFO_V1(create_graph); /* function that is evoked for creating a graph */ Datum create_graph(PG_FUNCTION_ARGS) { - char *graph; Name graph_name; - char *graph_name_str; - Oid nsp_id; /* if no argument is passed with the function, graph name cannot be null */ if (PG_ARGISNULL(0)) @@ -63,6 +61,23 @@ Datum create_graph(PG_FUNCTION_ARGS) /* gets graph name as function argument */ graph_name = PG_GETARG_NAME(0); + create_graph_internal(graph_name); + + ereport(NOTICE, + (errmsg("graph \"%s\" has been created", NameStr(*graph_name)))); + + /* + * According to postgres 
specification of c-language functions + * if function returns void this is the syntax. + */ + PG_RETURN_VOID(); +} + +Oid create_graph_internal(const Name graph_name) +{ + Oid nsp_id; + char *graph_name_str; + graph_name_str = NameStr(*graph_name); /* checking if the name of the graph falls under the pre-decided graph naming conventions(regex) */ @@ -89,15 +104,10 @@ Datum create_graph(PG_FUNCTION_ARGS) CommandCounterIncrement(); /* Create the default label tables */ - graph = graph_name->data; - create_label(graph, AG_DEFAULT_LABEL_VERTEX, LABEL_TYPE_VERTEX, NIL); - create_label(graph, AG_DEFAULT_LABEL_EDGE, LABEL_TYPE_EDGE, NIL); + create_label(graph_name_str, AG_DEFAULT_LABEL_VERTEX, LABEL_TYPE_VERTEX, NIL); + create_label(graph_name_str, AG_DEFAULT_LABEL_EDGE, LABEL_TYPE_EDGE, NIL); - ereport(NOTICE, - (errmsg("graph \"%s\" has been created", NameStr(*graph_name)))); - - /* according to postgres specification of c-language functions if function returns void this is the syntax */ - PG_RETURN_VOID(); + return nsp_id; } PG_FUNCTION_INFO_V1(age_graph_exists); diff --git a/src/backend/commands/label_commands.c b/src/backend/commands/label_commands.c index ff4446044..f603ec97c 100644 --- a/src/backend/commands/label_commands.c +++ b/src/backend/commands/label_commands.c @@ -116,8 +116,8 @@ Datum age_is_valid_label_name(PG_FUNCTION_ARGS) label_name = pnstrdup(agtv_value->val.string.val, agtv_value->val.string.len); - is_valid = is_valid_label(label_name, 0); - pfree(label_name); + is_valid = is_valid_label_name(label_name, 0); + pfree_if_not_null(label_name); if (is_valid) { @@ -141,17 +141,11 @@ PG_FUNCTION_INFO_V1(create_vlabel); Datum create_vlabel(PG_FUNCTION_ARGS) { - char *graph; - Name graph_name; - char *graph_name_str; + char *graph_name; Oid graph_oid; List *parent; - RangeVar *rv; - - char *label; - Name label_name; - char *label_name_str; + char *label_name; /* checking if user has not provided the graph name */ if (PG_ARGISNULL(0)) @@ -167,42 +161,49 @@ 
Datum create_vlabel(PG_FUNCTION_ARGS) errmsg("label name must not be NULL"))); } - graph_name = PG_GETARG_NAME(0); - label_name = PG_GETARG_NAME(1); + graph_name = PG_GETARG_CSTRING(0); + label_name = PG_GETARG_CSTRING(1); - graph_name_str = NameStr(*graph_name); - label_name_str = NameStr(*label_name); + /* validate the graph and label names */ + if (is_valid_graph_name(graph_name) == 0) + { + ereport(ERROR, (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("graph name is invalid"))); + } + + if (is_valid_label_name(label_name, 0) == 0) + { + ereport(ERROR, (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("label name is invalid"))); + } /* Check if graph does not exist */ - if (!graph_exists(graph_name_str)) + if (!graph_exists(graph_name)) { ereport(ERROR, (errcode(ERRCODE_UNDEFINED_SCHEMA), - errmsg("graph \"%s\" does not exist.", graph_name_str))); + errmsg("graph \"%s\" does not exist.", graph_name))); } - graph_oid = get_graph_oid(graph_name_str); + graph_oid = get_graph_oid(graph_name); /* Check if label with the input name already exists */ - if (label_exists(label_name_str, graph_oid)) + if (label_exists(label_name, graph_oid)) { ereport(ERROR, (errcode(ERRCODE_UNDEFINED_SCHEMA), - errmsg("label \"%s\" already exists", label_name_str))); + errmsg("label \"%s\" already exists", label_name))); } /* Create the default label tables */ - graph = graph_name->data; - label = label_name->data; - - rv = get_label_range_var(graph, graph_oid, AG_DEFAULT_LABEL_VERTEX); + rv = get_label_range_var(graph_name, graph_oid, AG_DEFAULT_LABEL_VERTEX); parent = list_make1(rv); - create_label(graph, label, LABEL_TYPE_VERTEX, parent); + create_label(graph_name, label_name, LABEL_TYPE_VERTEX, parent); ereport(NOTICE, - (errmsg("VLabel \"%s\" has been created", NameStr(*label_name)))); + (errmsg("VLabel \"%s\" has been created", label_name))); PG_RETURN_VOID(); } @@ -221,17 +222,11 @@ PG_FUNCTION_INFO_V1(create_elabel); Datum create_elabel(PG_FUNCTION_ARGS) { - char *graph; - 
Name graph_name; - char *graph_name_str; + char *graph_name; Oid graph_oid; List *parent; - RangeVar *rv; - - char *label; - Name label_name; - char *label_name_str; + char *label_name; /* checking if user has not provided the graph name */ if (PG_ARGISNULL(0)) @@ -247,41 +242,48 @@ Datum create_elabel(PG_FUNCTION_ARGS) errmsg("label name must not be NULL"))); } - graph_name = PG_GETARG_NAME(0); - label_name = PG_GETARG_NAME(1); + graph_name = PG_GETARG_CSTRING(0); + label_name = PG_GETARG_CSTRING(1); - graph_name_str = NameStr(*graph_name); - label_name_str = NameStr(*label_name); + /* validate the graph and label names */ + if (is_valid_graph_name(graph_name) == 0) + { + ereport(ERROR, (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("graph name is invalid"))); + } + + if (is_valid_label_name(label_name, 0) == 0) + { + ereport(ERROR, (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("label name is invalid"))); + } /* Check if graph does not exist */ - if (!graph_exists(graph_name_str)) + if (!graph_exists(graph_name)) { ereport(ERROR, (errcode(ERRCODE_UNDEFINED_SCHEMA), - errmsg("graph \"%s\" does not exist.", graph_name_str))); + errmsg("graph \"%s\" does not exist.", graph_name))); } - graph_oid = get_graph_oid(graph_name_str); + graph_oid = get_graph_oid(graph_name); /* Check if label with the input name already exists */ - if (label_exists(label_name_str, graph_oid)) + if (label_exists(label_name, graph_oid)) { ereport(ERROR, (errcode(ERRCODE_UNDEFINED_SCHEMA), - errmsg("label \"%s\" already exists", label_name_str))); + errmsg("label \"%s\" already exists", label_name))); } /* Create the default label tables */ - graph = graph_name->data; - label = label_name->data; - - rv = get_label_range_var(graph, graph_oid, AG_DEFAULT_LABEL_EDGE); + rv = get_label_range_var(graph_name, graph_oid, AG_DEFAULT_LABEL_EDGE); parent = list_make1(rv); - create_label(graph, label, LABEL_TYPE_EDGE, parent); + create_label(graph_name, label_name, LABEL_TYPE_EDGE, parent); 
ereport(NOTICE, - (errmsg("ELabel \"%s\" has been created", NameStr(*label_name)))); + (errmsg("ELabel \"%s\" has been created", label_name))); PG_RETURN_VOID(); } @@ -304,7 +306,7 @@ void create_label(char *graph_name, char *label_name, char label_type, int32 label_id; Oid relation_id; - if (!is_valid_label(label_name, label_type)) + if (!is_valid_label_name(label_name, label_type)) { ereport(ERROR, (errcode(ERRCODE_UNDEFINED_SCHEMA), errmsg("label name is invalid"))); diff --git a/src/backend/executor/cypher_merge.c b/src/backend/executor/cypher_merge.c index 29ab1191f..9136825ab 100644 --- a/src/backend/executor/cypher_merge.c +++ b/src/backend/executor/cypher_merge.c @@ -346,7 +346,7 @@ static void free_path_entry_array(path_entry **path_array, int length) for (index = 0; index < length; index++) { - pfree(path_array[index]); + pfree_if_not_null(path_array[index]); } } @@ -893,10 +893,10 @@ static void end_cypher_merge(CustomScanState *node) free_path_entry_array(entry, path_length); /* free up the array container */ - pfree(entry); + pfree_if_not_null(entry); /* free up the created_path container */ - pfree(css->created_paths_list); + pfree_if_not_null(css->created_paths_list); css->created_paths_list = next; } @@ -1401,10 +1401,47 @@ static void merge_edge(cypher_merge_custom_scan_state *css, elemTupleSlot->tts_values[edge_tuple_properties] = prop; elemTupleSlot->tts_isnull[edge_tuple_properties] = isNull; - /* Insert the edge, if it is a new edge */ - if (should_insert) + /* + * Insert the new edge. + * + * Depending on the currentCommandId, we need to do this one of two + * different ways - + * + * 1) If they are equal, the currentCommandId hasn't been used for an + * update, or it hasn't been incremented after being used. In either + * case, we need to use the current one and then increment it so that + * the following commands will have visibility of this update. Note, + * it isn't our job to update the currentCommandId first and then do + * this check. 
+ * + * 2) If they are not equal, the currentCommandId has been used and/or + * updated. In this case, we can't use it. Otherwise our update won't + * be visible to anything that follows, until the currentCommandId is + * updated again. Remember, visibility is, greater than but not equal + * to, the currentCommandID used for the update. So, in this case we + * need to use the original currentCommandId when begin_cypher_merge + * was initiated as everything under this instance of merge needs to + * be based off of that initial currentCommandId. This allows the + * following command to see the updates generated by this instance of + * merge. + */ + if (should_insert && + css->base_currentCommandId == GetCurrentCommandId(false)) { insert_entity_tuple(resultRelInfo, elemTupleSlot, estate); + + /* + * Increment the currentCommandId since we processed an update. We + * don't want to do this outside of this block because we don't want + * to inadvertently or unnecessarily update the commandCounterId of + * another command. 
+ */ + CommandCounterIncrement(); + } + else if (should_insert) + { + insert_entity_tuple_cid(resultRelInfo, elemTupleSlot, estate, + css->base_currentCommandId); } /* restore the old result relation info */ diff --git a/src/backend/executor/cypher_set.c b/src/backend/executor/cypher_set.c index f8a04d24b..d1837fb16 100644 --- a/src/backend/executor/cypher_set.c +++ b/src/backend/executor/cypher_set.c @@ -596,7 +596,7 @@ static void process_update_list(CustomScanState *node) lidx++; } /* free our lookup array */ - pfree(luindex); + pfree_if_not_null(luindex); } static TupleTableSlot *exec_cypher_set(CustomScanState *node) diff --git a/src/backend/parser/ag_scanner.l b/src/backend/parser/ag_scanner.l index 256e662dc..45ccdac3b 100644 --- a/src/backend/parser/ag_scanner.l +++ b/src/backend/parser/ag_scanner.l @@ -34,6 +34,7 @@ #include "mb/pg_wchar.h" #include "parser/ag_scanner.h" +#include "utils/agtype.h" } %option 8bit @@ -628,7 +629,7 @@ ag_token token; scan_errposition())); } - token.type = AG_TOKEN_IDENTIFIER; + token.type = AG_TOKEN_BQIDENT; token.value.s = strbuf_get_str(&yyextra.literal_buf); token.location = get_location(); return token; @@ -794,7 +795,7 @@ void *ag_yyrealloc(void *ptr, yy_size_t size, yyscan_t yyscanner) { if (size == 0) { - pfree(ptr); + pfree_if_not_null(ptr); return NULL; } else @@ -811,7 +812,7 @@ void *ag_yyrealloc(void *ptr, yy_size_t size, yyscan_t yyscanner) void ag_yyfree(void *ptr, yyscan_t yyscanner) { if (ptr) - pfree(ptr); + pfree_if_not_null(ptr); } static void strbuf_init(strbuf *sb, int capacity) @@ -824,7 +825,7 @@ static void strbuf_init(strbuf *sb, int capacity) static void strbuf_cleanup(strbuf *sb) { if (sb->buffer) - pfree(sb->buffer); + pfree_if_not_null(sb->buffer); } static void strbuf_append_buf(strbuf *sb, const char *b, const int len) @@ -1119,8 +1120,8 @@ static void _numstr_to_decimal(const char *numstr, const int base, strbuf *sb) strbuf_append_buf(sb, &buf[buf_i], NDIGITS_PER_REMAINDER - buf_i); } - 
pfree(remainders); - pfree(words); + pfree_if_not_null(remainders); + pfree_if_not_null(words); } static uint32 hexdigit_value(const char c) diff --git a/src/backend/parser/cypher_analyze.c b/src/backend/parser/cypher_analyze.c index f0f60f638..128acd0fb 100644 --- a/src/backend/parser/cypher_analyze.c +++ b/src/backend/parser/cypher_analyze.c @@ -113,7 +113,7 @@ static void post_parse_analyze(ParseState *pstate, Query *query, JumbleState *js } /* reset extra_node */ - pfree(extra_node); + pfree_if_not_null(extra_node); extra_node = NULL; } } @@ -303,7 +303,7 @@ static void build_explain_query(Query *query, Node *explain_node) ((ExplainStmt *)explain_node)->options = NULL; /* we need to free query_node as it is no longer needed */ - pfree(query_node); + pfree_if_not_null(query_node); } static bool is_rte_cypher(RangeTblEntry *rte) diff --git a/src/backend/parser/cypher_expr.c b/src/backend/parser/cypher_expr.c index cbd720046..16d6e3572 100644 --- a/src/backend/parser/cypher_expr.c +++ b/src/backend/parser/cypher_expr.c @@ -24,6 +24,9 @@ #include "postgres.h" +#include "catalog/pg_proc.h" +#include "catalog/dependency.h" +#include "commands/extension.h" #include "miscadmin.h" #include "nodes/nodeFuncs.h" #include "optimizer/optimizer.h" @@ -33,7 +36,9 @@ #include "parser/cypher_clause.h" #include "parser/parse_oper.h" #include "parser/parse_relation.h" +#include "parser/parse_type.h" #include "utils/builtins.h" +#include "utils/catcache.h" #include "utils/float.h" #include "utils/lsyscache.h" @@ -52,6 +57,7 @@ #define FUNC_AGTYPE_TYPECAST_PG_FLOAT8 "agtype_to_float8" #define FUNC_AGTYPE_TYPECAST_PG_BIGINT "agtype_to_int8" #define FUNC_AGTYPE_TYPECAST_BOOL "agtype_typecast_bool" +#define FUNC_AGTYPE_TYPECAST_PG_TEXT "agtype_to_text" static Node *transform_cypher_expr_recurse(cypher_parsestate *cpstate, Node *expr); @@ -94,6 +100,24 @@ static Node *transform_column_ref_for_indirection(cypher_parsestate *cpstate, ColumnRef *cr); static Node 
*transform_cypher_list_comprehension(cypher_parsestate *cpstate, cypher_unwind *expr); +static Node *transform_external_ext_FuncCall(cypher_parsestate *cpstate, + FuncCall *fn, List *targs, + Form_pg_proc procform, + char *extension); +static List *cast_agtype_args_to_target_type(cypher_parsestate *cpstate, + Form_pg_proc procform, + List *fargs, + Oid *target_types); +static Node *cast_to_target_type(cypher_parsestate *cpstate, Node *expr, + Oid source_oid, Oid target_oid); +static Node *wrap_text_output_to_agtype(cypher_parsestate *cpstate, + FuncExpr *fexpr); +static Form_pg_proc get_procform(FuncCall *fn, bool err_not_found); +static char *get_mapped_extension(Oid func_oid); +static bool is_extension_external(char *extension); +static bool is_pgvector_datatype(char *typename); +static char *construct_age_function_name(char *funcname); +static bool function_exists(char *funcname, char *extension); /* transform a cypher expression */ Node *transform_cypher_expr(cypher_parsestate *cpstate, Node *expr, @@ -1298,7 +1322,8 @@ static Node *transform_column_ref_for_indirection(cypher_parsestate *cpstate, } /* find the properties column of the NSI and return a var for it */ - node = scanNSItemForColumn(pstate, pnsi, 0, "properties", cr->location); + node = scanNSItemForColumn(pstate, pnsi, levels_up, "properties", + cr->location); /* * Error out if we couldn't find it. 
@@ -1538,6 +1563,7 @@ static Node *transform_cypher_typecast(cypher_parsestate *cpstate, { List *fname; FuncCall *fnode; + ParseState *pstate; /* verify input parameter */ Assert (cpstate != NULL); @@ -1545,6 +1571,7 @@ static Node *transform_cypher_typecast(cypher_parsestate *cpstate, /* create the qualified function name, schema first */ fname = list_make1(makeString("ag_catalog")); + pstate = &cpstate->pstate; /* append the name of the requested typecast function */ if (pg_strcasecmp(ctypecast->typecast, "edge") == 0) @@ -1580,11 +1607,49 @@ static Node *transform_cypher_typecast(cypher_parsestate *cpstate, { fname = lappend(fname, makeString(FUNC_AGTYPE_TYPECAST_PG_BIGINT)); } - else if ((pg_strcasecmp(ctypecast->typecast, "bool") == 0 || + else if ((pg_strcasecmp(ctypecast->typecast, "bool") == 0 || pg_strcasecmp(ctypecast->typecast, "boolean") == 0)) { fname = lappend(fname, makeString(FUNC_AGTYPE_TYPECAST_BOOL)); } + else if (pg_strcasecmp(ctypecast->typecast, "pg_text") == 0) + { + fname = lappend(fname, makeString(FUNC_AGTYPE_TYPECAST_PG_TEXT)); + } + else if (is_pgvector_datatype(ctypecast->typecast)) + { + TypeName *target_typname; + Oid source_oid; + Oid target_oid; + Node *expr; + + /* transform the expr before casting */ + expr = transform_cypher_expr_recurse(cpstate, + ctypecast->expr); + + /* get the source and target oids */ + target_typname = makeTypeNameFromNameList(list_make1( + makeString(ctypecast->typecast))); + target_oid = typenameTypeId(pstate, target_typname); + source_oid = exprType(expr); + + if (source_oid == AGTYPEOID) + { + /* + * Cast to text and then to target type, since we cant + * directly cast agtype to pgvector datatypes. 
+ */ + expr = cast_to_target_type(cpstate, expr, source_oid, TEXTOID); + expr = cast_to_target_type(cpstate, expr, TEXTOID, target_oid); + } + else + { + /* try a direct cast, it will error out if not possible */ + expr = cast_to_target_type(cpstate, expr, source_oid, target_oid); + } + + return expr; + } /* if none was found, error out */ else { @@ -1601,6 +1666,352 @@ static Node *transform_cypher_typecast(cypher_parsestate *cpstate, return transform_FuncCall(cpstate, fnode); } +static Node *transform_external_ext_FuncCall(cypher_parsestate *cpstate, + FuncCall *fn, List *targs, + Form_pg_proc procform, + char *extension) +{ + ParseState *pstate = &cpstate->pstate; + FuncExpr *fexpr = NULL; + Node *retval = NULL; + Node *last_srf = pstate->p_last_srf; + Oid *proargtypes; + + /* make sure procform in not NULL */ + Assert(procform != NULL); + proargtypes = procform->proargtypes.values; + + /* cast the agtype arguments to the types accepted by function */ + targs = cast_agtype_args_to_target_type(cpstate, procform, targs, proargtypes); + + /* now get the function node for the external function */ + fexpr = (FuncExpr *)ParseFuncOrColumn(pstate, fn->funcname, targs, + last_srf, fn, false, + fn->location); + + /* + * This will cast TEXT output to AGTYPE. It will error out if this is + * not possible to do. For TEXT to AGTYPE we need to wrap the output + * due to issues with creating a cast from TEXT to AGTYPE. + */ + if (fexpr->funcresulttype == TEXTOID) + { + retval = wrap_text_output_to_agtype(cpstate, fexpr); + } + else + { + retval = (Node *)fexpr; + } + + /* additional casts or wraps can be done here for other types */ + + /* flag that an aggregate was found during a transform */ + if (retval != NULL && retval->type == T_Aggref) + { + cpstate->exprHasAgg = true; + } + + /* we can just return it here */ + return retval; +} + +/* + * Cast a function's input parameter list from agtype to that function's input + * type. 
This is used for functions that don't take agtype as input and where + * there isn't an implicit cast to do this for us. + */ +static List *cast_agtype_args_to_target_type(cypher_parsestate *cpstate, + Form_pg_proc procform, + List *fargs, + Oid *target_types) +{ + char *funcname = NameStr(procform->proname); + int nargs = procform->pronargs; + ListCell *lc = NULL; + int i = 0; + + /* verify the length of args are same */ + if (list_length(fargs) != nargs) + { + ereport(ERROR, + (errcode(ERRCODE_DATA_EXCEPTION), + errmsg("function %s requires %d arguments, %d given", + funcname, nargs, list_length(fargs)))); + } + + /* iterate through the function's args */ + foreach (lc, fargs) + { + char *target_typname; + Node *expr = lfirst(lc); + Oid source_oid = exprType(expr); + Oid target_oid = target_types[i]; + + /* get the typename from target_oid */ + target_typname = format_type_be(target_oid); + + /* cast the agtype to the target type */ + if (source_oid == AGTYPEOID && is_pgvector_datatype(target_typname)) + { + /* + * There is no cast from agtype to vector, so we first + * cast agtype to text and then text to vector. + */ + expr = cast_to_target_type(cpstate, expr, source_oid, TEXTOID); + expr = cast_to_target_type(cpstate, expr, TEXTOID, target_oid); + } + /* additional casts can be added here for other types */ + else + { + /* try a direct cast, it will error out if not possible */ + expr = cast_to_target_type(cpstate, expr, source_oid, target_oid); + } + + lfirst(lc) = expr; + i++; + } + + return fargs; +} + +/* + * Cast an input type to an output type, error out if not possible. + * Thanks to Taha for this idea. + */ +static Node *cast_to_target_type(cypher_parsestate *cpstate, Node *expr, + Oid source_oid, Oid target_oid) +{ + ParseState *pstate = &cpstate->pstate; + + /* can we cast from source to target oid? 
*/ + if (can_coerce_type(1, &source_oid, &target_oid, COERCION_EXPLICIT)) + { + /* coerce the source to the target */ + expr = coerce_type(pstate, expr, source_oid, target_oid, -1, + COERCION_EXPLICIT, COERCE_EXPLICIT_CAST, -1); + } + /* error out if we can't cast */ + else + { + ereport(ERROR, + (errcode(ERRCODE_UNDEFINED_FUNCTION), + errmsg("cannot cast type %s to %s", format_type_be(source_oid), + format_type_be(target_oid)))); + } + + /* return the casted expression */ + return expr; +} + +/* + * Due to issues with creating a cast from text to agtype, we need to wrap a + * function that outputs text with text_to_agtype. + */ +static Node *wrap_text_output_to_agtype(cypher_parsestate *cpstate, + FuncExpr *fexpr) +{ + ParseState *pstate = &cpstate->pstate; + Node *last_srf = pstate->p_last_srf; + Node *retval = NULL; + List *fname = NIL; + FuncCall *fnode = NULL; + + if (fexpr->funcresulttype != TEXTOID) + { + ereport(ERROR, + (errcode(ERRCODE_DATA_EXCEPTION), + errmsg("can only wrap text to agtype"))); + } + + /* make a function call node to cast text to agtype */ + fname = list_make2(makeString("ag_catalog"), makeString("text_to_agtype")); + + /* the input function is the arg to the new function (wrapper) */ + fnode = makeFuncCall(fname, list_make1(fexpr), COERCE_SQL_SYNTAX, -1); + + /* ... and hand off to ParseFuncOrColumn to create it */ + retval = ParseFuncOrColumn(pstate, fname, list_make1(fexpr), last_srf, + fnode, false, -1); + + /* return the wrapped function */ + return retval; +} + +/* + * Returns Form_pg_proc struct for given function, if the function + * is not in search path, it is not considered. 
+ */
+static Form_pg_proc get_procform(FuncCall *fn, bool err_not_found)
+{
+    CatCList *catlist = NULL;
+    Form_pg_proc procform = NULL;
+    int nargs;
+    int i = 0;
+    List *asp;
+    bool found = false;
+    char *funcname = (((String*)linitial(fn->funcname))->sval);
+
+    /* get a list of matching functions */
+    catlist = SearchSysCacheList1(PROCNAMEARGSNSP, CStringGetDatum(funcname));
+
+    if (catlist->n_members == 0)
+    {
+        ReleaseSysCacheList(catlist);
+        return NULL;
+    }
+
+    asp = fetch_search_path(false);
+    nargs = list_length(fn->args);
+
+    /* iterate through them and verify that they are in the search path */
+    for (i = 0; i < catlist->n_members; i++)
+    {
+        ListCell *nsp;
+        HeapTuple proctup = &catlist->members[i]->tuple;
+        procform = (Form_pg_proc) GETSTRUCT(proctup);
+
+        /*
+         * Match on name and argument count first. provariadic is the
+         * variadic element type Oid (InvalidOid if none), so reduce it
+         * to a bool before comparing it with the call's VARIADIC flag.
+         */
+        if (pg_strcasecmp(funcname, procform->proname.data) == 0 &&
+            nargs == procform->pronargs &&
+            fn->func_variadic == OidIsValid(procform->provariadic))
+        {
+            foreach(nsp, asp)
+            {
+                Oid oid = lfirst_oid(nsp);
+
+                if (procform->pronamespace == oid &&
+                    isTempNamespace(procform->pronamespace) == false)
+                {
+                    found = true;
+                    break;
+                }
+            }
+        }
+
+        if (found)
+        {
+            break;
+        }
+
+        /* reset procform */
+        procform = NULL;
+    }
+
+    /* Error out if function not found */
+    if (err_not_found && (procform == NULL))
+    {
+        ereport(ERROR,
+                (errcode(ERRCODE_UNDEFINED_FUNCTION),
+                 errmsg("function %s does not exist", funcname),
+                 errhint("If the function is from an external extension, "
+                         "make sure the extension is installed and the "
+                         "function is in the search path.")));
+    }
+
+    /* NOTE(review): the returned procform points into catlist - confirm safe */
+    ReleaseSysCacheList(catlist);
+    list_free(asp);
+
+    return procform;
+}
+
+static char *get_mapped_extension(Oid func_oid)
+{
+    Oid extension_oid;
+    char *extension = NULL;
+
+    extension_oid = getExtensionOfObject(ProcedureRelationId, func_oid);
+    extension = get_extension_name(extension_oid);
+
+    return extension;
+}
+
+static bool is_extension_external(char *extension)
+{
+    return ((extension != NULL) &&
+            (pg_strcasecmp(extension, "age") != 0));
+}
+
+/* true only for vector/halfvec/sparsevec; pg_strcasecmp() is 0 on a match */
+static bool is_pgvector_datatype(char *typename)
+{
+    return (pg_strcasecmp(typename, "vector") == 0 ||
+            pg_strcasecmp(typename, "halfvec") == 0 ||
+            pg_strcasecmp(typename, "sparsevec") == 0);
+}
+
+/* Returns a palloc'd, lower-cased copy of funcname prefixed with "age_" */
+static char *construct_age_function_name(char *funcname)
+{
+    int pnlen = strlen(funcname);
+    char *ag_name = palloc(pnlen + 5);
+    int i;
+
+    /* copy in the prefix - all AGE functions are prefixed with age_ */
+    strncpy(ag_name, "age_", 4);
+
+    /*
+     * All AGE function names are in lower case. So, copy in the funcname
+     * in lower case. tolower() is undefined for negative char values.
+     */
+    for (i = 0; i < pnlen; i++)
+    {
+        ag_name[i + 4] = tolower((unsigned char) funcname[i]);
+    }
+
+    /* terminate it with 0 */
+    ag_name[i + 4] = 0;
+
+    return ag_name;
+}
+
+/*
+ * Checks if a function exists. If the extension name is given,
+ * then it checks if the function exists in that extension.
+ */ +static bool function_exists(char *funcname, char *extension) +{ + CatCList *catlist = NULL; + bool found = false; + int i = 0; + + /* get a list of matching functions */ + catlist = SearchSysCacheList1(PROCNAMEARGSNSP, CStringGetDatum(funcname)); + + if (catlist->n_members == 0) + { + ReleaseSysCacheList(catlist); + return false; + } + else if (extension == NULL) + { + ReleaseSysCacheList(catlist); + return true; + } + + for (i = 0; i < catlist->n_members; i++) + { + HeapTuple proctup = &catlist->members[i]->tuple; + Form_pg_proc procform = (Form_pg_proc) GETSTRUCT(proctup); + char *ext = get_mapped_extension(procform->oid); + + if (ext != NULL && pg_strcasecmp(ext, extension) == 0) + { + found = true; + break; + } + } + + /* we need to release the cache list */ + ReleaseSysCacheList(catlist); + + return found; +} + /* * Code borrowed from PG's transformFuncCall and updated for AGE */ @@ -1625,59 +2036,84 @@ static Node *transform_FuncCall(cypher_parsestate *cpstate, FuncCall *fn) /* within group should not happen */ Assert(!fn->agg_within_group); + /* If it is a qualified function call, let it through. */ + if (list_length(fn->funcname) > 1) + { + fname = fn->funcname; + } /* - * If the function name is not qualified, then it is one of ours. We need to - * construct its name, and qualify it, so that PG can find it. + * Else We need to check if the function call is for + * age or for some external extension. */ - if (list_length(fn->funcname) == 1) + else { - /* get the name, size, and the ag name allocated */ - char *name = ((String*)linitial(fn->funcname))->sval; - int pnlen = strlen(name); - char *ag_name = palloc(pnlen + 5); - int i; - - /* copy in the prefix - all AGE functions are prefixed with age_ */ - strncpy(ag_name, "age_", 4); + char *name = strVal(linitial(fn->funcname)); + char *ag_name = construct_age_function_name(name); - /* - * All AGE function names are in lower case. So, copy in the name - * in lower case. 
- */ - for (i = 0; i < pnlen; i++) - ag_name[i + 4] = tolower(name[i]); - - /* terminate it with 0 */ - ag_name[i + 4] = 0; + if (function_exists(ag_name, "age")) + { + /* qualify the name with our schema name */ + fname = list_make2(makeString("ag_catalog"), makeString(ag_name)); - /* qualify the name with our schema name */ - fname = list_make2(makeString("ag_catalog"), makeString(ag_name)); + /* + * Currently 3 functions need the graph name passed in as the first + * argument - in addition to the other arguments: startNode, endNode, + * and vle. So, check for those 3 functions here and that the arg list + * is not empty. Then prepend the graph name if necessary. + */ + if ((list_length(targs) != 0) && + (strcmp("startNode", name) == 0 || + strcmp("endNode", name) == 0 || + strcmp("vle", name) == 0 || + strcmp("vertex_stats", name) == 0)) + { + char *graph_name = cpstate->graph_name; + Datum d = string_to_agtype(graph_name); + Const *c = makeConst(AGTYPEOID, -1, InvalidOid, -1, d, false, + false); - /* - * Currently 3 functions need the graph name passed in as the first - * argument - in addition to the other arguments: startNode, endNode, - * and vle. So, check for those 3 functions here and that the arg list - * is not empty. Then prepend the graph name if necessary. + targs = lcons(c, targs); + } + } + /* + * If it's not in age, check if it's a potential call to some function + * in another installed extension. 
*/ - if ((list_length(targs) != 0) && - (strcmp("startNode", name) == 0 || - strcmp("endNode", name) == 0 || - strcmp("vle", name) == 0 || - strcmp("vertex_stats", name) == 0)) + else if(function_exists(name, NULL)) { - char *graph_name = cpstate->graph_name; - Datum d = string_to_agtype(graph_name); - Const *c = makeConst(AGTYPEOID, -1, InvalidOid, -1, d, false, - false); + Form_pg_proc procform = get_procform(fn, true); + char *extension = get_mapped_extension(procform->oid); - targs = lcons(c, targs); + /* + * If the function is from another extension, transform + * it if possible and return the function expr. + */ + if (is_extension_external(extension)) + { + retval = transform_external_ext_FuncCall(cpstate, fn, targs, + procform, extension); + return retval; + } + else + { + ereport(ERROR, + (errcode(ERRCODE_UNDEFINED_FUNCTION), + errmsg("function %s does not exist", name), + errhint("If the function is from an external extension, " + "make sure the extension is installed and the " + "function is in the search path."))); + } + } + /* no function found */ + else + { + ereport(ERROR, + (errcode(ERRCODE_UNDEFINED_FUNCTION), + errmsg("function %s does not exist", name), + errhint("If the function is from an external extension, " + "make sure the extension is installed and the " + "function is in the search path."))); } - - } - /* If it is not one of our functions, pass the name list through */ - else - { - fname = fn->funcname; } /* ... 
and hand off to ParseFuncOrColumn */ diff --git a/src/backend/parser/cypher_gram.y b/src/backend/parser/cypher_gram.y index 4dee36060..6cb15e505 100644 --- a/src/backend/parser/cypher_gram.y +++ b/src/backend/parser/cypher_gram.y @@ -26,6 +26,7 @@ #include "parser/cypher_gram.h" #include "parser/cypher_parse_node.h" #include "parser/scansup.h" +#include "utils/agtype.h" /* override the default action for locations */ #define YYLLOC_DEFAULT(current, rhs, n) \ @@ -66,6 +67,8 @@ %token IDENTIFIER %token PARAMETER +%token BQIDENT +%token CHAR /* operators that have more than 1 character */ %token NOT_EQ LT_EQ GT_EQ DOT_DOT TYPECAST PLUS_EQ EQ_TILDE CONCAT @@ -640,7 +643,7 @@ subquery_stmt_no_return: single_subquery: subquery_part_init reading_clause_list return { - $$ = list_concat($1, lappend($2, $3)); + $$ = list_concat($1, lappend($2, $3)); } ; @@ -2922,7 +2925,7 @@ static char *create_unique_name(char *prefix_name) /* if we created the prefix, we need to free it */ if (prefix_name == NULL || strlen(prefix_name) <= 0) { - pfree(prefix); + pfree_if_not_null(prefix); } return name; @@ -3279,7 +3282,7 @@ static Node *verify_rule_as_list_comprehension(Node *expr, Node *expr2, int where_loc, int mapping_loc) { Node *result = NULL; - + /* * If the first expression is a ColumnRef, then we can build a * list_comprehension node. 
@@ -3358,4 +3361,4 @@ static Node *build_list_comprehension_node(ColumnRef *cref, Node *expr, /* return the UNWIND node */ return (Node *)unwind; -} \ No newline at end of file +} diff --git a/src/backend/parser/cypher_parse_agg.c b/src/backend/parser/cypher_parse_agg.c index cd743fcc4..22ec2df74 100644 --- a/src/backend/parser/cypher_parse_agg.c +++ b/src/backend/parser/cypher_parse_agg.c @@ -31,6 +31,7 @@ #include "parser/cypher_parse_agg.h" #include "parser/parsetree.h" #include "rewrite/rewriteManip.h" +#include "utils/agtype.h" typedef struct { @@ -817,7 +818,7 @@ static List * expand_grouping_sets(List *groupingSets, int limit) result = lappend(result, list_union_int(NIL, (List *) lfirst(lc))); } - for_each_cell(lc, expanded_groups, + for_each_cell(lc, expanded_groups, lnext(expanded_groups, list_head(expanded_groups))) { List *p = lfirst(lc); @@ -857,7 +858,7 @@ static List * expand_grouping_sets(List *groupingSets, int limit) while (result_len-- > 0) result = lappend(result, *ptr++); - pfree(buf); + pfree_if_not_null(buf); } return result; diff --git a/src/backend/parser/cypher_parser.c b/src/backend/parser/cypher_parser.c index 4ec56fb9a..d2b64ffef 100644 --- a/src/backend/parser/cypher_parser.c +++ b/src/backend/parser/cypher_parser.c @@ -50,7 +50,9 @@ int cypher_yylex(YYSTYPE *lvalp, YYLTYPE *llocp, ag_scanner_t scanner) ACCESS_PATH, ANY_EXISTS, ALL_EXISTS, - CONCAT + CONCAT, + CHAR, + BQIDENT }; ag_token token; @@ -93,6 +95,18 @@ int cypher_yylex(YYSTYPE *lvalp, YYLTYPE *llocp, ag_scanner_t scanner) lvalp->string = ident; break; } + case AG_TOKEN_BQIDENT: + { + char *ident; + + /* these are identifiers, just back ticked */ + token.type = AG_TOKEN_IDENTIFIER; + + ident = pstrdup(token.value.s); + truncate_identifier(ident, strlen(ident), true); + lvalp->string = ident; + break; + } case AG_TOKEN_PARAMETER: lvalp->string = pstrdup(token.value.s); break; diff --git a/src/backend/utils/adt/age_global_graph.c b/src/backend/utils/adt/age_global_graph.c index 
aeae5e32a..6f30060ae 100644 --- a/src/backend/utils/adt/age_global_graph.c +++ b/src/backend/utils/adt/age_global_graph.c @@ -80,6 +80,7 @@ typedef struct GRAPH_global_context int64 num_loaded_vertices; /* number of loaded vertices in this graph */ int64 num_loaded_edges; /* number of loaded edges in this graph */ ListGraphId *vertices; /* vertices for vertex hashtable cleanup */ + ListGraphId *edges; /* edges for edge hashtable cleanup */ struct GRAPH_global_context *next; /* next graph */ } GRAPH_global_context; @@ -181,7 +182,7 @@ static void create_GRAPH_global_hashtables(GRAPH_global_context *ggctx) ggctx->vertex_hashtable = hash_create(vhn, VERTEX_HTAB_INITIAL_SIZE, &vertex_ctl, HASH_ELEM | HASH_FUNCTION); - pfree(vhn); + pfree_if_not_null(vhn); /* initialize the edge hashtable */ MemSet(&edge_ctl, 0, sizeof(edge_ctl)); @@ -190,7 +191,7 @@ static void create_GRAPH_global_hashtables(GRAPH_global_context *ggctx) edge_ctl.hash = tag_hash; ggctx->edge_hashtable = hash_create(ehn, EDGE_HTAB_INITIAL_SIZE, &edge_ctl, HASH_ELEM | HASH_FUNCTION); - pfree(ehn); + pfree_if_not_null(ehn); } /* helper function to get a List of all label names for the specified graph */ @@ -301,6 +302,9 @@ static bool insert_edge_entry(GRAPH_global_context *ggctx, graphid edge_id, ee->end_vertex_id = end_vertex_id; ee->edge_label_table_oid = edge_label_table_oid; + /* we also need to store the edge id for clean up of edge property datums */ + ggctx->edges = append_graphid(ggctx->edges, edge_id); + /* increment the number of loaded edges */ ggctx->num_loaded_edges++; @@ -696,6 +700,7 @@ static void freeze_GRAPH_global_hashtables(GRAPH_global_context *ggctx) static bool free_specific_GRAPH_global_context(GRAPH_global_context *ggctx) { GraphIdNode *curr_vertex = NULL; + GraphIdNode *curr_edge = NULL; /* don't do anything if NULL */ if (ggctx == NULL) @@ -704,13 +709,13 @@ static bool free_specific_GRAPH_global_context(GRAPH_global_context *ggctx) } /* free the graph name */ - 
pfree(ggctx->graph_name); + pfree_if_not_null(ggctx->graph_name); ggctx->graph_name = NULL; ggctx->graph_oid = InvalidOid; ggctx->next = NULL; - /* free the vertex edge lists, starting with the head */ + /* free the vertex edge lists and properties, starting with the head */ curr_vertex = peek_stack_head(ggctx->vertices); while (curr_vertex != NULL) { @@ -735,6 +740,10 @@ static bool free_specific_GRAPH_global_context(GRAPH_global_context *ggctx) return false; } + /* free the vertex's datumCopy properties */ + pfree_if_not_null(DatumGetPointer(value->vertex_properties)); + value->vertex_properties = 0; + /* free the edge list associated with this vertex */ free_ListGraphId(value->edges_in); free_ListGraphId(value->edges_out); @@ -748,10 +757,47 @@ static bool free_specific_GRAPH_global_context(GRAPH_global_context *ggctx) curr_vertex = next_vertex; } + /* free the edge properties, starting with the head */ + curr_edge = peek_stack_head(ggctx->edges); + while (curr_edge != NULL) + { + GraphIdNode *next_edge = NULL; + edge_entry *value = NULL; + bool found = false; + graphid edge_id; + + /* get the next edge in the list, if any */ + next_edge = next_GraphIdNode(curr_edge); + + /* get the current edge id */ + edge_id = get_graphid(curr_edge); + + /* retrieve the edge entry */ + value = (edge_entry *)hash_search(ggctx->edge_hashtable, + (void *)&edge_id, HASH_FIND, + &found); + /* this is bad if it isn't found, but leave that to the caller */ + if (found == false) + { + return false; + } + + /* free the edge's datumCopy properties */ + pfree_if_not_null(DatumGetPointer(value->edge_properties)); + value->edge_properties = 0; + + /* move to the next edge */ + curr_edge = next_edge; + } + /* free the vertices list */ free_ListGraphId(ggctx->vertices); ggctx->vertices = NULL; + /* free the edges list */ + free_ListGraphId(ggctx->edges); + ggctx->edges = NULL; + /* free the hashtables */ hash_destroy(ggctx->vertex_hashtable); hash_destroy(ggctx->edge_hashtable); @@ -760,7 
+806,7 @@ static bool free_specific_GRAPH_global_context(GRAPH_global_context *ggctx) ggctx->edge_hashtable = NULL; /* free the context */ - pfree(ggctx); + pfree_if_not_null(ggctx); ggctx = NULL; return true; @@ -836,7 +882,7 @@ GRAPH_global_context *manage_GRAPH_global_contexts(char *graph_name, pthread_mutex_unlock(&global_graph_contexts_container.mutex_lock); ereport(ERROR, (errcode(ERRCODE_DATA_EXCEPTION), - errmsg("missing vertex_entry during free"))); + errmsg("missing vertex or edge entry during free"))); } } else @@ -891,6 +937,8 @@ GRAPH_global_context *manage_GRAPH_global_contexts(char *graph_name, /* initialize our vertices list */ new_ggctx->vertices = NULL; + /* initialize our edges list */ + new_ggctx->edges = NULL; /* build the hashtables for this graph */ create_GRAPH_global_hashtables(new_ggctx); @@ -939,7 +987,7 @@ static bool delete_GRAPH_global_contexts(void) pthread_mutex_unlock(&global_graph_contexts_container.mutex_lock); ereport(ERROR, (errcode(ERRCODE_DATA_EXCEPTION), - errmsg("missing vertex_entry during free"))); + errmsg("missing vertex or edge entry during free"))); } /* advance to the next context */ @@ -1261,7 +1309,7 @@ Datum age_vertex_stats(PG_FUNCTION_ARGS) ggctx = manage_GRAPH_global_contexts(graph_name, graph_oid); /* free the graph name */ - pfree(graph_name); + pfree_if_not_null(graph_name); /* get the id */ agtv_temp = GET_AGTYPE_VALUE_OBJECT_VALUE(agtv_vertex, "id"); @@ -1367,7 +1415,7 @@ Datum age_graph_stats(PG_FUNCTION_ARGS) ggctx = manage_GRAPH_global_contexts(graph_name, graph_oid); /* free the graph name */ - pfree(graph_name); + pfree_if_not_null(graph_name); /* zero the state */ memset(&result, 0, sizeof(agtype_in_state)); diff --git a/src/backend/utils/adt/age_graphid_ds.c b/src/backend/utils/adt/age_graphid_ds.c index 8c632b6d8..625a6947c 100644 --- a/src/backend/utils/adt/age_graphid_ds.c +++ b/src/backend/utils/adt/age_graphid_ds.c @@ -144,12 +144,14 @@ void free_ListGraphId(ListGraphId *container) { next_node = 
curr_node->next; /* we can do this because this is just a list of ints */ - pfree(curr_node); + pfree_if_not_null(curr_node); + container->size--; curr_node = next_node; } + Assert(container->size == 0); /* free the container */ - pfree(container); + pfree_if_not_null(container); } /* helper function to create a new, empty, graphid stack */ @@ -186,7 +188,7 @@ void free_graphid_stack(ListGraphId *stack) GraphIdNode *next = stack->head->next; /* free the head element */ - pfree(stack->head); + pfree_if_not_null(stack->head); /* move the head to the next */ stack->head = next; } @@ -251,7 +253,7 @@ graphid pop_graphid_stack(ListGraphId *stack) stack->head = stack->head->next; stack->size--; /* free the element */ - pfree(node); + pfree_if_not_null(node); /* return the id */ return id; diff --git a/src/backend/utils/adt/age_session_info.c b/src/backend/utils/adt/age_session_info.c index 350273eb3..f224d4064 100644 --- a/src/backend/utils/adt/age_session_info.c +++ b/src/backend/utils/adt/age_session_info.c @@ -125,12 +125,12 @@ void reset_session_info(void) { if (session_info_graph_name != NULL) { - pfree(session_info_graph_name); + pfree_if_not_null(session_info_graph_name); } if (session_info_cypher_statement != NULL) { - pfree(session_info_cypher_statement); + pfree_if_not_null(session_info_cypher_statement); } } diff --git a/src/backend/utils/adt/age_vle.c b/src/backend/utils/adt/age_vle.c index e6bdec78c..f0adab2e9 100644 --- a/src/backend/utils/adt/age_vle.c +++ b/src/backend/utils/adt/age_vle.c @@ -319,7 +319,7 @@ static void create_VLE_local_state_hashtable(VLE_local_context *vlelctx) EDGE_STATE_HTAB_INITIAL_SIZE, &edge_state_ctl, HASH_ELEM | HASH_FUNCTION); - pfree(eshn); + pfree_if_not_null(eshn); } /* @@ -433,14 +433,14 @@ static void free_VLE_local_context(VLE_local_context *vlelctx) /* free the stored graph name */ if (vlelctx->graph_name != NULL) { - pfree(vlelctx->graph_name); + pfree_if_not_null(vlelctx->graph_name); vlelctx->graph_name = NULL; } /* 
free the stored edge label name */ if (vlelctx->edge_label_name != NULL) { - pfree(vlelctx->edge_label_name); + pfree_if_not_null(vlelctx->edge_label_name); vlelctx->edge_label_name = NULL; } @@ -462,15 +462,15 @@ static void free_VLE_local_context(VLE_local_context *vlelctx) } /* free the containers */ - pfree(vlelctx->dfs_vertex_stack); - pfree(vlelctx->dfs_edge_stack); - pfree(vlelctx->dfs_path_stack); + pfree_if_not_null(vlelctx->dfs_vertex_stack); + pfree_if_not_null(vlelctx->dfs_edge_stack); + pfree_if_not_null(vlelctx->dfs_path_stack); vlelctx->dfs_vertex_stack = NULL; vlelctx->dfs_edge_stack = NULL; vlelctx->dfs_path_stack = NULL; /* and finally the context itself */ - pfree(vlelctx); + pfree_if_not_null(vlelctx); vlelctx = NULL; } @@ -1878,8 +1878,19 @@ Datum age_vle(PG_FUNCTION_ARGS) */ agtype *agt_materialize_vle_path(agtype *agt_arg_vpc) { - /* convert the agtype_value to agtype and return it */ - return agtype_value_to_agtype(agtv_materialize_vle_path(agt_arg_vpc)); + agtype *agt_path = NULL; + agtype_value *agtv_path = NULL; + + /* get the path */ + agtv_path = agtv_materialize_vle_path(agt_arg_vpc); + + /* convert agtype_value to agtype */ + agt_path = agtype_value_to_agtype(agtv_path); + + /* free in memory path */ + pfree_agtype_value(agtv_path); + + return agt_path; } /* @@ -1939,6 +1950,8 @@ Datum age_match_two_vle_edges(PG_FUNCTION_ARGS) left_array_size = left_path->graphid_array_size; left_array = GET_GRAPHID_ARRAY_FROM_CONTAINER(left_path); + PG_FREE_IF_COPY(agt_arg_vpc, 0); + agt_arg_vpc = AG_GET_ARG_AGTYPE_P(1); if (!AGT_ROOT_IS_BINARY(agt_arg_vpc) || @@ -1953,6 +1966,8 @@ Datum age_match_two_vle_edges(PG_FUNCTION_ARGS) right_path = (VLE_path_container *)agt_arg_vpc; right_array = GET_GRAPHID_ARRAY_FROM_CONTAINER(right_path); + PG_FREE_IF_COPY(agt_arg_vpc, 1); + if (left_array[left_array_size - 1] != right_array[0]) { PG_RETURN_BOOL(false); @@ -2132,7 +2147,6 @@ agtype_value *agtv_materialize_vle_edges(agtype *agt_arg_vpc) /* build the 
AGTV_ARRAY of edges from the VLE_path_container */ agtv_array = build_edge_list(vpc); - /* convert the agtype_value to agtype and return it */ return agtv_array; } diff --git a/src/backend/utils/adt/agtype.c b/src/backend/utils/adt/agtype.c index f44d17509..090b1789e 100644 --- a/src/backend/utils/adt/agtype.c +++ b/src/backend/utils/adt/agtype.c @@ -30,6 +30,7 @@ #include "postgres.h" #include "varatt.h" +#include "utils/jsonfuncs.h" #include #include @@ -97,7 +98,8 @@ static void agtype_in_array_start(void *pstate); static void agtype_in_array_end(void *pstate); static void agtype_in_object_field_start(void *pstate, char *fname, bool isnull); -static void agtype_put_escaped_value(StringInfo out, agtype_value *scalar_val); +static void agtype_put_escaped_value(StringInfo out, agtype_value *scalar_val, + bool extend); static void escape_agtype(StringInfo buf, const char *str); bool is_decimal_needed(char *numstr); static void agtype_in_scalar(void *pstate, char *token, @@ -115,7 +117,8 @@ static void datum_to_agtype(Datum val, bool is_null, agtype_in_state *result, agt_type_category tcategory, Oid outfuncoid, bool key_scalar); static char *agtype_to_cstring_worker(StringInfo out, agtype_container *in, - int estimated_len, bool indent); + int estimated_len, bool indent, + bool extend); static text *agtype_value_to_text(agtype_value *scalar_val, bool err_not_scalar); static void add_indent(StringInfo out, bool indent, int level); @@ -178,6 +181,19 @@ static agtype_value *agtype_build_map_as_agtype_value(FunctionCallInfo fcinfo); agtype_value *agtype_composite_to_agtype_value_binary(agtype *a); static agtype_value *tostring_helper(Datum arg, Oid type, char *msghdr); +/* + * Due to how pfree can be implemented, it may not check for a passed NULL. This + * wrapper does just that, it will only call pfree if the pointer passed is not + * NULL.
+ */ +void pfree_if_not_null(void *ptr) +{ + if (ptr != NULL) + { + pfree(ptr); + } +} + /* global storage of OID for agtype and _agtype */ static Oid g_AGTYPEOID = InvalidOid; static Oid g_AGTYPEARRAYOID = InvalidOid; @@ -239,8 +255,11 @@ PG_FUNCTION_INFO_V1(graphid_recv); Datum graphid_recv(PG_FUNCTION_ARGS) { StringInfo buf = (StringInfo) PG_GETARG_POINTER(0); + int64 result = pq_getmsgint64(buf); - PG_RETURN_INT64(pq_getmsgint64(buf)); + PG_FREE_IF_COPY(buf, 0); + + PG_RETURN_INT64(result); } /* @@ -278,6 +297,7 @@ Datum agtype_recv(PG_FUNCTION_ARGS) int version = pq_getmsgint(buf, 1); char *str = NULL; int nbytes = 0; + Datum result; if (version == 1) { @@ -288,7 +308,12 @@ Datum agtype_recv(PG_FUNCTION_ARGS) elog(ERROR, "unsupported agtype version number %d", version); } - return agtype_from_cstring(str, nbytes); + result = agtype_from_cstring(str, nbytes); + + PG_FREE_IF_COPY(buf, 0); + pfree_if_not_null(str); + + return result; } /* @@ -311,8 +336,10 @@ Datum agtype_send(PG_FUNCTION_ARGS) pq_begintypsend(&buf); pq_sendint8(&buf, version); pq_sendtext(&buf, agtype_text->data, agtype_text->len); - pfree(agtype_text->data); - pfree(agtype_text); + pfree_if_not_null(agtype_text->data); + pfree_if_not_null(agtype_text); + + PG_FREE_IF_COPY(agt, 0); PG_RETURN_BYTEA_P(pq_endtypsend(&buf)); } @@ -325,8 +352,11 @@ PG_FUNCTION_INFO_V1(agtype_in); Datum agtype_in(PG_FUNCTION_ARGS) { char *str = PG_GETARG_CSTRING(0); + Datum result = agtype_from_cstring(str, strlen(str)); + + PG_FREE_IF_COPY(str, 0); - return agtype_from_cstring(str, strlen(str)); + return result; } PG_FUNCTION_INFO_V1(agtype_out); @@ -343,6 +373,8 @@ Datum agtype_out(PG_FUNCTION_ARGS) out = agtype_to_cstring(NULL, &agt->root, VARSIZE(agt)); + PG_FREE_IF_COPY(agt, 0); + PG_RETURN_CSTRING(out); } @@ -391,8 +423,10 @@ agtype_value *agtype_value_from_cstring(char *str, int len) static inline Datum agtype_from_cstring(char *str, int len) { agtype_value *agtv = agtype_value_from_cstring(str, len); + agtype 
*agt = agtype_value_to_agtype(agtv); - PG_RETURN_POINTER(agtype_value_to_agtype(agtv)); + pfree_agtype_value(agtv); + PG_RETURN_POINTER(agt); } size_t check_string_length(size_t len) @@ -791,7 +825,8 @@ static bool is_array_path(agtype_value *agtv) return true; } -static void agtype_put_escaped_value(StringInfo out, agtype_value *scalar_val) +static void agtype_put_escaped_value(StringInfo out, agtype_value *scalar_val, + bool extend) { char *numstr; @@ -808,7 +843,10 @@ static void agtype_put_escaped_value(StringInfo out, agtype_value *scalar_val) appendStringInfoString( out, DatumGetCString(DirectFunctionCall1( numeric_out, PointerGetDatum(scalar_val->val.numeric)))); - appendBinaryStringInfo(out, "::numeric", 9); + if (extend) + { + appendBinaryStringInfo(out, "::numeric", 9); + } break; case AGTV_INTEGER: appendStringInfoString( @@ -834,8 +872,12 @@ static void agtype_put_escaped_value(StringInfo out, agtype_value *scalar_val) agtype *prop; scalar_val->type = AGTV_OBJECT; prop = agtype_value_to_agtype(scalar_val); - agtype_to_cstring_worker(out, &prop->root, prop->vl_len_, false); - appendBinaryStringInfo(out, "::vertex", 8); + agtype_to_cstring_worker(out, &prop->root, prop->vl_len_, + false, extend); + if (extend) + { + appendBinaryStringInfo(out, "::vertex", 8); + } break; } case AGTV_EDGE: @@ -843,8 +885,12 @@ static void agtype_put_escaped_value(StringInfo out, agtype_value *scalar_val) agtype *prop; scalar_val->type = AGTV_OBJECT; prop = agtype_value_to_agtype(scalar_val); - agtype_to_cstring_worker(out, &prop->root, prop->vl_len_, false); - appendBinaryStringInfo(out, "::edge", 6); + agtype_to_cstring_worker(out, &prop->root, prop->vl_len_, + false, extend); + if (extend) + { + appendBinaryStringInfo(out, "::edge", 6); + } break; } case AGTV_PATH: @@ -852,8 +898,12 @@ static void agtype_put_escaped_value(StringInfo out, agtype_value *scalar_val) agtype *prop; scalar_val->type = AGTV_ARRAY; prop = agtype_value_to_agtype(scalar_val); - 
agtype_to_cstring_worker(out, &prop->root, prop->vl_len_, false); - appendBinaryStringInfo(out, "::path", 6); + agtype_to_cstring_worker(out, &prop->root, prop->vl_len_, + false, extend); + if (extend) + { + appendBinaryStringInfo(out, "::path", 6); + } break; } @@ -1051,7 +1101,8 @@ static void agtype_in_scalar(void *pstate, char *token, char *agtype_to_cstring(StringInfo out, agtype_container *in, int estimated_len) { - return agtype_to_cstring_worker(out, in, estimated_len, false); + return agtype_to_cstring_worker(out, in, estimated_len, false, + true); } /* @@ -1060,14 +1111,16 @@ char *agtype_to_cstring(StringInfo out, agtype_container *in, char *agtype_to_cstring_indent(StringInfo out, agtype_container *in, int estimated_len) { - return agtype_to_cstring_worker(out, in, estimated_len, true); + return agtype_to_cstring_worker(out, in, estimated_len, true, + true); } /* * common worker for above two functions */ static char *agtype_to_cstring_worker(StringInfo out, agtype_container *in, - int estimated_len, bool indent) + int estimated_len, bool indent, + bool extend) { bool first = true; agtype_iterator *it; @@ -1135,14 +1188,14 @@ static char *agtype_to_cstring_worker(StringInfo out, agtype_container *in, add_indent(out, use_indent, level); /* agtype rules guarantee this is a string */ - agtype_put_escaped_value(out, &v); + agtype_put_escaped_value(out, &v, extend); appendBinaryStringInfo(out, ": ", 2); type = agtype_iterator_next(&it, &v, false); if (type == WAGT_VALUE) { first = false; - agtype_put_escaped_value(out, &v); + agtype_put_escaped_value(out, &v, extend); } else { @@ -1163,7 +1216,7 @@ static char *agtype_to_cstring_worker(StringInfo out, agtype_container *in, if (!raw_scalar) add_indent(out, use_indent, level); - agtype_put_escaped_value(out, &v); + agtype_put_escaped_value(out, &v, extend); break; case WAGT_END_ARRAY: level--; @@ -1506,7 +1559,7 @@ static void datum_to_agtype(Datum val, bool is_null, agtype_in_state *result, intd = 
DirectFunctionCall1(int8in, CStringGetDatum(outputstr)); agtv.type = AGTV_INTEGER; agtv.val.int_value = DatumGetInt64(intd); - pfree(outputstr); + pfree_if_not_null(outputstr); } break; case AGT_TYPE_FLOAT: @@ -1551,7 +1604,7 @@ static void datum_to_agtype(Datum val, bool is_null, agtype_in_state *result, ObjectIdGetDatum(InvalidOid), Int32GetDatum(-1)); agtv.val.numeric = DatumGetNumeric(numd); - pfree(outputstr); + pfree_if_not_null(outputstr); } else { @@ -1781,8 +1834,8 @@ static void array_to_agtype_internal(Datum array, agtype_in_state *result) array_dim_to_agtype(result, 0, ndim, dim, elements, nulls, &count, tcategory, outfuncoid); - pfree(elements); - pfree(nulls); + pfree_if_not_null(elements); + pfree_if_not_null(nulls); } /* @@ -1929,7 +1982,7 @@ agtype_value *string_to_agtype_value(char *s) agtv->type = AGTV_STRING; agtv->val.string.len = check_string_length(strlen(s)); - agtv->val.string.val = s; + agtv->val.string.val = pnstrdup(s, agtv->val.string.len); return agtv; } @@ -1953,6 +2006,7 @@ PG_FUNCTION_INFO_V1(_agtype_build_path); Datum _agtype_build_path(PG_FUNCTION_ARGS) { agtype_in_state result; + agtype *agt_result; Datum *args = NULL; bool *nulls = NULL; Oid *types = NULL; @@ -1995,8 +2049,10 @@ Datum _agtype_build_path(PG_FUNCTION_ARGS) AGT_ROOT_BINARY_FLAGS(agt) == AGT_FBINARY_TYPE_VLE_PATH) { agtype *path = agt_materialize_vle_path(agt); + PG_FREE_IF_COPY(agt, i); PG_RETURN_POINTER(path); } + PG_FREE_IF_COPY(agt, i); } } @@ -2046,6 +2102,8 @@ Datum _agtype_build_path(PG_FUNCTION_ARGS) /* get the VLE path from the container as an agtype_value */ agtv_path = agtv_materialize_vle_path(agt); + PG_FREE_IF_COPY(agt, i); + /* it better be an AGTV_PATH */ Assert(agtv_path->type == AGTV_PATH); @@ -2099,6 +2157,7 @@ Datum _agtype_build_path(PG_FUNCTION_ARGS) { add_agtype(AGTYPE_P_GET_DATUM(agt), false, &result, types[i], false); + PG_FREE_IF_COPY(agt, i); } /* If we got here, we had a zero boundary case. 
So, clear it */ else @@ -2113,11 +2172,16 @@ Datum _agtype_build_path(PG_FUNCTION_ARGS) /* set it to a path type */ result.res->type = AGTV_PATH; - PG_RETURN_POINTER(agtype_value_to_agtype(result.res)); + agt_result = agtype_value_to_agtype(result.res); + + pfree_agtype_in_state(&result); + + PG_RETURN_POINTER(agt_result); } Datum make_path(List *path) { + agtype *agt_result; ListCell *lc; agtype_in_state result; int i = 1; @@ -2143,7 +2207,7 @@ Datum make_path(List *path) foreach (lc, path) { - agtype *agt= DATUM_GET_AGTYPE_P(PointerGetDatum(lfirst(lc))); + agtype *agt = DATUM_GET_AGTYPE_P(PointerGetDatum(lfirst(lc))); agtype_value *elem; elem = get_ith_agtype_value_from_container(&agt->root, 0); @@ -2168,6 +2232,12 @@ Datum make_path(List *path) add_agtype((Datum)agt, false, &result, AGTYPEOID, false); + if ((Pointer) (agt) != lfirst(lc)) + { + pfree_if_not_null(agt); + } + pfree_agtype_value(elem); + i++; } @@ -2175,7 +2245,11 @@ Datum make_path(List *path) result.res->type = AGTV_PATH; - PG_RETURN_POINTER(agtype_value_to_agtype(result.res)); + agt_result = agtype_value_to_agtype(result.res); + + pfree_agtype_in_state(&result); + + PG_RETURN_POINTER(agt_result); } PG_FUNCTION_INFO_V1(_agtype_build_vertex); @@ -2242,6 +2316,9 @@ Datum _agtype_build_vertex(PG_FUNCTION_ARGS) rawscalar = build_agtype(bstate); pfree_agtype_build_state(bstate); + PG_FREE_IF_COPY(label, 1); + PG_FREE_IF_COPY(properties, 2); + PG_RETURN_POINTER(rawscalar); } @@ -2341,6 +2418,10 @@ Datum _agtype_build_edge(PG_FUNCTION_ARGS) write_extended(bstate, edge, AGT_HEADER_EDGE); rawscalar = build_agtype(bstate); pfree_agtype_build_state(bstate); + + PG_FREE_IF_COPY(label, 3); + PG_FREE_IF_COPY(properties, 4); + PG_RETURN_POINTER(rawscalar); } @@ -2414,7 +2495,7 @@ static agtype_value *agtype_build_map_as_agtype_value(FunctionCallInfo fcinfo) result.res = push_agtype_value(&result.parse_state, WAGT_KEY, agtv); /* free the agtype_value from tostring_helper */ - pfree(agtv); + 
pfree_if_not_null(agtv); } else { @@ -2437,6 +2518,7 @@ PG_FUNCTION_INFO_V1(agtype_build_map); Datum agtype_build_map(PG_FUNCTION_ARGS) { agtype_value *result = NULL; + agtype *agt_result = NULL; result = agtype_build_map_as_agtype_value(fcinfo); if (result == NULL) @@ -2444,7 +2526,10 @@ Datum agtype_build_map(PG_FUNCTION_ARGS) PG_RETURN_NULL(); } - PG_RETURN_POINTER(agtype_value_to_agtype(result)); + agt_result = agtype_value_to_agtype(result); + pfree_agtype_value(result); + + PG_RETURN_POINTER(agt_result); } PG_FUNCTION_INFO_V1(agtype_build_map_noargs); @@ -2455,13 +2540,18 @@ PG_FUNCTION_INFO_V1(agtype_build_map_noargs); Datum agtype_build_map_noargs(PG_FUNCTION_ARGS) { agtype_in_state result; + agtype *agt_result; memset(&result, 0, sizeof(agtype_in_state)); push_agtype_value(&result.parse_state, WAGT_BEGIN_OBJECT, NULL); result.res = push_agtype_value(&result.parse_state, WAGT_END_OBJECT, NULL); - PG_RETURN_POINTER(agtype_value_to_agtype(result.res)); + agt_result = agtype_value_to_agtype(result.res); + + pfree_agtype_in_state(&result); + + PG_RETURN_POINTER(agt_result); } PG_FUNCTION_INFO_V1(agtype_build_map_nonull); @@ -2472,6 +2562,7 @@ PG_FUNCTION_INFO_V1(agtype_build_map_nonull); Datum agtype_build_map_nonull(PG_FUNCTION_ARGS) { agtype_value *result = NULL; + agtype *agt_result; result = agtype_build_map_as_agtype_value(fcinfo); if (result == NULL) @@ -2480,8 +2571,11 @@ Datum agtype_build_map_nonull(PG_FUNCTION_ARGS) } remove_null_from_agtype_object(result); + agt_result = agtype_value_to_agtype(result); - PG_RETURN_POINTER(agtype_value_to_agtype(result)); + pfree_agtype_value(result); + + PG_RETURN_POINTER(agt_result); } PG_FUNCTION_INFO_V1(agtype_build_list); @@ -2497,12 +2591,15 @@ Datum agtype_build_list(PG_FUNCTION_ARGS) Datum *args; bool *nulls; Oid *types; + agtype *agt_result; /*build argument values to build the array */ nargs = extract_variadic_args(fcinfo, 0, true, &args, &types, &nulls); if (nargs < 0) + { PG_RETURN_NULL(); + } 
memset(&result, 0, sizeof(agtype_in_state)); @@ -2510,11 +2607,17 @@ Datum agtype_build_list(PG_FUNCTION_ARGS) NULL); for (i = 0; i < nargs; i++) + { add_agtype(args[i], nulls[i], &result, types[i], false); + PG_FREE_IF_COPY(DatumGetPointer(args[i]), i); + } result.res = push_agtype_value(&result.parse_state, WAGT_END_ARRAY, NULL); + agt_result = agtype_value_to_agtype(result.res); - PG_RETURN_POINTER(agtype_value_to_agtype(result.res)); + pfree_agtype_in_state(&result); + + PG_RETURN_POINTER(agt_result); } PG_FUNCTION_INFO_V1(agtype_build_list_noargs); @@ -2525,13 +2628,18 @@ PG_FUNCTION_INFO_V1(agtype_build_list_noargs); Datum agtype_build_list_noargs(PG_FUNCTION_ARGS) { agtype_in_state result; + agtype *agt_result; memset(&result, 0, sizeof(agtype_in_state)); push_agtype_value(&result.parse_state, WAGT_BEGIN_ARRAY, NULL); result.res = push_agtype_value(&result.parse_state, WAGT_END_ARRAY, NULL); - PG_RETURN_POINTER(agtype_value_to_agtype(result.res)); + agt_result = agtype_value_to_agtype(result.res); + + pfree_agtype_in_state(&result); + + PG_RETURN_POINTER(agt_result); } /* @@ -2644,6 +2752,7 @@ Datum agtype_to_int8(PG_FUNCTION_ARGS) agtype_value *container = NULL; int64 result = 0x0; agtype *arg_agt = NULL; + bool is_scalar = false; /* get the agtype equivalence of any convertable input type */ arg_agt = get_one_agtype_from_variadic_args(fcinfo, 0, 1); @@ -2654,7 +2763,11 @@ Datum agtype_to_int8(PG_FUNCTION_ARGS) PG_RETURN_NULL(); } - if (!agtype_extract_scalar(&arg_agt->root, &agtv) || + /* get the scalar value if it is one and set the flag accordingly */ + is_scalar = agtype_extract_scalar(&arg_agt->root, &agtv); + + /* if it isn't something that can be cast error out */ + if (!is_scalar || (agtv.type != AGTV_FLOAT && agtv.type != AGTV_INTEGER && agtv.type != AGTV_NUMERIC && @@ -2691,7 +2804,7 @@ Datum agtype_to_int8(PG_FUNCTION_ARGS) ereport(ERROR, (errcode(ERRCODE_INVALID_PARAMETER_VALUE), errmsg("invalid agtype string to int8 type: %d", - 
(int)temp->type))); + temp->type))); } /* save the top agtype_value */ @@ -2710,7 +2823,7 @@ Datum agtype_to_int8(PG_FUNCTION_ARGS) else { elog(ERROR, "unexpected string type: %d in agtype_to_int8", - (int)temp->type); + temp->type); } } @@ -2738,13 +2851,13 @@ Datum agtype_to_int8(PG_FUNCTION_ARGS) ereport(ERROR, (errcode(ERRCODE_INVALID_PARAMETER_VALUE), errmsg("invalid conversion type in agtype_to_int8: %d", - (int)agtv_p->type))); + agtv_p->type))); } /* free the container, if it was used */ if (container) { - pfree(container); + pfree_if_not_null(container); } PG_FREE_IF_COPY(arg_agt, 0); @@ -2865,7 +2978,7 @@ Datum agtype_to_int4(PG_FUNCTION_ARGS) /* free the container, if it was used */ if (container) { - pfree(container); + pfree_if_not_null(container); } PG_FREE_IF_COPY(arg_agt, 0); @@ -2987,7 +3100,7 @@ Datum agtype_to_int2(PG_FUNCTION_ARGS) /* free the container, if it was used */ if (container) { - pfree(container); + pfree_if_not_null(container); } PG_FREE_IF_COPY(arg_agt, 0); @@ -3074,18 +3187,25 @@ Datum agtype_to_text(PG_FUNCTION_ARGS) /* Return null if arg_agt is null. This covers SQL and Agtype NULLS */ if (arg_agt == NULL) + { PG_RETURN_NULL(); + } /* check that we have a scalar value */ if (!AGT_ROOT_IS_SCALAR(arg_agt)) + { ereport(ERROR, (errcode(ERRCODE_INVALID_PARAMETER_VALUE), errmsg("agtype argument must resolve to a scalar value"))); + } /* get the arg parameter */ arg_value = get_ith_agtype_value_from_container(&arg_agt->root, 0); + PG_FREE_IF_COPY(arg_agt, 0); text_value = agtype_value_to_text(arg_value, true); + pfree_agtype_value(arg_value); + if (text_value == NULL) { PG_RETURN_NULL(); @@ -3094,6 +3214,90 @@ Datum agtype_to_text(PG_FUNCTION_ARGS) PG_RETURN_TEXT_P(text_value); } +PG_FUNCTION_INFO_V1(text_to_agtype); + +/* + * Cast text to agtype. 
+ */ +Datum text_to_agtype(PG_FUNCTION_ARGS) +{ + agtype *result = NULL; + agtype_value agtv; + text *text_value = NULL; + char *string = NULL; + int len = 0; + + if (PG_ARGISNULL(0)) + { + PG_RETURN_NULL(); + } + + /* get the text value */ + text_value = PG_GETARG_TEXT_PP(0); + /* convert it to a string */ + string = text_to_cstring(text_value); + /* get the length */ + len = strlen(string); + + /* create a temporary agtype string */ + agtv.type = AGTV_STRING; + agtv.val.string.len = len; + agtv.val.string.val = pstrdup(string); + + /* free the string */ + pfree_if_not_null(string); + + /* convert to agtype */ + result = agtype_value_to_agtype(&agtv); + + /* free the input arg if necessary */ + PG_FREE_IF_COPY(text_value, 0); + + /* return our result */ + PG_RETURN_POINTER(result); +} + +PG_FUNCTION_INFO_V1(agtype_to_json); + +/* + * Cast agtype to json. + * + * If the input agtype is vertex, edge or path, the trailing + * type(::vertex, ::edge, ::path) is removed. + */ +Datum agtype_to_json(PG_FUNCTION_ARGS) +{ + Datum result; + char *json_str; + agtype *agt; + + agt = AG_GET_ARG_AGTYPE_P(0); + + if (AGT_ROOT_IS_SCALAR(agt)) + { + enum agtype_value_type type; + + type = get_ith_agtype_value_type(&agt->root, 0); + if (type >= AGTV_NUMERIC && type <= AGTV_BOOL) + { + ereport(ERROR, + (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("cannot cast agtype %s to json", + agtype_value_type_to_string(type)))); + } + } + + json_str = agtype_to_cstring_worker(NULL, &agt->root, VARSIZE(agt), + false, false); + + result = DirectFunctionCall1(json_in, CStringGetDatum(json_str)); + + PG_FREE_IF_COPY(agt, 0); + pfree_if_not_null(json_str); + + PG_RETURN_DATUM(result); +} + PG_FUNCTION_INFO_V1(bool_to_agtype); /* @@ -3117,13 +3321,22 @@ Datum float8_to_agtype(PG_FUNCTION_ARGS) PG_FUNCTION_INFO_V1(int8_to_agtype); /* - * Cast float8 to agtype. + * Cast int8 to agtype. 
*/ Datum int8_to_agtype(PG_FUNCTION_ARGS) { return integer_to_agtype(PG_GETARG_INT64(0)); } +PG_FUNCTION_INFO_V1(int4_to_agtype); +/* + * Cast int to agtype. + */ +Datum int4_to_agtype(PG_FUNCTION_ARGS) +{ + return integer_to_agtype((int64)PG_GETARG_INT32(0)); +} + PG_FUNCTION_INFO_V1(agtype_to_int4_array); /* @@ -3290,6 +3503,7 @@ static agtype_value *execute_array_access_operator(agtype *array, agtype *array_index) { agtype_value *array_index_value = NULL; + agtype_value *result = NULL; /* unpack the array index value */ array_index_value = get_ith_agtype_value_from_container(&array_index->root, @@ -3298,6 +3512,7 @@ static agtype_value *execute_array_access_operator(agtype *array, /* if AGTV_NULL return NULL */ if (array_index_value->type == AGTV_NULL) { + pfree_agtype_value(array_index_value); return NULL; } @@ -3308,8 +3523,11 @@ static agtype_value *execute_array_access_operator(agtype *array, (errmsg("array index must resolve to an integer value"))); } - return execute_array_access_operator_internal( - array, array_value, array_index_value->val.int_value); + result = execute_array_access_operator_internal(array, array_value, + array_index_value->val.int_value); + + pfree_agtype_value(array_index_value); + return result; } static agtype_value *execute_array_access_operator_internal(agtype *array, @@ -3613,7 +3831,7 @@ static Datum process_access_operator_result(FunctionCallInfo fcinfo, str = agtype_to_cstring_worker(out, agtc, agtv->val.binary.len, - false); + false, true); result = cstring_to_text(str); } else @@ -3689,32 +3907,44 @@ Datum agtype_object_field_agtype(PG_FUNCTION_ARGS) { agtype *agt = AG_GET_ARG_AGTYPE_P(0); agtype *key = AG_GET_ARG_AGTYPE_P(1); - agtype_value *key_value; - if (!AGT_ROOT_IS_SCALAR(key)) + if (AGT_ROOT_IS_SCALAR(key)) { - PG_RETURN_NULL(); - } + agtype_value *key_value; - key_value = get_ith_agtype_value_from_container(&key->root, 0); + key_value = get_ith_agtype_value_from_container(&key->root, 0); - if (key_value->type == 
AGTV_INTEGER) - { - PG_RETURN_TEXT_P((const void*)agtype_array_element_impl(fcinfo, agt, + if (key_value->type == AGTV_INTEGER || + key_value->type == AGTV_STRING) + { + Datum retval = 0; + + if (key_value->type == AGTV_INTEGER) + { + retval = agtype_array_element_impl(fcinfo, agt, key_value->val.int_value, - false)); - } - else if (key_value->type == AGTV_STRING) - { - AG_RETURN_AGTYPE_P((const void*)agtype_object_field_impl(fcinfo, agt, - key_value->val.string.val, - key_value->val.string.len, - false)); - } - else - { - PG_RETURN_NULL(); + false); + } + else if (key_value->type == AGTV_STRING) + { + retval = agtype_object_field_impl(fcinfo, agt, + key_value->val.string.val, + key_value->val.string.len, + false); + } + + pfree_agtype_value(key_value); + PG_FREE_IF_COPY(agt, 0); + PG_FREE_IF_COPY(key, 1); + + PG_RETURN_POINTER((const void*) retval); + } + pfree_agtype_value(key_value); } + + PG_FREE_IF_COPY(agt, 0); + PG_FREE_IF_COPY(key, 1); + PG_RETURN_NULL(); } PG_FUNCTION_INFO_V1(agtype_object_field_text_agtype); @@ -3723,32 +3953,43 @@ Datum agtype_object_field_text_agtype(PG_FUNCTION_ARGS) { agtype *agt = AG_GET_ARG_AGTYPE_P(0); agtype *key = AG_GET_ARG_AGTYPE_P(1); - agtype_value *key_value; - if (!AGT_ROOT_IS_SCALAR(key)) + if (AGT_ROOT_IS_SCALAR(key)) { - PG_RETURN_NULL(); - } + agtype_value *key_value; - key_value = get_ith_agtype_value_from_container(&key->root, 0); + key_value = get_ith_agtype_value_from_container(&key->root, 0); - if (key_value->type == AGTV_INTEGER) - { - PG_RETURN_TEXT_P((const void*)agtype_array_element_impl(fcinfo, agt, + if (key_value->type == AGTV_INTEGER || key_value->type == AGTV_STRING) + { + Datum retval = 0; + + if (key_value->type == AGTV_INTEGER) + { + retval = agtype_array_element_impl(fcinfo, agt, key_value->val.int_value, - true)); - } - else if (key_value->type == AGTV_STRING) - { - AG_RETURN_AGTYPE_P((const void*)agtype_object_field_impl(fcinfo, agt, - key_value->val.string.val, - key_value->val.string.len, - true)); 
- } - else - { - PG_RETURN_NULL(); + true); + } + else if (key_value->type == AGTV_STRING) + { + retval = agtype_object_field_impl(fcinfo, agt, + key_value->val.string.val, + key_value->val.string.len, + true); + } + + pfree_agtype_value(key_value); + PG_FREE_IF_COPY(agt, 0); + PG_FREE_IF_COPY(key, 1); + + PG_RETURN_POINTER((const void*) retval); + } + pfree_agtype_value(key_value); } + + PG_FREE_IF_COPY(agt, 0); + PG_FREE_IF_COPY(key, 1); + PG_RETURN_NULL(); } PG_FUNCTION_INFO_V1(agtype_object_field); @@ -3757,10 +3998,14 @@ Datum agtype_object_field(PG_FUNCTION_ARGS) { agtype *agt = AG_GET_ARG_AGTYPE_P(0); text *key = PG_GETARG_TEXT_PP(1); + Datum retval; - AG_RETURN_AGTYPE_P((const void*)agtype_object_field_impl(fcinfo, agt, VARDATA_ANY(key), - VARSIZE_ANY_EXHDR(key), - false)); + retval = agtype_object_field_impl(fcinfo, agt, VARDATA_ANY(key), + VARSIZE_ANY_EXHDR(key), false); + PG_FREE_IF_COPY(agt, 0); + PG_FREE_IF_COPY(key, 1); + + AG_RETURN_AGTYPE_P((const void*) retval); } PG_FUNCTION_INFO_V1(agtype_object_field_text); @@ -3769,9 +4014,14 @@ Datum agtype_object_field_text(PG_FUNCTION_ARGS) { agtype *agt = AG_GET_ARG_AGTYPE_P(0); text *key = PG_GETARG_TEXT_PP(1); + Datum retval; + + retval = agtype_object_field_impl(fcinfo, agt, VARDATA_ANY(key), + VARSIZE_ANY_EXHDR(key), true); + PG_FREE_IF_COPY(agt, 0); + PG_FREE_IF_COPY(key, 1); - PG_RETURN_TEXT_P((const void*)agtype_object_field_impl(fcinfo, agt, VARDATA_ANY(key), - VARSIZE_ANY_EXHDR(key), true)); + PG_RETURN_TEXT_P((const void*) retval); } PG_FUNCTION_INFO_V1(agtype_array_element); @@ -3780,9 +4030,13 @@ Datum agtype_array_element(PG_FUNCTION_ARGS) { agtype *agt = AG_GET_ARG_AGTYPE_P(0); int elem = PG_GETARG_INT32(1); + Datum retval; + + retval = agtype_array_element_impl(fcinfo, agt, elem, false); + + PG_FREE_IF_COPY(agt, 0); - AG_RETURN_AGTYPE_P((const void*) - agtype_array_element_impl(fcinfo, agt, elem, false)); + AG_RETURN_AGTYPE_P((const void*) retval); } 
PG_FUNCTION_INFO_V1(agtype_array_element_text); @@ -3791,9 +4045,13 @@ Datum agtype_array_element_text(PG_FUNCTION_ARGS) { agtype *agt = AG_GET_ARG_AGTYPE_P(0); int elem = PG_GETARG_INT32(1); + Datum retval; - PG_RETURN_TEXT_P((const void*) - agtype_array_element_impl(fcinfo, agt, elem, true)); + retval = agtype_array_element_impl(fcinfo, agt, elem, true); + + PG_FREE_IF_COPY(agt, 0); + + PG_RETURN_TEXT_P((const void*) retval); } PG_FUNCTION_INFO_V1(agtype_access_operator); @@ -3809,6 +4067,7 @@ Datum agtype_access_operator(PG_FUNCTION_ARGS) int nargs = 0; agtype *container = NULL; agtype_value *container_value = NULL; + agtype *result = NULL; int i = 0; /* extract our args, we need at least 2 */ @@ -3826,6 +4085,10 @@ Datum agtype_access_operator(PG_FUNCTION_ARGS) */ if (args == NULL || nargs == 0 || nulls[0] == true) { + pfree_if_not_null(args); + pfree_if_not_null(types); + pfree_if_not_null(nulls); + PG_RETURN_NULL(); } @@ -3835,6 +4098,9 @@ Datum agtype_access_operator(PG_FUNCTION_ARGS) /* if we have a NULL, return NULL */ if (nulls[i] == true) { + pfree_if_not_null(args); + pfree_if_not_null(types); + pfree_if_not_null(nulls); PG_RETURN_NULL(); } } @@ -3850,6 +4116,7 @@ Datum agtype_access_operator(PG_FUNCTION_ARGS) /* retrieve an array of edges from the vpc */ container_value = agtv_materialize_vle_edges(container); /* clear the container reference */ + container = NULL; } else @@ -3952,8 +4219,14 @@ Datum agtype_access_operator(PG_FUNCTION_ARGS) container = NULL; } + pfree_if_not_null(args); + pfree_if_not_null(types); + pfree_if_not_null(nulls); + /* serialize and return the result */ - return AGTYPE_P_GET_DATUM(agtype_value_to_agtype(container_value)); + result = agtype_value_to_agtype(container_value); + + return AGTYPE_P_GET_DATUM(result); } PG_FUNCTION_INFO_V1(agtype_access_slice); @@ -3965,7 +4238,10 @@ Datum agtype_access_slice(PG_FUNCTION_ARGS) agtype_value *lidx_value = NULL; agtype_value *uidx_value = NULL; agtype_in_state result; + agtype 
*agt_result = NULL; agtype *agt_array = NULL; + agtype *agt_lidx = NULL; + agtype *agt_uidx = NULL; agtype_value *agtv_array = NULL; int64 upper_index = 0; int64 lower_index = 0; @@ -4014,8 +4290,8 @@ Datum agtype_access_slice(PG_FUNCTION_ARGS) } else { - lidx_value = get_ith_agtype_value_from_container( - &(AG_GET_ARG_AGTYPE_P(1))->root, 0); + agt_lidx = AG_GET_ARG_AGTYPE_P(1); + lidx_value = get_ith_agtype_value_from_container(&agt_lidx->root, 0); /* adjust for AGTV_NULL */ if (lidx_value->type == AGTV_NULL) { @@ -4031,8 +4307,8 @@ Datum agtype_access_slice(PG_FUNCTION_ARGS) } else { - uidx_value = get_ith_agtype_value_from_container( - &(AG_GET_ARG_AGTYPE_P(2))->root, 0); + agt_uidx = AG_GET_ARG_AGTYPE_P(2); + uidx_value = get_ith_agtype_value_from_container(&agt_uidx->root, 0); /* adjust for AGTV_NULL */ if (uidx_value->type == AGTV_NULL) { @@ -4060,10 +4336,12 @@ Datum agtype_access_slice(PG_FUNCTION_ARGS) if (lidx_value) { lower_index = lidx_value->val.int_value; + pfree_agtype_value(lidx_value); } if (uidx_value) { upper_index = uidx_value->val.int_value; + pfree_agtype_value(uidx_value); } /* adjust for negative and out of bounds index values */ @@ -4119,7 +4397,14 @@ Datum agtype_access_slice(PG_FUNCTION_ARGS) result.res = push_agtype_value(&result.parse_state, WAGT_END_ARRAY, NULL); - PG_RETURN_POINTER(agtype_value_to_agtype(result.res)); + agt_result = agtype_value_to_agtype(result.res); + + pfree_agtype_in_state(&result); + PG_FREE_IF_COPY(agt_array, 0); + PG_FREE_IF_COPY(agt_lidx, 1); + PG_FREE_IF_COPY(agt_uidx, 2); + + PG_RETURN_POINTER(agt_result); } PG_FUNCTION_INFO_V1(agtype_in_operator); @@ -4264,7 +4549,7 @@ Datum agtype_in_operator(PG_FUNCTION_ARGS) result = (compare_agtype_scalar_values(&agtv_item, &agtv_elem) == 0); } - } + } } return boolean_to_agtype(result); @@ -4278,6 +4563,7 @@ Datum agtype_string_match_starts_with(PG_FUNCTION_ARGS) { agtype *lhs = AG_GET_ARG_AGTYPE_P(0); agtype *rhs = AG_GET_ARG_AGTYPE_P(1); + bool result = false; if 
(AGT_ROOT_IS_SCALAR(lhs) && AGT_ROOT_IS_SCALAR(rhs)) { @@ -4290,17 +4576,33 @@ Datum agtype_string_match_starts_with(PG_FUNCTION_ARGS) if (lhs_value->type == AGTV_STRING && rhs_value->type == AGTV_STRING) { if (lhs_value->val.string.len < rhs_value->val.string.len) - return boolean_to_agtype(false); - - if (strncmp(lhs_value->val.string.val, rhs_value->val.string.val, - rhs_value->val.string.len) == 0) - return boolean_to_agtype(true); + { + result = false; + } + else if (strncmp(lhs_value->val.string.val, + rhs_value->val.string.val, + rhs_value->val.string.len) == 0) + { + result = true; + } else - return boolean_to_agtype(false); + { + result = false; + } } + pfree_agtype_value(lhs_value); + pfree_agtype_value(rhs_value); } - ereport(ERROR, (errcode(ERRCODE_INVALID_PARAMETER_VALUE), - errmsg("agtype string values expected"))); + else + { + ereport(ERROR, (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("agtype string values expected"))); + } + + PG_FREE_IF_COPY(lhs, 0); + PG_FREE_IF_COPY(rhs, 1); + + return boolean_to_agtype(result); } PG_FUNCTION_INFO_V1(agtype_string_match_ends_with); @@ -4311,6 +4613,7 @@ Datum agtype_string_match_ends_with(PG_FUNCTION_ARGS) { agtype *lhs = AG_GET_ARG_AGTYPE_P(0); agtype *rhs = AG_GET_ARG_AGTYPE_P(1); + bool result = false; if (AGT_ROOT_IS_SCALAR(lhs) && AGT_ROOT_IS_SCALAR(rhs)) { @@ -4323,19 +4626,35 @@ Datum agtype_string_match_ends_with(PG_FUNCTION_ARGS) if (lhs_value->type == AGTV_STRING && rhs_value->type == AGTV_STRING) { if (lhs_value->val.string.len < rhs_value->val.string.len) - return boolean_to_agtype(false); - - if (strncmp(lhs_value->val.string.val + lhs_value->val.string.len - - rhs_value->val.string.len, - rhs_value->val.string.val, - rhs_value->val.string.len) == 0) - return boolean_to_agtype(true); + { + result = false; + } + else if (strncmp((lhs_value->val.string.val + + lhs_value->val.string.len - + rhs_value->val.string.len), + rhs_value->val.string.val, + rhs_value->val.string.len) == 0) + { + result 
= true; + } else - return boolean_to_agtype(false); + { + result = false; + } } + pfree_agtype_value(lhs_value); + pfree_agtype_value(rhs_value); } - ereport(ERROR, (errcode(ERRCODE_INVALID_PARAMETER_VALUE), - errmsg("agtype string values expected"))); + else + { + ereport(ERROR, (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("agtype string values expected"))); + } + + PG_FREE_IF_COPY(lhs, 0); + PG_FREE_IF_COPY(rhs, 1); + + return boolean_to_agtype(result); } PG_FUNCTION_INFO_V1(agtype_string_match_contains); @@ -4346,6 +4665,7 @@ Datum agtype_string_match_contains(PG_FUNCTION_ARGS) { agtype *lhs = AG_GET_ARG_AGTYPE_P(0); agtype *rhs = AG_GET_ARG_AGTYPE_P(1); + bool result = false; if (AGT_ROOT_IS_SCALAR(lhs) && AGT_ROOT_IS_SCALAR(rhs)) { @@ -4361,19 +4681,37 @@ Datum agtype_string_match_contains(PG_FUNCTION_ARGS) char *r; if (lhs_value->val.string.len < rhs_value->val.string.len) - return boolean_to_agtype(false); + { + result = false; + } l = pnstrdup(lhs_value->val.string.val, lhs_value->val.string.len); r = pnstrdup(rhs_value->val.string.val, rhs_value->val.string.len); if (strstr(l, r) == NULL) - return boolean_to_agtype(false); + { + result = false; + } else - return boolean_to_agtype(true); + { + result = true; + } + pfree_if_not_null(l); + pfree_if_not_null(r); } + pfree_agtype_value(lhs_value); + pfree_agtype_value(rhs_value); } - ereport(ERROR, (errcode(ERRCODE_INVALID_PARAMETER_VALUE), - errmsg("agtype string values expected"))); + else + { + ereport(ERROR, (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("agtype string values expected"))); + } + + PG_FREE_IF_COPY(lhs, 0); + PG_FREE_IF_COPY(rhs, 1); + + return boolean_to_agtype(result); } #define LEFT_ROTATE(n, i) ((n << i) | (n >> (64 - i))) @@ -4418,6 +4756,9 @@ Datum agtype_hash_cmp(PG_FUNCTION_ARGS) seed = LEFT_ROTATE(seed, 1); } + pfree_if_not_null(r); + PG_FREE_IF_COPY(agt, 0); + PG_RETURN_INT32(hash); } @@ -4428,6 +4769,7 @@ Datum agtype_btree_cmp(PG_FUNCTION_ARGS) { agtype *agtype_lhs; 
agtype *agtype_rhs; + int32 result; /* this function returns INTEGER which is 32bits */ if (PG_ARGISNULL(0) && PG_ARGISNULL(1)) @@ -4446,8 +4788,13 @@ Datum agtype_btree_cmp(PG_FUNCTION_ARGS) agtype_lhs = AG_GET_ARG_AGTYPE_P(0); agtype_rhs = AG_GET_ARG_AGTYPE_P(1); - PG_RETURN_INT32(compare_agtype_containers_orderability(&agtype_lhs->root, - &agtype_rhs->root)); + result = compare_agtype_containers_orderability(&agtype_lhs->root, + &agtype_rhs->root); + + PG_FREE_IF_COPY(agtype_lhs, 0); + PG_FREE_IF_COPY(agtype_rhs, 1); + + PG_RETURN_INT32(result); } PG_FUNCTION_INFO_V1(agtype_typecast_numeric); @@ -4461,23 +4808,31 @@ Datum agtype_typecast_numeric(PG_FUNCTION_ARGS) agtype_value result_value; Datum numd; char *string = NULL; + agtype *result = NULL; /* get the agtype equivalence of any convertable input type */ arg_agt = get_one_agtype_from_variadic_args(fcinfo, 0, 1); /* Return null if arg_agt is null. This covers SQL and Agtype NULLS */ if (arg_agt == NULL) + { PG_RETURN_NULL(); + } /* check that we have a scalar value */ if (!AGT_ROOT_IS_SCALAR(arg_agt)) + { ereport(ERROR, (errcode(ERRCODE_INVALID_PARAMETER_VALUE), errmsg("typecast argument must resolve to a scalar value"))); + } /* get the arg parameter */ arg_value = get_ith_agtype_value_from_container(&arg_agt->root, 0); + /* we don't need to agtype arg anymore */ + PG_FREE_IF_COPY(arg_agt, 0); + /* the input type drives the casting */ switch(arg_value->type) { @@ -4491,7 +4846,9 @@ Datum agtype_typecast_numeric(PG_FUNCTION_ARGS) break; case AGTV_NUMERIC: /* it is already a numeric so just return it */ - PG_RETURN_POINTER(agtype_value_to_agtype(arg_value)); + result = agtype_value_to_agtype(arg_value); + pfree_agtype_value(arg_value); + PG_RETURN_POINTER(result); break; /* this allows string numbers and NaN */ case AGTV_STRING: @@ -4506,7 +4863,7 @@ Datum agtype_typecast_numeric(PG_FUNCTION_ARGS) ObjectIdGetDatum(InvalidOid), Int32GetDatum(-1)); /* free the string */ - pfree(string); + 
pfree_if_not_null(string); string = NULL; break; /* what was given doesn't cast to a numeric */ @@ -4517,11 +4874,16 @@ Datum agtype_typecast_numeric(PG_FUNCTION_ARGS) break; } + pfree_agtype_value(arg_value); + /* fill in and return our result */ result_value.type = AGTV_NUMERIC; result_value.val.numeric = DatumGetNumeric(numd); - PG_RETURN_POINTER(agtype_value_to_agtype(&result_value)); + result = agtype_value_to_agtype(&result_value); + pfree_agtype_value_content(&result_value); + + PG_RETURN_POINTER(result); } PG_FUNCTION_INFO_V1(agtype_typecast_int); @@ -4589,7 +4951,7 @@ Datum agtype_typecast_int(PG_FUNCTION_ARGS) d = DirectFunctionCall1(int8in, CStringGetDatum(string)); /* free the string */ - pfree(string); + pfree_if_not_null(string); string = NULL; break; /* what was given doesn't cast to an int */ @@ -4726,7 +5088,7 @@ Datum agtype_typecast_float(PG_FUNCTION_ARGS) d = DirectFunctionCall1(float8in, CStringGetDatum(string)); /* free the string */ - pfree(string); + pfree_if_not_null(string); string = NULL; break; /* what was given doesn't cast to a float */ @@ -5598,7 +5960,7 @@ Datum age_tail(PG_FUNCTION_ARGS) WAGT_END_ARRAY, NULL); agt_result = agtype_value_to_agtype(agis_result.res); - pfree_agtype_value(agis_result.res); + pfree_agtype_in_state(&agis_result); PG_RETURN_POINTER(agt_result); } @@ -6846,7 +7208,7 @@ Datum age_tostring(PG_FUNCTION_ARGS) /* convert to agtype and free the agtype_value */ agt = agtype_value_to_agtype(agtv); - pfree(agtv); + pfree_if_not_null(agtv); PG_RETURN_POINTER(agt); } @@ -10060,6 +10422,9 @@ agtype *get_one_agtype_from_variadic_args(FunctionCallInfo fcinfo, /* if null, return null */ if (nulls[0]) { + pfree_if_not_null(args); + pfree_if_not_null(nulls); + pfree_if_not_null(types); return NULL; } @@ -10078,6 +10443,11 @@ agtype *get_one_agtype_from_variadic_args(FunctionCallInfo fcinfo, if (AGTYPE_CONTAINER_IS_SCALAR(agtc) && AGTE_IS_NULL(agtc->children[0])) { + PG_FREE_IF_COPY(agtype_result, variadic_offset); + + 
pfree_if_not_null(args); + pfree_if_not_null(nulls); + pfree_if_not_null(types); return NULL; } } @@ -10097,7 +10467,14 @@ agtype *get_one_agtype_from_variadic_args(FunctionCallInfo fcinfo, datum_to_agtype(args[0], false, &state, tcategory, outfuncoid, false); /* convert it to an agtype */ agtype_result = agtype_value_to_agtype(state.res); + + pfree_agtype_in_state(&state); } + + pfree_if_not_null(args); + pfree_if_not_null(nulls); + pfree_if_not_null(types); + return agtype_result; } diff --git a/src/backend/utils/adt/agtype_gin.c b/src/backend/utils/adt/agtype_gin.c index 246dbcab6..91ed15fc6 100644 --- a/src/backend/utils/adt/agtype_gin.c +++ b/src/backend/utils/adt/agtype_gin.c @@ -553,7 +553,7 @@ static Datum make_scalar_key(const agtype_value *scalarVal, bool is_key) */ cstr = numeric_normalize(scalarVal->val.numeric); item = make_text_key(AGT_GIN_FLAG_NUM, cstr, strlen(cstr)); - pfree(cstr); + pfree_if_not_null(cstr); break; case AGTV_STRING: item = make_text_key(is_key ? AGT_GIN_FLAG_KEY : AGT_GIN_FLAG_STR, diff --git a/src/backend/utils/adt/agtype_raw.c b/src/backend/utils/adt/agtype_raw.c index e7270ef84..d8bad3d24 100644 --- a/src/backend/utils/adt/agtype_raw.c +++ b/src/backend/utils/adt/agtype_raw.c @@ -172,8 +172,8 @@ void pfree_agtype_build_state(agtype_build_state *bstate) * bstate->buffer->data is not pfree'd because this pointer * is returned by the `build_agtype` function. 
*/ - pfree(bstate->buffer); - pfree(bstate); + pfree_if_not_null(bstate->buffer); + pfree_if_not_null(bstate); } void write_string(agtype_build_state *bstate, char *str) diff --git a/src/backend/utils/adt/agtype_util.c b/src/backend/utils/adt/agtype_util.c index 89aedeace..01a965cdd 100644 --- a/src/backend/utils/adt/agtype_util.c +++ b/src/backend/utils/adt/agtype_util.c @@ -87,6 +87,8 @@ static agtype_value *push_agtype_value_scalar(agtype_parse_state **pstate, agtype_value *scalar_val); static int compare_two_floats_orderability(float8 lhs, float8 rhs); static int get_type_sort_priority(enum agtype_value_type type); +static void pfree_iterator_agtype_value_token(agtype_iterator_token token, + agtype_value *agtv); /* * Turn an in-memory agtype_value into an agtype for on-disk storage. @@ -234,6 +236,17 @@ static int get_type_sort_priority(enum agtype_value_type type) return -1; } +static void pfree_iterator_agtype_value_token(agtype_iterator_token token, + agtype_value *agtv) +{ + if (token == WAGT_KEY || + token == WAGT_VALUE || + token == WAGT_ELEM) + { + pfree_agtype_value_content(agtv); + } +} + /* * BT comparator worker function. Returns an integer less than, equal to, or * greater than zero, indicating whether a is less than, equal to, or greater @@ -269,6 +282,10 @@ int compare_agtype_containers_orderability(agtype_container *a, if (ra == WAGT_DONE) { /* Decisively equal */ + + /* free the agtype_values associated with the tokens */ + pfree_iterator_agtype_value_token(ra, &va); + pfree_iterator_agtype_value_token(rb, &vb); break; } @@ -280,6 +297,10 @@ int compare_agtype_containers_orderability(agtype_container *a, * initially, at the WAGT_BEGIN_ARRAY and WAGT_BEGIN_OBJECT * tokens. 
*/ + + /* free the agtype_values associated with the tokens */ + pfree_iterator_agtype_value_token(ra, &va); + pfree_iterator_agtype_value_token(rb, &vb); continue; } @@ -367,12 +388,18 @@ int compare_agtype_containers_orderability(agtype_container *a, if (ra == WAGT_END_ARRAY || ra == WAGT_END_OBJECT) { res = -1; + /* free the agtype_values associated with the tokens */ + pfree_iterator_agtype_value_token(ra, &va); + pfree_iterator_agtype_value_token(rb, &vb); break; } /* If right side is shorter, greater than */ if (rb == WAGT_END_ARRAY || rb == WAGT_END_OBJECT) { res = 1; + /* free the agtype_values associated with the tokens */ + pfree_iterator_agtype_value_token(ra, &va); + pfree_iterator_agtype_value_token(rb, &vb); break; } @@ -387,7 +414,7 @@ int compare_agtype_containers_orderability(agtype_container *a, { rb = agtype_iterator_next(&itb, &vb, false); } - + Assert(va.type != vb.type); Assert(va.type != AGTV_BINARY); Assert(vb.type != AGTV_BINARY); @@ -397,20 +424,23 @@ int compare_agtype_containers_orderability(agtype_container *a, -1 : 1; } + /* free the agtype_values associated with the tokens */ + pfree_iterator_agtype_value_token(ra, &va); + pfree_iterator_agtype_value_token(rb, &vb); } while (res == 0); while (ita != NULL) { agtype_iterator *i = ita->parent; - pfree(ita); + pfree_if_not_null(ita); ita = i; } while (itb != NULL) { agtype_iterator *i = itb->parent; - pfree(itb); + pfree_if_not_null(itb); itb = i; } @@ -527,7 +557,7 @@ agtype_value *find_agtype_value_from_container(agtype_container *container, } /* Not found */ - pfree(result); + pfree_if_not_null(result); return NULL; } @@ -560,6 +590,88 @@ agtype_value *get_ith_agtype_value_from_container(agtype_container *container, return result; } +/* + * Get type of i-th value of an agtype array. 
+ */ +enum agtype_value_type get_ith_agtype_value_type(agtype_container *container, + uint32 i) +{ + enum agtype_value_type type; + uint32 nelements; + agtentry entry; + + if (!AGTYPE_CONTAINER_IS_ARRAY(container)) + { + ereport(ERROR, (errmsg("container is not an agtype array"))); + } + + nelements = AGTYPE_CONTAINER_SIZE(container); + if (i >= nelements) + { + ereport(ERROR, (errmsg("index out of bounds"))); + } + + entry = container->children[i]; + switch ((entry)&AGTENTRY_TYPEMASK) + { + case AGTENTRY_IS_STRING: + type = AGTV_STRING; + break; + case AGTENTRY_IS_NUMERIC: + type = AGTV_NUMERIC; + break; + case AGTENTRY_IS_AGTYPE: + { + char *base_addr; + uint32 agt_header; + char *base; + + base_addr = (char *)&container->children[nelements]; + base = base_addr + INTALIGN(get_agtype_offset(container, i)); + agt_header = *((uint32 *)base); + + switch (agt_header) + { + case AGT_HEADER_INTEGER: + type = AGTV_INTEGER; + break; + case AGT_HEADER_FLOAT: + type = AGTV_FLOAT; + break; + case AGT_HEADER_VERTEX: + type = AGTV_VERTEX; + break; + case AGT_HEADER_EDGE: + type = AGTV_EDGE; + break; + case AGT_HEADER_PATH: + type = AGTV_PATH; + break; + default: + ereport(ERROR, (errmsg("unexpected agt_header type"))); + break; + } + break; + } + case AGTENTRY_IS_BOOL_TRUE: + type = AGTV_BOOL; + break; + case AGTENTRY_IS_BOOL_FALSE: + type = AGTV_BOOL; + break; + case AGTENTRY_IS_NULL: + type = AGTV_NULL; + break; + case AGTENTRY_IS_CONTAINER: + type = AGTV_BINARY; + break; + default: + ereport(ERROR, (errmsg("unexpected agtentry type"))); + break; + } + return type; +} + /* * A helper function to fill in an agtype_value to represent an element of an * array, or a key or value of an object. 
@@ -1105,7 +1217,7 @@ static agtype_iterator *free_and_get_parent(agtype_iterator *it) { agtype_iterator *v = it->parent; - pfree(it); + pfree_if_not_null(it); return v; } @@ -1357,9 +1469,9 @@ bool agtype_deep_contains(agtype_iterator **val, contains = agtype_deep_contains(&nestval, &nest_contained, false); if (nestval) - pfree(nestval); + pfree_if_not_null(nestval); if (nest_contained) - pfree(nest_contained); + pfree_if_not_null(nest_contained); if (contains) break; } @@ -2326,11 +2438,11 @@ char *agtype_value_type_to_string(enum agtype_value_type type) void pfree_agtype_value(agtype_value* value) { pfree_agtype_value_content(value); - pfree(value); + pfree_if_not_null(value); } /* - * Helper function that recursively deallocates the contents + * Helper function that recursively deallocates the contents * of the passed agtype_value only. It does not deallocate * `value` itself. */ @@ -2344,7 +2456,7 @@ void pfree_agtype_value_content(agtype_value* value) switch (value->type) { case AGTV_NUMERIC: - pfree(value->val.numeric); + pfree_if_not_null(value->val.numeric); break; case AGTV_STRING: @@ -2352,6 +2464,7 @@ void pfree_agtype_value_content(agtype_value* value) * The char pointer (val.string.val) is not free'd because * it is not allocated by an agtype helper function. 
*/ + pfree_if_not_null(value->val.string.val); break; case AGTV_ARRAY: @@ -2360,7 +2473,7 @@ void pfree_agtype_value_content(agtype_value* value) { pfree_agtype_value_content(&value->val.array.elems[i]); } - pfree(value->val.array.elems); + pfree_if_not_null(value->val.array.elems); break; case AGTV_OBJECT: @@ -2371,11 +2484,11 @@ void pfree_agtype_value_content(agtype_value* value) pfree_agtype_value_content(&value->val.object.pairs[i].key); pfree_agtype_value_content(&value->val.object.pairs[i].value); } - pfree(value->val.object.pairs); + pfree_if_not_null(value->val.object.pairs); break; case AGTV_BINARY: - pfree(value->val.binary.data); + pfree_if_not_null(value->val.binary.data); break; case AGTV_NULL: diff --git a/src/backend/utils/ag_guc.c b/src/backend/utils/ag_guc.c index d9a99f66e..86b4e00bc 100644 --- a/src/backend/utils/ag_guc.c +++ b/src/backend/utils/ag_guc.c @@ -42,4 +42,5 @@ void define_config_params(void) NULL, NULL, NULL); + EmitWarningsOnPlaceholders("age"); } diff --git a/src/backend/utils/load/ag_load_edges.c b/src/backend/utils/load/ag_load_edges.c index 4f2f66a35..30dc4761d 100644 --- a/src/backend/utils/load/ag_load_edges.c +++ b/src/backend/utils/load/ag_load_edges.c @@ -20,9 +20,13 @@ #include "postgres.h" #include "utils/load/ag_load_edges.h" -#include "utils/load/age_load.h" #include "utils/load/csv.h" +void init_edge_batch_insert(batch_insert_state **batch_state, + char *label_name, Oid graph_oid); +void finish_edge_batch_insert(batch_insert_state **batch_state, + char *label_name, Oid graph_oid); + void edge_field_cb(void *field, size_t field_len, void *data) { @@ -58,6 +62,7 @@ void edge_row_cb(int delim __attribute__((unused)), void *data) { csv_edge_reader *cr = (csv_edge_reader*)data; + batch_insert_state *batch_state = cr->batch_state; size_t i, n_fields; int64 start_id_int; @@ -68,9 +73,9 @@ void edge_row_cb(int delim __attribute__((unused)), void *data) graphid end_vertex_graph_id; int end_vertex_type_id; - graphid 
object_graph_id; - - agtype* props = NULL; + graphid edge_id; + int64 entry_id; + TupleTableSlot *slot; n_fields = cr->cur_field; @@ -89,7 +94,8 @@ void edge_row_cb(int delim __attribute__((unused)), void *data) } else { - object_graph_id = make_graphid(cr->object_id, (int64)cr->row); + entry_id = nextval_internal(cr->label_seq_relid, true); + edge_id = make_graphid(cr->label_id, entry_id); start_id_int = strtol(cr->fields[0], NULL, 10); start_vertex_type_id = get_label_id(cr->fields[1], cr->graph_oid); @@ -99,14 +105,35 @@ void edge_row_cb(int delim __attribute__((unused)), void *data) start_vertex_graph_id = make_graphid(start_vertex_type_id, start_id_int); end_vertex_graph_id = make_graphid(end_vertex_type_id, end_id_int); - props = create_agtype_from_list_i(cr->header, cr->fields, - n_fields, 4, cr->load_as_agtype); - - insert_edge_simple(cr->graph_oid, cr->object_name, - object_graph_id, start_vertex_graph_id, - end_vertex_graph_id, props); - - pfree(props); + /* Get the appropriate slot from the batch state */ + slot = batch_state->slots[batch_state->num_tuples]; + + /* Clear the slots contents */ + ExecClearTuple(slot); + + /* Fill the values in the slot */ + slot->tts_values[0] = GRAPHID_GET_DATUM(edge_id); + slot->tts_values[1] = GRAPHID_GET_DATUM(start_vertex_graph_id); + slot->tts_values[2] = GRAPHID_GET_DATUM(end_vertex_graph_id); + slot->tts_values[3] = AGTYPE_P_GET_DATUM( + create_agtype_from_list_i( + cr->header, cr->fields, + n_fields, 4, cr->load_as_agtype)); + slot->tts_isnull[0] = false; + slot->tts_isnull[1] = false; + slot->tts_isnull[2] = false; + slot->tts_isnull[3] = false; + + /* Make the slot as containing virtual tuple */ + ExecStoreVirtualTuple(slot); + batch_state->num_tuples++; + + if (batch_state->num_tuples >= batch_state->max_tuples) + { + /* Insert the batch when it is full (i.e. 
BATCH_SIZE) */ + insert_batch(batch_state, cr->label_name, cr->graph_oid); + batch_state->num_tuples = 0; + } } for (i = 0; i < n_fields; ++i) @@ -119,7 +146,6 @@ void edge_row_cb(int delim __attribute__((unused)), void *data) ereport(NOTICE,(errmsg("THere is some error"))); } - cr->cur_field = 0; cr->curr_row_length = 0; cr->row += 1; @@ -152,8 +178,8 @@ static int is_term(unsigned char c) int create_edges_from_csv_file(char *file_path, char *graph_name, Oid graph_oid, - char *object_name, - int object_id, + char *label_name, + int label_id, bool load_as_agtype) { @@ -163,6 +189,7 @@ int create_edges_from_csv_file(char *file_path, size_t bytes_read; unsigned char options = 0; csv_edge_reader cr; + char *label_seq_name; if (csv_init(&p, options) != 0) { @@ -180,6 +207,7 @@ int create_edges_from_csv_file(char *file_path, (errmsg("Failed to open %s\n", file_path))); } + label_seq_name = get_label_seq_relation_name(label_name); memset((void*)&cr, 0, sizeof(csv_edge_reader)); cr.alloc = 128; @@ -189,10 +217,14 @@ int create_edges_from_csv_file(char *file_path, cr.curr_row_length = 0; cr.graph_name = graph_name; cr.graph_oid = graph_oid; - cr.object_name = object_name; - cr.object_id = object_id; + cr.label_name = label_name; + cr.label_id = label_id; + cr.label_seq_relid = get_relname_relid(label_seq_name, graph_oid); cr.load_as_agtype = load_as_agtype; + /* Initialize the batch insert state */ + init_edge_batch_insert(&cr.batch_state, label_name, graph_oid); + while ((bytes_read=fread(buf, 1, 1024, fp)) > 0) { if (csv_parse(&p, buf, bytes_read, edge_field_cb, @@ -205,6 +237,9 @@ int create_edges_from_csv_file(char *file_path, csv_fini(&p, edge_field_cb, edge_row_cb, &cr); + /* Finish any remaining batch inserts */ + finish_edge_batch_insert(&cr.batch_state, label_name, graph_oid); + if (ferror(fp)) { ereport(ERROR, (errmsg("Error while reading file %s\n", file_path))); @@ -216,3 +251,65 @@ int create_edges_from_csv_file(char *file_path, csv_free(&p); return 
EXIT_SUCCESS; } + +/* + * Initialize the batch insert state for edges. + */ +void init_edge_batch_insert(batch_insert_state **batch_state, + char *label_name, Oid graph_oid) +{ + Relation relation; + int i; + + // Open a temporary relation to get the tuple descriptor + relation = table_open(get_label_relation(label_name, graph_oid), AccessShareLock); + + // Initialize the batch insert state + *batch_state = (batch_insert_state *) palloc0(sizeof(batch_insert_state)); + (*batch_state)->max_tuples = BATCH_SIZE; + (*batch_state)->slots = palloc(sizeof(TupleTableSlot *) * BATCH_SIZE); + (*batch_state)->num_tuples = 0; + + // Create slots + for (i = 0; i < BATCH_SIZE; i++) + { + (*batch_state)->slots[i] = MakeSingleTupleTableSlot( + RelationGetDescr(relation), + &TTSOpsHeapTuple); + } + + table_close(relation, AccessShareLock); +} + +/* + * Finish the batch insert for edges. Insert the + * remaining tuples in the batch state and clean up. + */ +void finish_edge_batch_insert(batch_insert_state **batch_state, + char *label_name, Oid graph_oid) +{ + int i; + Relation relation; + + if ((*batch_state)->num_tuples > 0) + { + insert_batch(*batch_state, label_name, graph_oid); + (*batch_state)->num_tuples = 0; + } + + // Open a temporary relation to ensure resources are properly cleaned up + relation = table_open(get_label_relation(label_name, graph_oid), AccessShareLock); + + // Free slots + for (i = 0; i < BATCH_SIZE; i++) + { + ExecDropSingleTupleTableSlot((*batch_state)->slots[i]); + } + + // Clean up batch state + pfree_if_not_null((*batch_state)->slots); + pfree_if_not_null(*batch_state); + *batch_state = NULL; + + table_close(relation, AccessShareLock); +} diff --git a/src/backend/utils/load/ag_load_labels.c b/src/backend/utils/load/ag_load_labels.c index f377f1cb3..2ab223346 100644 --- a/src/backend/utils/load/ag_load_labels.c +++ b/src/backend/utils/load/ag_load_labels.c @@ -17,11 +17,25 @@ * under the License. 
*/ #include "postgres.h" +#include "executor/spi.h" +#include "catalog/namespace.h" +#include "executor/executor.h" #include "utils/load/ag_load_labels.h" -#include "utils/load/age_load.h" #include "utils/load/csv.h" +static void setup_temp_table_for_vertex_ids(char *graph_name); +static void insert_batch_in_temp_table(batch_insert_state *batch_state, + Oid graph_oid, Oid relid); +static void init_vertex_batch_insert(batch_insert_state **batch_state, + char *label_name, Oid graph_oid, + Oid temp_table_relid); +static void finish_vertex_batch_insert(batch_insert_state **batch_state, + char *label_name, Oid graph_oid, + Oid temp_table_relid); +static void insert_vertex_batch(batch_insert_state *batch_state, char *label_name, + Oid graph_oid, Oid temp_table_relid); + void vertex_field_cb(void *field, size_t field_len, void *data) { @@ -55,16 +69,16 @@ void vertex_field_cb(void *field, size_t field_len, void *data) void vertex_row_cb(int delim __attribute__((unused)), void *data) { - csv_vertex_reader *cr = (csv_vertex_reader*)data; - agtype *props = NULL; + batch_insert_state *batch_state = cr->batch_state; size_t i, n_fields; - graphid object_graph_id; - int64 label_id_int; + graphid vertex_id; + int64 entry_id; + TupleTableSlot *slot; + TupleTableSlot *temp_id_slot; n_fields = cr->cur_field; - if (cr->row == 0) { cr->header_num = cr->cur_field; @@ -82,36 +96,67 @@ void vertex_row_cb(int delim __attribute__((unused)), void *data) { if (cr->id_field_exists) { - label_id_int = strtol(cr->fields[0], NULL, 10); + entry_id = strtol(cr->fields[0], NULL, 10); + if (entry_id > cr->curr_seq_num) + { + DirectFunctionCall2(setval_oid, + ObjectIdGetDatum(cr->label_seq_relid), + Int64GetDatum(entry_id)); + cr->curr_seq_num = entry_id; + } } else { - label_id_int = (int64)cr->row; + entry_id = nextval_internal(cr->label_seq_relid, true); } - object_graph_id = make_graphid(cr->object_id, label_id_int); + vertex_id = make_graphid(cr->label_id, entry_id); - props = 
create_agtype_from_list(cr->header, cr->fields, - n_fields, label_id_int, - cr->load_as_agtype); - insert_vertex_simple(cr->graph_oid, cr->object_name, - object_graph_id, props); - pfree(props); - } + /* Get the appropriate slot from the batch state */ + slot = batch_state->slots[batch_state->num_tuples]; + temp_id_slot = batch_state->temp_id_slots[batch_state->num_tuples]; + + /* Clear the slots contents */ + ExecClearTuple(slot); + ExecClearTuple(temp_id_slot); + /* Fill the values in the slot */ + slot->tts_values[0] = GRAPHID_GET_DATUM(vertex_id); + slot->tts_values[1] = AGTYPE_P_GET_DATUM( + create_agtype_from_list(cr->header, cr->fields, + n_fields, entry_id, + cr->load_as_agtype)); + slot->tts_isnull[0] = false; + slot->tts_isnull[1] = false; + + temp_id_slot->tts_values[0] = GRAPHID_GET_DATUM(vertex_id); + temp_id_slot->tts_isnull[0] = false; + + /* Make the slot as containing virtual tuple */ + ExecStoreVirtualTuple(slot); + ExecStoreVirtualTuple(temp_id_slot); + + batch_state->num_tuples++; + + if (batch_state->num_tuples >= batch_state->max_tuples) + { + /* Insert the batch when it is full (i.e. 
BATCH_SIZE) */ + insert_vertex_batch(batch_state, cr->label_name, cr->graph_oid, + cr->temp_table_relid); + batch_state->num_tuples = 0; + } + } for (i = 0; i < n_fields; ++i) { free(cr->fields[i]); } - if (cr->error) { ereport(NOTICE,(errmsg("THere is some error"))); } - cr->cur_field = 0; cr->curr_row_length = 0; cr->row += 1; @@ -144,8 +189,8 @@ static int is_term(unsigned char c) int create_labels_from_csv_file(char *file_path, char *graph_name, Oid graph_oid, - char *object_name, - int object_id, + char *label_name, + int label_id, bool id_field_exists, bool load_as_agtype) { @@ -156,6 +201,8 @@ int create_labels_from_csv_file(char *file_path, size_t bytes_read; unsigned char options = 0; csv_vertex_reader cr; + char *label_seq_name; + Oid temp_table_relid; if (csv_init(&p, options) != 0) { @@ -163,6 +210,13 @@ int create_labels_from_csv_file(char *file_path, (errmsg("Failed to initialize csv parser\n"))); } + temp_table_relid = RelnameGetRelid(GET_TEMP_VERTEX_ID_TABLE(graph_name)); + if (!OidIsValid(temp_table_relid)) + { + setup_temp_table_for_vertex_ids(graph_name); + temp_table_relid = RelnameGetRelid(GET_TEMP_VERTEX_ID_TABLE(graph_name)); + } + csv_set_space_func(&p, is_space); csv_set_term_func(&p, is_term); @@ -173,6 +227,7 @@ int create_labels_from_csv_file(char *file_path, (errmsg("Failed to open %s\n", file_path))); } + label_seq_name = get_label_seq_relation_name(label_name); memset((void*)&cr, 0, sizeof(csv_vertex_reader)); @@ -183,12 +238,28 @@ int create_labels_from_csv_file(char *file_path, cr.curr_row_length = 0; cr.graph_name = graph_name; cr.graph_oid = graph_oid; - cr.object_name = object_name; - cr.object_id = object_id; + cr.label_name = label_name; + cr.label_id = label_id; cr.id_field_exists = id_field_exists; + cr.label_seq_relid = get_relname_relid(label_seq_name, graph_oid); cr.load_as_agtype = load_as_agtype; + cr.temp_table_relid = temp_table_relid; + + if (cr.id_field_exists) + { + /* + * Set the curr_seq_num since we will need it 
to compare with + * incoming entry_id. + * + * We cant use currval because it will error out if nextval was + * not called before in the session. + */ + cr.curr_seq_num = nextval_internal(cr.label_seq_relid, true); + } - + /* Initialize the batch insert state */ + init_vertex_batch_insert(&cr.batch_state, label_name, graph_oid, + cr.temp_table_relid); while ((bytes_read=fread(buf, 1, 1024, fp)) > 0) { @@ -202,6 +273,10 @@ int create_labels_from_csv_file(char *file_path, csv_fini(&p, vertex_field_cb, vertex_row_cb, &cr); + /* Finish any remaining batch inserts */ + finish_vertex_batch_insert(&cr.batch_state, label_name, graph_oid, + cr.temp_table_relid); + if (ferror(fp)) { ereport(ERROR, (errmsg("Error while reading file %s\n", @@ -214,3 +289,179 @@ int create_labels_from_csv_file(char *file_path, csv_free(&p); return EXIT_SUCCESS; } + +static void insert_vertex_batch(batch_insert_state *batch_state, char *label_name, + Oid graph_oid, Oid temp_table_relid) +{ + insert_batch_in_temp_table(batch_state, graph_oid, temp_table_relid); + insert_batch(batch_state, label_name, graph_oid); +} + +/* + * Create and populate a temporary table with vertex ids that are already + * present in the graph. This table will be used to check if the new vertex + * id generated by loader is a duplicate. + * Unique index is created to enforce uniqueness of the ids. + * + * We dont need this for loading edges since the ids are generated using + * sequence and are unique. 
+ */ +static void setup_temp_table_for_vertex_ids(char *graph_name) +{ + char *create_as_query; + char *index_query; + /* Quote the temp table name so its case survives for RelnameGetRelid */ + create_as_query = psprintf("CREATE TEMP TABLE IF NOT EXISTS \"%s\" AS " + "SELECT DISTINCT id FROM \"%s\".%s", + GET_TEMP_VERTEX_ID_TABLE(graph_name), graph_name, + AG_DEFAULT_LABEL_VERTEX); + + index_query = psprintf("CREATE UNIQUE INDEX ON \"%s\" (id)", + GET_TEMP_VERTEX_ID_TABLE(graph_name)); + SPI_connect(); + SPI_execute(create_as_query, false, 0); + SPI_execute(index_query, false, 0); + + SPI_finish(); +} + +/* + * Inserts the batch's temp_id_slots into the temporary id table (relid) + * and adds an index entry for each tuple. A conflict reported for the + * unique index raises an ERROR naming the duplicate vertex id. + */ +static void insert_batch_in_temp_table(batch_insert_state *batch_state, + Oid graph_oid, Oid relid) +{ + int i; + EState *estate; + ResultRelInfo *resultRelInfo; + Relation rel; + List *result; + + rel = table_open(relid, RowExclusiveLock); + + /* Initialize executor state */ + estate = CreateExecutorState(); + + /* Initialize result relation information */ + resultRelInfo = makeNode(ResultRelInfo); + InitResultRelInfo(resultRelInfo, rel, 1, NULL, estate->es_instrument); + estate->es_result_relations = &resultRelInfo; + + /* Open the indices */ + ExecOpenIndices(resultRelInfo, false); + + /* Insert the batch into the temporary table */ + heap_multi_insert(rel, batch_state->temp_id_slots, batch_state->num_tuples, + GetCurrentCommandId(true), 0, NULL); + + for (i = 0; i < batch_state->num_tuples; i++) + { + result = ExecInsertIndexTuples(resultRelInfo, batch_state->temp_id_slots[i], + estate, false, true, NULL, NIL, false); + /* Check if the unique constraint is violated */ + if (list_length(result) != 0) + { + Datum id; + bool isnull; + + id = slot_getattr(batch_state->temp_id_slots[i], 1, &isnull); + ereport(ERROR, (errmsg("Cannot insert duplicate vertex id: %lld", + (long long) DATUM_GET_GRAPHID(id)), + errhint("Entry id %lld is already used", + (long long) get_graphid_entry_id(DATUM_GET_GRAPHID(id))))); + } + } + /* Clean up and close the indices
*/ + ExecCloseIndices(resultRelInfo); + + FreeExecutorState(estate); + table_close(rel, RowExclusiveLock); + + CommandCounterIncrement(); +} + +/* + * Initialize the batch insert state for vertices. + */ +static void init_vertex_batch_insert(batch_insert_state **batch_state, + char *label_name, Oid graph_oid, + Oid temp_table_relid) +{ + Relation relation; + Oid relid; + + Relation temp_table_relation; + int i; + + /* Open a temporary relation to get the tuple descriptor */ + relid = get_label_relation(label_name, graph_oid); + relation = table_open(relid, AccessShareLock); + + temp_table_relation = table_open(temp_table_relid, AccessShareLock); + + /* Initialize the batch insert state */ + *batch_state = (batch_insert_state *) palloc0(sizeof(batch_insert_state)); + (*batch_state)->max_tuples = BATCH_SIZE; + (*batch_state)->slots = palloc(sizeof(TupleTableSlot *) * BATCH_SIZE); + (*batch_state)->temp_id_slots = palloc(sizeof(TupleTableSlot *) * BATCH_SIZE); + (*batch_state)->num_tuples = 0; + + /* Create slots */ + for (i = 0; i < BATCH_SIZE; i++) + { + (*batch_state)->slots[i] = MakeSingleTupleTableSlot( + RelationGetDescr(relation), + &TTSOpsHeapTuple); + (*batch_state)->temp_id_slots[i] = MakeSingleTupleTableSlot( + RelationGetDescr(temp_table_relation), + &TTSOpsHeapTuple); + } + + table_close(relation, AccessShareLock); + table_close(temp_table_relation, AccessShareLock); +} + +/* + * Finish the batch insert for vertices. Insert the + * remaining tuples in the batch state and clean up. 
+ */ +static void finish_vertex_batch_insert(batch_insert_state **batch_state, + char *label_name, Oid graph_oid, + Oid temp_table_relid) +{ + Relation relation; + Oid relid; + + Relation temp_table_relation; + int i; + + if ((*batch_state)->num_tuples > 0) + { + insert_vertex_batch(*batch_state, label_name, graph_oid, temp_table_relid); + (*batch_state)->num_tuples = 0; + } + + /* Open a temporary relation to ensure resources are properly cleaned up */ + relid = get_label_relation(label_name, graph_oid); + relation = table_open(relid, AccessShareLock); + + temp_table_relation = table_open(temp_table_relid, AccessShareLock); + + /* Free slots */ + for (i = 0; i < BATCH_SIZE; i++) + { + ExecDropSingleTupleTableSlot((*batch_state)->slots[i]); + ExecDropSingleTupleTableSlot((*batch_state)->temp_id_slots[i]); + } + + /* Clean up batch state */ + pfree_if_not_null((*batch_state)->slots); + pfree_if_not_null((*batch_state)->temp_id_slots); + pfree_if_not_null(*batch_state); + *batch_state = NULL; + + table_close(relation, AccessShareLock); + table_close(temp_table_relation, AccessShareLock); +} diff --git a/src/backend/utils/load/age_load.c b/src/backend/utils/load/age_load.c index 0eb6e7946..815a53bac 100644 --- a/src/backend/utils/load/age_load.c +++ b/src/backend/utils/load/age_load.c @@ -25,6 +25,9 @@ #include "utils/load/age_load.h" static agtype_value *csv_value_to_agtype_value(char *csv_val); +static Oid get_or_create_graph(const Name graph_name); +static int32 get_or_create_label(Oid graph_oid, char *graph_name, + char *label_name, char label_kind); agtype *create_empty_agtype(void) { @@ -111,9 +114,6 @@ agtype *create_agtype_from_list(char **header, char **fields, size_t fields_len, WAGT_VALUE, value_agtype); - pfree_agtype_value(key_agtype); - pfree_agtype_value(value_agtype); - for (i = 0; islots, + batch_state->num_tuples, GetCurrentCommandId(true), + 0, bistate); + + // Clean up + FreeBulkInsertState(bistate); + table_close(label_relation, RowExclusiveLock); 
+ + CommandCounterIncrement(); +} + PG_FUNCTION_INFO_V1(load_labels_from_file); Datum load_labels_from_file(PG_FUNCTION_ARGS) { @@ -300,19 +328,24 @@ Datum load_labels_from_file(PG_FUNCTION_ARGS) id_field_exists = PG_GETARG_BOOL(3); load_as_agtype = PG_GETARG_BOOL(4); - graph_name_str = NameStr(*graph_name); label_name_str = NameStr(*label_name); + + if (strcmp(label_name_str, "") == 0) + { + label_name_str = AG_DEFAULT_LABEL_VERTEX; + } + file_path_str = text_to_cstring(file_path); - graph_oid = get_graph_oid(graph_name_str); - label_id = get_label_id(label_name_str, graph_oid); + graph_oid = get_or_create_graph(graph_name); + label_id = get_or_create_label(graph_oid, graph_name_str, + label_name_str, LABEL_KIND_VERTEX); create_labels_from_csv_file(file_path_str, graph_name_str, graph_oid, label_name_str, label_id, id_field_exists, load_as_agtype); PG_RETURN_VOID(); - } PG_FUNCTION_INFO_V1(load_edges_from_file); @@ -354,13 +387,91 @@ Datum load_edges_from_file(PG_FUNCTION_ARGS) graph_name_str = NameStr(*graph_name); label_name_str = NameStr(*label_name); + + if (strcmp(label_name_str, "") == 0) + { + label_name_str = AG_DEFAULT_LABEL_EDGE; + } + file_path_str = text_to_cstring(file_path); - graph_oid = get_graph_oid(graph_name_str); - label_id = get_label_id(label_name_str, graph_oid); + graph_oid = get_or_create_graph(graph_name); + label_id = get_or_create_label(graph_oid, graph_name_str, + label_name_str, LABEL_KIND_EDGE); create_edges_from_csv_file(file_path_str, graph_name_str, graph_oid, label_name_str, label_id, load_as_agtype); PG_RETURN_VOID(); +} + +/* + * Helper function to create a graph if it does not exist. + * Just returns Oid of the graph if it already exists. 
+ */ +static Oid get_or_create_graph(const Name graph_name) +{ + Oid graph_oid; + char *graph_name_str; + + graph_name_str = NameStr(*graph_name); + graph_oid = get_graph_oid(graph_name_str); + + if (OidIsValid(graph_oid)) + { + return graph_oid; + } + + graph_oid = create_graph_internal(graph_name); + ereport(NOTICE, + (errmsg("graph \"%s\" has been created", NameStr(*graph_name)))); + + return graph_oid; +} + +/* + * Helper function to create a label if it does not exist. + * Returns its label_id if it exists; errors if it exists as the other kind. + */ +static int32 get_or_create_label(Oid graph_oid, char *graph_name, + char *label_name, char label_kind) +{ + int32 label_id; + + label_id = get_label_id(label_name, graph_oid); + + /* Check if label exists */ + if (label_id_is_valid(label_id)) + { + char *existing_kind_full = (label_kind == LABEL_KIND_VERTEX) + ? "edge" : "vertex"; + char opposite_label_kind = (label_kind == LABEL_KIND_VERTEX) + ? LABEL_KIND_EDGE : LABEL_KIND_VERTEX; + + /* If it exists as the other kind, report the kind it actually has */ + if (get_label_kind(label_name, graph_oid) == opposite_label_kind) + { + ereport(ERROR, + (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("label \"%s\" already exists as %s label", + label_name, existing_kind_full))); + } + } + else + { + /* Create a label */ + RangeVar *rv; + List *parent; + char *default_label = (label_kind == LABEL_KIND_VERTEX) + ?
AG_DEFAULT_LABEL_VERTEX : AG_DEFAULT_LABEL_EDGE; + rv = get_label_range_var(graph_name, graph_oid, default_label); + parent = list_make1(rv); + + create_label(graph_name, label_name, label_kind, parent); + label_id = get_label_id(label_name, graph_oid); + /* Match the notice prefix to the kind of label that was just created */ + ereport(NOTICE, + (errmsg("%s \"%s\" has been created", (label_kind == LABEL_KIND_VERTEX) ? "VLabel" : "ELabel", label_name))); + } + return label_id; } diff --git a/src/backend/utils/name_validation.c b/src/backend/utils/name_validation.c index 2ee998dea..6bfd62f28 100644 --- a/src/backend/utils/name_validation.c +++ b/src/backend/utils/name_validation.c @@ -55,12 +55,17 @@ int is_valid_graph_name(const char *graph_name) * @param label_type label type defined in label_commands.h * @return int */ -int is_valid_label(char *label_name, char label_type) +int is_valid_label_name(char *label_name, char label_type) { int len = strlen(label_name); if (len < MIN_LABEL_NAME_LEN || len > MAX_LABEL_NAME_LEN) { + ereport(WARNING, + (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("label name length not in range (%d <= length <= %d) length = %d", + MIN_LABEL_NAME_LEN, MAX_LABEL_NAME_LEN, len))); + return 0; } diff --git a/src/include/catalog/ag_label.h b/src/include/catalog/ag_label.h index 6c5e03334..0a8480b1a 100644 --- a/src/include/catalog/ag_label.h +++ b/src/include/catalog/ag_label.h @@ -73,6 +73,8 @@ int32 get_label_id(const char *label_name, Oid graph_oid); Oid get_label_relation(const char *label_name, Oid graph_oid); char *get_label_relation_name(const char *label_name, Oid graph_oid); char get_label_kind(const char *label_name, Oid label_graph); +char *get_label_seq_relation_name(const char *label_name); + bool label_id_exists(Oid graph_oid, int32 label_id); RangeVar *get_label_range_var(char *graph_name, Oid graph_oid, diff --git a/src/include/commands/graph_commands.h b/src/include/commands/graph_commands.h index e4d93fc1c..d456ef8c4 100644 --- a/src/include/commands/graph_commands.h +++ b/src/include/commands/graph_commands.h @@ -21,5 +21,6 @@ #define
AG_GRAPH_COMMANDS_H Datum create_graph(PG_FUNCTION_ARGS); +Oid create_graph_internal(const Name graph_name); #endif diff --git a/src/include/parser/ag_scanner.h b/src/include/parser/ag_scanner.h index 7351b89b5..3dd89abd3 100644 --- a/src/include/parser/ag_scanner.h +++ b/src/include/parser/ag_scanner.h @@ -53,6 +53,7 @@ typedef enum ag_token_type AG_TOKEN_ALL_EXISTS, AG_TOKEN_CONCAT, AG_TOKEN_CHAR, + AG_TOKEN_BQIDENT } ag_token_type; /* diff --git a/src/include/utils/age_graphid_ds.h b/src/include/utils/age_graphid_ds.h index ea9dabdc3..a5bb5273f 100644 --- a/src/include/utils/age_graphid_ds.h +++ b/src/include/utils/age_graphid_ds.h @@ -21,6 +21,7 @@ #define AG_AGE_GRAPHID_DS_H #include "utils/graphid.h" +#include "utils/agtype.h" #define IS_GRAPHID_STACK_EMPTY(stack) \ get_stack_size(stack) == 0 diff --git a/src/include/utils/age_session_info.h b/src/include/utils/age_session_info.h index ebf0035ab..5bd072fb6 100644 --- a/src/include/utils/age_session_info.h +++ b/src/include/utils/age_session_info.h @@ -20,6 +20,8 @@ #ifndef AGE_SESSION_INFO_H #define AGE_SESSION_INFO_H +#include "utils/agtype.h" + bool is_session_info_prepared(void); char *get_session_info_graph_name(void); char *get_session_info_cypher_statement(void); diff --git a/src/include/utils/agtype.h b/src/include/utils/agtype.h index c5a2fe95b..486775320 100644 --- a/src/include/utils/agtype.h +++ b/src/include/utils/agtype.h @@ -466,6 +466,8 @@ agtype_value *find_agtype_value_from_container(agtype_container *container, agtype_value *key); agtype_value *get_ith_agtype_value_from_container(agtype_container *container, uint32 i); +enum agtype_value_type get_ith_agtype_value_type(agtype_container *container, + uint32 i); agtype_value *push_agtype_value(agtype_parse_state **pstate, agtype_iterator_token seq, agtype_value *agtval); @@ -553,8 +555,8 @@ agtype_iterator *get_next_list_element(agtype_iterator *it, void pfree_agtype_value(agtype_value* value); void pfree_agtype_value_content(agtype_value* 
value); void pfree_agtype_in_state(agtype_in_state* value); +void pfree_if_not_null(void *ptr); agtype_value *agtype_value_from_cstring(char *str, int len); - /* Oid accessors for AGTYPE */ Oid get_AGTYPEOID(void); Oid get_AGTYPEARRAYOID(void); diff --git a/src/include/utils/graphid.h b/src/include/utils/graphid.h index bfb72ee8c..407e9a585 100644 --- a/src/include/utils/graphid.h +++ b/src/include/utils/graphid.h @@ -36,7 +36,7 @@ typedef int64 graphid; #define label_id_is_valid(id) (id >= LABEL_ID_MIN && id <= LABEL_ID_MAX) -#define ENTRY_ID_MIN INT64CONST(1) +#define ENTRY_ID_MIN INT64CONST(0) /* 0x0000ffffffffffff */ #define ENTRY_ID_MAX INT64CONST(281474976710655) #define INVALID_ENTRY_ID INT64CONST(0) diff --git a/src/include/utils/load/ag_load_edges.h b/src/include/utils/load/ag_load_edges.h index 6bb8ac279..df663b1dd 100644 --- a/src/include/utils/load/ag_load_edges.h +++ b/src/include/utils/load/ag_load_edges.h @@ -17,6 +17,9 @@ * under the License. */ +#include "access/heapam.h" +#include "utils/load/age_load.h" + #ifndef AG_LOAD_EDGES_H #define AG_LOAD_EDGES_H @@ -34,12 +37,13 @@ typedef struct { size_t curr_row_length; char *graph_name; Oid graph_oid; - char *object_name; - int object_id; + char *label_name; + int label_id; + Oid label_seq_relid; char *start_vertex; char *end_vertex; bool load_as_agtype; - + batch_insert_state *batch_state; } csv_edge_reader; @@ -47,7 +51,7 @@ void edge_field_cb(void *field, size_t field_len, void *data); void edge_row_cb(int delim __attribute__((unused)), void *data); int create_edges_from_csv_file(char *file_path, char *graph_name, Oid graph_oid, - char *object_name, int object_id, + char *label_name, int label_id, bool load_as_agtype); #endif /*AG_LOAD_EDGES_H */ diff --git a/src/include/utils/load/ag_load_labels.h b/src/include/utils/load/ag_load_labels.h index 7d272efbc..3a70a5c05 100644 --- a/src/include/utils/load/ag_load_labels.h +++ b/src/include/utils/load/ag_load_labels.h @@ -22,6 +22,7 @@ #define 
AG_LOAD_LABELS_H #include "access/heapam.h" +#include "utils/load/age_load.h" #define AGE_VERTIX 1 #define AGE_EDGE 2 @@ -47,10 +48,14 @@ typedef struct { size_t curr_row_length; char *graph_name; Oid graph_oid; - char *object_name; - int object_id; + char *label_name; + int label_id; + Oid label_seq_relid; /* relid of the label's id sequence */ + Oid temp_table_relid; /* relid of the temp vertex-id table */ bool id_field_exists; bool load_as_agtype; + int64 curr_seq_num; /* int64: compared against int64 entry ids */ + batch_insert_state *batch_state; /* rows pending the next batch insert */ } csv_vertex_reader; @@ -58,7 +63,7 @@ void vertex_field_cb(void *field, size_t field_len, void *data); void vertex_row_cb(int delim __attribute__((unused)), void *data); int create_labels_from_csv_file(char *file_path, char *graph_name, Oid graph_oid, - char *object_name, int object_id, + char *label_name, int label_id, bool id_field_exists, bool load_as_agtype); #endif /* AG_LOAD_LABELS_H */ diff --git a/src/include/utils/load/age_load.h b/src/include/utils/load/age_load.h index 1c650bb81..b1335581b 100644 --- a/src/include/utils/load/age_load.h +++ b/src/include/utils/load/age_load.h @@ -24,11 +24,26 @@ #include "catalog/ag_graph.h" #include "catalog/ag_label.h" #include "commands/label_commands.h" +#include "commands/graph_commands.h" #include "utils/ag_cache.h" #ifndef AGE_ENTITY_CREATOR_H #define AGE_ENTITY_CREATOR_H +#define TEMP_VERTEX_ID_TABLE_SUFFIX "_ag_vertex_ids" +#define GET_TEMP_VERTEX_ID_TABLE(graph_name) \ + psprintf("_%s%s", graph_name, TEMP_VERTEX_ID_TABLE_SUFFIX) + +#define BATCH_SIZE 1000 + +typedef struct +{ + TupleTableSlot **slots; + TupleTableSlot **temp_id_slots; + int num_tuples; + int max_tuples; +} batch_insert_state; + agtype* create_empty_agtype(void); agtype* create_agtype_from_list(char **header, char **fields, @@ -42,5 +57,7 @@ void insert_vertex_simple(Oid graph_oid, char *label_name, graphid vertex_id, void insert_edge_simple(Oid graph_oid, char *label_name, graphid edge_id, graphid start_id, graphid end_id, agtype* end_properties); +void insert_batch(batch_insert_state *batch_state, char *label_name, + Oid
graph_oid); #endif /* AGE_ENTITY_CREATOR_H */ diff --git a/src/include/utils/name_validation.h b/src/include/utils/name_validation.h index 7cfe9bf7b..430da672e 100644 --- a/src/include/utils/name_validation.h +++ b/src/include/utils/name_validation.h @@ -35,10 +35,10 @@ #define MAX_GRAPH_NAME_LEN 63 #define MIN_GRAPH_NAME_LEN 3 -#define MAX_LABEL_NAME_LEN 65535 +#define MAX_LABEL_NAME_LEN (NAMEDATALEN - 1) #define MIN_LABEL_NAME_LEN 1 int is_valid_graph_name(const char *graph_name); -int is_valid_label(char *label_name, char label_type); +int is_valid_label_name(char *label_name, char label_type); #endif