From 136471d097c3862c897cf7256bff79897b549c52 Mon Sep 17 00:00:00 2001 From: Rafsun Masud Date: Tue, 20 Aug 2024 00:52:32 -0700 Subject: [PATCH] Update age_load to make property value conversion optional (#1525) (#2061) This is an addition to a previous patch that introduced loading property values from CSV files as agtype. This behavior is made optional in this patch with a boolean function parameter `load_as_agtype`. When this parameter is false, values are loaded as string. --- regress/expected/age_load.out | 58 +++++++++++++++++++++---- regress/sql/age_load.sql | 23 +++++++--- sql/age_main.sql | 10 ++++- src/backend/utils/load/ag_load_edges.c | 6 ++- src/backend/utils/load/ag_load_labels.c | 7 ++- src/backend/utils/load/age_load.c | 35 ++++++++++++--- src/include/utils/load/ag_load_edges.h | 4 +- src/include/utils/load/ag_load_labels.h | 3 +- src/include/utils/load/age_load.h | 6 ++- 9 files changed, 120 insertions(+), 32 deletions(-) diff --git a/regress/expected/age_load.out b/regress/expected/age_load.out index b568980c0..8635a499b 100644 --- a/regress/expected/age_load.out +++ b/regress/expected/age_load.out @@ -243,6 +243,7 @@ NOTICE: graph "agload_conversion" has been created (1 row) +-- vertex: load as agtype SELECT create_vlabel('agload_conversion','Person1'); NOTICE: VLabel "Person1" has been created create_vlabel @@ -250,13 +251,13 @@ NOTICE: VLabel "Person1" has been created (1 row) -SELECT load_labels_from_file('agload_conversion', 'Person1', 'age_load/conversion_vertices.csv'); +SELECT load_labels_from_file('agload_conversion', 'Person1', 'age_load/conversion_vertices.csv', true, true); load_labels_from_file ----------------------- (1 row) -SELECT * FROM cypher('agload_conversion', $$ MATCH (n) RETURN properties(n) $$) as (a agtype); +SELECT * FROM cypher('agload_conversion', $$ MATCH (n:Person1) RETURN properties(n) $$) as (a agtype); a ------------------------------------------------------------------------------------ {"id": 1, "bool": true, "__id__": 1, "string": "John Smith", "numeric": 1} @@ -267,6 +268,7 @@ SELECT * FROM cypher('agload_conversion', $$ MATCH (n) RETURN properties(n) $$) {"id": 6, "bool": false, "__id__": 6, "string": "nUll", "numeric": 3.14} (6 rows) +-- vertex: load as string SELECT create_vlabel('agload_conversion','Person2'); NOTICE: VLabel "Person2" has been created create_vlabel @@ -274,26 +276,38 @@ NOTICE: VLabel "Person2" has been created (1 row) -SELECT load_labels_from_file('agload_conversion', 'Person2', 'age_load/conversion_vertices.csv'); +SELECT load_labels_from_file('agload_conversion', 'Person2', 'age_load/conversion_vertices.csv', true, false); load_labels_from_file ----------------------- (1 row) -SELECT create_elabel('agload_conversion','Edges'); -NOTICE: ELabel "Edges" has been created +SELECT * FROM cypher('agload_conversion', $$ MATCH (n:Person2) RETURN properties(n) $$) as (a agtype); + a +------------------------------------------------------------------------------------- + {"id": "1", "bool": "true", "__id__": 1, "string": "John Smith", "numeric": "1"} + {"id": "2", "bool": "false", "__id__": 2, "string": "John", "numeric": "-2"} + {"id": "3", "bool": "true", "__id__": 3, "string": "John Smith", "numeric": "1.4"} + {"id": "4", "bool": "false", "__id__": 4, "string": "\"John\"", "numeric": "-1e10"} + {"id": "5", "bool": "false", "__id__": 5, "string": "null", "numeric": "0"} + {"id": "6", "bool": "false", "__id__": 6, "string": "nUll", "numeric": "3.14"} +(6 rows) + +-- edge: load as agtype +SELECT create_elabel('agload_conversion','Edges1'); +NOTICE: ELabel "Edges1" has been created create_elabel --------------- (1 row) -SELECT load_edges_from_file('agload_conversion', 'Edges', 'age_load/conversion_edges.csv'); +SELECT load_edges_from_file('agload_conversion', 'Edges1', 'age_load/conversion_edges.csv', true); load_edges_from_file ---------------------- (1 row) -SELECT * FROM cypher('agload_conversion', $$ MATCH ()-[e]->() RETURN properties(e) $$) as (a agtype); +SELECT * FROM cypher('agload_conversion', $$ MATCH ()-[e:Edges1]->() RETURN properties(e) $$) as (a agtype); a -------------------------------------------------------------- {"bool": true, "string": "John Smith", "numeric": 1} @@ -304,13 +318,39 @@ SELECT * FROM cypher('agload_conversion', $$ MATCH ()-[e]->() RETURN properties( {"bool": false, "string": "nUll", "numeric": 3.14} (6 rows) +-- edge: load as string +SELECT create_elabel('agload_conversion','Edges2'); +NOTICE: ELabel "Edges2" has been created + create_elabel +--------------- + +(1 row) + +SELECT load_edges_from_file('agload_conversion', 'Edges2', 'age_load/conversion_edges.csv', false); + load_edges_from_file +---------------------- + +(1 row) + +SELECT * FROM cypher('agload_conversion', $$ MATCH ()-[e:Edges2]->() RETURN properties(e) $$) as (a agtype); + a +------------------------------------------------------------- + {"bool": "true", "string": "John Smith", "numeric": "1"} + {"bool": "false", "string": "John", "numeric": "-2"} + {"bool": "true", "string": "John Smith", "numeric": "1.4"} + {"bool": "false", "string": "\"John\"", "numeric": "-1e10"} + {"bool": "false", "string": "null", "numeric": "0"} + {"bool": "false", "string": "nUll", "numeric": "3.14"} +(6 rows) + SELECT drop_graph('agload_conversion', true); -NOTICE: drop cascades to 5 other objects +NOTICE: drop cascades to 6 other objects DETAIL: drop cascades to table agload_conversion._ag_label_vertex drop cascades to table agload_conversion._ag_label_edge drop cascades to table agload_conversion."Person1" drop cascades to table agload_conversion."Person2" -drop cascades to table agload_conversion."Edges" +drop cascades to table agload_conversion."Edges1" +drop cascades to table agload_conversion."Edges2" NOTICE: graph "agload_conversion" has been dropped drop_graph ------------ diff --git a/regress/sql/age_load.sql b/regress/sql/age_load.sql index 7805d181e..cee34f59c 100644 --- a/regress/sql/age_load.sql +++ b/regress/sql/age_load.sql @@ -85,15 +85,24 @@ SELECT drop_graph('agload_test_graph', true); -- SELECT create_graph('agload_conversion'); +-- vertex: load as agtype SELECT create_vlabel('agload_conversion','Person1'); -SELECT load_labels_from_file('agload_conversion', 'Person1', 'age_load/conversion_vertices.csv'); -SELECT * FROM cypher('agload_conversion', $$ MATCH (n) RETURN properties(n) $$) as (a agtype); +SELECT load_labels_from_file('agload_conversion', 'Person1', 'age_load/conversion_vertices.csv', true, true); +SELECT * FROM cypher('agload_conversion', $$ MATCH (n:Person1) RETURN properties(n) $$) as (a agtype); +-- vertex: load as string SELECT create_vlabel('agload_conversion','Person2'); -SELECT load_labels_from_file('agload_conversion', 'Person2', 'age_load/conversion_vertices.csv'); - -SELECT create_elabel('agload_conversion','Edges'); -SELECT load_edges_from_file('agload_conversion', 'Edges', 'age_load/conversion_edges.csv'); -SELECT * FROM cypher('agload_conversion', $$ MATCH ()-[e]->() RETURN properties(e) $$) as (a agtype); +SELECT load_labels_from_file('agload_conversion', 'Person2', 'age_load/conversion_vertices.csv', true, false); +SELECT * FROM cypher('agload_conversion', $$ MATCH (n:Person2) RETURN properties(n) $$) as (a agtype); + +-- edge: load as agtype +SELECT create_elabel('agload_conversion','Edges1'); +SELECT load_edges_from_file('agload_conversion', 'Edges1', 'age_load/conversion_edges.csv', true); +SELECT * FROM cypher('agload_conversion', $$ MATCH ()-[e:Edges1]->() RETURN properties(e) $$) as (a agtype); + +-- edge: load as string +SELECT create_elabel('agload_conversion','Edges2'); +SELECT load_edges_from_file('agload_conversion', 'Edges2', 'age_load/conversion_edges.csv', false); +SELECT * FROM cypher('agload_conversion', $$ MATCH ()-[e:Edges2]->() RETURN properties(e) $$) as (a agtype); SELECT drop_graph('agload_conversion', true); diff --git a/sql/age_main.sql b/sql/age_main.sql index 1ceabbad1..59ada0f9f 100644 --- a/sql/age_main.sql +++ b/sql/age_main.sql @@ -122,17 +122,23 @@ CREATE FUNCTION ag_catalog.drop_label(graph_name name, label_name name, LANGUAGE c AS 'MODULE_PATHNAME'; +-- +-- If `load_as_agtype` is true, property values are loaded as agtype; otherwise +-- loaded as string. +-- CREATE FUNCTION ag_catalog.load_labels_from_file(graph_name name, label_name name, file_path text, - id_field_exists bool default true) + id_field_exists bool default true, + load_as_agtype bool default false) RETURNS void LANGUAGE c AS 'MODULE_PATHNAME'; CREATE FUNCTION ag_catalog.load_edges_from_file(graph_name name, label_name name, - file_path text) + file_path text, + load_as_agtype bool default false) RETURNS void LANGUAGE c AS 'MODULE_PATHNAME'; diff --git a/src/backend/utils/load/ag_load_edges.c b/src/backend/utils/load/ag_load_edges.c index 4d892f078..4f2f66a35 100644 --- a/src/backend/utils/load/ag_load_edges.c +++ b/src/backend/utils/load/ag_load_edges.c @@ -100,7 +100,7 @@ void edge_row_cb(int delim __attribute__((unused)), void *data) end_vertex_graph_id = make_graphid(end_vertex_type_id, end_id_int); props = create_agtype_from_list_i(cr->header, cr->fields, - n_fields, 4); + n_fields, 4, cr->load_as_agtype); insert_edge_simple(cr->graph_oid, cr->object_name, object_graph_id, start_vertex_graph_id, @@ -153,7 +153,8 @@ int create_edges_from_csv_file(char *file_path, char *graph_name, Oid graph_oid, char *object_name, - int object_id ) + int object_id, + bool load_as_agtype) { FILE *fp; @@ -190,6 +191,7 @@ int create_edges_from_csv_file(char *file_path, cr.graph_oid = graph_oid; cr.object_name = object_name; cr.object_id = object_id; + cr.load_as_agtype = load_as_agtype; while ((bytes_read=fread(buf, 1, 1024, fp)) > 0) { diff --git a/src/backend/utils/load/ag_load_labels.c b/src/backend/utils/load/ag_load_labels.c index 137734fe5..f377f1cb3 100644 --- a/src/backend/utils/load/ag_load_labels.c +++ b/src/backend/utils/load/ag_load_labels.c @@ -92,7 +92,8 @@ void vertex_row_cb(int delim __attribute__((unused)), void *data) object_graph_id = make_graphid(cr->object_id, label_id_int); props = create_agtype_from_list(cr->header, cr->fields, - n_fields, label_id_int); + n_fields, label_id_int, + cr->load_as_agtype); insert_vertex_simple(cr->graph_oid, cr->object_name, object_graph_id, props); pfree(props); @@ -145,7 +146,8 @@ int create_labels_from_csv_file(char *file_path, Oid graph_oid, char *object_name, int object_id, - bool id_field_exists) + bool id_field_exists, + bool load_as_agtype) { FILE *fp; @@ -184,6 +186,7 @@ int create_labels_from_csv_file(char *file_path, cr.object_name = object_name; cr.object_id = object_id; cr.id_field_exists = id_field_exists; + cr.load_as_agtype = load_as_agtype; diff --git a/src/backend/utils/load/age_load.c b/src/backend/utils/load/age_load.c index dc36c56a7..1f9a1dbba 100644 --- a/src/backend/utils/load/age_load.c +++ b/src/backend/utils/load/age_load.c @@ -88,7 +88,7 @@ static agtype_value *csv_value_to_agtype_value(char *csv_val) } agtype *create_agtype_from_list(char **header, char **fields, size_t fields_len, - int64 vertex_id) + int64 vertex_id, bool load_as_agtype) { agtype* out; agtype_value* key_agtype; @@ -121,7 +121,15 @@ agtype *create_agtype_from_list(char **header, char **fields, size_t fields_len, WAGT_KEY, key_agtype); - value_agtype = csv_value_to_agtype_value(fields[i]); + if (load_as_agtype) + { + value_agtype = csv_value_to_agtype_value(fields[i]); + } + else + { + value_agtype = string_to_agtype_value(fields[i]); + } + result.res = push_agtype_value(&result.parse_state, WAGT_VALUE, value_agtype); @@ -140,7 +148,8 @@ agtype *create_agtype_from_list(char **header, char **fields, size_t fields_len, } agtype* create_agtype_from_list_i(char **header, char **fields, - size_t fields_len, size_t start_index) + size_t fields_len, size_t start_index, + bool load_as_agtype) { agtype* out; agtype_value* key_agtype; @@ -164,7 +173,16 @@ agtype* create_agtype_from_list_i(char **header, char **fields, result.res = push_agtype_value(&result.parse_state, WAGT_KEY, key_agtype); - value_agtype = csv_value_to_agtype_value(fields[i]); + + if (load_as_agtype) + { + value_agtype = csv_value_to_agtype_value(fields[i]); + } + else + { + value_agtype = string_to_agtype_value(fields[i]); + } + result.res = push_agtype_value(&result.parse_state, WAGT_VALUE, value_agtype); @@ -256,6 +274,7 @@ Datum load_labels_from_file(PG_FUNCTION_ARGS) Oid graph_oid; int32 label_id; bool id_field_exists; + bool load_as_agtype; if (PG_ARGISNULL(0)) { @@ -279,6 +298,7 @@ Datum load_labels_from_file(PG_FUNCTION_ARGS) label_name = PG_GETARG_NAME(1); file_path = PG_GETARG_TEXT_P(2); id_field_exists = PG_GETARG_BOOL(3); + load_as_agtype = PG_GETARG_BOOL(4); graph_name_str = NameStr(*graph_name); @@ -289,7 +309,8 @@ Datum load_labels_from_file(PG_FUNCTION_ARGS) label_id = get_label_id(label_name_str, graph_oid); create_labels_from_csv_file(file_path_str, graph_name_str, graph_oid, - label_name_str, label_id, id_field_exists); + label_name_str, label_id, id_field_exists, + load_as_agtype); PG_RETURN_VOID(); } @@ -306,6 +327,7 @@ Datum load_edges_from_file(PG_FUNCTION_ARGS) char* file_path_str; Oid graph_oid; int32 label_id; + bool load_as_agtype; if (PG_ARGISNULL(0)) { @@ -328,6 +350,7 @@ Datum load_edges_from_file(PG_FUNCTION_ARGS) graph_name = PG_GETARG_NAME(0); label_name = PG_GETARG_NAME(1); file_path = PG_GETARG_TEXT_P(2); + load_as_agtype = PG_GETARG_BOOL(3); graph_name_str = NameStr(*graph_name); label_name_str = NameStr(*label_name); @@ -337,7 +360,7 @@ Datum load_edges_from_file(PG_FUNCTION_ARGS) label_id = get_label_id(label_name_str, graph_oid); create_edges_from_csv_file(file_path_str, graph_name_str, graph_oid, - label_name_str, label_id); + label_name_str, label_id, load_as_agtype); PG_RETURN_VOID(); } diff --git a/src/include/utils/load/ag_load_edges.h b/src/include/utils/load/ag_load_edges.h index 292d828be..eed07573f 100644 --- a/src/include/utils/load/ag_load_edges.h +++ b/src/include/utils/load/ag_load_edges.h @@ -38,6 +38,7 @@ typedef struct { int object_id; char *start_vertex; char *end_vertex; + bool load_as_agtype; } csv_edge_reader; @@ -46,7 +47,8 @@ void edge_field_cb(void *field, size_t field_len, void *data); void edge_row_cb(int delim __attribute__((unused)), void *data); int create_edges_from_csv_file(char *file_path, char *graph_name, Oid graph_oid, - char *object_name, int object_id ); + char *object_name, int object_id, + bool load_as_agtype); #endif /* AG_LOAD_EDGES_H */ diff --git a/src/include/utils/load/ag_load_labels.h b/src/include/utils/load/ag_load_labels.h index 9fed8b502..7d272efbc 100644 --- a/src/include/utils/load/ag_load_labels.h +++ b/src/include/utils/load/ag_load_labels.h @@ -50,6 +50,7 @@ typedef struct { char *object_name; int object_id; bool id_field_exists; + bool load_as_agtype; } csv_vertex_reader; @@ -58,6 +59,6 @@ void vertex_row_cb(int delim __attribute__((unused)), void *data); int create_labels_from_csv_file(char *file_path, char *graph_name, Oid graph_oid, char *object_name, int object_id, - bool id_field_exists); + bool id_field_exists, bool load_as_agtype); #endif /* AG_LOAD_LABELS_H */ diff --git a/src/include/utils/load/age_load.h b/src/include/utils/load/age_load.h index 07d0275a7..1c650bb81 100644 --- a/src/include/utils/load/age_load.h +++ b/src/include/utils/load/age_load.h @@ -32,9 +32,11 @@ agtype* create_empty_agtype(void); agtype* create_agtype_from_list(char **header, char **fields, - size_t fields_len, int64 vertex_id); + size_t fields_len, int64 vertex_id, + bool load_as_agtype); agtype* create_agtype_from_list_i(char **header, char **fields, - size_t fields_len, size_t start_index); + size_t fields_len, size_t start_index, + bool load_as_agtype); void insert_vertex_simple(Oid graph_oid, char *label_name, graphid vertex_id, agtype *vertex_properties); void insert_edge_simple(Oid graph_oid, char *label_name, graphid edge_id,