From cf1ce46d05029dc6b8316d9a157c6b1e05ee8dd4 Mon Sep 17 00:00:00 2001 From: Rafsun Masud Date: Fri, 16 Aug 2024 10:34:30 -0700 Subject: [PATCH] Update age_load to load scalar property values with appropriate type (#2048) Previously, property values from csv files were always loaded as strings. This patch converts a value to an appropriate scalar type (i.e. string, bool, numeric, null) while loading. It uses the agtype_value_from_cstring() function for conversion. Additional change(s): ------------------- - Fix: for csv rows in edge files, create_agtype_from_list_i()'s start_index is corrected to 4 --- regress/age_load/data/conversion_edges.csv | 7 ++ regress/age_load/data/conversion_vertices.csv | 7 ++ regress/expected/age_load.out | 84 +++++++++++++++++++ regress/sql/age_load.sql | 18 ++++ src/backend/utils/adt/agtype.c | 3 +- src/backend/utils/load/ag_load_edges.c | 2 +- src/backend/utils/load/age_load.c | 51 ++++++++++- src/include/utils/agtype.h | 1 + 8 files changed, 168 insertions(+), 5 deletions(-) create mode 100644 regress/age_load/data/conversion_edges.csv create mode 100644 regress/age_load/data/conversion_vertices.csv diff --git a/regress/age_load/data/conversion_edges.csv b/regress/age_load/data/conversion_edges.csv new file mode 100644 index 000000000..f207cdc57 --- /dev/null +++ b/regress/age_load/data/conversion_edges.csv @@ -0,0 +1,7 @@ +start_id, start_vertex_type, end_id, end_vertex_type, string, bool, numeric, +1, Person1, 1, Person2, "John Smith", "true", 1 +1, Person1, 1, Person2, "John", "false", "-2" +1, Person1, 1, Person2, John Smith, true, 1.4 +1, Person1, 1, Person2, """John""", false, -1e10 +1, Person1, 1, Person2, null, false, 0 +1, Person1, 1, Person2, nUll, false, "3.14" diff --git a/regress/age_load/data/conversion_vertices.csv b/regress/age_load/data/conversion_vertices.csv new file mode 100644 index 000000000..8e0a9c65c --- /dev/null +++ b/regress/age_load/data/conversion_vertices.csv @@ -0,0 +1,7 @@ +id, string, bool, numeric, +1, "John Smith", "true", 1 +2, "John", "false", "-2" +3, John Smith, true, 1.4 +4, """John""", false, -1e10 +5, null, false, 0 +6, nUll, false, "3.14" diff --git a/regress/expected/age_load.out b/regress/expected/age_load.out index 5e74eaae6..b568980c0 100644 --- a/regress/expected/age_load.out +++ b/regress/expected/age_load.out @@ -233,3 +233,87 @@ NOTICE: graph "agload_test_graph" has been dropped (1 row) +-- +-- Test property type conversion +-- +SELECT create_graph('agload_conversion'); +NOTICE: graph "agload_conversion" has been created + create_graph +-------------- + +(1 row) + +SELECT create_vlabel('agload_conversion','Person1'); +NOTICE: VLabel "Person1" has been created + create_vlabel +--------------- + +(1 row) + +SELECT load_labels_from_file('agload_conversion', 'Person1', 'age_load/conversion_vertices.csv'); + load_labels_from_file +----------------------- + +(1 row) + +SELECT * FROM cypher('agload_conversion', $$ MATCH (n) RETURN properties(n) $$) as (a agtype); + a +------------------------------------------------------------------------------------ + {"id": 1, "bool": true, "__id__": 1, "string": "John Smith", "numeric": 1} + {"id": 2, "bool": false, "__id__": 2, "string": "John", "numeric": -2} + {"id": 3, "bool": true, "__id__": 3, "string": "John Smith", "numeric": 1.4} + {"id": 4, "bool": false, "__id__": 4, "string": "John", "numeric": -10000000000.0} + {"id": 5, "bool": false, "__id__": 5, "string": null, "numeric": 0} + {"id": 6, "bool": false, "__id__": 6, "string": "nUll", "numeric": 3.14} +(6 rows) + +SELECT create_vlabel('agload_conversion','Person2'); +NOTICE: VLabel "Person2" has been created + create_vlabel +--------------- + +(1 row) + +SELECT load_labels_from_file('agload_conversion', 'Person2', 'age_load/conversion_vertices.csv'); + load_labels_from_file +----------------------- + +(1 row) + +SELECT create_elabel('agload_conversion','Edges'); +NOTICE: ELabel "Edges" has been created + create_elabel +--------------- + +(1 row) + +SELECT load_edges_from_file('agload_conversion', 'Edges', 'age_load/conversion_edges.csv'); + load_edges_from_file +---------------------- + +(1 row) + +SELECT * FROM cypher('agload_conversion', $$ MATCH ()-[e]->() RETURN properties(e) $$) as (a agtype); + a +-------------------------------------------------------------- + {"bool": true, "string": "John Smith", "numeric": 1} + {"bool": false, "string": "John", "numeric": -2} + {"bool": true, "string": "John Smith", "numeric": 1.4} + {"bool": false, "string": "John", "numeric": -10000000000.0} + {"bool": false, "string": null, "numeric": 0} + {"bool": false, "string": "nUll", "numeric": 3.14} +(6 rows) + +SELECT drop_graph('agload_conversion', true); +NOTICE: drop cascades to 5 other objects +DETAIL: drop cascades to table agload_conversion._ag_label_vertex +drop cascades to table agload_conversion._ag_label_edge +drop cascades to table agload_conversion."Person1" +drop cascades to table agload_conversion."Person2" +drop cascades to table agload_conversion."Edges" +NOTICE: graph "agload_conversion" has been dropped + drop_graph +------------ + +(1 row) + diff --git a/regress/sql/age_load.sql b/regress/sql/age_load.sql index 105c2fa60..7805d181e 100644 --- a/regress/sql/age_load.sql +++ b/regress/sql/age_load.sql @@ -79,3 +79,21 @@ SELECT * FROM cypher('agload_test_graph', $$ $$) AS (result_1 agtype, result_2 agtype); SELECT drop_graph('agload_test_graph', true); + +-- +-- Test property type conversion +-- +SELECT create_graph('agload_conversion'); + +SELECT create_vlabel('agload_conversion','Person1'); +SELECT load_labels_from_file('agload_conversion', 'Person1', 'age_load/conversion_vertices.csv'); +SELECT * FROM cypher('agload_conversion', $$ MATCH (n) RETURN properties(n) $$) as (a agtype); + +SELECT create_vlabel('agload_conversion','Person2'); +SELECT load_labels_from_file('agload_conversion', 'Person2', 'age_load/conversion_vertices.csv'); + +SELECT create_elabel('agload_conversion','Edges'); +SELECT load_edges_from_file('agload_conversion', 'Edges', 'age_load/conversion_edges.csv'); +SELECT * FROM cypher('agload_conversion', $$ MATCH ()-[e]->() RETURN properties(e) $$) as (a agtype); + +SELECT drop_graph('agload_conversion', true); diff --git a/src/backend/utils/adt/agtype.c b/src/backend/utils/adt/agtype.c index 00bf7d61f..62e9a5d3b 100644 --- a/src/backend/utils/adt/agtype.c +++ b/src/backend/utils/adt/agtype.c @@ -89,7 +89,6 @@ typedef enum /* type categories for datum_to_agtype */ } agt_type_category; static inline Datum agtype_from_cstring(char *str, int len); -static inline agtype_value *agtype_value_from_cstring(char *str, int len); size_t check_string_length(size_t len); static void agtype_in_agtype_annotation(void *pstate, char *annotation); static void agtype_in_object_start(void *pstate); @@ -355,7 +354,7 @@ Datum agtype_out(PG_FUNCTION_ARGS) * Uses the agtype parser (with hooks) to construct an agtype. */ -static inline agtype_value *agtype_value_from_cstring(char *str, int len) +agtype_value *agtype_value_from_cstring(char *str, int len) { agtype_lex_context *lex; agtype_in_state state; diff --git a/src/backend/utils/load/ag_load_edges.c b/src/backend/utils/load/ag_load_edges.c index d516d334f..4d892f078 100644 --- a/src/backend/utils/load/ag_load_edges.c +++ b/src/backend/utils/load/ag_load_edges.c @@ -100,7 +100,7 @@ void edge_row_cb(int delim __attribute__((unused)), void *data) end_vertex_graph_id = make_graphid(end_vertex_type_id, end_id_int); props = create_agtype_from_list_i(cr->header, cr->fields, - n_fields, 3); + n_fields, 4); insert_edge_simple(cr->graph_oid, cr->object_name, object_graph_id, start_vertex_graph_id, diff --git a/src/backend/utils/load/age_load.c b/src/backend/utils/load/age_load.c index 60dbf9941..dc36c56a7 100644 --- a/src/backend/utils/load/age_load.c +++ b/src/backend/utils/load/age_load.c @@ -18,11 +18,14 @@ */ #include "postgres.h" +#include "utils/json.h" #include "utils/load/ag_load_edges.h" #include "utils/load/ag_load_labels.h" #include "utils/load/age_load.h" +static agtype_value *csv_value_to_agtype_value(char *csv_val); + agtype *create_empty_agtype(void) { agtype* out; @@ -40,6 +43,50 @@ agtype *create_empty_agtype(void) return out; } +/* + * Converts the given csv value to an agtype_value. + * + * If csv_val is not a valid json, it is wrapped by double-quotes to make it a + * string value. Because agtype is jsonb-like, the token should be a valid + * json in order to be parsed into an agtype_value of appropriate type. + * Finally, agtype_value_from_cstring() is called for parsing. + */ +static agtype_value *csv_value_to_agtype_value(char *csv_val) +{ + char *new_csv_val; + agtype_value *res; + + if (!json_validate(cstring_to_text(csv_val), false, false)) + { + // wrap the string with double-quote + int oldlen; + int newlen; + + oldlen = strlen(csv_val); + newlen = oldlen + 2; // +2 for double-quotes + new_csv_val = (char *)palloc(sizeof(char) * (newlen + 1)); + + new_csv_val[0] = '"'; + strncpy(&new_csv_val[1], csv_val, oldlen); + new_csv_val[oldlen + 1] = '"'; + new_csv_val[oldlen + 2] = '\0'; + } + else + { + new_csv_val = csv_val; + } + + res = agtype_value_from_cstring(new_csv_val, strlen(new_csv_val)); + + // extract from top-level row scalar array + if (res->type == AGTV_ARRAY && res->val.array.raw_scalar) + { + res = &res->val.array.elems[0]; + } + + return res; +} + agtype *create_agtype_from_list(char **header, char **fields, size_t fields_len, int64 vertex_id) { @@ -74,7 +121,7 @@ agtype *create_agtype_from_list(char **header, char **fields, size_t fields_len, WAGT_KEY, key_agtype); - value_agtype = string_to_agtype_value(fields[i]); + value_agtype = csv_value_to_agtype_value(fields[i]); result.res = push_agtype_value(&result.parse_state, WAGT_VALUE, value_agtype); @@ -117,7 +164,7 @@ agtype* create_agtype_from_list_i(char **header, char **fields, result.res = push_agtype_value(&result.parse_state, WAGT_KEY, key_agtype); - value_agtype = string_to_agtype_value(fields[i]); + value_agtype = csv_value_to_agtype_value(fields[i]); result.res = push_agtype_value(&result.parse_state, WAGT_VALUE, value_agtype); diff --git a/src/include/utils/agtype.h b/src/include/utils/agtype.h index 7ea68fe3d..c5a2fe95b 100644 --- a/src/include/utils/agtype.h +++ b/src/include/utils/agtype.h @@ -553,6 +553,7 @@ agtype_iterator *get_next_list_element(agtype_iterator *it, void pfree_agtype_value(agtype_value* value); void pfree_agtype_value_content(agtype_value* value); void pfree_agtype_in_state(agtype_in_state* value); +agtype_value *agtype_value_from_cstring(char *str, int len); /* Oid accessors for AGTYPE */ Oid get_AGTYPEOID(void);