Skip to content

Commit

Permalink
Update age_load to load scalar property values with appropriate type
Browse files Browse the repository at this point in the history
Previously, property values from csv files were always loaded as strings. This
patch converts a value to an appropriate scalar type (i.e. string, bool,
numeric, null) while loading. It uses the agtype_value_from_cstring()
function for conversion.

Additional change(s):
-------------------
 - Fix: for csv rows in edge files, create_agtype_from_list_i()'s start_index
   is corrected to 4
  • Loading branch information
rafsun42 committed Aug 16, 2024
1 parent db98357 commit b5721c0
Show file tree
Hide file tree
Showing 8 changed files with 168 additions and 5 deletions.
7 changes: 7 additions & 0 deletions regress/age_load/data/conversion_edges.csv
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
start_id, start_vertex_type, end_id, end_vertex_type, string, bool, numeric,
1, Person1, 1, Person2, "John Smith", "true", 1
1, Person1, 1, Person2, "John", "false", "-2"
1, Person1, 1, Person2, John Smith, true, 1.4
1, Person1, 1, Person2, """John""", false, -1e10
1, Person1, 1, Person2, null, false, 0
1, Person1, 1, Person2, nUll, false, "3.14"
7 changes: 7 additions & 0 deletions regress/age_load/data/conversion_vertices.csv
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
id, string, bool, numeric,
1, "John Smith", "true", 1
2, "John", "false", "-2"
3, John Smith, true, 1.4
4, """John""", false, -1e10
5, null, false, 0
6, nUll, false, "3.14"
84 changes: 84 additions & 0 deletions regress/expected/age_load.out
Original file line number Diff line number Diff line change
Expand Up @@ -233,3 +233,87 @@ NOTICE: graph "agload_test_graph" has been dropped

(1 row)

--
-- Test property type conversion
--
SELECT create_graph('agload_conversion');
NOTICE: graph "agload_conversion" has been created
create_graph
--------------

(1 row)

SELECT create_vlabel('agload_conversion','Person1');
NOTICE: VLabel "Person1" has been created
create_vlabel
---------------

(1 row)

SELECT load_labels_from_file('agload_conversion', 'Person1', 'age_load/conversion_vertices.csv');
load_labels_from_file
-----------------------

(1 row)

SELECT * FROM cypher('agload_conversion', $$ MATCH (n) RETURN properties(n) $$) as (a agtype);
a
------------------------------------------------------------------------------------
{"id": 1, "bool": true, "__id__": 1, "string": "John Smith", "numeric": 1}
{"id": 2, "bool": false, "__id__": 2, "string": "John", "numeric": -2}
{"id": 3, "bool": true, "__id__": 3, "string": "John Smith", "numeric": 1.4}
{"id": 4, "bool": false, "__id__": 4, "string": "John", "numeric": -10000000000.0}
{"id": 5, "bool": false, "__id__": 5, "string": null, "numeric": 0}
{"id": 6, "bool": false, "__id__": 6, "string": "nUll", "numeric": 3.14}
(6 rows)

SELECT create_vlabel('agload_conversion','Person2');
NOTICE: VLabel "Person2" has been created
create_vlabel
---------------

(1 row)

SELECT load_labels_from_file('agload_conversion', 'Person2', 'age_load/conversion_vertices.csv');
load_labels_from_file
-----------------------

(1 row)

SELECT create_elabel('agload_conversion','Edges');
NOTICE: ELabel "Edges" has been created
create_elabel
---------------

(1 row)

SELECT load_edges_from_file('agload_conversion', 'Edges', 'age_load/conversion_edges.csv');
load_edges_from_file
----------------------

(1 row)

SELECT * FROM cypher('agload_conversion', $$ MATCH ()-[e]->() RETURN properties(e) $$) as (a agtype);
a
--------------------------------------------------------------
{"bool": true, "string": "John Smith", "numeric": 1}
{"bool": false, "string": "John", "numeric": -2}
{"bool": true, "string": "John Smith", "numeric": 1.4}
{"bool": false, "string": "John", "numeric": -10000000000.0}
{"bool": false, "string": null, "numeric": 0}
{"bool": false, "string": "nUll", "numeric": 3.14}
(6 rows)

SELECT drop_graph('agload_conversion', true);
NOTICE: drop cascades to 5 other objects
DETAIL: drop cascades to table agload_conversion._ag_label_vertex
drop cascades to table agload_conversion._ag_label_edge
drop cascades to table agload_conversion."Person1"
drop cascades to table agload_conversion."Person2"
drop cascades to table agload_conversion."Edges"
NOTICE: graph "agload_conversion" has been dropped
drop_graph
------------

(1 row)

18 changes: 18 additions & 0 deletions regress/sql/age_load.sql
Original file line number Diff line number Diff line change
Expand Up @@ -79,3 +79,21 @@ SELECT * FROM cypher('agload_test_graph', $$
$$) AS (result_1 agtype, result_2 agtype);

SELECT drop_graph('agload_test_graph', true);

--
-- Test property type conversion
--
SELECT create_graph('agload_conversion');

SELECT create_vlabel('agload_conversion','Person1');
SELECT load_labels_from_file('agload_conversion', 'Person1', 'age_load/conversion_vertices.csv');
SELECT * FROM cypher('agload_conversion', $$ MATCH (n) RETURN properties(n) $$) as (a agtype);

SELECT create_vlabel('agload_conversion','Person2');
SELECT load_labels_from_file('agload_conversion', 'Person2', 'age_load/conversion_vertices.csv');

SELECT create_elabel('agload_conversion','Edges');
SELECT load_edges_from_file('agload_conversion', 'Edges', 'age_load/conversion_edges.csv');
SELECT * FROM cypher('agload_conversion', $$ MATCH ()-[e]->() RETURN properties(e) $$) as (a agtype);

SELECT drop_graph('agload_conversion', true);
3 changes: 1 addition & 2 deletions src/backend/utils/adt/agtype.c
Original file line number Diff line number Diff line change
Expand Up @@ -89,7 +89,6 @@ typedef enum /* type categories for datum_to_agtype */
} agt_type_category;

static inline Datum agtype_from_cstring(char *str, int len);
static inline agtype_value *agtype_value_from_cstring(char *str, int len);
size_t check_string_length(size_t len);
static void agtype_in_agtype_annotation(void *pstate, char *annotation);
static void agtype_in_object_start(void *pstate);
Expand Down Expand Up @@ -355,7 +354,7 @@ Datum agtype_out(PG_FUNCTION_ARGS)
* Uses the agtype parser (with hooks) to construct an agtype.
*/

static inline agtype_value *agtype_value_from_cstring(char *str, int len)
agtype_value *agtype_value_from_cstring(char *str, int len)
{
agtype_lex_context *lex;
agtype_in_state state;
Expand Down
2 changes: 1 addition & 1 deletion src/backend/utils/load/ag_load_edges.c
Original file line number Diff line number Diff line change
Expand Up @@ -100,7 +100,7 @@ void edge_row_cb(int delim __attribute__((unused)), void *data)
end_vertex_graph_id = make_graphid(end_vertex_type_id, end_id_int);

props = create_agtype_from_list_i(cr->header, cr->fields,
n_fields, 3);
n_fields, 4);

insert_edge_simple(cr->graph_oid, cr->object_name,
object_graph_id, start_vertex_graph_id,
Expand Down
51 changes: 49 additions & 2 deletions src/backend/utils/load/age_load.c
Original file line number Diff line number Diff line change
Expand Up @@ -18,11 +18,14 @@
*/

#include "postgres.h"
#include "utils/json.h"

#include "utils/load/ag_load_edges.h"
#include "utils/load/ag_load_labels.h"
#include "utils/load/age_load.h"

static agtype_value *csv_value_to_agtype_value(char *csv_val);

agtype *create_empty_agtype(void)
{
agtype* out;
Expand All @@ -40,6 +43,50 @@ agtype *create_empty_agtype(void)
return out;
}

/*
* Converts the given csv value to an agtype_value.
*
* If csv_val is not a valid json, it is wrapped by double-quotes to make it a
* string value. Because agtype is jsonb-like, the token should be a valid
* json in order to be parsed into an agtype_value of appropriate type.
* Finally, agtype_value_from_cstring() is called for parsing.
*/
static agtype_value *csv_value_to_agtype_value(char *csv_val)
{
char *new_csv_val;
agtype_value *res;

if (!json_validate(cstring_to_text(csv_val), false, false))
{
// wrap the string with double-quote
int oldlen;
int newlen;

oldlen = strlen(csv_val);
newlen = oldlen + 2; // +2 for double-quotes
new_csv_val = (char *)palloc(sizeof(char) * (newlen + 1));

new_csv_val[0] = '"';
strncpy(&new_csv_val[1], csv_val, oldlen);
new_csv_val[oldlen + 1] = '"';
new_csv_val[oldlen + 2] = '\0';
}
else
{
new_csv_val = csv_val;
}

res = agtype_value_from_cstring(new_csv_val, strlen(new_csv_val));

// extract from top-level row scalar array
if (res->type == AGTV_ARRAY && res->val.array.raw_scalar)
{
res = &res->val.array.elems[0];
}

return res;
}

agtype *create_agtype_from_list(char **header, char **fields, size_t fields_len,
int64 vertex_id)
{
Expand Down Expand Up @@ -74,7 +121,7 @@ agtype *create_agtype_from_list(char **header, char **fields, size_t fields_len,
WAGT_KEY,
key_agtype);

value_agtype = string_to_agtype_value(fields[i]);
value_agtype = csv_value_to_agtype_value(fields[i]);
result.res = push_agtype_value(&result.parse_state,
WAGT_VALUE,
value_agtype);
Expand Down Expand Up @@ -117,7 +164,7 @@ agtype* create_agtype_from_list_i(char **header, char **fields,
result.res = push_agtype_value(&result.parse_state,
WAGT_KEY,
key_agtype);
value_agtype = string_to_agtype_value(fields[i]);
value_agtype = csv_value_to_agtype_value(fields[i]);
result.res = push_agtype_value(&result.parse_state,
WAGT_VALUE,
value_agtype);
Expand Down
1 change: 1 addition & 0 deletions src/include/utils/agtype.h
Original file line number Diff line number Diff line change
Expand Up @@ -553,6 +553,7 @@ agtype_iterator *get_next_list_element(agtype_iterator *it,
void pfree_agtype_value(agtype_value* value);
void pfree_agtype_value_content(agtype_value* value);
void pfree_agtype_in_state(agtype_in_state* value);
agtype_value *agtype_value_from_cstring(char *str, int len);

/* Oid accessors for AGTYPE */
Oid get_AGTYPEOID(void);
Expand Down

0 comments on commit b5721c0

Please sign in to comment.