Skip to content

Commit

Permalink
Update age_load to make property value conversion optional (#1525) (#2061)
Browse files Browse the repository at this point in the history

This is an addition to a previous patch that introduced loading
property values from CSV files as agtype. This behavior is
made optional in this patch with a boolean function parameter
`load_as_agtype`. When this parameter is false (the default), values
are loaded as strings.
  • Loading branch information
rafsun42 authored Aug 20, 2024
1 parent cf1ce46 commit 136471d
Show file tree
Hide file tree
Showing 9 changed files with 120 additions and 32 deletions.
58 changes: 49 additions & 9 deletions regress/expected/age_load.out
Original file line number Diff line number Diff line change
Expand Up @@ -243,20 +243,21 @@ NOTICE: graph "agload_conversion" has been created

(1 row)

-- vertex: load as agtype
SELECT create_vlabel('agload_conversion','Person1');
NOTICE: VLabel "Person1" has been created
create_vlabel
---------------

(1 row)

SELECT load_labels_from_file('agload_conversion', 'Person1', 'age_load/conversion_vertices.csv');
SELECT load_labels_from_file('agload_conversion', 'Person1', 'age_load/conversion_vertices.csv', true, true);
load_labels_from_file
-----------------------

(1 row)

SELECT * FROM cypher('agload_conversion', $$ MATCH (n) RETURN properties(n) $$) as (a agtype);
SELECT * FROM cypher('agload_conversion', $$ MATCH (n:Person1) RETURN properties(n) $$) as (a agtype);
a
------------------------------------------------------------------------------------
{"id": 1, "bool": true, "__id__": 1, "string": "John Smith", "numeric": 1}
Expand All @@ -267,33 +268,46 @@ SELECT * FROM cypher('agload_conversion', $$ MATCH (n) RETURN properties(n) $$)
{"id": 6, "bool": false, "__id__": 6, "string": "nUll", "numeric": 3.14}
(6 rows)

-- vertex: load as string
SELECT create_vlabel('agload_conversion','Person2');
NOTICE: VLabel "Person2" has been created
create_vlabel
---------------

(1 row)

SELECT load_labels_from_file('agload_conversion', 'Person2', 'age_load/conversion_vertices.csv');
SELECT load_labels_from_file('agload_conversion', 'Person2', 'age_load/conversion_vertices.csv', true, false);
load_labels_from_file
-----------------------

(1 row)

SELECT create_elabel('agload_conversion','Edges');
NOTICE: ELabel "Edges" has been created
SELECT * FROM cypher('agload_conversion', $$ MATCH (n:Person2) RETURN properties(n) $$) as (a agtype);
a
-------------------------------------------------------------------------------------
{"id": "1", "bool": "true", "__id__": 1, "string": "John Smith", "numeric": "1"}
{"id": "2", "bool": "false", "__id__": 2, "string": "John", "numeric": "-2"}
{"id": "3", "bool": "true", "__id__": 3, "string": "John Smith", "numeric": "1.4"}
{"id": "4", "bool": "false", "__id__": 4, "string": "\"John\"", "numeric": "-1e10"}
{"id": "5", "bool": "false", "__id__": 5, "string": "null", "numeric": "0"}
{"id": "6", "bool": "false", "__id__": 6, "string": "nUll", "numeric": "3.14"}
(6 rows)

-- edge: load as agtype
SELECT create_elabel('agload_conversion','Edges1');
NOTICE: ELabel "Edges1" has been created
create_elabel
---------------

(1 row)

SELECT load_edges_from_file('agload_conversion', 'Edges', 'age_load/conversion_edges.csv');
SELECT load_edges_from_file('agload_conversion', 'Edges1', 'age_load/conversion_edges.csv', true);
load_edges_from_file
----------------------

(1 row)

SELECT * FROM cypher('agload_conversion', $$ MATCH ()-[e]->() RETURN properties(e) $$) as (a agtype);
SELECT * FROM cypher('agload_conversion', $$ MATCH ()-[e:Edges1]->() RETURN properties(e) $$) as (a agtype);
a
--------------------------------------------------------------
{"bool": true, "string": "John Smith", "numeric": 1}
Expand All @@ -304,13 +318,39 @@ SELECT * FROM cypher('agload_conversion', $$ MATCH ()-[e]->() RETURN properties(
{"bool": false, "string": "nUll", "numeric": 3.14}
(6 rows)

-- edge: load as string
SELECT create_elabel('agload_conversion','Edges2');
NOTICE: ELabel "Edges2" has been created
create_elabel
---------------

(1 row)

SELECT load_edges_from_file('agload_conversion', 'Edges2', 'age_load/conversion_edges.csv', false);
load_edges_from_file
----------------------

(1 row)

SELECT * FROM cypher('agload_conversion', $$ MATCH ()-[e:Edges2]->() RETURN properties(e) $$) as (a agtype);
a
-------------------------------------------------------------
{"bool": "true", "string": "John Smith", "numeric": "1"}
{"bool": "false", "string": "John", "numeric": "-2"}
{"bool": "true", "string": "John Smith", "numeric": "1.4"}
{"bool": "false", "string": "\"John\"", "numeric": "-1e10"}
{"bool": "false", "string": "null", "numeric": "0"}
{"bool": "false", "string": "nUll", "numeric": "3.14"}
(6 rows)

SELECT drop_graph('agload_conversion', true);
NOTICE: drop cascades to 5 other objects
NOTICE: drop cascades to 6 other objects
DETAIL: drop cascades to table agload_conversion._ag_label_vertex
drop cascades to table agload_conversion._ag_label_edge
drop cascades to table agload_conversion."Person1"
drop cascades to table agload_conversion."Person2"
drop cascades to table agload_conversion."Edges"
drop cascades to table agload_conversion."Edges1"
drop cascades to table agload_conversion."Edges2"
NOTICE: graph "agload_conversion" has been dropped
drop_graph
------------
Expand Down
23 changes: 16 additions & 7 deletions regress/sql/age_load.sql
Original file line number Diff line number Diff line change
Expand Up @@ -85,15 +85,24 @@ SELECT drop_graph('agload_test_graph', true);
--
SELECT create_graph('agload_conversion');

-- vertex: load as agtype
SELECT create_vlabel('agload_conversion','Person1');
SELECT load_labels_from_file('agload_conversion', 'Person1', 'age_load/conversion_vertices.csv');
SELECT * FROM cypher('agload_conversion', $$ MATCH (n) RETURN properties(n) $$) as (a agtype);
SELECT load_labels_from_file('agload_conversion', 'Person1', 'age_load/conversion_vertices.csv', true, true);
SELECT * FROM cypher('agload_conversion', $$ MATCH (n:Person1) RETURN properties(n) $$) as (a agtype);

-- vertex: load as string
SELECT create_vlabel('agload_conversion','Person2');
SELECT load_labels_from_file('agload_conversion', 'Person2', 'age_load/conversion_vertices.csv');

SELECT create_elabel('agload_conversion','Edges');
SELECT load_edges_from_file('agload_conversion', 'Edges', 'age_load/conversion_edges.csv');
SELECT * FROM cypher('agload_conversion', $$ MATCH ()-[e]->() RETURN properties(e) $$) as (a agtype);
SELECT load_labels_from_file('agload_conversion', 'Person2', 'age_load/conversion_vertices.csv', true, false);
SELECT * FROM cypher('agload_conversion', $$ MATCH (n:Person2) RETURN properties(n) $$) as (a agtype);

-- edge: load as agtype
SELECT create_elabel('agload_conversion','Edges1');
SELECT load_edges_from_file('agload_conversion', 'Edges1', 'age_load/conversion_edges.csv', true);
SELECT * FROM cypher('agload_conversion', $$ MATCH ()-[e:Edges1]->() RETURN properties(e) $$) as (a agtype);

-- edge: load as string
SELECT create_elabel('agload_conversion','Edges2');
SELECT load_edges_from_file('agload_conversion', 'Edges2', 'age_load/conversion_edges.csv', false);
SELECT * FROM cypher('agload_conversion', $$ MATCH ()-[e:Edges2]->() RETURN properties(e) $$) as (a agtype);

SELECT drop_graph('agload_conversion', true);
10 changes: 8 additions & 2 deletions sql/age_main.sql
Original file line number Diff line number Diff line change
Expand Up @@ -122,17 +122,23 @@ CREATE FUNCTION ag_catalog.drop_label(graph_name name, label_name name,
LANGUAGE c
AS 'MODULE_PATHNAME';

--
-- If `load_as_agtype` is true, property values are loaded as agtype; otherwise
-- (the default) they are loaded as strings.
--
CREATE FUNCTION ag_catalog.load_labels_from_file(graph_name name,
label_name name,
file_path text,
id_field_exists bool default true)
id_field_exists bool default true,
load_as_agtype bool default false)
RETURNS void
LANGUAGE c
AS 'MODULE_PATHNAME';

CREATE FUNCTION ag_catalog.load_edges_from_file(graph_name name,
label_name name,
file_path text)
file_path text,
load_as_agtype bool default false)
RETURNS void
LANGUAGE c
AS 'MODULE_PATHNAME';
Expand Down
6 changes: 4 additions & 2 deletions src/backend/utils/load/ag_load_edges.c
Original file line number Diff line number Diff line change
Expand Up @@ -100,7 +100,7 @@ void edge_row_cb(int delim __attribute__((unused)), void *data)
end_vertex_graph_id = make_graphid(end_vertex_type_id, end_id_int);

props = create_agtype_from_list_i(cr->header, cr->fields,
n_fields, 4);
n_fields, 4, cr->load_as_agtype);

insert_edge_simple(cr->graph_oid, cr->object_name,
object_graph_id, start_vertex_graph_id,
Expand Down Expand Up @@ -153,7 +153,8 @@ int create_edges_from_csv_file(char *file_path,
char *graph_name,
Oid graph_oid,
char *object_name,
int object_id )
int object_id,
bool load_as_agtype)
{

FILE *fp;
Expand Down Expand Up @@ -190,6 +191,7 @@ int create_edges_from_csv_file(char *file_path,
cr.graph_oid = graph_oid;
cr.object_name = object_name;
cr.object_id = object_id;
cr.load_as_agtype = load_as_agtype;

while ((bytes_read=fread(buf, 1, 1024, fp)) > 0)
{
Expand Down
7 changes: 5 additions & 2 deletions src/backend/utils/load/ag_load_labels.c
Original file line number Diff line number Diff line change
Expand Up @@ -92,7 +92,8 @@ void vertex_row_cb(int delim __attribute__((unused)), void *data)
object_graph_id = make_graphid(cr->object_id, label_id_int);

props = create_agtype_from_list(cr->header, cr->fields,
n_fields, label_id_int);
n_fields, label_id_int,
cr->load_as_agtype);
insert_vertex_simple(cr->graph_oid, cr->object_name,
object_graph_id, props);
pfree(props);
Expand Down Expand Up @@ -145,7 +146,8 @@ int create_labels_from_csv_file(char *file_path,
Oid graph_oid,
char *object_name,
int object_id,
bool id_field_exists)
bool id_field_exists,
bool load_as_agtype)
{

FILE *fp;
Expand Down Expand Up @@ -184,6 +186,7 @@ int create_labels_from_csv_file(char *file_path,
cr.object_name = object_name;
cr.object_id = object_id;
cr.id_field_exists = id_field_exists;
cr.load_as_agtype = load_as_agtype;



Expand Down
35 changes: 29 additions & 6 deletions src/backend/utils/load/age_load.c
Original file line number Diff line number Diff line change
Expand Up @@ -88,7 +88,7 @@ static agtype_value *csv_value_to_agtype_value(char *csv_val)
}

agtype *create_agtype_from_list(char **header, char **fields, size_t fields_len,
int64 vertex_id)
int64 vertex_id, bool load_as_agtype)
{
agtype* out;
agtype_value* key_agtype;
Expand Down Expand Up @@ -121,7 +121,15 @@ agtype *create_agtype_from_list(char **header, char **fields, size_t fields_len,
WAGT_KEY,
key_agtype);

value_agtype = csv_value_to_agtype_value(fields[i]);
if (load_as_agtype)
{
value_agtype = csv_value_to_agtype_value(fields[i]);
}
else
{
value_agtype = string_to_agtype_value(fields[i]);
}

result.res = push_agtype_value(&result.parse_state,
WAGT_VALUE,
value_agtype);
Expand All @@ -140,7 +148,8 @@ agtype *create_agtype_from_list(char **header, char **fields, size_t fields_len,
}

agtype* create_agtype_from_list_i(char **header, char **fields,
size_t fields_len, size_t start_index)
size_t fields_len, size_t start_index,
bool load_as_agtype)
{
agtype* out;
agtype_value* key_agtype;
Expand All @@ -164,7 +173,16 @@ agtype* create_agtype_from_list_i(char **header, char **fields,
result.res = push_agtype_value(&result.parse_state,
WAGT_KEY,
key_agtype);
value_agtype = csv_value_to_agtype_value(fields[i]);

if (load_as_agtype)
{
value_agtype = csv_value_to_agtype_value(fields[i]);
}
else
{
value_agtype = string_to_agtype_value(fields[i]);
}

result.res = push_agtype_value(&result.parse_state,
WAGT_VALUE,
value_agtype);
Expand Down Expand Up @@ -256,6 +274,7 @@ Datum load_labels_from_file(PG_FUNCTION_ARGS)
Oid graph_oid;
int32 label_id;
bool id_field_exists;
bool load_as_agtype;

if (PG_ARGISNULL(0))
{
Expand All @@ -279,6 +298,7 @@ Datum load_labels_from_file(PG_FUNCTION_ARGS)
label_name = PG_GETARG_NAME(1);
file_path = PG_GETARG_TEXT_P(2);
id_field_exists = PG_GETARG_BOOL(3);
load_as_agtype = PG_GETARG_BOOL(4);


graph_name_str = NameStr(*graph_name);
Expand All @@ -289,7 +309,8 @@ Datum load_labels_from_file(PG_FUNCTION_ARGS)
label_id = get_label_id(label_name_str, graph_oid);

create_labels_from_csv_file(file_path_str, graph_name_str, graph_oid,
label_name_str, label_id, id_field_exists);
label_name_str, label_id, id_field_exists,
load_as_agtype);
PG_RETURN_VOID();

}
Expand All @@ -306,6 +327,7 @@ Datum load_edges_from_file(PG_FUNCTION_ARGS)
char* file_path_str;
Oid graph_oid;
int32 label_id;
bool load_as_agtype;

if (PG_ARGISNULL(0))
{
Expand All @@ -328,6 +350,7 @@ Datum load_edges_from_file(PG_FUNCTION_ARGS)
graph_name = PG_GETARG_NAME(0);
label_name = PG_GETARG_NAME(1);
file_path = PG_GETARG_TEXT_P(2);
load_as_agtype = PG_GETARG_BOOL(3);

graph_name_str = NameStr(*graph_name);
label_name_str = NameStr(*label_name);
Expand All @@ -337,7 +360,7 @@ Datum load_edges_from_file(PG_FUNCTION_ARGS)
label_id = get_label_id(label_name_str, graph_oid);

create_edges_from_csv_file(file_path_str, graph_name_str, graph_oid,
label_name_str, label_id);
label_name_str, label_id, load_as_agtype);
PG_RETURN_VOID();

}
4 changes: 3 additions & 1 deletion src/include/utils/load/ag_load_edges.h
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,7 @@ typedef struct {
int object_id;
char *start_vertex;
char *end_vertex;
bool load_as_agtype;

} csv_edge_reader;

Expand All @@ -46,7 +47,8 @@ void edge_field_cb(void *field, size_t field_len, void *data);
void edge_row_cb(int delim __attribute__((unused)), void *data);

int create_edges_from_csv_file(char *file_path, char *graph_name, Oid graph_oid,
char *object_name, int object_id );
char *object_name, int object_id,
bool load_as_agtype);

#endif /* AG_LOAD_EDGES_H */

3 changes: 2 additions & 1 deletion src/include/utils/load/ag_load_labels.h
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,7 @@ typedef struct {
char *object_name;
int object_id;
bool id_field_exists;
bool load_as_agtype;
} csv_vertex_reader;


Expand All @@ -58,6 +59,6 @@ void vertex_row_cb(int delim __attribute__((unused)), void *data);

int create_labels_from_csv_file(char *file_path, char *graph_name, Oid graph_oid,
char *object_name, int object_id,
bool id_field_exists);
bool id_field_exists, bool load_as_agtype);

#endif /* AG_LOAD_LABELS_H */
6 changes: 4 additions & 2 deletions src/include/utils/load/age_load.h
Original file line number Diff line number Diff line change
Expand Up @@ -32,9 +32,11 @@
agtype* create_empty_agtype(void);

agtype* create_agtype_from_list(char **header, char **fields,
size_t fields_len, int64 vertex_id);
size_t fields_len, int64 vertex_id,
bool load_as_agtype);
agtype* create_agtype_from_list_i(char **header, char **fields,
size_t fields_len, size_t start_index);
size_t fields_len, size_t start_index,
bool load_as_agtype);
void insert_vertex_simple(Oid graph_oid, char *label_name, graphid vertex_id,
agtype *vertex_properties);
void insert_edge_simple(Oid graph_oid, char *label_name, graphid edge_id,
Expand Down

0 comments on commit 136471d

Please sign in to comment.