Skip to content

Commit 1342ac1

Browse files
committed
Fixes neo4j-contrib#4207: Integration Tests for Load Procedures with Cloud Object Storage
1 parent 7c6254a commit 1342ac1

20 files changed

+803
-283
lines changed

extended-it/build.gradle

+2-1
Original file line numberDiff line numberDiff line change
@@ -55,7 +55,8 @@ dependencies {
5555
testImplementation group: 'org.testcontainers', name: 'chromadb', version: '1.20.2'
5656
testImplementation group: 'org.testcontainers', name: 'weaviate', version: '1.20.2'
5757
testImplementation group: 'org.testcontainers', name: 'milvus', version: '1.20.2'
58-
58+
testImplementation group: 'org.apache.poi', name: 'poi', version: '5.1.0'
59+
testImplementation group: 'org.apache.poi', name: 'poi-ooxml', version: '5.1.0'
5960
configurations.all {
6061
exclude group: 'org.slf4j', module: 'slf4j-nop'
6162
exclude group: 'ch.qos.logback', module: 'logback-classic'
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,55 @@
1+
package apoc.export;
2+
3+
import apoc.export.arrow.ExportArrow;
4+
import apoc.export.arrow.ImportArrow;
5+
import apoc.load.Gexf;
6+
import apoc.meta.Meta;
7+
import apoc.util.GoogleCloudStorageContainerExtension;
8+
import apoc.util.TestUtil;
9+
import org.junit.BeforeClass;
10+
import org.junit.ClassRule;
11+
import org.junit.Test;
12+
import org.neo4j.test.rule.DbmsRule;
13+
import org.neo4j.test.rule.ImpermanentDbmsRule;
14+
15+
import static apoc.ApocConfig.APOC_EXPORT_FILE_ENABLED;
16+
import static apoc.ApocConfig.APOC_IMPORT_FILE_ENABLED;
17+
import static apoc.ApocConfig.apocConfig;
18+
import static apoc.export.arrow.ImportArrowTestUtil.MAPPING_ALL;
19+
import static apoc.export.arrow.ImportArrowTestUtil.prepareDbForArrow;
20+
import static apoc.export.arrow.ImportArrowTestUtil.testImportCommon;
21+
import static apoc.util.ExtendedTestUtil.clearDb;
22+
import static apoc.util.GexfTestUtil.testImportGexfCommon;
23+
import static apoc.util.GoogleCloudStorageContainerExtension.gcsUrl;
24+
25+
public class ImportGoogleCloudStorageTest {
26+
public static GoogleCloudStorageContainerExtension gcs = new GoogleCloudStorageContainerExtension()
27+
.withMountedResourceFile("test_all.arrow", "/folder/test_all.arrow")
28+
.withMountedResourceFile("gexf/data.gexf", "/folder/data.gexf");
29+
30+
@ClassRule
31+
public static DbmsRule db = new ImpermanentDbmsRule();
32+
33+
@BeforeClass
34+
public static void setUp() throws Exception {
35+
gcs.start();
36+
TestUtil.registerProcedure(db, ExportArrow.class, ImportArrow.class, Meta.class, Gexf.class);
37+
prepareDbForArrow(db);
38+
apocConfig().setProperty(APOC_IMPORT_FILE_ENABLED, true);
39+
apocConfig().setProperty(APOC_EXPORT_FILE_ENABLED, true);
40+
}
41+
42+
@Test
43+
public void testImportArrow() {
44+
String url = gcsUrl(gcs, "b/folder/o/test_all.arrow?alt=media");
45+
testImportCommon(db, url, MAPPING_ALL);
46+
}
47+
48+
@Test
49+
public void testImportGexf() {
50+
clearDb(db);
51+
String url = gcsUrl(gcs, "b/folder/o/data.gexf?alt=media");
52+
testImportGexfCommon(db, url);
53+
}
54+
55+
}
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,120 @@
1+
package apoc.load;
2+
3+
import apoc.load.xls.LoadXls;
4+
import apoc.util.GoogleCloudStorageContainerExtension;
5+
import apoc.util.TestUtil;
6+
import apoc.util.Util;
7+
import apoc.xml.XmlTestUtils;
8+
import org.junit.AfterClass;
9+
import org.junit.Assert;
10+
import org.junit.BeforeClass;
11+
import org.junit.ClassRule;
12+
import org.junit.Test;
13+
import org.neo4j.driver.internal.util.Iterables;
14+
import org.neo4j.graphdb.Result;
15+
import org.neo4j.test.rule.DbmsRule;
16+
import org.neo4j.test.rule.ImpermanentDbmsRule;
17+
18+
import java.util.ArrayList;
19+
import java.util.LinkedHashMap;
20+
import java.util.List;
21+
import java.util.Map;
22+
23+
import static apoc.load.LoadCsvTest.assertRow;
24+
import static apoc.util.GoogleCloudStorageContainerExtension.gcsUrl;
25+
import static apoc.util.MapUtil.map;
26+
import static apoc.util.TestUtil.testCall;
27+
import static apoc.util.TestUtil.testResult;
28+
import static java.util.Arrays.asList;
29+
import static org.junit.Assert.assertEquals;
30+
import static org.junit.Assert.assertFalse;
31+
import static org.junit.Assert.assertTrue;
32+
33+
public class LoadGoogleCloudStorageTest {
34+
35+
public static GoogleCloudStorageContainerExtension gcs = new GoogleCloudStorageContainerExtension()
36+
.withMountedResourceFile("test.csv", "/folder/test.csv")
37+
.withMountedResourceFile("map.json", "/folder/map.json")
38+
.withMountedResourceFile("xml/books.xml", "/folder/books.xml")
39+
.withMountedResourceFile("load_test.xlsx", "/folder/load_test.xlsx")
40+
.withMountedResourceFile("wikipedia.html", "/folder/wikipedia.html");
41+
42+
@ClassRule
43+
public static DbmsRule db = new ImpermanentDbmsRule();
44+
45+
@BeforeClass
46+
public static void setUp() throws Exception {
47+
gcs.start();
48+
TestUtil.registerProcedure(db, LoadCsv.class, LoadJson.class, LoadHtml.class, LoadXls.class, Xml.class);
49+
}
50+
51+
@AfterClass
52+
public static void tearDown() {
53+
gcs.close();
54+
db.shutdown();
55+
}
56+
57+
@Test
58+
public void testLoadCsv() {
59+
String url = gcsUrl(gcs, "b/folder/o/test.csv?alt=media");
60+
61+
testResult(db, "CALL apoc.load.csv($url)", map("url", url), (r) -> {
62+
assertRow(r, "Selma", "8", 0L);
63+
assertRow(r, "Rana", "11", 1L);
64+
assertRow(r, "Selina", "18", 2L);
65+
assertFalse("It should be the last record", r.hasNext());
66+
});
67+
}
68+
69+
@Test
70+
public void testLoadJSON() {
71+
String url = gcsUrl(gcs, "b/folder/o/map.json?alt=media");
72+
testCall(db, "CALL apoc.load.jsonArray($url, '$.foo')", map("url", url), (r) -> {
73+
assertEquals(asList(1L,2L,3L), r.get("value"));
74+
});
75+
}
76+
77+
@Test
78+
public void testLoadXml() {
79+
String url = gcsUrl(gcs, "b/folder/o/books.xml?alt=media");
80+
testCall(db, "CALL apoc.load.xml($url,'/catalog/book[title=\"Maeve Ascendant\"]/.',{failOnError:false}) yield value as result", Util.map("url", url), (r) -> {
81+
Object value = Iterables.single(r.values());
82+
Assert.assertEquals(XmlTestUtils.XML_XPATH_AS_NESTED_MAP, value);
83+
});
84+
}
85+
86+
@Test
87+
public void testLoadXls() {
88+
String url = gcsUrl(gcs, "b/folder/o/load_test.xlsx?alt=media");
89+
testResult(db, "CALL apoc.load.xls($url,'Full',{mapping:{Integer:{type:'int'}, Array:{type:'int',array:true,arraySep:';'}}})", map("url",url), // 'file:load_test.xlsx'
90+
(r) -> {
91+
assertXlsRow(r,0L,"String","Test","Boolean",true,"Integer",2L,"Float",1.5d,"Array",asList(1L,2L,3L));
92+
assertFalse("Should not have another row",r.hasNext());
93+
});
94+
}
95+
96+
@Test
97+
public void testLoadHtml() {
98+
String url = gcsUrl(gcs, "b/folder/o/wikipedia.html?alt=media");
99+
100+
Map<String, Object> query = map("links", "a[href]");
101+
102+
testCall(db, "CALL apoc.load.html($url,$query)",
103+
map("url", url, "query", query),
104+
row -> {
105+
final List<Map<String, Object>> actual = (List) ((Map) row.get("value")).get("links");
106+
assertEquals(106, actual.size());
107+
assertTrue(actual.stream().allMatch(i -> i.get("tagName").equals("a")));
108+
});
109+
}
110+
111+
static void assertXlsRow(Result r, long lineNo, Object...data) {
112+
Map<String, Object> row = r.next();
113+
Map<String, Object> map = map(data);
114+
assertEquals(map, row.get("map"));
115+
Map<Object, Object> stringMap = new LinkedHashMap<>(map.size());
116+
map.forEach((k,v) -> stringMap.put(k,v == null ? null : v.toString()));
117+
assertEquals(new ArrayList<>(map.values()), row.get("list"));
118+
assertEquals(lineNo, row.get("lineNo"));
119+
}
120+
}
Original file line numberDiff line numberDiff line change
@@ -1,44 +1,55 @@
11
package apoc.s3;
22

33
import apoc.load.LoadCsv;
4+
import apoc.load.LoadDirectory;
5+
import apoc.load.LoadHtml;
46
import apoc.load.LoadJson;
57
import apoc.load.Xml;
8+
import apoc.load.xls.LoadXls;
69
import apoc.util.TestUtil;
710
import apoc.util.Util;
8-
import apoc.util.s3.S3BaseTest;
911
import apoc.xml.XmlTestUtils;
1012
import org.junit.Assert;
1113
import org.junit.Before;
1214
import org.junit.Rule;
1315
import org.junit.Test;
1416
import org.junit.jupiter.api.AfterAll;
15-
1617
import org.neo4j.driver.internal.util.Iterables;
18+
import org.neo4j.graphdb.Result;
1719
import org.neo4j.test.rule.DbmsRule;
1820
import org.neo4j.test.rule.ImpermanentDbmsRule;
1921

22+
import java.nio.charset.StandardCharsets;
23+
import java.util.ArrayList;
24+
import java.util.LinkedHashMap;
25+
import java.util.List;
26+
import java.util.Map;
27+
2028
import static apoc.ApocConfig.APOC_IMPORT_FILE_ENABLED;
2129
import static apoc.ApocConfig.APOC_IMPORT_FILE_USE_NEO4J_CONFIG;
2230
import static apoc.ApocConfig.apocConfig;
2331
import static apoc.load.LoadCsvTest.assertRow;
2432
import static apoc.util.ExtendedITUtil.EXTENDED_PATH;
2533
import static apoc.util.MapUtil.map;
34+
import static apoc.util.S3ExtendedUtil.putToS3AndGetUrl;
2635
import static apoc.util.TestUtil.testCall;
2736
import static apoc.util.TestUtil.testResult;
2837
import static java.util.Arrays.asList;
2938
import static org.junit.Assert.assertEquals;
3039
import static org.junit.Assert.assertFalse;
40+
import static org.junit.Assert.assertTrue;
3141

32-
public class LoadS3Test extends S3BaseTest {
42+
public class LoadS3Test extends S3BaseExtendedTest {
3343

3444
@Rule
3545
public DbmsRule db = new ImpermanentDbmsRule();
3646

3747
@Before
3848
public void setUp() throws Exception {
39-
TestUtil.registerProcedure(db, LoadCsv.class, LoadJson.class, Xml.class);
49+
TestUtil.registerProcedure(db, LoadCsv.class, LoadDirectory.class, LoadJson.class, LoadHtml.class, LoadXls.class, Xml.class);
4050
apocConfig().setProperty(APOC_IMPORT_FILE_ENABLED, true);
4151
apocConfig().setProperty(APOC_IMPORT_FILE_USE_NEO4J_CONFIG, false);
52+
putFolderToS3();
4253
}
4354

4455
@AfterAll
@@ -47,10 +58,8 @@ public void tearDownAll() {
4758
}
4859

4960
@Test
50-
public void testLoadCsvS3() {
51-
String url = s3Container.putFile(EXTENDED_PATH + "src/test/resources/test.csv");
52-
url = removeRegionFromUrl(url);
53-
61+
public void testLoadCsv() {
62+
String url = putToS3AndGetUrl(s3ExtendedContainer, EXTENDED_PATH + "src/test/resources/test.csv");
5463
testResult(db, "CALL apoc.load.csv($url,{failOnError:false})", map("url", url), (r) -> {
5564
assertRow(r, "Selma", "8", 0L);
5665
assertRow(r, "Rana", "11", 1L);
@@ -59,29 +68,72 @@ public void testLoadCsvS3() {
5968
});
6069
}
6170

62-
@Test public void testLoadJsonS3() {
63-
String url = s3Container.putFile(EXTENDED_PATH + "src/test/resources/map.json");
64-
url = removeRegionFromUrl(url);
65-
71+
@Test public void testLoadJson() {
72+
String url = putToS3AndGetUrl(s3ExtendedContainer, EXTENDED_PATH + "src/test/resources/map.json");
6673
testCall(db, "CALL apoc.load.json($url,'')",map("url", url),
6774
(row) -> {
6875
assertEquals(map("foo",asList(1L,2L,3L)), row.get("value"));
6976
});
7077
}
7178

72-
@Test public void testLoadXmlS3() {
73-
String url = s3Container.putFile(EXTENDED_PATH + "src/test/resources/xml/books.xml");
74-
url = removeRegionFromUrl(url);
75-
79+
@Test public void testLoadXml() {
80+
String url = putToS3AndGetUrl(s3ExtendedContainer, EXTENDED_PATH + "src/test/resources/xml/books.xml");
7681
testCall(db, "CALL apoc.load.xml($url,'/catalog/book[title=\"Maeve Ascendant\"]/.',{failOnError:false}) yield value as result", Util.map("url", url), (r) -> {
7782
Object value = Iterables.single(r.values());
7883
Assert.assertEquals(XmlTestUtils.XML_XPATH_AS_NESTED_MAP, value);
7984
});
8085
}
8186

82-
private String removeRegionFromUrl(String url) {
83-
return url.replace(s3Container.getEndpointConfiguration().getSigningRegion() + ".", "");
87+
@Test public void testLoadXls() {
88+
String url = putToS3AndGetUrl(s3ExtendedContainer, EXTENDED_PATH + "src/test/resources/load_test.xlsx");
89+
testResult(db, "CALL apoc.load.xls($url,'Full',{mapping:{Integer:{type:'int'}, Array:{type:'int',array:true,arraySep:';'}}})", map("url",url), // 'file:load_test.xlsx'
90+
(r) -> {
91+
assertXlsRow(r,0L,"String","Test","Boolean",true,"Integer",2L,"Float",1.5d,"Array",asList(1L,2L,3L));
92+
assertFalse("Should not have another row",r.hasNext());
93+
});
8494
}
8595

96+
@Test
97+
public void testLoadHtml() {
98+
String url = putToS3AndGetUrl(s3ExtendedContainer, EXTENDED_PATH + "src/test/resources/wikipedia.html");
99+
100+
Map<String, Object> query = map("links", "a[href]");
101+
102+
testCall(db, "CALL apoc.load.html($url,$query)",
103+
map("url", url, "query", query),
104+
row -> {
105+
final List<Map<String, Object>> actual = (List) ((Map) row.get("value")).get("links");
106+
assertEquals(106, actual.size());
107+
assertTrue(actual.stream().allMatch(i -> i.get("tagName").equals("a")));
108+
});
109+
}
110+
111+
private void putFolderToS3() {
112+
StringBuilder csv= new StringBuilder(); // Faster
113+
csv.append("name,age\r\n");
114+
csv.append("Bonzo,20\r\n");
115+
csv.append("Oronzo,45\r\n");
116+
byte[] data = csv.toString().getBytes(StandardCharsets.UTF_8);
117+
118+
s3ExtendedContainer.putObjectToS3("test_folder/test.csv", data);
119+
120+
csv = new StringBuilder();
121+
csv.append("name,age\r\n");
122+
csv.append("Bobby,18\r\n");
123+
csv.append("Maruccio,90\r\n");
124+
data = csv.toString().getBytes(StandardCharsets.UTF_8);
125+
126+
s3ExtendedContainer.putObjectToS3("test_folder/test_1.csv", data);
127+
}
128+
129+
static void assertXlsRow(Result r, long lineNo, Object...data) {
130+
Map<String, Object> row = r.next();
131+
Map<String, Object> map = map(data);
132+
assertEquals(map, row.get("map"));
133+
Map<Object, Object> stringMap = new LinkedHashMap<>(map.size());
134+
map.forEach((k,v) -> stringMap.put(k,v == null ? null : v.toString()));
135+
assertEquals(new ArrayList<>(map.values()), row.get("list"));
136+
assertEquals(lineNo, row.get("lineNo"));
137+
}
86138

87139
}

0 commit comments

Comments
 (0)