From d0cd7b1a5c7ab132b1af7542334eb49de2f39ca6 Mon Sep 17 00:00:00 2001 From: Rising Odegua Date: Sun, 16 Jan 2022 12:47:41 +0100 Subject: [PATCH] Fixes #354 --- src/danfojs-base/io/browser/io.csv.ts | 2 + src/danfojs-base/io/node/io.csv.ts | 8 +- .../tests/io/csv.reader.test.js | 10 ++ src/danfojs-node/test/io/csv.reader.test.ts | 14 +- src/danfojs-node/test/samples/iris.csv | 151 ++++++++++++++++++ 5 files changed, 182 insertions(+), 3 deletions(-) create mode 100644 src/danfojs-node/test/samples/iris.csv diff --git a/src/danfojs-base/io/browser/io.csv.ts b/src/danfojs-base/io/browser/io.csv.ts index e05d42da..d22eecd3 100644 --- a/src/danfojs-base/io/browser/io.csv.ts +++ b/src/danfojs-base/io/browser/io.csv.ts @@ -51,6 +51,7 @@ const $readCSV = async (file: any, options?: CsvInputOptionsBrowser): Promise { Papa.parse(file, { header: true, + dynamicTyping: true, ...options, download: true, complete: results => { @@ -83,6 +84,7 @@ const $streamCSV = async (file: string, callback: (df: DataFrame) => void, optio let count = -1 Papa.parse(file, { ...options, + dynamicTyping: true, header: true, download: true, step: results => { diff --git a/src/danfojs-base/io/node/io.csv.ts b/src/danfojs-base/io/node/io.csv.ts index 1de24619..80f51409 100644 --- a/src/danfojs-base/io/node/io.csv.ts +++ b/src/danfojs-base/io/node/io.csv.ts @@ -54,6 +54,7 @@ const $readCSV = async (filePath: string, options?: CsvInputOptionsNode): Promis return new Promise(resolve => { const optionsWithDefaults = { header: true, + dynamicTyping: true, ...options, } @@ -76,6 +77,7 @@ const $readCSV = async (filePath: string, options?: CsvInputOptionsNode): Promis const fileStream = fs.createReadStream(filePath) Papa.parse(fileStream, { header: true, + dynamicTyping: true, ...options, complete: results => { const df = new DataFrame(results.data, frameConfig); @@ -107,6 +109,7 @@ const $streamCSV = async (filePath: string, callback: (df: DataFrame) => void, o if (filePath.startsWith("http") || filePath.startsWith("https")) { const optionsWithDefaults = { header: true, + dynamicTyping: true, ...options, } return new Promise(resolve => { @@ -132,6 +135,7 @@ const $streamCSV = async (filePath: string, callback: (df: DataFrame) => void, o let count = -1 Papa.parse(fileStream, { header: true, + dynamicTyping: true, ...options, step: results => { const df = new DataFrame([results.data], { ...frameConfig, index: [count++] }); @@ -223,7 +227,7 @@ const $openCsvInputStream = (filePath: string, options: CsvInputOptionsNode) => if (filePath.startsWith("http") || filePath.startsWith("https")) { const dataStream = request.get(filePath); - const parseStream: any = Papa.parse(Papa.NODE_STREAM_INPUT, { header, ...options }); + const parseStream: any = Papa.parse(Papa.NODE_STREAM_INPUT, { header, dynamicTyping: true, ...options }); dataStream.pipe(parseStream); let count = -1 @@ -255,7 +259,7 @@ const $openCsvInputStream = (filePath: string, options: CsvInputOptionsNode) => const fileStream = fs.createReadStream(filePath) let count = -1 Papa.parse(fileStream, { - ...{ header, ...options }, + ...{ header, dynamicTyping: true, ...options }, step: results => { if (isFirstChunk) { if (header === true) { diff --git a/src/danfojs-browser/tests/io/csv.reader.test.js b/src/danfojs-browser/tests/io/csv.reader.test.js index d66aecd6..17892295 100644 --- a/src/danfojs-browser/tests/io/csv.reader.test.js +++ b/src/danfojs-browser/tests/io/csv.reader.test.js @@ -58,6 +58,16 @@ describe("readCSV", function () { ]); }); + it("Read remote csv file works and returns correct data type", async function () { + const remoteFile = "https://raw.githubusercontent.com/javascriptdata/danfojs/dev/src/danfojs-node/test/samples/titanic.csv"; + let df = await dfd.readCSV(remoteFile, { header: true, preview: 2 }); + const values = [ + [ 0, 3, 'Mr. Owen Harris Braund', 'male', 22, 1, 0, 7.25 ], + [ 1, 1, 'Mrs. John Bradley (Florence Briggs Thayer) Cumings', 'female', 38, 1, 0, 71.2833 ] + ]; + assert.deepEqual(df.values, values); + }); + }); // describe("streamCSV", function () { diff --git a/src/danfojs-node/test/io/csv.reader.test.ts b/src/danfojs-node/test/io/csv.reader.test.ts index 752f7ed2..88fcc416 100644 --- a/src/danfojs-node/test/io/csv.reader.test.ts +++ b/src/danfojs-node/test/io/csv.reader.test.ts @@ -59,7 +59,19 @@ describe("readCSV", function () { 'int32', 'float32' ]); }); - + it("Read local csv with correct types and format works", async function () { + const filePath = path.join(process.cwd(), "test", "samples", "iris.csv"); + let df: any = await readCSV(filePath, { header: true, preview: 5 }); + const values = [ + [5.1, 3.5, 1.4, 0.2, 0.0], + [4.9, 3.0, 1.4, 0.2, 0.0], + [4.7, 3.2, 1.3, 0.2, 0.0], + [4.6, 3.1, 1.5, 0.2, 0.0], + [5.0, 3.6, 1.4, 0.2, 0.0], + ] + console.log(df.values) + assert.deepEqual(df.values, values); + }); // it("Read remote csv file works", async function () { // const remoteFile = "https://raw.githubusercontent.com/opensource9ja/danfojs/dev/danfojs-node/tests/samples/titanic.csv" // let df: any = await readCSV(remoteFile, { header: true, preview: 5 }); diff --git a/src/danfojs-node/test/samples/iris.csv b/src/danfojs-node/test/samples/iris.csv new file mode 100644 index 00000000..95c15f1b --- /dev/null +++ b/src/danfojs-node/test/samples/iris.csv @@ -0,0 +1,151 @@ +sepal length (cm),sepal width (cm),petal length (cm),petal width (cm),target +5.1,3.5,1.4,0.2,0.0 +4.9,3.0,1.4,0.2,0.0 +4.7,3.2,1.3,0.2,0.0 +4.6,3.1,1.5,0.2,0.0 +5.0,3.6,1.4,0.2,0.0 +5.4,3.9,1.7,0.4,0.0 +4.6,3.4,1.4,0.3,0.0 +5.0,3.4,1.5,0.2,0.0 +4.4,2.9,1.4,0.2,0.0 +4.9,3.1,1.5,0.1,0.0 +5.4,3.7,1.5,0.2,0.0 +4.8,3.4,1.6,0.2,0.0 +4.8,3.0,1.4,0.1,0.0 +4.3,3.0,1.1,0.1,0.0 +5.8,4.0,1.2,0.2,0.0 +5.7,4.4,1.5,0.4,0.0 +5.4,3.9,1.3,0.4,0.0 +5.1,3.5,1.4,0.3,0.0 +5.7,3.8,1.7,0.3,0.0 +5.1,3.8,1.5,0.3,0.0 +5.4,3.4,1.7,0.2,0.0 +5.1,3.7,1.5,0.4,0.0 +4.6,3.6,1.0,0.2,0.0 +5.1,3.3,1.7,0.5,0.0 +4.8,3.4,1.9,0.2,0.0 +5.0,3.0,1.6,0.2,0.0 +5.0,3.4,1.6,0.4,0.0 +5.2,3.5,1.5,0.2,0.0 +5.2,3.4,1.4,0.2,0.0 +4.7,3.2,1.6,0.2,0.0 +4.8,3.1,1.6,0.2,0.0 +5.4,3.4,1.5,0.4,0.0 +5.2,4.1,1.5,0.1,0.0 +5.5,4.2,1.4,0.2,0.0 +4.9,3.1,1.5,0.2,0.0 +5.0,3.2,1.2,0.2,0.0 +5.5,3.5,1.3,0.2,0.0 +4.9,3.6,1.4,0.1,0.0 +4.4,3.0,1.3,0.2,0.0 +5.1,3.4,1.5,0.2,0.0 +5.0,3.5,1.3,0.3,0.0 +4.5,2.3,1.3,0.3,0.0 +4.4,3.2,1.3,0.2,0.0 +5.0,3.5,1.6,0.6,0.0 +5.1,3.8,1.9,0.4,0.0 +4.8,3.0,1.4,0.3,0.0 +5.1,3.8,1.6,0.2,0.0 +4.6,3.2,1.4,0.2,0.0 +5.3,3.7,1.5,0.2,0.0 +5.0,3.3,1.4,0.2,0.0 +7.0,3.2,4.7,1.4,1.0 +6.4,3.2,4.5,1.5,1.0 +6.9,3.1,4.9,1.5,1.0 +5.5,2.3,4.0,1.3,1.0 +6.5,2.8,4.6,1.5,1.0 +5.7,2.8,4.5,1.3,1.0 +6.3,3.3,4.7,1.6,1.0 +4.9,2.4,3.3,1.0,1.0 +6.6,2.9,4.6,1.3,1.0 +5.2,2.7,3.9,1.4,1.0 +5.0,2.0,3.5,1.0,1.0 +5.9,3.0,4.2,1.5,1.0 +6.0,2.2,4.0,1.0,1.0 +6.1,2.9,4.7,1.4,1.0 +5.6,2.9,3.6,1.3,1.0 +6.7,3.1,4.4,1.4,1.0 +5.6,3.0,4.5,1.5,1.0 +5.8,2.7,4.1,1.0,1.0 +6.2,2.2,4.5,1.5,1.0 +5.6,2.5,3.9,1.1,1.0 +5.9,3.2,4.8,1.8,1.0 +6.1,2.8,4.0,1.3,1.0 +6.3,2.5,4.9,1.5,1.0 +6.1,2.8,4.7,1.2,1.0 +6.4,2.9,4.3,1.3,1.0 +6.6,3.0,4.4,1.4,1.0 +6.8,2.8,4.8,1.4,1.0 +6.7,3.0,5.0,1.7,1.0 +6.0,2.9,4.5,1.5,1.0 +5.7,2.6,3.5,1.0,1.0 +5.5,2.4,3.8,1.1,1.0 +5.5,2.4,3.7,1.0,1.0 +5.8,2.7,3.9,1.2,1.0 +6.0,2.7,5.1,1.6,1.0 +5.4,3.0,4.5,1.5,1.0 +6.0,3.4,4.5,1.6,1.0 +6.7,3.1,4.7,1.5,1.0 +6.3,2.3,4.4,1.3,1.0 +5.6,3.0,4.1,1.3,1.0 +5.5,2.5,4.0,1.3,1.0 +5.5,2.6,4.4,1.2,1.0 +6.1,3.0,4.6,1.4,1.0 +5.8,2.6,4.0,1.2,1.0 +5.0,2.3,3.3,1.0,1.0 +5.6,2.7,4.2,1.3,1.0 +5.7,3.0,4.2,1.2,1.0 +5.7,2.9,4.2,1.3,1.0 +6.2,2.9,4.3,1.3,1.0 +5.1,2.5,3.0,1.1,1.0 +5.7,2.8,4.1,1.3,1.0 +6.3,3.3,6.0,2.5,2.0 +5.8,2.7,5.1,1.9,2.0 +7.1,3.0,5.9,2.1,2.0 +6.3,2.9,5.6,1.8,2.0 +6.5,3.0,5.8,2.2,2.0 +7.6,3.0,6.6,2.1,2.0 +4.9,2.5,4.5,1.7,2.0 +7.3,2.9,6.3,1.8,2.0 +6.7,2.5,5.8,1.8,2.0 +7.2,3.6,6.1,2.5,2.0 +6.5,3.2,5.1,2.0,2.0 +6.4,2.7,5.3,1.9,2.0 +6.8,3.0,5.5,2.1,2.0 +5.7,2.5,5.0,2.0,2.0 +5.8,2.8,5.1,2.4,2.0 +6.4,3.2,5.3,2.3,2.0 +6.5,3.0,5.5,1.8,2.0 +7.7,3.8,6.7,2.2,2.0 +7.7,2.6,6.9,2.3,2.0 +6.0,2.2,5.0,1.5,2.0 +6.9,3.2,5.7,2.3,2.0 +5.6,2.8,4.9,2.0,2.0 +7.7,2.8,6.7,2.0,2.0 +6.3,2.7,4.9,1.8,2.0 +6.7,3.3,5.7,2.1,2.0 +7.2,3.2,6.0,1.8,2.0 +6.2,2.8,4.8,1.8,2.0 +6.1,3.0,4.9,1.8,2.0 +6.4,2.8,5.6,2.1,2.0 +7.2,3.0,5.8,1.6,2.0 +7.4,2.8,6.1,1.9,2.0 +7.9,3.8,6.4,2.0,2.0 +6.4,2.8,5.6,2.2,2.0 +6.3,2.8,5.1,1.5,2.0 +6.1,2.6,5.6,1.4,2.0 +7.7,3.0,6.1,2.3,2.0 +6.3,3.4,5.6,2.4,2.0 +6.4,3.1,5.5,1.8,2.0 +6.0,3.0,4.8,1.8,2.0 +6.9,3.1,5.4,2.1,2.0 +6.7,3.1,5.6,2.4,2.0 +6.9,3.1,5.1,2.3,2.0 +5.8,2.7,5.1,1.9,2.0 +6.8,3.2,5.9,2.3,2.0 +6.7,3.3,5.7,2.5,2.0 +6.7,3.0,5.2,2.3,2.0 +6.3,2.5,5.0,1.9,2.0 +6.5,3.0,5.2,2.0,2.0 +6.2,3.4,5.4,2.3,2.0 +5.9,3.0,5.1,1.8,2.0