Add back default support for parquet fix #315 (#444)

* Add back default support for parquet ref #315
* Bump ver.
gesistsa · Jul 17, 2024 · 88aa095 · 88aa095
1 parent 894bd2e
commit 88aa095
Show file tree

Hide file tree

Showing 12 changed files with 18 additions and 14 deletions.
diff --git a/DESCRIPTION b/DESCRIPTION
@@ -1,7 +1,7 @@
 Package: rio
 Type: Package
 Title: A Swiss-Army Knife for Data I/O
-Version: 1.1.1
+Version: 1.2.0
 Authors@R: c(person("Jason", "Becker", role = "aut", email = "[email protected]"),
              person("Chung-hong", "Chan", role = c("aut", "cre"), email = "[email protected]",
 	     	     comment = c(ORCID = "0000-0002-6232-7530")),
@@ -53,7 +53,8 @@ Imports:
     writexl,
     lifecycle,
     R.utils,
-    readr
+    readr,
+    nanoparquet
 Suggests:
     datasets,
     bit64,

diff --git a/NEWS.md b/NEWS.md
@@ -1,6 +1,8 @@
-# rio 1.1.1.999 (development)
+# rio 1.2.0
 
 * Fix lintr issues #434 (h/t @bisaloo Hugo Gruson)
+* Drop support for R < 4.0.0; see #436
+* Add support for parquet in the import tier using `nanoparquet`; see rio 1.0.1 below.
 
 Bug fixes
 

diff --git a/R/export.R b/R/export.R
@@ -32,7 +32,7 @@
 #'     \item Weka Attribute-Relation File Format (.arff), using [foreign::write.arff()]
 #'     \item Fixed-width format data (.fwf), using [utils::write.table()] with `row.names = FALSE`, `quote = FALSE`, and `col.names = FALSE`
 #'     \item [CSVY](https://github.com/csvy) (CSV with a YAML metadata header) using [data.table::fwrite()].
-#'     \item Apache Arrow Parquet (.parquet), using [arrow::write_parquet()]
+#'     \item Apache Arrow Parquet (.parquet), using [nanoparquet::write_parquet()]
 #'     \item Feather R/Python interchange format (.feather), using [arrow::write_feather()]
 #'     \item Fast storage (.fst), using [fst::write.fst()]
 #'     \item JSON (.json), using [jsonlite::toJSON()]. In this case, `x` can be a variety of R objects, based on class mapping conventions in this paper: [https://arxiv.org/abs/1403.2805](https://arxiv.org/abs/1403.2805).

diff --git a/R/export_methods.R b/R/export_methods.R
@@ -282,7 +282,7 @@ export_delim <- function(file, x, fwrite = lifecycle::deprecated(), sep = "\t",
 
 #' @export
 .export.rio_parquet <- function(file, x, ...) {
-    .docall(arrow::write_parquet, ..., args = list(x = x, sink = file))
+    .docall(nanoparquet::write_parquet, ..., args = list(x = x, file = file))
 }
 
 #' @export

diff --git a/R/import.R b/R/import.R
@@ -42,7 +42,7 @@
 #'     \item Fortran data (no recognized extension), using [utils::read.fortran()]
 #'     \item Fixed-width format data (.fwf), using a faster version of [utils::read.fwf()] that requires a `widths` argument and by default in rio has `stringsAsFactors = FALSE`
 #'     \item [CSVY](https://github.com/csvy) (CSV with a YAML metadata header) using [data.table::fread()].
-#'     \item Apache Arrow Parquet (.parquet), using [arrow::read_parquet()]
+#'     \item Apache Arrow Parquet (.parquet), using [nanoparquet::read_parquet()]
 #'     \item Feather R/Python interchange format (.feather), using [arrow::read_feather()]
 #'     \item Fast storage (.fst), using [fst::read.fst()]
 #'     \item JSON (.json), using [jsonlite::fromJSON()]

diff --git a/R/import_methods.R b/R/import_methods.R
@@ -413,8 +413,8 @@ extract_html_row <- function(x, empty_value) {
 
 #' @export
 .import.rio_parquet <- function(file, which = 1, ...) {
-    .check_pkg_availability("arrow")
-    .docall(arrow::read_parquet, ..., args = list(file = file, as_data_frame = TRUE))
+    #.check_pkg_availability("arrow")
+    .docall(nanoparquet::read_parquet, ..., args = list(file = file, options = nanoparquet::parquet_options(class = "data.frame")))
 }
 
 #' @export

diff --git a/R/sysdata.rda b/R/sysdata.rda
diff --git a/README.md b/README.md
@@ -133,6 +133,7 @@ The full list of supported formats is below:
 | Gzip                                | gz / gzip                           | base           | base           | Default |                                |
 | Zip files                           | zip                                 | utils          | utils          | Default |                                |
 | Ambiguous file format               | dat                                 | data.table     |                | Default | Attempt as delimited text data |
+| Apache Arrow (Parquet)              | parquet                             | nanoparquet    | nanoparquet    | Default |                                |
 | CSVY (CSV + YAML metadata header)   | csvy                                | data.table     | data.table     | Default |                                |
 | Comma-separated data                | csv                                 | data.table     | data.table     | Default |                                |
 | Comma-separated data (European)     | csv2                                | data.table     | data.table     | Default |                                |
@@ -159,7 +160,6 @@ The full list of supported formats is below:
 | Text Representations of R Objects   | dump                                | base           | base           | Default |                                |
 | Weka Attribute-Relation File Format | arff / weka                         | foreign        | foreign        | Default |                                |
 | XBASE database files                | dbf                                 | foreign        | foreign        | Default |                                |
-| Apache Arrow (Parquet)              | parquet                             | arrow          | arrow          | Suggest |                                |
 | Clipboard                           | clipboard                           | clipr          | clipr          | Suggest | default is tsv                 |
 | EViews                              | eviews / wf1                        | hexView        |                | Suggest |                                |
 | Fast Storage                        | fst                                 | fst            | fst            | Suggest |                                |

diff --git a/data-raw/single.json b/data-raw/single.json
@@ -2,10 +2,10 @@
     {
         "input": "parquet",
         "format": "parquet",
-        "type": "suggest",
+        "type": "import",
         "format_name": "Apache Arrow (Parquet)",
-        "import_function": "arrow::read_parquet",
-        "export_function": "arrow::write_parquet",
+        "import_function": "nanoparquet::read_parquet",
+        "export_function": "nanoparquet::write_parquet",
         "note": ""
     },
     {

diff --git a/man/export.Rd b/man/export.Rd
diff --git a/man/import.Rd b/man/import.Rd
diff --git a/man/rio.Rd b/man/rio.Rd