diff --git a/NEWS.md b/NEWS.md index 266c9a46f..0c07317d0 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,5 +1,10 @@ # vctrs (development version) +* `vec_locate_sorted_groups()` has gained an `appearance` argument to optionally + return group keys in the order of their first appearance. This makes + `vec_locate_sorted_groups()` almost identical to `vec_group_loc()`, but they + are implemented with very different algorithms (#1747). + # vctrs 0.5.1 * Fix for CRAN checks. diff --git a/R/order.R b/R/order.R index 47c7f167a..64d541450 100644 --- a/R/order.R +++ b/R/order.R @@ -185,8 +185,15 @@ vec_sort_radix <- function(x, #' #' `vec_locate_sorted_groups()` returns a data frame containing a `key` column #' with sorted unique groups, and a `loc` column with the locations of each -#' group in `x`. It is similar to [vec_group_loc()], except the groups are -#' returned sorted rather than by first appearance. +#' group in `x`. +#' +#' `vec_locate_sorted_groups()` is very similar to [vec_group_loc()], except +#' the groups are typically sorted by value rather than by first appearance. +#' If `appearance = TRUE`, then the two functions are roughly identical, with +#' the main difference being that `vec_locate_sorted_groups(appearance = TRUE)` +#' computes the groups using a sort-based approach, and `vec_group_loc()` +#' computes them using a hash-based approach. One may be faster than the other +#' depending on the structure of the input data. #' #' @details #' `vec_locate_sorted_groups(x)` is equivalent to, but faster than: @@ -198,6 +205,14 @@ vec_sort_radix <- function(x, #' #' @inheritParams order-radix #' +#' @param appearance Ordering of returned group keys. +#' +#' If `FALSE`, the default, group keys are returned sorted by value. +#' +#' If `TRUE`, group keys are returned sorted by first appearance in `x`. This +#' means `direction`, `na_value`, and `chr_proxy_collate` no longer have any +#' effect. +#' #' @return #' A two column data frame with size equal to `vec_size(vec_unique(x))`. #' * A `key` column of type `vec_ptype(x)`. @@ -215,16 +230,21 @@ vec_sort_radix <- function(x, #' ) #' #' # `vec_locate_sorted_groups()` is similar to `vec_group_loc()`, except keys -#' # are returned ordered rather than by first appearance. +#' # are returned ordered rather than by first appearance by default. #' vec_locate_sorted_groups(df) -#' #' vec_group_loc(df) +#' +#' # Setting `appearance = TRUE` makes `vec_locate_sorted_groups()` mostly +#' # equivalent to `vec_group_loc()`, but their underlying algorithms are very +#' # different. +#' vec_locate_sorted_groups(df, appearance = TRUE) vec_locate_sorted_groups <- function(x, ..., direction = "asc", na_value = "largest", nan_distinct = FALSE, - chr_proxy_collate = NULL) { + chr_proxy_collate = NULL, + appearance = FALSE) { check_dots_empty0(...) .Call( @@ -233,7 +253,8 @@ vec_locate_sorted_groups <- function(x, direction, na_value, nan_distinct, - chr_proxy_collate + chr_proxy_collate, + appearance ) } @@ -245,9 +266,9 @@ vec_order_info <- function(x, na_value = "largest", nan_distinct = FALSE, chr_proxy_collate = NULL, - chr_ordered = TRUE) { + appearance = FALSE) { check_dots_empty0(...) - .Call(vctrs_order_info, x, direction, na_value, nan_distinct, chr_proxy_collate, chr_ordered) + .Call(vctrs_order_info, x, direction, na_value, nan_distinct, chr_proxy_collate, appearance) } # ------------------------------------------------------------------------------ diff --git a/man/vec_locate_sorted_groups.Rd b/man/vec_locate_sorted_groups.Rd index b03b99d9a..5889c178e 100644 --- a/man/vec_locate_sorted_groups.Rd +++ b/man/vec_locate_sorted_groups.Rd @@ -10,7 +10,8 @@ vec_locate_sorted_groups( direction = "asc", na_value = "largest", nan_distinct = FALSE, - chr_proxy_collate = NULL + chr_proxy_collate = NULL, + appearance = FALSE ) } \arguments{ @@ -56,6 +57,14 @@ columns. Common transformation functions include: \code{tolower()} for case-insensitive ordering and \code{stringi::stri_sort_key()} for locale-aware ordering.} + +\item{appearance}{Ordering of returned group keys. + +If \code{FALSE}, the default, group keys are returned sorted by value. + +If \code{TRUE}, group keys are returned sorted by first appearance in \code{x}. This +means \code{direction}, \code{na_value}, and \code{chr_proxy_collate} no longer have any +effect.} } \value{ A two column data frame with size equal to \code{vec_size(vec_unique(x))}. @@ -69,8 +78,15 @@ A two column data frame with size equal to \code{vec_size(vec_unique(x))}. \code{vec_locate_sorted_groups()} returns a data frame containing a \code{key} column with sorted unique groups, and a \code{loc} column with the locations of each -group in \code{x}. It is similar to \code{\link[=vec_group_loc]{vec_group_loc()}}, except the groups are -returned sorted rather than by first appearance. +group in \code{x}. + +\code{vec_locate_sorted_groups()} is very similar to \code{\link[=vec_group_loc]{vec_group_loc()}}, except +the groups are typically sorted by value rather than by first appearance. +If \code{appearance = TRUE}, then the two functions are roughly identical, with +the main difference being that \code{vec_locate_sorted_groups(appearance = TRUE)} +computes the groups using a sort-based approach, and \code{vec_group_loc()} +computes them using a hash-based approach. One may be faster than the other +depending on the structure of the input data. } \details{ \code{vec_locate_sorted_groups(x)} is equivalent to, but faster than: @@ -93,9 +109,13 @@ df <- data.frame( ) # `vec_locate_sorted_groups()` is similar to `vec_group_loc()`, except keys -# are returned ordered rather than by first appearance. +# are returned ordered rather than by first appearance by default. vec_locate_sorted_groups(df) - vec_group_loc(df) + +# Setting `appearance = TRUE` makes `vec_locate_sorted_groups()` mostly +# equivalent to `vec_group_loc()`, but their underlying algorithms are very +# different. +vec_locate_sorted_groups(df, appearance = TRUE) } \keyword{internal} diff --git a/src/init.c b/src/init.c index cbff4f412..705692ae8 100644 --- a/src/init.c +++ b/src/init.c @@ -128,7 +128,7 @@ extern SEXP vctrs_locate_complete(SEXP); extern SEXP vctrs_detect_complete(SEXP); extern SEXP vctrs_normalize_encoding(SEXP); extern SEXP vctrs_order(SEXP, SEXP, SEXP, SEXP, SEXP); -extern SEXP vctrs_locate_sorted_groups(SEXP, SEXP, SEXP, SEXP, SEXP); +extern SEXP vctrs_locate_sorted_groups(SEXP, SEXP, SEXP, SEXP, SEXP, SEXP); extern SEXP vctrs_order_info(SEXP, SEXP, SEXP, SEXP, SEXP, SEXP); extern r_obj* ffi_vec_unrep(r_obj*); extern SEXP vctrs_fill_missing(SEXP, SEXP, SEXP); @@ -303,7 +303,7 @@ static const R_CallMethodDef CallEntries[] = { {"vctrs_detect_complete", (DL_FUNC) &vctrs_detect_complete, 1}, {"vctrs_normalize_encoding", (DL_FUNC) &vctrs_normalize_encoding, 1}, {"vctrs_order", (DL_FUNC) &vctrs_order, 5}, - {"vctrs_locate_sorted_groups", (DL_FUNC) &vctrs_locate_sorted_groups, 5}, + {"vctrs_locate_sorted_groups", (DL_FUNC) &vctrs_locate_sorted_groups, 6}, {"vctrs_order_info", (DL_FUNC) &vctrs_order_info, 6}, {"ffi_vec_unrep", (DL_FUNC) &ffi_vec_unrep, 1}, {"vctrs_fill_missing", (DL_FUNC) &vctrs_fill_missing, 3}, diff --git a/src/match-joint.c b/src/match-joint.c index 6b8e7fa1c..ff999de55 100644 --- a/src/match-joint.c +++ b/src/match-joint.c @@ -115,7 +115,7 @@ r_obj* vec_joint_xtfrm(r_obj* x, chrs_smallest, nan_distinct, r_null, - true + false ), &n_prot); r_obj* y_info = KEEP_N(vec_order_info( @@ -124,7 +124,7 @@ r_obj* vec_joint_xtfrm(r_obj* x, chrs_smallest, nan_distinct, r_null, - true + false ), &n_prot); const int* v_x_o = r_int_cbegin(r_list_get(x_info, 0)); diff --git a/src/match.c b/src/match.c index dabea1b62..73f4d609b 100644 --- a/src/match.c +++ b/src/match.c @@ -2012,7 +2012,7 @@ r_obj* compute_nesting_container_info(r_obj* haystack, chrs_smallest, true, r_null, - true + false ), &n_prot); r_obj* o_haystack = r_list_get(info, 0); @@ -2074,7 +2074,7 @@ r_obj* compute_nesting_container_info(r_obj* haystack, chrs_smallest, true, r_null, - true + false ); r_obj* outer_group_sizes = KEEP_N(r_list_get(info, 1), &n_prot); v_outer_group_sizes = r_int_cbegin(outer_group_sizes); diff --git a/src/order.c b/src/order.c index c87599fd6..f637703cb 100644 --- a/src/order.c +++ b/src/order.c @@ -194,7 +194,7 @@ static SEXP vec_order_info_impl(SEXP x, SEXP na_value, bool nan_distinct, SEXP chr_proxy_collate, - bool chr_ordered, + bool appearance, bool group_sizes); // [[ include("order.h") ]] @@ -203,9 +203,9 @@ SEXP vec_order(SEXP x, SEXP na_value, bool nan_distinct, SEXP chr_proxy_collate) { - const bool chr_ordered = true; + const bool appearance = false; const bool group_sizes = false; - SEXP info = vec_order_info_impl(x, direction, na_value, nan_distinct, chr_proxy_collate, chr_ordered, group_sizes); + SEXP info = vec_order_info_impl(x, direction, na_value, nan_distinct, chr_proxy_collate, appearance, group_sizes); return r_list_get(info, 0); } @@ -215,22 +215,26 @@ static SEXP vec_locate_sorted_groups(SEXP x, SEXP direction, SEXP na_value, bool nan_distinct, - SEXP chr_proxy_collate); + SEXP chr_proxy_collate, + bool appearance); // [[ register() ]] SEXP vctrs_locate_sorted_groups(SEXP x, SEXP direction, SEXP na_value, SEXP nan_distinct, - SEXP chr_proxy_collate) { + SEXP chr_proxy_collate, + SEXP appearance) { bool c_nan_distinct = parse_nan_distinct(nan_distinct); + bool c_appearance = r_arg_as_bool(appearance, "appearance"); return vec_locate_sorted_groups( x, direction, na_value, c_nan_distinct, - chr_proxy_collate + chr_proxy_collate, + c_appearance ); } @@ -239,16 +243,15 @@ SEXP vec_locate_sorted_groups(SEXP x, SEXP direction, SEXP na_value, bool nan_distinct, - SEXP chr_proxy_collate) { - const bool chr_ordered = true; - + SEXP chr_proxy_collate, + bool appearance) { SEXP info = KEEP(vec_order_info( x, direction, na_value, nan_distinct, chr_proxy_collate, - chr_ordered + appearance )); SEXP o = r_list_get(info, 0); @@ -257,6 +260,7 @@ SEXP vec_locate_sorted_groups(SEXP x, SEXP sizes = r_list_get(info, 1); const int* p_sizes = r_int_cbegin(sizes); + r_ssize x_size = r_length(o); r_ssize n_groups = r_length(sizes); SEXP loc = KEEP(r_alloc_list(n_groups)); @@ -264,23 +268,52 @@ SEXP vec_locate_sorted_groups(SEXP x, SEXP key_loc = KEEP(r_alloc_integer(n_groups)); int* p_key_loc = r_int_begin(key_loc); - int start = 0; + if (appearance) { + SEXP o_appearance = r_list_get(info, 3); + const int* p_o_appearance = r_int_cbegin(o_appearance); - for (r_ssize i = 0; i < n_groups; ++i) { - p_key_loc[i] = p_o[start]; + // Accumulate group starts, overwriting group sizes memory. + // Group starts are necessary to work with `o_appearance`. + r_ssize start = 0; + int* p_starts = r_int_begin(sizes); - const int size = p_sizes[i]; + for (r_ssize i = 0; i < n_groups; ++i) { + const r_ssize size = p_sizes[i]; + p_starts[i] = start; + start += size; + } - SEXP elt = r_alloc_integer(size); - r_list_poke(loc, i, elt); - int* p_elt = r_int_begin(elt); + for (r_ssize i = 0; i < n_groups; ++i) { + const r_ssize index = p_o_appearance[i] - 1; + r_ssize start = p_starts[index]; + const r_ssize next_start = (index == n_groups - 1) ? x_size : p_starts[index + 1]; + const r_ssize size = next_start - start; - R_len_t k = 0; + p_key_loc[i] = p_o[start]; - for (int j = 0; j < size; ++j) { - p_elt[k] = p_o[start]; - ++start; - ++k; + SEXP elt = r_alloc_integer(size); + r_list_poke(loc, i, elt); + int* p_elt = r_int_begin(elt); + + for (r_ssize j = 0; j < size; ++j, ++start) { + p_elt[j] = p_o[start]; + } + } + } else { + r_ssize start = 0; + + for (r_ssize i = 0; i < n_groups; ++i) { + const r_ssize size = p_sizes[i]; + + p_key_loc[i] = p_o[start]; + + SEXP elt = r_alloc_integer(size); + r_list_poke(loc, i, elt); + int* p_elt = r_int_begin(elt); + + for (r_ssize j = 0; j < size; ++j, ++start) { + p_elt[j] = p_o[start]; + } } } @@ -306,11 +339,14 @@ SEXP vec_locate_sorted_groups(SEXP x, // ----------------------------------------------------------------------------- /* - * Returns a list of size three. + * Returns a list of length three or four: * - The first element of the list contains the ordering as an integer vector. * - The second element of the list contains the group sizes as an integer * vector. * - The third element of the list contains the max group size as an integer. + * - The optional fourth element of the list contains an additional ordering + * integer vector that re-orders the sorted unique values of `x` to generate + * an appearance ordering. It is only present if `appearance` is `true`. */ // [[ include("order.h") ]] SEXP vec_order_info(SEXP x, @@ -318,9 +354,9 @@ SEXP vec_order_info(SEXP x, SEXP na_value, bool nan_distinct, SEXP chr_proxy_collate, - bool chr_ordered) { + bool appearance) { const bool group_sizes = true; - return vec_order_info_impl(x, direction, na_value, nan_distinct, chr_proxy_collate, chr_ordered, group_sizes); + return vec_order_info_impl(x, direction, na_value, nan_distinct, chr_proxy_collate, appearance, group_sizes); } // [[ register() ]] @@ -329,10 +365,10 @@ SEXP vctrs_order_info(SEXP x, SEXP na_value, SEXP nan_distinct, SEXP chr_proxy_collate, - SEXP chr_ordered) { + SEXP appearance) { bool c_nan_distinct = parse_nan_distinct(nan_distinct); - bool c_chr_ordered = r_bool_as_int(chr_ordered); - return vec_order_info(x, direction, na_value, c_nan_distinct, chr_proxy_collate, c_chr_ordered); + bool c_appearance = r_bool_as_int(appearance); + return vec_order_info(x, direction, na_value, c_nan_distinct, chr_proxy_collate, c_appearance); } static inline size_t vec_compute_n_bytes_lazy_raw(SEXP x, const enum vctrs_type type); @@ -365,11 +401,15 @@ SEXP vec_order_info_impl(SEXP x, SEXP na_value, bool nan_distinct, SEXP chr_proxy_collate, - bool chr_ordered, + bool appearance, bool group_sizes) { // TODO call struct r_lazy call = r_lazy_null; + if (appearance && !group_sizes) { + r_stop_internal("Can't set `appearance` without `group_sizes`."); + } + int n_prot = 0; SEXP decreasing = PROTECT_N(parse_direction(direction), &n_prot); @@ -447,6 +487,10 @@ SEXP vec_order_info_impl(SEXP x, struct truelength_info* p_truelength_info = new_truelength_info(size); PROTECT_TRUELENGTH_INFO(p_truelength_info, &n_prot); + // If we are prepping for order-by-appearance, we internally handle character + // vectors with `chr_appearance()`, which is much faster than `chr_order()` + const bool chr_ordered = !appearance; + struct order* p_order = new_order(size); PROTECT_ORDER(p_order, &n_prot); @@ -468,7 +512,9 @@ SEXP vec_order_info_impl(SEXP x, p_truelength_info ); - SEXP out = PROTECT_N(r_alloc_list(3), &n_prot); + const r_ssize out_size = 1 + (2 * group_sizes) + (1 * appearance); + + SEXP out = PROTECT_N(r_alloc_list(out_size), &n_prot); r_list_poke(out, 0, p_order->data); if (group_sizes) { @@ -479,6 +525,60 @@ SEXP vec_order_info_impl(SEXP x, r_list_poke(out, 2, r_int((int) p_group_info->max_group_size)); } + if (appearance) { + struct group_info* p_group_info = groups_current(p_group_infos); + + const r_ssize n_groups = p_group_info->n_groups; + const int* p_sizes = p_group_info->p_data; + + // Order of the unique keys + SEXP keys = PROTECT_N(r_alloc_integer(n_groups), &n_prot); + int* p_keys = r_int_begin(keys); + + r_ssize start = 0; + + for (r_ssize i = 0; i < n_groups; ++i) { + p_keys[i] = p_order->p_data[start]; + start += p_sizes[i]; + } + + // Appearance order of the unique keys + struct order* p_order_appearance = new_order(n_groups); + PROTECT_ORDER(p_order_appearance, &n_prot); + + // Ascending order is the only option that matters, + // as this is an integer vector with no missing values + SEXP decreasing = r_false; + SEXP na_last = r_true; + const bool nan_distinct = false; + const bool chr_ordered = false; + const r_ssize size = n_groups; + const enum vctrs_type type = VCTRS_TYPE_integer; + + // Turn off group tracking + p_group_infos->ignore_groups = true; + + vec_order_switch( + keys, + decreasing, + na_last, + nan_distinct, + chr_ordered, + size, + type, + p_order_appearance, + p_lazy_x_chunk, + p_lazy_x_aux, + p_lazy_o_aux, + p_lazy_bytes, + p_lazy_counts, + p_group_infos, + p_truelength_info + ); + + r_list_poke(out, 3, p_order_appearance->data); + } + UNPROTECT(n_prot); return out; } diff --git a/src/order.h b/src/order.h index c3f3c6310..e03f57905 100644 --- a/src/order.h +++ b/src/order.h @@ -28,7 +28,7 @@ SEXP vec_order_info(SEXP x, SEXP na_value, bool nan_distinct, SEXP chr_proxy_collate, - bool chr_ordered); + bool appearance); // ----------------------------------------------------------------------------- diff --git a/src/rank.c b/src/rank.c index 3a1e0370c..f68b2af80 100644 --- a/src/rank.c +++ b/src/rank.c @@ -81,9 +81,9 @@ r_obj* vec_rank(r_obj* x, r_obj* rank = KEEP(r_alloc_integer(rank_size)); int* v_rank = r_int_begin(rank); - const bool chr_ordered = true; + const bool appearance = false; - r_obj* info = KEEP(vec_order_info(x, direction, na_value, nan_distinct, chr_proxy_collate, chr_ordered)); + r_obj* info = KEEP(vec_order_info(x, direction, na_value, nan_distinct, chr_proxy_collate, appearance)); r_obj* order = r_list_get(info, 0); const int* v_order = r_int_cbegin(order); diff --git a/tests/testthat/_snaps/order.md b/tests/testthat/_snaps/order.md index 068598e9f..2251ae147 100644 --- a/tests/testthat/_snaps/order.md +++ b/tests/testthat/_snaps/order.md @@ -1,3 +1,27 @@ +# `appearance` is validated + + Code + vec_locate_sorted_groups(1, appearance = NA) + Condition + Error in `vec_locate_sorted_groups()`: + ! `appearance` must be `TRUE` or `FALSE`. + +--- + + Code + vec_locate_sorted_groups(1, appearance = 1) + Condition + Error in `vec_locate_sorted_groups()`: + ! `appearance` must be `TRUE` or `FALSE`. + +--- + + Code + vec_locate_sorted_groups(1, appearance = c(TRUE, FALSE)) + Condition + Error in `vec_locate_sorted_groups()`: + ! `appearance` must be `TRUE` or `FALSE`. + # dots must be empty (#1647) Code diff --git a/tests/testthat/test-order.R b/tests/testthat/test-order.R index 13014fb0d..6354888ad 100644 --- a/tests/testthat/test-order.R +++ b/tests/testthat/test-order.R @@ -1164,36 +1164,76 @@ test_that("`chr_proxy_collate` can result in keys being seen as identical", { expect_identical(vec_locate_sorted_groups(y, chr_proxy_collate = tolower), y_expect) }) +test_that("can request groups returned in appearance order", { + x <- c(2, 1, 5, 1, 2) + expect_identical( + vec_locate_sorted_groups(x, appearance = TRUE), + data_frame(key = c(2, 1, 5), loc = list(c(1L, 5L), c(2L, 4L), 3L)) + ) + + x <- c(1, 2, 4, 5, 3) + expect_identical( + vec_locate_sorted_groups(x, appearance = TRUE), + data_frame(key = x, loc = list(1L, 2L, 3L, 4L, 5L)) + ) +}) + +test_that("appearance order isn't affected by `direction` or `na_value`", { + x <- c("b", "B", NA, "a", "B", "A") + + out <- vec_locate_sorted_groups(x, appearance = TRUE) + expect <- data_frame(key = c("b", "B", NA, "a", "A"), loc = list(1L, c(2L, 5L), 3L, 4L, 6L)) + + expect_identical(out, expect) + expect_identical(out, vec_locate_sorted_groups(x, appearance = TRUE, direction = "desc")) + expect_identical(out, vec_locate_sorted_groups(x, appearance = TRUE, na_value = "smallest")) +}) + +test_that("`appearance` is validated", { + expect_snapshot(error = TRUE, { + vec_locate_sorted_groups(1, appearance = NA) + }) + expect_snapshot(error = TRUE, { + vec_locate_sorted_groups(1, appearance = 1) + }) + expect_snapshot(error = TRUE, { + vec_locate_sorted_groups(1, appearance = c(TRUE, FALSE)) + }) +}) + # ------------------------------------------------------------------------------ -# `vec_order_info(chr_ordered = FALSE)` +# `vec_order_info(appearance = TRUE)` test_that("can order character vectors in appearance order", { x <- c("b", "a", "B", "B", "a") - info <- vec_order_info(x, chr_ordered = FALSE) + info <- vec_order_info(x, appearance = TRUE) expect_identical(info[[1]], c(1L, 2L, 5L, 3L, 4L)) expect_identical(info[[2]], c(1L, 2L, 2L)) expect_identical(info[[3]], 2L) + expect_identical(info[[4]], c(1L, 2L, 3L)) }) test_that("using appearance order means `direction` has no effect", { x <- c("b", "a", "B", "B", "a") - info1 <- vec_order_info(x, direction = "asc", chr_ordered = FALSE) - info2 <- vec_order_info(x, direction = "desc", chr_ordered = FALSE) + info1 <- vec_order_info(x, direction = "asc", appearance = TRUE) + info2 <- vec_order_info(x, direction = "desc", appearance = TRUE) expect_identical(info1[[1]], info2[[1]]) expect_identical(info1[[2]], info2[[2]]) expect_identical(info1[[3]], info2[[3]]) + expect_identical(info1[[4]], info2[[4]]) }) test_that("appearance order works with NA - `na_value` has no effect", { x <- c(NA, "foo", NA, "bar") - info <- vec_order_info(x, chr_ordered = FALSE) + info <- vec_order_info(x, appearance = TRUE) expect_identical(info[[1]], c(1L, 3L, 2L, 4L)) expect_identical(info[[2]], c(2L, 1L, 1L)) expect_identical(info[[3]], 2L) + expect_identical(info[[4]], c(1L, 2L, 3L)) }) test_that("appearance order can be mixed with regular ordering", { @@ -1202,11 +1242,28 @@ test_that("appearance order can be mixed with regular ordering", { df <- data_frame(x = x, y = y) # `y` breaks ties - info <- vec_order_info(df, chr_ordered = FALSE) + info <- vec_order_info(df, appearance = TRUE) expect_identical(info[[1]], c(1L, 5L, 2L, 6L, 3L, 4L)) expect_identical(info[[2]], c(1L, 1L, 2L, 1L, 1L)) expect_identical(info[[3]], 2L) + expect_identical(info[[4]], c(1L, 3L, 4L, 5L, 2L)) +}) + +test_that("appearance order can still separate `NA` from `NaN`", { + x <- c(NA, NaN, NA) + + info <- vec_order_info(x, appearance = TRUE, nan_distinct = FALSE) + expect_identical(info[[1]], 1:3) + expect_identical(info[[2]], 3L) + expect_identical(info[[3]], 3L) + expect_identical(info[[4]], 1L) + + info <- vec_order_info(x, appearance = TRUE, nan_distinct = TRUE) + expect_identical(info[[1]], c(2L, 1L, 3L)) + expect_identical(info[[2]], c(1L, 2L)) + expect_identical(info[[3]], 2L) + expect_identical(info[[4]], c(2L, 1L)) }) # ------------------------------------------------------------------------------