Skip to content

Commit 856c443

Browse files
committed
Implement vec_recode_values() and vec_replace_values()
1 parent 2ab2cf4 commit 856c443

File tree

19 files changed

+2449
-36
lines changed

19 files changed

+2449
-36
lines changed

NAMESPACE

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -627,11 +627,13 @@ export(vec_ptype_full)
627627
export(vec_ptype_show)
628628
export(vec_rank)
629629
export(vec_rbind)
630+
export(vec_recode_values)
630631
export(vec_recycle)
631632
export(vec_recycle_common)
632633
export(vec_rep)
633634
export(vec_rep_each)
634635
export(vec_repeat)
636+
export(vec_replace_values)
635637
export(vec_restore)
636638
export(vec_run_sizes)
637639
export(vec_seq_along)

NEWS.md

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,7 @@
11
# vctrs (development version)
22

3+
* New `vec_recode_values()` and `vec_replace_values()` for recoding and replacing values. In particular, this makes it easy to recode a vector using a lookup table (#2027).
4+
35
* `vec_equal()` now efficiently internally recycles `x` and `y` elements of size 1 (#2028).
46

57
* `list_unchop()` now assigns names correctly when overlapping `indices` are involved (#2019).

R/recode.R

Lines changed: 239 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,239 @@
1+
#' Recode and replace values
2+
#'
3+
#' @description
4+
#'
5+
#' - `vec_recode_values()` constructs an entirely new vector by recoding the
6+
#' values from `x` specified in `from` to the corresponding values in `to`. If
7+
#' there are values in `x` not matched by `from`, then they are recoded to the
8+
#' `default` value.
9+
#'
10+
#' - `vec_replace_values()` updates an existing vector by replacing the values
11+
#' from `x` specified in `from` with the corresponding values in `to`. In this
12+
#' case, `to` must have the same type as `x` and values in `x` not matched by
13+
#' `from` pass through untouched.
14+
#'
15+
#' @inheritParams rlang::args_dots_empty
16+
#' @inheritParams rlang::args_error_context
17+
#'
18+
#' @param x A vector.
19+
#'
20+
#' @param from Values to locate in `x` and map to values in `to`.
21+
#'
22+
#' Extra values present in `from` but not in `x` are ignored.
23+
#'
24+
#' - If `from_as_list_of_vectors = FALSE`, `from` must be a single vector of
25+
#' any size, which will be [cast][vctrs::theory-faq-coercion] to the type of
26+
#' `x`.
27+
#'
28+
#' - If `from_as_list_of_vectors = TRUE`, `from` must be a list of vectors of
29+
#' any size, which will individually be [cast][vctrs::theory-faq-coercion]
30+
#' to the type of `x`.
31+
#'
32+
#' @param to Values to map `from` to.
33+
#'
34+
#' The common type of `to` and `default` will determine the type of the
35+
#' output, unless `ptype` is provided.
36+
#'
37+
#' - If `to_as_list_of_vectors = FALSE`, `to` must be a single vector of size
38+
#' 1 or the same size as `from`.
39+
#'
40+
#' - If `to_as_list_of_vectors = TRUE`, `to` must be a list of vectors. The
41+
#' list itself must be size 1 or the same size as `from`. Each individual
42+
#' vector in the list must be size 1 or the same size as `x`.
43+
#'
44+
#' @param default Default value to use when there is a value present in `x`
45+
#' that is unmatched by a value in `from`.
46+
#'
47+
#' By default, a missing value is used as the default value.
48+
#'
49+
#' If supplied, `default` must be size 1 or the same size as `x`.
50+
#'
51+
#' Can only be set when `unmatched = "default"`.
52+
#'
53+
#' @param unmatched Handling of unmatched locations.
54+
#'
55+
#' One of:
56+
#'
57+
#' - `"default"` to use `default` in unmatched locations.
58+
#'
59+
#' - `"error"` to error when there are unmatched locations.
60+
#'
61+
#' @param from_as_list_of_vectors,to_as_list_of_vectors Boolean values
62+
#' determining whether to treat `from` and `to` as vectors or as lists of
63+
#' vectors. See their parameter descriptions for more details.
64+
#'
65+
#' @param x_arg,from_arg,to_arg,default_arg Argument names used in error
66+
#' messages.
67+
#'
68+
#' @param ptype An optional override for the output type, which is usually
69+
#' computed as the common type of `to` and `default`.
70+
#'
71+
#' @returns
72+
#' A vector the same size as `x`.
73+
#'
74+
#' - For `vec_recode_values()`, the type of the output is computed as the common
75+
#' type of `to` and `default`, unless overridden by `ptype`. The names of the
76+
#' output come from the names of `to` and `default`.
77+
#'
78+
#' - For `vec_replace_values()`, the output will have the same type as `x`.
79+
#' The names of the output will be the same as the names of `x`.
80+
#'
81+
#' @name vec-recode-and-replace
82+
#'
83+
#' @examples
84+
#' x <- c(1, 2, 3, 1, 2, 4, NA, 5)
85+
#'
86+
#' # Imagine you have a pre-existing lookup table
87+
#' likert <- data.frame(
88+
#' from = c(1, 2, 3, 4, 5),
89+
#' to = c(
90+
#' "Strongly disagree",
91+
#' "Disagree",
92+
#' "Neutral",
93+
#' "Agree",
94+
#' "Strongly agree"
95+
#' )
96+
#' )
97+
#' vec_recode_values(x, from = likert$from, to = likert$to)
98+
#'
99+
#' # If you don't map all of the values, a `default` is used
100+
#' x <- c(1, 2, 3, 1, 2, 4, NA, 5, 6, 7)
101+
#' vec_recode_values(x, from = likert$from, to = likert$to)
102+
#' vec_recode_values(x, from = likert$from, to = likert$to, default = "Unknown")
103+
#'
104+
#' # If you want existing `NA`s to pass through, include a mapping for `NA` in
105+
#' # your lookup table
106+
#' likert <- data.frame(
107+
#' from = c(1, 2, 3, 4, 5, NA),
108+
#' to = c(
109+
#' "Strongly disagree",
110+
#' "Disagree",
111+
#' "Neutral",
112+
#' "Agree",
113+
#' "Strongly agree",
114+
#' NA
115+
#' )
116+
#' )
117+
#' vec_recode_values(x, from = likert$from, to = likert$to, default = "Unknown")
118+
#'
119+
#' # If you believe you've captured all of the cases, you can assert this with
120+
#' # `unmatched = "error"`, which will error if you've missed any cases
121+
#' # (including `NA`, which must be explicitly handled)
122+
#' try(vec_recode_values(
123+
#' x,
124+
#' from = likert$from,
125+
#' to = likert$to,
126+
#' unmatched = "error"
127+
#' ))
128+
#'
129+
#' if (require("tibble")) {
130+
#' # If you want to partially update `x`, retaining the type of `x` and
131+
#' # leaving values not covered by `from` alone, use `vec_replace_values()`
132+
#' universities <- c(
133+
#' "Duke",
134+
#' "Fake U",
135+
#' "Duke U",
136+
#' NA,
137+
#' "Chapel Hill",
138+
#' "UNC",
139+
#' NA,
140+
#' "Duke"
141+
#' )
142+
#'
143+
#' standardize <- tibble::tribble(
144+
#' ~from, ~to,
145+
#' "Duke", "Duke University",
146+
#' "Duke U", "Duke University",
147+
#' "UNC", "UNC Chapel Hill",
148+
#' "Chapel Hill", "UNC Chapel Hill",
149+
#' )
150+
#' vec_replace_values(
151+
#' universities,
152+
#' from = standardize$from,
153+
#' to = standardize$to
154+
#' )
155+
#'
156+
#' # In this case, you can use a more powerful feature of
157+
#' # `vec_replace_values()`, `from_as_list_of_vectors`, which allows you to
158+
#' # provide a list of `from` vectors that each match multiple `from` values
159+
#' # to a single `to` value. `tribble()` can help you create these!
160+
#' standardize <- tibble::tribble(
161+
#' ~from, ~to,
162+
#' c("Duke", "Duke U"), "Duke University",
163+
#' c("UNC", "Chapel Hill"), "UNC Chapel Hill",
164+
#' )
165+
#'
166+
#' # Note how `from` is a list column
167+
#' standardize
168+
#'
169+
#' vec_replace_values(
170+
#' universities,
171+
#' from = standardize$from,
172+
#' to = standardize$to,
173+
#' from_as_list_of_vectors = TRUE
174+
#' )
175+
#'
176+
#' # `vec_replace_values()` is also a useful way to map from or to `NA`
177+
#' vec_replace_values(universities, from = NA, to = "Unknown")
178+
#' vec_replace_values(universities, from = "Fake U", to = NA)
179+
#' }
180+
NULL
181+
182+
#' @rdname vec-recode-and-replace
183+
#' @export
184+
vec_recode_values <- function(
185+
x,
186+
...,
187+
from,
188+
to,
189+
default = NULL,
190+
unmatched = "default",
191+
from_as_list_of_vectors = FALSE,
192+
to_as_list_of_vectors = FALSE,
193+
ptype = NULL,
194+
x_arg = "x",
195+
from_arg = "from",
196+
to_arg = "to",
197+
default_arg = "default",
198+
error_call = current_env()
199+
) {
200+
# Thin R-level entry point: `...` is checked first, then all of the recoding
# work is delegated to the native routine `ffi_vec_recode_values`.
# NOTE(review): `check_dots_empty0()` is defined elsewhere; presumably it
# errors when anything is supplied through `...` - confirm.
check_dots_empty0(...)
201+
.Call(
202+
ffi_vec_recode_values,
203+
x,
204+
from,
205+
to,
206+
default,
207+
unmatched,
208+
from_as_list_of_vectors,
209+
to_as_list_of_vectors,
210+
ptype,
211+
# The function's own environment is passed as the final argument. `x_arg`,
# `from_arg`, `to_arg`, `default_arg`, and `error_call` are not forwarded
# individually, so the native side presumably looks them up in this frame -
# TODO confirm against the C implementation. Avoid introducing extra local
# variables here for that reason.
environment()
212+
)
213+
}
214+
215+
#' @rdname vec-recode-and-replace
216+
#' @export
217+
vec_replace_values <- function(
218+
x,
219+
...,
220+
from,
221+
to,
222+
from_as_list_of_vectors = FALSE,
223+
to_as_list_of_vectors = FALSE,
224+
x_arg = "x",
225+
from_arg = "from",
226+
to_arg = "to",
227+
error_call = current_env()
228+
) {
229+
# Thin R-level entry point: `...` is checked first, then all of the
# replacement work is delegated to the native routine
# `ffi_vec_replace_values`. Unlike `vec_recode_values()`, no `default`,
# `unmatched`, or `ptype` are accepted or forwarded.
# NOTE(review): `check_dots_empty0()` is defined elsewhere; presumably it
# errors when anything is supplied through `...` - confirm.
check_dots_empty0(...)
230+
.Call(
231+
ffi_vec_replace_values,
232+
x,
233+
from,
234+
to,
235+
from_as_list_of_vectors,
236+
to_as_list_of_vectors,
237+
# The function's own environment is passed as the final argument. `x_arg`,
# `from_arg`, `to_arg`, and `error_call` are not forwarded individually, so
# the native side presumably looks them up in this frame - TODO confirm
# against the C implementation. Avoid introducing extra local variables
# here for that reason.
environment()
238+
)
239+
}

_pkgdown.yml

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -64,6 +64,10 @@ reference:
6464
- vec_assign
6565
- vec_fill_missing
6666

67+
- title: Recoding
68+
contents:
69+
- vec_recode_values
70+
6771
- title: Equality and ordering
6872
contents:
6973
- vec_equal

0 commit comments

Comments
 (0)