|
| 1 | +#' Recode and replace values |
| 2 | +#' |
| 3 | +#' @description |
| 4 | +#' |
| 5 | +#' - `vec_recode_values()` constructs an entirely new vector by recoding the |
| 6 | +#' values from `x` specified in `from` to the corresponding values in `to`. If |
| 7 | +#' there are values in `x` not matched by `from`, then they are recoded to the |
| 8 | +#' `default` value. |
| 9 | +#' |
| 10 | +#' - `vec_replace_values()` updates an existing vector by replacing the values |
| 11 | +#' from `x` specified in `from` with the corresponding values in `to`. In this |
| 12 | +#' case, `to` must have the same type as `x` and values in `x` not matched by |
| 13 | +#' `from` pass through untouched. |
| 14 | +#' |
| 15 | +#' @inheritParams rlang::args_dots_empty |
| 16 | +#' @inheritParams rlang::args_error_context |
| 17 | +#' |
| 18 | +#' @param x A vector. |
| 19 | +#' |
| 20 | +#' @param from Values to locate in `x` and map to values in `to`. |
| 21 | +#' |
| 22 | +#' Extra values present in `from` but not in `x` are ignored. |
| 23 | +#' |
| 24 | +#' - If `from_as_list_of_vectors = FALSE`, `from` must be a single vector of |
| 25 | +#' any size, which will be [cast][vctrs::theory-faq-coercion] to the type of |
| 26 | +#' `x`. |
| 27 | +#' |
| 28 | +#' - If `from_as_list_of_vectors = TRUE`, `from` must be a list of vectors of |
| 29 | +#' any size, which will individually be [cast][vctrs::theory-faq-coercion] |
| 30 | +#' to the type of `x`. |
| 31 | +#' |
| 32 | +#' @param to Values to map `from` to. |
| 33 | +#' |
| 34 | +#' The common type of `to` and `default` will determine the type of the |
| 35 | +#' output, unless `ptype` is provided. |
| 36 | +#' |
| 37 | +#' - If `to_as_list_of_vectors = FALSE`, `to` must be a single vector of size |
| 38 | +#' 1 or the same size as `from`. |
| 39 | +#' |
| 40 | +#' - If `to_as_list_of_vectors = TRUE`, `to` must be a list of vectors. The |
| 41 | +#' list itself must be size 1 or the same size as `from`. Each individual |
| 42 | +#' vector in the list must be size 1 or the same size as `x`. |
| 43 | +#' |
| 44 | +#' @param default Default value to use when there is a value present in `x` |
| 45 | +#' that is unmatched by a value in `from`. |
| 46 | +#' |
| 47 | +#' By default, a missing value is used as the default value. |
| 48 | +#' |
| 49 | +#' If supplied, `default` must be size 1 or the same size as `x`. |
| 50 | +#' |
| 51 | +#' Can only be set when `unmatched = "default"`. |
| 52 | +#' |
| 53 | +#' @param unmatched Handling of unmatched locations. |
| 54 | +#' |
| 55 | +#' One of: |
| 56 | +#' |
| 57 | +#' - `"default"` to use `default` in unmatched locations. |
| 58 | +#' |
| 59 | +#' - `"error"` to error when there are unmatched locations. |
| 60 | +#' |
| 61 | +#' @param from_as_list_of_vectors,to_as_list_of_vectors Boolean values |
| 62 | +#' determining whether to treat `from` and `to` as vectors or as lists of |
| 63 | +#' vectors. See their parameter descriptions for more details. |
| 64 | +#' |
| 65 | +#' @param x_arg,from_arg,to_arg,default_arg Argument names used in error |
| 66 | +#' messages. |
| 67 | +#' |
| 68 | +#' @param ptype An optional override for the output type, which is usually |
| 69 | +#' computed as the common type of `to` and `default`. |
| 70 | +#' |
| 71 | +#' @returns |
| 72 | +#' A vector the same size as `x`. |
| 73 | +#' |
| 74 | +#' - For `vec_recode_values()`, the type of the output is computed as the common |
| 75 | +#' type of `to` and `default`, unless overridden by `ptype`. The names of the |
| 76 | +#' output come from the names of `to` and `default`. |
| 77 | +#' |
| 78 | +#' - For `vec_replace_values()`, the output will have the same type as `x`. |
| 79 | +#' The names of the output will be the same as the names of `x`. |
| 80 | +#' |
| 81 | +#' @name vec-recode-and-replace |
| 82 | +#' |
| 83 | +#' @examples |
| 84 | +#' x <- c(1, 2, 3, 1, 2, 4, NA, 5) |
| 85 | +#' |
| 86 | +#' # Imagine you have a pre-existing lookup table |
| 87 | +#' likert <- data.frame( |
| 88 | +#' from = c(1, 2, 3, 4, 5), |
| 89 | +#' to = c( |
| 90 | +#' "Strongly disagree", |
| 91 | +#' "Disagree", |
| 92 | +#' "Neutral", |
| 93 | +#' "Agree", |
| 94 | +#' "Strongly agree" |
| 95 | +#' ) |
| 96 | +#' ) |
| 97 | +#' vec_recode_values(x, from = likert$from, to = likert$to) |
| 98 | +#' |
| 99 | +#' # If you don't map all of the values, a `default` is used |
| 100 | +#' x <- c(1, 2, 3, 1, 2, 4, NA, 5, 6, 7) |
| 101 | +#' vec_recode_values(x, from = likert$from, to = likert$to) |
| 102 | +#' vec_recode_values(x, from = likert$from, to = likert$to, default = "Unknown") |
| 103 | +#' |
| 104 | +#' # If you want existing `NA`s to pass through, include a mapping for `NA` in |
| 105 | +#' # your lookup table |
| 106 | +#' likert <- data.frame( |
| 107 | +#' from = c(1, 2, 3, 4, 5, NA), |
| 108 | +#' to = c( |
| 109 | +#' "Strongly disagree", |
| 110 | +#' "Disagree", |
| 111 | +#' "Neutral", |
| 112 | +#' "Agree", |
| 113 | +#' "Strongly agree", |
| 114 | +#' NA |
| 115 | +#' ) |
| 116 | +#' ) |
| 117 | +#' vec_recode_values(x, from = likert$from, to = likert$to, default = "Unknown") |
| 118 | +#' |
| 119 | +#' # If you believe you've captured all of the cases, you can assert this with |
| 120 | +#' # `unmatched = "error"`, which will error if you've missed any cases |
| 121 | +#' # (including `NA`, which must be explicitly handled) |
| 122 | +#' try(vec_recode_values( |
| 123 | +#' x, |
| 124 | +#' from = likert$from, |
| 125 | +#' to = likert$to, |
| 126 | +#' unmatched = "error" |
| 127 | +#' )) |
| 128 | +#' |
| 129 | +#' if (require("tibble")) { |
| 130 | +#' # If you want to partially update `x`, retaining the type of `x` and |
| 131 | +#' # leaving values not covered by `from` alone, use `vec_replace_values()` |
| 132 | +#' universities <- c( |
| 133 | +#' "Duke", |
| 134 | +#' "Fake U", |
| 135 | +#' "Duke U", |
| 136 | +#' NA, |
| 137 | +#' "Chapel Hill", |
| 138 | +#' "UNC", |
| 139 | +#' NA, |
| 140 | +#' "Duke" |
| 141 | +#' ) |
| 142 | +#' |
| 143 | +#' standardize <- tibble::tribble( |
| 144 | +#' ~from, ~to, |
| 145 | +#' "Duke", "Duke University", |
| 146 | +#' "Duke U", "Duke University", |
| 147 | +#' "UNC", "UNC Chapel Hill", |
| 148 | +#' "Chapel Hill", "UNC Chapel Hill", |
| 149 | +#' ) |
| 150 | +#' vec_replace_values( |
| 151 | +#' universities, |
| 152 | +#' from = standardize$from, |
| 153 | +#' to = standardize$to |
| 154 | +#' ) |
| 155 | +#' |
| 156 | +#' # In this case, you can use a more powerful feature of |
| 157 | +#' # `vec_replace_values()`, `from_as_list_of_vectors`, which allows you to |
| 158 | +#' # provide a list of `from` vectors that each match multiple `from` values |
| 159 | +#' # to a single `to` value. `tribble()` can help you create these! |
| 160 | +#' standardize <- tibble::tribble( |
| 161 | +#' ~from, ~to, |
| 162 | +#' c("Duke", "Duke U"), "Duke University", |
| 163 | +#' c("UNC", "Chapel Hill"), "UNC Chapel Hill", |
| 164 | +#' ) |
| 165 | +#' |
| 166 | +#' # Note how `from` is a list column |
| 167 | +#' standardize |
| 168 | +#' |
| 169 | +#' vec_replace_values( |
| 170 | +#' universities, |
| 171 | +#' from = standardize$from, |
| 172 | +#' to = standardize$to, |
| 173 | +#' from_as_list_of_vectors = TRUE |
| 174 | +#' ) |
| 175 | +#' |
| 176 | +#' # `vec_replace_values()` is also a useful way to map from or to `NA` |
| 177 | +#' vec_replace_values(universities, from = NA, to = "Unknown") |
| 178 | +#' vec_replace_values(universities, from = "Fake U", to = NA) |
| 179 | +#' } |
| 180 | +NULL |
| 181 | + |
#' @rdname vec-recode-and-replace
#' @export
vec_recode_values <- function(x,
                              ...,
                              from,
                              to,
                              default = NULL,
                              unmatched = "default",
                              from_as_list_of_vectors = FALSE,
                              to_as_list_of_vectors = FALSE,
                              ptype = NULL,
                              x_arg = "x",
                              from_arg = "from",
                              to_arg = "to",
                              default_arg = "default",
                              error_call = current_env()) {
  # Reject anything captured by `...`; every argument after `x` is named.
  check_dots_empty0(...)

  # `x_arg`, `from_arg`, `to_arg`, `default_arg`, and `error_call` are not
  # passed positionally. The function's own frame is handed over as the final
  # argument instead -- presumably so the native routine can look them up
  # there; confirm against the C implementation.
  .Call(
    ffi_vec_recode_values,
    x, from, to,
    default, unmatched,
    from_as_list_of_vectors, to_as_list_of_vectors,
    ptype,
    environment()
  )
}
| 214 | + |
#' @rdname vec-recode-and-replace
#' @export
vec_replace_values <- function(x,
                               ...,
                               from,
                               to,
                               from_as_list_of_vectors = FALSE,
                               to_as_list_of_vectors = FALSE,
                               x_arg = "x",
                               from_arg = "from",
                               to_arg = "to",
                               error_call = current_env()) {
  # Reject anything captured by `...`; every argument after `x` is named.
  check_dots_empty0(...)

  # `x_arg`, `from_arg`, `to_arg`, and `error_call` are not passed
  # positionally. The function's own frame is handed over as the final
  # argument instead -- presumably so the native routine can look them up
  # there; confirm against the C implementation.
  .Call(
    ffi_vec_replace_values,
    x, from, to,
    from_as_list_of_vectors, to_as_list_of_vectors,
    environment()
  )
}
0 commit comments