-
Notifications
You must be signed in to change notification settings - Fork 66
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Improve consistency of names handling in prototype functions #1020
base: main
Are you sure you want to change the base?
Changes from all commits
579ae5f
2126fde
fdfd2a0
66d299a
3b869ce
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,6 +1,7 @@ | ||
#include "vctrs.h" | ||
#include "utils.h" | ||
#include "arg-counter.h" | ||
#include "type-data-frame.h" | ||
|
||
// Initialised at load time | ||
static SEXP syms_vec_ptype_finalise_dispatch = NULL; | ||
|
@@ -9,6 +10,7 @@ static SEXP fns_vec_ptype_finalise_dispatch = NULL; | |
|
||
static inline SEXP vec_ptype_slice(SEXP x, SEXP empty); | ||
static SEXP s3_type(SEXP x, struct vctrs_arg* x_arg); | ||
static SEXP df_ptype(SEXP x, bool bare); | ||
|
||
// [[ register() ]] | ||
SEXP vctrs_ptype(SEXP x, SEXP x_arg) { | ||
|
@@ -30,13 +32,21 @@ SEXP vec_ptype(SEXP x, struct vctrs_arg* x_arg) { | |
case vctrs_type_character: return vec_ptype_slice(x, vctrs_shared_empty_chr); | ||
case vctrs_type_raw: return vec_ptype_slice(x, vctrs_shared_empty_raw); | ||
case vctrs_type_list: return vec_ptype_slice(x, vctrs_shared_empty_list); | ||
case vctrs_type_dataframe: return bare_df_map(x, &col_ptype); | ||
case vctrs_type_dataframe: return df_ptype(x, true); | ||
case vctrs_type_s3: return s3_type(x, x_arg); | ||
case vctrs_type_scalar: stop_scalar_type(x, x_arg); | ||
} | ||
never_reached("vec_ptype"); | ||
} | ||
|
||
// [[ include("vctrs.h") ]] | ||
SEXP vec_ptype_unnamed(SEXP x, struct vctrs_arg* x_arg) { | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. It does feel a bit awkward that we have to use There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I feel like |
||
SEXP out = PROTECT(vec_ptype(x, x_arg)); | ||
out = vec_set_names(out, R_NilValue); | ||
UNPROTECT(1); | ||
return out; | ||
} | ||
|
||
static SEXP col_ptype(SEXP x) { | ||
return vec_ptype(x, args_empty); | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Should the columns of the data frame be named? Or should names be stripped from them too? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Good point. I think they should be unnamed for consistency with vec-assign. |
||
} | ||
|
@@ -52,10 +62,10 @@ static inline SEXP vec_ptype_slice(SEXP x, SEXP empty) { | |
static SEXP s3_type(SEXP x, struct vctrs_arg* x_arg) { | ||
switch (class_type(x)) { | ||
case vctrs_class_bare_tibble: | ||
return bare_df_map(x, &col_ptype); | ||
return df_ptype(x, true); | ||
|
||
case vctrs_class_data_frame: | ||
return df_map(x, &col_ptype); | ||
return df_ptype(x, false); | ||
|
||
case vctrs_class_bare_data_frame: | ||
Rf_errorcall(R_NilValue, "Internal error: Bare data frames should be handled by `vec_ptype()`"); | ||
|
@@ -75,6 +85,24 @@ static SEXP s3_type(SEXP x, struct vctrs_arg* x_arg) { | |
return vec_slice(x, R_NilValue); | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. It would be interesting to consider a version of There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. If we did this, we'd have to reconsider how names are tracked in the lubridate PR. When we proxy, the names are added as a new column. This would get sliced and then restored even if we turned on the option to not preserve names. On the other hand, POSIXlt stores names directly on the There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. It seems like if we did this then we wouldn't even need a There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. We may still need the set-names call on S3 objects (or at least on S3 data frames) because the names might be encoded in a field, as in POSIXlt vectors. Or maybe we should enforce names as a special There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
I think I'm arguing above that a The proxy for POSIXlt is a data frame with a There might be other cases where this doesn't work, but it might be useful to say that a There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Maybe POSIXlt is not a good example. I was thinking the difference between named and unnamed record vectors of size zero would be the presence of a names field. This is equivalent to the presence or absence of a names attributes for normal vectors of size zero. |
||
} | ||
|
||
SEXP df_ptype(SEXP x, bool bare) { | ||
SEXP row_nms = PROTECT(df_rownames(x)); | ||
|
||
SEXP ptype = R_NilValue; | ||
if (bare) { | ||
ptype = PROTECT(bare_df_map(x, &col_ptype)); | ||
} else { | ||
ptype = PROTECT(df_map(x, &col_ptype)); | ||
} | ||
|
||
if (TYPEOF(row_nms) == STRSXP) { | ||
Rf_setAttrib(ptype, R_RowNamesSymbol, vctrs_shared_empty_chr); | ||
} | ||
|
||
UNPROTECT(2); | ||
return ptype; | ||
} | ||
|
||
static SEXP vec_ptype_finalise_unspecified(SEXP x); | ||
static SEXP vec_ptype_finalise_dispatch(SEXP x); | ||
|
||
|
@@ -161,9 +189,11 @@ SEXP vctrs_type_common(SEXP call, SEXP op, SEXP args, SEXP env) { | |
return out; | ||
} | ||
|
||
|
||
|
||
SEXP vctrs_type_common_impl(SEXP dots, SEXP ptype) { | ||
if (!vec_is_partial(ptype)) { | ||
return vec_ptype(ptype, args_dot_ptype); | ||
return vec_ptype_unnamed(ptype, args_dot_ptype); | ||
} | ||
|
||
if (r_is_true(r_peek_option("vctrs.no_guessing"))) { | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This doesn't feel quite right to me — I don't think we should be systematically stripping names, and then providing
names<-
methods that ignore the stripping. Why not just strip names on atomic vectors?There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
The partial objects are not vectors, which makes them weird in vctrs. This is just a stopgap. I don't think the hacks around partial vectors should make us pause.
As for record vectors, they should support
names<-
at some point, like other vectors. I would really prefer a general solution than ignoring S3 vectors.There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
More generally, maybe there really exists some vectors that do not support names. It doesn't seem ill-founded to require of these vectors to implement set-to-null as a no-op in their
names<-
method.