Force species-level filling of missing newmoons when na_drop = FALSE, c…

…loses #294
weecology · Sep 19, 2024 · cc3404e · cc3404e
1 parent 6419678
commit cc3404e
Show file tree

Hide file tree

Showing 6 changed files with 53 additions and 26 deletions.
diff --git a/R/process_data_utils.R b/R/process_data_utils.R
@@ -28,7 +28,7 @@ clean_data <- function(full_data, trapping_table, ...) {
 #'
 #' @param data any data.frame with a plot column.
 #' @param plots specify subset of plots; can be a vector of plots, or specific
-#' sets: "all" plots or "Longterm" plots (plots that have had the same
+#' sets: "all" plots or "longterm" plots (plots that have had the same
 #' treatment for the entire time series)
 #' @return Data.table filtered to the desired subset of plots.
 #'
@@ -79,15 +79,15 @@ join_plots <- function(df, plots_table) {
 #' period codes denote the number of censuses that have occurred, but are
 #' not the same as the number of censuses that should have occurred. Sometimes
 #' censuses are missed (weather, transport issues,etc). You can't pick this
-#' up with the period code. Because censues may not always occur monthly due to
-#' the newmoon - a new moon code was devised to give a standardized language
-#' of time for forcasting in particular. This function allows the user to decide
+#' up with the period code. Because censuses may not always occur monthly due to
+#' the new moon - a new moon code was devised to give a standardized language
+#' of time for forecasting in particular. This function allows the user to decide
 #' if they want to use the rodent period code, the new moon code, the date of
 #' the rodent census, or have their data with all three time formats
 #'
 #' @param summary_table Data.table with summarized rodent data.
-#' @param newmoon_table Data_table linking newmoon codes with period codes.
-#' @param time Character. Denotes whether newmoon codes, period codes,
+#' @param newmoon_table Data_table linking new moon codes with period codes.
+#' @param time Character. Denotes whether new moon codes, period codes,
 #' and/or date are desired.
 #'
 #' @return Data.table of summarized rodent data with user-specified time format
@@ -104,8 +104,14 @@ add_time <- function(summary_table, newmoons_table, time = "period") {
  } else {
  newmoons_table$censusdate[is.na(newmoons_table$censusdate)] <-
  newmoons_table$newmoondate[is.na(newmoons_table$censusdate)]
+ vars_to_complete <- names(dplyr::select(summary_table,tidyselect::any_of(c("species","plot"))))
  join_summary_newmoon <- dplyr::left_join(newmoons_table, summary_table,
- by = "period")
+ by = "period") %>%
+ tidyr::complete(tidyr::nesting(!!!rlang::syms(c("newmoonnumber",
+ "newmoondate",
+ "censusdate"))),
+ !!!rlang::syms(vars_to_complete)) %>%
+ tidyr::drop_na(tidyselect::any_of(c("species","plot")))
  }
  date_vars <- c("newmoondate", "newmoonnumber", "period", "censusdate")
  vars_to_keep <- switch(tolower(time),

diff --git a/R/summarize_individual_rodents.R b/R/summarize_individual_rodents.R
@@ -12,10 +12,16 @@
 #' @export
 #'
 summarize_individual_rodents <- function(path = get_default_data_path(),
- clean = TRUE, type = "Rodents",
- length = "all", unknowns = FALSE, time = "period",
- fillweight = FALSE, min_plots = 1, min_traps = 1,
- download_if_missing = TRUE, quiet = FALSE)
+ clean = TRUE,
+ type = "Rodents",
+ length = "all",
+ unknowns = FALSE,
+ time = "period",
+ fillweight = FALSE,
+ min_plots = 1,
+ min_traps = 1,
+ download_if_missing = TRUE,
+ quiet = FALSE)
 {
 
  #### Get Data ----
@@ -38,8 +44,15 @@ summarize_individual_rodents <- function(path = get_default_data_path(),
  "sex", "reprod", "age", "testes", "vagina","pregnant", "nipples","lactation",
  "hfl", "wgt", "tag", "note2", "ltag", "note3"))
 
- #### use new moon number as time index if time == "newmoon" ----
- return(add_time(rodents, data_tables$newmoons_table, time))
+ rodents <- add_time(rodents, data_tables$newmoons_table, time)
+
+ if(time == "newmoon") {
+ rodents <- rodents %>%
+ dplyr::select("newmoonnumber","month","day","year","treatment","plot","stake",
+ "species","sex","reprod","age","testes","vagina","pregnant",
+ "nipples","lactation","hfl","wgt","tag","note2","ltag","note3")
+ }
+ return(rodents)
 }
 
 #' @rdname summarize_individual_rodents

diff --git a/R/summarize_rodents.R b/R/summarize_rodents.R
@@ -33,7 +33,7 @@
 #' 2) that combo was skipped that month, or
 #' 3) that combo was trapped, but is unusable (a negative period code))
 #' @param zero_drop logical, drop 0s (representing sufficient sampling, but no
-#' detections)
+#' detection)
 #' @param min_traps minimum number of traps for a plot to be included
 #' @param min_plots minimum number of plots within a period for an
 #' observation to be included
@@ -50,18 +50,26 @@
 #' @export
 #'
 summarize_rodent_data <- function(path = get_default_data_path(),
- clean = TRUE, level = "Site",
- type = "Rodents", length = "all", plots = length,
- unknowns = FALSE, shape = "crosstab",
- time = "period", output = "abundance",
+ clean = TRUE,
+ level = "Site",
+ type = "Rodents",
+ length = "all",
+ plots = length,
+ unknowns = FALSE,
+ shape = "crosstab",
+ time = "period",
+ output = "abundance",
  fillweight = (output != "abundance"),
  na_drop = TRUE,
  zero_drop = switch(tolower(level),
  "plot" = FALSE,
  "treatment" = TRUE,
  "site" = TRUE),
- min_traps = 1, min_plots = 24, effort = FALSE,
- download_if_missing = TRUE, quiet = FALSE,
+ min_traps = 1,
+ min_plots = 24,
+ effort = FALSE,
+ download_if_missing = TRUE,
+ quiet = FALSE,
  include_unsampled = FALSE)
 {
  if (include_unsampled)

diff --git a/man/summarize_plant_data.Rd b/man/summarize_plant_data.Rd
diff --git a/man/summarize_rodent_data.Rd b/man/summarize_rodent_data.Rd
diff --git a/tests/testthat/test-99-regression.R b/tests/testthat/test-99-regression.R
@@ -28,7 +28,7 @@ test_that("data generated by level = plot, time = newmoon, type = granivore, sha
  shape = "flat", time = "newmoon", na_drop = FALSE) %>%
  dplyr::filter(newmoonnumber < 465)
  attributes(data) <- attributes(data)[sort(names(attributes(data)))]
- expect_known_hash(data, "20d3d2287c")
+ expect_known_hash(data, "f5167f2c0e")
 
  sampled_newmoons <- abundance(portal_data_path, time = "all",
  na_drop = FALSE, min_plots = 1) %>%
@@ -37,7 +37,7 @@ test_that("data generated by level = plot, time = newmoon, type = granivore, sha
  dplyr::filter(newmoonnumber %in% sampled_newmoons)
  attributes(data) <- attributes(data)[sort(names(attributes(data)))]
  expect_equal(dim(data), c(155880, 5))
- expect_known_hash(data, "efbecf7764")
+ expect_known_hash(data, "d11b6b8ef3")
 })
 
 test_that("data generated by na_drop = FALSE, zero_drop = FALSE is same", {
@@ -49,7 +49,7 @@ test_that("data generated by na_drop = FALSE, zero_drop = FALSE is same", {
  expect_equal(dim(data), c(464, 22))
  expect_known_hash(is.na(data), "0294bfffde")
  data[is.na(data)] <- -999
- expect_known_hash(data, "638d5588ce")
+ expect_known_hash(data, "62f714b7c9")
  abundances <- data %>% dplyr::select(-censusdate)
 
  data <- abundance(portal_data_path, time = "newmoon", min_plots = 1,
@@ -59,7 +59,7 @@ test_that("data generated by na_drop = FALSE, zero_drop = FALSE is same", {
  expect_equal(dim(data), c(464, 22))
  expect_known_hash(is.na(data), "b2d5abb360")
  data[is.na(data)] <- -999
- expect_known_hash(data, "59b85b7415")
+ expect_known_hash(data, "6c800c6b50")
  expect_equal(data %>% dplyr::select(-newmoonnumber),
  abundances)
 })