Skip to content

Commit

Permalink
Merge pull request #305 from weecology/na_drop
Browse files Browse the repository at this point in the history
Force species-level filling of missing newmoons when na_drop = FALSE, closes #294
  • Loading branch information
gmyenni authored Sep 19, 2024
2 parents 6419678 + 07b3b4e commit 387ffc7
Show file tree
Hide file tree
Showing 7 changed files with 56 additions and 29 deletions.
6 changes: 3 additions & 3 deletions NEWS.md
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
# portalr
* `load_rodent_data()` now returns an object with an S3 class, and provides a useful message on `print`
* Users can now pass arguments to `download_observations()` from `load_rodent_data()` and other calling functions

* `load_rodent_data()` now returns an object with an S3 class, and provides a useful message on `print`.
* Users can now pass arguments to `download_observations()` from `load_rodent_data()` and other calling functions.
* Fix bug in `na_drop = FALSE` that failed to complete missing rows to the species level when `time = "newmoon"`.

Version numbers follow [Semantic Versioning](https://semver.org/).

Expand Down
20 changes: 13 additions & 7 deletions R/process_data_utils.R
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@ clean_data <- function(full_data, trapping_table, ...) {
#'
#' @param data any data.frame with a plot column.
#' @param plots specify subset of plots; can be a vector of plots, or specific
#' sets: "all" plots or "Longterm" plots (plots that have had the same
#' sets: "all" plots or "longterm" plots (plots that have had the same
#' treatment for the entire time series)
#' @return Data.table filtered to the desired subset of plots.
#'
Expand Down Expand Up @@ -79,15 +79,15 @@ join_plots <- function(df, plots_table) {
#' period codes denote the number of censuses that have occurred, but are
#' not the same as the number of censuses that should have occurred. Sometimes
#' censuses are missed (weather, transport issues,etc). You can't pick this
#' up with the period code. Because censues may not always occur monthly due to
#' the newmoon - a new moon code was devised to give a standardized language
#' of time for forcasting in particular. This function allows the user to decide
#' up with the period code. Because censuses may not always occur monthly due to
#' the new moon - a new moon code was devised to give a standardized language
#' of time for forecasting in particular. This function allows the user to decide
#' if they want to use the rodent period code, the new moon code, the date of
#' the rodent census, or have their data with all three time formats
#'
#' @param summary_table Data.table with summarized rodent data.
#' @param newmoon_table Data_table linking newmoon codes with period codes.
#' @param time Character. Denotes whether newmoon codes, period codes,
#' @param newmoon_table Data_table linking new moon codes with period codes.
#' @param time Character. Denotes whether new moon codes, period codes,
#' and/or date are desired.
#'
#' @return Data.table of summarized rodent data with user-specified time format
Expand All @@ -104,8 +104,14 @@ add_time <- function(summary_table, newmoons_table, time = "period") {
} else {
newmoons_table$censusdate[is.na(newmoons_table$censusdate)] <-
newmoons_table$newmoondate[is.na(newmoons_table$censusdate)]
vars_to_complete <- names(dplyr::select(summary_table,tidyselect::any_of(c("species","plot"))))
join_summary_newmoon <- dplyr::left_join(newmoons_table, summary_table,
by = "period")
by = "period") %>%
tidyr::complete(tidyr::nesting(!!!rlang::syms(c("newmoonnumber",
"newmoondate",
"censusdate"))),
!!!rlang::syms(vars_to_complete)) %>%
tidyr::drop_na(tidyselect::any_of(c("species","plot")))
}
date_vars <- c("newmoondate", "newmoonnumber", "period", "censusdate")
vars_to_keep <- switch(tolower(time),
Expand Down
25 changes: 19 additions & 6 deletions R/summarize_individual_rodents.R
Original file line number Diff line number Diff line change
Expand Up @@ -12,10 +12,16 @@
#' @export
#'
summarize_individual_rodents <- function(path = get_default_data_path(),
clean = TRUE, type = "Rodents",
length = "all", unknowns = FALSE, time = "period",
fillweight = FALSE, min_plots = 1, min_traps = 1,
download_if_missing = TRUE, quiet = FALSE)
clean = TRUE,
type = "Rodents",
length = "all",
unknowns = FALSE,
time = "period",
fillweight = FALSE,
min_plots = 1,
min_traps = 1,
download_if_missing = TRUE,
quiet = FALSE)
{

#### Get Data ----
Expand All @@ -38,8 +44,15 @@ summarize_individual_rodents <- function(path = get_default_data_path(),
"sex", "reprod", "age", "testes", "vagina","pregnant", "nipples","lactation",
"hfl", "wgt", "tag", "note2", "ltag", "note3"))

#### use new moon number as time index if time == "newmoon" ----
return(add_time(rodents, data_tables$newmoons_table, time))
rodents <- add_time(rodents, data_tables$newmoons_table, time)

if(time == "newmoon") {
rodents <- rodents %>%
dplyr::select("newmoonnumber","month","day","year","treatment","plot","stake",
"species","sex","reprod","age","testes","vagina","pregnant",
"nipples","lactation","hfl","wgt","tag","note2","ltag","note3")
}
return(rodents)
}

#' @rdname summarize_individual_rodents
Expand Down
22 changes: 15 additions & 7 deletions R/summarize_rodents.R
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@
#' 2) that combo was skipped that month, or
#' 3) that combo was trapped, but is unusable (a negative period code))
#' @param zero_drop logical, drop 0s (representing sufficient sampling, but no
#' detections)
#' detection)
#' @param min_traps minimum number of traps for a plot to be included
#' @param min_plots minimum number of plots within a period for an
#' observation to be included
Expand All @@ -50,18 +50,26 @@
#' @export
#'
summarize_rodent_data <- function(path = get_default_data_path(),
clean = TRUE, level = "Site",
type = "Rodents", length = "all", plots = length,
unknowns = FALSE, shape = "crosstab",
time = "period", output = "abundance",
clean = TRUE,
level = "Site",
type = "Rodents",
length = "all",
plots = length,
unknowns = FALSE,
shape = "crosstab",
time = "period",
output = "abundance",
fillweight = (output != "abundance"),
na_drop = TRUE,
zero_drop = switch(tolower(level),
"plot" = FALSE,
"treatment" = TRUE,
"site" = TRUE),
min_traps = 1, min_plots = 24, effort = FALSE,
download_if_missing = TRUE, quiet = FALSE,
min_traps = 1,
min_plots = 24,
effort = FALSE,
download_if_missing = TRUE,
quiet = FALSE,
include_unsampled = FALSE)
{
if (include_unsampled)
Expand Down
2 changes: 1 addition & 1 deletion man/summarize_plant_data.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion man/summarize_rodent_data.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

8 changes: 4 additions & 4 deletions tests/testthat/test-99-regression.R
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@ test_that("data generated by level = plot, time = newmoon, type = granivore, sha
shape = "flat", time = "newmoon", na_drop = FALSE) %>%
dplyr::filter(newmoonnumber < 465)
attributes(data) <- attributes(data)[sort(names(attributes(data)))]
expect_known_hash(data, "20d3d2287c")
expect_known_hash(data, "f5167f2c0e")

sampled_newmoons <- abundance(portal_data_path, time = "all",
na_drop = FALSE, min_plots = 1) %>%
Expand All @@ -37,7 +37,7 @@ test_that("data generated by level = plot, time = newmoon, type = granivore, sha
dplyr::filter(newmoonnumber %in% sampled_newmoons)
attributes(data) <- attributes(data)[sort(names(attributes(data)))]
expect_equal(dim(data), c(155880, 5))
expect_known_hash(data, "efbecf7764")
expect_known_hash(data, "d11b6b8ef3")
})

test_that("data generated by na_drop = FALSE, zero_drop = FALSE is same", {
Expand All @@ -49,7 +49,7 @@ test_that("data generated by na_drop = FALSE, zero_drop = FALSE is same", {
expect_equal(dim(data), c(464, 22))
expect_known_hash(is.na(data), "0294bfffde")
data[is.na(data)] <- -999
expect_known_hash(data, "638d5588ce")
expect_known_hash(data, "62f714b7c9")
abundances <- data %>% dplyr::select(-censusdate)

data <- abundance(portal_data_path, time = "newmoon", min_plots = 1,
Expand All @@ -59,7 +59,7 @@ test_that("data generated by na_drop = FALSE, zero_drop = FALSE is same", {
expect_equal(dim(data), c(464, 22))
expect_known_hash(is.na(data), "b2d5abb360")
data[is.na(data)] <- -999
expect_known_hash(data, "59b85b7415")
expect_known_hash(data, "6c800c6b50")
expect_equal(data %>% dplyr::select(-newmoonnumber),
abundances)
})
Expand Down

0 comments on commit 387ffc7

Please sign in to comment.