diff --git a/R/fix_bad_data.R b/R/fix_bad_data.R index 9433cae..d55a304 100644 --- a/R/fix_bad_data.R +++ b/R/fix_bad_data.R @@ -8,6 +8,7 @@ context <- unlist(context$var) # We should now have *fairly* clean context bad_data <- unlist(bad_table$var[bad_row]) + if (type == "dps") { if (!grepl("\\.", as.character(bad_data))) { # If this looks like a decimal error, let's salvage the data @@ -29,22 +30,35 @@ fix <- NA } } + if (type == "mme") { - tdiff <- abs(mean(context, na.rm = TRUE) - (bad_data/10)) + # First, make sure the value is actually bad. + tdiff <- abs(mean(context, na.rm = TRUE) - bad_data) if (length(tdiff) == 0) tdiff <- NA sdiff <- sd(context, na.rm = TRUE) prop <- if (!is.na(sdiff) & !is.na(tdiff)) (tdiff / sdiff) else NA + + # If our value is more than 1.5 standard deviations off, try a dps fix if (!is.na(prop) && prop > 1.5) { - # Let's see if it is a decimal place error (fairly conservative) - tdiff <- abs(mean(context, na.rm = TRUE)-(bad_data*10)) + tdiff <- abs(mean(context, na.rm = TRUE) - (bad_data/10)) if (length(tdiff) == 0) tdiff <- NA + sdiff <- sd(context, na.rm = TRUE) prop <- if (!is.na(sdiff) & !is.na(tdiff)) (tdiff / sdiff) else NA - if (!is.na(prop) && prop < 1.5) { - observation <- paste0(label, " dps: ", bad_data, " -> ", 10 * bad_data, " (", round(prop, 2), ")") - fix <- (10 * bad_data) + # If the value is still off, try the other direction + if (!is.na(prop) && prop > 1.5) { + tdiff <- abs(mean(context, na.rm = TRUE)-(bad_data*10)) + if (length(tdiff) == 0) tdiff <- NA + prop <- if (!is.na(sdiff) & !is.na(tdiff)) (tdiff / sdiff) else NA + if (!is.na(prop) && prop < 1.5) { + observation <- paste0(label, " dps: ", bad_data, " -> ", 10 * bad_data, " (", round(prop, 2), ")") + fix <- (10 * bad_data) + } else { + observation <- paste0(label, " err: ", bad_data, " -> NA (", round(prop, 2), ")") + fix <- NA + } } else { - observation <- paste0(label, " err: ", bad_data, " -> NA (", round(prop, 2), ")") - fix <- NA + observation <- paste0(label, " dps: ", bad_data, " -> ", bad_data/10, " (", round(prop, 2), ")") + fix <- (bad_data/10) } } else { fix <- bad_data diff --git a/R/qc.R b/R/qc.R index fa82709..2938c6e 100644 --- a/R/qc.R +++ b/R/qc.R @@ -41,17 +41,19 @@ qc <- function(dat) { bad_table <- select(dat, Fecha, var = `Tmax (C)`) fixes <- .fix_bad_data(bad_table, maxshifts[i], "Tmax", "dps") dat$`Tmax (C)`[maxshifts[i]] <- unlist(fixes[1]) - existingobs <- if (!is.na(observations[maxshifts[i]]) && observations[maxshifts[i]] != '') paste(observations[maxshifts[i]], "/ ") else "" + existingobs <- if (!is.na(observations[maxshifts[i]]) && observations[maxshifts[i]] != '') paste(observations[maxshifts[i]], ifelse((unlist(fixes[2]) != ''), "/ ", "")) else "" observations[maxshifts[i]] <- paste0(existingobs, unlist(fixes[2])) } } if (length(minshifts) > 0) { - bad_table <- select(dat, Fecha, var = `Tmin (C)`) - fixes <- .fix_bad_data(bad_table, minshifts[i], "Tmin", "dps") - dat$`Tmin (C)`[minshifts[i]] <- unlist(fixes[1]) - existingobs <- if (!is.na(observations[minshifts[i]]) && observations[minshifts[i]] != '') paste(observations[minshifts[i]], "/ ") else "" - observations[minshifts[i]] <- paste0(existingobs, unlist(fixes[2])) + for (i in 1:length(maxshifts)) { + bad_table <- select(dat, Fecha, var = `Tmin (C)`) + fixes <- .fix_bad_data(bad_table, minshifts[i], "Tmin", "dps") + dat$`Tmin (C)`[minshifts[i]] <- unlist(fixes[1]) + existingobs <- if (!is.na(observations[minshifts[i]]) && observations[minshifts[i]] != '') paste(observations[minshifts[i]], ifelse((unlist(fixes[2]) != ''), "/ ", "")) else "" + observations[minshifts[i]] <- paste0(existingobs, unlist(fixes[2])) + } } # Try to detect Tmin < Tmax (for now, we'll throw away these days) @@ -63,20 +65,21 @@ qc <- function(dat) { bad_table <- select(dat, Fecha, var = `Tmax (C)`) fixes <- .fix_bad_data(bad_table, minmaxerr[i], "Tmax", "mme") dat$`Tmax (C)`[minmaxerr[i]] <- unlist(fixes[1]) - existingobs <- if (!is.na(observations[minmaxerr[i]]) && observations[minmaxerr[i]] != '') paste(observations[minmaxerr[i]], "/ ") else "" + existingobs <- if (!is.na(observations[minmaxerr[i]]) && observations[minmaxerr[i]] != '') paste(observations[minmaxerr[i]], ifelse((unlist(fixes[2]) != ''), "/ ", "")) else "" observations[minmaxerr[i]] <- paste0(existingobs, unlist(fixes[2])) # Repeat the same for Tmin bad_table <- select(dat, Fecha, var = `Tmin (C)`) fixes <- .fix_bad_data(bad_table, minmaxerr[i], "Tmin", "mme") dat$`Tmin (C)`[minmaxerr[i]] <- unlist(fixes[1]) - existingobs <- if (!is.na(observations[minmaxerr[i]]) && observations[minmaxerr[i]] != '') paste(observations[minmaxerr[i]], "/ ") else "" + existingobs <- if (!is.na(observations[minmaxerr[i]]) && observations[minmaxerr[i]] != '') paste(observations[minmaxerr[i]], ifelse((unlist(fixes[2]) != ''), "/ ", "")) else "" observations[minmaxerr[i]] <- paste0(existingobs, unlist(fixes[2])) } } # Recalculate Tmean and add observations dat$`Tmean (C)` <- round((dat$`Tmax (C)` + dat$`Tmin (C)`)/2,1) + observations[is.na(observations)] <- '' dat <- add_column(dat, Observations = observations) dat