Skip to content

Commit

Permalink
qc: Iterate over all minshifts; don't delete good values where Tmin >…
Browse files Browse the repository at this point in the history
… Tmax.
  • Loading branch information
ConorIA committed Oct 8, 2017
1 parent 5e0a3cc commit af21a38
Show file tree
Hide file tree
Showing 2 changed files with 33 additions and 16 deletions.
30 changes: 22 additions & 8 deletions R/fix_bad_data.R
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
context <- unlist(context$var)
# We should now have *fairly* clean context
bad_data <- unlist(bad_table$var[bad_row])

if (type == "dps") {
if (!grepl("\\.", as.character(bad_data))) {
# If this looks like a decimal error, let's salvage the data
Expand All @@ -29,22 +30,35 @@
fix <- NA
}
}

if (type == "mme") {
tdiff <- abs(mean(context, na.rm = TRUE) - (bad_data/10))
# First, make sure the value is actually bad.
tdiff <- abs(mean(context, na.rm = TRUE) - bad_data)
if (length(tdiff) == 0) tdiff <- NA
sdiff <- sd(context, na.rm = TRUE)
prop <- if (!is.na(sdiff) & !is.na(tdiff)) (tdiff / sdiff) else NA

# If our value is more than 1.5 standard deviations off, try a dps fix
if (!is.na(prop) && prop > 1.5) {
# Let's see if it is a decimal place error (fairly conservative)
tdiff <- abs(mean(context, na.rm = TRUE)-(bad_data*10))
tdiff <- abs(mean(context, na.rm = TRUE) - (bad_data/10))
if (length(tdiff) == 0) tdiff <- NA
sdiff <- sd(context, na.rm = TRUE)
prop <- if (!is.na(sdiff) & !is.na(tdiff)) (tdiff / sdiff) else NA
if (!is.na(prop) && prop < 1.5) {
observation <- paste0(label, " dps: ", bad_data, " -> ", 10 * bad_data, " (", round(prop, 2), ")")
fix <- (10 * bad_data)
# If the value is still off, try the other direction
if (!is.na(prop) && prop > 1.5) {
tdiff <- abs(mean(context, na.rm = TRUE)-(bad_data*10))
if (length(tdiff) == 0) tdiff <- NA
prop <- if (!is.na(sdiff) & !is.na(tdiff)) (tdiff / sdiff) else NA
if (!is.na(prop) && prop < 1.5) {
observation <- paste0(label, " dps: ", bad_data, " -> ", 10 * bad_data, " (", round(prop, 2), ")")
fix <- (10 * bad_data)
} else {
observation <- paste0(label, " err: ", bad_data, " -> NA (", round(prop, 2), ")")
fix <- NA
}
} else {
observation <- paste0(label, " err: ", bad_data, " -> NA (", round(prop, 2), ")")
fix <- NA
observation <- paste0(label, " dps: ", bad_data, " -> ", bad_data/10, " (", round(prop, 2), ")")
fix <- (bad_data/10)
}
} else {
fix <- bad_data
Expand Down
19 changes: 11 additions & 8 deletions R/qc.R
Original file line number Diff line number Diff line change
Expand Up @@ -41,17 +41,19 @@ qc <- function(dat) {
bad_table <- select(dat, Fecha, var = `Tmax (C)`)
fixes <- .fix_bad_data(bad_table, maxshifts[i], "Tmax", "dps")
dat$`Tmax (C)`[maxshifts[i]] <- unlist(fixes[1])
existingobs <- if (!is.na(observations[maxshifts[i]]) && observations[maxshifts[i]] != '') paste(observations[maxshifts[i]], "/ ") else ""
existingobs <- if (!is.na(observations[maxshifts[i]]) && observations[maxshifts[i]] != '') paste(observations[maxshifts[i]], ifelse((unlist(fixes[2]) != ''), "/ ", "")) else ""
observations[maxshifts[i]] <- paste0(existingobs, unlist(fixes[2]))
}
}

if (length(minshifts) > 0) {
bad_table <- select(dat, Fecha, var = `Tmin (C)`)
fixes <- .fix_bad_data(bad_table, minshifts[i], "Tmin", "dps")
dat$`Tmin (C)`[minshifts[i]] <- unlist(fixes[1])
existingobs <- if (!is.na(observations[minshifts[i]]) && observations[minshifts[i]] != '') paste(observations[minshifts[i]], "/ ") else ""
observations[minshifts[i]] <- paste0(existingobs, unlist(fixes[2]))
for (i in 1:length(maxshifts)) {
bad_table <- select(dat, Fecha, var = `Tmin (C)`)
fixes <- .fix_bad_data(bad_table, minshifts[i], "Tmin", "dps")
dat$`Tmin (C)`[minshifts[i]] <- unlist(fixes[1])
existingobs <- if (!is.na(observations[minshifts[i]]) && observations[minshifts[i]] != '') paste(observations[minshifts[i]], ifelse((unlist(fixes[2]) != ''), "/ ", "")) else ""
observations[minshifts[i]] <- paste0(existingobs, unlist(fixes[2]))
}
}

# Try to detect Tmin < Tmax (for now, we'll throw away these days)
Expand All @@ -63,20 +65,21 @@ qc <- function(dat) {
bad_table <- select(dat, Fecha, var = `Tmax (C)`)
fixes <- .fix_bad_data(bad_table, minmaxerr[i], "Tmax", "mme")
dat$`Tmax (C)`[minmaxerr[i]] <- unlist(fixes[1])
existingobs <- if (!is.na(observations[minmaxerr[i]]) && observations[minmaxerr[i]] != '') paste(observations[minmaxerr[i]], "/ ") else ""
existingobs <- if (!is.na(observations[minmaxerr[i]]) && observations[minmaxerr[i]] != '') paste(observations[minmaxerr[i]], ifelse((unlist(fixes[2]) != ''), "/ ", "")) else ""
observations[minmaxerr[i]] <- paste0(existingobs, unlist(fixes[2]))

# Repeat the same for Tmin
bad_table <- select(dat, Fecha, var = `Tmin (C)`)
fixes <- .fix_bad_data(bad_table, minmaxerr[i], "Tmin", "mme")
dat$`Tmin (C)`[minmaxerr[i]] <- unlist(fixes[1])
existingobs <- if (!is.na(observations[minmaxerr[i]]) && observations[minmaxerr[i]] != '') paste(observations[minmaxerr[i]], "/ ") else ""
existingobs <- if (!is.na(observations[minmaxerr[i]]) && observations[minmaxerr[i]] != '') paste(observations[minmaxerr[i]], ifelse((unlist(fixes[2]) != ''), "/ ", "")) else ""
observations[minmaxerr[i]] <- paste0(existingobs, unlist(fixes[2]))
}
}

# Recalculate Tmean and add observations
dat$`Tmean (C)` <- round((dat$`Tmax (C)` + dat$`Tmin (C)`)/2,1)
observations[is.na(observations)] <- ''
dat <- add_column(dat, Observations = observations)

dat
Expand Down

0 comments on commit af21a38

Please sign in to comment.