-
Notifications
You must be signed in to change notification settings - Fork 3
/
station_search.R
108 lines (98 loc) · 4.26 KB
/
station_search.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
##' @title Find Senamhi stations matching various criteria
##'
##' @description Search for Senamhi stations by name, region, available data, and/or distance to a target.
##'
##' @param name character; optional character vector to filter results by station name.
##' @param ignore.case logical; by default the search for station names is not case-sensitive.
##' @param glob logical; whether to allow regular expressions in the \code{name}. See \code{\link{glob2rx}}.
##' @param region character; optional character string to filter results by region.
##' @param period numeric; optional, either a range of years or the total number of years of data that must be available.
##' @param config character; the configuration of the station ((m)eteorological or (h)ydrological)
##' @param target numeric; optional station ID of a target station, or a vector of length 2 containing latitude and longitude (in that order).
##' @param dist numeric; vector with a range of distance from the target in km. Only used if a target is specified. (default is 0:100)
##' @param sort Boolean; if TRUE (default), will sort the resultant table by distance from `target`. Only used if a target is specified.
##' @param ... Additional arguments passed to \code{\link{grep}}.
##'
##' @return A data frame containing the details of matching stations.
##'
##' @importFrom dplyr arrange filter mutate rowwise
##' @importFrom geosphere distGeo
##' @importFrom rlang .data
##' @importFrom utils glob2rx
##'
##' @export
##'
##' @author Conor I. Anderson
##'
##' @examples
##' # Find all stations containing 'Tarapoto' in their name.
##' station_search('Tarapoto')
##'
##' # Find all stations starting with "San"
##' station_search(name = "San*", glob = TRUE)
##'
##' # Find stations with data available from 1971 to 2000.
##' station_search(period = 1971:2000)
##'
##' # Find all stations between 0 and 100 km from Station '000401'
##' station_search(target = '000401', dist = 0:100)
##'
station_search <- function(name = NULL, ignore.case = TRUE, glob = FALSE, region = NULL,
period = NULL, config = NULL, target = NULL, dist = 0:100, sort = TRUE, ...) {
catalogue <- .get_catalogue()
if (!is.null(target) && length(target) == 1L && nchar(target) < 6) {
target <- suppressWarnings(try(sprintf("%06d", as.numeric(target)), silent = TRUE))
if (inherits(target, "try-error") || !target %in% catalogue$StationID) {
stop("Target station appears invalid.")
}
}
filt <- catalogue
# If `name` is not NULL, filter by name
if (!is.null(name)) {
if (glob) name <- glob2rx(name)
if (length(name) > 1) name <- paste(name, collapse = "|")
filt <- filter(filt, grepl(name, .data$Station, ignore.case = ignore.case, ...))
}
# If `region` is not NULL, filter by name
if (!is.null(region)) {
filt <- filter(filt, .data$Region == toupper(region))
if (nrow(filt) == 0) {
stop("No data found for that region. Did you spell it correctly?")
}
}
# If `config` is not NULL, filter by config
if (!is.null(config)) {
filt <- filter(filt, grepl(config, .data$Configuration, ignore.case = ignore.case, ...))
if (nrow(filt) == 0) {
stop("No data found for that config. Did you pass \"m\" or \"h\"?")
}
}
# If `period` is not NULL, filter by available data
if (!is.null(period)) {
if (length(period) == 1) {
filt <- filter(filt, .data$`Period (Yr)` >= period)
} else {
filt <- filter(filt, .data$`Data Start` <= min(period) &
.data$`Data End` >= max(period))
}
if (nrow(filt) == 0) {
stop("No station was found for the specified period.")
}
}
# If `target` is not NULL, filter by distance to target
if (!is.null(target)) {
if (length(target) == 1L) {
p1 <- catalogue %>% filter(.data$StationID == target) %>%
select(.data$Longitude, .data$Latitude) %>% unlist()
} else if (length(target) == 2L) {
p1 <- c(target[2], target[1])
} else stop("error: check target format")
filt <- rowwise(filt) %>%
mutate(Dist = distGeo(p1, c(.data$Longitude, .data$Latitude))/1000) %>%
filter(.data$Dist >= min(dist) & .data$Dist <= max(dist))
if (sort == TRUE) filt <- arrange(filt, .data$Dist)
attr(filt, "target_lon") <- p1[1]
attr(filt, "target_lat") <- p1[2]
}
filt
}