Skip to content

Commit

Permalink
Merge branch 'dev'
Browse files Browse the repository at this point in the history
  • Loading branch information
ybkamaleri committed Sep 1, 2022
2 parents d308580 + b91fb69 commit 6aed387
Show file tree
Hide file tree
Showing 128 changed files with 731 additions and 174 deletions.
2 changes: 1 addition & 1 deletion DESCRIPTION
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
Package: orgdata
Title: Aggregating Original Data
Version: 0.7.4
Version: 0.7.5
Authors@R:
c(person(given = "Yusman",
family = "Kamaleri",
Expand Down
6 changes: 5 additions & 1 deletion NAMESPACE
Original file line number Diff line number Diff line change
@@ -1,5 +1,8 @@
# Generated by roxygen2: do not edit by hand

S3method(find_age_category,cat)
S3method(find_age_category,default)
S3method(find_age_category,val)
S3method(find_data,csv)
S3method(find_data,default)
S3method(find_data,dta)
Expand Down Expand Up @@ -33,6 +36,7 @@ export(do_reshape_wide)
export(do_split)
export(do_year)
export(emoji)
export(find_age_category)
export(find_column_input)
export(find_column_multi)
export(find_column_multi_input)
Expand Down Expand Up @@ -81,8 +85,8 @@ export(reset_opt)
export(reset_options)
export(save_file)
export(se_fil)
export(see_data)
export(see_file)
export(see_org)
export(update_orgdata)
export(upgrade_orgdata)
export(website)
Expand Down
6 changes: 6 additions & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
@@ -1,3 +1,9 @@
# orgdata 0.7.5
- Use either `empty` or `tom` to represent regular expression to replace to since Access makes symbol `""` to be invisible (#285)
- Change function name from `see_org()` to `see_data()` for viewing data in the data warehouse.
- Use symbol `|` to separate multiple arguments in column `EXTRA` (#288)
- Group age to specific or specified interval with `AgeCat()`. This function can be use in table for filegroup under `EXTRA` column (#287 #289)

# orgdata 0.7.4
- Delete raw of similar columns with multiple specifications (#282)
- Different ways to recode of similar column ie. duplicated, with defined lesid will give error.
Expand Down
101 changes: 101 additions & 0 deletions R/age-group.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,101 @@
#' Create age categories
#' @description Create age categories either by age interval or specified age
#' categories. How to define the age categories in `EXTRA` column in Access is
#' shown in the example below.
#' @description Age categories can be specified as follows:
#' - Specific interval eg. every 5 years. Interval with odd numbers will use minimum age of 0 and maximum age is 85+, while even number uses maximum age of 80+.
#' - Specified interval eg. 0-18, 19-44, 45-64, 65-79, 80+
#' @param dt Dataset
#' @param interval Age interval
#' @examples
#' \dontrun{
#' AgeCat(5) #Group age for every 5 years with min 0 and max 85+
#' AgeCat(10) #Group age for every 10 years with min 0 and max 80+
#' AgeCat(0, 19, 45, 65, 80) #Age group of 0-18, 19-44, 45-64, 65-79, 80+
#' }
#' @family extra arguments
#' @export
find_age_category <- function(dt = NULL, interval = NULL) {
UseMethod("find_age_category", interval)
}

#' @method find_age_category default
#' @export
find_age_category.default <- function(dt, interval) {
message("Selected age category: ", interval)
stop(sprintf("Age categories not valid: `%s`", interval))
}

# Interval value to categorize age. The minimum age will always be
# 0 while maximum age is 80 for even age interval and 85 for odd age
# interval.
#' @method find_age_category val
#' @export
find_age_category.val <- function(dt, interval){
is_color_txt(x = interval, msg = "Creating age category with year-interval of", emoji = TRUE)

## Age lower and upper limit for odd and even number
valOdd <- interval %% 2
maxAge <- ifelse(valOdd == 0, 80, 85)
minAge <- 0

ageBrk <- c(seq(from = minAge, to = maxAge, by = interval), Inf)
dt <- make_age_cat(dt, category = ageBrk)
return(dt)
}

#' @method find_age_category cat
#' @export
find_age_category.cat <- function(dt, interval){

txt <- paste(interval, collapse = ", ")
is_color_txt(x = paste0(txt, "+"), msg = "Creating age category", emoji = TRUE)
ageBrk <- c(interval, Inf)
dt <- make_age_cat(dt, category = ageBrk)
return(dt)
}


## Helper ----------

make_age_cat <- function(dt, category){

ALDER <- ageid <- ageGRP <- alderGRP <- NULL
grp <- up <- lo <- grpid <- NULL

vals <- paste0("VAL", 1:getOption("orgdata.vals"))
gpv <- setdiff(names(dt), vals)

dt[, grpid := .GRP, by = mget(gpv)]
dt[, grp := cut(ALDER, breaks = category, right = FALSE), by = grpid][, grp := as.character(grp)]

idVars <- c("grpid", "grp")
dt[, ageid := .GRP, by = mget(idVars)]

vals <- grep("^VAL", names(dt), value = TRUE)
for (i in vals){
vai <- tolower(i)
dt[, (vai) := sum(get(i), na.rm = TRUE), by = ageid]
dt[, (i) := get(vai)]
dt[, (vai) := NULL]
}

dt <- dt[, .SD[1], by = ageid]
dt[, ageGRP := sub("\\[(.*)\\)", "\\1", grp)]

ageVars <- c("lo", "up")
dt[, (ageVars) := data.table::tstrsplit(ageGRP, ",")]

for (j in ageVars){
suppressWarnings(data.table::set(dt, j = j, value = as.numeric(dt[[j]])))
}

dt[, up := up - 1]
dt[up != Inf, alderGRP := paste0(lo, "_", up)]
dt[up == Inf, alderGRP := paste0(lo, "+")]
dt[, ALDER := alderGRP]

delVals <- c("ageGRP", "alderGRP", idVars, ageVars)
dt[, (delVals) := NULL]
return(dt)
}
7 changes: 4 additions & 3 deletions R/extra-args.R
Original file line number Diff line number Diff line change
@@ -1,8 +1,9 @@
#' @title Execute Extra Arguments
#' @description This is based on the input in `EXTRA` column from Access
#' registration database. The arguments that are valid here can be expanded
#' whenever needed. See details section for valid arguments to be used.
#' All argument names are written in `CamelCase` style.
#' whenever needed. See details section for valid arguments to be used. All
#' argument names are written in `CamelCase` style. Use symbol `|` to separate
#' multiple arguments.
#'
#' @details Currently, these arguments can be used:
#' - `DeleteNaRow` : Delete any row consisting only NA in all columns
Expand Down Expand Up @@ -38,7 +39,7 @@ get_extra_args <- function(group = NULL, con = NULL, spec = NULL){
input <- find_column_input(spec = spec, col = "EXTRA")

if (!is.na(input)) {
input <- find_column_multi(spec = spec, col = "EXTRA", sep = ",")
input <- find_column_multi(spec = spec, col = "EXTRA", sep = "|")
}
return(input)

Expand Down
50 changes: 48 additions & 2 deletions R/extra-filegroup-args.R
Original file line number Diff line number Diff line change
@@ -1,18 +1,22 @@

#' @title Execute Extra Arguments for Filegroup
#' @description This is based on the input in `EXTRA` column from Access
#' registration database on filegroup. The arguments that are valid here can
#' be expanded whenever needed. See details section for valid arguments to be
#' used. All argument names are written in `CamelCase` style.
#' @description NB! Use symbol `|` to separate multiple arguments eg. `DeleteOldBydel | AgeCat(5)`
#'
#' @details Currently, these arguments can be used:
#' - `DeleteOldBydel` : Delete bydel codes before 2003, except for Oslo
#' - `AgeCat()` : Categorise age to different groups with defined interval. Example can be found in `find_age_category()`.
#' @param dt Dataset
#' @param args Extra arguments as specified in details section below.
#' @family extra arguments
#' @export
do_extra_args_group <- function(dt = NULL, args = NULL){
dt <- is_delete_bydel_before_2003(dt, extra = args)
dt <- is_age_category(dt, extra = args)

return(dt)
}


Expand Down Expand Up @@ -41,7 +45,7 @@ get_extra_args_group <- function(group = NULL, con = NULL, spec = NULL){
input <- find_column_input(spec = spec, col = "EXTRA")

if (!is.na(input)) {
input <- find_column_multi(spec = spec, col = "EXTRA", sep = ",")
input <- find_column_multi(spec = spec, col = "EXTRA", sep = "|")
}
return(input)

Expand All @@ -61,3 +65,45 @@ is_delete_bydel_before_2003 <- function(dt = NULL, extra = NULL){
}
return(dt)
}

is_age_category <- function(dt = NULL, extra = NULL){
is_debug(deep = TRUE)

ageCat <- extra[grepl("AgeCat", extra)]
if (length(ageCat > 0)){

gp <- is_input_age_class(ageCat)
dt <- find_age_category(dt = dt, interval = gp)
}

return(dt)
}


## Helper ---------------
is_input_age_class <- function(input){
input <- sub("^AgeCat\\((.*)\\)", "\\1", input)

input <- is_separate(input, sep = ",")

if (length(input) > 1){
# category with specified group
input <- is_check_age_input(input)
class(input) <- append(class(input), "cat")

} else {
# category with interval eg. every 5 years
input <- is_check_age_input(input)
class(input) <- append(class(input), "val")
}

return(input)
}

is_check_age_input <- function(inx){
inx <- trimws(inx)
inx <- tryCatch(as.numeric(inx),
warning = function(w){
is_stop("Interval for AgeCat in EXTRA is not numeric:", inx)
})
}
6 changes: 3 additions & 3 deletions R/make-file-each.R
Original file line number Diff line number Diff line change
Expand Up @@ -135,15 +135,15 @@ do_make_file_each <- function(spec, fgspec, aggregate, datacols, year, row, base
is_verbose(msg = is_line_short(), type = "other", ctrl = FALSE)
withr::with_options(list(orgdata.emoji = "safe"),
is_color_txt(x = "",
msg = "Delete dataset in the database ...",
msg = "Delete dataset in the data warehouse ...",
type = "debug", emoji = TRUE))
duck$db_remove_table(name = tblKoblid)
}

if (fileCtrl && fileDuck){
withr::with_options(list(orgdata.emoji = "safe"),
is_color_txt(x = "",
msg = "Read data directly from Database",
msg = "Read data directly from Data Warehouse",
type = "debug", emoji = TRUE))
is_color_txt(x = fileName, msg = "File:")
dt <- duck$db_read(name = tblKoblid)
Expand All @@ -153,7 +153,7 @@ do_make_file_each <- function(spec, fgspec, aggregate, datacols, year, row, base
is_verbose(msg = is_line_short(), type = "other", ctrl = FALSE)
withr::with_options(list(orgdata.emoji = "safe"),
is_color_txt(x = "",
msg = "Adding dataset to the database ...",
msg = "Adding dataset to the data warehouse ...",
type = "debug", emoji = TRUE))
duck$db_write(name = tblKoblid, value = dt, write = TRUE)
}
Expand Down
1 change: 0 additions & 1 deletion R/make-file.R
Original file line number Diff line number Diff line change
Expand Up @@ -196,7 +196,6 @@ make_file <- function(group = NULL,
outDT <- is_col_num_warn(outDT, numCols)

## EXTRA ARGUMENTS FOR FILEGROUP ---------------------------
## -- DELETE OLD BYDEL --
bySpec <- get_extra_args_group(spec = fgSpec)
outDT <- do_extra_args_group(dt = outDT, args = bySpec )

Expand Down
14 changes: 13 additions & 1 deletion R/recode-regexp.R
Original file line number Diff line number Diff line change
Expand Up @@ -113,7 +113,7 @@ is_recode_regexp <- function(dt, code, cols){
for (i in seq_along(cols)){
col <- cols[i]
fra <- is_rex(code = code[["FRA"]])
til <- code[["TIL"]]
til <- is_empty_til(code = code[["TIL"]])

dt[, (col) := gsub(fra, til, get(col))]
}
Expand All @@ -129,3 +129,15 @@ is_rex <- function(code){

return(code)
}

## Symbol "" in Access make it invisible so better use
## something clear with the word 'empty' or 'tom'
is_empty_til <- function(code){
code <- trimws(code)
ety <- c("empty", "emtpy", "tom")
if (is.element(code, ety)){
code <- ""
}

return(code)
}
2 changes: 1 addition & 1 deletion R/recode.R
Original file line number Diff line number Diff line change
Expand Up @@ -167,7 +167,7 @@ check_dublicate_col <- function(code){

dp <- data.table::copy(code)
cols <- c("KOL", "FRA")
dp[, dup := .N > 1, by = cols]
dp[, "dup" := .N > 1, by = cols]
dps <- sum(dp[["dup"]])

if (dps > 0){
Expand Down
17 changes: 9 additions & 8 deletions R/see-org.R → R/see-data.R
Original file line number Diff line number Diff line change
@@ -1,17 +1,18 @@
#' @title See Original Data in the Database
#' @description See the original data that are saved in the org database when
#' the column *KONTROLLERT* is marked. This means the dataset has been cleaned
#' and recoded as specified in *INNLESING* table in Access registration
#' database.
#' @title See Structured Data in the Data Warehouse
#' @description See the original data that have been clean and structured i the
#' data warehouse. Data is saved in the warehouse when the column
#' *KONTROLLERT* is marked in the original file table in Access. This means
#' the dataset has been cleaned and recoded as specified in *INNLESING* table
#' in Access registration database.
#' @param group The filegroup name (\emph{filgruppe})
#' @inheritParams make_file
#' @param action To read or delete the data in the database. Default is `read`.
#' @param action To read or delete the data in the warehouse. Default is `read`.
#' @examples
#' \dontrun{
#' dt <- see_org("LESEFERD", koblid = 134)
#' dt <- see_data("LESEFERD", koblid = 134)
#' }
#' @export
see_org <- function(group = NULL, koblid = NULL, year = NULL, action = c("read", "delete")){
see_data <- function(group = NULL, koblid = NULL, year = NULL, action = c("read", "delete")){

action <- match.arg(action)
if (is.null(year))
Expand Down
11 changes: 7 additions & 4 deletions R/utils-misc.R
Original file line number Diff line number Diff line change
Expand Up @@ -51,8 +51,8 @@ debug_opt <- function(opt = c("shallow", "deep", "nrow", "row", "aggregate", "ge


#' @title Emoji
#' @description Use emoji for fun &#128516;
#' @param x Emoji to choose ie. thumb, smile or sad
#' @description Change emoji in the output messages for fun &#128516;
#' @param x Emoji to choose ie. thumb, smile etc
#' @examples emoji("smile")
#' @export
emoji <- function(x = c("mark", "thumb", "write",
Expand All @@ -70,6 +70,8 @@ emoji <- function(x = c("mark", "thumb", "write",
folder = options(orgdata.emoji = "folder"),
book = options(orgdata.emoji = "book")
)

invisible()
}


Expand Down Expand Up @@ -155,16 +157,17 @@ is_latest_version <- function(ver = utils::packageDescription("orgdata")[["Versi
newVer <- numeric_version(gitVer) > numeric_version(ver)

if(newVer){
is_color_txt(ver, "Your installed version:", type = "note")
is_color_txt(gitVer, "New version is available!", type = "note", emoji = TRUE, symbol = "thumb")
is_color_txt(ver, "Your installed version:", type = "note")
is_color_txt("Changelog", "Find out what's new in", type = "note", emoji = TRUE, symbol = "book")
out <- TRUE
}
} else {
is_color_txt("", "You have no internet connection!",
type = "error", emoji = TRUE, symbol = "sad")
}

return(out)
invisible(out)
}

is_online <- function(x){
Expand Down
2 changes: 1 addition & 1 deletion R/zzz.R
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@ opt.orgdata <- as.list(opt_rename(optOrg))
if (latest){
x <- utils::askYesNo("Update orgdata now?")
if (isTRUE(x)){
update_orgdata()
orgdata::update_orgdata()
}
}
}
Loading

0 comments on commit 6aed387

Please sign in to comment.