diff --git a/R/globals.R b/R/globals.R index 7a04738..cd8f66e 100644 --- a/R/globals.R +++ b/R/globals.R @@ -2,5 +2,5 @@ ## a small price we pay for using data.table NSE with unquoted variables utils::globalVariables( c("count", "datetime", "msg_type", "timestamp", "exchange", "file_size", - "last_modified", ".", "size", "time") + "last_modified", ".", "size", "time", "stock", "stock_locate") ) diff --git a/R/read_functions.R b/R/read_functions.R index 6853950..78753ed 100644 --- a/R/read_functions.R +++ b/R/read_functions.R @@ -41,7 +41,7 @@ #' @param max_timestamp an 64 bit integer vector (see also \code{\link[bit64]{as.integer64}}) #' of maxium timestamp (inclusive). #' Note: min and max timestamp must be supplied with the same length or left empty. -#' @param filter_stock a character vector, specifying a filter for stocks. +#' @param filter_stock a character vector, specifying a filter for stocks. #' Note that this a shorthand for the \code{filter_stock_locate} argument, as it #' tries to find the stock_locate based on the \code{stock_directory} argument, #' if this is not found, it will try to extract the stock directory from the file, @@ -196,7 +196,7 @@ read_ITCH <- function(file, type, skip = 0, n_max = -1, length(min_timestamp) == 0 | length(max_timestamp) == 0)) stop("min_timestamp and max_timestamp has to have the same length or have to be not specified!") - if (!quiet && length(min_timestamp) > 0 | length(max_timestamp) > 0) { + if (!quiet && (length(min_timestamp) > 0 | length(max_timestamp) > 0)) { txt <- "[Filter] timestamp: " if (length(max_timestamp) == 0) { txt <- paste0(txt, ">= ", min_timestamp) diff --git a/README.Rmd b/README.Rmd index c5b7e3e..d5d81ec 100644 --- a/README.Rmd +++ b/README.Rmd @@ -175,12 +175,12 @@ od <- read_orders( max_timestamp = 55800000000000, # end at 15:30:00.000000 filter_stock_locate = 1, # take only stock with code 1 filter_stock = "CHAR", # but also take stock CHAR - stock_directory = sdir # provide the stock_directory to match stock names to locate_codes + stock_directory = sdir # provide the stock_directory to match stock names to stock_locates ) od[, .(n = .N), by = msg_type] range(od$timestamp) -od[, .(n = .N), by = .(locate_code, stock)] +od[, .(n = .N), by = .(stock_locate, stock)] ``` If you are interested in writing `ITCH_50` files or gaining a better understanding of the internal data structures, have a look at the `debug` folder and its contents. diff --git a/README.md b/README.md index 4f73d70..4dd0959 100644 --- a/README.md +++ b/README.md @@ -69,7 +69,7 @@ orders <- read_orders(file) #> [Counting] 5,000 messages found #> [Loading] . #> [Converting] to data.table -#> [Done] in 0.09 secs +#> [Done] in 0.08 secs str(orders) #> Classes 'data.table' and 'data.frame': 5000 obs. of 13 variables: #> $ msg_type : chr "A" "A" "F" "A" ... diff --git a/inst/extdata/ex20101224.TEST_ITCH_50 b/inst/extdata/ex20101224.TEST_ITCH_50 index 96e23fe..8bfaf8d 100644 Binary files a/inst/extdata/ex20101224.TEST_ITCH_50 and b/inst/extdata/ex20101224.TEST_ITCH_50 differ diff --git a/inst/extdata/ex20101224.TEST_ITCH_50.gz b/inst/extdata/ex20101224.TEST_ITCH_50.gz index b6dcfc1..c8b5571 100644 Binary files a/inst/extdata/ex20101224.TEST_ITCH_50.gz and b/inst/extdata/ex20101224.TEST_ITCH_50.gz differ diff --git a/inst/tinytest/test_read_functions.R b/inst/tinytest/test_read_functions.R index 81889c0..c9000b0 100644 --- a/inst/tinytest/test_read_functions.R +++ b/inst/tinytest/test_read_functions.R @@ -20,7 +20,7 @@ file_raw <- file_raw[length(file_raw)] expect_true(file.exists(file)) expect_true(file.info(file)[["size"]] == 465048) expect_true(file.exists(gzfile)) -expect_true(file.info(gzfile)[["size"]] == 159966) +expect_true(file.info(gzfile)[["size"]] == 159965) #### Count messages ct <- count_messages(file, quiet = TRUE) diff --git a/man/read_functions.Rd b/man/read_functions.Rd index d744eff..9ff1a31 100644 --- a/man/read_functions.Rd +++ b/man/read_functions.Rd @@ -96,8 +96,9 @@ Note: min and max timestamp must be supplied with the same length or left empty. \item{max_timestamp}{an 64 bit integer vector (see also \code{\link[bit64]{as.integer64}}) of maxium timestamp (inclusive). -Note: min and max timestamp must be supplied with the same length or left empty. -@param filter_stock a character vector, specifying a filter for stocks. +Note: min and max timestamp must be supplied with the same length or left empty.} + +\item{filter_stock}{a character vector, specifying a filter for stocks. Note that this a shorthand for the \code{filter_stock_locate} argument, as it tries to find the stock_locate based on the \code{stock_directory} argument, if this is not found, it will try to extract the stock directory from the file, diff --git a/simulate_dataset.R b/simulate_dataset.R index db3ba88..2480862 100644 --- a/simulate_dataset.R +++ b/simulate_dataset.R @@ -20,7 +20,7 @@ Rcpp::sourceCpp("debug/debug_tools.cpp") # take 3 most traded stocks in orders, trades file <- "20191230.BX_ITCH_50" -loc_code <- download_locate_code("BX", "2019-12-30", quiet = TRUE) +loc_code <- read_stock_directory(file, add_meta = FALSE, quiet = TRUE) trades <- read_trades(file, add_meta = FALSE, quiet = TRUE) orders <- read_orders(file, add_meta = FALSE, quiet = TRUE) mods <- read_modifications(file, add_meta = FALSE, quiet = TRUE) @@ -34,19 +34,19 @@ orders[, .(n = .N), by = stock][order(-n)][1:3] trades[, .(n = .N), by = stock][order(-n)][1:3] merge( mods[, .(n = .N), by = stock_locate][order(-n)][1:3], - loc_code, by = "locate_code", all.x = TRUE + loc_code[, .(stock_locate, stock)], by = "stock_locate", all.x = TRUE ) # take the following stocks as a base stock_select <- c("TSLA" = "ALC", "NIO" = "BOB", "BABA" = "CHAR") loc_codes <- loc_code[ - ticker %chin% names(stock_select) + stock %chin% names(stock_select) ][, - .(stock_old = ticker, + .(stock_old = stock, old_loc_code = stock_locate, - stock = stock_select[ticker]) -][order(stock), locate_code := 1:.N][] + stock = stock_select[stock]) +][order(stock)][, stock_locate := 1:.N][] # removes price outliers outside of a given sigma range... remove_price_outliers <- function(dt, sigma = 3) {