Skip to content

Commit

Permalink
Reformatted tnt2newick which now works for tree files of any length a…
Browse files Browse the repository at this point in the history
…nd has new functionalities, internal input verification, as well as several tests ready.
  • Loading branch information
gaballench committed Sep 18, 2021
1 parent 8df9e25 commit f2feaa3
Show file tree
Hide file tree
Showing 5 changed files with 161 additions and 23 deletions.
2 changes: 1 addition & 1 deletion DESCRIPTION
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
Package: tbea
Title: Tools for Pre- and Post-processing in Bayesian Evolutionary Analyses
Version: 0.4.0
Version: 0.5.0
Authors@R: c(person("Gustavo A.", "Ballen", email = "[email protected]", role = c("aut", "cre")),
person("Sandra", "Reinales", email = "[email protected]", role = c("aut")))
Description: Package for bayesian inference in phylogenetics and evolution.
Expand Down
2 changes: 1 addition & 1 deletion R/concatNexus.R
Original file line number Diff line number Diff line change
Expand Up @@ -165,7 +165,7 @@ concatNexus <- function(matrices = NULL, pattern, path, filename, morpho = FALSE
from_ape <- sum(grepl(pattern = "write.nexus.data.R",
x = outputNEXUS, ignore.case = TRUE))
if (from_ape) {
outputNEXUS[2] <- paste("[Data written by concatNexus.R ", Sys.time(), "]", sep = "")
outputNEXUS[2] <- paste("[Data written by tbea::concatNexus ", Sys.time(), "]", sep = "")
}
# write the summary table with the dimmension of each partition to be included into the MrBayes script
endElem <- vector(mode = "integer", length = length(NCHARvector))
Expand Down
86 changes: 72 additions & 14 deletions R/tnt2newick.R
Original file line number Diff line number Diff line change
Expand Up @@ -4,46 +4,104 @@
#'
#' @param output A vector of type 'character' with the path to output files to contain the tree in newick format.
#'
#' @param subsetting A vector of type 'logical' indicating whether subsetting (i.e., chopping at once the first and last line of the TNT tree file) should be done. Otherwise, explicit text replacements removing such lines are used.
#' @param string A vector of type 'character' which can be either an object in memory or a string for interactive transformation, in TNT format. Use file in case your tree(s) are stored in a file instead.
#'
#' @param return A 'logical' expression indicating whether to write the newick tree(s) to a file in 'output' (if FALSE, the default), or whether to return to the screen (if TRUE), potentially to be stored in a vector via the '<-' operator.
#'
#' @param subsetting A vector of type 'logical' indicating whether subsetting (i.e., chopping at once the first and last line of the TNT tree file) should be done. Otherwise, explicit text replacements removing such lines are used. The default is false because it does not play well with multi-tree TNT files. Only use subsetting = TRUE if you are sure that there is only one tree in the file with the commands, tread and proc as first and last lines.
#'
#' @param name.sep A vector of length 2 and type 'character' for carrying out separator conversion in the names of terminals. For instance, if the terminals have names composed of two words separated by an underscore (_) and you want them to be separated by space ( ) then use name.sep = c("_", " "). This does not support regular expressions.
#'
#' @return This function writes to the disk a text file containing the tree converted to newick format.
#' @return This function writes to the disk a text file containing the tree converted to newick format. Alternatively, it returns the output to the screen or writes it to an object in memory thanks to the argument 'string'.
#'
#' @author Gustavo A. Ballen
#'
#' @details This function has been tested for cases where only one tree is in the original tnt tree file. Please be careful with files containing multiple trees.
#'
#' @examples
#' # Convert a tree in TNT tree format to newick format
#' \dontrun{
#' tnt2newick(file = "my_TNT_tree.tre", output = "my_TNT_tree.newick")
#' }
#' # create a file with multiple trees tree in TNT format to convert to newick format
#' writeLines(
#' text = c(
#' "tread 'some comment'",
#' "(Taxon_A ((Taxon_B Taxon_C)(Taxon_D Taxon_E)))*",
#' "(Taxon_A (Taxon_B (Taxon_C (Taxon_D Taxon_E))))*",
#' "(Taxon_A (Taxon_C (Taxon_B (Taxon_D Taxon_E))));",
#' "proc-;"
#' ),
#' con = "someTrees.tre"
#' )
#' tnt2newick(file = "someTrees.tre", return = TRUE)
#' file.remove("someTrees.tre")
#' @export

tnt2newick <- function(file, output, subsetting = TRUE){
tree <- readLines(file)
if(subsetting) {
tnt2newick <- function(file, output = NULL, string = NULL, return = FALSE, subsetting = FALSE, name.sep = NULL){

# check that input file exists or a tree string is provided
if (!file.exists(file) & is.null(string)) {
stop(paste("File ", file, " does not exist and \'string\'is NULL, you need either an input \'file\' or an object with trees in \'string\'", sep = ""))
}

# check that output objects or instructions are provided
if (is.null(output) & !(return)) {
stop("An output file is required when return = FALSE")
}

# both output and return can not be in effect at the same time
if (!is.null(output) & return) {
warning("Both \'output\' and \'return\' are in use when only one of them should be. The output will be returned and not written to a file. Provide the argument output with a path to the output file and set return = FALSE")
}

# warn about a pre-existing file
if (!(is.null(output)) & !(return)) {
if (file.exists(output)) {
warning(paste("File ", output, " already exists, it will be overwritten", sep = ""))
}
}

# check whether we are using a string or a file in the disk
if (!is.null(string)) {
tree <- string
} else {
tree <- readLines(file)
}

# use subsetting to remove tread and proc commands, probably not the safest choice
if (subsetting) {
tree <- tree[-c(1,length(tree))]
} else {
tree <- gsub(pattern = "tread.*", replacement = "", x = tree, ignore.case = TRUE)
tree <- gsub(pattern = "proc-;.*", replacement = "", x = tree, ignore.case = TRUE)
tree <- gsub(pattern = "proc/.*", replacement = "", x = tree, ignore.case = TRUE)
}


# string replacements
tree <- gsub(pattern = "*", replacement = ";", x = tree, fixed = TRUE)
tree <- gsub(pattern = " ", replacement = ",", x = tree, fixed = TRUE)
tree <- gsub(pattern = ")(", replacement = "),(", x = tree, fixed = TRUE)
tree <- gsub(pattern = ",)", replacement = ")", x = tree, fixed = TRUE)
tree <- gsub(pattern = "_", replacement = " ", x = tree, fixed = TRUE)

# If name separators are to be used, look for the first one and replace with the second one
if (!is.null(name.sep)) {
tree <- gsub(pattern = name.sep[1], replacement = name.sep[2], x = tree, fixed = TRUE)
}

# some more string replacements
tree <- gsub(pattern = ",=", replacement = ":", x = tree, fixed = TRUE)
tree <- gsub(pattern = "=", replacement = ":", x = tree, fixed = TRUE)
tree <- gsub(pattern = " =", replacement = ":", x = tree, fixed = TRUE)
tree <- gsub(pattern = "=", replacement = ":", x = tree, fixed = TRUE)
tree <- gsub(pattern = ":/", replacement = "", x = tree, fixed = TRUE)
tree <- gsub(pattern = ":", replacement = " ", x = tree, fixed = TRUE)
if(length(which(tree == "")) == 0) {
writeLines(text = tree, con = output)
} else {

# strip blank lines before returning or writing to file
if (length(which(tree == "")) != 0) {
tree <- tree[-which(tree == "")]
}

# either return to the screen or write to a file
if (return) {
return(tree)
} else {
writeLines(text = tree, con = output)
}
}
36 changes: 29 additions & 7 deletions man/tnt2newick.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

58 changes: 58 additions & 0 deletions tests/testthat/test-tnt2newick.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,58 @@
# create some input tree and their expected outputs

# create a file with multiple trees in TNT format to convert to newick format
writeLines(
text = c(
"tread \'some comment\'",
"(Taxon_A ((Taxon_B Taxon_C)(Taxon_D Taxon_E)))*",
"(Taxon_A (Taxon_B (Taxon_C (Taxon_D Taxon_E))))*",
"(Taxon_A (Taxon_C (Taxon_B (Taxon_D Taxon_E))));",
"proc-;"),
con = "someTrees.tre"
)

# create a file with multiple trees newick format to convert
writeLines(
text = c(
"(Taxon_A,((Taxon_B,Taxon_C),(Taxon_D,Taxon_E)));",
"(Taxon_A,(Taxon_B,(Taxon_C,(Taxon_D,Taxon_E))));",
"(Taxon_A,(Taxon_C,(Taxon_B,(Taxon_D,Taxon_E))));"),
con = "someTrees.newick"
)



#file, output = NULL, string = NULL, return = FALSE, subsetting = FALSE, name.sep = NULL

test_that("input file does not exist while string is NULL", {
expect_error(tnt2newick(file = "file_that_does_not_exist.tre", output = NULL, string = NULL, return = FALSE, subsetting = FALSE, name.sep = NULL),
regexp = "does not exist",
ignore.case = TRUE)
})

test_that("output is NULL and return is FALSE", {
expect_error(tnt2newick(file = "someTrees.tre", output = NULL, string = NULL, return = FALSE, subsetting = FALSE, name.sep = NULL),
regexp = "output file is required",
ignore.case = TRUE)
})

test_that("output file and return can not be in effect at the same time", {
expect_warning(tnt2newick(file = "someTrees.tre", output = "outputTrees.newick", string = NULL, return = TRUE, subsetting = FALSE, name.sep = NULL),
regexp = "are in use when only one of them should be",
ignore.case = TRUE)
})

test_that("output file already exists", {
expect_warning(tnt2newick(file = "someTrees.tre", output = "someTrees.newick", string = NULL, return = FALSE, subsetting = FALSE, name.sep = NULL),
regexp = "already exists, it will be overwritten",
ignore.case = TRUE)
})

test_that("number of name separators got completely replaced using name.sep", {
expect_equal(sum(sapply(gregexpr(pattern = "_", text = readLines(con = "someTrees.tre"), fixed=TRUE), function(i) sum(i > 0))),
sum(sapply(gregexpr(pattern = "--", text = tnt2newick(file = "someTrees.tre", output = NULL, string = NULL, return = TRUE, subsetting = FALSE, name.sep = c("_", "--")), fixed=TRUE), function(i) sum(i > 0))))
})

# clean tesating files
file.remove(dir(pattern = ".tre$"))
file.remove(dir(pattern = ".newick$"))

0 comments on commit f2feaa3

Please sign in to comment.