10
10
# ' @param prior the type of prior to use. Can be one of 'baps' and 'mean' (default=baps)
11
11
# ' @param check.fasta whether to check the fasta file for issue. Slows things down a little but may be avoided for very large fasta files.
12
12
# '
13
- # ' @return A sparse matrix reprsentation of the SNPs (different to the consensus sequence)
13
+ # ' @return A sparse matrix representation of the SNPs (different to the consensus sequence)
14
14
# '
15
15
# ' @examples
16
16
# ' fasta <- system.file("extdata", "seqs.fa", package = "fastbaps")
17
- # ' fasta <- ape::read.FASTA(fasta)
18
17
# ' sparse.data <- import_fasta_sparse_nt(fasta)
19
18
# '
19
+ # ' fasta <- ape::read.FASTA(fasta)
20
+ # ' sparse.data.ape <- import_fasta_sparse_nt(fasta)
21
+ # '
22
+ # '
20
23
# ' @export
21
24
import_fasta_sparse_nt <- function (fasta , prior = ' baps' , check.fasta = TRUE ){
22
25
@@ -38,10 +41,10 @@ import_fasta_sparse_nt <- function(fasta, prior='baps', check.fasta=TRUE){
38
41
seqnames <- rownames(fasta )
39
42
40
43
cons_ref <- c(a = 0 ,c = 1 ,g = 2 ,t = 3 ,`-` = 4 ,`n` = 4 )
41
- cosensus <- apply(fasta [ 2 : nrow( fasta ),, drop = FALSE ] , 2 , function (x ){
42
- tbl <- table(x )
43
- cons_ref [names( tbl )[which.max( tbl )]]
44
- })
44
+ allele_counts <- apply(fasta , 2 , function (x ){
45
+ c( table(factor ( x , levels = c( ' a ' , ' c ' , ' g ' , ' t ' , ' - ' , ' n ' ))) )
46
+ }, simplify = TRUE )
47
+ cosensus <- cons_ref [apply( allele_counts , 2 , which.max )]
45
48
46
49
fasta [fasta == ' a' ] <- 1
47
50
fasta [fasta == ' c' ] <- 2
@@ -51,7 +54,10 @@ import_fasta_sparse_nt <- function(fasta, prior='baps', check.fasta=TRUE){
51
54
fasta [fasta == ' n' ] <- 5
52
55
fasta <- apply(fasta , 2 , as.numeric )
53
56
54
- ij <- which(t(fasta ) != (cosensus + 1 ), arr.ind = TRUE )
57
+ ij <- which((t(fasta ) != (cosensus + 1 )) & (t(fasta )!= 5 ), arr.ind = TRUE )
58
+ # remove singletons
59
+ ij <- ij [allele_counts [cbind(t(fasta )[ij ], ij [,1 ])] > 1 , , drop = FALSE ]
60
+
55
61
snp.data <- list (num.seqs = nrow(fasta ),
56
62
consensus = cosensus ,
57
63
seq.length = ncol(fasta ),
@@ -61,6 +67,7 @@ import_fasta_sparse_nt <- function(fasta, prior='baps', check.fasta=TRUE){
61
67
dims = c(snp.data $ seq.length , snp.data $ num.seqs ),
62
68
dimnames = list (1 : snp.data $ seq.length , seqnames )))
63
69
70
+
64
71
} else {
65
72
snp.data <- import_fasta_to_vector_each_nt(fasta )
66
73
snp.data $ seq.names <- gsub(" ^>" ," " ,snp.data $ seq.names )
0 commit comments