diff --git a/Project.toml b/Project.toml
new file mode 100644
index 000000000..6a964c599
--- /dev/null
+++ b/Project.toml
@@ -0,0 +1,49 @@
+name = "PhyloNetworks"
+uuid = "33ad39ac-ed31-50eb-9b15-43d0656eaa72"
+license = "MIT"
+version = "0.10.0"
+
+[deps]
+BioSequences = "7e6ae17a-c86d-528c-b3b9-7f778a29fe59"
+BioSymbols = "3c28c6f8-a34d-59c4-9654-267d177fcfa9"
+CSV = "336ed68f-0bac-5ca0-87d4-7b16caf5d00b"
+Combinatorics = "861a8166-3701-5b0c-9a16-15d98fcdc6aa"
+DataFrames = "a93c6f00-e57d-5684-b7b6-d8193f3e46c0"
+DataStructures = "864edb3b-99cc-5e75-8d2d-829cb0a9cfe8"
+Dates = "ade2ca70-3891-5945-98fb-dc099432e06a"
+Distributed = "8ba89e20-285c-5b6f-9357-94700520ee1b"
+Distributions = "31c24e10-a181-5473-b8eb-7969acd0382f"
+GLM = "38e38edf-8417-5370-95a0-9cbb8c7f171a"
+LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e"
+NLopt = "76087f3c-5699-56af-9a33-bf431cd00edd"
+Printf = "de0858da-6303-5e67-8744-51eddeeeb8d7"
+Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c"
+SpecialFunctions = "276daf66-3868-5448-9aa4-cd146d93841b"
+StaticArrays = "90137ffa-7385-5640-81b9-e52037218182"
+Statistics = "10745b16-79ce-11e8-11f9-7d13ad32a3b2"
+StatsBase = "2913bbd2-ae8a-5f71-8c99-4fb6c76f3a91"
+StatsFuns = "4c63d2b9-4356-54db-8cca-17b64c39e42c"
+StatsModels = "3eaba693-59b7-5ba5-a881-562e759f1c8d"
+
+[compat]
+BioSequences = "≥ 1.0.0"
+BioSymbols = "≥ 3.0.0"
+CSV = "≥ 0.4.0"
+Combinatorics = "≥ 0.7.0"
+DataFrames = "≥ 0.13.0"
+DataStructures = "≥ 0.9.0"
+Distributions = "≥ 0.15.0"
+GLM = "≥ 1.0.0"
+NLopt = "≥ 0.5.1"
+SpecialFunctions = "≥ 0.7.0"
+StaticArrays = "≥ 0.8.3"
+StatsBase = "≥ 0.26.0"
+StatsFuns = "≥ 0.7.0"
+StatsModels = "≥ 0.6.0"
+julia = "≥ 0.7.0"
+
+[extras]
+Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40"
+
+[targets]
+test = ["Test"]
diff --git a/REQUIRE b/REQUIRE
deleted file mode 100644
index a048f653f..000000000
--- a/REQUIRE
+++ /dev/null
@@ -1,15 +0,0 @@
-julia 0.7
-BioSequences 1.0
-BioSymbols 3.0
-Combinatorics 0.7
-CSV 0.4
-DataFrames 0.13
-DataStructures 0.9
-Distributions 0.15.0
-GLM 1.0
-NLopt 0.5.1
-SpecialFunctions 0.7
-StaticArrays 0.8.3
-StatsBase 0.26
-StatsFuns 0.7
-StatsModels 0.3
diff --git a/benchmark/benchmarks.jl b/benchmark/benchmarks.jl
index 0a5cb7836..4635f295b 100644
--- a/benchmark/benchmarks.jl
+++ b/benchmark/benchmarks.jl
@@ -9,7 +9,7 @@ const SUITE = BenchmarkGroup()
 
 SUITE["nasm"] = BenchmarkGroup(["JC69", "HKY85"])
 SUITE["fitDiscreteFixed"] = BenchmarkGroup(["ERSM", "BTSM", "JC69", "HKY85"])
-SUITE["fitDiscrete"] = BenchmarkGroup(["ERSM", "BTSM", "JC69", "HKY85"])
+SUITE["fitdiscrete"] = BenchmarkGroup(["ERSM", "BTSM", "JC69", "HKY85"])
 
 # Add benchmarks to nasm group
 SUITE["nasm"]["JC69"] = @benchmarkable JC69([0.5])
@@ -20,8 +20,8 @@ SUITE["nasm"]["HKY85"] = @benchmarkable P!(P(m1, 1.0), m1, 3.0)
 net_dat = readTopology("(((A:2.0,(B:1.0)#H1:0.1::0.9):1.5,(C:0.6,#H1:1.0::0.1):1.0):0.5,D:2.0);")
 species_alone = ["C","A","B","D"]
 dat_alone = DataFrame(trait=["hi","lo","lo","hi"])
-SUITE["fitDiscreteFixed"]["ERSM"] = @benchmarkable fitDiscrete(net_dat, :ERSM, species_alone, dat_alone; optimizeQ=false, optimizeRVAS=false)
-SUITE["fitDiscreteFixed"]["BTSM"] = @benchmarkable fitDiscrete(net_dat, :BTSM, species_alone, dat_alone; optimizeQ=false, optimizeRVAS=false)
+SUITE["fitDiscreteFixed"]["ERSM"] = @benchmarkable fitdiscrete(net_dat, :ERSM, species_alone, dat_alone; optimizeQ=false, optimizeRVAS=false)
+SUITE["fitDiscreteFixed"]["BTSM"] = @benchmarkable fitdiscrete(net_dat, :BTSM, species_alone, dat_alone; optimizeQ=false, optimizeRVAS=false)
 
 fastafile = joinpath(@__DIR__, "..", "examples", "Ae_bicornis_Tr406_Contig10132.aln")
 dna_dat, dna_weights = readfastatodna(fastafile, true);
@@ -32,14 +32,14 @@ for edge in net_dna.edge #adds branch lengths
         setGamma!(edge, 0.5)
     end
 end
-SUITE["fitDiscreteFixed"]["JC69"] = @benchmarkable fitDiscrete(net_dna, :JC69, dna_dat, dna_weights; optimizeQ=false, optimizeRVAS=false)
-SUITE["fitDiscreteFixed"]["HKY85"] = @benchmarkable fitDiscrete(net_dna, :HKY85, dna_dat, dna_weights; optimizeQ=false, optimizeRVAS=false)
-
-## fitDiscrete benchmarks
-SUITE["fitDiscrete"]["ERSM"] = @benchmarkable fitDiscrete(net_dat, :ERSM, species_alone, dat_alone; optimizeQ=true, optimizeRVAS=true)
-SUITE["fitDiscrete"]["BTSM"] = @benchmarkable fitDiscrete(net_dat, :BTSM, species_alone, dat_alone; optimizeQ=true, optimizeRVAS=true)
-SUITE["fitDiscrete"]["JC69"] = @benchmarkable fitDiscrete(net_dna, :JC69, dna_dat, dna_weights; optimizeQ=true, optimizeRVAS=true)
-SUITE["fitDiscrete"]["HKY85"] = @benchmarkable fitDiscrete(net_dna, :HKY85, dna_dat, dna_weights; optimizeQ=true, optimizeRVAS=true)
+SUITE["fitDiscreteFixed"]["JC69"] = @benchmarkable fitdiscrete(net_dna, :JC69, dna_dat, dna_weights; optimizeQ=false, optimizeRVAS=false)
+SUITE["fitDiscreteFixed"]["HKY85"] = @benchmarkable fitdiscrete(net_dna, :HKY85, dna_dat, dna_weights; optimizeQ=false, optimizeRVAS=false)
+
+## fitdiscrete benchmarks
+SUITE["fitdiscrete"]["ERSM"] = @benchmarkable fitdiscrete(net_dat, :ERSM, species_alone, dat_alone; optimizeQ=true, optimizeRVAS=true)
+SUITE["fitdiscrete"]["BTSM"] = @benchmarkable fitdiscrete(net_dat, :BTSM, species_alone, dat_alone; optimizeQ=true, optimizeRVAS=true)
+SUITE["fitdiscrete"]["JC69"] = @benchmarkable fitdiscrete(net_dna, :JC69, dna_dat, dna_weights; optimizeQ=true, optimizeRVAS=true)
+SUITE["fitdiscrete"]["HKY85"] = @benchmarkable fitdiscrete(net_dna, :HKY85, dna_dat, dna_weights; optimizeQ=true, optimizeRVAS=true)
 
 # If a cache of tuned parameters already exists, use it, otherwise, tune and cache
 # the benchmark parameters. Reusing cached parameters is faster and more reliable
diff --git a/docs/src/index.md b/docs/src/index.md
index 311d39b83..30723f383 100644
--- a/docs/src/index.md
+++ b/docs/src/index.md
@@ -48,7 +48,7 @@ Pages = [
     "man/multiplealleles.md",
     "man/trait_tree.md",
     "man/parsimony.md",
-    "man/fitDiscrete.md"
+    "man/fitdiscrete.md"
 ]
 Depth = 3
 ```
diff --git a/docs/src/lib/public.md b/docs/src/lib/public.md
index db5ac6582..b9500324b 100644
--- a/docs/src/lib/public.md
+++ b/docs/src/lib/public.md
@@ -137,11 +137,11 @@ parsimonyGF
 Q
 randomTrait
 randomTrait!
-fitDiscrete
+fitdiscrete
 maxParsimonyNet
 nstates
-setrates!
-setalpha!
+stationary
+empiricalDNAfrequencies
 ```
 
 ```@meta
diff --git a/docs/src/man/fitDiscrete.md b/docs/src/man/fitDiscrete.md
index 4be492f6f..d831ae721 100644
--- a/docs/src/man/fitDiscrete.md
+++ b/docs/src/man/fitDiscrete.md
@@ -1,137 +1,142 @@
+```@setup traitevol_fixednet
+using PhyloNetworks
+using DataFrames
+mkpath("../assets/figures")
+```
+
 # Discrete Trait Evolution
 
 With a phylogenetic network structure inferred, we can now estimate how quickly traits
-have evolved over time using a likelihood model. These traits should be discrete 
-characteristics of a species such as feather color, diet type, or dna in aligned 
-genetic sequences.
+have evolved over time using a likelihood model. These traits should be discrete
+characteristics of a species such as feather color, diet type,
+or DNA in aligned genetic sequences.
 
 ## Discrete Trait Data
 
-As with continuous trait evolution, we assume a fixed network, correctly rooted, 
+As with continuous trait evolution, we assume a fixed network, correctly rooted,
 with branch lengths proportional to calendar time. We start with a network, then
 add data about the tips of this network. We allow data of two types.
-1. A vector of species names with a data frame of traits:
 
-```@setup fitDiscrete
-using PhyloNetworks, DataFrames
-mkpath("../assets/figures")
-```
-
-```@example fitDiscrete
-#read in network
-simple_net = readTopology("(A:3.0,(B:2.0,(C:1.0,D:1.0):1.0):1.0);");
-
-#read in trait data
-simple_species = ["C","A","B","D"]
-simple_dat = DataFrame(trait=["hi","lo","lo","hi"])
-```
-
-If your species names and trait data are in the same data frame, read in your data 
-frame then subset the data like this:
-```@example fitDiscrete
-dat = DataFrame(species=["C","A","B","D"], trait=["hi","lo","lo","hi"])
-simple_species = dat[:species]
-simple_dat = DataFrame(trait = dat[:trait])
-```
-
-2. To use dna data, read in the network structure then start with a fasta 
-file. Reading the data from this file using the `readfastatodna` function. This 
-creates a data frame of dna data and a vector of dna pattern weights.
-
-```@example fitDiscrete
-#read in network
-dna_net = readTopology("((((((((((((((Ae_caudata_Tr275:1.0,Ae_caudata_Tr276:1.0):1.0,Ae_caudata_Tr139:1.0):1.0)#H1:1.0::0.6,((((((Ae_longissima_Tr241:1.0,Ae_longissima_Tr242:1.0):1.0,Ae_longissima_Tr355:1.0):1.0,(Ae_sharonensis_Tr265:1.0,Ae_sharonensis_Tr264:1.0):1.0):1.0,((Ae_bicornis_Tr408:1.0,Ae_bicornis_Tr407:1.0):1.0,Ae_bicornis_Tr406:1.0):1.0):1.0,((Ae_searsii_Tr164:1.0,Ae_searsii_Tr165:1.0):1.0,Ae_searsii_Tr161:1.0):1.0):1.0)#H2:1.0::0.6):1.0,(((Ae_umbellulata_Tr266:1.0,Ae_umbellulata_Tr257:1.0):1.0,Ae_umbellulata_Tr268:1.0):1.0,#H1:1.0::0.4):1.0):1.0,((Ae_comosa_Tr271:1.0,Ae_comosa_Tr272:1.0):1.0,(((Ae_uniaristata_Tr403:1.0,Ae_uniaristata_Tr357:1.0):1.0,Ae_uniaristata_Tr402:1.0):1.0,Ae_uniaristata_Tr404:1.0):1.0):1.0):1.0,(((Ae_tauschii_Tr352:1.0,Ae_tauschii_Tr351:1.0):1.0,(Ae_tauschii_Tr180:1.0,Ae_tauschii_Tr125:1.0):1.0):1.0,(#H2:1.0::0.4,((((Ae_mutica_Tr237:1.0,Ae_mutica_Tr329:1.0):1.0,Ae_mutica_Tr244:1.0):1.0,Ae_mutica_Tr332:1.0):1.0)#H4:1.0::0.6):1.0):1.0):1.0,(((T_boeoticum_TS8:1.0,(T_boeoticum_TS10:1.0,T_boeoticum_TS3:1.0):1.0):1.0,T_boeoticum_TS4:1.0):1.0,((T_urartu_Tr315:1.0,T_urartu_Tr232:1.0):1.0,(T_urartu_Tr317:1.0,T_urartu_Tr309:1.0):1.0):1.0):1.0):1.0,(((((Ae_speltoides_Tr320:1.0,Ae_speltoides_Tr323:1.0):1.0,Ae_speltoides_Tr223:1.0):1.0,Ae_speltoides_Tr251:1.0):1.0):1.0,#H4:1.0::0.4):1.0):1.0):1.0,Ta_caputMedusae_TB2:1.0):1.0,S_vavilovii_Tr279:1.0):1.0,Er_bonaepartis_TB1:1.0):1.0,H_vulgare_HVens23:1.0);");
+1. A vector of species names with a data frame of traits:
 
-#read in dna data
-fastafile = joinpath(dirname(pathof(PhyloNetworks)), "..","examples",
-"Ae_bicornis_Tr406_Contig10132.aln")
-dna_dat, dna_weights = readfastatodna(fastafile, true);
-```
+   ```@example traitevol_fixednet
+   # read in network
+   net = readTopology("(A:3,((B:0.4)#H1:1.6::0.92,((C:0.4,#H1:0::0.08):0.6,D:1):1):1);");
+   # read in trait data
+   species = ["C","A","B","D"]
+   dat = DataFrame(trait=["hi","lo","lo","hi"])
+   ```
+
+   If your species names and trait data are in the same data frame,
+   read in your data frame then subset the data like this:
+   ```@example traitevol_fixednet
+   dat = DataFrame(species=["C","A","B","D"], trait=["hi","lo","lo","hi"])
+   species = dat[:species]
+   dat = DataFrame(trait = dat[:trait])
+   ```
+
+2. To use dna data, read in the network structure then start with a fasta
+   file. Reading the data from this file using the `readfastatodna` function.
+   This creates a data frame of dna data and a vector of dna pattern weights.
+
+   ```@example traitevol_fixednet
+   # read in network
+   dna_net = readTopology("((((((((((((((Ae_caudata_Tr275:1.0,Ae_caudata_Tr276:1.0):1.0,Ae_caudata_Tr139:1.0):1.0)#H1:1.0::0.6,((((((Ae_longissima_Tr241:1.0,Ae_longissima_Tr242:1.0):1.0,Ae_longissima_Tr355:1.0):1.0,(Ae_sharonensis_Tr265:1.0,Ae_sharonensis_Tr264:1.0):1.0):1.0,((Ae_bicornis_Tr408:1.0,Ae_bicornis_Tr407:1.0):1.0,Ae_bicornis_Tr406:1.0):1.0):1.0,((Ae_searsii_Tr164:1.0,Ae_searsii_Tr165:1.0):1.0,Ae_searsii_Tr161:1.0):1.0):1.0)#H2:1.0::0.6):1.0,(((Ae_umbellulata_Tr266:1.0,Ae_umbellulata_Tr257:1.0):1.0,Ae_umbellulata_Tr268:1.0):1.0,#H1:1.0::0.4):1.0):1.0,((Ae_comosa_Tr271:1.0,Ae_comosa_Tr272:1.0):1.0,(((Ae_uniaristata_Tr403:1.0,Ae_uniaristata_Tr357:1.0):1.0,Ae_uniaristata_Tr402:1.0):1.0,Ae_uniaristata_Tr404:1.0):1.0):1.0):1.0,(((Ae_tauschii_Tr352:1.0,Ae_tauschii_Tr351:1.0):1.0,(Ae_tauschii_Tr180:1.0,Ae_tauschii_Tr125:1.0):1.0):1.0,(#H2:1.0::0.4,((((Ae_mutica_Tr237:1.0,Ae_mutica_Tr329:1.0):1.0,Ae_mutica_Tr244:1.0):1.0,Ae_mutica_Tr332:1.0):1.0)#H4:1.0::0.6):1.0):1.0):1.0,(((T_boeoticum_TS8:1.0,(T_boeoticum_TS10:1.0,T_boeoticum_TS3:1.0):1.0):1.0,T_boeoticum_TS4:1.0):1.0,((T_urartu_Tr315:1.0,T_urartu_Tr232:1.0):1.0,(T_urartu_Tr317:1.0,T_urartu_Tr309:1.0):1.0):1.0):1.0):1.0,(((((Ae_speltoides_Tr320:1.0,Ae_speltoides_Tr323:1.0):1.0,Ae_speltoides_Tr223:1.0):1.0,Ae_speltoides_Tr251:1.0):1.0):1.0,#H4:1.0::0.4):1.0):1.0):1.0,Ta_caputMedusae_TB2:1.0):1.0,S_vavilovii_Tr279:1.0):1.0,Er_bonaepartis_TB1:1.0):1.0,H_vulgare_HVens23:1.0);");
+   # read in dna data
+   fastafile = joinpath(dirname(pathof(PhyloNetworks)), "..","examples","Ae_bicornis_Tr406_Contig10132.aln")
+   dna_dat, dna_weights = readfastatodna(fastafile, true);
+   dna_dat
+   dna_weights
+   ```
 
 ## Choosing a Substitution Model
 
-After reading in your data, choose a model to describe how evolutionary changes 
-(or substitutions, in the case of DNA) happened over time. We offer a selection 
-of Markov substitution models to describe the evolutionary process.
+After reading in your data, choose a model to describe how evolutionary changes
+(or substitutions, in the case of DNA) happened over time.
+Available Markov substitution models are described below.
 
-### Generic Trait Models  
+### Generic Trait Models
 
 These models works well for any type of trait we may want to model. For general
-trait types, use one of these three models:    
-`:ERSM` Equal Rates Substitution Model  
-`:BTSM` Binary Trait Substitution Model    
-`:TBTSM` Two Binary Trait Substituion Model    
-
-### DNA-Specific Models  
-
-The DNA-specific models are optimized for aligned sequence data. We offer JC69 
-and HKY85 models in both relative and absolute versions. The JC69 model was 
-developed by Jukes and Cantor in 1969 and uses one rate for all type of substitutions. 
-The HKY85 model was developed in 1985 by Hasegawa, Kishino, & Yano. It treats 
-transitions differently from transversions.    
-`:JC69` Jukes Cantor 69 Model    
-`:HKY85` Hasegawa, Kishino and Yano 1985     
-
-## Running FitDiscrete
-
-To infer evolutionary rates, run the `fitDiscrete` function on the network and data. 
-It will calculate the maximum likelihood score of a network given one or more 
-discrete trait characters at the tips. Along each edge, evolutionary changes
-are modeled with a continous time Markov model, with parameters estimated by 
-maximizing the likelihood. At each hybrid node, the trait is assumed to be 
-inherited from the immediate parent (or parents, in the case of a hybrid edge).
-If there is a hybrid edge, the trait is modeled according to the parents' weighted 
-average genetic contributions, as measured by inheritance gamma γ. The model 
-currently ignores incomplete lineage sorting.
+trait types, use one of these three models:
+- `:BTSM` Binary Trait Substitution Model (2 states, rates unconstrained)
+- `:ERSM` Equal Rates Substitution Model
+  (`k` states, all transitions possible with equal rates)
+- `:TBTSM` Two Binary Trait Substituion Model (though not fully implemented yet)
+
+### DNA-Specific Models
+
+The DNA-specific models are optimized for aligned sequence data.
+The 4 nucleotide states are from
+[BioSymbols](https://github.com/BioJulia/BioSymbols.jl)
+(listed [here](http://biojulia.net/BioSymbols.jl/stable/nucleicacids/)).
+Each model has a relative and an absolute version.
+- `:JC69` Jukes & Cantor 1969 model: one single rate for all transitions.
+  The relative version has values -1 along the diagonal of the rate matrix
+  (1 expected transition / unit of time). The absolute version has an extra
+  parameter to scale the rate matrix.
+- `:HKY85` Hasegawa, Kishino & Yano 1985: treats transitions differently
+  from transversions.
+
+## Fitting the model
+
+To infer evolutionary rates, run the `fitdiscrete` function on the network and data.
+It will calculate the maximum likelihood score of a fixed network
+given one or more discrete trait characters at the tips.
+Along each edge, evolutionary changes
+are modeled with a continous time Markov model, with parameters estimated by
+maximizing the likelihood. At each hybrid node, the trait is assumed to be
+inherited from the immediate parent (or parents, in the case of a hybrid node).
+At a hybrid node, the trait is assumed to be inherited from one or the other
+parent, with probabilities equal to the inheritance γ of each parent edge
+(which is given by the network).
+The model ignores incomplete lineage sorting (e.g. hemiplasy).
 
 ### General Trait Data
 
-```@example fitDiscrete
-s1 = fitDiscrete(simple_net, :ERSM, simple_species, simple_dat; optimizeQ=false, 
-optimizeRVAS=false)
-s2 = fitDiscrete(simple_net, :BTSM, simple_species, simple_dat; optimizeQ=false, 
-optimizeRVAS=false)
+```@repl traitevol_fixednet
+s1 = fitdiscrete(net, :ERSM, species, dat; optimizeQ=false)
+s2 = fitdiscrete(net, :BTSM, species, dat; optimizeQ=false)
 ```
-In this `fitDiscrete` call, we do not optimize rates or allow for rate variation
-across sites.
-
-If `optimizeQ = true`, the `fitDiscrete` function estimates the evolutionary rate
-or rates. Because we didn't allow for rate variation across sites in these models, 
-we do not optimize the way rates may vary across trait types.
-
-```@example fitDiscrete
-s3 = fitDiscrete(simple_net, :ERSM, simple_species, simple_dat; optimizeQ=true, 
-optimizeRVAS=true)
-s4 = fitDiscrete(simple_net, :BTSM, simple_species, simple_dat; optimizeQ=true, 
-optimizeRVAS=true)
+In this `fitdiscrete` call, we do not optimize rates or allow for rate variation
+across sites. The default rates (which act as starting value if rates
+were to be optimized) are chosen equal to the inverse of the total edge lengths
+in the network (or 1/ntax if all branch lengths are missing).
+
+If `optimizeQ = true` (which is the default), the `fitdiscrete`
+function estimates the parameters of the rate matrix.
+Because we didn't allow for rate variation across sites in these models,
+there is nothing to optimize in the way rates may vary across traits (sites).
+
+```@repl traitevol_fixednet
+s3 = fitdiscrete(net, :ERSM, species, dat)
+s4 = fitdiscrete(net, :BTSM, species, dat)
 ```
 
 ### DNA Data
 
-For DNA data, use `:JC69` or `:HKY85` models. 
-```@example fitDiscrete
-d1 = fitDiscrete(dna_net, :JC69, dna_dat, dna_weights, :RV; optimizeQ=false, 
-optimizeRVAS=false)
-d2 = fitDiscrete(dna_net, :HKY85, dna_dat, dna_weights, :RV; optimizeQ=false, 
-optimizeRVAS=false)
+For DNA data, use one of `:JC69` or `:HKY85`.
+To allow for rate variation across sites, use the `:RV` option.
+
+```@example traitevol_fixednet
+d1 = fitdiscrete(dna_net, :JC69, dna_dat, dna_weights, :RV; optimizeQ=false, optimizeRVAS=false)
+d2 = fitdiscrete(dna_net, :HKY85, dna_dat, dna_weights, :RV; optimizeQ=false, optimizeRVAS=false)
 ```
-In these `fitDiscrete` models, we do not optimize rates (`optimizeQ=false`), but
-we do allow for rate variation across sites.
+In these `fitdiscrete` models, we do not optimize rates (`optimizeQ=false`), but
+we do allow for rate variation across sites, with a default α of 1.
 
 ### Rate Variation Across Sites
 
-In its default version, `fitDiscrete` does not allow for rate variation across sites.
-To allow for rate variation across sites in your estimate of evolutionary rates 
-(or rate variation across trait types, in the case of general trait types), 
-include `:RV`. If you include `:RV` and `optimizeRVAS = true`, the model will 
-not only allow for rate variation, but it will also optimize how rates vary across 
-sites.
+In its default version, `fitdiscrete` does not allow for rate variation across sites.
+To allow for rate variation across sites in your estimate of evolutionary rates
+(or rate variation across traits in the case of general traits),
+include `:RV`. If you include `:RV` and `optimizeRVAS = true`,
+the model will allow for rate variation and
+also optimize the parameter α of the distribution of rates across sites.
 
 We optimize the evolutionary rates and the way rates vary across sites for the
-DNA data here:    
-```@example fitDiscrete
-d3 = fitDiscrete(dna_net, :JC69, dna_dat, dna_weights, :RV; optimizeQ=true, 
-optimizeRVAS=false)
-d4 = fitDiscrete(dna_net, :HKY85, dna_dat, dna_weights, :RV; optimizeQ=true, 
-optimizeRVAS=false)
-```
\ No newline at end of file
+DNA data here:
+```@repl traitevol_fixednet
+d3 = fitdiscrete(dna_net, :JC69, dna_dat, dna_weights, :RV; optimizeRVAS=false)
+d4 = fitdiscrete(dna_net, :HKY85, dna_dat, dna_weights, :RV)
+```
diff --git a/docs/src/man/fixednetworkoptim.md b/docs/src/man/fixednetworkoptim.md
index a8e5b70c8..85114b689 100644
--- a/docs/src/man/fixednetworkoptim.md
+++ b/docs/src/man/fixednetworkoptim.md
@@ -8,9 +8,9 @@ This is useful if we have a few candidate networks to compare.
 Each network can be optimized individually, and the network with the best
 pseudolikelihood can be chosen.
 
-The score being optimized is the pseudo-deviance, i.e.
-the negative log pseudo-likelihood up to an additive constant
-(the lower the better).
+The score being optimized is a pseudo-deviance, i.e.
+a multiple of the negative log pseudo-likelihood up to an additive constant
+(the lower the better; a pseudo-deviance of 0 corresponds to a perfect fit).
 
 Following our example in [Getting a Network](@ref),
 we can optimize parameters on the true network
@@ -29,7 +29,7 @@ raxmlCF = readTrees2CF(raxmltrees, writeTab=false, writeSummary=false)
 truenet = readTopology("((((D:0.4,C:0.4):4.8,((A:0.8,B:0.8):2.2)#H1:2.2::0.7):4.0,(#H1:0::0.3,E:3.0):6.2):2.0,O:11.2);");
 net1alt = topologyMaxQPseudolik!(truenet, raxmlCF);
 writeTopology(net1alt, round=true)
-net1alt.loglik # pseudo deviance, actually
+net1alt.loglik # pseudo deviance actually: the lower the better
 ```
 ```@example fixednetworkoptim
 using PhyloPlots, RCall
@@ -54,16 +54,16 @@ the search stops (but the optimization will take longer).
 It makes no difference on this small data set.
 ```julia
 net1par = topologyMaxQPseudolik!(truenet, raxmlCF, ftolRel=1e-10, xtolAbs=1e-10)
-net1par.loglik
+net1par.loglik # pseudo deviance, actually: the lower the better
 ```
 
 ## Network Score with no optimization
 
 For a network with given branch lengths and γ heritabilies,
-we can compute the pseudolikelihood with:
+we can compute the pseudolikelihood (well, a pseudo-deviance) with:
 ```@repl fixednetworkoptim
 topologyQPseudolik!(truenet,raxmlCF);
-truenet.loglik
+truenet.loglik # again, pseudo deviance
 ```
 This function is not maximizing the pseudolikelihood, it is simply computing the
 pseudolikelihood (or deviance) for the given branch lengths and probabilities of
diff --git a/docs/src/man/snaq_plot.md b/docs/src/man/snaq_plot.md
index f7d167886..f7aba32bf 100644
--- a/docs/src/man/snaq_plot.md
+++ b/docs/src/man/snaq_plot.md
@@ -89,7 +89,8 @@ The file `net1.networks` contains a list of networks that are slight modificatio
 of the best (estimated) network `net1`. The modifications changed the direction
 of one reticulation at a time, by moving the placement of one hybrid node to another
 node inside the same cycle.
-For each modified network, the pseudolikelihood score was calculated.
+For each modified network, the pseudolikelihood score was calculated
+(the `loglik` or `-Ploglik` values give a pseudo deviance actually).
 
 The function name `snaq!` ends with ! because it modifies the argument `raxmlCF`
 by including the expected CF. Type `?` then `snaq!` to get help on that function.
@@ -266,7 +267,7 @@ echo "end of SNaQ run ..."
 ## choosing the number of hybridizations
 
 Each network has a `loglik` attribute, which is its pseudo deviance:
-twice the negative log-likelihood up to a constant (the constant is
+a multiple of the negative log-likelihood up to a constant (the constant is
 such that the score is 0 if the network fits the data perfectly).
 The lower the better. We can plot these scores across hybrid values:
 ```@example snaqplot
diff --git a/docs/src/man/trait_tree.md b/docs/src/man/trait_tree.md
index fe4e8ed0e..b75954ea2 100644
--- a/docs/src/man/trait_tree.md
+++ b/docs/src/man/trait_tree.md
@@ -131,7 +131,7 @@ associated with trait 2 has a high p-value, which means that this coefficient
 is not significantly different from 0. This is consistent with the
 way we simulated trait 3.
 
-The function returns an object of type [`PhyloNetworkLinearModel`](@ref)`<:LinPredModel`.
+The function returns an object of type [`PhyloNetworkLinearModel`](@ref)`<:GLM.LinPredModel`.
 It is a subtype of the GLM type `LinPredModel`, which means that all base
 functions from Julia [StatsBase](https://github.com/JuliaStats/StatsBase.jl) can
 be applied to it. See the documentation for this type for a list of all
diff --git a/src/PhyloNetworks.jl b/src/PhyloNetworks.jl
index 4a8d5382f..37aa253a2 100644
--- a/src/PhyloNetworks.jl
+++ b/src/PhyloNetworks.jl
@@ -2,7 +2,7 @@ __precompile__()
 
 module PhyloNetworks
 
-    # stdlib (standard libraries): no need to list in REQUIRE or Project.toml
+    # stdlib (standard libraries)
     using Dates
     using Distributed
     using LinearAlgebra # for LowerTriangular, logdet, diag
@@ -10,7 +10,7 @@ module PhyloNetworks
     using Random
     using Statistics: mean, quantile
 
-    # other libraries, to list as dependencies
+    # other libraries, indicate compatible version in Project.toml
     using BioSequences
     using BioSymbols
     using Combinatorics: combinations
@@ -135,15 +135,16 @@ module PhyloNetworks
         TwoBinaryTraitSubstitutionModel,
         JC69, HKY85,
         nstates,
-        Q, setrates!,
+        Q,
         getlabels,
         nparams,
         RateVariationAcrossSites,
-        setalpha!,
         randomTrait,
         randomTrait!,
-        fitDiscrete,
+        fitdiscrete,
         readfastatodna,
+        stationary,
+        empiricalDNAfrequencies,
         ## TICR test
         ticr,
         ticr!
diff --git a/src/parsimony.jl b/src/parsimony.jl
index fd26479d0..70496839f 100755
--- a/src/parsimony.jl
+++ b/src/parsimony.jl
@@ -411,7 +411,7 @@ function readfastatodna(fastafile::String, countPatterns=false::Bool)
     species = String[]
     firstspecies = Bool(true)
     nsites = 0
-    for record in reader #species (row)
+    for record in reader #by species (row)
         if firstspecies
             nsites = length(sequence(record))
             for site in 1:nsites # initialize an array for each site
@@ -419,7 +419,7 @@ function readfastatodna(fastafile::String, countPatterns=false::Bool)
             end
             firstspecies = false
         end
-        push!(species, FASTA.identifier(record))
+        push!(species, FASTA.identifier(record)) #adds species name to end of species vector
         length(sequence(record)) == nsites || error("sequences of different length: current sequences is ", 
             length(sequence(record)), " long while first sequence is ", nsites, " long")
         for site in 1:nsites
diff --git a/src/pseudolik.jl b/src/pseudolik.jl
index 3eba3cdb7..8121fa2a7 100644
--- a/src/pseudolik.jl
+++ b/src/pseudolik.jl
@@ -1364,6 +1364,8 @@ function logPseudoLik(quartet::Quartet)
             suma += -1.e15
         else
             suma += quartet.obsCF[i] == 0 ? 0.0 : 100*quartet.obsCF[i]*log(quartet.qnet.expCF[i]/quartet.obsCF[i])
+            # WARNING: 100 should be replaced by -2*ngenes to get the deviance.
+            # below: negative sign used below in logPseudoLik() when summing up across 4-taxon sets
         end
     end
     ## to account for missing data:
diff --git a/src/substitutionModels.jl b/src/substitutionModels.jl
index 43b781650..a4b46816d 100644
--- a/src/substitutionModels.jl
+++ b/src/substitutionModels.jl
@@ -742,7 +742,7 @@ transition/transversion ratio: κ=α/β. The rate transition matrix Q is normali
 
 `nparams` returns 1 or 2.
 In other words: the stationary distribution is not counted in the number of parameters
-(and `fitDiscrete` does not optimize the pi values at the moment).
+(and `fitdiscrete` does not optimize the pi values at the moment).
 
 # examples
 
@@ -785,15 +785,14 @@ struct HKY85 <: NucleicAcidSubstitutionModel
         #Warning: this constructor should not be used directly. Use the constructor below,
         #   which will call this.
         all(rate .> 0.) || error("All elements of rate must be positive")
-        1 <= length(rate) <= 2 || error("rate is not a valid length for HKY85 model")
+        1 <= length(rate) <= 2 || error("rate has invalid length for HKY85 model")
+        if relative length(rate) == 1 || error("the relative version of HKY85 takes a single rate")
+        else        length(rate) == 2 || error("the absolute version of HKY85 takes 2 rates")
+        end
         length(pi) == 4 || error("pi must be of length 4")
         all(0. .< pi.< 1.) || error("All base proportions must be between 0 and 1")
         sum(pi) == 1. || error("Base proportions must sum to 1")
-        if length(rate) == 1
-            new(rate, pi, true, eigeninfo)
-        else
-            new(rate, pi, false, eigeninfo)
-        end
+        new(rate, pi, relative, eigeninfo)
     end
 end
 function HKY85(rate::AbstractVector, pi::Vector{Float64}, relative=true::Bool)
@@ -801,7 +800,7 @@ function HKY85(rate::AbstractVector, pi::Vector{Float64}, relative=true::Bool)
     seteigeninfo!(obj)
     return obj
 end
-HKY85(rate::Float64, pi::Vector{Float64}, relative=true::Bool) = HKY85([rate], pi, relative)
+HKY85(rate::Float64, pi::Vector{Float64}) = HKY85([rate], pi, true)
 
 """
 for [`HKY85`](@ref): store piR, piY, the 2 non-zero eigenvalues and a scaling factor
@@ -977,14 +976,14 @@ Because the mean of the desired continuous Gamma distribution is 1, we use
 shape α and scale θ=1/α (e.g. rate β=α).
 The shape parameter is referred to as alpha here.
 The Gamma distribution is discretized into `ncat` categories.
-In each category, the category's rate multiplier is selected as a quantile.
+In each category, the category's rate multiplier is a normalized quantile of the gamma distribution.
 
 ```jldoctest
 julia> rv = RateVariationAcrossSites()
 Rate Variation Across Sites using Discretized Gamma Model
 alpha: 1.0
 categories for Gamma discretization: 4
-ratemultiplier: [0.133531, 0.470004, 0.980829, 2.07944]
+ratemultiplier: [0.145784, 0.513132, 1.07083, 2.27025]
 
 julia> PhyloNetworks.setalpha!(rv, 2.0)
 
@@ -992,7 +991,13 @@ julia> rv
 Rate Variation Across Sites using Discretized Gamma Model
 alpha: 2.0
 categories for Gamma discretization: 4
-ratemultiplier: [0.304691, 0.652574, 1.05902, 1.80351]
+ratemultiplier: [0.319065, 0.683361, 1.10898, 1.8886]
+
+julia> RateVariationAcrossSites(2.0, 4)
+Rate Variation Across Sites using Discretized Gamma Model
+alpha: 2.0
+categories for Gamma discretization: 4
+ratemultiplier: [0.319065, 0.683361, 1.10898, 1.8886]
 ```
 """
 mutable struct RateVariationAcrossSites
@@ -1001,11 +1006,13 @@ mutable struct RateVariationAcrossSites
     ratemultiplier::Array{Float64}
     function RateVariationAcrossSites(alpha = 1.0::Float64, ncat = 4::Int)
         @assert alpha >= 0.0 "alpha must be >= 0"
+        @assert ncat > 0 "ncat must be 1 or greater"
         if ncat == 1
             ratemultiplier = [1.0]
         else
-            cuts = Vector((0:(ncat-1))/ncat) + repeat([1/2ncat], ncat)
+            cuts = Vector((0:(ncat-1))/ncat) + repeat([1/(2ncat)], ncat)
             ratemultiplier = quantile.(Distributions.Gamma(alpha, 1/alpha), cuts)
+            ratemultiplier ./= mean(ratemultiplier)
         end
         new(alpha, ncat, ratemultiplier)
     end
@@ -1022,8 +1029,10 @@ function setalpha!(obj::RateVariationAcrossSites, alpha::Float64)
     @assert alpha >= 0 "alpha must be a float >= 0"
     obj.alpha = alpha
     if obj.ncat > 1
+        rv = obj.ratemultiplier
         cuts = Vector((0:(obj.ncat-1))/obj.ncat) + repeat([1/(2*obj.ncat)], obj.ncat)
-        obj.ratemultiplier[:] = quantile.(Distributions.Gamma(alpha, 1/alpha), cuts)
+        rv[:] = quantile.(Distributions.Gamma(alpha, 1/alpha), cuts)
+        rv ./= mean(rv)
     end
     return nothing
 end
@@ -1039,3 +1048,112 @@ end
 function nparams(obj::RateVariationAcrossSites)
     return (obj.ncat == 1 ? 0 : 1)
 end
+
+"""
+    empiricalDNAfrequencies(DNAdata::AbstractDataFrame, DNAweights,
+                            correction=true, useambiguous=true)
+
+Estimate base frequencies in DNA data `DNAdata`, ordered ACGT.
+
+- `DNAdata`: data frame. All columns are used. If the first column
+  gives species names, find a way to ignore it before calculating empirical
+  frequencies, e.g. `empiricalDNAfrequencies(view(DNAdata, 2:ncol(DNAdata)))`.
+  Data type must be `BioSymbols.DNA` or `Char` or `String`.
+  WARNING: this is checked on the first column only.
+- `DNAweights`: vector of weights, to weigh each column in `DNAdata`.
+- `correction`: if `true`, add 1 to each count and 4 to the denominator
+  for a more stable estimator, similar to Bayes prior of 1/4 and
+  the Agresti-Coull interval in binomial estimation.
+- `useambiguous`: if `true`, ambiguous bases are used (except gaps and Ns).
+  For example, `Y` adds 0.5 weight to `C` and 0.5 weight to `T`.
+"""
+function empiricalDNAfrequencies(dnaDat::AbstractDataFrame, dnaWeights::Vector,
+    correctedestimate=true::Bool, useambiguous=true::Bool)
+
+    # warning: checking first column and first row only
+    eltypes(dnaDat)[1] == BioSymbols.DNA || eltypes(dnaDat)[1] == Char ||
+      dnaDat[1,1] ∈ string.(BioSymbols.alphabet(DNA)) ||
+        error("empiricalDNAfrequencies requires data of type String, Char, or BioSymbols.DNA")
+
+    # initialize counts: keys same as BioSymbols.ACGT (which are ordered)
+    prior = correctedestimate ? 1.0 : 0.0
+    dnacounts = Dict(DNA_A=>prior, DNA_C=>prior, DNA_G=>prior, DNA_T=>prior)
+
+    convert2dna = eltypes(dnaDat)[1] != BioSymbols.DNA
+    for j in 1:ncol(dnaDat) # for each column
+        col = dnaDat[j]
+        wt = dnaWeights[j]
+        for nuc in col      # for each row
+            if convert2dna
+                nuc = convert(DNA, nuc[1]) # if nuc is a string, nuc[1] = 1st character
+            end
+            if nuc ∈ BioSymbols.ACGT
+                dnacounts[nuc] += wt
+            elseif nuc == DNA_Gap || nuc == DNA_N || !useambiguous
+                continue # to next row
+            # else: ambiguity, uses BioSequences definitions
+            elseif nuc == DNA_M # A or C
+                dnacounts[DNA_A] += 0.5*wt
+                dnacounts[DNA_C] += 0.5*wt
+            elseif nuc == DNA_R # A or G
+                dnacounts[DNA_A] += 0.5*wt
+                dnacounts[DNA_G] += 0.5*wt
+            elseif nuc == DNA_W # A or T/U
+                dnacounts[DNA_A] += 0.5*wt
+                dnacounts[DNA_T] += 0.5*wt
+            elseif nuc == DNA_S # C or G
+                dnacounts[DNA_C] += 0.5*wt
+                dnacounts[DNA_G] += 0.5*wt
+            elseif nuc == DNA_Y # C or T/U
+                dnacounts[DNA_C] += 0.5*wt
+                dnacounts[DNA_T] += 0.5*wt
+            elseif nuc == DNA_K # G or T/U
+                dnacounts[DNA_G] += 0.5*wt
+                dnacounts[DNA_T] += 0.5*wt
+            elseif nuc == DNA_V # A or C or G
+                dnacounts[DNA_A] += wt/3
+                dnacounts[DNA_C] += wt/3
+                dnacounts[DNA_G] += wt/3
+            elseif nuc == DNA_H # A or C or T
+                dnacounts[DNA_A] += wt/3
+                dnacounts[DNA_C] += wt/3
+                dnacounts[DNA_T] += wt/3
+            elseif nuc == DNA_D # A or G or T/U
+                dnacounts[DNA_A] += wt/3
+                dnacounts[DNA_G] += wt/3
+                dnacounts[DNA_T] += wt/3
+            elseif nuc == DNA_B # C or G or T/U
+                dnacounts[DNA_C] += wt/3
+                dnacounts[DNA_G] += wt/3
+                dnacounts[DNA_T] += wt/3
+            end
+        end
+    end
+    totalweight = sum(values(dnacounts))
+    res = [dnacounts[key]/totalweight for key in BioSymbols.ACGT] # to control the order
+    all(0. .<= res .<= 1.) || error("""weird: empirical base frequency < 0 or > 1""")
+    return res
+end
+
+"""
+    stationary(substitutionmodel)
+
+Stationary distribution of a Markov model
+"""
+stationary(mod::SM) = error("stationary not defined for $(typeof(mod)).")
+
+function stationary(mod::JC69)
+    return [0.25,0.25,0.25,0.25]  
+end
+
+function stationary(mod::HKY85)
+    return mod.pi
+end
+
+function stationary(mod::ERSM) #for ERSM, uniform = stationary
+    return [1/mod.k for i in 1:mod.k]
+end
+
+function stationary(mod::BTSM)
+    return [mod.eigeninfo[2], mod.eigeninfo[3]]
+end
diff --git a/src/traits.jl b/src/traits.jl
index b2b515ede..bec4dd3f3 100644
--- a/src/traits.jl
+++ b/src/traits.jl
@@ -512,7 +512,7 @@ julia> sim = simulate(net, params); # simulate a dataset with shifts
 julia> using DataFrames # to handle data frames
 
 julia> dat = DataFrame(trait = sim[:Tips], tipNames = sim.M.tipNames)
-4×2 DataFrames.DataFrame
+4×2 DataFrame
 │ Row │ trait   │ tipNames │
 │     │ Float64 │ String   │
 ├─────┼─────────┼──────────┤
@@ -522,7 +522,7 @@ julia> dat = DataFrame(trait = sim[:Tips], tipNames = sim.M.tipNames)
 │ 4   │ 7.88906 │ D        │
 
 julia> dfr_shift = regressorShift(net.node[nodes_shifts], net) # the regressors matching the shifts.
-4×3 DataFrames.DataFrame
+4×3 DataFrame
 │ Row │ shift_1 │ shift_8 │ tipNames │
 │     │ Float64 │ Float64 │ String   │
 ├─────┼─────────┼─────────┼──────────┤
@@ -536,7 +536,7 @@ julia> dfr = join(dat, dfr_shift, on=:tipNames); # join data and regressors in a
 julia> using StatsModels # for statistical model formulas
 
 julia> fitBM = phyloNetworklm(@formula(trait ~ shift_1 + shift_8), dfr, net) # actual fit
-StatsModels.DataFrameRegressionModel{PhyloNetworkLinearModel,Array{Float64,2}}
+StatsModels.TableRegressionModel{PhyloNetworkLinearModel,Array{Float64,2}}
 
 Formula: trait ~ 1 + shift_1 + shift_8
 
@@ -652,7 +652,7 @@ julia> using Random; Random.seed!(2468); # sets the seed for reproducibility
 julia> sim = simulate(net, params); # simulate a dataset with shifts
 
 julia> dat = DataFrame(trait = sim[:Tips], tipNames = sim.M.tipNames)
-4×2 DataFrames.DataFrame
+4×2 DataFrame
 │ Row │ trait   │ tipNames │
 │     │ Float64 │ String   │
 ├─────┼─────────┼──────────┤
@@ -662,7 +662,7 @@ julia> dat = DataFrame(trait = sim[:Tips], tipNames = sim.M.tipNames)
 │ 4   │ 12.6891 │ D        │
 
 julia> dfr_hybrid = regressorHybrid(net) # the reressors matching the hybrids.
-4×3 DataFrames.DataFrame
+4×3 DataFrame
 │ Row │ shift_6 │ tipNames │ sum     │
 │     │ Float64 │ String   │ Float64 │
 ├─────┼─────────┼──────────┼─────────┤
@@ -676,7 +676,7 @@ julia> dfr = join(dat, dfr_hybrid, on=:tipNames); # join data and regressors in
 julia> using StatsModels
 
 julia> fitBM = phyloNetworklm(@formula(trait ~ shift_6), dfr, net) # actual fit
-StatsModels.DataFrameRegressionModel{PhyloNetworkLinearModel,Array{Float64,2}}
+StatsModels.TableRegressionModel{PhyloNetworkLinearModel,Array{Float64,2}}
 
 Formula: trait ~ 1 + shift_6
 
@@ -1120,18 +1120,21 @@ end
 
 # New type for phyloNetwork regression
 """
-    PhyloNetworkLinearModel<:LinPredModel
+    PhyloNetworkLinearModel<:GLM.LinPredModel
 
 Regression object for a phylogenetic regression. Result of fitting function [`phyloNetworklm`](@ref).
-Dominated by the `LinPredModel` class, from package `GLM`.
+Dominated by the `GLM.LinPredModel` class, from package `GLM`.
 
 The following StatsBase functions can be applied to it:
 `coef`, `nobs`, `vcov`, `stderror`, `confint`, `coeftable`, `dof_residual`, `dof`, `deviance`,
 `residuals`, `response`, `predict`, `loglikelihood`, `nulldeviance`, `nullloglikelihood`,
 `r2`, `adjr2`, `aic`, `aicc`, `bic`.
 
-The following StatsModels functions can also be applied to it:
-`ModelFrame`, `ModelMatrix`, `Formula`.
+For accessing the model matrix (`object.mm` and `object.mm.m`),
+the model frame (`object.mf`) or formula (`object.mf.f`), refer to
+[StatsModels](https://juliastats.github.io/StatsModels.jl/stable/) functions,
+like `show(object.mf.f)`, `terms(object.mf.f)`, `coefnames(object.mf.f)`,
+`terms(object.mf.f.rhs)`, `response(object)` etc.
 
 Estimated variance and mean of the BM process used can be retrieved with
 functions [`sigma2_estim`](@ref) and [`mu_estim`](@ref).
@@ -1145,7 +1148,7 @@ An ancestral state reconstruction can be performed from this fitted object using
 The `PhyloNetworkLinearModel` object has fields: `lm`, `V`, `Vy`, `RL`, `Y`, `X`, `logdetVy`, `ind`, `nonmissing`, `model`, `lambda`.
 Type in "?PhyloNetworkLinearModel.field" to get help on a specific field.
 """
-mutable struct PhyloNetworkLinearModel <: LinPredModel
+mutable struct PhyloNetworkLinearModel <: GLM.LinPredModel
     "lm: a GLM.LinearModel object, fitted on the cholesky-tranformend problem"
     lm::GLM.LinearModel # result of a lm on a matrix
     "V: a MatrixTopologicalOrder object of the network-induced correlations"
@@ -1473,11 +1476,10 @@ end
 
 Phylogenetic regression, using the correlation structure induced by the network.
 
-Returns an object of class [`PhyloNetworkLinearModel`](@ref). See documentation for this type and
-example to see all the functions that can be applied to it.
+Returns an object of class [`PhyloNetworkLinearModel`](@ref). See documentation for this type and example to see all the functions that can be applied to it.
 
 # Arguments
-* `f::Formula`: formula to use for the regression (see the `DataFrame` package)
+* `f::StatsModels.FormulaTerm`: formula to use for the regression
 * `fr::AbstractDataFrame`: DataFrame containing the data and regressors at the tips. It should have an extra column labelled "tipNames", that gives the names of the taxa for each observation.
 * `net::HybridNetwork`: phylogenetic network to use. Should have labelled tips.
 * `model::AbstractString="BM"`: the model to use, "BM" (default) or "lambda" (for Pagel's lambda).
@@ -1507,9 +1509,9 @@ julia> using StatsModels # for stat model formulas
 julia> fitBM = phyloNetworklm(@formula(trait ~ 1), dat, phy);
 
 julia> fitBM # Shows a summary
-StatsModels.DataFrameRegressionModel{PhyloNetworkLinearModel,Array{Float64,2}}
+StatsModels.TableRegressionModel{PhyloNetworkLinearModel,Array{Float64,2}}
 
-Formula: trait ~ +1
+Formula: trait ~ 1
 
 Model: BM
 
@@ -1633,8 +1635,8 @@ julia> round.(predict(fitBM), digits=5)
  4.679
 
 ```
-""" #"
-function phyloNetworklm(f::Formula,
+"""
+function phyloNetworklm(f::StatsModels.FormulaTerm,
                         fr::AbstractDataFrame,
                         net::HybridNetwork;
                         model="BM"::AbstractString,
@@ -1681,19 +1683,19 @@ function phyloNetworklm(f::Formula,
         end
         #   fr = fr[ind, :]
     end
-    # Find the regression matrix and answer vector
-    mf = ModelFrame(f,fr)
-    if isequal(f.rhs, -1) # If there are no regressors
-        mm = ModelMatrix(zeros(size(mf.df, 1), 0), [0])
-    else
-        mm = ModelMatrix(mf)
-    end
-    Y = convert(Vector{Float64}, StatsModels.model_response(mf))
-    # Fit the model (Method copied from DataFrame/src/statsmodels/statsmodels.jl, lines 47-58)
-    # (then StatsModels/src/statsmodels.jl lines 42-46)
-    StatsModels.DataFrameRegressionModel(phyloNetworklm(mm.m, Y, net;
-                                                       nonmissing=mf.nonmissing, model=model, ind=ind,
-                                                       startingValue=startingValue, fixedValue=fixedValue), mf, mm)
+    # Find the regression matrix and response vector
+    data, nonmissing = StatsModels.missing_omit(StatsModels.columntable(fr), f)
+    sch = StatsModels.schema(f, data)
+    f = StatsModels.apply_schema(f, sch, PhyloNetworkLinearModel)
+    mf = ModelFrame(f, sch, data, PhyloNetworkLinearModel)
+    mm = StatsModels.ModelMatrix(mf)
+    Y = StatsModels.response(mf)
+    # Y = convert(Vector{Float64}, StatsModels.response(mf))
+    # Y, pred = StatsModels.modelcols(f, fr)
+    StatsModels.TableRegressionModel(
+        phyloNetworklm(mm.m, Y, net; nonmissing=nonmissing, model=model, ind=ind,
+                       startingValue=startingValue, fixedValue=fixedValue),
+        mf, mm)
 end
 
 ### Methods on type phyloNetworkRegression
@@ -1775,7 +1777,7 @@ function StatsBase.adjr2(obj::PhyloNetworkLinearModel)
 end
 
 ## REMARK
-# As PhyloNetworkLinearModel <: LinPredModel, the following functions are automatically defined:
+# As PhyloNetworkLinearModel <: GLM.LinPredModel, the following functions are automatically defined:
 # aic, aicc, bic
 
 ## New quantities
@@ -1786,8 +1788,8 @@ end
 Estimated variance for a fitted object.
 """
 sigma2_estim(m::PhyloNetworkLinearModel) = deviance(m.lm) / nobs(m)
-# Need to be adapted manually to DataFrameRegressionModel beacouse it's a new function
-sigma2_estim(m::StatsModels.DataFrameRegressionModel{PhyloNetworkLinearModel,T} where T) =
+# Need to be adapted manually to TableRegressionModel beacouse it's a new function
+sigma2_estim(m::StatsModels.TableRegressionModel{PhyloNetworkLinearModel,T} where T) =
   sigma2_estim(m.model)
 # ML estimate for ancestral state of the BM
 """
@@ -1806,9 +1808,9 @@ function mu_estim(m::PhyloNetworkLinearModel)
         return coef(m)[1]
     end
 end
-# Need to be adapted manually to DataFrameRegressionModel beacouse it's a new function
-function mu_estim(m::StatsModels.DataFrameRegressionModel{PhyloNetworkLinearModel,T} where T)
-    if (!m.mf.terms.intercept)
+# Need to be adapted manually to TableRegressionModel beacouse it's a new function
+function mu_estim(m::StatsModels.TableRegressionModel{PhyloNetworkLinearModel,T} where T)
+    if m.mf.f.rhs.terms[1] != StatsModels.InterceptTerm{true}()
         error("The fit was done without intercept, so I cannot estimate mu")
     end
     return coef(m)[1]
@@ -1820,13 +1822,7 @@ end
 Estimated lambda parameter for a fitted object.
 """
 lambda_estim(m::PhyloNetworkLinearModel) = m.lambda
-lambda_estim(m::StatsModels.DataFrameRegressionModel{PhyloNetworkLinearModel,T} where T) = lambda_estim(m.model)
-
-### Functions specific to DataFrameRegressionModel
-StatsModels.ModelFrame(m::StatsModels.DataFrameRegressionModel) = m.mf
-StatsModels.ModelMatrix(m::StatsModels.DataFrameRegressionModel) = m.mm
-StatsModels.Formula(m::StatsModels.DataFrameRegressionModel) = Formula(m.mf.terms)
-StatsModels.response(m::StatsModels.DataFrameRegressionModel) = response(m.model)
+lambda_estim(m::StatsModels.TableRegressionModel{PhyloNetworkLinearModel,T} where T) = lambda_estim(m.model)
 
 ### Print the results
 # Variance
@@ -1844,11 +1840,11 @@ function Base.show(io::IO, obj::PhyloNetworkLinearModel)
 end
 # For DataFrameModel. see also Base.show in
 # https://github.com/JuliaStats/StatsModels.jl/blob/master/src/statsmodel.jl
-function Base.show(io::IO, model::StatsModels.DataFrameRegressionModel{PhyloNetworkLinearModel,T} where T)
+function Base.show(io::IO, model::StatsModels.TableRegressionModel{PhyloNetworkLinearModel,T} where T)
     ct = coeftable(model)
     println(io, "$(typeof(model))")
-    println(io)
-    println(io, Formula(model.mf.terms))
+    print(io, "\nFormula: ")
+    println(io, string(model.mf.f)) # formula
     println(io)
     println(io, "Model: $(model.model.model)")
     println(io)
@@ -1868,7 +1864,7 @@ end
 ###############################################################################
 ###############################################################################
 
-function GLM.ftest(objs::StatsModels.DataFrameRegressionModel{PhyloNetworkLinearModel,T}...)  where T
+function GLM.ftest(objs::StatsModels.TableRegressionModel{PhyloNetworkLinearModel,T}...)  where T
     objsModels = [obj.model for obj in objs]
     return ftest(objsModels...)
 end
@@ -1895,7 +1891,7 @@ data, for models that have more and more effects.
 
 Returns a DataFrame object with the anova table.
 """
-function anova(objs::StatsModels.DataFrameRegressionModel{PhyloNetworkLinearModel,T}...) where T
+function anova(objs::StatsModels.TableRegressionModel{PhyloNetworkLinearModel,T}...) where T
     objsModels = [obj.model for obj in objs]
     return(anova(objsModels...))
 end
@@ -2129,7 +2125,7 @@ end
 # object fitted by function phyloNetworklm, and some predictors expressed at all the nodes of the network.
 #
 # - obj: a PhyloNetworkLinearModel object, or a
-# DataFrameRegressionModel{PhyloNetworkLinearModel}, if data frames were used.
+# TableRegressionModel{PhyloNetworkLinearModel}, if data frames were used.
 # - X_n a matrix with as many columns as the number of predictors used, and as
 # many lines as the number of unknown nodes or tips.
 #
@@ -2369,7 +2365,7 @@ julia> ancStates = ancestralStateReconstruction(fitBM);
 └ @ PhyloNetworks ~/build/crsl4/PhyloNetworks.jl/src/traits.jl:2163
 
 julia> expectations(ancStates)
-31×2 DataFrames.DataFrame
+31×2 DataFrame
 │ Row │ nodeNumber │ condExpectation │
 │     │ Int64      │ Float64         │
 ├─────┼────────────┼─────────────────┤
@@ -2414,7 +2410,7 @@ julia> predint(ancStates)
   1.0695      1.0695
 
 julia> expectationsPlot(ancStates) # format node <-> ancestral state
-31×2 DataFrames.DataFrame
+31×2 DataFrame
 │ Row │ nodeNumber │ PredInt   │
 │     │ Int64      │ Abstract… │
 ├─────┼────────────┼───────────┤
@@ -2438,7 +2434,7 @@ julia> expectationsPlot(ancStates) # format node <-> ancestral state
 julia> plot(phy, :RCall, nodeLabel = expectationsPlot(ancStates));
 
 julia> predintPlot(ancStates) # prediction intervals, in data frame, useful to plot
-31×2 DataFrames.DataFrame
+31×2 DataFrame
 │ Row │ nodeNumber │ PredInt       │
 │     │ Int64      │ Abstract…     │
 ├─────┼────────────┼───────────────┤
@@ -2474,11 +2470,11 @@ function ancestralStateReconstruction(obj::PhyloNetworkLinearModel)
   X_n = ones((length(obj.V.internalNodeNumbers) + sum(.!obj.nonmissing), 1))
     ancestralStateReconstruction(obj, X_n)
 end
-# For a DataFrameRegressionModel
-function ancestralStateReconstruction(obj::StatsModels.DataFrameRegressionModel{PhyloNetworkLinearModel,T} where T)
+# For a TableRegressionModel
+function ancestralStateReconstruction(obj::StatsModels.TableRegressionModel{PhyloNetworkLinearModel,T} where T)
     ancestralStateReconstruction(obj.model)
 end
-function ancestralStateReconstruction(obj::StatsModels.DataFrameRegressionModel{PhyloNetworkLinearModel,T} where T, X_n::Matrix)
+function ancestralStateReconstruction(obj::StatsModels.TableRegressionModel{PhyloNetworkLinearModel,T} where T, X_n::Matrix)
     ancestralStateReconstruction(obj.model, X_n)
 end
 
diff --git a/src/traitsLikDiscrete.jl b/src/traitsLikDiscrete.jl
index 27117cf62..c8b24aa21 100644
--- a/src/traitsLikDiscrete.jl
+++ b/src/traitsLikDiscrete.jl
@@ -3,7 +3,7 @@
 
 Subtype of `StatsBase.StatisticalModel`, to fit discrete data to a model
 of trait substitution along a network.
-See [`fitDiscrete`](@ref) to fit a trait substitution model to discrete data.
+See [`fitdiscrete`](@ref) to fit a trait substitution model to discrete data.
 It returns an object of type `StatisticalSubstitutionModel`, to which standard
 functions can be applied, like `loglikelihood(object)`, `aic(object)` etc.
 """
@@ -44,9 +44,10 @@ StatsBase.dof(obj::SSM) = nparams(obj.model) + nparams(obj.ratemodel)
 function Base.show(io::IO, obj::SSM)
     disp =  "$(typeof(obj)):\n"
     disp *= string(obj.model)
-    disp *= "$(obj.nsites) traits, $(length(obj.trait)) species, "
+    disp *= "$(obj.nsites) traits, $(length(obj.trait)) species\n"
     if obj.ratemodel.ncat != 1
-        disp *= "with gamma variable rate model according to $(obj.ratemodel) with alpha = $(obj.ratemodel.alpha) and $(obj.ratemodel.ncat) categories "
+        disp *= "variable rates across sites ~ discretized gamma with\n alpha=$(obj.ratemodel.alpha)"
+        disp *= "\n $(obj.ratemodel.ncat) categories\n rate multipliers: $(obj.ratemodel.ratemultiplier)\n"
     end
     disp *= "on a network with $(obj.net.numHybrids) reticulations"
     if !ismissing(obj.loglik)
@@ -65,14 +66,14 @@ end
 # weights
 
 """
-    fitDiscrete(net, model, tipdata)
-    fitDiscrete(net, model, RateVariationAcrossSites, tipdata)
-    fitDiscrete(net, model, species, traits)
-    fitDiscrete(net, model, RateVariationAcrossSites, species, traits)
-    fitDiscrete(net, model, dnadata, dnapatternweights)
-    fitDiscrete(net, model, RateVariationAcrossSites, dnadata, dnapatternweights)
-    fitDiscrete(net, modSymbol, species, traits)
-    fitDiscrete(net, modSymbol, dnadata, dnapatternweights)
+    fitdiscrete(net, model, tipdata)
+    fitdiscrete(net, model, RateVariationAcrossSites, tipdata)
+    fitdiscrete(net, model, species, traits)
+    fitdiscrete(net, model, RateVariationAcrossSites, species, traits)
+    fitdiscrete(net, model, dnadata, dnapatternweights)
+    fitdiscrete(net, model, RateVariationAcrossSites, dnadata, dnapatternweights)
+    fitdiscrete(net, modSymbol, species, traits)
+    fitdiscrete(net, modSymbol, dnadata, dnapatternweights)
     
 Calculate the maximum likelihood (ML) score of a network given
 one or more discrete characters at the tips. Along each edge, transitions
@@ -109,6 +110,8 @@ Optional arguments (default):
 - tolerance values to control when the optimization is stopped:
   `ftolRel` (1e-12), `ftolAbs` (1e-10) on the likelihood, and
   `xtolRel` (1e-10), `xtolAbs` (1e-10) on the model parameters.
+- bounds for the alpha parameter of the Gamma distribution of rates across sites:
+  `alphamin=0.05`, `alphamax=500`.
 - `verbose` (false): if true, more information is output.
 
 # examples:
@@ -122,22 +125,24 @@ julia> using DataFrames
 
 julia> dat = DataFrame(species=["C","A","B","D"], trait=["hi","lo","lo","hi"]);
 
-julia> fit1 = fitDiscrete(net, m1, dat; optimizeQ=true, optimizeRVAS=false)
+julia> fit1 = fitdiscrete(net, m1, dat)
 PhyloNetworks.StatisticalSubstitutionModel:
 Binary Trait Substitution Model:
 rate lo→hi α=0.27222
 rate hi→lo β=0.34981
-1 traits, 4 species, on a network with 1 reticulations
+1 traits, 4 species
+on a network with 1 reticulations
 log-likelihood: -2.7277
 
 julia> tips = Dict("A" => "lo", "B" => "lo", "C" => "hi", "D" => "hi");
 
-julia> fit2 = fitDiscrete(net, m1, tips; optimizeRVAS=false, xtolRel=1e-16, xtolAbs=1e-16, ftolRel=1e-16)
+julia> fit2 = fitdiscrete(net, m1, tips; xtolRel=1e-16, xtolAbs=1e-16, ftolRel=1e-16)
 PhyloNetworks.StatisticalSubstitutionModel:
 Binary Trait Substitution Model:
 rate lo→hi α=0.27222
 rate hi→lo β=0.34981
-1 traits, 4 species, on a network with 1 reticulations
+1 traits, 4 species
+on a network with 1 reticulations
 log-likelihood: -2.7277
 ```
 
@@ -154,7 +159,7 @@ julia> tips = Dict("sp1" => BioSymbols.DNA_A, "sp2" => BioSymbols.DNA_A, "sp3" =
 
 julia> mJC69 = JC69([0.25], false);
 
-julia> fitJC69 = fitDiscrete(net, mJC69, tips; optimizeQ=true, optimizeRVAS=false)
+julia> fitJC69 = fitdiscrete(net, mJC69, tips)
 PhyloNetworks.StatisticalSubstitutionModel:
 Jukes and Cantor 69 Substitution Model,
 absolute rate version
@@ -165,16 +170,17 @@ rate matrix Q:
        C  0.0974       *  0.0974  0.0974
        G  0.0974  0.0974       *  0.0974
        T  0.0974  0.0974  0.0974       *
-1 traits, 4 species, on a network with 0 reticulations
+1 traits, 4 species
+on a network with 0 reticulations
 log-likelihood: -4.99274
 
-julia> rv = RateVariationAcrossSites(1.0, 4)
+julia> rv = RateVariationAcrossSites()
 Rate Variation Across Sites using Discretized Gamma Model
 alpha: 1.0
 categories for Gamma discretization: 4
-ratemultiplier: [0.133531, 0.470004, 0.980829, 2.07944]
+ratemultiplier: [0.145784, 0.513132, 1.07083, 2.27025]
 
-julia> fitDiscrete(net, mJC69, rv, tips; optimizeQ=false, optimizeRVAS=false)
+julia> fitdiscrete(net, mJC69, rv, tips; optimizeQ=false, optimizeRVAS=false)
 PhyloNetworks.StatisticalSubstitutionModel:
 Jukes and Cantor 69 Substitution Model,
 absolute rate version
@@ -185,22 +191,23 @@ rate matrix Q:
        C  0.0833       *  0.0833  0.0833
        G  0.0833  0.0833       *  0.0833
        T  0.0833  0.0833  0.0833       *
-1 traits, 4 species, with gamma variable rate model according to Rate Variation Across Sites using Discretized Gamma Model
-alpha: 1.0
-categories for Gamma discretization: 4
-ratemultiplier: [0.133531, 0.470004, 0.980829, 2.07944]
- with alpha = 1.0 and 4 categories on a network with 0 reticulations
-log-likelihood: -5.26879
+1 traits, 4 species
+variable rates across sites ~ discretized gamma with
+ alpha=1.0
+ 4 categories
+ rate multipliers: [0.145784, 0.513132, 1.07083, 2.27025]
+on a network with 0 reticulations
+log-likelihood: -5.2568
 ```
 """
-function fitDiscrete(net::HybridNetwork, model::SubstitutionModel, #tips::Dict no ratemodel version
+function fitdiscrete(net::HybridNetwork, model::SubstitutionModel, #tips::Dict no ratemodel version
     tips::Dict; kwargs...)
     ratemodel = RateVariationAcrossSites(1.0, 1)
-    fitDiscrete(net, model, ratemodel, tips; kwargs...)
+    fitdiscrete(net, model, ratemodel, tips; kwargs...)
 end
 
 #tips::Dict version with ratemodel
-function fitDiscrete(net::HybridNetwork, model::SubstitutionModel, ratemodel::RateVariationAcrossSites, 
+function fitdiscrete(net::HybridNetwork, model::SubstitutionModel, ratemodel::RateVariationAcrossSites, 
     tips::Dict; kwargs...)
         species = String[]
     dat = Vector{Int}[] # indices of trait labels
@@ -216,13 +223,13 @@ function fitDiscrete(net::HybridNetwork, model::SubstitutionModel, ratemodel::Ra
 end
 
 #dat::DataFrame, no rate model version
-function fitDiscrete(net::HybridNetwork, model::SubstitutionModel, dat::DataFrame; kwargs...)
+function fitdiscrete(net::HybridNetwork, model::SubstitutionModel, dat::DataFrame; kwargs...)
     ratemodel = RateVariationAcrossSites(1.0, 1)
-    fitDiscrete(net, model, ratemodel, dat; kwargs...)
+    fitdiscrete(net, model, ratemodel, dat; kwargs...)
 end
 
 #dat::DataFrame with rate model version
-function fitDiscrete(net::HybridNetwork, model::SubstitutionModel, ratemodel::RateVariationAcrossSites,
+function fitdiscrete(net::HybridNetwork, model::SubstitutionModel, ratemodel::RateVariationAcrossSites,
         dat::DataFrame; kwargs...)
     i = findfirst(isequal(:taxon), DataFrames.names(dat))
     
@@ -241,13 +248,13 @@ function fitDiscrete(net::HybridNetwork, model::SubstitutionModel, ratemodel::Ra
 end
 
 #species, dat version, no ratemodel
-function fitDiscrete(net::HybridNetwork, model::SubstitutionModel, species::Array{String}, dat::DataFrame; kwargs...)
+function fitdiscrete(net::HybridNetwork, model::SubstitutionModel, species::Array{String}, dat::DataFrame; kwargs...)
     ratemodel = RateVariationAcrossSites(1.0, 1)
-    fitDiscrete(net, model, ratemodel, species, dat; kwargs...)
+    fitdiscrete(net, model, ratemodel, species, dat; kwargs...)
 end
 
 #species, dat version with ratemodel
-function fitDiscrete(net::HybridNetwork, model::SubstitutionModel, ratemodel::RateVariationAcrossSites,
+function fitdiscrete(net::HybridNetwork, model::SubstitutionModel, ratemodel::RateVariationAcrossSites,
         species::Array{String}, dat::DataFrame; kwargs...)
     dat2 = traitlabels2indices(dat, model) # vec of vec, indices
     o, net = check_matchtaxonnames!(copy(species), dat2, net)
@@ -255,19 +262,24 @@ function fitDiscrete(net::HybridNetwork, model::SubstitutionModel, ratemodel::Ra
 end
 
 #wrapper: species, dat version with model symbol
-function fitDiscrete(net::HybridNetwork, modSymbol::Symbol, species::Array{String}, dat::DataFrame, rvSymbol=:noRV::Symbol; kwargs...)
+function fitdiscrete(net::HybridNetwork, modSymbol::Symbol,  
+    species::Array{String}, dat::DataFrame, rvSymbol=:noRV::Symbol; kwargs...)
     rate = startingrate(net)
     labels = learnLabels(modSymbol, species, dat)
     if modSymbol == :JC69
-        model = JC69([rate], true)
+        model = JC69([1.0], true) # 1.0 instead of rate because relative version
     elseif modSymbol == :HKY85
-        model = HKY85([rate, rate], [0.25, 0.25, 0.25, 0.25], true)
+        model = HKY85([1.0], # transition/transversion rate ratio
+                       empiricalDNAfrequencies(dat, repeat([1.], inner=ncol(dat))),
+                       true)
     elseif modSymbol == :ERSM
         model = EqualRatesSubstitutionModel(length(labels), rate, labels);
     elseif modSymbol == :BTSM
         model = BinaryTraitSubstitutionModel([rate, rate], labels)
     elseif modSymbol == :TBTSM
         model = TwoBinaryTraitSubstitutionModel([rate, rate, rate, rate, rate, rate, rate, rate], labels)
+    else
+        error("model $modSymbol is unknown or not implemented yet")
     end
 
     if rvSymbol == :RV
@@ -275,22 +287,22 @@ function fitDiscrete(net::HybridNetwork, modSymbol::Symbol, species::Array{Strin
     else
         rvas = RateVariationAcrossSites(1.0, 1)
     end
-
-    fitDiscrete(net, model, rvas, species, dat; kwargs...)
+    fitdiscrete(net, model, rvas, species, dat; kwargs...)
 end
 
 #dnadata with dnapatternweights version, no ratemodel
-function fitDiscrete(net::HybridNetwork, model::SubstitutionModel, dnadata::DataFrame, 
+function fitdiscrete(net::HybridNetwork, model::SubstitutionModel, dnadata::DataFrame, 
     dnapatternweights::Array{Float64}; kwargs...)
     ratemodel = RateVariationAcrossSites(1.0, 1)
-    fitDiscrete(net, model, ratemodel, dnadata, dnapatternweights; kwargs...)
+    fitdiscrete(net, model, ratemodel, dnadata, dnapatternweights; kwargs...)
 end
 #dnadata with dnapatternweights version with ratemodel
-function fitDiscrete(net::HybridNetwork, model::SubstitutionModel, ratemodel::RateVariationAcrossSites,
+function fitdiscrete(net::HybridNetwork, model::SubstitutionModel, ratemodel::RateVariationAcrossSites,
     dnadata::DataFrame, dnapatternweights::Array{Float64}; kwargs...)
     
     dat2 = traitlabels2indices(dnadata[2:end], model) 
-   # dat2 = traitlabels2indices(view(dnadata, 2:ncol(dnadata)), model) #removed view bc it wasnt recognized as DF by traitlabels2indices
+   # dat2 = traitlabels2indices(view(dnadata, 2:ncol(dnadata)), model) #removed view bc it wasnt recognized 
+    #as DF by traitlabels2indices
             # this doesnt work, but was attempting to uses view to avoid a shallow copy and 
             # be more space efficient
             #produces a vec of vec, indices
@@ -301,18 +313,23 @@ function fitDiscrete(net::HybridNetwork, model::SubstitutionModel, ratemodel::Ra
 end
 
 #wrapper for dna data
-function fitDiscrete(net::HybridNetwork, modSymbol::Symbol, dnadata::DataFrame, dnapatternweights::Array{Float64}, rvSymbol=:noRV::Symbol; kwargs...)
+function fitdiscrete(net::HybridNetwork, modSymbol::Symbol, dnadata::DataFrame, 
+    dnapatternweights::Array{Float64}, rvSymbol=:noRV::Symbol; kwargs...)
     rate = startingrate(net)
     if modSymbol == :JC69
-        model = JC69([rate], true)
+        model = JC69([1.0], true)  # 1.0 instead of rate because relative version (relative = true)
     elseif modSymbol == :HKY85
-        model = HKY85([rate, rate], [0.25, 0.25, 0.25, 0.25], true)
+        model = HKY85([1.0], # transition/transversion rate ratio
+                      empiricalDNAfrequencies(view(dnadata, 2:ncol(dnadata)), dnapatternweights),
+                      true)
     elseif modSymbol == :ERSM
         model = EqualRatesSubstitutionModel(4, rate, [BioSymbols.DNA_A, BioSymbols.DNA_C, BioSymbols.DNA_G, BioSymbols.DNA_T]);
     elseif modSymbol == :BTSM
         error("Binary Trait Substitution Model supports only two trait states, but dna data has four states.")
     elseif modSymbol == :TBTSM
         error("Two Binary Trait Substitution Model does not support dna data: it supports two sets of potentially correlated two trait states.")
+    else
+        error("model $modSymbol is unknown or not implemented yet")
     end
 
     if rvSymbol == :RV
@@ -321,22 +338,23 @@ function fitDiscrete(net::HybridNetwork, modSymbol::Symbol, dnadata::DataFrame,
         rvas = RateVariationAcrossSites(1.0, 1)
     end
     
-    fitDiscrete(net, model, rvas, dnadata, dnapatternweights; kwargs...)
+    fitdiscrete(net, model, rvas, dnadata, dnapatternweights; kwargs...)
 end
 
 """
     fit(StatisticalSubstitutionModel, net, model, traits; kwargs...)
     fit!(StatisticalSubstitutionModel; kwargs...)
 
-Internal function called by [`fitDiscrete`](@ref): with same key word arguments `kwargs`.
-But dangerous: `traits` should be a vector of vectors as for [`fitDiscrete`](@ref)
+Internal function called by [`fitdiscrete`](@ref): with same key word arguments `kwargs`.
+But dangerous: `traits` should be a vector of vectors as for [`fitdiscrete`](@ref)
 **but** here `traits` need to contain the *indices* of trait values corresponding
 to the indices in `getlabels(model)`, and species should appear in `traits` in the
 order corresponding to the node numbers in `net`.
 See [`traitlabels2indices`](@ref) to convert trait labels to trait indices.
-**Warning**: does *not* perform checks. [`fitDiscrete`](@ref) calls this function
+**Warning**: does *not* perform checks. [`fitdiscrete`](@ref) calls this function
 after doing checks, preordering nodes in the network, making sure nodes have
 consecutive numbers, species are matched between data and network etc.
+Like, snaq!(), we estimate the network starting from tree (or network) `astraltree`.
 """
 function StatsBase.fit(::Type{SSM}, net::HybridNetwork, model::SubstitutionModel, 
     ratemodel::RateVariationAcrossSites, trait::AbstractVector; kwargs...)
@@ -375,7 +393,8 @@ end
 
 function fit!(obj::SSM; optimizeQ=true::Bool, optimizeRVAS=true::Bool,verbose=false::Bool,
       NLoptMethod=:LD_MMA::Symbol, ftolRel=fRelBL::Float64, ftolAbs=fAbsBL::Float64,
-      xtolRel=xRelBL::Float64, xtolAbs=xAbsBL::Float64)
+      xtolRel=xRelBL::Float64, xtolAbs=xAbsBL::Float64,
+      alphamin=0.05, alphamax=500)
     all(x -> x >= 0.0, [e.length for e in obj.net.edge]) || error("branch lengths should be >= 0")
     all(x -> x >= 0.0, [e.gamma for e in obj.net.edge]) || error("gammas should be >= 0")
     if optimizeQ && nparams(obj.model) <1
@@ -386,25 +405,23 @@ function fit!(obj::SSM; optimizeQ=true::Bool, optimizeRVAS=true::Bool,verbose=fa
         @debug "Rate model has one rate category, so there are no parameters to optimize. optimizeRVAS will be set to false."
         optimizeRVAS = false
     end
-    if !optimizeQ & !optimizeRVAS 
-        #? rates need to be set?
+    if !optimizeQ && !optimizeRVAS
         discrete_corelikelihood!(obj)
         verbose && println("loglik = $(loglikelihood(obj)) under fixed parameters, no optimization")
-        return obj # return fmax,xmax,ret
+        return obj
     end
     counter = [0]
     if optimizeQ
         function loglikfun(x::Vector{Float64}, grad::Vector{Float64}) # modifies obj
             counter[1] += 1
-            setrates!(obj.model, x) #replaced assignment here for efficiency will be setalpha!
+            setrates!(obj.model, x)
             res = discrete_corelikelihood!(obj)
             verbose && println("loglik: $res, model rates: $x")
             length(grad) == 0 || error("gradient not implemented")
             return res
         end
-        # build optimizer for Q
-        # optimize Q under fixedRVAS
-        # set-up optimization object
+        # optimize Q under fixed RVAS parameters
+        # set-up optimization object for Q
         NLoptMethod=:LN_COBYLA # no gradient
         # :LN_COBYLA for (non)linear constraits, :LN_BOBYQA for bound constraints
         nparQ = nparams(obj.model)
@@ -425,15 +442,13 @@ function fit!(obj::SSM; optimizeQ=true::Bool, optimizeRVAS=true::Bool,verbose=fa
     if optimizeRVAS
         function loglikfunRVAS(alpha::Vector{Float64}, grad::Vector{Float64}) # modifies obj
             counter[1] += 1
-            setalpha!(obj.ratemodel, alpha[1]) #replaced assignment here for efficiency will be setalpha!
+            setalpha!(obj.ratemodel, alpha[1]) # obj.ratemodel.alpha is a scalar, alpha is a vector within here
             res = discrete_corelikelihood!(obj)
             verbose && println("loglik: $res, rate variation model shape parameter alpha: $(alpha[1])")
             length(grad) == 0 || error("gradient not implemented")
             return res
         end
-        # build optimizer for RAS
-        # optimize RAS 
-        # set-up optimization object
+        # set-up optimization object for RVAS parameter
         NLoptMethod=:LN_COBYLA # no gradient
         # :LN_COBYLA for (non)linear constraits, :LN_BOBYQA for bound constraints
         nparRVAS = nparams(obj.ratemodel)
@@ -444,24 +459,21 @@ function fit!(obj::SSM; optimizeQ=true::Bool, optimizeRVAS=true::Bool,verbose=fa
         NLopt.xtol_abs!(optRVAS,xtolAbs)
         NLopt.maxeval!(optRVAS,1000) # max number of iterations
         # NLopt.maxtime!(optRVAS, t::Real)
-        NLopt.lower_bounds!(optRVAS, zeros(Float64, nparRVAS))
-        # fixit: set upper bound depending on branch lengths in network?
+        NLopt.lower_bounds!(optRVAS, fill(alphamin, (nparRVAS,)) ) # for 0 as lower bound: zeros(Float64, nparRVAS)
+        NLopt.upper_bounds!(optRVAS, fill(alphamax, (nparRVAS,)) ) # delete to remove upper bound
         counter[1] = 0
         NLopt.max_objective!(optRVAS, loglikfunRVAS)
         fmax, xmax, ret = NLopt.optimize(optRVAS, [obj.ratemodel.alpha]) # optimization here!
         verbose && println("RVAS: got $(round(fmax, digits=5)) at $(round.(xmax, digits=5)) after $(counter[1]) iterations (return code $(ret))")
     end
     if optimizeQ && optimizeRVAS
-        # optimize Q under RVAS
-        # fixit: set upper bound depending on branch lengths in network?
+        # optimize Q under fixed RVAS parameters: a second time
         counter[1] = 0
-        #NLopt.max_objective!(optQ, loglikfun)
-        fmax, xmax, ret = NLopt.optimize(optQ, obj.model.rate) # optimization here!
+        fmax, xmax, ret = NLopt.optimize(optQ, obj.model.rate)
         verbose && println("got $(round(fmax, digits=5)) at $(round.(xmax, digits=5)) after $(counter[1]) iterations (return code $(ret))")
-        # optimize RVAS
+        # optimize RVAS under fixed Q: a second time
         counter[1] = 0
-        #NLopt.max_objective!(optRVAS, loglikfunRVAS)
-        fmax, xmax, ret = NLopt.optimize(optRVAS, [obj.ratemodel.alpha]) # optimization here!
+        fmax, xmax, ret = NLopt.optimize(optRVAS, [obj.ratemodel.alpha])
         verbose && println("RVAS: got $(round(fmax, digits=5)) at $(round.(xmax, digits=5)) after $(counter[1]) iterations (return code $(ret))")
     end
     return obj
@@ -471,6 +483,7 @@ end
 """
     discrete_corelikelihood!(obj::StatisticalSubstitutionModel; whichtrait=:all)
     discrete_corelikelihood_tree!(obj, t::Integer, traitrange::AbstractArray)
+
 Calculate the likelihood and update `obj.loglik` for discrete characters on a network
 (or on a single tree: `t`th tree displayed in the network, for the second form).
 Update forward and direct partial likelihoods while doing so.
@@ -538,7 +551,11 @@ function discrete_corelikelihood_tree!(obj::SSM, t::Integer, traitrange::Abstrac
                     end
                 end
                 if ni==1 # root is first index in nodes changed
-                    logprior = [-log(k) for i in 1:k] # uniform prior at root
+                    if typeof(obj.model) == NASM 
+                        logprior = log.(stationary(obj.model))
+                    else #other trait models
+                        logprior = [-log(k) for i in 1:k] # uniform prior at root
+                    end
                     loglik = logsumexp(logprior + view(forwardlik, :,nnum)) # log P{data for ci | tree t}
                     if iratemultiplier == 1
                         currentloglik = loglik
@@ -593,17 +610,26 @@ function traitlabels2indices(data::AbstractVector, model::SubstitutionModel)
     end
     return A
 end
+
 function traitlabels2indices(data::Union{AbstractMatrix,DataFrame},
                              model::SubstitutionModel)
     A = Vector{Vector{Union{Missings.Missing,Int}}}(undef, 0) # indices of trait labels
     labs = getlabels(model)
     isDNA = typeof(labs) == Array{DNA,1}
     ntraits = size(data,2) #starting with spcies column
-    for i in 1:size(data,1) # go row by row
+    for i in 1:size(data,1) # go row by row (which is species by species)
         V = Vector{Union{Missings.Missing,Int}}(undef, ntraits)
         for j in 1:ntraits
             vi = missing # value index
             @inbounds l = data[i,j] # value label
+            if !isDNA && ismissing(l)
+                vi = missing
+            elseif isDNA #else if DNA
+                if typeof(l) == String #takes string and converts to a Char so that we can convert to DNA
+                    l = Vector{Char}(l)[1]
+                end
+                l = convert(DNA, l)
+            end
             if !ismissing(l)
                 vi = findfirst(isequal(l), labs) 
                 if vi == nothing
@@ -619,7 +645,7 @@ function traitlabels2indices(data::Union{AbstractMatrix,DataFrame},
             V[j] = vi
         end
         push!(A, V)
-      end
+    end
     return A
 end
 
@@ -631,7 +657,7 @@ Return a new network (`net` is *not* modified) with tips matching those in speci
 if some species in `net` have no data, these species are pruned from the network.
 The network also has its node names reset, such that leaves have nodes have
 consecutive numbers starting at 1, with leaves first.
-Used by [`fitDiscrete`](@ref) to build a new [`StatisticalSubstitutionModel`](@ref).
+Used by [`fitdiscrete`](@ref) to build a new [`StatisticalSubstitutionModel`](@ref).
 """
 function check_matchtaxonnames!(species::AbstractVector, dat::AbstractVector, net::HybridNetwork)
     # 1. basic checks for dimensions and types
@@ -682,7 +708,7 @@ end
 Estimate the marginal probability of ancestral states for discrete character
 number `trait`, or for the active trait if `trait` is unspecified: `obj.activesite`.
 The parameters of the [`StatisticalSubstitutionModel`](@ref) object `obj`
-must first be fitted using [`fitDiscrete`](@ref), and ancestral state reconstruction
+must first be fitted using [`fitdiscrete`](@ref), and ancestral state reconstruction
 is conditional on the estimated parameters. If these parameters were estimated
 using all traits, they are used as is to do ancestral state reconstruction of the
 particular `trait` of interest.
@@ -712,10 +738,10 @@ julia> using DataFrames
 
 julia> dat = DataFrame(species=["C","A","B","D"], trait=["hi","lo","lo","hi"]);
 
-julia> fit1 = fitDiscrete(net, m1, dat);
+julia> fit1 = fitdiscrete(net, m1, dat);
 
 julia> asr = ancestralStateReconstruction(fit1)
-9×4 DataFrames.DataFrame
+9×4 DataFrame
 │ Row │ nodenumber │ nodelabel │ lo       │ hi       │
 │     │ Int64      │ String    │ Float64  │ Float64  │
 ├─────┼────────────┼───────────┼──────────┼──────────┤
@@ -731,7 +757,7 @@ julia> asr = ancestralStateReconstruction(fit1)
 
 julia> round.(exp.(fit1.postltw), digits=6) # marginal (posterior) probability that the trait evolved on each displayed tree
 2-element Array{Float64,1}:
- 0.919831 
+ 0.919831
  0.080169
 
 julia> using PhyloPlots
@@ -787,7 +813,11 @@ function discrete_backwardlikelihood_tree!(obj::SSM, t::Integer, trait::Integer)
     bkdlik = view(obj.backwardlik,:,:,t)
     k = nstates(obj.model)
     bkwtmp = Vector{Float64}(undef, k) # to hold bkw lik without parent edge transition
-    logprior = [-log(k) for i in 1:k]
+    if typeof(obj.model) == NASM 
+        logprior = log.(stationary(obj.model))
+    else #trait models
+        logprior = [-log(k) for i in 1:k] # uniform prior at root
+    end
     for ni in 1:length(tree.nodes_changed) # pre-order traversal to calculate backwardlik
         n = tree.nodes_changed[ni]
         nnum = n.number
@@ -816,18 +846,18 @@ end
 """
     learnLabels(model::SubstitutionModel, species::Array{String}, dat::DataFrame)
 
-Learn label names and length from data for fitDiscrete wrapper function with species vector and trait dataframe data
+Learn label names and length from data for fitdiscrete wrapper function with species vector and trait dataframe data
 """
 function learnLabels(modSymbol::Symbol, species::Array{String}, dat::DataFrame)
     labels = unique(reshape(convert(Matrix, dat), 1, size(dat)[1]*size(dat)[2]))
     if modSymbol == :BTSM
-        length(labels) == 2 || error("Binary Trait Substitution Model supports traits with two states. These data have more than two states.")
+        length(labels) == 2 || error("Binary Trait Substitution Model supports traits with two states. These data have do not have two states.")
     elseif modSymbol == :TBTSM
-        unique(dat[1]) == 2 && unique(dat[2] == 2) || error("Two Binary Trait Substitution Model supports two traits with two states each. These data have more than two states.")
+        unique(dat[1]) == 2 && unique(dat[2] == 2) || error("Two Binary Trait Substitution Model supports two traits with two states each.")
     elseif modSymbol == :HKY85
-        uppercase(join(sort(labels))) == "ACGT" || error("HKY85 requires that trait data are dna bases A, C, G, and T")
+        occursin(uppercase(join(sort(labels))), "ACGT") || error("HKY85 requires that trait data are dna bases A, C, G, and T")
     elseif modSymbol == :JC69
-        uppercase(join(sort(labels))) == "ACGT" || error("JC69 requires that trait data are dna bases A, C, G, and T")
+        occursin(uppercase(join(sort(labels))), "ACGT") || error("JC69 requires that trait data are dna bases A, C, G, and T")
     end
     return labels
 end
@@ -836,8 +866,9 @@ end
     startingrate(net)
 
 Estimate an evolutionary rate appropriate for the branch lengths in the network,
-which should be a good starting value before optimization in `fitDiscrete`,
+which should be a good starting value before optimization in `fitdiscrete`,
 assuming approximately 1 change across the entire tree.
+If all edge lengths are missing, set starting rate to 1/(number of taxa).
 """
 function startingrate(net::HybridNetwork)
     totaledgelength = 0.0
@@ -847,7 +878,7 @@ function startingrate(net::HybridNetwork)
         end
     end
     if totaledgelength == 0.0 # as when all edge lengths are missing
-        totaledgelength = 0.25
+        totaledgelength = net.numTaxa
     end
     return 1.0/totaledgelength
 end
diff --git a/test/test_traitLikDiscrete.jl b/test/test_traitLikDiscrete.jl
index afdb2ba83..d4229920d 100644
--- a/test/test_traitLikDiscrete.jl
+++ b/test/test_traitLikDiscrete.jl
@@ -103,10 +103,10 @@ end
 # test on a tree
 #=
 likelihood calculated in R using a fixed Q matrix, first with ace() then
-with fitDiscrete(), then with fitMK(). problem: they give different results,
+with fitdiscrete(), then with fitMK(). problem: they give different results,
 see http://blog.phytools.org/2015/09/the-difference-between-different.html
 - ace: misses log(#states) in its log-likelihood
-- fitDiscrete in geiger: uses empirical prior at root, not stationary dist,
+- fitdiscrete in geiger: uses empirical prior at root, not stationary dist,
   but "lik" object is very flexible
 - fitMk is correct. also great for 2 correlated binary traits
 library(ape)
@@ -119,7 +119,7 @@ print(fitER$loglik - log(2), digits=17) #         -2.6638002683925799
 print(fitER$rates, digits=17)  # rates = 0.3743971742794559
 print(fitER$lik.anc, digits=17)# posterior probs of states at nodes: 3x2 matrix (3 internal nodes, 2 states)
 library(geiger)
-fitER = fitDiscrete(mytree, states, model="ER")
+fitER = fitdiscrete(mytree, states, model="ER")
 print(fitER$opt$q12, digits=17) # rates = 0.36836216513047726
 print(fitER$opt$lnL, digits=17) # log-likelihood = -2.6626566310743804
 lik = fitER$lik
@@ -128,7 +128,7 @@ library(phytools)
 Q2 = matrix(c(-1,1,1,-1),2,2)*fitER$opt$q12
 fit2 = fitMk(mytree, states, model="ER", fixedQ=Q2)
 print(fit2$logLik, digits=17) # log-likelihood = -2.6638637960257574
-fitER = fitDiscrete(mytree, states, model="ARD")
+fitER = fitdiscrete(mytree, states, model="ARD")
 lik = fitER$lik
 Q = c(0.29885191850718751, 0.38944304456937912) # q12, q21
 lik(Q, root="given", root.p=Q[2:1]/sum(Q)) # -2.6457428692377234
@@ -138,32 +138,32 @@ lik(Q, root="flat") # -2.6754091090953693 .1,.7: -3.3291679800706073
 optim(Q, lik, lower=1e-8, control=list(fnscale=-1), root="flat")
 # rates = 0.29993140042699212 0.38882902905265493 loglik=-2.6447247349802496
 states=c(1,2,1); names(states)=c("A","B","D")
-fitER = fitDiscrete(mytree, states, model="ARD"); lik = fitER$lik
+fitER = fitdiscrete(mytree, states, model="ARD"); lik = fitER$lik
 lik(Q, root="flat") # -2.1207856874033491
 =#
 
 net = readTopology("(A:3.0,(B:2.0,(C:1.0,D:1.0):1.0):1.0);");
 tips = Dict("A" => "lo", "B" => "lo", "C" => "hi", "D" => "hi"); #? this is supposed to be an AbstractVector, is a Dict{String,String}
 m1 = EqualRatesSubstitutionModel(2,[0.36836216513047726], ["lo", "hi"]);
-fit1 = (@test_logs fitDiscrete(net, m1, tips; optimizeQ=false, optimizeRVAS=false));
+fit1 = (@test_logs fitdiscrete(net, m1, tips; optimizeQ=false, optimizeRVAS=false));
 @test_logs show(devnull, fit1)
 @test StatsBase.loglikelihood(fit1) ≈ -2.6638637960257574 atol=2e-4
 species = ["G","C","A","B","D"]
 dat1 = DataFrame(trait = ["hi","hi","lo","lo","hi"], species = species)
 m2 = BinaryTraitSubstitutionModel(0.2, 0.3, ["lo", "hi"])
-fit2 = (@test_logs fitDiscrete(net, m2, dat1; optimizeQ=false, optimizeRVAS=false))
+fit2 = (@test_logs fitdiscrete(net, m2, dat1; optimizeQ=false, optimizeRVAS=false))
 @test fit2.trait == [[1],[1],[2],[2]]
 @test StatsBase.loglikelihood(fit2) ≈ -2.6754091090953693 atol=2e-4
 originalstdout = stdout
 redirect_stdout(open("/dev/null", "w"))
 #OPTIMIZES RATES
-fit2 = @test_logs fitDiscrete(net, m2, dat1; optimizeQ=true, optimizeRVAS=false, verbose=true) # 65 iterations
+fit2 = @test_logs fitdiscrete(net, m2, dat1; optimizeQ=true, optimizeRVAS=false, verbose=true) # 65 iterations
 redirect_stdout(originalstdout)
 @test fit2.model.rate ≈ [0.29993140042699212, 0.38882902905265493] atol=2e-4
 @test StatsBase.loglikelihood(fit2) ≈ -2.6447247349802496 atol=2e-4
 m2.rate[:] = [0.2, 0.3];
 dat2 = DataFrame(trait1= ["hi","hi","lo","lo","hi"], trait2=["hi",missing,"lo","hi","lo"]);
-fit3 = (@test_logs fitDiscrete(net, m2, species, dat2; optimizeQ=false, optimizeRVAS=false))
+fit3 = (@test_logs fitdiscrete(net, m2, species, dat2; optimizeQ=false, optimizeRVAS=false))
 
 @test fit3.loglik ≈ (-2.6754091090953693 - 2.1207856874033491)
 PhyloNetworks.fit!(fit3; optimizeQ=true, optimizeRVAS=false)
@@ -192,7 +192,7 @@ function traitprobabilities(model, net, ntraits=10)
     npatterns = i
     lik = Float64[]
     for i in 1:npatterns
-        fit = fitDiscrete(net, model, dat[[:species, Symbol("x",i)]]; optimizeQ=false, optimizeRVAS=false)
+        fit = fitdiscrete(net, model, dat[[:species, Symbol("x",i)]]; optimizeQ=false, optimizeRVAS=false)
         push!(lik, fit.loglik)
     end
     return dat, prop, lik
@@ -224,20 +224,20 @@ d = DataFrame(species=["D","C","B","A"], x1=[1,1,1,1], x2=[1,2,2,1], x3=[2,2,2,2
     x11=[1,2,1,1], x12=[2,2,1,2], x13=[2,1,1,2], x14=[1,2,2,2], x15=[1,1,1,2], x16=[2,1,2,2])
 lik = Float64[]
 for i in 1:16
-    fit = fitDiscrete(net, m1, d[[:species, Symbol("x",i)]]; optimizeQ=false, optimizeRVAS=false)
+    fit = fitdiscrete(net, m1, d[[:species, Symbol("x",i)]]; optimizeQ=false, optimizeRVAS=false)
     push!(lik, fit.loglik)
 end
 @test lik ≈ [-1.6218387598967712, -3.008066347196894, -4.3943604143403245, -3.008199100743402,
     -3.70121329832901, -3.0081981601869483, -2.315051933868397, -2.314985711030534,
     -3.0081988850020873, -3.0081983709272504, -2.3150512090547584, -3.70134532205944,
     -3.008132923628349, -3.7012134632082083, -2.3149859724945876, -3.7013460518770915]
-fit1 = fitDiscrete(net, m1, d[[:species, :x6]]; optimizeRVAS=false)
+fit1 = fitdiscrete(net, m1, d[[:species, :x6]]; optimizeRVAS=false)
 
 # with parameter estimation
 net = readTopology("(((A:2.0,(B:1.0)#H1:0.1::0.9):1.5,(C:0.6,#H1:1.0::0.1):1.0):0.5,D:2.0);")
 m1 = BinaryTraitSubstitutionModel([1.0, 1.0], ["lo", "hi"])
 dat = DataFrame(species=["C","A","B","D"], trait=["hi","lo","lo","hi"])
-fit1 = fitDiscrete(net, m1, dat; optimizeQ=false, optimizeRVAS=false)
+fit1 = fitdiscrete(net, m1, dat; optimizeQ=false, optimizeRVAS=false)
 @test fit1.loglik ≈ -2.77132013004859
 PhyloNetworks.fit!(fit1; optimizeQ=true, optimizeRVAS=false)
 @test fit1.model.rate ≈ [0.2722263130324768, 0.34981109618902395] atol=1e-4
@@ -248,7 +248,7 @@ function simulateManyTraits_estimate(ntraits)
     res, lab = randomTrait(m1, net; ntraits=ntraits)
     tips = findall(in(tipLabels(net)), lab) # indices of tips: columns in res
     dat = DataFrame(transpose(res[:,tips])); species = lab[tips]
-    return fitDiscrete(net, m1, species, dat; optimizeRVAS = false)
+    return fitdiscrete(net, m1, species, dat; optimizeRVAS = false)
 end
 # simulateManyTraits_estimate(100000)
 # α=1.1124637623451075, β=0.5604529225895175, loglik=-25587.1  with ntraits=10000
@@ -271,8 +271,8 @@ asr = ancestralStateReconstruction(fit1)
 end # end of testset, fixed topology
 
 @testset "testing readfastatodna" begin
-#fastafile = joinpath(@__DIR__, "../..", "dev/PhyloNetworks/examples", "test_8_withrepeatingsites.aln")
 fastafile = joinpath(@__DIR__, "..", "examples", "test_8_withrepeatingsites.aln")
+#fastafile = abspath(joinpath(dirname(Base.find_package("PhyloNetworks")), "..", "examples", "test_8_withrepeatingsites.aln"))
 dat, weights = readfastatodna(fastafile, true);
 @test weights ==  [3.0, 1.0, 1.0, 2.0, 1.0]
 #check that no columns are repeated, only correct columns removed
@@ -280,6 +280,7 @@ dat, weights = readfastatodna(fastafile, true);
 
 #test on data with no repeated site patterns
 fastafile = joinpath(@__DIR__, "..", "examples", "Ae_bicornis_8sites.aln")
+#fastafile = abspath(joinpath(dirname(Base.find_package("PhyloNetworks")), "..", "examples", "Ae_bicornis_8sites.aln"))
 dat, weights = readfastatodna(fastafile, true);
 #check that weights are correct
 @test weights == ones(Float64, 8)
@@ -287,7 +288,7 @@ dat, weights = readfastatodna(fastafile, true);
 @test ncol(dat) == 9
 end #testing readfastatodna
 
-@testset "testing NucleicAcidSubsitutionModels" begin
+@testset "NucleicAcidSubsitutionModels" begin
 
 #test NASM models basics
 mJC69 = JC69(0.5, false);
@@ -300,12 +301,15 @@ mJC69 = JC69(0.5, false);
                         0.121646  0.121646  0.635063  0.121646;
                         0.121646  0.121646  0.121646  0.635063] atol=1e-5
 
+@test_throws ErrorException HKY85([0.5, 0.5], [0.25, 0.25, 0.25, 0.25], true)
+@test_throws ErrorException HKY85([.1,.1,.1], [0.25, 0.25, 0.25, 0.25], false)
+@test_throws ErrorException HKY85([0.5], [0.25, 0.25, 0.25, 0.25], false)
 mHKY85 = HKY85([0.5, 0.5], [0.25, 0.25, 0.25, 0.25], false)
 @test Q(mHKY85) ≈ [ -0.375   0.125   0.125   0.125;
                     0.125  -0.375   0.125   0.125;
                     0.125   0.125  -0.375   0.125;
                     0.125   0.125   0.125  -0.375] atol=1e-5
-mHKY85rel = HKY85([0.5], [0.25, 0.25, 0.25, 0.25], true)
+mHKY85rel = HKY85(0.5, [0.25, 0.25, 0.25, 0.25])
 @test Q(mHKY85rel) ≈ [-1.0   0.4   0.2   0.4;
                         0.4  -1.0   0.4   0.2;
                         0.2   0.4  -1.0   0.4;
@@ -329,60 +333,91 @@ mHKY85rel = HKY85([0.5], [0.25, 0.25, 0.25, 0.25], true)
 @test_logs show(devnull, mHKY85)
 end # end of testing NASMs
 
-@testset "testing fitDiscrete for NucleicAcidSubsitutionModels & RateVariationAcrossSites" begin
-# test fitDiscrete with NASM #
+@testset "fitdiscrete for NucleicAcidSubsitutionModels & RateVariationAcrossSites" begin
+# test fitdiscrete with NASM #
 #there are 3 alignments in PhyloNetworks/examples
 net = readTopology("(A:3.0,(B:2.0,(C:1.0,D:1.0):1.0):1.0);");
 tips = Dict("A" => BioSymbols.DNA_A, "B" => BioSymbols.DNA_A, "C" => BioSymbols.DNA_G, "D" => BioSymbols.DNA_G);
 
 #without optimization (confirmed with ape ace() function and phangorn)
 mJC69 = JC69(0.2923350741254221, false) #ace() gives Q matrix cell, not rate. lambda = (1.0/3.0)*obj.rate[1] so rate = 3*0.097445024708474035 = 0.2923350741254221
-fitJC69 = fitDiscrete(net, mJC69, tips; optimizeQ=false, optimizeRVAS=false); 
+fitJC69 = fitdiscrete(net, mJC69, tips; optimizeQ=false);
 @test loglikelihood(fitJC69) ≈ -4.9927386890207304 atol=2e-6 #ace() from ape pkg and our method agree here. 
 
 ##without optimize at 0.25
 mJC69 = JC69([0.25], false)
-fitJC69 = fitDiscrete(net, mJC69, tips; optimizeQ=false, optimizeRVAS=false); 
+fitJC69 = fitdiscrete(net, mJC69, tips; optimizeQ=false);
 @test loglikelihood(fitJC69) ≈ -4.99997 atol=2e-3 #from phangorn
 
 #with optimization (confirmed with ape ace() function and phangorn)
 mJC69 = JC69([0.25], false)
-fitJC69 = fitDiscrete(net, mJC69, tips; optimizeQ=true, optimizeRVAS=false); 
+fitJC69 = fitdiscrete(net, mJC69, tips; optimizeQ=true)
 @test Q(fitJC69.model)[1,2] ≈ 0.097445024708474035 atol = 2e-3 #from ace() in ape pkg
 @test loglikelihood(fitJC69) ≈ -4.9927386890207304 atol=2e-6 #from ace() + log(4)
 
 #without optimization (confirmed with phangorn function)
 mHKY85 = HKY85([4.0/3, 4.0/3], [0.25, 0.25, 0.25, 0.25], false); #absolute
-fitHKY85 = fitDiscrete(net, mHKY85, tips; optimizeQ=false, optimizeRVAS=false);
+fitHKY85 = fitdiscrete(net, mHKY85, tips; optimizeQ=false);
 @test loglikelihood(fitHKY85) ≈ -5.365777014 atol = 2e-8
 mHKY85 = HKY85([1.0], [0.25, 0.25, 0.25, 0.25], true); #relative
-fitHKY85 = fitDiscrete(net, mHKY85, tips; optimizeQ=false, optimizeRVAS=false);
+fitHKY85 = fitdiscrete(net, mHKY85, tips; optimizeQ=false);
 @test loglikelihood(fitHKY85) ≈ -5.365777014 atol = 2e-8
 
 #with optimization (confirmed with ape ace() function)
 mHKY85 = HKY85([0.5, 0.1], [0.25, 0.25, 0.25, 0.25], false); #absolute
-fitHKY85 = fitDiscrete(net, mHKY85, tips; optimizeQ=true, optimizeRVAS=false);
-@test fitHKY85.model.rate[1] ≈ 1.4975887229148119 atol = 2e-3
+@time fitHKY85 = fitdiscrete(net, mHKY85, tips; optimizeQ=true)
+@test fitHKY85.model.rate[1] ≈ 1.4975887229148119 atol = 2e-4
 @test loglikelihood(fitHKY85) ≈ -3.3569474489525244 atol = 2e-8
 
 # test RateVariationAcrossSites with NASM
-rv = RateVariationAcrossSites(1.0, 4);
+@test RateVariationAcrossSites(2.0, 4).ratemultiplier ≈ [0.3190650334827019,0.6833612017749638,1.108977045226681,1.888596719515653]
+rv = RateVariationAcrossSites()
+@test rv.ratemultiplier ≈ [0.14578435573294748,0.5131315935152865,1.0708310452240655,2.270253005527701]
+PhyloNetworks.setalpha!(rv, 2.0)
+@test rv.ratemultiplier ≈ [0.3190650334827019,0.6833612017749638,1.108977045226681,1.888596719515653]
 @test_logs show(devnull, rv)
 rv.ratemultiplier[:] = [0.1369538, 0.4767519, 1.0000000, 2.3862944] # NOTE: phangorn calculates gamma quantiles differently, so I assign them for testing
 mJC69 = JC69([1.0], true)
-fitJC69rv = fitDiscrete(net, mJC69, rv, tips; optimizeQ=false, optimizeRVAS=false, ftolRel=1e-20);
+fitJC69rv = fitdiscrete(net, mJC69, rv, tips; optimizeQ=false, optimizeRVAS=false, ftolRel=1e-20);
 @test loglikelihood(fitJC69rv) ≈ -5.26390008 atol = 2e-8
 
-fitJC69rvOpt = fitDiscrete(net, mJC69, rv, tips, optimizeQ=false, optimizeRVAS=true);
+fitJC69rvOpt = fitdiscrete(net, mJC69, rv, tips, optimizeQ=false, optimizeRVAS=true);
 
 mHKY85 = HKY85([4.0/3, 4.0/3], [0.25, 0.25, 0.25, 0.25], false);
-fitHKY85rv = fitDiscrete(net, mHKY85, rv, tips; optimizeQ=false, optimizeRVAS=false);
+fitHKY85rv = fitdiscrete(net, mHKY85, rv, tips; optimizeQ=false, optimizeRVAS=false);
 @test loglikelihood(fitHKY85rv) ≈ -5.2639000803742979 atol = 2e-5 #from phangorn
 
-fitHKY85rvOpt = fitDiscrete(net, mHKY85, rv, tips; optimizeQ=false, optimizeRVAS=true);
+fitHKY85rvOpt = fitdiscrete(net, mHKY85, rv, tips; optimizeQ=false, optimizeRVAS=true);
 
-#tests wrappers
+## TEST WRAPPERS ##
+#for species, trait data
+net_dat = readTopology("(((A:2.0,(B:1.0)#H1:0.1::0.9):1.5,(C:0.6,#H1:1.0::0.1):1.0):0.5,D:2.0);")
+dat = DataFrame(species=["C","A","B","D"], trait=["hi","lo","lo","hi"])
+species_alone = ["C","A","B","D"]
+dat_alone = DataFrame(trait=["hi","lo","lo","hi"])
+net_tips = readTopology("(A:3.0,(B:2.0,(C:1.0,D:1.0):1.0):1.0);");
+@test_throws ErrorException fitdiscrete(net_dat, :bogus, species_alone, dat_alone);
+s1 = fitdiscrete(net_dat, :ERSM, species_alone, dat_alone; optimizeQ=false)
+@test_logs show(devnull, s1)
+s1 = fitdiscrete(net_dat, :ERSM, species_alone, dat_alone, :RV; optimizeQ=false, optimizeRVAS=false)
+@test_logs show(devnull, s1)
+s2 = fitdiscrete(net_dat, :BTSM, species_alone, dat_alone; optimizeQ=false, optimizeRVAS=false)
+@test_logs show(devnull, s2)
+@test_throws ErrorException fitdiscrete(net_dat, :TBTSM, species_alone, dat_alone; optimizeQ=false, optimizeRVAS=false)
+dna_alone = DataFrame(trait=['A','C','C','A'])
+s3 = fitdiscrete(net_dat, :JC69, species_alone, dna_alone, :RV; optimizeRVAS=false, ftolRel=.1,ftolAbs=.2,xtolRel=.1,xtolAbs=.2) # 1 site: no info to optimize RVAS
+@test s3.model.relative
+@test s3.ratemodel.alpha == 1.0
+@test_logs show(devnull, s3)
+s4 = fitdiscrete(net_dat, :HKY85, species_alone, dna_alone, :RV; optimizeRVAS=false, ftolRel=.1,ftolAbs=.2,xtolRel=.1,xtolAbs=.2)
+@test s4.model.relative
+@test s4.model.pi == [3,3,1,1]/8
+@test s4.ratemodel.alpha == 1.0
+@test_logs show(devnull, s4)
+
+#for dna data (output of fastatodna)
 fastafile = joinpath(@__DIR__, "..", "examples", "Ae_bicornis_Tr406_Contig10132.aln")
+#fastafile = abspath(joinpath(dirname(Base.find_package("PhyloNetworks")), "..", "examples", "Ae_bicornis_Tr406_Contig10132.aln"))
 dna_dat, dna_weights = readfastatodna(fastafile, true);
 net_dna = readTopology("((((((((((((((Ae_caudata_Tr275,Ae_caudata_Tr276),Ae_caudata_Tr139))#H1,#H2),(((Ae_umbellulata_Tr266,Ae_umbellulata_Tr257),Ae_umbellulata_Tr268),#H1)),((Ae_comosa_Tr271,Ae_comosa_Tr272),(((Ae_uniaristata_Tr403,Ae_uniaristata_Tr357),Ae_uniaristata_Tr402),Ae_uniaristata_Tr404))),(((Ae_tauschii_Tr352,Ae_tauschii_Tr351),(Ae_tauschii_Tr180,Ae_tauschii_Tr125)),(((((((Ae_longissima_Tr241,Ae_longissima_Tr242),Ae_longissima_Tr355),(Ae_sharonensis_Tr265,Ae_sharonensis_Tr264)),((Ae_bicornis_Tr408,Ae_bicornis_Tr407),Ae_bicornis_Tr406)),((Ae_searsii_Tr164,Ae_searsii_Tr165),Ae_searsii_Tr161)))#H2,#H4))),(((T_boeoticum_TS8,(T_boeoticum_TS10,T_boeoticum_TS3)),T_boeoticum_TS4),((T_urartu_Tr315,T_urartu_Tr232),(T_urartu_Tr317,T_urartu_Tr309)))),(((((Ae_speltoides_Tr320,Ae_speltoides_Tr323),Ae_speltoides_Tr223),Ae_speltoides_Tr251))H3,((((Ae_mutica_Tr237,Ae_mutica_Tr329),Ae_mutica_Tr244),Ae_mutica_Tr332))#H4))),Ta_caputMedusae_TB2),S_vavilovii_Tr279),Er_bonaepartis_TB1),H_vulgare_HVens23);");
 for edge in net_dna.edge #adds branch lengths
@@ -391,42 +426,26 @@ for edge in net_dna.edge #adds branch lengths
         setGamma!(edge, 0.5)
     end
 end
-net_dat = readTopology("(((A:2.0,(B:1.0)#H1:0.1::0.9):1.5,(C:0.6,#H1:1.0::0.1):1.0):0.5,D:2.0);")
-dat = DataFrame(species=["C","A","B","D"], trait=["hi","lo","lo","hi"])
-species_alone = ["C","A","B","D"]
-dat_alone = DataFrame(trait=["hi","lo","lo","hi"])
-
-net_tips = readTopology("(A:3.0,(B:2.0,(C:1.0,D:1.0):1.0):1.0);");
-
-## TEST WRAPPERS ##
-    #for species, trait data
-s1 = fitDiscrete(net_dat, :ERSM, species_alone, dat_alone; optimizeQ=false, optimizeRVAS=false);
-@test_logs show(devnull, s1)
-s1 = fitDiscrete(net_dat, :ERSM, species_alone, dat_alone, :RV; optimizeQ=false, optimizeRVAS=false);
-@test_logs show(devnull, s1)
-s2 = fitDiscrete(net_dat, :BTSM, species_alone, dat_alone; optimizeQ=false, optimizeRVAS=false);
-@test_logs show(devnull, s2)
-@test_throws ErrorException fitDiscrete(net_dat, :TBTSM, species_alone, dat_alone; optimizeQ=false, optimizeRVAS=false);
-@test_throws ErrorException fitDiscrete(net_dat, :JC69, species_alone, dat_alone; optimizeQ=false, optimizeRVAS=false);
-@test_throws ErrorException fitDiscrete(net_dat, :HKY85, species_alone, dat_alone; optimizeQ=false, optimizeRVAS=false);
-
-    #for dna data (output of fastatodna)
-d1 = fitDiscrete(net_dna, :ERSM, dna_dat, dna_weights; optimizeQ=false, optimizeRVAS=false);
+d1 = (@test_logs (:warn, r"^the network contains taxa with no data") (:warn, r"^resetting edge numbers") fitdiscrete(net_dna,
+  :ERSM, dna_dat, dna_weights; optimizeQ=false, optimizeRVAS=false))
 @test_logs show(devnull, d1)
-@test_throws ErrorException fitDiscrete(net_dna, :BTSM, dna_dat, dna_weights; optimizeQ=false, optimizeRVAS=false);
-@test_throws ErrorException fitDiscrete(net_dna, :TBTSM, dna_dat, dna_weights; optimizeQ=false, optimizeRVAS=false);
-d2 = fitDiscrete(net_dna, :JC69, dna_dat, dna_weights; optimizeQ=false, optimizeRVAS=false);
+@test_throws ErrorException fitdiscrete(net_dna, :BTSM, dna_dat, dna_weights; optimizeQ=false, optimizeRVAS=false);
+@test_throws ErrorException fitdiscrete(net_dna, :TBTSM, dna_dat, dna_weights; optimizeQ=false, optimizeRVAS=false);
+d2 = (@test_logs (:warn, r"^the network contains taxa with no data") (:warn, r"^resetting edge numbers") fitdiscrete(net_dna,
+  :JC69, dna_dat, dna_weights; optimizeQ=false, optimizeRVAS=false))
 @test_logs show(devnull, d2)
-d2 = fitDiscrete(net_dna, :JC69, dna_dat, dna_weights, :RV; optimizeQ=false, optimizeRVAS=false);
+d2 = (@test_logs (:warn, r"^the network contains taxa with no data") (:warn, r"^resetting edge numbers") fitdiscrete(net_dna,
+  :JC69, dna_dat, dna_weights, :RV; optimizeQ=false, optimizeRVAS=false))
 @test_logs show(devnull, d2)
-d3 = fitDiscrete(net_dna, :HKY85, dna_dat, dna_weights; optimizeQ=false, optimizeRVAS=false);
+d3 = (@test_logs (:warn, r"^the network contains taxa with no data") (:warn, r"^resetting edge numbers") fitdiscrete(net_dna,
+  :HKY85, dna_dat, dna_weights; optimizeQ=false, optimizeRVAS=false))
 @test_logs show(devnull, d3)
 
-end #testing NASM with rate variation
+end #testing fitdiscrete for NucleicAcidSubsitutionModels & RateVariationAcrossSites
 
-@testset "testing readfastatodna with NASM and RateVariationAcrossSites" begin
-#fastafile = joinpath(@__DIR__, "../..", "dev/PhyloNetworks/examples", "Ae_bicornis_Tr406_Contig10132.aln")
+@testset "readfastatodna with NASM and RateVariationAcrossSites" begin
 fastafile = joinpath(@__DIR__, "..", "examples", "Ae_bicornis_Tr406_Contig10132.aln")
+#fastafile = abspath(joinpath(dirname(Base.find_package("PhyloNetworks")), "..", "examples", "Ae_bicornis_Tr406_Contig10132.aln"))
 dna_dat, dna_weights = readfastatodna(fastafile, true);
 
 dna_net_top = readTopology("((((((((((((((Ae_caudata_Tr275,Ae_caudata_Tr276),Ae_caudata_Tr139))#H1,#H2),(((Ae_umbellulata_Tr266,Ae_umbellulata_Tr257),Ae_umbellulata_Tr268),#H1)),((Ae_comosa_Tr271,Ae_comosa_Tr272),(((Ae_uniaristata_Tr403,Ae_uniaristata_Tr357),Ae_uniaristata_Tr402),Ae_uniaristata_Tr404))),(((Ae_tauschii_Tr352,Ae_tauschii_Tr351),(Ae_tauschii_Tr180,Ae_tauschii_Tr125)),(((((((Ae_longissima_Tr241,Ae_longissima_Tr242),Ae_longissima_Tr355),(Ae_sharonensis_Tr265,Ae_sharonensis_Tr264)),((Ae_bicornis_Tr408,Ae_bicornis_Tr407),Ae_bicornis_Tr406)),((Ae_searsii_Tr164,Ae_searsii_Tr165),Ae_searsii_Tr161)))#H2,#H4))),(((T_boeoticum_TS8,(T_boeoticum_TS10,T_boeoticum_TS3)),T_boeoticum_TS4),((T_urartu_Tr315,T_urartu_Tr232),(T_urartu_Tr317,T_urartu_Tr309)))),(((((Ae_speltoides_Tr320,Ae_speltoides_Tr323),Ae_speltoides_Tr223),Ae_speltoides_Tr251))H3,((((Ae_mutica_Tr237,Ae_mutica_Tr329),Ae_mutica_Tr244),Ae_mutica_Tr332))#H4))),Ta_caputMedusae_TB2),S_vavilovii_Tr279),Er_bonaepartis_TB1),H_vulgare_HVens23);");
@@ -434,23 +453,76 @@ for edge in dna_net_top.edge #adds branch lengths
     setLength!(edge,1.0)
 end
 
-nasm_model = JC69([0.5], false);
-@test_throws ErrorException fitDiscrete(dna_net_top, nasm_model, dna_dat, dna_weights; optimizeQ=false, optimizeRVAS=false)
-
-#Fixes the gamma error (creates a network)
+nasm_model = JC69([0.3], false);       # relative=false: absolute version
+rv = RateVariationAcrossSites(1.0, 2); # 2 rates to go faster
+# below: error because missing gammas
+@test_throws ErrorException fitdiscrete(dna_net_top, nasm_model, dna_dat, dna_weights; optimizeQ=false, optimizeRVAS=false)
+# set gamma at the 3 reticulations, to fix error above
 setGamma!(dna_net_top.edge[6],0.6)
 setGamma!(dna_net_top.edge[7],0.6)
 setGamma!(dna_net_top.edge[58],0.6)
 
-dna_net = fitDiscrete(dna_net_top, nasm_model, dna_dat, dna_weights; optimizeQ=false, optimizeRVAS=false)
-
-dna_net_optQ = fitDiscrete(dna_net_top, nasm_model, dna_dat, dna_weights; optimizeQ=true, optimizeRVAS=false)
-
-dna_net_optRVAS = fitDiscrete(dna_net_top, nasm_model, dna_dat, dna_weights; optimizeQ=false, optimizeRVAS=true)
+dna_net = (@test_logs (:warn, r"^the network contains taxa with no data") (:warn, r"^resetting edge numbers") fitdiscrete(dna_net_top,
+    nasm_model, dna_dat, dna_weights; optimizeQ=false))
+@test dna_net.model.rate == nasm_model.rate
+@test dna_net.ratemodel.ratemultiplier == [1.0]
+dna_net_optQ = (@test_logs (:warn, r"^the network contains taxa with no data") (:warn, r"^resetting edge numbers") fitdiscrete(dna_net_top,
+    nasm_model, rv, dna_dat, dna_weights; optimizeQ=true, optimizeRVAS=false, ftolRel=.1, ftolAbs=.2, xtolRel=.1, xtolAbs=.2))
+@test dna_net_optQ.model.rate != nasm_model.rate
+@test dna_net_optQ.ratemodel.alpha == 1.0
+dna_net_optRVAS = (@test_logs (:warn, r"^the network contains taxa with no data") (:warn, r"^resetting edge numbers") fitdiscrete(dna_net_top,
+    nasm_model, rv, dna_dat, dna_weights; optimizeQ=false, optimizeRVAS=true, ftolRel=.1, ftolAbs=.2, xtolRel=.1, xtolAbs=.2))
+@test dna_net_optRVAS.model.rate == nasm_model.rate
+@test dna_net_optRVAS.ratemodel.alpha != 1.0
+@test dna_net_optRVAS.ratemodel.ratemultiplier ≈ [0.02, 1.98] atol=0.05
+originalstdout = stdout
+redirect_stdout(open("/dev/null", "w"))
+dna_net_opt_both = (@test_logs (:warn, r"^the network contains taxa with no data") (:warn, r"^resetting edge numbers") fitdiscrete(dna_net_top,
+    nasm_model, rv, dna_dat, dna_weights; optimizeQ=true, optimizeRVAS=true, ftolRel=.1, ftolAbs=.2, xtolRel=.1, xtolAbs=.2, verbose=true))
+redirect_stdout(originalstdout)
+@test dna_net_opt_both.model.rate != nasm_model.rate
+@test dna_net_opt_both.ratemodel.alpha != 1.0
+# for this example: all NaN values if no lower bound on RVAS's alpha, because it goes to 0
+@test dna_net_opt_both.ratemodel.ratemultiplier ≈ [1e-4, 2.0] atol=0.02
+@test dna_net_opt_both.loglik > -3100. # should be ~ -2901.3 -- but low tolerance: just check it's not horrible
+# with default strict tolerance values: takes *much* longer, alpha=0.05, JC rate = 0.00293, loglik = -2535.618
+end # of testing readfastatodna with NASM and RateVariationAcrossSites
+
+@testset "stationary and empiricalDNAfrequencies" begin
+
+BTSM_1 = BinaryTraitSubstitutionModel(1.0, 2.0);
+ERSM_1 = EqualRatesSubstitutionModel(4, 3.0, ["S1","S2","S3","S4"]);
+@test PhyloNetworks.stationary(BTSM_1) ≈ [0.6666666666666666, 0.3333333333333333] atol=1e-6
+@test PhyloNetworks.stationary(ERSM_1) == [0.25, 0.25, 0.25, 0.25]
+
+JC69_1 = JC69(0.5, false);
+@test PhyloNetworks.stationary(JC69_1) == [0.25, 0.25, 0.25, 0.25]
+HKY85_1 = HKY85([0.5, 0.5], [0.2, 0.3, 0.25, 0.25], false)
+@test PhyloNetworks.stationary(HKY85_1) == [0.2, 0.3, 0.25, 0.25]
+
+# test empiricalDNAfrequencies with string type
+# Bayesian correction by default: more stable and avoids zeros
+dna_String = view(DataFrame(A = ["s1", "s2"], site1 = ["A", "A"], site2 = ["G", "T"]), 2:3)
+@test PhyloNetworks.empiricalDNAfrequencies(dna_String, [1, 1]) ≈ [3,1,2,2]/(4+4)
+# with char type
+dna_Char = view(DataFrame(A = ["s1", "s2"], site1 = ['A', 'A'], site2 = ['G', 'T']), 2:3)
+@test PhyloNetworks.empiricalDNAfrequencies(dna_Char, [1, 1]) ≈ [3,1,2,2]/(4+4)
+# uncorrected estimate
+@test PhyloNetworks.empiricalDNAfrequencies(dna_Char, [1, 1], false) ≈ [2,0,1,1]/4
+# with ambiguous sites
+dna_Char = DataFrame(site1 = ['A','A','Y'], site2 = ['G','T','V'])
+@test PhyloNetworks.empiricalDNAfrequencies(dna_Char, [1, 1], false, false) ≈ [2,0,1,1]/4
+@test PhyloNetworks.empiricalDNAfrequencies(dna_Char, [1, 1], false) ≈ [2+1/3,1/2+1/3,1+1/3,1+1/2]/6
+# with DNA type and weights
+#fastafile = abspath(joinpath(dirname(Base.find_package("PhyloNetworks")), "..", "examples", "test_8_withrepeatingsites.aln"))
+fastafile = joinpath(@__DIR__, "..", "examples", "test_8_withrepeatingsites.aln")
+dat, weights = readfastatodna(fastafile, true);
+@test PhyloNetworks.empiricalDNAfrequencies(view(dat, 2:6), weights) ≈ [0.21153846153846154, 0.3076923076923077, 0.40384615384615385, 0.07692307692307693] atol=1e-9
 
-dna_net_opt_both = fitDiscrete(dna_net_top, nasm_model, dna_dat, dna_weights; optimizeQ=true, optimizeRVAS=true)
+#test PhyloNetworks.empiricalDNAfrequencies with bad type
+dna_bad = view(DataFrame(A = ["s1", "s2"], trait1 = ["hi", "lo"], trait2 = ["lo", "hi"]), 2:3)
+@test_throws ErrorException PhyloNetworks.empiricalDNAfrequencies(dna_bad, [1, 1])
 
-end #of testing readfastatodna with NASM and RateVariationAcrossSites
+end #testing stationary and empiricalDNAfrequencies functions
 
 end # of nested testsets
-