From bcaebd05b2e89a52ab4a0377377c6f7f93cee9eb Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?C=C3=A9cile=20An=C3=A9?= Date: Tue, 3 Sep 2019 09:58:54 -0500 Subject: [PATCH] new indexing rules in DataFrames v0.19 (#102) * code: - update to new indexing rules in DataFrames v0.19 - getIndex -> findfirst - readNexusTrees: more efficient * documentation: - discrete trait page - Documenter v0.23 * tests: - folder clean-up - tests added to increase coverage - removed match_mode=:any in log tests --- .travis.yml | 5 +- docs/Project.toml | 2 +- docs/make.jl | 1 + docs/readme.md | 7 +- docs/src/index.md | 4 +- docs/src/lib/internals.md | 8 - docs/src/lib/public.md | 13 +- docs/src/man/bootstrap.md | 30 +- docs/src/man/dist_reroot.md | 3 + docs/src/man/expectedCFs.md | 13 +- docs/src/man/fitDiscrete.md | 299 ++++++---- docs/src/man/fitdiscreteDNA.md | 78 +++ docs/src/man/fixednetworkoptim.md | 8 +- docs/src/man/inputdata.md | 2 + docs/src/man/parsimony.md | 1 + docs/src/man/ticr_howtogetQuartetCFs.md | 19 +- examples/case_f_example.jl | 2 + examples/test.nex | 7 +- src/auxiliary.jl | 51 +- src/bootstrap.jl | 81 ++- src/compareNetworks.jl | 42 +- src/deleteHybrid.jl | 6 +- src/manipulateNet.jl | 4 +- src/multipleAlleles.jl | 21 +- src/optimization.jl | 30 +- src/parsimony.jl | 11 +- src/pseudolik.jl | 41 +- src/readData.jl | 89 +-- src/readwrite.jl | 67 ++- src/substitutionModels.jl | 31 +- src/ticr.jl | 4 +- src/traits.jl | 87 ++- src/traitsLikDiscrete.jl | 32 +- test/1_astral.out | 1 - test/CaseH_output.txt | 6 - test/HGTtableCF.txt | 16 - test/Tree_output.txt | 6 - test/add_hybrid_caseC.jl | 25 - test/add_hybrid_caseD.jl | 26 - test/add_hybrid_caseE.jl | 25 - test/add_hybrid_caseF.jl | 26 - test/add_hybrid_caseG.jl | 25 - test/add_hybrid_caseH.jl | 25 - test/add_hybrid_caseI.jl | 25 - test/add_hybrid_caseJ.jl | 25 - test/bestNet.err | 2 - test/bestNet.out | 18 - test/debug_n6.jl | 131 ----- test/debug_optBL.jl | 106 ---- test/debug_readTopology.jl | 67 --- test/delete_caseC.jl | 
27 - test/delete_caseD.jl | 27 - test/delete_caseE.jl | 27 - test/delete_caseF.jl | 27 - test/delete_caseG.jl | 27 - test/delete_caseH.jl | 27 - test/delete_caseI.jl | 27 - test/delete_caseJ.jl | 27 - test/descData.txt | 22 - test/hola.txt | 11 - test/initial_tests_deleteLeaf_quartetNet.jl | 89 --- test/movesTable.txt | 1 - test/net1prueba.networks | 6 - test/net1prueba.out | 11 - test/net1prueba2.networks | 5 - test/net1prueba2.out | 11 - test/net2_snaq.out | 17 - test/print_add.jl | 9 - test/rand4Quartets.txt | 4 - test/readme.md | 17 +- test/summaryTreesQuartets.txt | 14 - test/tableCF0.txt | 16 - test/tableCF1.txt | 16 - test/tableCF2.txt | 11 - test/tableCF3.txt | 11 - test/tableCF4.txt | 6 - test/test_5taxon_writeTopology.jl | 56 -- test/test_addDelete.jl | 81 --- test/test_afterOptBL.jl | 232 -------- test/test_badDiamII.jl | 2 +- test/test_bootstrap.jl | 32 +- test/test_calculateExpCF.jl | 6 +- test/test_changeDir.jl | 47 -- test/test_checkrootplace.jl | 16 - test/test_correctLik.jl | 18 +- test/test_extractQuartet.jl | 129 ----- test/test_functions_5taxon.jl | 224 -------- test/test_hasEdge.jl | 2 + test/test_hgt.jl | 132 ----- test/test_hybridatnode.jl | 28 - test/test_lm.jl | 68 +-- test/test_lm_tree.jl | 12 +- test/test_movedownlevel.jl | 24 - test/test_multipleAlleles.jl | 22 +- test/test_nlopt_example.jl | 44 -- test/test_nni.jl | 78 --- test/test_optBL.jl | 569 -------------------- test/test_optBL_sticr_data.jl | 20 - test/test_optTopLevel.jl | 144 ----- test/test_optTopLevel2.jl | 168 ------ test/test_optTopLevelparts.jl | 107 ---- test/test_optTopParts.jl | 3 - test/test_readInputData.jl | 13 +- test/test_readTopology.jl | 140 ----- test/test_redundanteCycle.jl | 37 -- test/test_relaxed_reading.jl | 13 +- test/test_simplenet.jl | 12 - test/test_traitLikDiscrete.jl | 22 +- test/test_tree2Matrix.jl | 9 - test/test_updateBL.jl | 26 - test/tests_5taxon.jl | 46 -- test/tests_5taxon_delete.jl | 43 -- test/tree.tre | 1 - test/tree_example.jl | 38 -- 
test/truenetwork.txt | 1 - test/try.txt | 16 - test/try4.txt | 16 - 117 files changed, 719 insertions(+), 4125 deletions(-) create mode 100644 docs/src/man/fitdiscreteDNA.md delete mode 100644 test/1_astral.out delete mode 100644 test/CaseH_output.txt delete mode 100644 test/HGTtableCF.txt delete mode 100644 test/Tree_output.txt delete mode 100644 test/add_hybrid_caseC.jl delete mode 100644 test/add_hybrid_caseD.jl delete mode 100644 test/add_hybrid_caseE.jl delete mode 100644 test/add_hybrid_caseF.jl delete mode 100644 test/add_hybrid_caseG.jl delete mode 100644 test/add_hybrid_caseH.jl delete mode 100644 test/add_hybrid_caseI.jl delete mode 100644 test/add_hybrid_caseJ.jl delete mode 100644 test/bestNet.err delete mode 100644 test/bestNet.out delete mode 100644 test/debug_n6.jl delete mode 100644 test/debug_optBL.jl delete mode 100644 test/debug_readTopology.jl delete mode 100644 test/delete_caseC.jl delete mode 100644 test/delete_caseD.jl delete mode 100644 test/delete_caseE.jl delete mode 100644 test/delete_caseF.jl delete mode 100644 test/delete_caseG.jl delete mode 100644 test/delete_caseH.jl delete mode 100644 test/delete_caseI.jl delete mode 100644 test/delete_caseJ.jl delete mode 100644 test/descData.txt delete mode 100644 test/hola.txt delete mode 100644 test/initial_tests_deleteLeaf_quartetNet.jl delete mode 100644 test/movesTable.txt delete mode 100644 test/net1prueba.networks delete mode 100644 test/net1prueba.out delete mode 100644 test/net1prueba2.networks delete mode 100644 test/net1prueba2.out delete mode 100644 test/net2_snaq.out delete mode 100644 test/print_add.jl delete mode 100644 test/rand4Quartets.txt delete mode 100644 test/summaryTreesQuartets.txt delete mode 100644 test/tableCF0.txt delete mode 100644 test/tableCF1.txt delete mode 100644 test/tableCF2.txt delete mode 100644 test/tableCF3.txt delete mode 100644 test/tableCF4.txt delete mode 100644 test/test_5taxon_writeTopology.jl delete mode 100644 test/test_addDelete.jl delete mode 
100644 test/test_afterOptBL.jl delete mode 100644 test/test_changeDir.jl delete mode 100644 test/test_checkrootplace.jl delete mode 100644 test/test_extractQuartet.jl delete mode 100644 test/test_functions_5taxon.jl delete mode 100644 test/test_hgt.jl delete mode 100644 test/test_hybridatnode.jl delete mode 100644 test/test_movedownlevel.jl delete mode 100644 test/test_nlopt_example.jl delete mode 100644 test/test_nni.jl delete mode 100644 test/test_optBL.jl delete mode 100644 test/test_optBL_sticr_data.jl delete mode 100644 test/test_optTopLevel.jl delete mode 100644 test/test_optTopLevel2.jl delete mode 100644 test/test_optTopLevelparts.jl delete mode 100644 test/test_optTopParts.jl delete mode 100644 test/test_readTopology.jl delete mode 100644 test/test_redundanteCycle.jl delete mode 100644 test/test_simplenet.jl delete mode 100644 test/test_tree2Matrix.jl delete mode 100644 test/test_updateBL.jl delete mode 100644 test/tests_5taxon.jl delete mode 100644 test/tests_5taxon_delete.jl delete mode 100644 test/tree.tre delete mode 100644 test/tree_example.jl delete mode 100644 test/truenetwork.txt delete mode 100644 test/try.txt delete mode 100644 test/try4.txt diff --git a/.travis.yml b/.travis.yml index f49fe91df..569bd1108 100644 --- a/.travis.yml +++ b/.travis.yml @@ -6,6 +6,7 @@ os: - osx julia: - 1.1 + - 1.2 notifications: email: false @@ -24,7 +25,7 @@ coveralls: true jobs: include: - stage: "Documentation" - julia: 1.0 + julia: 1.2 os: linux script: - julia --project=docs/ -e 'using Pkg; Pkg.instantiate(); @@ -35,6 +36,6 @@ jobs: - if [ "$TRAVIS_OS_NAME" == "linux" ]; then sudo apt-key adv --keyserver keyserver.ubuntu.com --recv-keys E084DAB9; fi - if [ "$TRAVIS_OS_NAME" == "linux" ]; then sudo add-apt-repository -y "deb http://cran.rstudio.com/bin/linux/ubuntu $(lsb_release -s -c)/"; fi - if [ "$TRAVIS_OS_NAME" == "linux" ]; then sudo apt-get update -qq -y; fi - - if [ "$TRAVIS_OS_NAME" == "linux" ]; then sudo apt-get install git r-base r-base-dev 
r-recommended -y; fi + - if [ "$TRAVIS_OS_NAME" == "linux" ]; then sudo apt-get install --allow-unauthenticated git r-base r-base-dev r-recommended -y; fi # - if [ "$TRAVIS_OS_NAME" == "linux" ]; then sudo R -e 'install.packages("ggplot2", dep=TRUE, repos="http://cran.us.r-project.org")'; fi # takes ~ 6min 2018-06-02 after_success: skip diff --git a/docs/Project.toml b/docs/Project.toml index 18cae79a4..02af2349d 100644 --- a/docs/Project.toml +++ b/docs/Project.toml @@ -13,4 +13,4 @@ StatsBase = "2913bbd2-ae8a-5f71-8c99-4fb6c76f3a91" StatsModels = "3eaba693-59b7-5ba5-a881-562e759f1c8d" [compat] -Documenter = "~0.22" +Documenter = "~0.23" diff --git a/docs/make.jl b/docs/make.jl index e2fd530e0..ba71a3d2c 100644 --- a/docs/make.jl +++ b/docs/make.jl @@ -4,6 +4,7 @@ using Pkg Pkg.add(PackageSpec(name="PhyloPlots", rev="master")) using PhyloNetworks +DocMeta.setdocmeta!(PhyloNetworks, :DocTestSetup, :(using PhyloNetworks); recursive=true) makedocs( sitename = "PhyloNetworks.jl", diff --git a/docs/readme.md b/docs/readme.md index bfdcc11f1..acede60be 100644 --- a/docs/readme.md +++ b/docs/readme.md @@ -100,9 +100,10 @@ or interactively in `docs/`: pkg> activate . pkg> status # just to check pkg> status --manifest -pkg> instantiate -pkg> # dev PhyloPlots # to get the master branch -pkg> dev ~/.julia/dev/PhyloNetworks +pkg> instantiate # after deleting Manifest.toml and undo changes to Project.toml +pkg> # add RCall#master # in case some dependency causes an issue +pkg> dev PhyloNetworks +pkg> add PhyloPlots#master # to get the master branch: done by make.jl julia> include("make.jl") ``` diff --git a/docs/src/index.md b/docs/src/index.md index 30723f383..d6a1edcbf 100644 --- a/docs/src/index.md +++ b/docs/src/index.md @@ -10,7 +10,7 @@ and their use for trait evolution. 
**How to get help** - the package [wiki](https://github.com/crsl4/PhyloNetworks.jl/wiki) has a step-by-step - tutorial, done for the 2018 MBL workshop, with background on networks and + tutorial, done for the 2019 MBL workshop, with background on networks and explanations. - the [google group](https://groups.google.com/forum/#!forum/phylonetworks-users) has answers to common questions. @@ -28,7 +28,7 @@ and their use for trait evolution. Phylogenetic Comparative Methods for Phylogenetic Networks with Reticulations. Systematic Biology, 67(5):800–820. [doi:10.1093/sysbio/syy033](https://doi.org/10.1093/sysbio/syy033). -- Claudia Solís-Lemus and Cécile Ané(2016). +- Claudia Solís-Lemus and Cécile Ané (2016). Inferring Phylogenetic Networks with Maximum Pseudolikelihood under Incomplete Lineage Sorting. [PLoS Genet](http://journals.plos.org/plosgenetics/article?id=10.1371/journal.pgen.1005896) 12(3):e1005896. [doi:10.1371/journal.pgen.1005896](https://doi.org/10.1371/journal.pgen.1005896) diff --git a/docs/src/lib/internals.md b/docs/src/lib/internals.md index a2fe1f456..72a24f761 100644 --- a/docs/src/lib/internals.md +++ b/docs/src/lib/internals.md @@ -1,8 +1,5 @@ ```@meta CurrentModule = PhyloNetworks -DocTestSetup = quote - using PhyloNetworks -end ``` # Internal Documentation @@ -34,8 +31,3 @@ Modules = [PhyloNetworks] Public = false Order = [:function, :constant] ``` - -```@meta -DocTestSetup = nothing -``` - diff --git a/docs/src/lib/public.md b/docs/src/lib/public.md index b9500324b..338b672d3 100644 --- a/docs/src/lib/public.md +++ b/docs/src/lib/public.md @@ -4,13 +4,6 @@ Documentation for `PhyloNetworks`'s public (exported) interface. See [Internal Documentation](@ref) for documentation on internal functions. 
-```@meta -DocTestSetup = quote - using PhyloNetworks -end -DocTestFilters = r" PhyloNetworks .*:\d+" -``` - ```@contents Pages = ["public.md"] ``` @@ -65,6 +58,7 @@ readTopology readTopologyLevel1 readInputTrees readMultiTopology +readNexusTrees readSnaqNetwork readTrees2CF readTableCF @@ -143,8 +137,3 @@ nstates stationary empiricalDNAfrequencies ``` - -```@meta -DocTestSetup = nothing -DocTestFilters = nothing -``` diff --git a/docs/src/man/bootstrap.md b/docs/src/man/bootstrap.md index c93450de2..2d2b76e7d 100644 --- a/docs/src/man/bootstrap.md +++ b/docs/src/man/bootstrap.md @@ -137,7 +137,7 @@ which tree edges have bootstrap support lower than 100% (none here) with ```@repl bootstrap using DataFrames # for showall() below show(BSe_tree, allrows=true, allcols=true) -BSe_tree[BSe_tree[:proportion] .< 100.0, :] +filter(row -> row[:proportion] < 100, BSe_tree) ``` Finally, we can map the bootstrap proportions onto the network or its main tree by passing the bootstrap table to the `edgeLabel` option of `plot`: @@ -163,7 +163,7 @@ output file of `snaq!` earlier, for consistency across different Julia sessions. If we wanted to plot only certain bootstrap values, like those below 100% (1.0), we could do this: ```julia -plot(net1, :R, edgeLabel=BSe_tree[BSe_tree[:proportion] .< 100.0, :]); +plot(net1, :R, edgeLabel=filter(row -> row[:proportion] < 100, BSe_tree)); ``` ## support for hybrid edges and hybrid nodes @@ -204,7 +204,8 @@ To see what is the clade named "H7", for instance: ```@repl bootstrap BSc # this might be too big show(BSc, allrows=true, allcols=true) -BSc[:taxa][BSc[:H7]] +# BSc[BSc[!,:H7], :taxa] # just a different syntax to subset the data in the same way +filter(row -> row[:H7], BSc).taxa ``` We can also get bootstrap values associated with edges, to describe the support that a given hybrid clade has a given sister clade. 
@@ -225,7 +226,7 @@ We can plot the bootstrap values of the 2 hybrid edges in the best network: ```@example bootstrap R"svg(name('boot_net_net.svg'), width=4, height=4)" # hide R"par"(mar=[0,0,0,0]) # hide -plot(net1, :R, edgeLabel=BSe[[:edge,:BS_hybrid_edge]]); +plot(net1, :R, edgeLabel=BSe[!,[:edge,:BS_hybrid_edge]]); R"dev.off()" # hide nothing # hide ``` @@ -240,7 +241,7 @@ of bootstrap networks. In another 1% bootstrap, A received gene flow from anothe ```@example bootstrap R"svg(name('boot_net_ret.svg'), width=4, height=4)" # hide R"par"(mar=[0,0,0,0]) # hide -plot(net1, :R, nodeLabel=BSn[[:hybridnode,:BS_hybrid_samesisters]]); +plot(net1, :R, nodeLabel=BSn[!,[:hybridnode,:BS_hybrid_samesisters]]); R"dev.off()" # hide nothing # hide ``` @@ -250,11 +251,12 @@ Below is example code to place tree edge support and hybrid edge support on the same plot. ```julia -tmp = BSe[!isna(BSe[:edge]),[:edge,:BS_hybrid_edge]] -rename!(tmp, :BS_hybrid_edge, :proportion) -rename!(tmp, :edge, :edgeNumber) +tmp = filter(row -> !ismissing(row[:edge]), BSe) # filter rows +select!(tmp, [:edge,:BS_hybrid_edge]) # select 2 columns only +rename!(tmp, :BS_hybrid_edge => :proportion) # rename those columns, to match names in BSe_tree +rename!(tmp, :edge => :edgeNumber) tmp = vcat(BSe_tree, tmp) -plot(net1, edgeLabel=tmp, nodeLabel=BSn[[:hybridnode,:BS_hybrid_samesisters]]) +plot(net1, edgeLabel=tmp, nodeLabel=BSn[!, [:hybridnode,:BS_hybrid_samesisters]]) ``` ### Who are the hybrids in bootstrap networks? @@ -268,12 +270,12 @@ being of hybrid origin. 
```@example bootstrap R"svg(name('boot_net_hyb_1.svg'), width=4, height=4)" # hide R"par"(mar=[0,0,0,0]) # hide -plot(net1, :R, nodeLabel=BSn[BSn[:BS_hybrid].>0, [:hybridnode,:BS_hybrid]]); +plot(net1, :R, nodeLabel=filter(row->row[:BS_hybrid]>0, BSn)[!,[:hybridnode,:BS_hybrid]]); R"dev.off()" # hide nothing # hide R"svg(name('boot_net_hyb_2.svg'), width=4, height=4)" # hide R"par"(mar=[0,0,0,0]) # hide -plot(net1, :R, edgeLabel=BSn[BSn[:BS_hybrid].>0, [:edge,:BS_hybrid]]); +plot(net1, :R, edgeLabel=filter(row->row[:BS_hybrid]>0, BSn)[!,[:edge,:BS_hybrid]]); R"dev.off()" # hide nothing # hide ``` @@ -290,12 +292,12 @@ We filtered clades to show those with sister support > 5%: ```@example bootstrap R"svg(name('boot_net_clade_1.svg'), width=4, height=4)" # hide R"par"(mar=[0,0,0,0]) # hide -plot(net1, :R, nodeLabel=BSn[BSn[:BS_minor_sister].>5, [:node,:BS_minor_sister]]); +plot(net1, :R, nodeLabel=filter(r->r[:BS_minor_sister]>5, BSn)[!,[:node,:BS_minor_sister]]); R"dev.off()" # hide nothing # hide R"svg(name('boot_net_clade_2.svg'), width=4, height=4)" # hide R"par"(mar=[0,0,0,0]) # hide -plot(net1, :R, edgeLabel=BSn[BSn[:BS_minor_sister].>5, [:edge,:BS_minor_sister]]); +plot(net1, :R, edgeLabel=filter(r->r[:BS_minor_sister]>5, BSn)[!,[:edge,:BS_minor_sister]]); R"dev.off()" # hide nothing # hide ``` @@ -311,7 +313,7 @@ but there is much uncertainty about its exact placement and about its direction. 
Mapping the support for major sister clades might be interesting too: ```julia -plot(net1, nodeLabel=BSn[BSn[:BS_major_sister].>5, [:node,:BS_major_sister]]) +plot(net1, nodeLabel=filter(r->r[:BS_major_sister]>5, BSn)[!,[:node,:BS_major_sister]]) ``` The estimated heritability γ on hybrid edges in the reference network, when present in a diff --git a/docs/src/man/dist_reroot.md b/docs/src/man/dist_reroot.md index 47ec066e8..80b078fa1 100644 --- a/docs/src/man/dist_reroot.md +++ b/docs/src/man/dist_reroot.md @@ -86,6 +86,7 @@ R"svg(name('reroot_net7taxa_1.svg'), width=4, height=4)" # hide R"par"(mar=[0,0,0,0]) # hide plot(net7taxa, :R, showGamma=true, showEdgeNumber=true, tipOffset=0.2); R"dev.off()"; # hide +nothing # hide ``` ![reroot net7taxa 1](../assets/figures/reroot_net7taxa_1.svg) @@ -115,6 +116,7 @@ rootonedge!(net7taxa, 5); plot(net7taxa, :R, showGamma=true, tipOffset=0.2); R"mtext"("rooted on hybrid edge 5 (minor)", line=-1); R"dev.off()"; # hide +nothing # hide ``` ![reroot net7taxa 2](../assets/figures/reroot_net7taxa_2.svg) @@ -132,6 +134,7 @@ R"par"(mar=[0,0,0,0]); # hide plot(net7taxa, :R, tipOffset=0.2); # not showing gamma values, because we changed them artificially R"mtext"("rooted on hybrid edge 5 (considered major)", line=-1); R"dev.off()"; # hide +nothing # hide ``` ![reroot net7taxa 3](../assets/figures/reroot_net7taxa_3.svg) diff --git a/docs/src/man/expectedCFs.md b/docs/src/man/expectedCFs.md index 22ef80a3e..0dfe0b10b 100644 --- a/docs/src/man/expectedCFs.md +++ b/docs/src/man/expectedCFs.md @@ -27,7 +27,7 @@ Here is one way to plot them, via R again, and using the R package `ggplot2`. 
```@example expCFs using RCall -obsCF = df_long[:obsCF]; expCF = df_long[:expCF]; # hide +obsCF = df_long[!,:obsCF]; expCF = df_long[!,:expCF]; # hide R"name <- function(x) file.path('..', 'assets', 'figures', x)"; # hide R"svg(name('expCFs_obsvsfitted.svg'), width=5, height=4)"; # hide R"par"(mar=[2.5,2.6,.5,.5], mgp=[1.5,.4,0], tck=-0.01, las=1, pty="s"); # hide @@ -75,23 +75,26 @@ Many points are overlapping, like before, so they are again "jittered" a bit. ```@example expCFs using DataFrames -df_long[:has_A] = "no" # add a column to our data, to indicate which 4-taxon sets have A or not +df_long[!,:has_A] .= "no"; # add a column to our data, to indicate which 4-taxon sets have A or not for r in eachrow(df_long) if "A" ∈ [r[:tx1], r[:tx2], r[:tx3], r[:tx4]] r[:has_A]="yes" end end -has_A = df_long[:has_A]; # hide -nq = length(has_A) # hide +has_A = df_long.has_A # hide +nq = length(has_A); # hide R"colA=rep('#008080',$nq); bgA=rep('#00808090',$nq);"; # hide R"colA[$has_A=='yes']='#F8766D'; bgA[$has_A=='yes']='#F8766D90'"; # hide R"svg(name('expCFs_obsvsfitted_A.svg'), width=5, height=4)"; # hide R"par"(mar=[2.5,2.6,.5,.5], mgp=[1.5,.4,0], tck=-0.01, las=1, pty="s"); # hide R"plot(0:1, 0:1, type='l', bty='L', lwd=0.3, col='black', xlab='quartet CF observed in gene trees', ylab='quartet CF expected from network')"; # hide -R"set.seed"(2345) # hide +R"set.seed"(2345); # hide R"points(jitter($obsCF,amount=0.005),jitter($expCF,amount=0.005),col=colA,bg=bgA,pch=21)"; # hide R"legend(x=0.7,y=0.3,pch=21,col=c('#008080','#F8766D'),legend=c('no','yes'),title='has A?', bty='n',bg=c('#00808090','#F8766D90'))"; # hide R"dev.off()"; # hide +nothing # hide +``` +```@repl expCFs first(df_long, 7) # first 7 rows ``` diff --git a/docs/src/man/fitDiscrete.md b/docs/src/man/fitDiscrete.md index d831ae721..38e0b16a5 100644 --- a/docs/src/man/fitDiscrete.md +++ b/docs/src/man/fitDiscrete.md @@ -1,7 +1,8 @@ -```@setup traitevol_fixednet +```@setup fitdiscrete_trait using PhyloNetworks 
using DataFrames mkpath("../assets/figures") +figname(x) = joinpath("..", "assets", "figures", x) ``` # Discrete Trait Evolution @@ -11,132 +12,220 @@ have evolved over time using a likelihood model. These traits should be discrete characteristics of a species such as feather color, diet type, or DNA in aligned genetic sequences. -## Discrete Trait Data +## Discrete trait data As with continuous trait evolution, we assume a fixed network, correctly rooted, with branch lengths proportional to calendar time. We start with a network, then -add data about the tips of this network. We allow data of two types. - -1. A vector of species names with a data frame of traits: - - ```@example traitevol_fixednet - # read in network - net = readTopology("(A:3,((B:0.4)#H1:1.6::0.92,((C:0.4,#H1:0::0.08):0.6,D:1):1):1);"); - # read in trait data - species = ["C","A","B","D"] - dat = DataFrame(trait=["hi","lo","lo","hi"]) - ``` - - If your species names and trait data are in the same data frame, - read in your data frame then subset the data like this: - ```@example traitevol_fixednet - dat = DataFrame(species=["C","A","B","D"], trait=["hi","lo","lo","hi"]) - species = dat[:species] - dat = DataFrame(trait = dat[:trait]) - ``` - -2. To use dna data, read in the network structure then start with a fasta - file. Reading the data from this file using the `readfastatodna` function. - This creates a data frame of dna data and a vector of dna pattern weights. 
- - ```@example traitevol_fixednet - # read in network - dna_net = readTopology("((((((((((((((Ae_caudata_Tr275:1.0,Ae_caudata_Tr276:1.0):1.0,Ae_caudata_Tr139:1.0):1.0)#H1:1.0::0.6,((((((Ae_longissima_Tr241:1.0,Ae_longissima_Tr242:1.0):1.0,Ae_longissima_Tr355:1.0):1.0,(Ae_sharonensis_Tr265:1.0,Ae_sharonensis_Tr264:1.0):1.0):1.0,((Ae_bicornis_Tr408:1.0,Ae_bicornis_Tr407:1.0):1.0,Ae_bicornis_Tr406:1.0):1.0):1.0,((Ae_searsii_Tr164:1.0,Ae_searsii_Tr165:1.0):1.0,Ae_searsii_Tr161:1.0):1.0):1.0)#H2:1.0::0.6):1.0,(((Ae_umbellulata_Tr266:1.0,Ae_umbellulata_Tr257:1.0):1.0,Ae_umbellulata_Tr268:1.0):1.0,#H1:1.0::0.4):1.0):1.0,((Ae_comosa_Tr271:1.0,Ae_comosa_Tr272:1.0):1.0,(((Ae_uniaristata_Tr403:1.0,Ae_uniaristata_Tr357:1.0):1.0,Ae_uniaristata_Tr402:1.0):1.0,Ae_uniaristata_Tr404:1.0):1.0):1.0):1.0,(((Ae_tauschii_Tr352:1.0,Ae_tauschii_Tr351:1.0):1.0,(Ae_tauschii_Tr180:1.0,Ae_tauschii_Tr125:1.0):1.0):1.0,(#H2:1.0::0.4,((((Ae_mutica_Tr237:1.0,Ae_mutica_Tr329:1.0):1.0,Ae_mutica_Tr244:1.0):1.0,Ae_mutica_Tr332:1.0):1.0)#H4:1.0::0.6):1.0):1.0):1.0,(((T_boeoticum_TS8:1.0,(T_boeoticum_TS10:1.0,T_boeoticum_TS3:1.0):1.0):1.0,T_boeoticum_TS4:1.0):1.0,((T_urartu_Tr315:1.0,T_urartu_Tr232:1.0):1.0,(T_urartu_Tr317:1.0,T_urartu_Tr309:1.0):1.0):1.0):1.0):1.0,(((((Ae_speltoides_Tr320:1.0,Ae_speltoides_Tr323:1.0):1.0,Ae_speltoides_Tr223:1.0):1.0,Ae_speltoides_Tr251:1.0):1.0):1.0,#H4:1.0::0.4):1.0):1.0):1.0,Ta_caputMedusae_TB2:1.0):1.0,S_vavilovii_Tr279:1.0):1.0,Er_bonaepartis_TB1:1.0):1.0,H_vulgare_HVens23:1.0);"); - # read in dna data - fastafile = joinpath(dirname(pathof(PhyloNetworks)), "..","examples","Ae_bicornis_Tr406_Contig10132.aln") - dna_dat, dna_weights = readfastatodna(fastafile, true); - dna_dat - dna_weights - ``` - -## Choosing a Substitution Model +add data about the tips of this network. 
+ +The simplest way is to use a vector of species names with a data frame of traits: + +```@repl fitdiscrete_trait +# read in network +net = readTopology("(O:4,(A:3,((B:0.4)#H1:1.6::0.92,((C:0.4,#H1:0::0.08):0.6,(D:.2,E:.2):0.8):1):1):1);"); +# read in trait data +species = ["C","A","D","B","O","E"]; +dat = DataFrame(trait=["hi","lo","lo","hi","lo","lo"]) +``` + +If your species names and trait data are in the same data frame, +read in your data frame then subset the data like this: +```@example fitdiscrete_trait +dat = DataFrame(species=["C","A","D","B","O","E"], trait=["hi","lo","lo","hi","lo","lo"]); +species = dat.species # or: dat[!, :species] +select!(dat, Not(:species)) # select all columns except for :species; modifies dat in place +nothing # hide +``` + +Let's plot the network and map the data onto it: + +```@example fitdiscrete_trait +using RCall, PhyloPlots +R"svg"(figname("fitdiscrete_trait_net_1.svg"), width=4, height=3); # hide +R"par"(mar=[0,0,0,0]); # to reduce margins +res = plot(net, :R; tipOffset=0.3); # the results "res" provides point coordinates, to use for data annotation +o = [findfirst(isequal(tax), species) for tax in tipLabels(net)] # 5,2,4,1,3,6: order to match taxa from "species" to tip labels +isequal(species[o], tipLabels(net)) # true :) +traitcolor = map(x -> (x=="lo" ? 
"grey" : "red"), dat.trait[o]) +R"points"(x=res[13].x .+0.1, y=res[13].y, pch=16, col=traitcolor, cex=1.5); # adds grey & red points +R"legend"(x=1, y=2, legend=["hi","lo"], pch=16, col=["red","grey"], + title="my trait", bty="n",var"title.adj"=0); +# next: add arrow to show gene flow edge, and proportion γ of genes affected +hi = findfirst([!e.isMajor for e in net.edge]) # 6 : "h"ybrid "i"ndex: index of gene flow edge (minor hybrid) in net +(hx1, hx2, hy1, hy2) = (res[i][hi] for i in 9:12); # coordinates for minor hybrid edge; 1=start, 2=end +R"arrows"(hx1, hy1, hx2, hy2, col="deepskyblue", length=0.08, angle=20); # adds the arrow +R"text"(res[14][hi,:x]-0.2, res[14][hi,:y]+0.1, res[14][hi,:gam], col="deepskyblue", cex=0.75); # add the γ value +R"dev.off"(); # hide +nothing # hide +``` + +![net_1](../assets/figures/fitdiscrete_trait_net_1.svg) + +## Substitution models After reading in your data, choose a model to describe how evolutionary changes (or substitutions, in the case of DNA) happened over time. Available Markov substitution models are described below. -### Generic Trait Models - -These models works well for any type of trait we may want to model. For general -trait types, use one of these three models: +For general trait types, use one of these three models: - `:BTSM` Binary Trait Substitution Model (2 states, rates unconstrained) - `:ERSM` Equal Rates Substitution Model (`k` states, all transitions possible with equal rates) -- `:TBTSM` Two Binary Trait Substituion Model (though not fully implemented yet) - -### DNA-Specific Models - -The DNA-specific models are optimized for aligned sequence data. -The 4 nucleotide states are from -[BioSymbols](https://github.com/BioJulia/BioSymbols.jl) -(listed [here](http://biojulia.net/BioSymbols.jl/stable/nucleicacids/)). -Each model has a relative and an absolute version. -- `:JC69` Jukes & Cantor 1969 model: one single rate for all transitions. 
- The relative version has values -1 along the diagonal of the rate matrix - (1 expected transition / unit of time). The absolute version has an extra - parameter to scale the rate matrix. -- `:HKY85` Hasegawa, Kishino & Yano 1985: treats transitions differently - from transversions. - -## Fitting the model - -To infer evolutionary rates, run the `fitdiscrete` function on the network and data. -It will calculate the maximum likelihood score of a fixed network -given one or more discrete trait characters at the tips. -Along each edge, evolutionary changes -are modeled with a continous time Markov model, with parameters estimated by -maximizing the likelihood. At each hybrid node, the trait is assumed to be -inherited from the immediate parent (or parents, in the case of a hybrid node). -At a hybrid node, the trait is assumed to be inherited from one or the other -parent, with probabilities equal to the inheritance γ of each parent edge -(which is given by the network). -The model ignores incomplete lineage sorting (e.g. hemiplasy). - -### General Trait Data - -```@repl traitevol_fixednet +- `:TBTSM` Two Binary Trait Substitution Model (though not fully implemented yet) + + +## Inference + +To infer evolutionary rates, run [`fitdiscrete`](@ref) on the network and data. +It will calculate the maximum likelihood score +of one or more discrete trait characters at the tips +on a fixed network. + +- Along each edge, evolutionary changes are modeled with a + continuous time Markov model. +- At a hybrid node, the trait is assumed to be inherited from one or the other + of its parents (immediately before the reticulation event), + with probabilities equal to the inheritance γ of each parent edge, + which is given by the network. +- At the root of the network, a uniform distribution among the possible + states is assumed a priori. +- The model ignores incomplete lineage sorting (e.g. hemiplasy). 
+ +### parameter estimation & model fit + +The example below is for a binary trait, first using a model assuming +equal rates (from `lo` to `hi` and from `hi` to `lo`); +then using a model allowing for distinct rates. +The option `optimizeQ=false` causes transition rates +to stay at their starting values, without being optimized. + +```@repl fitdiscrete_trait s1 = fitdiscrete(net, :ERSM, species, dat; optimizeQ=false) s2 = fitdiscrete(net, :BTSM, species, dat; optimizeQ=false) ``` -In this `fitdiscrete` call, we do not optimize rates or allow for rate variation -across sites. The default rates (which act as starting value if rates -were to be optimized) are chosen equal to the inverse of the total edge lengths +The default rates, which act as starting value if rates were to be optimized, +are chosen equal to the inverse of the total edge lengths in the network (or 1/ntax if all branch lengths are missing). -If `optimizeQ = true` (which is the default), the `fitdiscrete` -function estimates the parameters of the rate matrix. -Because we didn't allow for rate variation across sites in these models, -there is nothing to optimize in the way rates may vary across traits (sites). +By default `optimizeQ = true`, such that [`fitdiscrete`](@ref) +estimates the parameters of the rate matrix Q. -```@repl traitevol_fixednet +```@repl fitdiscrete_trait s3 = fitdiscrete(net, :ERSM, species, dat) s4 = fitdiscrete(net, :BTSM, species, dat) ``` -### DNA Data +To compare the two models, we can use the Akaike criterion. +```@repl fitdiscrete_trait +using StatsBase +aic(s3) +aic(s4) +``` +Here, the equal-rate model is slightly favored (lower AIC), +so we will use `s3` below. + +### ancestral state prediction + +This is traditionally called "ancestral state reconstruction", +but we do not actually reconstruct anything. +We make predictions for (past or present-day) values, +hopefully with some measure to quantify our uncertainty. 
+ +```@repl fitdiscrete_trait +# show(ancestralStateReconstruction(s3), allrows=true) +ancestralStateReconstruction(s3) +``` +Rows 1-6 correspond to the tips, with known values. +We see much prediction uncertainty at most of the internal nodes. +To see where these internal nodes (7-13/H1) are, we need to look +at the network stored within the fitted model. +This network might differ somewhat from the input network in case +taxa with missing data were pruned, and with edges possibly renumbered. + +```@example fitdiscrete_trait +R"svg"(figname("fitdiscrete_trait_net_2.svg"), width=4, height=3); # hide +R"par"(mar=[0,0,0,0]); # hide +plot(s3.net, :R, showNodeNumber=true, showIntNodeLabel=true, tipOffset=0.2); +R"dev.off"(); # hide +nothing # hide +``` + +![net_2](../assets/figures/fitdiscrete_trait_net_2.svg) + +Looking back at the posterior probabilities of states "hi" and "lo" +at each node from the ancestral 'prediction' table above, +we see that there is more uncertainty near the root, and +less uncertainty near the tips. +The most recent common ancestor of D and E (node 11), +in particular, is predicted to be "lo" with fairly high certainty. + +### impact of gene flow on the trait -For DNA data, use one of `:JC69` or `:HKY85`. -To allow for rate variation across sites, use the `:RV` option. +An interesting question is whether there is evidence that B obtained +its "hi" state via gene flow. The prior probability for this is γ: +the supposedly known proportion of genes inherited via gene flow, +which is part of the network (along with branch lengths). 
+Here, this prior probability of trait inheritance via gene flow is: -```@example traitevol_fixednet -d1 = fitdiscrete(dna_net, :JC69, dna_dat, dna_weights, :RV; optimizeQ=false, optimizeRVAS=false) -d2 = fitdiscrete(dna_net, :HKY85, dna_dat, dna_weights, :RV; optimizeQ=false, optimizeRVAS=false) +```@repl fitdiscrete_trait +net.edge[6].gamma # the minor hybrid edge was edge 6, from above ``` -In these `fitdiscrete` models, we do not optimize rates (`optimizeQ=false`), but -we do allow for rate variation across sites, with a default α of 1. - -### Rate Variation Across Sites - -In its default version, `fitdiscrete` does not allow for rate variation across sites. -To allow for rate variation across sites in your estimate of evolutionary rates -(or rate variation across traits in the case of general traits), -include `:RV`. If you include `:RV` and `optimizeRVAS = true`, -the model will allow for rate variation and -also optimize the parameter α of the distribution of rates across sites. - -We optimize the evolutionary rates and the way rates vary across sites for the -DNA data here: -```@repl traitevol_fixednet -d3 = fitdiscrete(dna_net, :JC69, dna_dat, dna_weights, :RV; optimizeRVAS=false) -d4 = fitdiscrete(dna_net, :HKY85, dna_dat, dna_weights, :RV) + +We can compare this to the posterior probability, and get a Bayes factor +to compare the two hypotheses: gene flow vs. vertical inheritance. + +```@repl fitdiscrete_trait +exp(s3.priorltw[1]) # prior: for vertical inheritance. "ltw" = log tree weight +exp(s3.priorltw[2]) # prior: for gene flow inheritance, same as γ above +exp(s3.postltw[2]) # posterior: for gene flow inheritance +function geneflowBF(fit) + exp(fit.postltw[2] - fit.postltw[1] + fit.priorltw[1] - fit.priorltw[2]) +end +geneflowBF(s3) +``` + +We get a Bayes factor greater than 1, so there is more evidence that +the "hi" value of B was inherited via gene flow, than via vertical +inheritance. 
But the Bayes factor is just barely above 1, so the +evidence is very equivocal. +This may not be surprising given that +gene flow occurred between fairly closely related species, +and that the data set is very small. + +## Trait simulation + +[`randomTrait`](@ref) can simulate traits along a known network. +For example, we can define a binary trait model with states +"carnivory" (state 1) and "non-carnivory" (state 2), then ask for +a trait to be simulated along our network. We can ask for +3 independent simulations, giving us 3 traits then, arranged in 3 rows. + +```@repl fitdiscrete_trait +m1 = BinaryTraitSubstitutionModel(1.0,2.0, ["carnivory", "non-carnivory"]) +using Random; Random.seed!(1234); # for reproducibility of this example +traitmatrix, nodecolumn = randomTrait(m1, net; ntraits=3); +traitmatrix +``` + +In this trait matrix, each column corresponds to a node, +each row is a trait, and each entry gives the state of that trait for that node, +as an index. To get the state labels: + +```@repl fitdiscrete_trait +m1.label[traitmatrix] +``` + +The `nodecolumn` vector says which node corresponds to which column +in the trait matrix, and we can compare to the node numbers in the network. +For example, the first column corresponds to node `-2`, which is the root. +(The root is always in the first column: that's where the simulation starts.) 
+Also, as an example, the column for taxon "A" is column 12: + +```@repl fitdiscrete_trait +nodecolumn +net.node[net.root] +findfirst(isequal("A"), nodecolumn) +nodecolumn[12] +traitmatrix[:,12] +m1.label[traitmatrix[:,12]] ``` diff --git a/docs/src/man/fitdiscreteDNA.md b/docs/src/man/fitdiscreteDNA.md new file mode 100644 index 000000000..f1c2b8001 --- /dev/null +++ b/docs/src/man/fitdiscreteDNA.md @@ -0,0 +1,78 @@ +```@setup concatdna +using PhyloNetworks +``` + +# fitting DNA on a network + +The methods below model each DNA site as a trait, assuming that +sites are unlinked, that is, they evolve independently of each other. +In other words, this is a "concatenation" approach where +sites from the same locus do not share information about their +evolutionary path. This is appropriate if recombination is assumed +to have occurred within genes. + +## DNA evolution: data and models + +### reading in an alignment + +As for trait evolution, [`fitdiscrete`](@ref) can be used. It can be +given data in a variety of ways. 
For DNA, this is one way: + +```@repl concatdna +# read in network +dna_net = readTopology("((((((((((((((Ae_caudata_Tr275:1.0,Ae_caudata_Tr276:1.0):1.0,Ae_caudata_Tr139:1.0):1.0)#H1:1.0::0.6,((((((Ae_longissima_Tr241:1.0,Ae_longissima_Tr242:1.0):1.0,Ae_longissima_Tr355:1.0):1.0,(Ae_sharonensis_Tr265:1.0,Ae_sharonensis_Tr264:1.0):1.0):1.0,((Ae_bicornis_Tr408:1.0,Ae_bicornis_Tr407:1.0):1.0,Ae_bicornis_Tr406:1.0):1.0):1.0,((Ae_searsii_Tr164:1.0,Ae_searsii_Tr165:1.0):1.0,Ae_searsii_Tr161:1.0):1.0):1.0)#H2:1.0::0.6):1.0,(((Ae_umbellulata_Tr266:1.0,Ae_umbellulata_Tr257:1.0):1.0,Ae_umbellulata_Tr268:1.0):1.0,#H1:1.0::0.4):1.0):1.0,((Ae_comosa_Tr271:1.0,Ae_comosa_Tr272:1.0):1.0,(((Ae_uniaristata_Tr403:1.0,Ae_uniaristata_Tr357:1.0):1.0,Ae_uniaristata_Tr402:1.0):1.0,Ae_uniaristata_Tr404:1.0):1.0):1.0):1.0,(((Ae_tauschii_Tr352:1.0,Ae_tauschii_Tr351:1.0):1.0,(Ae_tauschii_Tr180:1.0,Ae_tauschii_Tr125:1.0):1.0):1.0,(#H2:1.0::0.4,((((Ae_mutica_Tr237:1.0,Ae_mutica_Tr329:1.0):1.0,Ae_mutica_Tr244:1.0):1.0,Ae_mutica_Tr332:1.0):1.0)#H4:1.0::0.6):1.0):1.0):1.0,(((T_boeoticum_TS8:1.0,(T_boeoticum_TS10:1.0,T_boeoticum_TS3:1.0):1.0):1.0,T_boeoticum_TS4:1.0):1.0,((T_urartu_Tr315:1.0,T_urartu_Tr232:1.0):1.0,(T_urartu_Tr317:1.0,T_urartu_Tr309:1.0):1.0):1.0):1.0):1.0,(((((Ae_speltoides_Tr320:1.0,Ae_speltoides_Tr323:1.0):1.0,Ae_speltoides_Tr223:1.0):1.0,Ae_speltoides_Tr251:1.0):1.0):1.0,#H4:1.0::0.4):1.0):1.0):1.0,Ta_caputMedusae_TB2:1.0):1.0,S_vavilovii_Tr279:1.0):1.0,Er_bonaepartis_TB1:1.0):1.0,H_vulgare_HVens23:1.0);"); +# read in alignment in FASTA format +fastafile = joinpath(dirname(pathof(PhyloNetworks)), "..","examples","Ae_bicornis_Tr406_Contig10132.aln"); +dna_dat, dna_weights = readfastatodna(fastafile, true); +dna_dat +dna_weights +``` + +Here, `dna_dat` is a single data frame containing both species names +and trait data (site patterns). The alignment was summarized by listing +each observed site pattern only once in `dna_dat`. 
+`dna_weights` is a vector of weights, containing +the number of times that each site pattern was observed. + +### sequence substitution models + +DNA-specific substitution models have 4 states: the 4 nucleotides from +[BioSymbols](https://github.com/BioJulia/BioSymbols.jl) +(listed [here](http://biojulia.net/BioSymbols.jl/stable/nucleicacids/)). +Each model has a relative and an absolute version. +- `:JC69` Jukes & Cantor 1969 model: one single rate for all substitutions. + The relative version has values -1 along the diagonal of the rate matrix + (1 expected substitution / unit of time). The absolute version has an extra + parameter to scale the rate matrix. +- `:HKY85` Hasegawa, Kishino & Yano 1985: treats transitions differently + from transversions. The relative version is scaled to predict an average of + 1 substitution / unit of time. + +We may allow for rate variation across sites using the `:RV` option. + +### likelihood of a fixed network + +In the examples below, none of the rate parameters are optimized, +so we get to see the default starting values. + +```@repl concatdna +d1 = fitdiscrete(dna_net, :JC69, dna_dat, dna_weights, :RV; optimizeQ=false, optimizeRVAS=false) +d2 = fitdiscrete(dna_net, :HKY85, dna_dat, dna_weights, :RV; optimizeQ=false, optimizeRVAS=false) +``` +When allowing for rate variation across sites, the default α is 1. + +In the more interesting examples below, +we optimize the evolutionary rates and the way rates vary across sites +(which is the default). +```@repl concatdna +d3 = fitdiscrete(dna_net, :JC69, dna_dat, dna_weights, :RV; ftolAbs=0.1, xtolAbs=0.01) +``` +Lenient tolerance parameters `ftolAbs` etc. have been chosen here to +make this example faster. +Note that the fitted object contains a separate version of the input network, +where any taxon without data has been pruned, and where branch length numbers +may have been modified. 
+ +```@repl concatdna +d3.net +``` diff --git a/docs/src/man/fixednetworkoptim.md b/docs/src/man/fixednetworkoptim.md index 85114b689..d9260d895 100644 --- a/docs/src/man/fixednetworkoptim.md +++ b/docs/src/man/fixednetworkoptim.md @@ -33,11 +33,11 @@ net1alt.loglik # pseudo deviance actually: the lower the better ``` ```@example fixednetworkoptim using PhyloPlots, RCall -R"name <- function(x) file.path('..', 'assets', 'figures', x)" -R"svg(name('truenet_opt.svg'), width=4, height=4)" +R"name <- function(x) file.path('..', 'assets', 'figures', x)" # hide +R"svg(name('truenet_opt.svg'), width=4, height=4)" # hide R"par"(mar=[0,0,0,0]) plot(net1alt, :R, showGamma=true); -R"dev.off()" +R"dev.off()" # hide nothing # hide ``` ![truenet_opt](../assets/figures/truenet_opt.svg) @@ -134,6 +134,7 @@ R"mtext"("best net, score=28.3", line=-1); plot(netlist[2], :R, showGamma=true, showEdgeNumber=true, tipOffset=0.1); R"mtext"("direction modified, score=31.5", line=-1); R"dev.off()"; # hide +nothing # hide ``` ![othernets before reroot](../assets/figures/fixednetworkoptim_othernets1.svg) @@ -172,5 +173,6 @@ rootonedge!(netlist[2], 10) # net with modified direction: second way to make A plot(netlist[2], :R, showGamma=true, tipOffset=0.1); R"mtext"("second best in list, score=31.5\ndifferent root position", line=-2); R"dev.off()"; # hide +nothing # hide ``` ![othernets after reroot](../assets/figures/fixednetworkoptim_othernets2.svg) diff --git a/docs/src/man/inputdata.md b/docs/src/man/inputdata.md index a9a5d9aac..9a0e3dc92 100644 --- a/docs/src/man/inputdata.md +++ b/docs/src/man/inputdata.md @@ -52,6 +52,7 @@ You could read in these 30 trees and visualize the third one (say) like this: ```@example qcf using PhyloNetworks raxmltrees = joinpath(dirname(pathof(PhyloNetworks)), "..","examples","raxmltrees.tre"); +nothing # hide ``` ```@repl qcf genetrees = readMultiTopology(raxmltrees); @@ -162,6 +163,7 @@ R"svg(name('inputdata_astraltree.svg'), width=4, height=3)" # hide 
R"par"(mar=[0,0,0,0]) # hide plot(astraltree, :R, showEdgeLength=true); R"dev.off()"; # hide +nothing # hide ``` ![astraltree](../assets/figures/inputdata_astraltree.svg) diff --git a/docs/src/man/parsimony.md b/docs/src/man/parsimony.md index cef0a18ea..2b6432708 100644 --- a/docs/src/man/parsimony.md +++ b/docs/src/man/parsimony.md @@ -63,6 +63,7 @@ R"svg(name('parsimony-fixed-net.svg'), width=4, height=4)"; # hide R"par"(mar = [0,0,0,0]); plot(net, :R, xlim=[0.8,7.5]); R"dev.off"(); # hide +nothing # hide ``` ![parsimony-fixed-net](../assets/figures/parsimony-fixed-net.svg) diff --git a/docs/src/man/ticr_howtogetQuartetCFs.md b/docs/src/man/ticr_howtogetQuartetCFs.md index 5fbc25c1d..dc7c6f9f5 100644 --- a/docs/src/man/ticr_howtogetQuartetCFs.md +++ b/docs/src/man/ticr_howtogetQuartetCFs.md @@ -111,11 +111,11 @@ SLURM will parallelize the MrBayes runs across genes. to the path where the `mb` executable is located or put the whole path in the command: `/s/mrbayes-3.2.6-1/bin/mb` - In slurm, we can then submit the MrBayes array job with: + In slurm, we can then submit the MrBayes array job with: -```bash -sbatch mb-slurm-submit.sh -``` + ```bash + sbatch mb-slurm-submit.sh + ``` With this slurm pipeline, the steps below are needed: keep reading. @@ -141,11 +141,12 @@ to run mbsum for *all* the genes. `mbsum` is fast, so there is no attempt to parallelize the various mbsum commands. ```bash -julia mbsum-t-files.jl mbfolder +julia mbsum-t-files.jl mbfolder outputfolder burnin # or +julia --color=yes -- mbsum-t-files.jl mbfolder outputfolder burnin # for colorized messages to the screen ``` -**Warning:** a burnin of 2500 generations is hard coded in this script. This can -easily be changed: edit this short script near the top of the file, to change the -value of `burnin`. +where `burnin` is replaced by the number of trees to ignore in each tree file +for burnin. This `burnin` argument is optional (default: 2501). 
+The `outputfolder` will contain the output of `mbsum`. ## To run bucky on all 4-taxon sets: we already have the mbsum output @@ -244,6 +245,8 @@ for f in filter(x -> endswith(x, ".cf"), readdir()) end println("found $(length(files)) cf files") # to check how many .cf output files were found open("CFtable.csv","w") do f_out + # write the header: + write(f_out, "taxon1,taxon2,taxon3,taxon4,CF12_34,CF12_34_lo,CF12_34_hi,CF13_24,CF13_24_lo,CF13_24_hi,CF14_23,CF14_23_lo,CF14_23_hi,ngenes\n") for file in files @show file # to see the .cf file name: comment this out if that's too much screen output open(file) do f_in diff --git a/examples/case_f_example.jl b/examples/case_f_example.jl index cbcf46aa4..2fc7dc77c 100644 --- a/examples/case_f_example.jl +++ b/examples/case_f_example.jl @@ -47,6 +47,8 @@ setNode!(ed10,[n9,n10]); net=HybridNetwork([n1,n2,n3,n4,n5,n6,n7,n8,n9,n10],[ed1,ed2,ed3,ed4,ed5,ed6,ed7,ed8,ed9,ed10]); node=searchHybridNode(net); +n2.name = "H1"; n4.name = "4"; n6.name = "6"; n7.name = "7"; +n8.name = "8"; n10.name = "10" net.names=["1","2","3","4","5","6","7","8","9","10"] flag, nocycle,edges, nodes = updateInCycle!(net,node[1]); diff --git a/examples/test.nex b/examples/test.nex index 57f81efd2..3ce835f85 100644 --- a/examples/test.nex +++ b/examples/test.nex @@ -2,8 +2,8 @@ BEGIN TREES; Tree gt0=(((((taxa01:0.00372,taxa02:0.00219):0.00096,taxa03:0.00420):0.00055,taxa05:0.00482):0.00067,taxa06:0.00573):0.00084,taxa04:0.00706):0.00000; -Tree gt1=(((taxa02:0.00116,taxa03:0.00066):0.00145,((taxa05:0.00271,taxa01:0.00377):0.00018,taxa04:0.00331):0.00095):0.00073,taxa06:0.00548):0.00000; -Tree gt2=((taxa03:0.00352,(taxa02:0.00158,(taxa04:0.00251,(taxa05:0.00239,taxa01:0.00245):0.00050):0.00030):0.00040):0.00062,taxa06:0.00528):0.00000; + tree gt1 = (((taxa02:0.00116,taxa03:0.00066):0.00145,((taxa05:0.00271,taxa01:0.00377):0.00018,taxa04:0.00331):0.00095):0.00073,taxa06:0.00548):0.00000; +Tree gt2 = [&R] 
((taxa03:0.00352,(taxa02:0.00158,(taxa04:0.00251,(taxa05:0.00239,taxa01:0.00245):0.00050):0.00030):0.00040):0.00062,taxa06:0.00528):0.00000; Tree gt3=(((taxa02:0.00306,taxa03:0.00310):0.00146,taxa06:0.00730):0.00050,((taxa05:0.00630,taxa01:0.00408):0.00083,taxa04:0.00536):0.00102):0.00000; Tree gt4=(((taxa01:0.00444,(taxa02:0.00324,(taxa04:0.00343,taxa03:0.00323):0.00044):0.00029):0.00075,taxa05:0.00372):0.00085,taxa06:0.00541):0.00000; Tree gt5=((((taxa02:0.00310,taxa03:0.00468):0.00100,taxa06:0.00593):0.00025,(taxa05:0.00422,taxa01:0.00394):0.00054):0.00036,taxa04:0.00503):0.00000; @@ -13,3 +13,6 @@ Tree gt8=(((taxa05:0.00475,taxa01:0.00411):0.00097,(taxa04:0.00603,(taxa02:0.004 Tree gt9=((((taxa04:0.00491,taxa06:0.00476):0.00071,taxa02:0.00547):0.00054,taxa03:0.00563):0.00035,(taxa05:0.00540,taxa01:0.00410):0.00061):0.00000; END; + +[this should be ignored] +tree trap = (taxa04:0.00491,taxa06:0.00476); diff --git a/src/auxiliary.jl b/src/auxiliary.jl index 70bb0ab8d..c1e3e1c0a 100644 --- a/src/auxiliary.jl +++ b/src/auxiliary.jl @@ -256,48 +256,6 @@ function getIndex(edge::Edge, edges::Vector{Edge}) return i end -function getIndex(bool::Bool, array::Array{Bool,1}) - i = 1; - while(i<= size(array,1) && !isequal(bool,array[i])) - i = i+1; - end - i <= size(array,1) || error("$(bool) not in array") - return i -end - -function getIndex(bool::Bool, array::Array{Any,1}) - i = 1; - while(i<= size(array,1) && !isequal(bool,array[i])) - i = i+1; - end - i <= size(array,1) || error("$(bool) not in array") - return i -end - - -# aux function to find the index of a string in a -# string array -function getIndex(name::AbstractString, array::Array{String,1}) - i = 1; - while(i<= size(array,1) && !isequal(name,array[i])) - i = i+1; - end - i <= size(array,1) || error("$(name) not in array") - return i -end - -# aux function to find the index of a int in an int array. 
-# But findfirst can do that as well, and probably more efficiently (returning nothing if not found) -function getIndex(name::Integer, array::Array{Int,1}) - i = 1; - while(i<= size(array,1) && !isequal(name,array[i])) - i = i+1; - end - i <= size(array,1) || error("$(name) not in array") - return i -end - - # aux function to find the index of a node in a # node array function getIndex(name::Node, array::Array{Node,1}) @@ -328,12 +286,13 @@ end # find the index of an edge in node.edge function getIndexEdge(edge::Edge,node::Node) - getIndex(true,[isequal(edge,e) for e in node.edge]) + findfirst(e -> isequal(edge,e), node.edge) end # find the index of an edge with given number in node.edge +# bug found & fixed 2019-08-22. Unused function? function getIndexEdge(number::Integer,node::Node) - getIndex(true,[isequal(edge,e) for e in node.edge]) + findfirst(e -> isequal(number,e.number), node.edge) end # find the index of a node in edge.node @@ -1271,7 +1230,7 @@ end Reorder the 4 taxa and reorders the observed concordance factors accordingly, on each row of the data frame. If `columns` is ommitted, taxon names are assumed to be in columns 1-4 and -CFs are assumed to be in columns 5-6 with quartets in this order: 12_34, 13_24, 14_23. +CFs are assumed to be in columns 5-6 with quartets in this order: `12_34`, `13_24`, `14_23`. Does **not** reorder credibility interval values, if present. 
sorttaxa!(DataCF) @@ -1302,7 +1261,7 @@ function sorttaxa!(df::DataFrame, co=Int[]::Vector{Int}) length(co) > 6 || error("column vector must be of length 7 or more") ptax = Array{Int8}(undef, 4) pCF = Array{Int8}(undef, 3) - taxnam = Array{eltype(df[co[1]])}(undef, 4) + taxnam = Array{eltypes(df)[co[1]]}(undef, 4) for i in 1:size(df,1) for j=1:4 taxnam[j] = df[i,co[j]]; end sortperm!(ptax, taxnam) diff --git a/src/bootstrap.jl b/src/bootstrap.jl index 32ceb0e36..d08b88e1e 100644 --- a/src/bootstrap.jl +++ b/src/bootstrap.jl @@ -23,7 +23,7 @@ function readBootstrapTrees(filelist::AbstractString; relative2listfile=true::Bo size(bootfiles)[2] > 0 || error("there should be a column in file $filelist: with a single bootstrap file name on each row (no header)") ngenes = size(bootfiles)[1] - bf = (relative2listfile ? joinpath.(filelistdir, bootfiles[1]) : bootfiles[1]) + bf = (relative2listfile ? joinpath.(filelistdir, bootfiles[!,1]) : bootfiles[!,1]) treelists = Array{Vector{HybridNetwork}}(undef, ngenes) for igene in 1:ngenes treelists[igene] = readMultiTopology(bf[igene]) @@ -126,7 +126,7 @@ function sampleCFfromCI(df::DataFrame, seed=0::Integer) error("CFs found in columns 1-4 where taxon labels are expected") length(findall(in(obsCFcol), colsCI)) ==0 || error("CFs found in columns where credibility intervals are expected") - newdf = deepcopy(df[ [colsTa; obsCFcol; colsCI] ]) + newdf = df[:, [colsTa; obsCFcol; colsCI] ] if seed==-1 return newdf else @@ -147,9 +147,9 @@ function sampleCFfromCI!(df::DataFrame, seed=0::Integer) c2 = (df[i,11]-df[i,10])*rand()+df[i,10] c3 = (df[i,13]-df[i,12])*rand()+df[i,12] suma = c1+c2+c3 - df[5][i] = c1/suma - df[6][i] = c2/suma - df[7][i] = c3/suma + df[i,5] = c1/suma + df[i,6] = c2/suma + df[i,7] = c3/suma end return df end @@ -469,11 +469,10 @@ input: vector of bootstrap networks (net), estimated network (net1), outgroup returns - a matrix with one row per bootstrap network, and 2*number of hybrids in net1, -column i corresponds 
to whether hybrid i (net1.hybrid[i]) is found in the bootstrap network, -column 2i+1 corresponds to the estimated gamma on the bootstrap network -(0.0 if hybrid not found). -To know the order of hybrids, print net1.hybrid[i] i=1,...,num of hybrids - + column i corresponds to whether hybrid i (`net1.hybrid[i]`) is found in the bootstrap network, + column 2i+1 corresponds to the estimated gamma on the bootstrap network + (0.0 if hybrid not found). + To know the order of hybrids, print `net1.hybrid` or `h.name for h in net1.hybrid` - list of discrepant trees (trees not matching the main tree in net1) """ function hybridDetection(net::Vector{HybridNetwork}, net1::HybridNetwork, outgroup::AbstractString) @@ -590,37 +589,37 @@ Output: The "node" data frame has one row per clade and 9 columns giving: - - **clade**: the clade's name, like the taxon name (if a hybrid is a single taxon) or + - `:clade`: the clade's name, like the taxon name (if a hybrid is a single taxon) or the hybrid tag (like 'H1') in the reference network - - **node**: the node number in the reference network. missing if the clade is not in this network. - - **hybridnode**: typically the same node number as above, except for hybrid clades in the + - `:node`: the node number in the reference network. missing if the clade is not in this network. + - `:hybridnode`: typically the same node number as above, except for hybrid clades in the reference network. For those, the hybrid node number is listed here. - - **edge**: number of the parent edge, parent to the node in column 2, + - `:edge`: number of the parent edge, parent to the node in column 2, if found in the ref network. missing otherwise. - - **BS_hybrid**: percentage of bootstrap networks in which the clade is found to be a hybrid clade. - - **BS_sister**: percentage of bootstrap networks in which the clade is found to be sister to + - `:BS_hybrid`: percentage of bootstrap networks in which the clade is found to be a hybrid clade. 
+ - `:BS_sister`: percentage of bootstrap networks in which the clade is found to be sister to some hybrid clade (sum of the next 2 columns) - - **BS_major_sister**: percentage of bootstrap networks in which the clade is found to be the + - `:BS_major_sister`: percentage of bootstrap networks in which the clade is found to be the major sister to some hybrid clade - - **BS_minor_sister**: same as previous, but minor - - **BS_hybrid_samesisters**: percentage of bootstrap networks in which the clade is found to be + - `:BS_minor_sister`: same as previous, but minor + - `:BS_hybrid_samesisters`: percentage of bootstrap networks in which the clade is found to be a hybrid and with the same set of sister clades as in the reference network. Applies to hybrid clades found in the reference network only, missing for all other clades. The "edge" data frame has one row for each pair of clades, and 8 columns: - - **edge**: hybrid edge number, if the edge appears in the reference network. missing otherwise. - - **hybrid_clade**: name of the clade found to be a hybrid, descendent of 'edge' - - **hybrid**: node number of that clade, if it appears in the reference network. missing otherwise. - - **sister_clade**: name of the clade that is sister to 'edge', i.e. be sister to a hybrid - - **sister**: node number of that clade, if in the ref network. - - **BS_hybrid_edge**: percentage of bootstrap networks in which 'edge' is found to be a hybrid + - `:edge`: hybrid edge number, if the edge appears in the reference network. missing otherwise. + - `:hybrid_clade`: name of the clade found to be a hybrid, descendent of 'edge' + - `:hybrid`: node number of that clade, if it appears in the reference network. missing otherwise. + - `:sister_clade`: name of the clade that is sister to 'edge', i.e. be sister to a hybrid + - `:sister`: node number of that clade, if in the ref network. + - `:BS_hybrid_edge`: percentage of bootstrap networks in which 'edge' is found to be a hybrid edge, i.e. 
when the clade in the 'hybrid' column is found to be a hybrid and the clade in the 'sister' column is one of its sisters. - - **BS_major**: percentage of bootstrap networks in which 'edge' is found to be a major hybrid + - `:BS_major`: percentage of bootstrap networks in which 'edge' is found to be a major hybrid edge, i.e. when 'hybrid' is found to be a hybrid clade and 'sister' is found to be its major sister. - - **BS_minor**: same as previous, but minor + - `:BS_minor`: same as previous, but minor """ function hybridBootstrapSupport(nets::Vector{HybridNetwork}, refnet::HybridNetwork; rooted=false::Bool) @@ -941,21 +940,21 @@ function hybridBootstrapSupport(nets::Vector{HybridNetwork}, refnet::HybridNetwo rowh = 1 for h=1:length(clade) if h <= nclades && keepc[h] && hybparent[h]>0 - resNode[:hybridnode][rowh] = hybnode[hybparent[h]] - resNode[:BS_hybrid_samesisters][rowh] = BShybsamesis[hybparent[h]] + resNode[rowh,:hybridnode] = hybnode[hybparent[h]] + resNode[rowh,:BS_hybrid_samesisters] = BShybsamesis[hybparent[h]] elseif keepc[h] - resNode[:BS_hybrid_samesisters][rowh] = missing + resNode[rowh,:BS_hybrid_samesisters] = missing end if h>nclades # clade *not* in the reference network - resNode[:node][rowh] = missing - resNode[:hybridnode][rowh] = missing - resNode[:edge][rowh] = missing + resNode[rowh,:node] = missing + resNode[rowh,:hybridnode] = missing + resNode[rowh,:edge] = missing end if keepc[h] rowh += 1; end end - insertcols!(resNode, 10, :BS_all => resNode[:BS_hybrid]+resNode[:BS_sister]) + insertcols!(resNode, 10, :BS_all => resNode[!,:BS_hybrid]+resNode[!,:BS_sister]) sort!(resNode, [:BS_all,:BS_hybrid]; rev=true) - deletecols!(resNode, :BS_all) + select!(resNode, Not(:BS_all)) # edge summaries resEdge = DataFrame(edge = Vector{Union{Int, Missing}}(undef, length(hybcladei)), hybrid_clade=cladestr[hybcladei], @@ -967,16 +966,16 @@ function hybridBootstrapSupport(nets::Vector{HybridNetwork}, refnet::HybridNetwo for i=1:length(hybcladei) h = 
hybcladei[i] if h <= nclades && hybparent[h]>0 - resEdge[:hybrid][i] = hybnode[hybparent[h]] + resEdge[i,:hybrid] = hybnode[hybparent[h]] end - if h>nclades resEdge[:hybrid][i]=missing; end - if siscladei[i]>nclades resEdge[:sister][i]=missing; end + if h>nclades resEdge[i,:hybrid]=missing; end + if siscladei[i]>nclades resEdge[i,:sister]=missing; end if i <= nedges - resEdge[:edge][i] = edgenum[i] - else resEdge[:edge][i] = missing + resEdge[i,:edge] = edgenum[i] + else resEdge[i,:edge] = missing end end - o = [1:nedges; sortperm(resEdge[:BS_hybrid_edge][nedges+1:length(hybcladei)],rev=true) .+ nedges] + o = [1:nedges; sortperm(resEdge[nedges+1:length(hybcladei),:BS_hybrid_edge],rev=true) .+ nedges] return resNode, resEdge[o,:], resCluster, gamma, edgenum end diff --git a/src/compareNetworks.jl b/src/compareNetworks.jl index 7edc3b64a..501ad3160 100644 --- a/src/compareNetworks.jl +++ b/src/compareNetworks.jl @@ -12,17 +12,12 @@ function traverseTree2Matrix!(node::Node, edge::Edge, ie::Vector{Int}, M::Matrix for e in child.edge #postorder traversal if(!isEqual(e,edge)) # assumes a tree here grandchild = getOtherNode(e,child) - if (grandchild.leaf) - indsp= 0 - try - indsp = getIndex(grandchild.name,S) - catch - error("leaf $(grandchild.name) not in species list $(S)") - end + if grandchild.leaf + indsp = findfirst(isequal(grandchild.name), S) + indsp != nothing || error("leaf $(grandchild.name) not in species list $(S)") M[indedge,indsp+1] = 1 #indsp+1 bc first column is edge numbers else inde = ie[1]; - # inde = getIndex(e.number,M[:,1]) traverseTree2Matrix!(child,e,ie,M,S) M[indedge,2:size(M,2)] .|= M[inde,2:size(M,2)] end @@ -104,9 +99,8 @@ function hardwiredClusters!(node::Node, edge::Edge, ie::Vector{Int}, M::Matrix{I for e in child.edge if (e.hybrid && e != edge && (e.isChild1 ? 
e.node[1] == child : e.node[2] == child)) partner = e - try - indpartner = getIndex(partner.number,M[:,1]) - catch + indpartner = findfirst(isequal(partner.number), M[:,1]) + if isnothing(indpartner) partnerVisited = false # will need to continue traversal end break # partner hybrid edge was found @@ -124,13 +118,9 @@ function hardwiredClusters!(node::Node, edge::Edge, ie::Vector{Int}, M::Matrix{I for e in child.edge # postorder traversal if (e != edge && (!edge.hybrid || e!=partner)) # do not go back to (either) parent edge. grandchild = getOtherNode(e,child) - if (grandchild.leaf) - indsp = 0 - try - indsp = getIndex(grandchild.name,S) - catch - error("leaf $(grandchild.name) not in species list $(S)") - end + if grandchild.leaf + indsp = findfirst(isequal(grandchild.name), S) + indsp != nothing || error("leaf $(grandchild.name) not in species list $(S)") M[indedge,indsp+1] = 1 #indsp+1 because first column is edge numbers else inde = hardwiredClusters!(child,e,ie,M,S) @@ -157,7 +147,7 @@ and the function does not test for this. visited: vector of node numbers, of all visited nodes. -# Examples: #" +# Examples: ```jldoctest julia> net5 = "(A,((B,#H1),(((C,(E)#H2),(#H2,F)),(D)#H1)));" |> readTopology |> directEdges! 
; @@ -172,14 +162,14 @@ julia> taxa = net5 |> tipLabels # ABC EF D julia> hardwiredCluster(net5.edge[12], taxa) # descendants of 12th edge = CEF 6-element Array{Bool,1}: - false - false - true - true - true - false + 0 + 0 + 1 + 1 + 1 + 0 ``` -""" #" +""" function hardwiredCluster(edge::Edge,taxa::Union{Vector{String},Vector{Int}}) v = zeros(Bool,length(taxa)) hardwiredCluster!(v,edge,taxa) diff --git a/src/deleteHybrid.jl b/src/deleteHybrid.jl index 3f693395c..78df7b006 100644 --- a/src/deleteHybrid.jl +++ b/src/deleteHybrid.jl @@ -213,7 +213,8 @@ function deleteHybrid!(node::Node,net::HybridNetwork,minor::Bool, blacklist::Boo push!(net.blacklist, hybedge1.number) end end - hybindex = getIndex(true,[e.hybrid for e in other2.edge]); + hybindex = findfirst([e.hybrid for e in other2.edge]); + hybindex != nothing || error("didn't find hybrid edge in other2") if(hybindex == 1) treeedge1 = other2.edge[2]; treeedge2 = other2.edge[3]; @@ -439,7 +440,8 @@ function undoPartition!(net::HybridNetwork, hybrid::Node, edgesInCycle::Vector{E @debug "hybrid number matches with partition.cycle" p = splice!(net.partition,i) @debug "after splice, p partition has edges $([e.number for e in p.edges]) and cycle $(p.cycle)" - ind = getIndex(hybrid.number,p.cycle) + ind = findfirst(isequal(hybrid.number), p.cycle) + ind != nothing || error("hybrid not found in p.cycle") deleteat!(p.cycle,ind) #get rid of that hybrid number cycles = vcat(cycles,p.cycle) edges = vcat(edges,p.edges) diff --git a/src/manipulateNet.jl b/src/manipulateNet.jl index e48075d95..a09fa382a 100644 --- a/src/manipulateNet.jl +++ b/src/manipulateNet.jl @@ -105,7 +105,7 @@ This node must be in one (and only one) cycle, otherwise an error will be thrown single cycle with a single reticulation. Check and update the nodes' field `inCycle`. 
-# Example #" +# Example ```julia julia> net = readTopology("(A:1.0,((B:1.1,#H1:0.2::0.2):1.2,(((C:0.52,(E:0.5)#H2:0.02::0.7):0.6,(#H2:0.01::0.3,F:0.7):0.8):0.9,(D:0.8)#H1:0.3::0.8):1.3):0.7):0.1;"); julia> using PhyloPlots @@ -113,7 +113,7 @@ julia> plot(net, showNodeNumber=true) julia> hybridatnode!(net, -4) julia> plot(net) ``` -""" #" +""" function hybridatnode!(net::HybridNetwork, nodeNumber::Integer) undoInCycle!(net.edge, net.node) for n in net.hybrid diff --git a/src/multipleAlleles.jl b/src/multipleAlleles.jl index 82cb0970c..1d8fbff53 100644 --- a/src/multipleAlleles.jl +++ b/src/multipleAlleles.jl @@ -68,10 +68,10 @@ function mapAllelesCFtable!(cfDF::DataFrame, alleleDF::DataFrame, co::Vector{Int compareTaxaNames(alleleDF,cfDF,co) for j in 1:4 for ia in 1:size(alleleDF,1) # for all alleles - cfDF[co[j]] = map(x->replace(string(x), + cfDF[!,co[j]] = map(x->replace(string(x), Regex("^$(string(alleleDF[ia,:allele]))\$") => alleleDF[ia,:species]), - cfDF[co[j]]) + cfDF[!,co[j]]) end end if write @@ -90,10 +90,10 @@ function cleanAlleleDF!(newdf::DataFrame, cols::Vector{Int};keepOne=false::Bool) delrows = Int[] # indices of rows to delete repSpecies = String[] if(isa(newdf[1,cols[1]],Integer)) #taxon names as integers: we need this to be able to add __2 - newdf[cols[1]] = map(x->string(x),newdf[cols[1]]) - newdf[cols[2]] = map(x->string(x),newdf[cols[2]]) - newdf[cols[3]] = map(x->string(x),newdf[cols[3]]) - newdf[cols[4]] = map(x->string(x),newdf[cols[4]]) + newdf[!,cols[1]] = map(string, newdf[!,cols[1]]) + newdf[!,cols[2]] = map(string, newdf[!,cols[2]]) + newdf[!,cols[3]] = map(string, newdf[!,cols[3]]) + newdf[!,cols[4]] = map(string, newdf[!,cols[4]]) end row = Vector{String}(undef, 4) for i in 1:size(newdf,1) #check all rows @@ -240,9 +240,8 @@ end function compareTaxaNames(alleleDF::DataFrame, cfDF::DataFrame, co::Vector{Int}) checkMapDF(alleleDF) #println("found $(length(alleleDF[1])) allele-species matches") - CFtaxa = convert(Array, 
unique(stack(cfDF[co[1:4]], 1:4)[:value])) - CFtaxa = map(x->string(x),CFtaxa) #treat as string - alleleTaxa = map(x->string(x),alleleDF[:allele]) #treat as string + CFtaxa = string.(mapreduce(x -> unique(skipmissing(x)), union, eachcol(cfDF[!,co[1:4]]))) + alleleTaxa = map(string, alleleDF[!,:allele]) # as string, too sizeCF = length(CFtaxa) sizeAllele = length(alleleTaxa) if sizeAllele > sizeCF @@ -267,12 +266,12 @@ function checkMapDF(alleleDF::DataFrame) size(alleleDF,2) <= 2 || error("Allele-Species matching Dataframe should have at least 2 columns") size(alleleDF,2) >= 2 || @warn "allele mapping file contains more than two columns: will ignore all columns not labelled allele or species" try - alleleDF[:allele] + alleleDF[!,:allele] catch error("In allele mapping file there is no column named allele") end try - alleleDF[:species] + alleleDF[!,:species] catch error("In allele mapping file there is no column named species") end diff --git a/src/optimization.jl b/src/optimization.jl index 29d068824..b5dae76fa 100644 --- a/src/optimization.jl +++ b/src/optimization.jl @@ -114,7 +114,8 @@ function parameters!(qnet::QuartetNetwork, net::HybridNetwork) if(qnet.numHybrids == 1 && qnet.hybrid[1].isBadDiamondI) ind1 = parse(Int,string(string(qnet.hybrid[1].number),"1")) ind2 = parse(Int,string(string(qnet.hybrid[1].number),"2")) - i = getIndex(ind1,nhz) + i = findfirst(isequal(ind1), nhz) + i != nothing || error("ind1 not found in nhz") edges = hybridEdges(qnet.hybrid[1]) push!(qnhz,i+net.numHybrids-net.numBad+k) push!(qnhz,i+1+net.numHybrids-net.numBad+k) @@ -126,7 +127,8 @@ function parameters!(qnet::QuartetNetwork, net::HybridNetwork) if(n.isBadDiamondI) ind1 = parse(Int,string(string(n.number),"1")) ind2 = parse(Int,string(string(n.number),"2")) - i = getIndex(ind1,nhz) + i = findfirst(isequal(ind1), nhz) + i != nothing || error("ind1 not found in nhz") edges = hybridEdges(n) push!(qnhz,i+net.numHybrids-net.numBad+k) push!(qnhz,i+1+net.numHybrids-net.numBad+k) @@ 
-140,33 +142,27 @@ function parameters!(qnet::QuartetNetwork, net::HybridNetwork) all((n -> !n.isBadDiamondI),qnet.hybrid) || error("cannot have bad diamond I hybrid nodes in this qnet, case dealt separately before") for e in qnet.edge if(e.istIdentifiable) - try - getIndex(e.number,nt) - catch + enum_in_nt = findfirst(isequal(e.number), nt) + if isnothing(enum_in_nt) error("identifiable edge $(e.number) in qnet not found in net") end - push!(qnt, getIndex(e.number,nt) + net.numHybrids - net.numBad) + push!(qnt, enum_in_nt + net.numHybrids - net.numBad) push!(qindxt, getIndex(e,qnet)) end if(!e.istIdentifiable && all((n->!n.leaf),e.node) && !e.hybrid && e.fromBadDiamondI) # tree edge not identifiable but internal with length!=0 (not bad diamII nor bad triangle) - try - getIndex(e.number,nhz) - catch + enum_in_nhz = findfirst(isequal(e.number), nhz) + if isnothing(enum_in_nhz) error("internal edge $(e.number) corresponding to gammaz in qnet not found in net.ht") end - push!(qnhz, getIndex(e.number,nhz) + net.numHybrids - net.numBad + k) + push!(qnhz, enum_in_nhz + net.numHybrids - net.numBad + k) push!(qindxhz, getIndex(e,qnet)) end if(e.hybrid && !e.isMajor) node = e.node[e.isChild1 ? 1 : 2] node.hybrid || error("strange hybrid edge $(e.number) poiting to tree node $(node.number)") - found = true - try - getIndex(e.number,nh) - catch - found = false - end - found ? push!(qnh, getIndex(e.number,nh)) : nothing + enum_in_nh = findfirst(isequal(e.number), nh) + found = (enum_in_nh != nothing) + found ? push!(qnh, enum_in_nh) : nothing found ? push!(qindxh, getIndex(e,qnet)) : nothing end end # for qnet.edge diff --git a/src/parsimony.jl b/src/parsimony.jl index 70496839f..c90a240d9 100755 --- a/src/parsimony.jl +++ b/src/parsimony.jl @@ -397,13 +397,14 @@ end """ readfastatodna(filename::String) -Read fasta file to a dataframe containing a column for each site. +Read a fasta file to a dataframe containing a column for each site. 
Calculate weights and remove matching site patterns to reduce matrix dimension. Return a tuple containing: - dataframe of BioSequence DNA sequences, with taxon in column 1 and a column for each site. [1] - array of weights, one weights for each of the site columns. The length of the weight is equal to nsites. [2] - +1. data frame of BioSequence DNA sequences, with taxon names in column 1 + followed by a column for each site pattern, in columns 2-npatterns; +2. array of weights, one weight for each of the site columns. + The length of the weight vector is equal to npatterns. """ function readfastatodna(fastafile::String, countPatterns=false::Bool) reader = BioSequences.FASTA.Reader(open(fastafile)) @@ -471,7 +472,7 @@ function readCSVtoArray(dat::DataFrame) end species = String[] - for d in dat[i] + for d in dat[!,i] push!(species,string(d)) end diff --git a/src/pseudolik.jl b/src/pseudolik.jl index 8121fa2a7..832b12764 100644 --- a/src/pseudolik.jl +++ b/src/pseudolik.jl @@ -482,12 +482,9 @@ end function extractQuartet!(net::HybridNetwork, quartet::Quartet) list = Node[] for q in quartet.taxon - try - getIndexNode(getIndex(q,net.names),net) - catch - error("taxon $(q) not in network") - end - push!(list, net.node[getIndexNode(getIndex(q,net.names),net)]) + tax_in_net = findfirst(n -> n.name == q, net.node) + tax_in_net != nothing || error("taxon $(q) not in network") + push!(list, net.node[tax_in_net]) end qnet = extractQuartet(net,list) @debug "EXTRACT: extracted quartet $(quartet.taxon)" @@ -731,7 +728,7 @@ function identifyQuartet!(qnet::QuartetNetwork, node::Node) end error("strange quartet network with a hybrid node $(node.number) but no cycle") elseif(k == 2) - other = qnet.node[getIndex(true, [(n.inCycle == node.number && size(n.edge,1) == 3 && !isEqual(n,node)) for n in qnet.node])] + other = qnet.node[findfirst(n -> (n.inCycle == node.number && size(n.edge,1) == 3 && !isEqual(n,node)), qnet.node)] edgebla,edgebla,edge1 = hybridEdges(node) 
edgebla,edgebla,edge2 = hybridEdges(other) if(getOtherNode(edge1,node).leaf || getOtherNode(edge2,other).leaf) @@ -901,9 +898,10 @@ function eliminateTriangle!(qnet::QuartetNetwork, node::Node, other::Node, case: error("node $(node.number) and other node $(other.number) are not connected by an edge") end #println("hybedge is $(hybedge.number), otheredge is $(otheredge.number)") - middle = qnet.node[getIndex(true, [(n.inCycle == node.number && size(n.edge,1) == 3 && !isEqual(n,other) && !isEqual(n,node)) for n in qnet.node])] + middle = qnet.node[findfirst(n -> (n.inCycle == node.number && size(n.edge,1) == 3 && !isEqual(n,other) && !isEqual(n,node)), qnet.node)] #println("middle node is $(middle.number) in eliminateTriangle") - ind = getIndex(true,[(e.inCycle == node.number && !isEqual(getOtherNode(e,middle),node)) for e in middle.edge]) + ind = findfirst(e -> (e.inCycle == node.number && !isEqual(getOtherNode(e,middle),node)), middle.edge) + ind != nothing || error("edge number not found in middle edge") edge = middle.edge[ind] #println("edge is $(edge.number) with length $(edge.length) in eliminateTriangle, will do deleteIntLeaf from middle through edge") deleteIntLeafWhile!(qnet,edge,middle) @@ -975,7 +973,7 @@ function quartetType5!(qnet::QuartetNetwork, node::Node) end leaf2 = getOtherNode(edgetree1,other1) leaf3 = getOtherNode(edgetree2, other2) - leaf4 = qnet.leaf[getIndex(true,[(!isEqual(n,leaf1) && !isEqual(n,leaf2) && !isEqual(n,leaf3)) for n in qnet.leaf])] + leaf4 = qnet.leaf[findfirst(n -> (!isEqual(n,leaf1) && !isEqual(n,leaf2) && !isEqual(n,leaf3)), qnet.leaf)] #println("leaf1 is $(leaf1.number)") #println("leaf2 is $(leaf2.number)") #println("leaf3 is $(leaf3.number)") @@ -1049,24 +1047,22 @@ end # two edges in a quartet network with qnet.which=1 # eliminate internal nodes in every direction function internalLength!(qnet::QuartetNetwork) - if(qnet.which == 1) - try - getIndex(true,[size(n.edge,1) == 3 for n in qnet.node]) - catch + if qnet.which == 
1 + ind_3e = findfirst(n -> length(n.edge) == 3, qnet.node) + if isnothing(ind_3e) printEdges(qnet) printNodes(qnet) error("not found internal node in qnet with 3 edges") end - node = qnet.node[getIndex(true,[size(n.edge,1) == 3 for n in qnet.node])] - try - getIndex(true,[size(n.edge,1) == 3 && !isEqual(n,node) for n in qnet.node]) - catch + node = qnet.node[ind_3e] + ind_3eo = findfirst(n -> (size(n.edge,1) == 3 && n !== node), qnet.node) + if isnothing(ind_3eo) println("first node found with 3 edges $(node.number)") printEdges(qnet) printNodes(qnet) error("not found another internal node in qnet with 3 edges") end - node2 = qnet.node[getIndex(true,[size(n.edge,1) == 3 && !isEqual(n,node) for n in qnet.node])] + node2 = qnet.node[ind_3eo] for e in node.edge deleteIntLeafWhile!(qnet,e,node,true) end @@ -1130,6 +1126,7 @@ end # returns leaf for taxon1, leaf for taxon2 (i.e. 12) # warning: assumes that the numbers for the taxon in the output.csv table are the names function whichLeaves(qnet::QuartetNetwork, taxon1::String, taxon2::String, leaf1::Node, leaf2::Node, leaf3::Node, leaf4::Node) + # danger: this quartet code assumes a particular correspondance between net.names and [n.name for n in net.node] if(taxon1 == qnet.names[leaf1.number]) if(taxon2 == qnet.names[leaf2.number]) return 1,2 @@ -1184,9 +1181,9 @@ function updateSplit!(qnet::QuartetNetwork) if(qnet.which == 1) if(qnet.split == [-1,-1,-1,-1]) qnet.split = [2,2,2,2] - middle = qnet.node[getIndex(true,[size(n.edge,1) == 3 for n in qnet.node])] - leaf1 = middle.edge[getIndex(true,[getOtherNode(e,middle).leaf for e in middle.edge])] - leaf2 = middle.edge[getIndex(true,[(getOtherNode(e,middle).leaf && !isEqual(leaf1,e)) for e in middle.edge])] + middle = qnet.node[findfirst(n -> size(n.edge,1) == 3, qnet.node)] + leaf1 = middle.edge[findfirst(e -> getOtherNode(e,middle).leaf, middle.edge)] + leaf2 = middle.edge[findfirst(e -> getOtherNode(e,middle).leaf && !isEqual(leaf1,e), middle.edge)] leaf1 = 
getOtherNode(leaf1,middle) #leaf1 was edge, now it is node leaf2 = getOtherNode(leaf2,middle) ind1 = getIndex(leaf1,qnet.leaf) diff --git a/src/readData.jl b/src/readData.jl index 102fa9776..4cd22579d 100644 --- a/src/readData.jl +++ b/src/readData.jl @@ -34,8 +34,8 @@ function writeTableCF(quartets::Array{Quartet,1}) push!(df, [q.taxon[1],q.taxon[2],q.taxon[3],q.taxon[4],q.obsCF[1],q.obsCF[2],q.obsCF[3], (q.ngenes==-1.0 ? missing : q.ngenes)]) end - if all(ismissing, df[:ngenes]) - deletecols!(df, :ngenes) + if all(ismissing, df[!,:ngenes]) + select!(df, Not(:ngenes)) end return df end @@ -164,7 +164,8 @@ If multiple rows correspond to the same 4-taxon set, these rows are merged and t Modify the `.quartet.obsCF` values in the `DataCF` object with those read from the data frame in columns numbered `columns`. -`columns` should have **3** columns numbers for the 3 CFs in this order: 12_34, 13_24 and 14_23. +`columns` should have **3** columns numbers for the 3 CFs in this order: +`12_34`, `13_24` and `14_23`. Assumptions: - same 4-taxon sets in `DataCF` and in the data frame, and in the same order, @@ -666,11 +667,7 @@ function readTrees2CF(treefile::AbstractString; quartetfile="none"::AbstractStri writeTab=true::Bool, CFfile="none"::AbstractString, taxa=Vector{String}()::Union{Vector{String},Vector{Int}}, writeQ=false::Bool, writeSummary=true::Bool, nexus=false::Bool) - if(nexus) - trees = readNexusTrees(treefile) - else - trees = readInputTrees(treefile) - end + trees = (nexus ? readNexusTrees(treefile, readTopologyUpdate, false, false) : readInputTrees(treefile)) if length(taxa)==0 # unionTaxa(trees) NOT default argument: taxa = unionTaxa(trees) # otherwise: tree file is read twice end @@ -804,8 +801,8 @@ function updateBL!(net::HybridNetwork,d::DataCF) x = by(df, :edge, Nquartets= :CF => length, edgeL = :CF => x -> -log(3/2*(1. 
- mean(x)))) # ommitting columns: meanCF= :CF => mean, sdCF= :CF => std - edges = x[:edge] - lengths = x[:edgeL] + edges = x[!,:edge] + lengths = x[!,:edgeL] for i in 1:length(edges) ind = getIndexEdge(edges[i],net) # helpful error if not found if net.edge[ind].length < 0.0 || net.edge[ind].length==1.0 @@ -920,27 +917,19 @@ end function extractQuartetTree(q::Quartet, M::Matrix{Int},S::Union{Vector{String},Vector{Int}}) @debug "extractQuartet: $(q.taxon)" @debug "matrix: $(M)" - try - ind1 = getIndex(q.taxon[1],S) - ind2 = getIndex(q.taxon[2],S) - ind3 = getIndex(q.taxon[3],S) - ind4 = getIndex(q.taxon[4],S) - catch + inds = indexin(q.taxon, S) + if any(isnothing, inds) error("some taxon in quartet $(q.taxon) not found in list of all species $(S)") end - ind1 = getIndex(q.taxon[1],S) - ind2 = getIndex(q.taxon[2],S) - ind3 = getIndex(q.taxon[3],S) - ind4 = getIndex(q.taxon[4],S) - subM = M[:,[ind1+1,ind2+1,ind3+1,ind4+1]] + subM = M[:, inds.+1] @debug "subM: $(subM)" for r in 1:size(subM,1) #rows in subM @debug "subM[r,:]: $(subM[r,:])" - if(subM[r,:] == [0,0,1,1] || subM[r,:] == [1,1,0,0]) + if subM[r,:] == [0,0,1,1] || subM[r,:] == [1,1,0,0] return 1 - elseif(subM[r,:] == [0,1,0,1] || subM[r,:] == [1,0,1,0]) + elseif subM[r,:] == [0,1,0,1] || subM[r,:] == [1,0,1,0] return 2 - elseif(subM[r,:] == [0,1,1,0] || subM[r,:] == [1,0,0,1]) + elseif subM[r,:] == [0,1,1,0] || subM[r,:] == [1,0,0,1] return 3 end end @@ -989,26 +978,46 @@ function createQuartet(taxa::Union{Vector{String},Vector{Int}},qvec::Vector{Int} return Quartet(num,names,[1.0,0.0,0.0]) end -## internal function to read a treefile in nexus format -function readNexusTrees(file::AbstractString) +""" + readNexusTrees(filename::AbstractString, treereader=readTopology::Function [, args...]) + +Read trees in nexus-formatted file and return a vector of `HybridNetwork`s. +For the nexus format, see Maddison, Swofford & Maddison (1997) +https://doi.org/10.1093/sysbio/46.4.590. 
+The optional arguments are passed onto the individual tree reader. + +Warnings: +- "translate" tables are not supported yet +- only the first tree block is read +""" +function readNexusTrees(file::AbstractString, treereader=readTopology::Function, args...) vnet = HybridNetwork[] + rx_start = r"^\s*begin\s+trees\s*;"i + rx_end = r"^\s*end\s*;"i + rx_tree = r"^\s*tree\s+[^(]+(\([^;]*;)"i + # spaces,"Tree",spaces,any_symbols_other_than_(, then we capture: + # ( any_symbols_other_than_; ; + treeblock = false # whether we are currently reading the TREE block or not open(file) do s - numl = 1 + numl = 0 for line in eachline(s) - line = strip(line) # remove spaces - @debug "$(line)" - m = match(r"^\s*Tree\s+[^(]+(\([^;]*;)", line) - # regex: spaces,"Tree",spaces,any_symbols_other_than_(, then we capture: - # ( any_symbols_other_than_; ; - if m != nothing - phy = m.captures[1] - try - push!(vnet, readTopologyUpdate(phy,false)) - catch err - error("could not read tree in line $(numl). The error is $(err)") - end - end numl += 1 + if treeblock # currently reading trees, check for END signal + occursin(rx_end, line) && break # break if end of tree block + else # not reading trees: check for the BEGIN signal + if occursin(rx_start, line) treeblock=true; end + continue # to next line, either way + end + # if we get there, it's that we are inside the treeblock (true) and no END signal yet + m = match(rx_tree, line) + m != nothing || continue # continue to next line if no match + phy = m.captures[1] # string + try + push!(vnet, treereader(phy, args...)) # readTopologyUpdate(phy,false) + catch err + print("skipped phylogeny on line $(numl) of file $file: ") + if :msg in fieldnames(typeof(err)) println(err.msg); else println(typeof(err)); end + end end end return vnet # consistent output type: HybridNetwork vector. might be of length 0. 
diff --git a/src/readwrite.jl b/src/readwrite.jl index ab8e91ea0..280f9d388 100644 --- a/src/readwrite.jl +++ b/src/readwrite.jl @@ -1115,12 +1115,9 @@ function updateRoot!(net::HybridNetwork, outgroup::AbstractString) checkroot = true else println("outgroup defined $(outgroup)") - try - index = getIndex(true,[isequal(outgroup,n.name) for n in net.node]) - catch + index = findfirst(n -> outgroup == n.name, net.node) + index != nothing || error("outgroup $(outgroup) not in net.names $(net.names)") - end - index = getIndex(true,[isequal(outgroup,n.name) for n in net.node]) node = net.node[index] node.leaf || error("outgroup $(outgroup) is not a leaf in net") length(net.node[index].edge) == 1 || error("strange leaf $(outgroup), node number $(net.node[index].number) with $(length(net.node[index].edge)) edges instead of 1") @@ -1182,35 +1179,35 @@ end undoRoot!(net::HybridNetwork) = undoRoot!(net, true) -# function to read the .out file from snaq (optTopRuns) function -function readOutfile(file::AbstractString) - try - s = open(file) - catch - error("Could not find or open $(file) file"); - end - s = open(file) - line = readline(s) - @debug "line read $(line)" - c = line[1] - if(c == '(') - println("Estimated network from file $(file): $(line)") - net = readTopologyUpdate(line) - vec = split(line,"-Ploglik = ") - net.loglik = parse(Float64,vec[2]) - else - error("output file $(filename).out does not contain a tree in the first line, instead it has $(line); or we had trouble finding ploglik.") - end - return net -end - +# .out file from snaq written by optTopRuns """ -`readSnaqNetwork(output file)` + readSnaqNetwork(output file) -function to read the estimated network from an .out file generated by the snaq function -""" -readSnaqNetwork(file::AbstractString) = readOutfile(file) +Read the estimated network from a `.out` file generated by `snaq!`. +The network score is read also, and stored in the network's field `.loglik`. 
+Warning: despite the name "loglik", this score is only proportional +to the network's pseudo-deviance: the lower, the better. +Do NOT use this score to calculate an AIC or BIC (etc.) value. +""" +function readSnaqNetwork(file::AbstractString) + open(file) do s + line = readline(s) + line[1] == '(' || + error("output file $(file) does not contain a tree in the first line, instead it has $(line); or we had trouble finding ploglik.") + # println("Estimated network from file $(file): $(line)") + net = readTopology(line) + # readTopologyUpdate is inadequate: would replace missing branch lengths, which are unidentifiable, by 1.0 values + try + vec = split(line,"-Ploglik = ") + net.loglik = parse(Float64,vec[2]) + catch e + @warn "could not find the network score; the error was:" + rethrow(e) + end + return net + end +end # function to change negative branch lengths to 1.0 for starting topology # and to change big branch lengths to 10.0 @@ -1277,7 +1274,7 @@ Use the option append=true to append to the file. Otherwise, the default is to c file or overwrite it, if it already existed. Each network is written with `writeTopology`. -# Examples #" +# Examples ``` julia> net = [readTopology("(D,((A,(B)#H7:::0.864):2.069,(F,E):3.423):0.265,(C,#H7:::0.1361111):10);"), readTopology("(A,(B,C));"),readTopology("(E,F);"),readTopology("(G,H,F);")]; @@ -1290,12 +1287,12 @@ julia> writeMultiTopology(net, stdout) # to write to the screen (standar (E,F); (G,H,F); ``` -""" #" +""" function writeMultiTopology(n::Vector{HybridNetwork},file::AbstractString; append::Bool=false) mode = (append ? 
"a" : "w") - s = open(file, mode) + open(file, mode) do s writeMultiTopology(n,s) - close(s) + end # closes file safely end function writeMultiTopology(net::Vector{HybridNetwork},s::IO) diff --git a/src/substitutionModels.jl b/src/substitutionModels.jl index a4b46816d..7114a7c6d 100644 --- a/src/substitutionModels.jl +++ b/src/substitutionModels.jl @@ -206,7 +206,7 @@ rate matrix Q: julia> PhyloNetworks.P(m1, 3.0) -4×4 StaticArrays.MArray{Tuple{4,4},Float64,2,16}: +4×4 StaticArrays.MArray{Tuple{4,4},Float64,2,16} with indices SOneTo(4)×SOneTo(4): 0.217509 0.198417 0.190967 0.198417 0.297625 0.312992 0.297625 0.28645 0.28645 0.297625 0.312992 0.297625 @@ -218,7 +218,7 @@ Juke-Cantor example: julia> m1 = JC69([1.]); julia> PhyloNetworks.P(m1, 0.2) -4×4 StaticArrays.MArray{Tuple{4,4},Float64,2,16}: +4×4 StaticArrays.MArray{Tuple{4,4},Float64,2,16} with indices SOneTo(4)×SOneTo(4): 0.824446 0.0585179 0.0585179 0.0585179 0.0585179 0.824446 0.0585179 0.0585179 0.0585179 0.0585179 0.824446 0.0585179 @@ -477,7 +477,7 @@ nparams(::ERSM) = 1::Int function Base.show(io::IO, obj::ERSM) str = "Equal Rates Substitution Model with k=$(obj.k),\n" - str *= "all rates equal to α=$(obj.rate[1]).\n" + str *= "all rates equal to α=$(round(obj.rate[1], digits=5)).\n" str *= "rate matrix Q:\n" print(io, str) showQ(io, obj) @@ -651,7 +651,7 @@ to an average of 1 transition per unit of time: in which case `rate` is set to 1 julia> m1 = JC69([0.25], false) Jukes and Cantor 69 Substitution Model, absolute rate version -off-diagonal rates equal to [0.25]/3. +off-diagonal rates equal to 0.25/3. 
rate matrix Q: A C G T A * 0.0833 0.0833 0.0833 @@ -721,7 +721,7 @@ function Base.show(io::IO, obj::JC69) str *= "off-diagonal rates equal to 1/3\n" else str *= "absolute rate version\n" - str *= "off-diagonal rates equal to $(obj.rate)/3.\n" + str *= "off-diagonal rates equal to $(round(obj.rate[1], digits=5))/3.\n" end str *= "rate matrix Q:\n" print(io, str) @@ -829,9 +829,12 @@ end function Base.show(io::IO, obj::HKY85) str = "HKY85 Substitution Model base frequencies: $(obj.pi)\n" if obj.relative - str *= "relative rate version with transition/tranversion ratio kappa = $(obj.rate[1]),\n scaled so that there is one substitution per unit time\n" + str *= "relative rate version with transition/tranversion ratio kappa = $(round(obj.rate[1], digits=5))," + str *= "\n scaled so that there is one substitution per unit time\n" else - str *= "absolute rate version with transition/transversion ratio kappa = a/b = $(obj.rate[1]/obj.rate[2])\n with rates a = $(obj.rate[1]) and b = $(obj.rate[2])\n" + str *= "absolute rate version with transition/transversion ratio kappa = a/b = " + str *= "$(round(obj.rate[1]/obj.rate[2], digits=5))" + str *= "\n with rates a = $(round(obj.rate[1], digits=5)) and b = $(round(obj.rate[2], digits=5))\n" end str *= "rate matrix Q:\n" print(io, str) @@ -983,7 +986,7 @@ julia> rv = RateVariationAcrossSites() Rate Variation Across Sites using Discretized Gamma Model alpha: 1.0 categories for Gamma discretization: 4 -ratemultiplier: [0.145784, 0.513132, 1.07083, 2.27025] +ratemultiplier: [0.14578, 0.51313, 1.07083, 2.27025] julia> PhyloNetworks.setalpha!(rv, 2.0) @@ -991,13 +994,13 @@ julia> rv Rate Variation Across Sites using Discretized Gamma Model alpha: 2.0 categories for Gamma discretization: 4 -ratemultiplier: [0.319065, 0.683361, 1.10898, 1.8886] +ratemultiplier: [0.31907, 0.68336, 1.10898, 1.8886] julia> RateVariationAcrossSites(2.0, 4) Rate Variation Across Sites using Discretized Gamma Model alpha: 2.0 categories for Gamma 
discretization: 4 -ratemultiplier: [0.319065, 0.683361, 1.10898, 1.8886] +ratemultiplier: [0.31907, 0.68336, 1.10898, 1.8886] ``` """ mutable struct RateVariationAcrossSites @@ -1039,9 +1042,9 @@ end function Base.show(io::IO, obj::RateVariationAcrossSites) str = "Rate Variation Across Sites using Discretized Gamma Model\n" - str *= "alpha: $(obj.alpha)\n" + str *= "alpha: $(round(obj.alpha, digits=5))\n" str *= "categories for Gamma discretization: $(obj.ncat)\n" - str *= "ratemultiplier: $(obj.ratemultiplier)\n" + str *= "ratemultiplier: $(round.(obj.ratemultiplier, digits=5))\n" print(io, str) end @@ -1057,7 +1060,7 @@ Estimate base frequencies in DNA data `DNAdata`, ordered ACGT. - `DNAdata`: data frame. All columns are used. If the first column gives species names, find a way to ignore it before calculating empirical - frequencies, e.g. `empiricalDNAfrequencies(view(DNAdata, 2:ncol(DNAdata)))`. + frequencies, e.g. `empiricalDNAfrequencies(view(DNAdata, :, 2:ncol(DNAdata)))`. Data type must be `BioSymbols.DNA` or `Char` or `String`. WARNING: this is checked on the first column only. - `DNAweights`: vector of weights, to weigh each column in `DNAdata`. @@ -1081,7 +1084,7 @@ function empiricalDNAfrequencies(dnaDat::AbstractDataFrame, dnaWeights::Vector, convert2dna = eltypes(dnaDat)[1] != BioSymbols.DNA for j in 1:ncol(dnaDat) # for each column - col = dnaDat[j] + col = dnaDat[!,j] wt = dnaWeights[j] for nuc in col # for each row if convert2dna diff --git a/src/ticr.jl b/src/ticr.jl index de8c6bbe4..2ff442009 100644 --- a/src/ticr.jl +++ b/src/ticr.jl @@ -50,12 +50,12 @@ output: NWM Stenz, B Larget, DA Baum and C Ané (2015). Exploring tree-like and non-tree-like patterns using genome sequences: An example using the inbreeding plant species *Arabidopsis thaliana* (L.) Heynh. -Systematic Biology, 64(5):809-823. doi: 10.1093/sysbio/syv039 +Systematic Biology, 64(5):809-823. 
(https://doi.org/10.1093/sysbio/syv039) """ function ticr!(net, D::DataFrame, optimizeBL::Bool) d = readTableCF(D); res = ticr!(net, d, optimizeBL); # = (chisq_pval, chisq, pseudolik, alpha, pval) - D[:p_value] = res[5] # order of value in results: (chisq_pval, chisq, pseudolik, alpha, pval) + D[!, :p_value] = res[5] # order of value in results: (chisq_pval, chisq, pseudolik, alpha, pval) return res end diff --git a/src/traits.jl b/src/traits.jl index bec4dd3f3..f28c442d0 100644 --- a/src/traits.jl +++ b/src/traits.jl @@ -477,7 +477,7 @@ each shift, each labelled according to the pattern shift_{number_of_edge}. It ha an aditional column labelled `tipNames` to allow easy fitting afterward (see example). # Examples -```jldoctest +```jldoctest; filter = r"Info: Loading DataFrames support into Gadfly" julia> net = readTopology("(A:2.5,((B:1,#H1:0.5::0.4):1,(C:1,(D:0.5)#H1:0.5::0.6):1):0.5);"); julia> preorder!(net) @@ -512,7 +512,7 @@ julia> sim = simulate(net, params); # simulate a dataset with shifts julia> using DataFrames # to handle data frames julia> dat = DataFrame(trait = sim[:Tips], tipNames = sim.M.tipNames) -4×2 DataFrame +4×2 DataFrames.DataFrame │ Row │ trait │ tipNames │ │ │ Float64 │ String │ ├─────┼─────────┼──────────┤ @@ -522,7 +522,7 @@ julia> dat = DataFrame(trait = sim[:Tips], tipNames = sim.M.tipNames) │ 4 │ 7.88906 │ D │ julia> dfr_shift = regressorShift(net.node[nodes_shifts], net) # the regressors matching the shifts. 
-4×3 DataFrame +4×3 DataFrames.DataFrame │ Row │ shift_1 │ shift_8 │ tipNames │ │ │ Float64 │ Float64 │ String │ ├─────┼─────────┼─────────┼──────────┤ @@ -546,13 +546,13 @@ Parameter(s) Estimates: Sigma2: 0.0112618 Coefficients: -──────────────────────────────────────────────────── - Estimate Std.Error t value Pr(>|t|) -──────────────────────────────────────────────────── -(Intercept) 9.48238 0.327089 28.9902 0.0220 -shift_1 3.9096 0.46862 8.34279 0.0759 -shift_8 -2.4179 0.422825 -5.71843 0.1102 -──────────────────────────────────────────────────── +─────────────────────────────────────────────────────────────────────────── + Estimate Std. Error t value Pr(>|t|) Lower 95% Upper 95% +─────────────────────────────────────────────────────────────────────────── +(Intercept) 9.48238 0.327089 28.9902 0.0220 5.32632 13.6384 +shift_1 3.9096 0.46862 8.34279 0.0759 -2.04479 9.86399 +shift_8 -2.4179 0.422825 -5.71843 0.1102 -7.7904 2.95461 +─────────────────────────────────────────────────────────────────────────── Log Likelihood: 1.8937302027 AIC: 4.2125395947 @@ -592,7 +592,7 @@ function regressorShift(node::Vector{Node}, return(Symbol("shift_$(x)")) end names!(df, [tmp_fun(num) for num in eNum]) - df[:tipNames]=T.tipNames + df[!,:tipNames]=T.tipNames return(df) end @@ -618,7 +618,7 @@ an aditional column labelled `tipNames` to allow easy fitting afterward (see exa This function can be used to test for heterosis. # Examples -```jldoctest +```jldoctest; filter = r"Info: Loading DataFrames support into Gadfly" julia> using DataFrames # Needed to handle data frames. 
julia> net = readTopology("(A:2.5,((B:1,#H1:0.5::0.4):1,(C:1,(D:0.5)#H1:0.5::0.6):1):0.5);"); @@ -652,7 +652,7 @@ julia> using Random; Random.seed!(2468); # sets the seed for reproducibility julia> sim = simulate(net, params); # simulate a dataset with shifts julia> dat = DataFrame(trait = sim[:Tips], tipNames = sim.M.tipNames) -4×2 DataFrame +4×2 DataFrames.DataFrame │ Row │ trait │ tipNames │ │ │ Float64 │ String │ ├─────┼─────────┼──────────┤ @@ -662,7 +662,7 @@ julia> dat = DataFrame(trait = sim[:Tips], tipNames = sim.M.tipNames) │ 4 │ 12.6891 │ D │ julia> dfr_hybrid = regressorHybrid(net) # the reressors matching the hybrids. -4×3 DataFrame +4×3 DataFrames.DataFrame │ Row │ shift_6 │ tipNames │ sum │ │ │ Float64 │ String │ Float64 │ ├─────┼─────────┼──────────┼─────────┤ @@ -686,12 +686,12 @@ Parameter(s) Estimates: Sigma2: 0.041206 Coefficients: -──────────────────────────────────────────────────── - Estimate Std.Error t value Pr(>|t|) -──────────────────────────────────────────────────── -(Intercept) 10.064 0.277959 36.2068 0.0008 -shift_6 2.72526 0.315456 8.63912 0.0131 -──────────────────────────────────────────────────── +─────────────────────────────────────────────────────────────────────────── + Estimate Std. 
Error t value Pr(>|t|) Lower 95% Upper 95% +─────────────────────────────────────────────────────────────────────────── +(Intercept) 10.064 0.277959 36.2068 0.0008 8.86805 11.26 +shift_6 2.72526 0.315456 8.63912 0.0131 1.36796 4.08256 +─────────────────────────────────────────────────────────────────────────── Log Likelihood: -0.7006021946 AIC: 7.4012043891 @@ -703,7 +703,7 @@ AIC: 7.4012043891 function regressorHybrid(net::HybridNetwork; checkPreorder=true::Bool) childs = [getChildren(nn)[1] for nn in net.hybrid] dfr = regressorShift(childs, net; checkPreorder=checkPreorder) - dfr[:sum] = vec(sum(Matrix(dfr[findall(names(dfr) .!= :tipNames)]), dims=2)) + dfr[!,:sum] = sum.(eachrow(select(dfr, Not(:tipNames), copycols=false))) return(dfr) end @@ -1519,11 +1519,11 @@ Parameter(s) Estimates: Sigma2: 0.00294521 Coefficients: -─────────────────────────────────────────────────── - Estimate Std.Error t value Pr(>|t|) -─────────────────────────────────────────────────── -(Intercept) 4.679 0.330627 14.1519 <1e-31 -─────────────────────────────────────────────────── +────────────────────────────────────────────────────────────────────────── + Estimate Std. 
Error t value Pr(>|t|) Lower 95% Upper 95% +────────────────────────────────────────────────────────────────────────── +(Intercept) 4.679 0.330627 14.1519 <1e-31 4.02696 5.33104 +────────────────────────────────────────────────────────────────────────── Log Likelihood: -78.9611507833 AIC: 161.9223015666 @@ -1676,7 +1676,7 @@ function phyloNetworklm(f::StatsModels.FormulaTerm, end else # ind = indexin(V.tipNames, fr[:tipNames]) - ind = indexin(fr[:tipNames], tipLabels(net)) + ind = indexin(fr[!,:tipNames], tipLabels(net)) if any(ind == 0) || length(unique(ind)) != length(ind) error("""Tips names of the network and names provided in column tipNames of the dataframe do not match.""") @@ -1711,7 +1711,7 @@ StatsBase.vcov(m::PhyloNetworkLinearModel) = vcov(m.lm) StatsBase.stderror(m::PhyloNetworkLinearModel) = stderror(m.lm) # confidence Intervals for coefficients: # hcat(coef,coef) + stderror * quantile(TDist(dof_residual, (1.-level)/2.) * [1. -1.] -StatsBase.confint(m::PhyloNetworkLinearModel; level=0.95::Real) = confint(m.lm, level) +StatsBase.confint(m::PhyloNetworkLinearModel; level=0.95::Real) = confint(m.lm, level=level) # coef table: t-values t=coef/se # CoefTable(hcat(coef,se,t,ccdf(FDist(1, dof_residual), abs2(t))), # ["Estimate","Std.Error","t value", "Pr(>|t|)"], @@ -1988,7 +1988,7 @@ function expectationsPlot(obj::ReconstructedStates; markMissing="*"::AbstractStr nonmissing = obj.model.nonmissing ind = obj.model.ind missingTipNumbers = obj.model.V.tipNumbers[ind][.!nonmissing] - indexMissing = indexin(missingTipNumbers, expe[:nodeNumber]) + indexMissing = indexin(missingTipNumbers, expe[!,:nodeNumber]) expetxt[indexMissing] .*= markMissing end return DataFrame(nodeNumber = [obj.NodeNumbers; obj.TipNumbers], PredInt = expetxt) @@ -2045,23 +2045,6 @@ function predintPlot(obj::ReconstructedStates; level=0.95::Real, withExp=false:: return DataFrame(nodeNumber = [obj.NodeNumbers; obj.TipNumbers], PredInt = pritxt) end -# """ -# 'plot(net::HybridNetwork, 
obj::ReconstructedStates; kwargs...) -# -# Plot the reconstructed states computed by function `ancestralStateReconstruction` -# on a network. -# -# # Arguments -# * `net::HybridNetwork`: a phylogenetic network. -# * `obj::ReconstructedStates`: the reconstructed states on the network. -# * `kwargs...`: further arguments to be passed to the netwotk `plot` function. -# -# See documentation for function `ancestralStateReconstruction(obj::PhyloNetworkLinearModel[, X_n::Matrix])` for examples. -# -# """ -# function Gadfly.plot(net::HybridNetwork, obj::ReconstructedStates; kwargs...) -# plot(net, nodeLabel = predintPlot(obj); kwargs...) -# end """ ancestralStateReconstruction(net::HybridNetwork, Y::Vector, params::ParamsBM) @@ -2185,7 +2168,7 @@ function ancestralStateReconstruction(obj::PhyloNetworkLinearModel, X_n::Matrix) obj) end -""" +@doc raw""" ancestralStateReconstruction(obj::PhyloNetworkLinearModel[, X_n::Matrix]) Function to find the ancestral traits reconstruction on a network, given an @@ -2200,7 +2183,7 @@ See documentation for this type and examples for functions that can be applied t # Examples -```jldoctest +```jldoctest; filter = [r" PhyloNetworks .*:\d+", r"Info: Loading DataFrames support into Gadfly"] julia> using CSV # to read data file julia> phy = readTopology(joinpath(dirname(pathof(PhyloNetworks)), "..", "examples", "carnivores_tree.txt")); @@ -2353,7 +2336,7 @@ julia> using DataFrames # to use allowmissing! 
julia> allowmissing!(dat, :trait); -julia> dat[[2, 5], :trait] = missing; # missing values allowed to fit model +julia> dat[[2, 5], :trait] .= missing; # missing values allowed to fit model julia> fitBM = phyloNetworklm(@formula(trait ~ 1), dat, phy); @@ -2365,7 +2348,7 @@ julia> ancStates = ancestralStateReconstruction(fitBM); └ @ PhyloNetworks ~/build/crsl4/PhyloNetworks.jl/src/traits.jl:2163 julia> expectations(ancStates) -31×2 DataFrame +31×2 DataFrames.DataFrame │ Row │ nodeNumber │ condExpectation │ │ │ Int64 │ Float64 │ ├─────┼────────────┼─────────────────┤ @@ -2410,7 +2393,7 @@ julia> predint(ancStates) 1.0695 1.0695 julia> expectationsPlot(ancStates) # format node <-> ancestral state -31×2 DataFrame +31×2 DataFrames.DataFrame │ Row │ nodeNumber │ PredInt │ │ │ Int64 │ Abstract… │ ├─────┼────────────┼───────────┤ @@ -2434,7 +2417,7 @@ julia> expectationsPlot(ancStates) # format node <-> ancestral state julia> plot(phy, :RCall, nodeLabel = expectationsPlot(ancStates)); julia> predintPlot(ancStates) # prediction intervals, in data frame, useful to plot -31×2 DataFrame +31×2 DataFrames.DataFrame │ Row │ nodeNumber │ PredInt │ │ │ Int64 │ Abstract… │ ├─────┼────────────┼───────────────┤ diff --git a/src/traitsLikDiscrete.jl b/src/traitsLikDiscrete.jl index c8b24aa21..73304c81e 100644 --- a/src/traitsLikDiscrete.jl +++ b/src/traitsLikDiscrete.jl @@ -47,7 +47,8 @@ function Base.show(io::IO, obj::SSM) disp *= "$(obj.nsites) traits, $(length(obj.trait)) species\n" if obj.ratemodel.ncat != 1 disp *= "variable rates across sites ~ discretized gamma with\n alpha=$(obj.ratemodel.alpha)" - disp *= "\n $(obj.ratemodel.ncat) categories\n rate multipliers: $(obj.ratemodel.ratemultiplier)\n" + disp *= "\n $(obj.ratemodel.ncat) categories" + disp *= "\n rate multipliers: $(round.(obj.ratemodel.ratemultiplier, digits=5))\n" end disp *= "on a network with $(obj.net.numHybrids) reticulations" if !ismissing(obj.loglik) @@ -163,7 +164,7 @@ julia> fitJC69 = fitdiscrete(net, 
mJC69, tips) PhyloNetworks.StatisticalSubstitutionModel: Jukes and Cantor 69 Substitution Model, absolute rate version -off-diagonal rates equal to [0.292336]/3. +off-diagonal rates equal to 0.29234/3. rate matrix Q: A C G T A * 0.0974 0.0974 0.0974 @@ -178,13 +179,13 @@ julia> rv = RateVariationAcrossSites() Rate Variation Across Sites using Discretized Gamma Model alpha: 1.0 categories for Gamma discretization: 4 -ratemultiplier: [0.145784, 0.513132, 1.07083, 2.27025] +ratemultiplier: [0.14578, 0.51313, 1.07083, 2.27025] julia> fitdiscrete(net, mJC69, rv, tips; optimizeQ=false, optimizeRVAS=false) PhyloNetworks.StatisticalSubstitutionModel: Jukes and Cantor 69 Substitution Model, absolute rate version -off-diagonal rates equal to [0.25]/3. +off-diagonal rates equal to 0.25/3. rate matrix Q: A C G T A * 0.0833 0.0833 0.0833 @@ -195,7 +196,7 @@ rate matrix Q: variable rates across sites ~ discretized gamma with alpha=1.0 4 categories - rate multipliers: [0.145784, 0.513132, 1.07083, 2.27025] + rate multipliers: [0.14578, 0.51313, 1.07083, 2.27025] on a network with 0 reticulations log-likelihood: -5.2568 ``` @@ -241,8 +242,8 @@ function fitdiscrete(net::HybridNetwork, model::SubstitutionModel, ratemodel::Ra error("""expecting taxon names in column 'taxon', or 'species' or column 1, and trait values in column 'trait' or column 2.""") end - species = copy(dat[i]) # modified in place later - dat = traitlabels2indices(dat[j], model) # vec of vec, indices + species = dat[:,i] # modified in place later + dat = traitlabels2indices(dat[!,j], model) # vec of vec, indices o, net = check_matchtaxonnames!(species, dat, net) StatsBase.fit(StatisticalSubstitutionModel, net, model, ratemodel, view(dat, o); kwargs...) end @@ -300,13 +301,8 @@ end function fitdiscrete(net::HybridNetwork, model::SubstitutionModel, ratemodel::RateVariationAcrossSites, dnadata::DataFrame, dnapatternweights::Array{Float64}; kwargs...) 
- dat2 = traitlabels2indices(dnadata[2:end], model) - # dat2 = traitlabels2indices(view(dnadata, 2:ncol(dnadata)), model) #removed view bc it wasnt recognized - #as DF by traitlabels2indices - # this doesnt work, but was attempting to uses view to avoid a shallow copy and - # be more space efficient - #produces a vec of vec, indices - o, net = check_matchtaxonnames!(copy(dnadata[1]), dat2, net) + dat2 = traitlabels2indices(dnadata[!,2:end], model) + o, net = check_matchtaxonnames!(dnadata[:,1], dat2, net) kwargs = (:siteweights => dnapatternweights, kwargs...) StatsBase.fit(StatisticalSubstitutionModel, net, model, ratemodel, view(dat2, o); kwargs...) @@ -320,7 +316,7 @@ function fitdiscrete(net::HybridNetwork, modSymbol::Symbol, dnadata::DataFrame, model = JC69([1.0], true) # 1.0 instead of rate because relative version (relative = true) elseif modSymbol == :HKY85 model = HKY85([1.0], # transition/transversion rate ratio - empiricalDNAfrequencies(view(dnadata, 2:ncol(dnadata)), dnapatternweights), + empiricalDNAfrequencies(view(dnadata, :, 2:ncol(dnadata)), dnapatternweights), true) elseif modSymbol == :ERSM model = EqualRatesSubstitutionModel(4, rate, [BioSymbols.DNA_A, BioSymbols.DNA_C, BioSymbols.DNA_G, BioSymbols.DNA_T]); @@ -741,7 +737,7 @@ julia> dat = DataFrame(species=["C","A","B","D"], trait=["hi","lo","lo","hi"]); julia> fit1 = fitdiscrete(net, m1, dat); julia> asr = ancestralStateReconstruction(fit1) -9×4 DataFrame +9×4 DataFrames.DataFrame │ Row │ nodenumber │ nodelabel │ lo │ hi │ │ │ Int64 │ String │ Float64 │ Float64 │ ├─────┼────────────┼───────────┼──────────┼──────────┤ @@ -762,7 +758,7 @@ julia> round.(exp.(fit1.postltw), digits=6) # marginal (posterior) probability t julia> using PhyloPlots -julia> plot(fit1.net, :R, nodeLabel = asr[[:nodenumber, :lo]], tipOffset=0.2); # pp for "lo" state +julia> plot(fit1.net, :R, nodeLabel = asr[!,[:nodenumber, :lo]], tipOffset=0.2); # pp for "lo" state ``` """ ancestralStateReconstruction(obj::SSM) = 
ancestralStateReconstruction(obj, obj.activesite) @@ -853,7 +849,7 @@ function learnLabels(modSymbol::Symbol, species::Array{String}, dat::DataFrame) if modSymbol == :BTSM length(labels) == 2 || error("Binary Trait Substitution Model supports traits with two states. These data have do not have two states.") elseif modSymbol == :TBTSM - unique(dat[1]) == 2 && unique(dat[2] == 2) || error("Two Binary Trait Substitution Model supports two traits with two states each.") + unique(dat[!,1]) == 2 && unique(dat[!,2] == 2) || error("Two Binary Trait Substitution Model supports two traits with two states each.") elseif modSymbol == :HKY85 occursin(uppercase(join(sort(labels))), "ACGT") || error("HKY85 requires that trait data are dna bases A, C, G, and T") elseif modSymbol == :JC69 diff --git a/test/1_astral.out b/test/1_astral.out deleted file mode 100644 index 1d69b74a6..000000000 --- a/test/1_astral.out +++ /dev/null @@ -1 +0,0 @@ -(6,(5,(3,(4,(2,1))))); diff --git a/test/CaseH_output.txt b/test/CaseH_output.txt deleted file mode 100644 index 1f1c47528..000000000 --- a/test/CaseH_output.txt +++ /dev/null @@ -1,6 +0,0 @@ -"t1","t2","t3","t4","CF1234","CF1324","CF1423" -"6","7","4","8",0.13002257237728915,0.7399548552454217,0.13002257237728915 -"6","7","10","8",0.36936019721747243,0.28371387344983595,0.34692592933269173 -"10","7","4","8",0.34692592933269173,0.28371387344983595,0.36936019721747243 -"6","10","4","8",0.12051951084152591,0.7589609783169482,0.12051951084152591 -"6","7","4","10",0.11095702789935982,0.7780859442012804,0.11095702789935982 diff --git a/test/HGTtableCF.txt b/test/HGTtableCF.txt deleted file mode 100644 index 9600296bd..000000000 --- a/test/HGTtableCF.txt +++ /dev/null @@ -1,16 +0,0 @@ -"t1","t2","t3","t4","CF1234","CF1324","CF1423" -"6","1","5","4",0.4,0.6,0.0 -"6","1","5","2",0.2,0.8,0.0 -"6","1","5","3",0.5,0.5,0.0 -"6","1","4","2",0.4,0.6,0.0 -"6","1","4","3",0.4,0.2,0.4 -"6","1","2","3",0.2,0.1,0.7 -"6","5","4","2",0.8,0.0,0.2 
-"6","5","4","3",0.4,0.4,0.2 -"6","5","2","3",0.6,0.4,0.0 -"6","4","2","3",0.3,0.2,0.5 -"1","5","4","2",0.4,0.0,0.6 -"1","5","4","3",0.3,0.5,0.2 -"1","5","2","3",0.1,0.8,0.1 -"1","4","2","3",0.1,0.5,0.4 -"5","4","2","3",0.3,0.2,0.5 diff --git a/test/Tree_output.txt b/test/Tree_output.txt deleted file mode 100644 index 60e6cea76..000000000 --- a/test/Tree_output.txt +++ /dev/null @@ -1,6 +0,0 @@ -"t1","t2","t3","t4","CF1234","CF1324","CF1423" -"6","7","4","8",0.2729102510259939,0.45417949794801216,0.2729102510259939 -"6","7","10","8",0.3967750546426937,0.30161247267865315,0.30161247267865315 -"10","7","4","8",0.30161247267865315,0.30161247267865315,0.3967750546426937 -"6","10","4","8",0.24693940689390592,0.5061211862121882,0.24693940689390592 -"6","7","4","10",0.2729102510259939,0.45417949794801216,0.2729102510259939 diff --git a/test/add_hybrid_caseC.jl b/test/add_hybrid_caseC.jl deleted file mode 100644 index 3c48cd071..000000000 --- a/test/add_hybrid_caseC.jl +++ /dev/null @@ -1,25 +0,0 @@ -# function that chooses the right index1, index2 -# to add hybridization for Case C to tree_example.jl -# warning: requires other functions not updated here -# as this function will only run as part of -# tests_5taxon.jl -# Claudia September 2015 - - -# we need a different chooseEdgesGamma to control the edges1,2 chosen -# change index1, index2 according to the specific case -# warning: not real chooseEdgesGamma, the real one is in functions.jl -function chooseEdgesGamma(net::HybridNetwork) -@warn "function chooseEdgesGamma is deterministic" - index1 = 4; - index2 = 7; - edge1 = net.edge[index1]; - edge2 = net.edge[index2]; - gamma = rand()*0.5; - return edge1, edge2, gamma -end - - -# index1=4, index2=7 => case C -include("print_add.jl") -testCaseC(net) diff --git a/test/add_hybrid_caseD.jl b/test/add_hybrid_caseD.jl deleted file mode 100644 index c603583f4..000000000 --- a/test/add_hybrid_caseD.jl +++ /dev/null @@ -1,26 +0,0 @@ -# function that chooses the right index1, 
index2 -# to add hybridization for Case D bad triangle -# to tree_example.jl -# warning: requires other functions not updated here -# as this function will only run as part of -# tests_5taxon.jl -# Claudia September 2015 - - -# we need a different chooseEdgesGamma to control the edges1,2 chosen -# change index1, index2 according to the specific case -# warning: not real chooseEdgesGamma, the real one is in functions.jl -function chooseEdgesGamma(net::HybridNetwork) -@warn "function chooseEdgesGamma is deterministic" - index1 = 7; - index2 = 1; - edge1 = net.edge[index1]; - edge2 = net.edge[index2]; - gamma = rand()*0.5; - return edge1, edge2, gamma -end - - -# index1=7, index2=1 => case D (bad triangle) -include("print_add.jl") -testCaseD(net) diff --git a/test/add_hybrid_caseE.jl b/test/add_hybrid_caseE.jl deleted file mode 100644 index 91d1e6598..000000000 --- a/test/add_hybrid_caseE.jl +++ /dev/null @@ -1,25 +0,0 @@ -# function that chooses the right index1, index2 -# to add hybridization for Case E to tree_example.jl -# warning: requires other functions not updated here -# as this function will only run as part of -# tests_5taxon.jl -# Claudia September 2015 - - -# we need a different chooseEdgesGamma to control the edges1,2 chosen -# change index1, index2 according to the specific case -# warning: not real chooseEdgesGamma, the real one is in functions.jl -function chooseEdgesGamma(net::HybridNetwork) -@warn "function chooseEdgesGamma is deterministic" - index1 = 1; - index2 = 4; - edge1 = net.edge[index1]; - edge2 = net.edge[index2]; - gamma = rand()*0.5; - return edge1, edge2, gamma -end - - -# index1=1, index2=4 => case E -include("print_add.jl") -testCaseE(net) diff --git a/test/add_hybrid_caseF.jl b/test/add_hybrid_caseF.jl deleted file mode 100644 index 8bb7794a8..000000000 --- a/test/add_hybrid_caseF.jl +++ /dev/null @@ -1,26 +0,0 @@ -# function that chooses the right index1, index2 -# to add hybridization for Case F bad diamond -# to tree_example.jl -# 
warning: requires other functions not updated here -# as this function will only run as part of -# tests_5taxon.jl -# Claudia September 2015 - - -# we need a different chooseEdgesGamma to control the edges1,2 chosen -# change index1, index2 according to the specific case -# warning: not real chooseEdgesGamma, the real one is in functions.jl -function chooseEdgesGamma(net::HybridNetwork) -@warn "function chooseEdgesGamma is deterministic" - index1 = 7; - index2 = 6; - edge1 = net.edge[index1]; - edge2 = net.edge[index2]; - gamma = rand()*0.5; - return edge1, edge2, gamma -end - - -# index1=7, index2=6 => case F (bad diamond) -include("print_add.jl") -testCaseF(net) diff --git a/test/add_hybrid_caseG.jl b/test/add_hybrid_caseG.jl deleted file mode 100644 index 1cf8d35bc..000000000 --- a/test/add_hybrid_caseG.jl +++ /dev/null @@ -1,25 +0,0 @@ -# function that chooses the right index1, index2 -# to add hybridization for Case G to tree_example.jl -# warning: requires other functions not updated here -# as this function will only run as part of -# tests_5taxon.jl -# Claudia September 2015 - - -# we need a different chooseEdgesGamma to control the edges1,2 chosen -# change index1, index2 according to the specific case -# warning: not real chooseEdgesGamma, the real one is in functions.jl -function chooseEdgesGamma(net::HybridNetwork) -@warn "function chooseEdgesGamma is deterministic" - index1 = 3; - index2 = 7; - edge1 = net.edge[index1]; - edge2 = net.edge[index2]; - gamma = rand()*0.5; - return edge1, edge2, gamma -end - - -# index1=3, index2=7 => case G -include("print_add.jl") -testCaseG(net) diff --git a/test/add_hybrid_caseH.jl b/test/add_hybrid_caseH.jl deleted file mode 100644 index 21422cfb8..000000000 --- a/test/add_hybrid_caseH.jl +++ /dev/null @@ -1,25 +0,0 @@ -# function that chooses the right index1, index2 -# to add hybridization for Case H to tree_example.jl -# warning: requires other functions not updated here -# as this function will only run as part of 
-# tests_5taxon.jl -# Claudia September 2015 - - -# we need a different chooseEdgesGamma to control the edges1,2 chosen -# change index1, index2 according to the specific case -# warning: not real chooseEdgesGamma, the real one is in functions.jl -function chooseEdgesGamma(net::HybridNetwork) -@warn "function chooseEdgesGamma is deterministic" - index1 = 1; - index2 = 3; - edge1 = net.edge[index1]; - edge2 = net.edge[index2]; - gamma = rand()*0.5; - return edge1, edge2, gamma -end - - -# index1=1, index2=3 => case H -include("print_add.jl") -testCaseH(net) diff --git a/test/add_hybrid_caseI.jl b/test/add_hybrid_caseI.jl deleted file mode 100644 index 6aa7665ea..000000000 --- a/test/add_hybrid_caseI.jl +++ /dev/null @@ -1,25 +0,0 @@ -# function that chooses the right index1, index2 -# to add hybridization for Case I to tree_example.jl -# warning: requires other functions not updated here -# as this function will only run as part of -# tests_5taxon.jl -# Claudia September 2015 - - -# we need a different chooseEdgesGamma to control the edges1,2 chosen -# change index1, index2 according to the specific case -# warning: not real chooseEdgesGamma, the real one is in functions.jl -function chooseEdgesGamma(net::HybridNetwork) -@warn "function chooseEdgesGamma is deterministic" - index1 = 6; - index2 = 4; - edge1 = net.edge[index1]; - edge2 = net.edge[index2]; - gamma = rand()*0.5; - return edge1, edge2, gamma -end - - -# index1=6, index2=4 => case I -include("print_add.jl") -testCaseI(net) diff --git a/test/add_hybrid_caseJ.jl b/test/add_hybrid_caseJ.jl deleted file mode 100644 index ec107b747..000000000 --- a/test/add_hybrid_caseJ.jl +++ /dev/null @@ -1,25 +0,0 @@ -# function that chooses the right index1, index2 -# to add hybridization for Case J to tree_example.jl -# warning: requires other functions not updated here -# as this function will only run as part of -# tests_5taxon.jl -# Claudia September 2015 - - -# we need a different chooseEdgesGamma to control the 
edges1,2 chosen -# change index1, index2 according to the specific case -# warning: not real chooseEdgesGamma, the real one is in functions.jl -function chooseEdgesGamma(net::HybridNetwork) -@warn "function chooseEdgesGamma is deterministic" - index1 = 5; - index2 = 2; - edge1 = net.edge[index1]; - edge2 = net.edge[index2]; - gamma = rand()*0.5; - return edge1, edge2, gamma -end - - -# index1=5, index2=2 => case J -include("print_add.jl") -testCaseJ(net) diff --git a/test/bestNet.err b/test/bestNet.err deleted file mode 100644 index 79e4b318d..000000000 --- a/test/bestNet.err +++ /dev/null @@ -1,2 +0,0 @@ - - Total errors: 0 in seeds [] \ No newline at end of file diff --git a/test/bestNet.out b/test/bestNet.out deleted file mode 100644 index 86d9d8a35..000000000 --- a/test/bestNet.out +++ /dev/null @@ -1,18 +0,0 @@ -(2,(((5,(6,#H7:2.905::0.286):10.0):0.519,3):0.489,4):1.512,(1)#H7:2.07::0.714); - -Ploglik = 126.7425098197494 - Dendroscope: (2,(((5,(6,#H7:2.905):10.0):0.519,3):0.489,4):1.512,(1)#H7:2.07); - Elapsed time: 148.405287691 seconds in 10 successful runs -------- -List of estimated networks for all runs: - (2,(3,(5,#H7:1.076::0.262):8.917):1.083,(((1,6):0.015,4):0.0)#H7:0.0::0.738);, with -loglik 348.07622750889095 - (#H7:0.365::0.454,5,(6,(1,(2,((4,3):0.0)#H7:0.0::0.546):0.519):2.237):9.432);, with -loglik 127.51222108633253 - (2,(((5,(6,#H7:2.905::0.286):10.0):0.519,3):0.489,4):1.512,(1)#H7:2.07::0.714);, with -loglik 126.7425098197494 - ((1,((5,#H7:0.365::0.454):9.151,6):2.237):0.519,2,((3,4):0.0)#H7:0.0::0.546);, with -loglik 127.51268061323222 - (2,(1,(3,((5,6):0.295,(4)#H7:1.778::0.673):0.046):1.107):10.0,#H7:0.456::0.327);, with -loglik 158.1387467216007 - (3,(5,(6,#H7:0.0::0.286):10.0):0.519,((2,(1)#H7:10.0::0.714):1.512,4):0.489);, with -loglik 126.74250981975692 - (3,(1,(2,#H7:0.312::0.327):9.995):1.107,((6,5):0.295,(4)#H7:0.0::0.673):0.046);, with -loglik 158.13875001616046 - 
(4,((1,(2)#H7:0.0::0.627):0.808,(3,(5,6):0.335):0.126):0.4,#H7:0.065::0.373);, with -loglik 155.49625735420875 - (2,(1,((5,#H7:0.04::0.448):6.821,6):2.244):0.521,((3,4):0.021)#H7:0.0::0.552);, with -loglik 128.22380649461752 - ((5,4):0.0,(3)#H7:0.121::0.964,(6,(2,(1,#H7:0.0::0.036):6.33):0.725):0.024);, with -loglik 311.13916979917155 - (2,(4,(3,(5,(6,#H7:0.366::0.286):10.0):0.519):0.489):1.512,(1)#H7:7.716::0.714);, with -loglik 126.74250981976283 -------- \ No newline at end of file diff --git a/test/debug_n6.jl b/test/debug_n6.jl deleted file mode 100644 index 8e9ef6889..000000000 --- a/test/debug_n6.jl +++ /dev/null @@ -1,131 +0,0 @@ -# tests for the usual functions: optBL, extractQuartet, moves -# for network with n=6 (1_astral.out, add hybridization; true network) -# messy code intended to find specific bugs, does not follow an order -# Claudia April 2015 - -include("../src/types.jl") -include("../src/functions.jl") - -quartets = readListQuartets("allQuartets.txt"); - -truenet = readTopologyUpdate("truenetwork.txt"); -printEdges(truenet) -printNodes(truenet) - -tree = readTopologyUpdate("1_astral.out"); -printEdges(tree) -writeTopologyLevel1(tree) -net = deepcopy(tree); -Random.seed!(1234) -addHybridizationUpdate!(net); -printEdges(net) -writeTopologyLevel1(net) - - -extractQuartet!(truenet, quartets) # no errors -extractQuartet!(net,quartets) # errors: after delete leaf!: fixed! - -printNodes(net) -nettest = deepcopy(net); -extractQuartet!(nettest,quartets[1]) # error here - -qnet = QuartetNetwork(net); -deleteLeaf!(qnet,qnet.node[3]) -printEdges(qnet) - -## qnet2 = deepcopy(qnet); # for testing -## printNodes(qnet2) - -## deleteLeaf!(qnet2,qnet2.node[4]) # no error!!! 
- -deleteLeaf!(qnet,qnet.node[4]) -updateHasEdge!(qnet,net) -parameters!(qnet,net) # error here: fixed - -# now, error in minor edge 12 set as identifiable AND node.k=-1 even after updateInCycle -tree = readTopologyUpdate("1_astral.out"); -printEdges(tree) -writeTopologyLevel1(tree) -net = deepcopy(tree); -Random.seed!(1234) -hybrid = addHybridization!(net); -printNodes(net) -printEdges(net) -flag, nocycle, edgesInCycle, nodesInCycle = updateInCycle!(net,hybrid); -net.node[11].k #4, so correct! -updateMajorHybrid!(net,hybrid); -flag2, edgesGammaz = updateGammaz!(net,hybrid,allow); - -success,hybrid,flag,nocycle,flag2,flag3 = addHybridizationUpdate!(net); - - -# ---------------- -include("../src/types.jl") -include("../src/functions.jl") - -quartets = readListQuartets("allQuartets.txt"); -df = readtable("HGT_truenet_expCF.csv") -d = readTableCF(df); #expCF - -tree = readTopologyUpdate("1_astral.out"); -printEdges(tree) -net = deepcopy(tree); -Random.seed!(1234) -addHybridizationUpdate!(net); -printEdges(net) - -currT = deepcopy(net); -optBL!(currT,d) -newT = deepcopy(currT); -count = 0 -move = whichMove(currT) -move = :CHdir -move = :MVorigin -move = :MVtarget -move = :nni - -movescount = zeros(Int,18) -movesfail = zeros(Int,6) -flag = proposedTop!(move,newT,true,count,1, movescount, movesfail) -printEdges(newT) -printNodes(newT) -optBL!(newT,d) - -newT0 = deepcopy(newT); - -qnet = QuartetNetwork(newT); -q=quartets[3]; -q.taxon -newT.names -extractQuartet!(newT,quartets) # error - -qnet = QuartetNetwork(newT); -printNodes(qnet) -deleteLeaf!(qnet,qnet.node[4]) -printNodes(qnet) -printEdges(qnet) -qnet0 = deepcopy(qnet); -qnet = deepcopy(qnet0); - -identifyQuartet!(qnet) -qnet.which != 1 ? error("qnet which not correctly assigned") : nothing -qnet.hybrid[1].k != 2 ? error("qnet.hybrid[1].k not correctly assigned") : nothing -qnet.hybrid[1].typeHyb != 1 ? error("qnet.hybrid[1].typeHyb not correctly assigned") : nothing -qnet.hybrid[1].prev.number != -5 ? 
error("qnet.hybrid[1].prev not correctly assigned") : nothing - -eliminateHybridization!(qnet) #error here -size(qnet.hybrid,1) != 0 || qnet.numHybrids != 0 ? error("qnet should not have hybrid nodes anymore") : nothing -qnet.t1 != 0.2-log(1-0.1*(1-exp(-1.1))) ? error("internal edge length not correctly updated") : nothing - -#--- -node = qnet.hybrid[1]; - -updateSplit!(qnet) -qnet.split != [1,1,2,2] ? error("qnet.split not correctly assigned") : nothing - -updateFormula!(qnet) -qnet.formula != [2,1,2] ? error("qnet.formula not correctly assigned") : nothing - -calculateExpCF!(qnet) -qnet.expCF != [1/3*exp(-qnet.t1),1-2/3*exp(-qnet.t1),1/3*exp(-qnet.t1)] ? error("qnet.expCF wrongly calculated") : nothing - diff --git a/test/debug_optBL.jl b/test/debug_optBL.jl deleted file mode 100644 index 01408b8e6..000000000 --- a/test/debug_optBL.jl +++ /dev/null @@ -1,106 +0,0 @@ -# code to try to figure out problem with gammaz inequality in bad diamond I -# Claudia March 2015 - -# test optBL with Case F Bad Diamond I -# Claudia January 2015 - - -## include("../examples/case_f_example.jl"); -## parameters!(net) -## q1 = Quartet(1,["6","7","4","8"],[0.5,0.4,0.1]); -## q2 = Quartet(2,["6","7","10","8"],[0.5,0.4,0.1]); -## q3 = Quartet(3,["10","7","4","8"],[0.5,0.4,0.1]); -## q4 = Quartet(4,["6","10","4","8"],[0.5,0.4,0.1]); -## q5 = Quartet(5,["6","7","4","10"],[0.5,0.4,0.1]); - -## d = DataCF([q1,q2,q3,q4,q5]); -## extractQuartet!(net,d) - -## df = writeExpCF(d.quartet) -## writetable("CaseF_output.csv",df) - -## include("../examples/case_f_example2.jl"); -## parameters!(net) -## q1 = Quartet(1,["6","7","4","8"],[0.5,0.4,0.1]); -## q2 = Quartet(2,["6","7","10","8"],[0.5,0.4,0.1]); -## q3 = Quartet(3,["10","7","4","8"],[0.5,0.4,0.1]); -## q4 = Quartet(4,["6","10","4","8"],[0.5,0.4,0.1]); -## q5 = Quartet(5,["6","7","4","10"],[0.5,0.4,0.1]); - -## d = DataCF([q1,q2,q3,q4,q5]); -## extractQuartet!(net,d) - -## df = writeExpCF(d.quartet) -## writetable("CaseF_output2.csv",df) - 
-include("../src/types.jl") -include("../src/functions.jl") - -df = readtable("CaseF_output.csv") -df2 = readtable("CaseF_output2.csv") #longer branches -d = readTableCF(df) -d2 = readTableCF(df2) - -# starting ht (gamma,t4,t5,t9) -ht = [0.1,1.,1.,1.] -ht = [0.3,10.,10.,1.] # crashed, with check inside logPseudoLik, does not crash! - -tree = string("(((6:0.1,(4)11#H1:::",string(1-ht[1]),")1:",string(ht[3]),",(11#H1:::",string(ht[1]),",7))5:",string(ht[4])",8:0.1,10:0.1);") # Case F: bad diamond I -f = open("prueba_tree.txt","w") -write(f,tree) -close(f) -net = readTopologyUpdate("prueba_tree.txt"); -printEdges(net) - -net.ht -realht1 = [0.1,0.127,0.0285] -realht2 = [1.1,0.49,0.2] - -@time optBL!(net,d,true) - -# ----------------- NLopt example website ----------------------- -# to do here: put website example and change starting point to close to the boundary -# also, do a small example of optimization with inequality x_n+x_n-1<=1 to send to the author of nlopt - -using NLopt - -count = 0 # keep track of # function evaluations - -function myfunc(x::Vector, grad::Vector) - println("myfunc with x $(x)") - if length(grad) > 0 - grad[1] = 0 - grad[2] = 0.5/sqrt(x[2]) - end - global count - count::Int += 1 - println("f_$count($x)") - sqrt(x[2]) -end - -function myconstraint(x::Vector, grad::Vector, a, b) - println("myconstraint with x $(x)") - if length(grad) > 0 - grad[1] = 3a * (a*x[1] + b)^2 - grad[2] = -1 - end - val = (a*x[1] + b)^3 - x[2] #should be negative - println("value of inequality: $(val)") - #val<0 || error("val in myconstraint not negative") - return val -end - - -#opt = Opt(:LD_MMA, 2) -opt = Opt(:LN_COBYLA, 2) -lower_bounds!(opt, [-Inf, 0.]) -xtol_rel!(opt,1e-4) - -min_objective!(opt, myfunc) -inequality_constraint!(opt, (x,g) -> myconstraint(x,g,2,0), 1e-8) -inequality_constraint!(opt, (x,g) -> myconstraint(x,g,-1,1), 1e-8) - -x=[1.234,5.678] -x=[1.,2.] 
-(minf,minx,ret) = optimize(opt,x) -println("got $minf at $minx after $count iterations (returned $ret)") diff --git a/test/debug_readTopology.jl b/test/debug_readTopology.jl deleted file mode 100644 index 7baf6ab3a..000000000 --- a/test/debug_readTopology.jl +++ /dev/null @@ -1,67 +0,0 @@ -# problems found in readTopologyUpdate when trying to plot with John Spaw's functions -# will test here the problems -# Claudia July 2015 - -include("../src/types.jl") -include("../src/functions.jl") - -# estNetworks form baseline and phylonet -n1 = "(((1,2))#H1,(6,(5,((4,3),#H1))));" -n2 = "((5)#H1,(6,((1,2),(3,(4,#H1)))));" -n3 = "((5,((3,4),((2,1))#H1)),(6,#H1));" -n4 = "(6,(5,((1,(2,(3)#H1)),(4,#H1))));" -n5 = "(6,((((3,4))#H1,(2,1)),(5,#H1)));" -n6 = "((((5,(3,4)))#H1,(1,2)),(6,#H1));" -n7 = "(((5)#H1,((2,1),(4,3))),(#H1,6));" -n8 = "(6,((5,((4,3))#H1),((1,2),#H1)));" -n9 = "(6,(((1,2),((3,4))#H1),(5,#H1)));" -n10 = "(6,((((4,3))#H1,(1,2)),(5,#H1)));" -n11 = "(6,(((2,1),((4,3))#H1),(5,#H1)));" -n12 = "(6,((5)#H1,((2,1),((4,3),#H1))));" -n13 = "(6,((((4,3))#H1,(1,2)),(#H1,5)));" -n14 = "(6,((5,((4,3))#H1),(#H1,(2,1))));" -n15 = "(6,(((2,1),((4,3))#H1),(5,#H1)));" -n16 = "(6,(5,(((4,3))#H1,(1,(#H1,2)))));" -n17 = "(6,(((1,2),((4)#H1,3)),(5,#H1)));" -n18 = "((5,((4,3),(1,(2)#H1))),(6,#H1));" -n19 = "((((5,(3,4)))#H1,6),((2,1),#H1));" -n20 = "(6,(((1,2),((4,3))#H1),(5,#H1)));" -n21 = "(6,((5,((3,4))#H1),((1,2),#H1)));" -n22 = "((5)#H1,(6,((2,1),(#H1,(4,3)))));" -n23 = "(6,(((1,2),((4)#H1,3)),(5,#H1)));" -n24 = "(6,((5,((3,4))#H1),((1,2),#H1)));" -n25 = "(6,((5,((4,3))#H1),((2,1),#H1)));" -n26 = "(((1,2))#H1,(6,(5,((3,4),#H1))));" -n27 = "(6,((5)#H1,((2,1),((4,3),#H1))));" -n28 = "(6,(((2,1),((3,4))#H1),(5,#H1)));" -n29 = "(6,((((4,3))#H1,(2,1)),(5,#H1)));" -n30 = "(6,(((1,2),(3,(4)#H1)),(#H1,5)));" - -string = [n1,n2,n3,n4,n5,n6,n7,n8,n9,n10,n11,n12,n13,n14,n15,n16,n17,n18,n19,n20,n21,n22,n23,n24,n25,n26,n27,n28,n29,n30] - -for s in string - net = readTopologyUpdate(s); - 
printEdges(net) - printNodes(net) - net.node[net.root].number - net.node[net.root].leaf - canBeRoot(net.node[net.root]) || error("root wrongly placed in $(s)") - - net2 = readTopologyUpdate(s,true); - printEdges(net2) - printNodes(net2) - net2.node[net2.root].number - net2.node[net2.root].leaf - canBeRoot(net2.node[net2.root]) || error("root wrongly placed in $(s)") -end - -println("NO ERRORS!") - -## # -------------- -## n31 = "(6,((5,(((1,(2,(3)#H7:9.380388137723473):1.215558354715711):1.533022718529031,#H7:0.8208307297991476):0.0,(4)#H9:-0.0):10.0):10.0,#H9:0.0):0.02572743545235116);" -## net = readTopologyUpdate(n31) -## printEdges(net) -## printNodes(net) -## net.node[net.root].number -## net.node[net.root].leaf -## canBeRoot(net.node[net.root]) || error("root wrongly placed in $(s)") diff --git a/test/delete_caseC.jl b/test/delete_caseC.jl deleted file mode 100644 index 56115aa04..000000000 --- a/test/delete_caseC.jl +++ /dev/null @@ -1,27 +0,0 @@ -# function that chooses the right index1, index2 -# to add hybridization for Case C to tree_example.jl -# warning: requires other functions not updated here -# as this function will only run as part of -# tests_5taxon.jl -# it then deletes the hybridization and tests -# if the result is the original tree example -# Claudia September 2015 - - -# we need a different chooseEdgesGamma to control the edges1,2 chosen -# change index1, index2 according to the specific case -# warning: not real chooseEdgesGamma, the real one is in functions.jl -function chooseEdgesGamma(net::HybridNetwork) -@warn "function chooseEdgesGamma is deterministic" - index1 = 4; - index2 = 7; - edge1 = net.edge[index1]; - edge2 = net.edge[index2]; - gamma = rand()*0.5; - return edge1, edge2, gamma -end - - -success,hybrid,flag,nocycle,flag2,flag3 = addHybridizationUpdate!(net); -deleteHybridizationUpdate!(net,hybrid,false); -testTree(net) diff --git a/test/delete_caseD.jl b/test/delete_caseD.jl deleted file mode 100644 index d746f0898..000000000 
--- a/test/delete_caseD.jl +++ /dev/null @@ -1,27 +0,0 @@ -# function that chooses the right index1, index2 -# to add hybridization for Case D to tree_example.jl -# warning: requires other functions not updated here -# as this function will only run as part of -# tests_5taxon.jl -# it then deletes the hybridization and tests -# if the result is the original tree example -# Claudia September 2015 - - -# we need a different chooseEdgesGamma to control the edges1,2 chosen -# change index1, index2 according to the specific case -# warning: not real chooseEdgesGamma, the real one is in functions.jl -function chooseEdgesGamma(net::HybridNetwork) -@warn "function chooseEdgesGamma is deterministic" - index1 = 7; - index2 = 1; - edge1 = net.edge[index1]; - edge2 = net.edge[index2]; - gamma = rand()*0.5; - return edge1, edge2, gamma -end - - -success,hybrid,flag,nocycle,flag2,flag3 = addHybridizationUpdate!(net); -deleteHybridizationUpdate!(net,hybrid,false); -testTree(net) diff --git a/test/delete_caseE.jl b/test/delete_caseE.jl deleted file mode 100644 index 1db3c2173..000000000 --- a/test/delete_caseE.jl +++ /dev/null @@ -1,27 +0,0 @@ -# function that chooses the right index1, index2 -# to add hybridization for Case E to tree_example.jl -# warning: requires other functions not updated here -# as this function will only run as part of -# tests_5taxon.jl -# it then deletes the hybridization and tests -# if the result is the original tree example -# Claudia September 2015 - - -# we need a different chooseEdgesGamma to control the edges1,2 chosen -# change index1, index2 according to the specific case -# warning: not real chooseEdgesGamma, the real one is in functions.jl -function chooseEdgesGamma(net::HybridNetwork) -@warn "function chooseEdgesGamma is deterministic" - index1 = 1; - index2 = 4; - edge1 = net.edge[index1]; - edge2 = net.edge[index2]; - gamma = rand()*0.5; - return edge1, edge2, gamma -end - - -success,hybrid,flag,nocycle,flag2,flag3 = 
addHybridizationUpdate!(net); -deleteHybridizationUpdate!(net,hybrid,false); -testTree(net) diff --git a/test/delete_caseF.jl b/test/delete_caseF.jl deleted file mode 100644 index 9b551a9d4..000000000 --- a/test/delete_caseF.jl +++ /dev/null @@ -1,27 +0,0 @@ -# function that chooses the right index1, index2 -# to add hybridization for Case F to tree_example.jl -# warning: requires other functions not updated here -# as this function will only run as part of -# tests_5taxon.jl -# it then deletes the hybridization and tests -# if the result is the original tree example -# Claudia September 2015 - - -# we need a different chooseEdgesGamma to control the edges1,2 chosen -# change index1, index2 according to the specific case -# warning: not real chooseEdgesGamma, the real one is in functions.jl -function chooseEdgesGamma(net::HybridNetwork) -@warn "function chooseEdgesGamma is deterministic" - index1 = 7; - index2 = 6; - edge1 = net.edge[index1]; - edge2 = net.edge[index2]; - gamma = rand()*0.5; - return edge1, edge2, gamma -end - - -success,hybrid,flag,nocycle,flag2,flag3 = addHybridizationUpdate!(net); -deleteHybridizationUpdate!(net,hybrid,false); -testTree(net) diff --git a/test/delete_caseG.jl b/test/delete_caseG.jl deleted file mode 100644 index a3902bff7..000000000 --- a/test/delete_caseG.jl +++ /dev/null @@ -1,27 +0,0 @@ -# function that chooses the right index1, index2 -# to add hybridization for Case G to tree_example.jl -# warning: requires other functions not updated here -# as this function will only run as part of -# tests_5taxon.jl -# it then deletes the hybridization and tests -# if the result is the original tree example -# Claudia September 2015 - - -# we need a different chooseEdgesGamma to control the edges1,2 chosen -# change index1, index2 according to the specific case -# warning: not real chooseEdgesGamma, the real one is in functions.jl -function chooseEdgesGamma(net::HybridNetwork) -@warn "function chooseEdgesGamma is deterministic" - index1 = 
3; - index2 = 7; - edge1 = net.edge[index1]; - edge2 = net.edge[index2]; - gamma = rand()*0.5; - return edge1, edge2, gamma -end - - -success,hybrid,flag,nocycle,flag2,flag3 = addHybridizationUpdate!(net); -deleteHybridizationUpdate!(net,hybrid,false); -testTree(net) diff --git a/test/delete_caseH.jl b/test/delete_caseH.jl deleted file mode 100644 index f799fcaca..000000000 --- a/test/delete_caseH.jl +++ /dev/null @@ -1,27 +0,0 @@ -# function that chooses the right index1, index2 -# to add hybridization for Case H to tree_example.jl -# warning: requires other functions not updated here -# as this function will only run as part of -# tests_5taxon.jl -# it then deletes the hybridization and tests -# if the result is the original tree example -# Claudia September 2015 - - -# we need a different chooseEdgesGamma to control the edges1,2 chosen -# change index1, index2 according to the specific case -# warning: not real chooseEdgesGamma, the real one is in functions.jl -function chooseEdgesGamma(net::HybridNetwork) -@warn "function chooseEdgesGamma is deterministic" - index1 = 1; - index2 = 3; - edge1 = net.edge[index1]; - edge2 = net.edge[index2]; - gamma = rand()*0.5; - return edge1, edge2, gamma -end - - -success,hybrid,flag,nocycle,flag2,flag3 = addHybridizationUpdate!(net); -deleteHybridizationUpdate!(net,hybrid,false); -testTree(net) diff --git a/test/delete_caseI.jl b/test/delete_caseI.jl deleted file mode 100644 index b1ef94e8e..000000000 --- a/test/delete_caseI.jl +++ /dev/null @@ -1,27 +0,0 @@ -# function that chooses the right index1, index2 -# to add hybridization for Case I to tree_example.jl -# warning: requires other functions not updated here -# as this function will only run as part of -# tests_5taxon.jl -# it then deletes the hybridization and tests -# if the result is the original tree example -# Claudia September 2015 - - -# we need a different chooseEdgesGamma to control the edges1,2 chosen -# change index1, index2 according to the specific case -# 
warning: not real chooseEdgesGamma, the real one is in functions.jl -function chooseEdgesGamma(net::HybridNetwork) -@warn "function chooseEdgesGamma is deterministic" - index1 = 6; - index2 = 4; - edge1 = net.edge[index1]; - edge2 = net.edge[index2]; - gamma = rand()*0.5; - return edge1, edge2, gamma -end - - -success,hybrid,flag,nocycle,flag2,flag3 = addHybridizationUpdate!(net); -deleteHybridizationUpdate!(net,hybrid,false); -testTree(net) diff --git a/test/delete_caseJ.jl b/test/delete_caseJ.jl deleted file mode 100644 index 5badf3e2f..000000000 --- a/test/delete_caseJ.jl +++ /dev/null @@ -1,27 +0,0 @@ -# function that chooses the right index1, index2 -# to add hybridization for Case J to tree_example.jl -# warning: requires other functions not updated here -# as this function will only run as part of -# tests_5taxon.jl -# it then deletes the hybridization and tests -# if the result is the original tree example -# Claudia September 2015 - - -# we need a different chooseEdgesGamma to control the edges1,2 chosen -# change index1, index2 according to the specific case -# warning: not real chooseEdgesGamma, the real one is in functions.jl -function chooseEdgesGamma(net::HybridNetwork) -@warn "function chooseEdgesGamma is deterministic" - index1 = 5; - index2 = 2; - edge1 = net.edge[index1]; - edge2 = net.edge[index2]; - gamma = rand()*0.5; - return edge1, edge2, gamma -end - - -success,hybrid,flag,nocycle,flag2,flag3 = addHybridizationUpdate!(net); -deleteHybridizationUpdate!(net,hybrid,false); -testTree(net) diff --git a/test/descData.txt b/test/descData.txt deleted file mode 100644 index 4543d6123..000000000 --- a/test/descData.txt +++ /dev/null @@ -1,22 +0,0 @@ -DATA: data consists of 10 gene trees and 10 quartets -DATA: same taxa in gene trees and quartets: ASCIIString["6","1","5","4","2","3"] -Taxon 6 appears in 10 input trees (100.0 %) -Taxon 1 appears in 10 input trees (100.0 %) -Taxon 5 appears in 10 input trees (100.0 %) -Taxon 4 appears in 10 input trees 
(100.0 %) -Taxon 2 appears in 10 input trees (100.0 %) -Taxon 3 appears in 10 input trees (100.0 %) ----------------------------- - -Quartet 1 obsCF constructed with 10 gene trees (100.0%) -Quartet 2 obsCF constructed with 10 gene trees (100.0%) -Quartet 3 obsCF constructed with 10 gene trees (100.0%) -Quartet 4 obsCF constructed with 10 gene trees (100.0%) -Quartet 5 obsCF constructed with 10 gene trees (100.0%) -Quartet 6 obsCF constructed with 10 gene trees (100.0%) -Quartet 7 obsCF constructed with 10 gene trees (100.0%) -Quartet 8 obsCF constructed with 10 gene trees (100.0%) -Quartet 9 obsCF constructed with 10 gene trees (100.0%) -Quartet 10 obsCF constructed with 10 gene trees (100.0%) ----------------------------- - diff --git a/test/hola.txt b/test/hola.txt deleted file mode 100644 index 6cc89a964..000000000 --- a/test/hola.txt +++ /dev/null @@ -1,11 +0,0 @@ -"t1","t2","t3","t4","CF1234","CF1324","CF1423" -"6","1","5","3",0.5,0.5,0.0 -"6","1","4","2",0.4,0.6,0.0 -"6","1","2","3",0.2,0.1,0.7 -"6","5","4","3",0.4,0.4,0.2 -"6","4","2","3",0.3,0.2,0.5 -"1","5","4","2",0.4,0.0,0.6 -"1","5","4","3",0.3,0.5,0.2 -"1","5","2","3",0.1,0.8,0.1 -"1","4","2","3",0.1,0.5,0.4 -"5","4","2","3",0.3,0.2,0.5 diff --git a/test/initial_tests_deleteLeaf_quartetNet.jl b/test/initial_tests_deleteLeaf_quartetNet.jl deleted file mode 100644 index 555fe13a0..000000000 --- a/test/initial_tests_deleteLeaf_quartetNet.jl +++ /dev/null @@ -1,89 +0,0 @@ -# Initial tests to see if deleteLeaf works -# as expected with QuartetNetwork type -# Claudia November 2014 - -include("../examples/bad_triangle_example.jl") -include("../examples/case_f_example.jl") -qnet = QuartetNetwork(net); -printEdges(qnet) -printNodes(qnet) -printEdges(net) -printNodes(net) -qnet.hasEdge - - -# bad triangle -deleteLeaf!(qnet,qnet.node[8]) -deleteLeaf!(qnet,qnet.node[7]) -deleteLeaf!(qnet,qnet.node[5]) -deleteLeaf!(qnet,qnet.node[6]) -deleteLeaf!(qnet,qnet.node[1]) - -# bad diamond -deleteLeaf!(qnet,qnet.node[10]) 
-deleteLeaf!(qnet,qnet.node[8]) -deleteLeaf!(qnet,qnet.node[6]) -deleteLeaf!(qnet,qnet.node[7]) -deleteLeaf!(qnet,qnet.node[4]) - -# Case G : no simplification in deleteLeaf! -include("../examples/case_g_example.jl") - -qnet = QuartetNetwork(net); -printEdges(qnet) -printNodes(qnet) - -deleteLeaf!(qnet,qnet.node[10]) # 8 -deleteLeaf!(qnet,qnet.node[7]) # 7 -deleteLeaf!(qnet,qnet.node[4]) # 4 -deleteLeaf!(qnet,qnet.node[2]) # 2 -deleteLeaf!(qnet,qnet.node[1]) # 1 - - -# Case C: bad triangle II -include("../examples/case_c_example.jl") - -qnet = QuartetNetwork(net); -printEdges(qnet) -printNodes(qnet) - -deleteLeaf!(qnet,qnet.node[1]) # 1 -deleteLeaf!(qnet,qnet.node[2]) # 2 -deleteLeaf!(qnet,qnet.node[8]) # 5 -deleteLeaf!(qnet,qnet.node[9]) # 6 -deleteLeaf!(qnet,qnet.node[4]) # 3 - - - - -# test extractQuartet ------------------- - -include("../examples/bad_triangle_example.jl") -q1 = Quartet(1,["6","7","1","8"],[0.5,0.4,0.1]); -qnet = extractQuartet(net,q1); -printEdges(qnet) -printNodes(qnet) -qnet.hasEdge - -include("../examples/bad_triangle_example.jl") -qnet = extractQuartet(net,net.node[6],net.node[7],net.node[1],net.node[8]); -printEdges(qnet) -printNodes(qnet) -qnet.hasEdge - -include("../examples/case_f_example.jl") - - -# bad triangle -qnet=QuartetNetwork(net); -printEdges(qnet) -printNodes(qnet) -deleteLeaf!(qnet,qnet.node[8]) -deleteLeaf!(qnet,qnet.node[7]) -deleteLeaf!(qnet,qnet.node[5]) -deleteLeaf!(qnet,qnet.node[6]) -deleteLeaf!(qnet,qnet.node[1]) - -# bad diamond -deleteLeaf!(qnet,qnet.node[10]) -deleteLeaf!(qnet,qnet.node[8]) diff --git a/test/movesTable.txt b/test/movesTable.txt deleted file mode 100644 index 8aacf6816..000000000 --- a/test/movesTable.txt +++ /dev/null @@ -1 +0,0 @@ -PERFORMANCE: total number of moves (proposed, successful, accepted) in general, and to fix gamma=0.0,t=0.0 cases --------moves general-------- --------moves gamma-------- move Num.Proposed Num.Successful Num.Accepted | Num.Proposed Num.Successful Num.Acceptedadd 0 0 0 
| 0 0 --mvorigin 48 48 0 | 0 0 --mvtarget 37 37 1 | 0 0 --chdir 9 9 1 | 0 0 --delete 0 0 0 | 0 0 --nni 55 0 0 | 0 0 --Total 149 94 2 | 0 0 0Proportion 1.0 0.6 0.0 | 0.0 0.0 0.0 \ No newline at end of file diff --git a/test/net1prueba.networks b/test/net1prueba.networks deleted file mode 100644 index 15704bfee..000000000 --- a/test/net1prueba.networks +++ /dev/null @@ -1,6 +0,0 @@ -((2,(#H-5:::0.49,1)):1.5121335305466412,4,((5,(6)#H-5:::0.51):0.5191605811059357,3):0.489418799149601);, with -loglik -1.0 -((2,((#H-3:::0.49,6),1)):1.5121335305466412,4,((5)#H-3:::0.51,3):0.489418799149601);, with -loglik -1.0 -((2,(((5,#H-4:::0.49):10.0,6),1)):1.5121335305466412,4,(3)#H-4:::0.51);, with -loglik -1.0 -((5,(#H-2:::0.49,3):0.5191605811059357):10.0,6,((2,(4)#H-2:::0.51),1));, with -loglik -1.0 -((2)#H8:::0.51,4,((5,(6,(#H8:::0.49,1)):10.0):0.5191605811059357,3):0.489418799149601);, with -loglik -1.0 -Problem found when optimizing branch lengths for some networks, left loglik as -1. Please report this issue to claudia@stat.wisc.edu, google group or github issues. Thank you! 
\ No newline at end of file diff --git a/test/net1prueba.out b/test/net1prueba.out deleted file mode 100644 index 20ebf8058..000000000 --- a/test/net1prueba.out +++ /dev/null @@ -1,11 +0,0 @@ -((2,(1)#H7:::0.7143969563530107):1.5121335305466412,4,((5,(6,#H7:::0.2856030436469893):10.0):0.5191605811059357,3):0.489418799149601); -Ploglik = 126.74250981975307 - Dendroscope: ((2,(1)#H7):1.5121335305466412,4,((5,(6,#H7):10.0):0.5191605811059357,3):0.489418799149601); - Elapsed time: 8.047296065 seconds in 5 successful runs -------- -List of estimated networks for all runs: - ((2,(1)#H7:::0.7143969563530107):1.5121335305466412,4,((5,(6,#H7:::0.2856030436469893):10.0):0.5191605811059357,3):0.489418799149601);, with -loglik 126.74250981975307 - (1,((4,#H7:7.599603390006409::0.37302486593984974):0.40021990523139966,(3,(6,5):0.33534692016222695):0.12573257042944036):0.8082647558877085,(2)#H7:10.0::0.6269751340601503);, with -loglik 155.49625734566945 - ((3,(2,#H7:0.0::0.49578624417628053):9.864343470186494):0.13851456440978557,(6,5):0.24980503322268607,((1,4):0.0)#H7:0.0::0.5042137558237194);, with -loglik 192.1045202146181 - ((((5,6):0.3353472312700546,3):0.12573315221965528,(1,(2)#H9:1.3329881588275758::0.6269752696229017):0.808264724878358):0.40021960769620457,4,#H9:4.36389880263218::0.37302473037709827);, with -loglik 155.49625734551378 - ((1,(2)#H7:8.721421786168476::0.6269753032429941):0.8082647064511037,(3,(5,6):0.3353472221500118):0.1257332140052616,(4,#H7:0.0::0.3730246967570059):0.4002195296586772);, with -loglik 155.49625734551572 -------- diff --git a/test/net1prueba2.networks b/test/net1prueba2.networks deleted file mode 100644 index a01287124..000000000 --- a/test/net1prueba2.networks +++ /dev/null @@ -1,5 +0,0 @@ -((((6)#H-5:::0.6805144626250624,5):1.954268572767666,3):0.461227328281011,(2,(1,#H-5:::0.31948553737493757):2.478371059718378):0.6665298704988971,4);, with -loglik 128.4704561578712 
-(((5)#H-4:::0.5081358432996316,3):1.0463685044171078,(2,(1,(6,#H-4:::0.4918641567003684):4.598592963405185):0.037466519465998216):0.20087212270712623,4);, with -loglik 231.7736094851884 -((3)#H-3:::0.5944208222904643,(2,(1,(6,(#H-3:::0.40557917770953567,5):9.893729510815922):1.0287950254466858):0.08886267733828704):0.10915585084821476,4);, with -loglik 215.41348384704173 -(6,(((4)#H-2:::0.6100701427224106,3):0.029097601632317324,5):0.22046248932095286,(1,(2,#H-2:::0.38992985727758944):3.1965289740296914):1.1986265394207396);, with -loglik 172.22050695919188 -((((6,(1,(2)#H8:::0.5694311834200659):5.411054959947867):0.15304140347881498,5):0.029744087155358624,3):1.6705680717951823,#H8:::0.43056881657993407,4);, with -loglik 192.03467244507002 diff --git a/test/net1prueba2.out b/test/net1prueba2.out deleted file mode 100644 index 8b66b99c2..000000000 --- a/test/net1prueba2.out +++ /dev/null @@ -1,11 +0,0 @@ -((((6,#H7:::0.2856030435563703):10.0,5):0.5191607785499526,3):0.48941882463229214,(2,(1)#H7:::0.7143969564436297):1.5121336303781006,4); -Ploglik = 126.74250981974951 - Dendroscope: ((((6,#H7):10.0,5):0.5191607785499526,3):0.48941882463229214,(2,(1)#H7):1.5121336303781006,4); - Elapsed time: 6.993290717 seconds in 5 successful runs -------- -List of estimated networks for all runs: - (4,(((6,5):0.3353470932867715,3):0.12573320879015532,(1,(2)#H7:5.733697896500873::0.6269752705040429):0.8082647465545645):0.4002196144174887,#H7:0.07458340422944992::0.3730247294959571);, with -loglik 155.496257345519 - ((((6,#H7:::0.2856030435563703):10.0,5):0.5191607785499526,3):0.48941882463229214,(2,(1)#H7:::0.7143969564436297):1.5121336303781006,4);, with -loglik 126.74250981974951 - (((3,(5,(6)#H7:::0.6922870947532545):1.7390957703886551):0.4598961082497883,4):0.6610717818675633,(1,#H7:::0.3077129052467456):8.879027867903215,2);, with -loglik 128.2264279467482 - 
(3,((2,#H7:::0.32675875153374256):10.0,1):1.1071480539184226,((5,6):0.29503688508386916,(4)#H7:::0.6732412484662574):0.04619015937942773);, with -loglik 158.1387467189919 - ((3,(6,5):0.32545396986099545):0.0,(1,(2,#H7:::0.31783040568500653):9.999199878494535):1.1135649304712412,(4)#H7:::0.6821695943149935);, with -loglik 158.6226916101674 -------- diff --git a/test/net2_snaq.out b/test/net2_snaq.out deleted file mode 100644 index 9680f1b7d..000000000 --- a/test/net2_snaq.out +++ /dev/null @@ -1,17 +0,0 @@ -(4:0.66,(((6:3.5,(5:0.349)#H9:0.168::0.589):0.578,(#H9:0.0::0.411,3:0.0):0.366):0.0,(1:2.236,(2:0.142)#H7:9.779::0.628):0.793):0.393,#H7:5.775::0.372); - -Ploglik = 112.85311439829204 - Dendroscope: (4:0.66,(((6:3.5,(5:0.349)#H9:0.168):0.578,(#H9:0.0,3:0.0):0.366):0.0,(1:2.236,(2:0.142)#H7:9.779):0.793):0.393,#H7:5.775); - Elapsed time: 21.852749094 seconds in 10 successful runs -------- -List of estimated networks for all runs: - ((3,((6,#H7:::0.286):10.0,5):0.519):0.489,(2,(1)#H7:::0.714):1.512,4);, with -loglik 126.74250981975123 - (4,((5,(6,#H7:::0.286):10.0):0.519,3):0.489,(2,(1)#H7:::0.714):1.512);, with -loglik 126.74250981974944 - (4,((1,(2)#H7:9.336::0.628):0.793,((6,(5)#H9:1.873::0.589):0.578,(3,#H9:0.0::0.411):0.366):0.0):0.393,#H7:0.619::0.372);, with -loglik 112.85311441301114 - ((((5,#H7:::0.335):10.0,6):0.866,4):0.038,(2,1):0.461,(3)#H7:::0.665);, with -loglik 165.4147783831256 - (4,((5,#H7:::0.335):10.0,6):0.866,((1,2):0.461,(3)#H7:::0.665):0.038);, with -loglik 165.41477837882883 - ((2,(1)#H7:::0.714):1.512,(3,(5,(6,#H7:::0.286):10.0):0.519):0.489,4);, with -loglik 126.74250981975212 - (2,(3,(((4,1):0.0)#H9:0.0::0.504,(5,6):0.25):0.139):9.989,#H9:0.0::0.496);, with -loglik 192.10453903132804 - ((2,(1)#H7:::0.714):1.512,((5,(6,#H7:::0.286):10.0):0.519,3):0.489,4);, with -loglik 126.74250981975148 - (4,(((6,(5)#H9:0.168::0.589):0.578,(#H9:0.0::0.411,3):0.366):0.0,(1,(2)#H7:9.779::0.628):0.793):0.393,#H7:5.775::0.372);, with -loglik 
112.85311439829204 - ((5,(6,#H9:::0.294):10.0):1.131,3,(((4,#H7:2.035::0.377):0.414,(1,(2)#H7:0.23::0.623):0.821))#H9:::0.706);, with -loglik 116.38467328485964 -------- \ No newline at end of file diff --git a/test/print_add.jl b/test/print_add.jl deleted file mode 100644 index 9344a072d..000000000 --- a/test/print_add.jl +++ /dev/null @@ -1,9 +0,0 @@ -# code to printEdges, Nodes and addHybridizationUpdate to avoid steps -# in tests_5taxon.jl -# Claudia Septiembre 2014 - -#printEdges(net) -#printNodes(net) -success,hybrid,flag,nocycle,flag2,flag3 = addHybridizationUpdate!(net); -#printEdges(net) -#printNodes(net) diff --git a/test/rand4Quartets.txt b/test/rand4Quartets.txt deleted file mode 100644 index a02db01f0..000000000 --- a/test/rand4Quartets.txt +++ /dev/null @@ -1,4 +0,0 @@ -a,b,c,d -a,b,c,e -a,c,d,e -b,c,d,e diff --git a/test/readme.md b/test/readme.md index 80a72cb92..cf4b6810c 100644 --- a/test/readme.md +++ b/test/readme.md @@ -1,24 +1,15 @@ ### Tests functions All in runtests.jl +old test files: checkout PhyloNetworks v0.9.1 or older to see those older files. 
+ #### add hybridization -(deprecated) -tests_5taxon.jl runs all the tests for the eight 5taxon networks of -starting with tree_example.jl and adding one hybridization -It calls add_hybrid_caseC,D,E,F,G,H,I,J.jl -aux functions: print_add.jl and test_functions_5taxon.jl -test_add2hyb.jl add a second hybrid which is a bad triangle, and the +test_add2hyb.jl add a first hybrid, then a second hybrid that +makes a bad triangle, and the functions should identify it #### delete hybridization -(deprecated) -tests_5taxon_delete.jl runs all the tests for the eight 5taxon -networks of starting with tree_example.jl and adding one -hybridization, and then deleting it and comparing to the original tree -example -It calls delete_hybrid_caseC,D,E,F,G,H,I,J.jl -aux functions: test_functions_5taxon.jl test_deleteHybridizationUpdate.jl checks that all attributes are correctly updated after deleting a hybridization (1 and 2 hybrids) diff --git a/test/summaryTreesQuartets.txt b/test/summaryTreesQuartets.txt deleted file mode 100644 index 25e4c02d5..000000000 --- a/test/summaryTreesQuartets.txt +++ /dev/null @@ -1,14 +0,0 @@ -data consists of 10 gene trees and 15 4-taxon subsets - - same taxa in gene trees and quartets: String["1", "2", "3", "4", "5", "6"] -Taxon 1 appears in 10 input trees (100.0 %) -Taxon 2 appears in 10 input trees (100.0 %) -Taxon 3 appears in 10 input trees (100.0 %) -Taxon 4 appears in 10 input trees (100.0 %) -Taxon 5 appears in 10 input trees (100.0 %) -Taxon 6 appears in 10 input trees (100.0 %) ----------------------------- - -will print below only the 4-taxon subsets with data from <= 70.0% genes ----------------------------- - diff --git a/test/tableCF0.txt b/test/tableCF0.txt deleted file mode 100644 index 9600296bd..000000000 --- a/test/tableCF0.txt +++ /dev/null @@ -1,16 +0,0 @@ -"t1","t2","t3","t4","CF1234","CF1324","CF1423" -"6","1","5","4",0.4,0.6,0.0 -"6","1","5","2",0.2,0.8,0.0 -"6","1","5","3",0.5,0.5,0.0 -"6","1","4","2",0.4,0.6,0.0 
-"6","1","4","3",0.4,0.2,0.4 -"6","1","2","3",0.2,0.1,0.7 -"6","5","4","2",0.8,0.0,0.2 -"6","5","4","3",0.4,0.4,0.2 -"6","5","2","3",0.6,0.4,0.0 -"6","4","2","3",0.3,0.2,0.5 -"1","5","4","2",0.4,0.0,0.6 -"1","5","4","3",0.3,0.5,0.2 -"1","5","2","3",0.1,0.8,0.1 -"1","4","2","3",0.1,0.5,0.4 -"5","4","2","3",0.3,0.2,0.5 diff --git a/test/tableCF1.txt b/test/tableCF1.txt deleted file mode 100644 index 9600296bd..000000000 --- a/test/tableCF1.txt +++ /dev/null @@ -1,16 +0,0 @@ -"t1","t2","t3","t4","CF1234","CF1324","CF1423" -"6","1","5","4",0.4,0.6,0.0 -"6","1","5","2",0.2,0.8,0.0 -"6","1","5","3",0.5,0.5,0.0 -"6","1","4","2",0.4,0.6,0.0 -"6","1","4","3",0.4,0.2,0.4 -"6","1","2","3",0.2,0.1,0.7 -"6","5","4","2",0.8,0.0,0.2 -"6","5","4","3",0.4,0.4,0.2 -"6","5","2","3",0.6,0.4,0.0 -"6","4","2","3",0.3,0.2,0.5 -"1","5","4","2",0.4,0.0,0.6 -"1","5","4","3",0.3,0.5,0.2 -"1","5","2","3",0.1,0.8,0.1 -"1","4","2","3",0.1,0.5,0.4 -"5","4","2","3",0.3,0.2,0.5 diff --git a/test/tableCF2.txt b/test/tableCF2.txt deleted file mode 100644 index 3cabc1503..000000000 --- a/test/tableCF2.txt +++ /dev/null @@ -1,11 +0,0 @@ -"t1","t2","t3","t4","CF1234","CF1324","CF1423" -"6","1","5","2",0.2,0.8,0.0 -"6","1","2","3",0.2,0.1,0.7 -"6","5","4","2",0.8,0.0,0.2 -"6","5","4","3",0.4,0.4,0.2 -"6","5","2","3",0.6,0.4,0.0 -"6","4","2","3",0.3,0.2,0.5 -"1","5","4","2",0.4,0.0,0.6 -"1","5","4","3",0.3,0.5,0.2 -"1","4","2","3",0.1,0.5,0.4 -"5","4","2","3",0.3,0.2,0.5 diff --git a/test/tableCF3.txt b/test/tableCF3.txt deleted file mode 100644 index f0c65029d..000000000 --- a/test/tableCF3.txt +++ /dev/null @@ -1,11 +0,0 @@ -"t1","t2","t3","t4","CF1234","CF1324","CF1423" -"6","1","5","4",0.4,0.6,0.0 -"6","1","5","2",0.2,0.8,0.0 -"6","1","4","2",0.4,0.6,0.0 -"6","1","4","3",0.4,0.2,0.4 -"6","1","2","3",0.2,0.1,0.7 -"6","5","4","3",0.4,0.4,0.2 -"1","5","4","2",0.4,0.0,0.6 -"1","5","4","3",0.3,0.5,0.2 -"1","5","2","3",0.1,0.8,0.1 -"1","4","2","3",0.1,0.5,0.4 diff --git a/test/tableCF4.txt 
b/test/tableCF4.txt deleted file mode 100644 index 6fcbd8336..000000000 --- a/test/tableCF4.txt +++ /dev/null @@ -1,6 +0,0 @@ -"t1","t2","t3","t4","CF1234","CF1324","CF1423" -"1","2","3","4",0.5,0.4,0.1 -"1","2","3","5",0.8,0.1,0.1 -"1","2","4","5",0.6,0.0,0.4 -"1","3","4","5",0.2,0.5,0.3 -"2","3","4","5",0.3,0.5,0.2 diff --git a/test/test_5taxon_writeTopology.jl b/test/test_5taxon_writeTopology.jl deleted file mode 100644 index 976fe9343..000000000 --- a/test/test_5taxon_writeTopology.jl +++ /dev/null @@ -1,56 +0,0 @@ -# tests with the 5 taxon networks write to parenthetical format -# Claudia March 2015 -# need to draw in dendroscope to compare, still not automatic - - -# types in "types.jl" -include("../src/types.jl") -include("../src/functions.jl") - -# needed modules: -using Base.Collections # for updateInCycle with priority queue - -tests = ["C","F","G","H","J","D","E","I"]; -wrong = String[]; - -function whichtree(t::String) - if(t == "tree") - tree = "(((6:0.1,4:1.5)1:0.2,7:0.2)5:0.1,8:0.1,10:0.1);" # normal tree - elseif(t == "C") - tree = "((((6:0.1,4:1.5),(7:0.2)11#H1),11#H1),8:0.1,10:0.1);" # Case C: bad triangle II - elseif(t == "F") - tree = "(((6:0.1,(4)11#H1)1:0.2,(11#H1,7))5:0.1,8:0.1,10:0.1);" # Case F: bad diamond I - elseif(t == "G") - tree = "((((6:0.1,4:1.5)1:0.2,(7)11#H1)5:0.1,(11#H1,8)),10:0.1);" # Case G - elseif(t == "H") - tree = "((((6,4),#H1),7),(8)#H1,10);" # Case H - elseif(t == "J") - tree = "((((6)#H1,4),7),8,(#H1,10));" # Case J - elseif(t == "D") - tree = "((((6,4))#H1,(#H1,7)),8,10);" # Case D Bad triangle I - elseif(t == "E") - tree = "(((((8,10))#H1,7),#H1),6,4);" # Case E Bad triangle I - elseif(t == "I") - tree = "((((8,10))#H1,7),6,(4,#H1));" # Case I Bad diamond II - else - error("not a known 5 taxon network case") - end - return tree -end - -tests = ["F","G","H","J","I"]; -t="I" -for t in tests - println("running $(t)") - net = nothing; - tree = whichtree(t) - f = open("prueba_tree.txt","w") - write(f,tree) - close(f) - net 
= readTopologyUpdate("prueba_tree.txt"); - printEdges(net) - printNodes(net) - written = writeTopologyLevel1(net) - written2 = writeTopologyLevel1(net,true) - tree -end diff --git a/test/test_addDelete.jl b/test/test_addDelete.jl deleted file mode 100644 index 282f8eff9..000000000 --- a/test/test_addDelete.jl +++ /dev/null @@ -1,81 +0,0 @@ -# tests to debug addHybridizationUpdateSmart -# and deleteHybridizationUpdate -# Claudia March 2015 - - -# starting topology: Case G -include("../examples/case_g_example.jl"); -currT = deepcopy(net); -printEdges(currT) - -# real network: Case H -df = readtable("CaseH_output.csv") -d = readDataCF(df) - -currloglik,currxmin = optBL!(currT,d) -newT = deepcopy(currT); -count = 0 -N = 100 -move = whichMove(currT) -move = :CHdir -move = :MVorigin -move = :MVtarget -move = :nni - -flag = proposedTop!(move,newT,true,count,N) -printEdges(newT) -printNodes(newT) - -newloglik, newxmin = optBL!(newT,d) -isValid(newT) -node=searchHybridNode(newT); -node[1].number -deleteHybridizationUpdate!(newT,node[1]) -printEdges(newT) -printNodes(newT) -success=addHybridizationUpdate!(newT); -success[1] -printEdges(newT) -printNodes(newT) - -# ---------- debug addHybridizationUpdateSmart - - -# starting topology: Case G -include("../examples/case_g_example.jl"); -currT = deepcopy(net); -printEdges(currT) - -# real network: Case H -df = readtable("CaseH_output.csv") -d = readDataCF(df) - -currloglik,currxmin = optBL!(currT,d) -newT = deepcopy(currT); -count = 0 -N = 100 -move = whichMove(currT) -move = :CHdir -move = :MVorigin -move = :MVtarget -move = :nni - -flag = proposedTop!(move,newT,true,count,N) -printEdges(newT) -printNodes(newT) - -newloglik, newxmin = optBL!(newT,d) -isValid(newT) -node=searchHybridNode(newT); -node[1].number -deleteHybridizationUpdate!(newT,node[1]) -printEdges(newT) -printNodes(newT) -success=addHybridizationUpdateSmart!(newT) -printEdges(newT) -printNodes(newT) - - -afterOptBL!(newT,d) -currT=deepcopy(newT); - diff --git 
a/test/test_afterOptBL.jl b/test/test_afterOptBL.jl deleted file mode 100644 index c2fca295b..000000000 --- a/test/test_afterOptBL.jl +++ /dev/null @@ -1,232 +0,0 @@ -# test afterOptBL -# Claudia March 2015 -# use cases in test_optBL and test_optTopLevel - -# test_optBL ------------------------------------------------------------- -# should not do anything, should leave net unchanged - -# CASE G -include("../src/types.jl") -include("../src/functions.jl") - -df = readtable("CaseG_output.csv") -d2 = readTableCF(df) - -tree = "((((6,4)1,(7)11#H1:::0.8)5,(11#H1:::0.2,8)),10);" # Case G different starting branch lengths -f = open("prueba_tree.txt","w") -write(f,tree) -close(f) -net = readTopologyUpdate("prueba_tree.txt"); -printEdges(net) - -net.ht -realht = [0.1,0.2,0.1,1.0] - -@time fmin,xmin=optBL!(net,d2) -# with 100*logPseudoLik and t in (0,10): takes longer, but finds it! -#got 0.0 at [0.10085,0.19991,0.1001,0.98557] after 1123 iterations (returned FTOL_REACHED) -#elapsed time: 53.20413795 seconds (107820188 bytes allocated, 0.13% gc time) -#(2.169777681982341e-9,[0.100853,0.199907,0.100098,0.985569]) - -f=afterOptBL!(net,d2) -all(f) || error("afterOptBL not correctly for optBL in CASE G") - -#-------- -# Case H -include("../src/types.jl") -include("../src/functions.jl") - -df = readtable("CaseH_output.csv") -d2 = readTableCF(df) - -# starting ht (gamma,t3,t5,t7) -ht = [0.2,1.,1.,1.] 
- -tree = string("((((6:0.1,4:1.5):",string(ht[2]),",#H1:::",string(ht[1]),"),7:0.2):",string(ht[4]),",(8)#H1:::",string(1-ht[1]),",10:0.1);") # Case H -f = open("prueba_tree.txt","w") -write(f,tree) -close(f) -net = readTopologyUpdate("prueba_tree.txt"); -printEdges(net) - -net.ht -realht = [0.1,0.1,1.,0.1] - -@time fmin,xmin=optBL!(net,d2) -# with 100*logPseudoLik and t in (0,10): -#got 0.0 at [0.1,0.1,1.0,0.1] after 208 iterations (returned FTOL_REACHED) -#elapsed time: 6.892366587 seconds (20646048 bytes allocated) -#(2.4010737722561867e-13,[0.0999999,0.0999992,1.0,0.1]) - -f=afterOptBL!(net,d2) -all(f) || error("afterOptBL not correctly for optBL in CASE H") - -# ----------- -# CASE J -include("../src/types.jl") -include("../src/functions.jl") - -df = readtable("CaseJ_output.csv") -d2 = readTableCF(df) - -# starting ht (gamma,t3,t5,t7) -ht = [0.2,1.,1.,1.] - -tree = string("((((6)#H1:::",string(1-ht[1]),",4:1.5):",string(ht[2]),",7:0.2):",string(ht[3]),",8:0.1,(#H1:::",string(ht[1]),",10));") # Case J -f = open("prueba_tree.txt","w") -write(f,tree) -close(f) -net = readTopologyUpdate("prueba_tree.txt"); -printEdges(net) - -net.ht -realht = [0.1,0.2,0.1,1.0] - -@time fmin,xmin=optBL!(net,d2) -# with 100*logPseudoLik and t in (0,10): -#got 0.0 at [0.1,0.2,0.1,1.00003] after 249 iterations (returned FTOL_REACHED) -#elapsed time: 6.656956697 seconds (23010204 bytes allocated, 0.56% gc time) -#(5.280560068867562e-12,[0.0999983,0.199999,0.0999999,1.00003]) - -f=afterOptBL!(net,d2) -all(f) || error("afterOptBL not correctly for optBL in CASE J") - -# ----------- -# CASE F -include("../src/types.jl") -include("../src/functions.jl") - -df = readtable("CaseF_output.csv") -d2 = readTableCF(df) - -# starting ht (gamma,t4,t5,t9) -ht = [0.1,1.,1.,1.] 
- -tree = string("(((6:0.1,(4)11#H1:::",string(1-ht[1]),")1:",string(ht[3]),",(11#H1:::",string(ht[1]),",7))5:",string(ht[4])",8:0.1,10:0.1);") # Case F: bad diamond I -f = open("prueba_tree.txt","w") -write(f,tree) -close(f) -net = readTopologyUpdate("prueba_tree.txt"); -printEdges(net) - -net.ht -realht = [0.1,0.127,0.0285] - -@time fmin,xmin=optBL!(net,d2) -# with 100*logPseudoLik and t in (0,10): -# got 0.0 at [0.1,0.12689,0.02855] after 3356 iterations (returned FTOL_REACHED) -# elapsed time: 90.405727385 seconds (281649264 bytes allocated, 0.24% gc time) -# (8.157692309723136e-12,[0.0999999,0.126889,0.028549]) - -f=afterOptBL!(net,d2) -all(f) || error("afterOptBL not correctly for optBL in CASE F") - -# --------------- -# CASE I -include("../src/types.jl") -include("../src/functions.jl") - -df = readtable("CaseI_output.csv") -d2 = readTableCF(df) - -# starting ht (gamma,t4,t6,t9,t10) -ht = [0.2,0.0,2.0,2.0,2.0] -ht = [0.2,0.0,0.5,0.5,0.5] - -tree = string("((((8,10):",string(ht[2]),")#H1:::",string(1-ht[1]),",7):",string(ht[3]),",6,(4,#H1:",string(ht[4]),"::",string(ht[1]),"):",string(ht[5]),");") # Case I Bad diamond II -f = open("prueba_tree.txt","w") -write(f,tree) -close(f) -net = readTopologyUpdate("prueba_tree.txt"); -printEdges(net) - -net.ht -realht = [0.1,2.0,1.0,1.0,1.0] - -@time fmin,xmin=optBL!(net,d2) -# with 100*logPseudoLik and t in (0,10): -# ht = [0.2,0.0,2.0,2.0,2.0] -#got 0.0 at [0.1,2.0,1.0,0.99994,1.0] after 753 iterations (returned FTOL_REACHED) -#elapsed time: 47.213517434 seconds (80265788 bytes allocated, 0.16% gc time) -#(1.2568189823957448e-12,[0.0999998,2.0,1.0,0.999941,1.0]) - -f=afterOptBL!(net,d2) -all(f) || error("afterOptBL not correctly for optBL in CASE I") - - -# test_optTopLevelparts.jl------------------------------------------------------------------ - -# starting topology: Case G -include("../examples/case_g_example.jl"); -currT = deepcopy(net); -printEdges(currT) -printNodes(currT) - -# real network: Case H -df = 
readtable("CaseH_output.csv") -d = readTableCF(df) - -# real network: Case H -df = readtable("CaseH_output.csv") -d = readTableCF(df) - -currloglik,currxmin = optBL!(currT,d, false) -isValid(currT) -success,flagh,flagt,flaghz = afterOptBL!(currT,d) -reject = afterOptBLAll!(currT,d) - - -newT = deepcopy(currT); -count = 0 -N = 100 -move = whichMove(currT) -move = :CHdir -move = :MVorigin -move = :MVtarget -move = :nni - -flag = proposedTop!(move,newT,true,count,N) -printEdges(newT) -printNodes(newT) - -newloglik, newxmin = optBL!(newT,d) -reject = afterOptBLAll!(newT,d) -isValid(newT) -afterOptBL!(newT,d) -currT=deepcopy(newT); - - -# starting topology: Case F -include("../examples/case_f_example.jl"); -currT = deepcopy(net); -printEdges(currT) - -# real network: Case H -df = readtable("CaseH_output.csv") -d = readTableCF(df) - -currloglik,currxmin = optBL!(currT,d) -updateParameters!(currT) -updateLik!(currT,currloglik) -newT = deepcopy(currT); -count = 0 -N = 100 -move = whichMove(currT) -move = :CHdir -move = :MVorigin -move = :MVtarget -move = :nni - -flag = proposedTop!(move,newT,true,count,N) -printEdges(newT) -printNodes(newT) -count([e.hybrid for e in newT.edge]) == 2 || error("there are not 2 hybrid edges") -newT.hybrid[1].k - -newloglik, newxmin = optBL!(newT,d) -newloglik - currloglik - -currT = deepcopy(newT); -currloglik = newloglik -currxmin = newxmin - diff --git a/test/test_badDiamII.jl b/test/test_badDiamII.jl index adc25f270..9876ce063 100644 --- a/test/test_badDiamII.jl +++ b/test/test_badDiamII.jl @@ -26,7 +26,7 @@ df=DataFrame(t1=["1","1","2","2","1","2","2","2","2","2","1","2","2","3","2"], t4=["6","4","4","6","4","6","4","5","4","4","4","6","4","4","4"], CF1234=[0.565,0.0005,0.0005,0.565,0.00003,0.99986,0.0410167,1,0.99987,1,0.040167,0.998667,1,0.073167,0.00003], CF1324=[0.0903,0.8599,0.8599,0.0903,0.8885,0.00006,0.263,0,0.00006,0,0.2630,0.00006,0,0.0424667,0.8885]) -df[:CF1423] = 1.0 .- df[:CF1234] .- df[:CF1324] +df[!,:CF1423] = 1.0 .- 
df[!,:CF1234] .- df[!,:CF1324] d = readTableCF(df) Random.seed!(345); diff --git a/test/test_bootstrap.jl b/test/test_bootstrap.jl index d063dade5..ebca545c6 100644 --- a/test/test_bootstrap.jl +++ b/test/test_bootstrap.jl @@ -13,25 +13,25 @@ resn, rese, resc, gam, edgenum = hybridBootstrapSupport(bootnet,bestnet); #@show resn; @show rese; showall(gam); @show edgenum; resc # plot(bestnet, showIntNodeNumber=true) -@test resn[:clade][1:2] == ["H26","H25"] -@test resn[:BS_hybrid_samesisters][1:2] == [25.0,100.0] -@test resn[:BS_hybrid] == [100.0,100,0,0,0,75,0,0,0,0,0,0,5,5,5,5,5,0,0,0] -@test resn[:BS_minor_sister] == [0.0,0,100,0,0,5,10,70,75,25,5,5,0,0,0,0,0,0,0,5] -@test resn[:BS_major_sister] == [0.0,0,0,100,100,0,70,10,0,0,5,5,0,0,0,0,0,5,5,0] -@test rese[:BS_minor][2] == 25.0 # BS of introgression for H26 -@test rese[:BS_minor][4] == 100.0 # BS of introgression for H25 -@test resc[:taxa]==["Xgordoni","Xmeyeri","Xcouchianus","Xvariatus","Xevelynae","Xxiphidium", +@test resn[1:2,:clade] == ["H26","H25"] +@test resn[1:2,:BS_hybrid_samesisters] == [25.0,100.0] +@test resn[!,:BS_hybrid] == [100.0,100,0,0,0,75,0,0,0,0,0,0,5,5,5,5,5,0,0,0] +@test resn[!,:BS_minor_sister] == [0.0,0,100,0,0,5,10,70,75,25,5,5,0,0,0,0,0,0,0,5] +@test resn[!,:BS_major_sister] == [0.0,0,0,100,100,0,70,10,0,0,5,5,0,0,0,0,0,5,5,0] +@test rese[2,:BS_minor] == 25.0 # BS of introgression for H26 +@test rese[4,:BS_minor] == 100.0 # BS of introgression for H25 +@test resc[!,:taxa]==["Xgordoni","Xmeyeri","Xcouchianus","Xvariatus","Xevelynae","Xxiphidium", "Xmilleri","Xandersi","Xmaculatus","Xhellerii","Xalvarezi","Xmayae","Xsignum","Xclemenciae_F2", "Xmonticolus","Xmontezumae","Xnezahuacoyotl","Xbirchmanni_GARC","Xmalinche_CHIC2","Xcortezi", "Xcontinens","Xpygmaeus","Xnigrensis","Xmultilineatus"] -@test resc[:taxa][resc[:H26]] == ["Xnezahuacoyotl"] -@test resc[:taxa][resc[:H25]] == 
["Xmontezumae","Xnezahuacoyotl","Xbirchmanni_GARC","Xmalinche_CHIC2","Xcortezi","Xcontinens","Xpygmaeus","Xnigrensis","Xmultilineatus"] -@test resc[:taxa][resc[:c_minus27]] == ["Xnigrensis","Xmultilineatus"] # minor sis of H26 -@test resc[:taxa][resc[:Xxiphidium]] == ["Xxiphidium"] # minor sis of H25 -@test resc[:taxa][resc[:Xsignum]] == ["Xsignum"] # donor8 previously -@test resc[:taxa][resc[:c_minus24]] == ["Xcontinens","Xpygmaeus","Xnigrensis","Xmultilineatus"] # donor7 -@test resc[:taxa][resc[:Xmontezumae]] == ["Xmontezumae"] # major sis of H26. Below: major sis of H25 -@test resc[:taxa][resc[:c_minus12]] == ["Xhellerii","Xalvarezi","Xmayae","Xsignum","Xclemenciae_F2","Xmonticolus"] +@test resc[!,:taxa][resc[!,:H26]] == ["Xnezahuacoyotl"] +@test resc[!,:taxa][resc[!,:H25]] == ["Xmontezumae","Xnezahuacoyotl","Xbirchmanni_GARC","Xmalinche_CHIC2","Xcortezi","Xcontinens","Xpygmaeus","Xnigrensis","Xmultilineatus"] +@test resc[!,:taxa][resc[!,:c_minus27]] == ["Xnigrensis","Xmultilineatus"] # minor sis of H26 +@test resc[!,:taxa][resc[!,:Xxiphidium]] == ["Xxiphidium"] # minor sis of H25 +@test resc[!,:taxa][resc[!,:Xsignum]] == ["Xsignum"] # donor8 previously +@test resc[!,:taxa][resc[!,:c_minus24]] == ["Xcontinens","Xpygmaeus","Xnigrensis","Xmultilineatus"] # donor7 +@test resc[!,:taxa][resc[!,:Xmontezumae]] == ["Xmontezumae"] # major sis of H26. 
Below: major sis of H25 +@test resc[!,:taxa][resc[!,:c_minus12]] == ["Xhellerii","Xalvarezi","Xmayae","Xsignum","Xclemenciae_F2","Xmonticolus"] @test gam[:,2] == [.0,.0,.192,.0,.0,.0,.0,.0,.193,.0,.184,.193,.0,.0,.0,.0,.0,.193,.0,.0] @test gam[:,4] == [.165,.166,.165,.166,.165,.165,.166,.165,.165,.166,.164,.166,.166,.165,.165,.165,.166,.165,.166,.166] @test edgenum ==[25,39,43,7] diff --git a/test/test_calculateExpCF.jl b/test/test_calculateExpCF.jl index 4ae972e7e..ff11121c8 100644 --- a/test/test_calculateExpCF.jl +++ b/test/test_calculateExpCF.jl @@ -5,7 +5,7 @@ #println("------ Case G ----------") include("../examples/case_g_example.jl") -#net.names +# include(joinpath(dirname(pathof(PhyloNetworks)), "..","examples","case_g_example.jl")) error1 = false ind = 0 @@ -153,7 +153,7 @@ end #println("------ Case F Bad diamond I ----------") include("../examples/case_f_example.jl"); -#net.names +# include(joinpath(dirname(pathof(PhyloNetworks)), "..","examples","case_f_example.jl")) error1 = false ind = 0 parameters!(net) @@ -297,7 +297,7 @@ end #println("------ Case I Bad diamond II ----------") include("../examples/case_i_example.jl"); -#net.names +# include(joinpath(dirname(pathof(PhyloNetworks)), "..","examples","case_i_example.jl")) error1 = false ind = 0 diff --git a/test/test_changeDir.jl b/test/test_changeDir.jl deleted file mode 100644 index ed1688b81..000000000 --- a/test/test_changeDir.jl +++ /dev/null @@ -1,47 +0,0 @@ -# tests with change direction update -# if done twice, does it leave the same network? 
-# Claudia March 2015 - -# Case G -include("../examples/case_g_example.jl"); -net0 = deepcopy(net); -printEdges(net) -printNodes(net) -node = searchHybridNode(net); -node[1].number -success,newnode = changeDirectionUpdate!(net,node[1]); -printEdges(net) - -success,node = changeDirectionUpdate!(net,newnode); -printEdges(net) -printEdges(net0) -# not the same, bad diamond I found in between - -# Case H -include("../examples/case_h_example.jl"); -net0 = deepcopy(net); -printEdges(net) -printNodes(net) -node = searchHybridNode(net); -node[1].number -success,newnode = changeDirectionUpdate!(net,node[1]); -printEdges(net) - -success,node = changeDirectionUpdate!(net,newnode); -printEdges(net) -printEdges(net0) -# not the same, bad diamond II found in between - -# Case J -include("../examples/case_j_example.jl"); -net0 = deepcopy(net); -printEdges(net) -printNodes(net) -node = searchHybridNode(net); -node[1].number -success,newnode = changeDirectionUpdate!(net,node[1]); -printEdges(net) - -success,node = changeDirectionUpdate!(net,newnode); -printEdges(net) -printEdges(net0) diff --git a/test/test_checkrootplace.jl b/test/test_checkrootplace.jl deleted file mode 100644 index 7da2d2f06..000000000 --- a/test/test_checkrootplace.jl +++ /dev/null @@ -1,16 +0,0 @@ -## script to test function checkRootPlace -## Claudia May 2016 - -include("../src/types.jl") -include("../src/functions.jl") - -net = readTopologyLevel1("(6,((5,#H7:0.0::0.29999):10.0,(((2,#H9:0.0::0.15902):0.02332,(1,(3)#H9:0.9417::0.84098):0.12997):1.2018,(4)#H7:0.01722::0.70001):9.99428):0.24593);") -plot(net) -checkRootPlace!(net,outgroup="6") -plot(net) -checkRootPlace!(net,outgroup="2") -plot(net) -checkRootPlace!(net,outgroup="1") -plot(net) -checkRootPlace!(net,outgroup="3") ## can't do it, does not change net -plot(net) diff --git a/test/test_correctLik.jl b/test/test_correctLik.jl index 905067e8f..f39449484 100644 --- a/test/test_correctLik.jl +++ b/test/test_correctLik.jl @@ -6,7 +6,6 @@ 
PhyloNetworks.CHECKNET || error("need CHECKNET==true in PhyloNetworks to test snaq in test_correctLik.jl") -#df = readtable("Tree_output.txt") df=DataFrame(t1=["6","6","10","6","6"], t2=["7","7","7","10","7"], t3=["4","10","4","4","4"], @@ -20,14 +19,14 @@ d = readTableCF(df) @test tipLabels(d) == ["4","6","7","8","10"] @test_logs PhyloNetworks.descData(d, devnull) -df[:ngenes] = [10,10,10,10,20] +df[!,:ngenes] = [10,10,10,10,20] allowmissing!(df, :ngenes) d = readTableCF(df) -df[:ngenes][1] = missing; d.quartet[1].ngenes = -1.0 +df[1,:ngenes] = missing; d.quartet[1].ngenes = -1.0 newdf = writeTableCF(d) -@test newdf[1:7] == rename(df, [:obsCF12 => :CF12_34, :obsCF13 => :CF13_24, :obsCF14 => :CF14_23])[1:7] -@test ismissing(newdf[:ngenes][1]) -@test newdf[:ngenes][2:end] == df[:ngenes][2:end] +@test newdf[!,1:7] == rename(df, [:obsCF12 => :CF12_34, :obsCF13 => :CF13_24, :obsCF14 => :CF14_23])[!,1:7] +@test ismissing(newdf[1,:ngenes]) +@test newdf[2:end,:ngenes] == df[2:end,:ngenes] # starting tree: tree = "((6,4),(7,8),10);" @@ -39,8 +38,8 @@ currT = readTopologyLevel1(tree); extractQuartet!(currT,d) calculateExpCFAll!(d) tmp = (@test_logs PhyloNetworks.writeExpCF(d)) -for i in [5,7] for j in 2:5 @test tmp[i][j] ≈ 0.12262648039048077; end end -for j in 2:5 @test tmp[6][j] ≈ 0.7547470392190385; end +for i in [5,7] for j in 2:5 @test tmp[j,i] ≈ 0.12262648039048077; end end +for j in 2:5 @test tmp[j,6] ≈ 0.7547470392190385; end lik = logPseudoLik(d) @test lik ≈ 193.7812623319291 #estTree = optTopRun1!(currT,d,0,5454) # issue with printCounts, TravisCI? 
@@ -91,6 +90,9 @@ end rmprocs(workers()) @test writeTopology(n1, round=true)==writeTopology(n2, round=true) @test n1.loglik == n2.loglik + n3 = readSnaqNetwork("snaq.out") + @test writeTopology(n3, round=true)==writeTopology(n2, round=true) + @test n3.loglik > 0.0 rm("snaq.out") rm("snaq.networks") rm("snaq.log") # .log and .err should be git-ignored, but still diff --git a/test/test_extractQuartet.jl b/test/test_extractQuartet.jl deleted file mode 100644 index 5d7defcf5..000000000 --- a/test/test_extractQuartet.jl +++ /dev/null @@ -1,129 +0,0 @@ -# test to extract quartet from a "good" 5taxon network (Case G) -# Claudia November 2014 -# also test to extract all quartets from a Data object -# Claudia January 2015 - -# Case G --------- - -include("../examples/case_g_example.jl"); -net.names - -q1 = Quartet(1,["6","7","4","8"],[0.5,0.4,0.1]); -qnet = extractQuartet!(net,q1); -printEdges(qnet) -printNodes(qnet) - -q2 = Quartet(2,["6","7","10","8"],[0.5,0.4,0.1]); -qnet = extractQuartet!(net,q2); -printEdges(qnet) -printNodes(qnet) - -q3 = Quartet(3,["10","7","4","8"],[0.5,0.4,0.1]); -qnet = extractQuartet!(net,q3); -printEdges(qnet) -printNodes(qnet) - -q4 = Quartet(4,["6","10","4","8"],[0.5,0.4,0.1]); -qnet = extractQuartet!(net,q4); -printEdges(qnet) -printNodes(qnet) - -q5 = Quartet(5,["6","7","4","10"],[0.5,0.4,0.1]); -qnet = extractQuartet!(net,q5); -printEdges(qnet) -printNodes(qnet) - -[n.number for n in qnet.leaf] -qnet.numTaxa - -# Bad triangle ------------ - -## include("../bad_triangle_example.jl"); -## printEdges(net) -## printNodes(net) -## net.names - -## q1 = Quartet(1,["6","7","1","5"],[0.5,0.4,0.1]); -## qnet = extractQuartet!(net,q1) -## printEdges(qnet) -## printNodes(qnet) - -## q2 = Quartet(2,["6","5","1","8"],[0.5,0.4,0.1]); -## qnet = extractQuartet!(net,q2); -## printEdges(qnet) -## printNodes(qnet) - -## q3 = Quartet(3,["1","7","5","8"],[0.5,0.4,0.1]); -## qnet = extractQuartet!(net,q3); -## printEdges(qnet) -## printNodes(qnet) -## # correct, 
but does not update length of external edge 8 (should be 0.2, at is 0.1) - -## q4 = Quartet(4,["6","1","7","8"],[0.5,0.4,0.1]); -## qnet = extractQuartet!(net,q4); -## printEdges(qnet) -## printNodes(qnet) -## # correct, but does not update length of external edge 9 (should be 0.2, at is 0.1) - -## q5 = Quartet(5,["6","7","5","8"],[0.5,0.4,0.1]); -## qnet = extractQuartet!(net,q5); -## printEdges(qnet) -## printNodes(qnet) - -## [n.number for n in qnet.leaf] -## qnet.numTaxa - -# Bad diamond ------------ - -include("../examples/case_f_example.jl") -printEdges(net) -printNodes(net) -net.names -parameters!(net) -net.numht - -parameters!(qnet,net) - -q1 = Quartet(1,["6","7","4","8"],[0.5,0.4,0.1]); -qnet = extractQuartet!(net,q1); -printEdges(qnet) -printNodes(qnet) -qnet.indexht -qnet.hasEdge - -q2 = Quartet(2,["6","7","10","8"],[0.5,0.4,0.1]); -qnet = extractQuartet!(net,q2); -printEdges(qnet) -printNodes(qnet) - -q3 = Quartet(3,["10","7","4","8"],[0.5,0.4,0.1]); -qnet = extractQuartet!(net,q3); -printEdges(qnet) -printNodes(qnet) - -q4 = Quartet(4,["6","10","4","8"],[0.5,0.4,0.1]); -qnet = extractQuartet!(net,q4); -printEdges(qnet) -printNodes(qnet) - -q5 = Quartet(5,["6","7","4","10"],[0.5,0.4,0.1]); -qnet = extractQuartet!(net,q5); -printEdges(qnet) -printNodes(qnet) - -[n.number for n in qnet.leaf] -qnet.numTaxa - -# ------------------------------------ -# extract all quartets -include("../examples/case_g_example.jl"); - -q1 = Quartet(1,["6","7","4","8"],[0.5,0.4,0.1]); -q2 = Quartet(2,["6","7","10","8"],[0.5,0.4,0.1]); -q3 = Quartet(3,["10","7","4","8"],[0.5,0.4,0.1]); -q4 = Quartet(4,["6","10","4","8"],[0.5,0.4,0.1]); -q5 = Quartet(5,["6","7","4","10"],[0.5,0.4,0.1]); - -d = DataCF([q1,q2,q3,q4,q5]); -extractQuartet!(net,d); -calculateExpCFAll!(d) diff --git a/test/test_functions_5taxon.jl b/test/test_functions_5taxon.jl deleted file mode 100644 index 6c40d962e..000000000 --- a/test/test_functions_5taxon.jl +++ /dev/null @@ -1,224 +0,0 @@ -# test functions for 
the 5taxon networks -# functions used in tests_5taxon.jl -# Claudia September 2014 -# changed to new reparametrizations: bad diamondI,II and bad triangle I,II -# Claudia November 2014 - -# Case C bad triangle II -function testCaseC(net::HybridNetwork) - n=searchHybridNode(net); - node = n[1]; - node.k != 3 ? error("k diff than 3") : nothing - node.isVeryBadTriangle ? nothing : error("does not know it is very bad triangle") - node.isExtBadTriangle ? error("thinks it is extremely bad triangle") : nothing - net.hasVeryBadTriangle ? nothing : error("net does not know it has very bad triangle") - net.numBad == 0 ? nothing : error("net.numBad should be 0") - net.numHybrids != 1 ? error("should have 1 hybrid, but net.numHybrids is $(net.numHybrids): $([n.number for n in net.hybrid])") : nothing -end - -# Case F bad diamond -function testCaseF(net::HybridNetwork) - net.visited = [e.istIdentifiable for e in net.edge]; - n = searchHybridNode(net); - node = n[1]; - node.k != 4 ? error("k diff than 4") : nothing - edge5 = getIndexEdge(5,net); - edge9 = getIndexEdge(9,net); - edge11 = getIndexEdge(11,net); - edge12 = getIndexEdge(12,net); - edge15 = getIndexEdge(15,net); - edge14 = getIndexEdge(14,net); - node1 = getIndexNode(1,net); - node5 = getIndexNode(5,net); - node11 = getIndexNode(11,net); - node12 = getIndexNode(12,net); - (net.edge[edge5].inCycle != node.number || net.edge[edge11].inCycle != node.number || net.edge[edge12].inCycle != node.number || net.edge[edge15].inCycle != node.number ) ? error("edges not correctly in cycle") : nothing - (net.node[node1].inCycle != node.number || net.node[node5].inCycle != node.number || net.node[node11].inCycle != node.number || net.node[node12].inCycle != node.number) ? error("nodes 1,5,11,12 not correctly in cycle") : nothing - !node.isBadDiamondI ? error("does not know it is bad diamondI") : nothing - node.isBadDiamondII ? error("thinks it is bad diamond II") : nothing - net.edge[edge14].containRoot ? 
error("edge can contain root") : nothing - (!net.edge[edge11].hybrid || !net.edge[edge11].isMajor) ? error("edge 11 is not hybrid or major") : nothing - net.node[node12].gammaz != net.edge[edge15].gamma*net.edge[edge12].z ? error("node 12 gammaz not correctly calculated") : nothing - net.node[1].gammaz != net.edge[edge11].gamma*net.edge[edge5].z ? error("node 11 gammaz not correctly calculated") : nothing - !net.edge[edge9].istIdentifiable ? error("edge9 not identifiable") : nothing - net.visited[edge9] = false; - !all([!id for id in net.visited]) ? error("edges not identifiable as identifiable") : nothing - net.numHybrids != 1 ? error("should have 1 hybrid, but net.numHybrids is $(net.numHybrids): $([n.number for n in net.hybrid])") : nothing - net.numBad == 1 ? nothing : error("net.numBad should be 1") -end - -# Case G -function testCaseG(net::HybridNetwork) - net.visited = [e.istIdentifiable for e in net.edge]; - n = searchHybridNode(net); - node = n[1]; - node.k != 4 ? error("k diff than 4") : nothing - edge9 = getIndexEdge(9,net); - edge12 = getIndexEdge(12,net); - edge15 = getIndexEdge(15,net); - edge5 = getIndexEdge(5,net); - edge13 = getIndexEdge(13,net); - edge14 = getIndexEdge(14,net); - node9 = getIndexNode(9,net); - node5 = getIndexNode(5,net); - node11 = getIndexNode(11,net); - node12 = getIndexNode(12,net); - (net.edge[edge9].inCycle != node.number || net.edge[edge12].inCycle != node.number || net.edge[edge13].inCycle != node.number || net.edge[edge15].inCycle != node.number ) ? error("edges not correctly in cycle") : nothing - (net.node[node9].inCycle != node.number || net.node[node5].inCycle != node.number || net.node[node11].inCycle != node.number || net.node[node12].inCycle != node.number) ? error("nodes not correctly in cycle") : nothing - (node.isBadDiamondI || node.isBadDiamondII) ? error("thinks it is bad diamond") : nothing - net.edge[edge14].containRoot ? 
error("14 can contain root") : nothing - (!net.edge[edge12].hybrid || !net.edge[edge12].isMajor) ? error("edge 12 is not hybrid or major") : nothing - net.node[node12].gammaz != -1 ? error("node 12 gammaz should be -1") : nothing - (!net.edge[edge9].istIdentifiable || !net.edge[edge5].istIdentifiable || !net.edge[edge13].istIdentifiable) ? error("edge9,5,13not identifiable") : nothing - net.visited[edge9] = false; - net.visited[edge5] = false; - net.visited[edge13] = false; - !all([!id for id in net.visited]) ? error("edges not identifiable as identifiable") : nothing - net.numHybrids != 1 ? error("should have 1 hybrid, but net.numHybrids is $(net.numHybrids): $([n.number for n in net.hybrid])") : nothing -end - -# Case H -function testCaseH(net::HybridNetwork) - net.visited = [e.istIdentifiable for e in net.edge]; - n = searchHybridNode(net); - node = n[1]; - node.k != 4 ? error("k diff than 4") : nothing - edge9 = getIndexEdge(9,net); - edge14 = getIndexEdge(14,net); - edge15 = getIndexEdge(15,net); - edge13 = getIndexEdge(13,net); - edge5 = getIndexEdge(5,net); - edge8 = getIndexEdge(8,net); - node9 = getIndexNode(9,net); - node5 = getIndexNode(5,net); - node11 = getIndexNode(11,net); - node12 = getIndexNode(12,net); - (net.edge[edge9].inCycle != node.number || net.edge[edge5].inCycle != node.number || net.edge[edge14].inCycle != node.number || net.edge[edge15].inCycle != node.number ) ? error("edges not correctly in cycle") : nothing - (net.node[node9].inCycle != node.number || net.node[node5].inCycle != node.number || net.node[node11].inCycle != node.number || net.node[node12].inCycle != node.number) ? error("nodes 9,5,11,12 not correctly in cycle") : nothing - (node.isBadDiamondI || node.isBadDiamondII )? error("thinks it is bad diamond") : nothing - net.edge[edge8].containRoot ? error("8 can contain root") : nothing - (!net.edge[edge14].hybrid || !net.edge[edge14].isMajor) ? error("edge 14 is not hybrid or major") : nothing - net.node[node12].gammaz != -1 ? 
error("node 12 gammaz should be -1") : nothing - (!net.edge[edge9].istIdentifiable || !net.edge[edge5].istIdentifiable || !net.edge[edge13].istIdentifiable) ? error("edge9,5,13not identifiable") : nothing - net.visited[edge9] = false; - net.visited[edge5] = false; - net.visited[edge13] = false; - !all([!id for id in net.visited]) ? error("edges not identifiable as identifiable") : nothing - net.numHybrids != 1 ? error("should have 1 hybrid, but net.numHybrids is $(net.numHybrids): $([n.number for n in net.hybrid])") : nothing -end - - -# Case J -function testCaseJ(net::HybridNetwork) - net.visited = [e.istIdentifiable for e in net.edge]; - n = searchHybridNode(net); - node = n[1]; - node.k != 5 ? error("k diff than 5") : nothing - edge9 = getIndexEdge(9,net); - edge10 = getIndexEdge(10,net); - edge5 = getIndexEdge(5,net); - edge15 = getIndexEdge(15,net); - edge6 = getIndexEdge(6,net); - edge14 = getIndexEdge(14,net); - node9 = getIndexNode(9,net); - node1 = getIndexNode(1,net); - node5 = getIndexNode(5,net); - node11 = getIndexNode(11,net); - node12 = getIndexNode(12,net); - (net.edge[edge9].inCycle != node.number || net.edge[edge10].inCycle != node.number || net.edge[edge6].inCycle != node.number || net.edge[edge15].inCycle != node.number || net.edge[edge5].inCycle != node.number) ? error("edges not correctly in cycle") : nothing - (net.node[node9].inCycle != node.number || net.node[node5].inCycle != node.number || net.node[node11].inCycle != node.number || net.node[node12].inCycle != node.number || net.node[node1].inCycle) != node.number ? error("nodes 9,5,11,12,1 not correctly in cycle") : nothing - net.edge[edge14].containRoot ? error("14 can contain root") : nothing - (!net.edge[edge6].hybrid || !net.edge[edge6].isMajor) ? error("edge 6 is not hybrid or major") : nothing - net.node[node12].gammaz != -1 ? error("node 12 gammaz should be -1") : nothing - (!net.edge[edge9].istIdentifiable || !net.edge[edge5].istIdentifiable || !net.edge[edge10].istIdentifiable) ? 
error("edge9,5,10not identifiable") : nothing - net.visited[edge9] = false; - net.visited[edge5] = false; - net.visited[edge10] = false; - !all([!id for id in net.visited]) ? error("edges not identifiable as identifiable") : nothing - net.numHybrids != 1 ? error("should have 1 hybrid, but net.numHybrids is $(net.numHybrids): $([n.number for n in net.hybrid])") : nothing -end - -# Case D bad triangle I -function testCaseD(net::HybridNetwork) - net.visited = [e.istIdentifiable for e in net.edge]; - n = searchHybridNode(net); - node = n[1]; - node.k != 3 ? error("k diff than 3") : nothing - node.isVeryBadTriangle ? nothing : error("does not know it is very bad triangle") - node.isExtBadTriangle ? error("thinks it is extremely bad triangle") : nothing - net.hasVeryBadTriangle ? nothing : error("net does not know it has very bad triangle") - net.numBad == 0 ? nothing : error("net.numBad should be 0") - net.numHybrids != 1 ? error("should have 1 hybrid, but net.numHybrids is $(net.numHybrids): $([n.number for n in net.hybrid])") : nothing -end - -# Case E bad triangle I -function testCaseE(net::HybridNetwork) - net.visited = [e.istIdentifiable for e in net.edge]; - n = searchHybridNode(net); - node = n[1]; - node.k != 3 ? error("k diff than 3") : nothing - node.isVeryBadTriangle ? nothing : error("does not know it is very bad triangle") - node.isExtBadTriangle ? error("thinks it is extremely bad triangle") : nothing - net.hasVeryBadTriangle ? nothing : error("net does not know it has very bad triangle") - net.numBad == 0 ? nothing : error("net.numBad should be 0") - net.numHybrids != 1 ? error("should have 1 hybrid, but net.numHybrids is $(net.numHybrids): $([n.number for n in net.hybrid])") : nothing -end - - -# Case I bad diamond II -function testCaseI(net::HybridNetwork) - n = searchHybridNode(net); - node = n[1]; - node.isBadDiamondII ? nothing : error("does not know it is bad diamond II") - !node.isBadDiamondI ? 
nothing : error("thinks it is bad diamond I") - net.numHybrids != 1 ? error("should have 1 hybrid, but net.numHybrids is $(net.numHybrids): $([n.number for n in net.hybrid])") : nothing - net.visited = [e.istIdentifiable for e in net.edge]; - edge14 = getIndexEdge(14,net); - edge10 = getIndexEdge(10,net); - edge5 = getIndexEdge(5,net); - edge15 = getIndexEdge(15,net); - edge9 = getIndexEdge(9,net); - edge11 = getIndexEdge(11,net); - edge8 = getIndexEdge(8,net); - node1 = getIndexNode(1,net); - node12 = getIndexNode(12,net); - node5 = getIndexNode(5,net); - node11 = getIndexNode(11,net); - (net.edge[edge5].inCycle != node.number || net.edge[edge9].inCycle != node.number || net.edge[edge11].inCycle != node.number || net.edge[edge15].inCycle != node.number ) ? error("edges not correctly in cycle") : nothing - (net.node[node1].inCycle != node.number || net.node[node12].inCycle != node.number || net.node[node5].inCycle != node.number || net.node[node11].inCycle != node.number) ? error("nodes 1,5,11,12 not correctly in cycle") : nothing - (net.edge[edge14].containRoot || net.edge[edge10].containRoot || net.edge[edge8].containRoot) ? error("edges can contain root and shouldn't") : nothing - (!net.edge[edge9].hybrid || !net.edge[edge9].isMajor) ? error("edge 4 is not hybrid or major") : nothing - net.edge[edge14].length != 0 ? error("edges should have length 0") : nothing - net.edge[edge14].istIdentifiable ? error("edge14 identifiable and should not") : nothing - net.visited[edge9] = false; - net.visited[edge5] = false; - net.visited[edge11] = false; - net.visited[edge15] = false; - !all([!id for id in net.visited]) ? error("edges not identifiable as identifiable") : nothing - net.numBad == 0 ? nothing : error("net.numBad should be 0") -end - - -# tree example -# warning: if added hybridization (bad diamond/triangle) and then delete, -# original edge lengths cannot be recovered -function testTree(net::HybridNetwork) - !all([!e.hybrid for e in net.edge]) ? 
error("some edge is still hybrid") : nothing - !all([!e.hybrid for e in net.node]) ? error("some node is still hybrid") : nothing - !all([!e.hasHybEdge for e in net.node]) ? error("some node has hybrid edge") : nothing - !all([e.isMajor for e in net.edge]) ? error("some edge is not major") : nothing - !all([e.containRoot for e in net.edge]) ? error("some edge cannot contain root") : nothing - edge9 = getIndexEdge(9,net); - edge5 = getIndexEdge(5,net); - (!net.edge[edge9].istIdentifiable || !net.edge[edge5].istIdentifiable) ? error("edge9,5 not identifiable") : nothing - net.visited = [e.istIdentifiable for e in net.edge]; - net.visited[edge9] = false; - net.visited[edge5] = false; - !all([!id for id in net.visited]) ? error("edges not identifiable as identifiable") : nothing - isempty(net.hybrid) ? nothing : error("something in net.hybrid") - net.numHybrids != 0 ? error("should have 0 hybrid, but net.numHybrids is $(net.numHybrids)") : nothing - #edge11 = getIndexEdge(11,net); - #edge12 = getIndexEdge(12,net); - #net.edge[edge11].length != 1.5 ? error("edge length for 11 is wrong") : nothing - #net.edge[edge12].length != 0.2 ? 
error("edge length for 12 is wrong") : nothing -end diff --git a/test/test_hasEdge.jl b/test/test_hasEdge.jl index b795e1d13..da78283df 100644 --- a/test/test_hasEdge.jl +++ b/test/test_hasEdge.jl @@ -4,6 +4,7 @@ #println("----- Case G ------") include("../examples/case_g_example.jl"); +# include(joinpath(dirname(pathof(PhyloNetworks)), "..","examples","case_g_example.jl")) q1 = Quartet(1,["6","7","4","8"],[0.5,0.4,0.1]); q2 = Quartet(2,["6","7","10","8"],[0.5,0.4,0.1]); @@ -46,6 +47,7 @@ end #println("----- Case F: bad diamond ------") include("../examples/case_f_example.jl"); +# include(joinpath(dirname(pathof(PhyloNetworks)), "..","examples","case_f_example.jl")) q1 = Quartet(1,["6","7","4","8"],[0.5,0.4,0.1]); q2 = Quartet(2,["6","7","10","8"],[0.5,0.4,0.1]); diff --git a/test/test_hgt.jl b/test/test_hgt.jl deleted file mode 100644 index e952ecdae..000000000 --- a/test/test_hgt.jl +++ /dev/null @@ -1,132 +0,0 @@ -# test with the network in HGTinconsistency -# Claudia April 2015 - -include("../src/types.jl") -include("../src/functions.jl") - -truenetwork = "((((1,2),((3,4))#H1),(#H1,5)),6);" -treefile = "1.ms" - -d = readInputData(treefile, "HGTtableCF.txt"); #will get list of all quartets (allQuartets.txt), and obsCF (HGTtableCF.txt) -length(d.quartet) - -# for expCF -net = readTopologyUpdate(truenetwork); -printEdges(net) -extractQuartet!(net,d) #extract qnet from that list and calculate expCF -df2 = writeExpCF(d.quartet) -writetable("HGT_truenet_expCF.csv",df2) - -# compare table with expCF and obsCF: very different! 
- -# ----------------- optTopLevel with expCF and starting tree 1_astral.out, no branches updated -------- -include("../src/types.jl") -include("../src/functions.jl") - -df2 = readtable("HGT_truenet_expCF.csv") -d2 = readTableCF(df2); #expCF - -truenetwork = "((((1,2),((3,4))#H1),(#H1,5)),6);" -net = readTopologyUpdate(truenetwork); -printEdges(net) -@time optBL!(net,d2) #loglik~1e.-15 in 0.17secs - -currT0 = readTopologyUpdate("1_astral.out"); -printEdges(currT0) -Random.seed!(1234) #found right network in 135secs, wrong BL, right gamma (debug3hgtGood.txt) -Random.seed!(4568) # bug with movedownlevel, will leave for later (debug4hgt.txt) -Random.seed!(11233) #found right network in 119.34secs (debug4hgtBad.txt, by mistake) -currT = deepcopy(currT0); -addHybridizationUpdate!(currT); #add hybrid at random (different test would be to start with the tree) -printEdges(currT) - -@time optTopLevel!(currT,d2,1) - -# ----------------- optTopLevel with expCF and starting tree 1_astral.out, branches updated -------- -include("../src/types.jl") -include("../src/functions.jl") - -df2 = readtable("HGT_truenet_expCF.csv") -d2 = readTableCF(df2); #expCF - -truenetwork = "((((1,2),((3,4))#H1),(#H1,5)),6);" -net = readTopologyUpdate(truenetwork); -printEdges(net) -@time optBL!(net,d2) #loglik~1e.-15 in 0.17secs - -currT0 = readTopologyUpdate("1_astral.out"); -x = updateBL!(currT0,d2) -printEdges(currT0) -Random.seed!(1234) #right network in 20secs(debug12hgt) -Random.seed!(4568) #movedownlevel: debug13hgtBad -Random.seed!(11233) #very close to right network in 135secs (debug8hgtgood.txt) -currT = deepcopy(currT0); -addHybridizationUpdate!(currT); #add hybrid at random (different test would be to start with the tree) -printEdges(currT) - -@time optTopLevel!(currT,d2,1); - -# ----------------- optTopLevel with obsCF and starting tree 1_astral.out, no branches updated -------- -include("../src/types.jl") -include("../src/functions.jl") - -df = readtable("HGTtableCF.txt") #from 1.ms -d 
= readTableCF(df); #obsCF - -truenetwork = "((((1,2),((3,4))#H1),(#H1,5)),6);" -net = readTopologyUpdate(truenetwork); -printEdges(net) -@time optBL!(net,d,true) #loglik ~144.74 for true net - -currT0 = readTopologyUpdate("1_astral.out"); -printEdges(currT0) -Random.seed!(1234) #local max found debug5hgtBad -Random.seed!(4568) #local max2 found debug6hgtBad -Random.seed!(11233) #local max found debug7hgtBad -currT = deepcopy(currT0); -addHybridizationUpdate!(currT); #add hybrid at random (different test would be to start with the tree) -printEdges(currT) - -@time optTopLevel!(currT,d,1); - -# ----------------- optTopLevel with obsCF and starting tree 1_astral.out, branches updated -------- -include("../src/types.jl") -include("../src/functions.jl") - -df = readtable("HGTtableCF.txt") #from 1.ms -d = readTableCF(df); #obsCF - -truenetwork = "((((1,2),((3,4))#H1),(#H1,5)),6);" -net = readTopologyUpdate(truenetwork); -printEdges(net) -@time optBL!(net,d,true) #loglik ~144.74 for true net - -currT0 = readTopologyUpdate("1_astral.out"); -x = updateBL!(currT0,d) -printEdges(currT0) -Random.seed!(1234) #local max found debug9hgt.txt -Random.seed!(4568) #local max2 found debug9hgt.txt -Random.seed!(11233) #local max3 found debug9hgt.txt -currT = deepcopy(currT0); -addHybridizationUpdate!(currT); #add hybrid at random (different test would be to start with the tree) -printEdges(currT) - -@time optTopLevel!(currT,d,1); - - -# ----------------------- -# bug in Random.seed!(4568) -net = readTopologyUpdate("((4,#H7:9.99670403892172::0.43454301575229803):1.5467254857425556,((6,(5)#H7:2.512064322645178::0.565456984247702):9.221085796210835,(2,1):0.38003642076628485):0.0,3);"); -Random.seed!(4568) -flag = proposedTop!(:nni,net,true,0,100, zeros(Int,18), zeros(Int,6)) -df = readtable("HGTtableCF.txt") #from 1.ms -d = readTableCF(df); #obsCF - -qnet=QuartetNetwork(net); -extractQuartet!(net,d) -printEdges(qnet) -printNodes(qnet) -deleteLeaf!(qnet,qnet.node[1]) #leaf1 
-qnet0=deepcopy(qnet); -deleteLeaf!(qnet,qnet.node[2]) #leaf4 - diff --git a/test/test_hybridatnode.jl b/test/test_hybridatnode.jl deleted file mode 100644 index f90e28763..000000000 --- a/test/test_hybridatnode.jl +++ /dev/null @@ -1,28 +0,0 @@ -# test for hybridatnode -# still not automatic test function -# Claudia April 2016 - - -include("../src/types.jl") -include("../src/functions.jl") - -net = readTopologyLevel1("(Gma,(Gch,((Gmo_C7,((Gmo_N2,Gmo_I3):0.037,Gmo_I7):0.181):2.318,#H8:::0.03):2.892):2.347,(Bsa)#H8:::0.97);") -plot(net) -plot(net, showNodeNumber=true) -# want to change hybrid to -3 - -net2 = hybridatnode(net,-3) -plot(net2) -plot(net2, showEdgeNumber=true) -rootonedge!(net2,13) #Bsa outgroup -plot(net2) - - -hybridatnode!(net,-3) -plot(net) - - -net = readTopology("(A:1.0,(B,(((C:0.52,(E:0.5)#H2:0.02::0.7):0.6,(#H2:0.01::0.3,F:0.7):0.8):0.9,D:0.8):1.3):0.7):0.1;"); -plot(net, showNodeNumber=true) -hybridatnode!(net, -7) -plot(net) diff --git a/test/test_lm.jl b/test/test_lm.jl index 805aca46f..5a8135799 100644 --- a/test/test_lm.jl +++ b/test/test_lm.jl @@ -125,7 +125,7 @@ fitSH = phyloNetworklm(@formula(trait ~ 1), dfr, net, model = "scalingHybrid", f @test aic(fitlam) ≈ aic(fitSH) ## Pagel's Lambda -fitlam = (@test_logs (:info, r"^Maximum lambda value") match_mode=:any phyloNetworklm(@formula(trait ~ 1), dfr, net, model = "lambda")) +fitlam = (@test_logs (:info, r"^Maximum lambda value") phyloNetworklm(@formula(trait ~ 1), dfr, net, model = "lambda")) @test lambda_estim(fitlam) ≈ 1.24875 ## Scaling Hybrid @@ -150,12 +150,12 @@ Y = sim[:Tips] ## Construct regression matrix dfr_shift = regressorShift(net.edge[[8,17]], net) -dfr_shift[:sum] = vec(sum(Matrix(dfr_shift[findall(names(dfr_shift) .!= :tipNames)]), dims=2)) +dfr_shift[!,:sum] = vec(sum(Matrix(dfr_shift[:,findall(names(dfr_shift) .!= :tipNames)]), dims=2)) dfr_hybrid = regressorHybrid(net) -@test dfr_shift[:shift_8] ≈ dfr_hybrid[:shift_8] -@test dfr_shift[:shift_17] ≈ dfr_hybrid[:shift_17] 
-@test dfr_shift[:sum] ≈ dfr_hybrid[:sum] +@test dfr_shift[!,:shift_8] ≈ dfr_hybrid[!,:shift_8] +@test dfr_shift[!,:shift_17] ≈ dfr_hybrid[!,:shift_17] +@test dfr_shift[!,:sum] ≈ dfr_hybrid[!,:sum] ## Data dfr = DataFrame(trait = Y, tipNames = sim.M.tipNames) @@ -203,10 +203,10 @@ modhet = phyloNetworklm(@formula(trait ~ sum + shift_8), dfr, net) table1 = ftest(modhet, modhom, modnull) table2 = PhyloNetworks.anova(modnull, modhom, modhet) -@test table1.fstat[1] ≈ table2[:F][2] -@test table1.fstat[2] ≈ table2[:F][1] -@test table1.pval[1].v ≈ table2[Symbol("Pr(>F)")][2] -@test table1.pval[2].v ≈ table2[Symbol("Pr(>F)")][1] +@test table1.fstat[1] ≈ table2[2,:F] +@test table1.fstat[2] ≈ table2[1,:F] +@test table1.pval[1].v ≈ table2[2,Symbol("Pr(>F)")] +@test table1.pval[2].v ≈ table2[1,Symbol("Pr(>F)")] # ## Replace next 4 lines with previous ones when GLM.ftest available # @test table1[:F][2] ≈ table2[:F][2] # @test table1[:F][1] ≈ table2[:F][1] @@ -218,12 +218,12 @@ modhetbis = phyloNetworklm(@formula(trait ~ shift_8 + shift_17), dfr, net) table2bis = PhyloNetworks.anova(modnull, modhom, modhetbis) -@test table2[:F] ≈ table2bis[:F] -@test table2[Symbol("Pr(>F)")] ≈ table2bis[Symbol("Pr(>F)")] -@test table2[:dof_res] ≈ table2bis[:dof_res] -@test table2[:RSS] ≈ table2bis[:RSS] -@test table2[:dof] ≈ table2bis[:dof] -@test table2[:SS] ≈ table2bis[:SS] +@test table2[!,:F] ≈ table2bis[!,:F] +@test table2[!,Symbol("Pr(>F)")] ≈ table2bis[!,Symbol("Pr(>F)")] +@test table2[!,:dof_res] ≈ table2bis[!,:dof_res] +@test table2[!,:RSS] ≈ table2bis[!,:RSS] +@test table2[!,:dof] ≈ table2bis[!,:dof] +@test table2[!,:SS] ≈ table2bis[!,:SS] end @@ -343,7 +343,7 @@ fitbis = phyloNetworklm(@formula(trait ~ pred), dfr, net) # unnamed ordered data dfr = DataFrame(trait = B, pred = A) -fitter = (@test_logs (:info, r"^As requested \(no_names=true\)") match_mode=:any phyloNetworklm(@formula(trait ~ pred), dfr, net, no_names=true)) +fitter = (@test_logs (:info, r"^As requested 
\(no_names=true\)") phyloNetworklm(@formula(trait ~ pred), dfr, net, no_names=true)) @test coef(phynetlm) ≈ coef(fitter) @test vcov(phynetlm) ≈ vcov(fitter) @@ -372,8 +372,8 @@ dfr = dfr[sample(1:12, 12, replace=false), :] ### Add NAs dfr = DataFrame(trait = B, pred = A, tipNames = tipLabels(sim)) -dfr[:pred] = allowmissing(dfr[:pred]) -dfr[[2, 8, 11], :pred] = missing +allowmissing!(dfr, :pred) +dfr[[2, 8, 11], :pred] .= missing fitna = phyloNetworklm(@formula(trait ~ pred), dfr, net) #@show fitna @@ -433,7 +433,7 @@ fitSH = phyloNetworklm(@formula(trait ~ pred), dfr, net, model = "scalingHybrid" @test aic(fitlam) ≈ aic(fitSH) ## Pagel's Lambda -fitlam = (@test_logs (:info, r"^Maximum lambda value") match_mode=:any phyloNetworklm(@formula(trait ~ pred), dfr, net, model = "lambda")) +fitlam = (@test_logs (:info, r"^Maximum lambda value") phyloNetworklm(@formula(trait ~ pred), dfr, net, model = "lambda")) #@show fitlam @test lambda_estim(fitlam) ≈ 1.1135518305 atol=1e-6 @@ -456,16 +456,16 @@ blup = (@test_logs (:warn, r"^These prediction intervals show uncertainty in anc @test_logs show(devnull, blup) # BLUP same, using the function directly -blup_bis = (@test_logs (:warn, r"^These prediction intervals show uncertainty in ancestral values") match_mode=:any ancestralStateReconstruction(dfr, net)); +blup_bis = (@test_logs (:warn, r"^These prediction intervals show uncertainty in ancestral values") ancestralStateReconstruction(dfr, net)); -@test expectations(blup)[:condExpectation] ≈ expectations(blup_bis)[:condExpectation] -@test expectations(blup)[:nodeNumber] ≈ expectations(blup_bis)[:nodeNumber] +@test expectations(blup)[!,:condExpectation] ≈ expectations(blup_bis)[!,:condExpectation] +@test expectations(blup)[!,:nodeNumber] ≈ expectations(blup_bis)[!,:nodeNumber] @test blup.traits_tips ≈ blup_bis.traits_tips @test blup.TipNumbers ≈ blup_bis.TipNumbers @test predint(blup) ≈ predint(blup_bis) -@test predintPlot(blup)[:PredInt] == predintPlot(blup_bis)[:PredInt] 
-@test predintPlot(blup, withExp=true)[:PredInt] == predintPlot(blup_bis, withExp=true)[:PredInt] -@test expectationsPlot(blup)[:PredInt] == expectationsPlot(blup_bis)[:PredInt] +@test predintPlot(blup)[!,:PredInt] == predintPlot(blup_bis)[!,:PredInt] +@test predintPlot(blup, withExp=true)[!,:PredInt] == predintPlot(blup_bis, withExp=true)[!,:PredInt] +@test expectationsPlot(blup)[!,:PredInt] == expectationsPlot(blup_bis)[!,:PredInt] dfr = DataFrame(trait = Y, tipNames = tipLabels(sim), reg = Y) @test_throws ErrorException ancestralStateReconstruction(dfr, net) # cannot handle a predictor @@ -475,14 +475,14 @@ dfr2 = dfr[sample(1:12, 12, replace=false), :] phynetlm = phyloNetworklm(@formula(trait~1), dfr2, net) blup2 = (@test_logs (:warn, r"^These prediction intervals show uncertainty in ancestral values") ancestralStateReconstruction(phynetlm)) -@test expectations(blup)[:condExpectation][1:length(blup.NodeNumbers)] ≈ expectations(blup2)[:condExpectation][1:length(blup.NodeNumbers)] +@test expectations(blup)[1:length(blup.NodeNumbers),:condExpectation] ≈ expectations(blup2)[1:length(blup.NodeNumbers),:condExpectation] @test blup.traits_tips[phynetlm.model.ind] ≈ blup2.traits_tips @test blup.TipNumbers[phynetlm.model.ind] ≈ blup2.TipNumbers @test predint(blup)[1:length(blup.NodeNumbers), :] ≈ predint(blup2)[1:length(blup.NodeNumbers), :] # With unknown tips -dfr[:trait] = allowmissing(dfr[:trait]) -dfr[[2, 4], :trait] = missing +allowmissing!(dfr, :trait) +dfr[[2, 4], :trait] .= missing phynetlm = phyloNetworklm(@formula(trait~1), dfr, net) blup = (@test_logs (:warn, r"^These prediction intervals show uncertainty in ancestral values") ancestralStateReconstruction(phynetlm)) # plot(net, blup) @@ -492,14 +492,14 @@ dfr2 = dfr[[1, 2, 5, 3, 4, 6, 7, 8, 9, 10, 11, 12], :] phynetlm = phyloNetworklm(@formula(trait~1), dfr, net) blup2 = (@test_logs (:warn, r"^These prediction intervals show uncertainty in ancestral values") ancestralStateReconstruction(phynetlm)) -@test 
expectations(blup)[:condExpectation] ≈ expectations(blup2)[:condExpectation] +@test expectations(blup)[!,:condExpectation] ≈ expectations(blup2)[!,:condExpectation] @test predint(blup) ≈ predint(blup2) -@test predintPlot(blup)[:PredInt] == predintPlot(blup2)[:PredInt] -@test predintPlot(blup, withExp=true)[:PredInt] == predintPlot(blup2, withExp=true)[:PredInt] +@test predintPlot(blup)[!,:PredInt] == predintPlot(blup2)[!,:PredInt] +@test predintPlot(blup, withExp=true)[!,:PredInt] == predintPlot(blup2, withExp=true)[!,:PredInt] # Test mark on missing ee = expectationsPlot(blup) -predMiss = ee[indexin([n.number for n in net.leaf][[2,4]], ee[:nodeNumber]),:PredInt] +predMiss = ee[indexin([n.number for n in net.leaf][[2,4]], ee[!,:nodeNumber]),:PredInt] for pp = predMiss @test pp[end] == '*' end @@ -523,14 +523,14 @@ B = b0 .+ (b1 .* A + randn(size(tipLabels(net), 1))) dfr = DataFrame(trait = B, pred = A, tipNames = tipLabels(net)) ## Network -phynetlm = (@test_logs (:info, r"^Maximum lambda value") match_mode=:any phyloNetworklm(@formula(trait ~ pred), dfr, net, model = "lambda")) +phynetlm = (@test_logs (:info, r"^Maximum lambda value") phyloNetworklm(@formula(trait ~ pred), dfr, net, model = "lambda")) @test lambda_estim(phynetlm) ≈ 0.5894200143 atol=1e-8 ## Major Tree global tree tree = majorTree(net) -phynetlm = (@test_logs (:info, r"^Maximum lambda value") match_mode=:any phyloNetworklm(@formula(trait ~ pred), dfr, tree, model = "lambda")) +phynetlm = (@test_logs (:info, r"^Maximum lambda value") phyloNetworklm(@formula(trait ~ pred), dfr, tree, model = "lambda")) @test lambda_estim(phynetlm) ≈ 0.5903394415 atol=1e-6 diff --git a/test/test_lm_tree.jl b/test/test_lm_tree.jl index 6da1d5910..0bf06f3ca 100644 --- a/test/test_lm_tree.jl +++ b/test/test_lm_tree.jl @@ -85,7 +85,7 @@ ancR = CSV.read(joinpath(@__DIR__, "..", "examples", "caudata_Rphylopars.txt"), ## Expectations expe = expectations(anc) -expeR = ancR[:trait] +expeR = ancR[!,:trait] # Matching tips ? 
tipsR = expeR[expe[197:393, :nodeNumber]] tipsJulia = expe[197:393, :condExpectation] @@ -101,7 +101,7 @@ nodesJulia = expe[1:196, :condExpectation] ## Variances vars = LinearAlgebra.diag(anc.variances_nodes) # Rphylopars -varsR = ancR[:var] +varsR = ancR[!,:var] # Matching nodes ? nodesR = varsR[-expe[1:196, :nodeNumber] .+ 196] @test nodesR ≈ vars atol=1e-3 ## RK: Small tol !! @@ -111,7 +111,7 @@ ancRt = CSV.read(joinpath(@__DIR__, "..", "examples", "caudata_Phytools.txt")); ## Expectations expe = expectations(anc) -expeRt = ancRt[:trait] +expeRt = ancRt[!,:trait] # Matching nodes ? nodesRt = expeRt[-expe[1:196, :nodeNumber] .+ (196 - 197)] nodesJulia = expe[1:196, :condExpectation] @@ -123,7 +123,7 @@ nodesJulia = expe[1:196, :condExpectation] ## Variances vars = LinearAlgebra.diag(anc.variances_nodes) # Rphylopars -varsRt = ancRt[:var] +varsRt = ancRt[!,:var] # Matching nodes ? nodesRt = varsRt[-expe[1:196, :nodeNumber] .+ (196 - 197)] @test nodesRt ≈ vars atol=2e-3 ## RK: Small tol !! @@ -209,7 +209,7 @@ nodesRt = varsRt[-expe[1:196, :nodeNumber] .+ (196 - 197)] ############################################################################### ## Fit Pagel's lambda -fitLambda = (@test_logs (:info, r"^Maximum lambda value") match_mode=:any phyloNetworklm(@formula(trait ~ 1), dat, phy, model = "lambda")); +fitLambda = (@test_logs (:info, r"^Maximum lambda value") phyloNetworklm(@formula(trait ~ 1), dat, phy, model = "lambda")); @test lambda_estim(fitLambda) ≈ 0.9193 atol=1e-4 # Due to convergence issues, tolerance is lower. 
@test loglikelihood(fitLambda) ≈ -51.684379 atol=1e-6 @@ -454,7 +454,7 @@ vcovR = [0.0200086273 -0.0136717540 0.0084815090 -0.0093192029 -0.0114417825 ############################################################################### ## Fit lambda -fitLambda = (@test_logs (:info, r"^Maximum lambda value") match_mode=:any phyloNetworklm(@formula(AVG_SVL ~ AVG_ltoe_IV + AVG_lfing_IV * region), dat, phy, model = "lambda")) +fitLambda = (@test_logs (:info, r"^Maximum lambda value") phyloNetworklm(@formula(AVG_SVL ~ AVG_ltoe_IV + AVG_lfing_IV * region), dat, phy, model = "lambda")) # Tests against results obtained with geiger::fitContinuous or phylolm::phylolm @test lambda_estim(fitLambda) ≈ 0.9982715594 atol=1e-5 diff --git a/test/test_movedownlevel.jl b/test/test_movedownlevel.jl deleted file mode 100644 index 779cfa6f5..000000000 --- a/test/test_movedownlevel.jl +++ /dev/null @@ -1,24 +0,0 @@ -# test for moveDownLevel -# Claudia April 2015 - -# starting topology: Case G -include("../examples/case_g_example.jl"); -currT = deepcopy(net); -printEdges(currT) - -# real network: Case H -df = readtable("CaseH_output.csv") -d = readTableCF(df) - -optBL!(currT,d) - - -include("../examples/case_f_example.jl"); -currT = deepcopy(net); -printEdges(currT) - -# real network: Case H -df = readtable("CaseH_output.csv") -d = readTableCF(df) - -optBL!(currT,d) diff --git a/test/test_multipleAlleles.jl b/test/test_multipleAlleles.jl index 7149460b7..08ed278a2 100644 --- a/test/test_multipleAlleles.jl +++ b/test/test_multipleAlleles.jl @@ -17,7 +17,7 @@ global tree, df, d, net, currT filename="CFmapped.csv")) rm("CFmapped.csv") rm("tmp.csv") - @test df[:t4] == ["4","7","3","7","3","3","7","3","3","3","7","3","3","3","3"] + @test df[!,:t4] == ["4","7","3","7","3","3","7","3","3","3","7","3","3","3","3"] end #----------------------------------------------------------# @@ -36,19 +36,19 @@ for i1 in 1:4 deleteat!(ind34, findfirst(isequal(i2), ind34)) for j in 1:2 i3=ind34[j]; i4=ind34[3-j] - 
d[:t1][irow]=letters[i1]; d[:t2][irow]=letters[i2]; d[:t3][irow]=letters[i3]; d[:t4][irow]=letters[i4] + d[irow,:t1]=letters[i1]; d[irow,:t2]=letters[i2]; d[irow,:t3]=letters[i3]; d[irow,:t4]=letters[i4] # CF12_34 corresponds to CFi1i2_i3i4 - if (i1,i2)∈[(1,2),(2,1),(3,4),(4,3)] d[:CF12_34][irow] = cfvalues[1] - elseif (i1,i2)∈[(1,3),(3,1),(2,4),(4,2)] d[:CF12_34][irow] = cfvalues[2] - elseif (i1,i2)∈[(1,4),(4,1),(2,3),(3,2)] d[:CF12_34][irow] = cfvalues[3] + if (i1,i2)∈[(1,2),(2,1),(3,4),(4,3)] d[irow,:CF12_34] = cfvalues[1] + elseif (i1,i2)∈[(1,3),(3,1),(2,4),(4,2)] d[irow,:CF12_34] = cfvalues[2] + elseif (i1,i2)∈[(1,4),(4,1),(2,3),(3,2)] d[irow,:CF12_34] = cfvalues[3] end # next: set CF13_24 - if (i1,i3)∈[(1,2),(2,1),(3,4),(4,3)] d[:CF13_24][irow] = cfvalues[1] - elseif (i1,i3)∈[(1,3),(3,1),(2,4),(4,2)] d[:CF13_24][irow] = cfvalues[2] - elseif (i1,i3)∈[(1,4),(4,1),(2,3),(3,2)] d[:CF13_24][irow] = cfvalues[3] + if (i1,i3)∈[(1,2),(2,1),(3,4),(4,3)] d[irow,:CF13_24] = cfvalues[1] + elseif (i1,i3)∈[(1,3),(3,1),(2,4),(4,2)] d[irow,:CF13_24] = cfvalues[2] + elseif (i1,i3)∈[(1,4),(4,1),(2,3),(3,2)] d[irow,:CF13_24] = cfvalues[3] end # nest: set CF14_23 - if (i1,i4)∈[(1,2),(2,1),(3,4),(4,3)] d[:CF14_23][irow] = cfvalues[1] - elseif (i1,i4)∈[(1,3),(3,1),(2,4),(4,2)] d[:CF14_23][irow] = cfvalues[2] - elseif (i1,i4)∈[(1,4),(4,1),(2,3),(3,2)] d[:CF14_23][irow] = cfvalues[3] + if (i1,i4)∈[(1,2),(2,1),(3,4),(4,3)] d[irow,:CF14_23] = cfvalues[1] + elseif (i1,i4)∈[(1,3),(3,1),(2,4),(4,2)] d[irow,:CF14_23] = cfvalues[2] + elseif (i1,i4)∈[(1,4),(4,1),(2,3),(3,2)] d[irow,:CF14_23] = cfvalues[3] end irow += 1 end diff --git a/test/test_nlopt_example.jl b/test/test_nlopt_example.jl deleted file mode 100644 index 554b4bc08..000000000 --- a/test/test_nlopt_example.jl +++ /dev/null @@ -1,44 +0,0 @@ -# small test with one tree quartet for the NLopt -# Claudia January 2015 - -treal = 0.1 -obsCF=[1-2/3*exp(-treal),1/3*exp(-treal),1/3*exp(-treal)] -using NLopt - -type ToyQuartet - 
t::Float64 -end - -function objective(x::Float64,obsCF::Vector{Float64}) - length(obsCF) == 3 || error("obsCF must be size 3") - val = obsCF[1]*log(1-2/3*exp(-x)) + obsCF[2]*log(1/3*exp(-x)) + obsCF[3]*log(1/3*exp(-x)) - return -val -end - -function optimizar(obsCF::Vector{Float64}, q::ToyQuartet) - t = q.t - k = length(t) - opt = NLopt.Opt(:LN_BOBYQA,k) # :LD_MMA if use gradient - # criterion based on prof Bates code - NLopt.ftol_rel!(opt,1e-12) # relative criterion - NLopt.ftol_abs!(opt,1e-8) # absolute critetion - NLopt.xtol_abs!(opt,1e-10) # criterion on parameter value changes - NLopt.lower_bounds!(opt, zeros(k)) - NLopt.upper_bounds!(opt, Inf) - count = 0 - function obj(x::Vector{Float64},g::Vector{Float64}) # added g::Vector{Float64} for gradient, ow error - count += 1 - println("t is $(t) initially, x: $(x)") - t = deepcopy(x) - println("t is now $(t), x: $(x)") - val = objective(x[1],obsCF) - println("f_$count: $(round(val, digits=5)), x: $(x)") - return val - end - NLopt.min_objective!(opt,obj) - fmin, xmin, ret = NLopt.optimize(opt,[t]) - return fmin,xmin -end - -q = ToyQuartet(1.0) -fmin,xmin = optimizar(obsCF,q) diff --git a/test/test_nni.jl b/test/test_nni.jl deleted file mode 100644 index 06267223b..000000000 --- a/test/test_nni.jl +++ /dev/null @@ -1,78 +0,0 @@ -# test tree move NNI in a simple quartet -# Claudia February 2015 - - -include("../src/types.jl") -include("../src/functions.jl") - -using Base.Collections # for updateInCycle with priority queue - -tree = "((1:0.1,2:0.2):0.5,3:0.3,4:0.4);" -f = open("prueba_tree.txt","w") -write(f,tree) -close(f) - -net = readTopology("prueba_tree.txt"); -printEdges(net) -printNodes(net) - -edge = chooseEdgeNNI(net); -println("$(edge.number)") -NNI!(net,edge) - -for e in net.edge - e.inCycle = -1 -end - -# Case 1 -net.edge[1].inCycle = 2 -net.edge[3].inCycle = 2 -net.edge[4].inCycle = 2 - -flag,edge = chooseEdgeNNI(net,10); -NNI!(net,edge) - -# Case 2 -net.edge[1].inCycle = 2 -net.edge[3].inCycle = 2 
-net.edge[4].inCycle = 2 -net.edge[2].inCycle = 2 - -flag,edge = chooseEdgeNNI(net,10); -NNI!(net,edge) - -# Case 3 -net.edge[1].inCycle = 2 -net.edge[2].inCycle = 3 - -flag,edge = chooseEdgeNNI(net,10); -NNI!(net,edge) - -# Case 4 -net.edge[1].inCycle = 2 -net.edge[2].inCycle = 2 - -flag,edge = chooseEdgeNNI(net,10); -NNI!(net,edge) - -# Case 5 -net.edge[1].inCycle = 2 -net.edge[2].inCycle = 2 -net.edge[4].inCycle = 3 -net.edge[5].inCycle = 3 - -flag,edge = chooseEdgeNNI(net,10); -NNI!(net,edge) - -# Case 1 -net.edge[2].inCycle = 2 -net.edge[3].inCycle = 2 -net.edge[4].inCycle = 2 - -net.node[3].inCycle = 2 -net.node[6].inCycle = 2 - -flag,edge = chooseEdgeNNI(net,10) -NNI!(net,edge) -printEdges(net) -printNodes(net) diff --git a/test/test_optBL.jl b/test/test_optBL.jl deleted file mode 100644 index ed7d16289..000000000 --- a/test/test_optBL.jl +++ /dev/null @@ -1,569 +0,0 @@ -# test for the optimization of branch lengths -# with Case g -# Claudia January 2015 - -## include("../examples/case_g_example.jl"); -## q1 = Quartet(1,["6","7","4","8"],[0.5,0.4,0.1]); -## q2 = Quartet(2,["6","7","10","8"],[0.5,0.4,0.1]); -## q3 = Quartet(3,["10","7","4","8"],[0.5,0.4,0.1]); -## q4 = Quartet(4,["6","10","4","8"],[0.5,0.4,0.1]); -## q5 = Quartet(5,["6","7","4","10"],[0.5,0.4,0.1]); - -## d = DataCF([q1,q2,q3,q4,q5]); -## extractQuartet!(net,d) - -## df = writeExpCF(d.quartet) -## writetable("CaseG_output.csv",df) - -include("../src/types.jl") -include("../src/functions.jl") - -df = readtable("CaseG_output.csv") -d2 = readTableCF(df) - -tree = "((((6,4)1,(7)11#H1:::0.8)5,(11#H1:::0.2,8)),10);" # Case G different starting branch lengths -f = open("prueba_tree.txt","w") -write(f,tree) -close(f) -net = readTopologyUpdate("prueba_tree.txt"); -printEdges(net) - -net.ht -realht = [0.1,0.2,0.1,1.0] - -@time optBL!(net,d2) -# with usual logPseudoLik: -#got 5.33118 at [0.247,0.18076,0.12026,0.2956] after 81 iterations (returned FTOL_REACHED) -#elapsed time: 8.654081812 seconds 
(8624832 bytes allocated) -#(5.331178696104555,[0.246999,0.180762,0.120265,0.295604]) - -# with new logPseudoLik: -#got 0.0 at [0.14088,0.19534,0.10493,0.59544] after 512 iterations (returned FTOL_REACHED) -#elapsed time: 6.552122524 seconds (95796384 bytes allocated, 0.56% gc time) -#(5.522931422394296e-8,[0.140883,0.195339,0.10493,0.595437]) - -# with 100*logPseudoLik and t in (0,10): takes longer, but finds it! -#got 0.0 at [0.10085,0.19991,0.1001,0.98557] after 1123 iterations (returned FTOL_REACHED) -#elapsed time: 53.20413795 seconds (107820188 bytes allocated, 0.13% gc time) -#(2.169777681982341e-9,[0.100853,0.199907,0.100098,0.985569]) - -# with new ftol, xtol values -#got 0.00073 at [0.2474,0.18189,0.11927,0.30014] after 48 iterations (returned XTOL_REACHED) -#elapsed time: 0.023274544 seconds (4256960 bytes allocated) -# - -# with old ftol,xtol values -#got 0.0 at [0.13768,0.19568,0.10456,0.61459] after 561 iterations (returned FTOL_REACHED) -#elapsed time: 2.122653096 seconds (118537484 bytes allocated, 4.50% gc time) - -@time optBL!(net,d2,false,1e-5,1e-6,1e-3,1e-4) -#got 0.0001 at [0.24708,0.18075,0.12033,0.29593] after 64 iterations (returned SUCCESS) -#elapsed time: 0.028643709 seconds (5502208 bytes allocated) - - -@allocated fmin,xmin=optBL!(net,d2) - -# -------- different starting point ------ - -# (0.12,1.,1.,1.) -tree = "((((6,4)1,(7)11#H1:::0.88)5,(11#H1:::0.12,8)),10);" # Case G different starting branch lengths -f = open("prueba_tree.txt","w") -write(f,tree) -close(f) -net = readTopologyUpdate("prueba_tree.txt"); -printEdges(net) - -@time fmin,xmin=optBL!(net,d2) - - -# (0.1,1.,1.,1.) 
-tree = "((((6,4)1,(7)11#H1)5,(11#H1,8)),10);" # Case G different starting branch lengths -f = open("prueba_tree.txt","w") -write(f,tree) -close(f) -net = readTopologyUpdate("prueba_tree.txt"); -printEdges(net) - -@time optBL!(net,d2) -#got 0.01558 at [0.09049,0.20435,0.09535,0.91173] after 31 iterations (returned XTOL_REACHED) -#elapsed time: 0.014890177 seconds (2772976 bytes allocated) - -# (0.05,1.,1.,1.) -tree = "((((6,4)1,(7)11#H1:::0.95)5,(11#H1:::0.05,8)),10);" # Case G different starting branch lengths -f = open("prueba_tree.txt","w") -write(f,tree) -close(f) -net = readTopologyUpdate("prueba_tree.txt"); -printEdges(net) - -@time fmin,xmin=optBL!(net,d2) - - -# (0.15,1.,1.,1.) -tree = "((((6,4)1,(7)11#H1:::0.85)5,(11#H1:::0.15,8)),10);" # Case G different starting branch lengths -f = open("prueba_tree.txt","w") -write(f,tree) -close(f) -net = readTopologyUpdate("prueba_tree.txt"); -printEdges(net) - -@time fmin,xmin=optBL!(net,d2) - - -# (0.35,1.,1.,1.) -tree = "((((6,4)1,(7)11#H1:::0.65)5,(11#H1:::0.35,8)),10);" # Case G different starting branch lengths -f = open("prueba_tree.txt","w") -write(f,tree) -close(f) -net = readTopologyUpdate("prueba_tree.txt"); -printEdges(net) - -@time fmin,xmin=optBL!(net,d2) - - -# (0.11,0.22,0.11,1.1) -tree = "((((6,4)1,(7)11#H1:::0.89)5,(11#H1:::0.11,8)),10);" # Case G different starting branch lengths -f = open("prueba_tree.txt","w") -write(f,tree) -close(f) -net = readTopologyUpdate("prueba_tree.txt"); -printEdges(net) -printNodes(net) - -@time fmin,xmin=optBL!(net,d2) - -# ================================================================================================== - -# test optBL with Case H -# Claudia January 2015 - - -## include("../examples/case_h_example.jl"); -## q1 = Quartet(1,["6","7","4","8"],[0.5,0.4,0.1]); -## q2 = Quartet(2,["6","7","10","8"],[0.5,0.4,0.1]); -## q3 = Quartet(3,["10","7","4","8"],[0.5,0.4,0.1]); -## q4 = Quartet(4,["6","10","4","8"],[0.5,0.4,0.1]); -## q5 = 
Quartet(5,["6","7","4","10"],[0.5,0.4,0.1]); - -## d = DataCF([q1,q2,q3,q4,q5]); -## extractQuartet!(net,d) - -## df = writeExpCF(d.quartet) -## writetable("CaseH_output.csv",df) - -include("../src/types.jl") -include("../src/functions.jl") - -df = readtable("CaseH_output.csv") -df = readtable("CaseH_output2.csv") -d2 = readTableCF(df) - -# starting ht (gamma,t3,t5,t7) -ht = [0.2,1.,1.,1.] -ht = [0.05,0.0001,2.,1.] #strange case un debug18bad.txt - -tree = string("((((6:0.1,4:1.5):",string(ht[2]),",#H1:::",string(ht[1]),"):",string(ht[3]),",7:0.2):",string(ht[4]),",(8)#H1:::",string(1-ht[1]),",10:0.1);") # Case H -f = open("prueba_tree.txt","w") -write(f,tree) -close(f) -net = readTopologyUpdate("prueba_tree.txt"); -printEdges(net) - -net.ht -realht = [0.1,0.1,1.,0.1] - -@time optBL!(net,d2) -# with usual logPseudoLik: -#got 4.34098 at [0.1,0.10001,0.99999,0.1] after 151 iterations (returned FTOL_REACHED) -#elapsed time: 21.076511672 seconds (21355496 bytes allocated, 0.17% gc time) -#(4.340977085797292,[0.0999962,0.100005,0.999989,0.0999984]) - -# with new logPseudoLik: -#got 0.0 at [0.1,0.1,1.0,0.1] after 192 iterations (returned FTOL_REACHED) -#elapsed time: 0.973165584 seconds (16698980 bytes allocated, 3.44% gc time) -#(4.3376522534621724e-13,[0.1,0.0999979,1.0,0.100001]) - -# with 100*logPseudoLik and t in (0,10): -#got 0.0 at [0.1,0.1,1.0,0.1] after 208 iterations (returned FTOL_REACHED) -#elapsed time: 6.892366587 seconds (20646048 bytes allocated) -#(2.4010737722561867e-13,[0.0999999,0.0999992,1.0,0.1]) - -# with new ftol, xtol (start 0.2,1,1,1) -#got 0.04565 at [0.20363,0.64827,0.40823,0.11374] after 51 iterations (returned XTOL_REACHED) -#elapsed time: 0.057075544 seconds (4462816 bytes allocated, 53.14% gc time) - -#got 0.04478 at [0.20178,0.64205,0.41684,0.11366] after 25 iterations (returned SUCCESS) -#elapsed time: 0.016354695 seconds (2226424 bytes allocated) - -# with old ftol, xol -#got 0.0 at [0.1,0.1,1.0,0.1] after 176 iterations (returned 
FTOL_REACHED) -#elapsed time: 0.117537385 seconds (15143800 bytes allocated, 29.95% gc time) - -@time optBL!(net,d2,false,1e-5,1e-6,1e-5,1e-6) -#got 1.0e-5 at [0.10043,0.1054,0.99408,0.10017] after 115 iterations (returned FTOL_REACHED) -#elapsed time: 0.055701792 seconds (10398360 bytes allocated) - -@time optBL!(net,d2,false,1e-5,1e-6,1e-3,1e-4) -#got 1.0e-5 at [0.10054,0.10558,0.99394,0.10022] after 25 iterations (returned FTOL_REACHED) -#elapsed time: 0.013228257 seconds (2226504 bytes allocated) - -# from debug18bad -tree = string("(4,6,(#H2:0.7392085405544356::0.046179825120885414,(7,(10,(8)#H2:0.0::0.9538201748791146):0.9803511144374873):2.212878358589699):0.00000038687);") -f = open("prueba_tree.txt","w") -write(f,tree) -close(f) -net = readTopologyUpdate("prueba_tree.txt"); -printEdges(net) - -net.ht -realht = [0.1,0.1,1.,0.1] - -@time optBL!(net,d2) - - -# ================================================================================================================================== - -# test optBL with Case J -# Claudia January 2015 - - -## include("../examples/case_j_example.jl"); -## q1 = Quartet(1,["6","7","4","8"],[0.5,0.4,0.1]); -## q2 = Quartet(2,["6","7","10","8"],[0.5,0.4,0.1]); -## q3 = Quartet(3,["10","7","4","8"],[0.5,0.4,0.1]); -## q4 = Quartet(4,["6","10","4","8"],[0.5,0.4,0.1]); -## q5 = Quartet(5,["6","7","4","10"],[0.5,0.4,0.1]); - -## d = DataCF([q1,q2,q3,q4,q5]); -## extractQuartet!(net,d) - -## df = writeExpCF(d.quartet) -## writetable("CaseJ_output.csv",df) - -include("../src/types.jl") -include("../src/functions.jl") - -df = readtable("CaseJ_output.csv") -d2 = readTableCF(df) - -# starting ht (gamma,t3,t5,t7) -ht = [0.2,1.,1.,1.] 
- -tree = string("((((6)#H1:::",string(1-ht[1]),",4:1.5):",string(ht[2]),",7:0.2):",string(ht[3]),",8:0.1,(#H1:::",string(ht[1]),",10));") # Case J -f = open("prueba_tree.txt","w") -write(f,tree) -close(f) -net = readTopologyUpdate("prueba_tree.txt"); -printEdges(net) - -net.ht -realht = [0.1,0.2,0.1,1.0] - -@time optBL!(net,d2) -# with usual logPseudoLik: -#got 5.39161 at [0.09997,0.19999,0.1,1.00061] after 246 iterations (returned FTOL_REACHED) -#elapsed time: 59.297791699 seconds (71939204 bytes allocated, 0.11% gc time) -#(5.3916134028946034,[0.0999661,0.199991,0.0999991,1.00061]) - -# with new logPseudoLik: -#got 0.0 at [0.10002,0.20001,0.1,0.99958] after 260 iterations (returned FTOL_REACHED) -#elapsed time: 1.211645168 seconds (21966196 bytes allocated) -#(9.797186804450541e-12,[0.100022,0.200007,0.100001,0.999579]) - -# with 100*logPseudoLik and t in (0,10): -#got 0.0 at [0.1,0.2,0.1,1.00003] after 249 iterations (returned FTOL_REACHED) -#elapsed time: 6.656956697 seconds (23010204 bytes allocated, 0.56% gc time) -#(5.280560068867562e-12,[0.0999983,0.199999,0.0999999,1.00003]) - -# with ftol, xtol -#got 0.05209 at [0.26015,0.26161,0.10523,0.24363] after 42 iterations (returned XTOL_REACHED) -#elapsed time: 0.021402455 seconds (3634568 bytes allocated) - -# with old ftol, xtol -#got 0.0 at [0.1,0.2,0.1,1.0] after 233 iterations (returned FTOL_REACHED) -#elapsed time: 0.112556852 seconds (19426488 bytes allocated) - -@time optBL!(net,d2,false,1e-5,1e-6,1e-3,1e-4) -#got 0.00219 at [0.1303,0.20847,0.10099,0.6283] after 106 iterations (returned FTOL_REACHED) -#elapsed time: 0.060469424 seconds (8887448 bytes allocated) - -@time optBL!(net,d2,false,1e-5,1e-6,1e-5,1e-6) -#got 0.00177 at [0.13073,0.20994,0.10121,0.64478] after 26 iterations (returned FTOL_REACHED) -#elapsed time: 0.012715507 seconds (2277912 bytes allocated) - -# ================================================================================================================================== - -# 
test optBL with Case F Bad Diamond I -# Claudia January 2015 - - -## include("../examples/case_f_example.jl"); -## parameters!(net) -## q1 = Quartet(1,["6","7","4","8"],[0.5,0.4,0.1]); -## q2 = Quartet(2,["6","7","10","8"],[0.5,0.4,0.1]); -## q3 = Quartet(3,["10","7","4","8"],[0.5,0.4,0.1]); -## q4 = Quartet(4,["6","10","4","8"],[0.5,0.4,0.1]); -## q5 = Quartet(5,["6","7","4","10"],[0.5,0.4,0.1]); - -## d = DataCF([q1,q2,q3,q4,q5]); -## extractQuartet!(net,d) - -## df = writeExpCF(d.quartet) -## writetable("CaseF_output.csv",df) - -include("../src/types.jl") -include("../src/functions.jl") - -df = readtable("CaseF_output.csv") -d2 = readTableCF(df) - -# starting ht (gamma,t4,t5,t9) -ht = [0.1,1.,1.,1.] - -tree = string("(((6:0.1,(4)11#H1:::",string(1-ht[1]),")1:",string(ht[3]),",(11#H1:::",string(ht[1]),",7))5:",string(ht[4])",8:0.1,10:0.1);") # Case F: bad diamond I -f = open("prueba_tree.txt","w") -write(f,tree) -close(f) -net = readTopologyUpdate("prueba_tree.txt"); -printEdges(net) - -net.ht -realht = [0.1,0.127,0.0285] - -@time optBL!(net,d2) -# with usual logPseudoLik: -#got 5.40235 at [0.1,0.12689,0.02855] after 116 iterations (returned FTOL_REACHED) -#elapsed time: 15.648447251 seconds (17823396 bytes allocated, 0.18% gc time) -#(5.402353356033268,[0.1,0.126887,0.0285486]) - -# with new logPseudoLik: -#got 0.0 at [0.1,0.12689,0.02855] after 116 iterations (returned FTOL_REACHED) -#elapsed time: 0.494250509 seconds (9272420 bytes allocated) -#(3.6216219428084243e-12,[0.1,0.126887,0.0285488]) - -# with new logPseudoLik and new algorithm LN_COBYLA -#got 0.0 at [0.1,0.12689,0.02855] after 3277 iterations (returned FTOL_REACHED) -#elapsed time: 23.001580343 seconds (312597904 bytes allocated, 0.98% gc time) -#(1.920897287842076e-13,[0.0999998,0.126889,0.0285491]) - -# with 100*logPseudoLik and t in (0,10): -# got 0.0 at [0.1,0.12689,0.02855] after 3356 iterations (returned FTOL_REACHED) -# elapsed time: 90.405727385 seconds (281649264 bytes allocated, 0.24% gc 
time) -# (8.157692309723136e-12,[0.0999999,0.126889,0.028549]) - -# with ftol,xtol -#got 0.25041 at [0.06362,0.15339,0.04918] after 24 iterations (returned XTOL_REACHED) -#elapsed time: 0.045551191 seconds (2403576 bytes allocated) - -#with old ftol, xtol -#got 0.0 at [0.1,0.12689,0.02855] after 2917 iterations (returned FTOL_REACHED) -#elapsed time: 1.303746554 seconds (229545768 bytes allocated, 11.98% gc time) - -@time optBL!(net,d2,false,1e-5,1e-6,1e-3,1e-4) -#got 5.0e-5 at [0.09974,0.12735,0.02892] after 92 iterations (returned XTOL_REACHED) -#elapsed time: 0.079739994 seconds (7321368 bytes allocated, 45.28% gc time) - -# ================================================================================================================================== - -# test optBL with Case I Bad Diamond II -# Claudia January 2015 - - -## include("../examples/case_i_example.jl"); -## q1 = Quartet(1,["6","7","4","8"],[0.5,0.4,0.1]); -## q2 = Quartet(2,["6","7","10","8"],[0.5,0.4,0.1]); -## q3 = Quartet(3,["10","7","4","8"],[0.5,0.4,0.1]); -## q4 = Quartet(4,["6","10","4","8"],[0.5,0.4,0.1]); -## q5 = Quartet(5,["6","7","4","10"],[0.5,0.4,0.1]); - -## d = DataCF([q1,q2,q3,q4,q5]); -## extractQuartet!(net,d) - -## df = writeExpCF(d.quartet) -## writetable("CaseI_output.csv",df) - -include("../src/types.jl") -include("../src/functions.jl") - -df = readtable("CaseI_output.csv") -d2 = readTableCF(df) - -# starting ht (gamma,t4,t6,t9,t10) -ht = [0.2,0.0,2.0,2.0,2.0] -ht = [0.2,0.0,0.5,0.5,0.5] - -tree = string("((((8,10):",string(ht[2]),")#H1:::",string(1-ht[1]),",7):",string(ht[3]),",6,(4,#H1:",string(ht[4]),"::",string(ht[1]),"):",string(ht[5]),");") # Case I Bad diamond II -f = open("prueba_tree.txt","w") -write(f,tree) -close(f) -net = readTopologyUpdate("prueba_tree.txt"); -printEdges(net) - -net.ht -realht = [0.1,2.0,1.0,1.0,1.0] - -@time optBL!(net,d2) -## with usual logPseudoLik: -# with ht = [0.2,0.0,0.5,0.5,0.5] -#got 3.69564 at [0.09753,1.97129,0.99532,0.46822,1.04214] 
after 267 iterations (returned FTOL_REACHED) -#elapsed time: 45.70869142 seconds (35023452 bytes allocated, 0.06% gc time) -#(3.6956398763931144,[0.0975269,1.97129,0.995318,0.468224,1.04214]) - -#ht: [0.2,1.0,2.0,2.0,2.0] -#got 3.69564 at [0.1034,2.03878,1.00653,3.58586,0.94929] after 176 iterations (returned FTOL_REACHED) -#elapsed time: 15.743171805 seconds (17639780 bytes allocated) -#(3.6956415275081724,[0.103403,2.03878,1.00653,3.58586,0.949288]) - - -## with new logPseudoLik: -#ht: [0.2,1.0,2.0,2.0,2.0] -#got 0.0 at [0.10342,2.03906,1.00656,3.60666,0.94899] after 195 iterations (returned FTOL_REACHED) -#elapsed time: 1.11298624 seconds (17529096 bytes allocated, 2.98% gc time) -#(3.3635155002465492e-6,[0.103419,2.03906,1.00656,3.60666,0.948988]) - -# with ht = [0.2,0.0,0.5,0.5,0.5] -#got 0.0 at [0.09997,1.99969,0.99995,0.99284,1.00042] after 424 iterations (returned FTOL_REACHED) -#elapsed time: 1.733474741 seconds (36994324 bytes allocated, 1.90% gc time) -#(1.8811458391292683e-10,[0.0999737,1.99969,0.99995,0.992835,1.00042]) - -# with 100*logPseudoLik and t in (0,10): -# ht = [0.2,0.0,2.0,2.0,2.0] -#got 0.0 at [0.1,2.0,1.0,0.99994,1.0] after 753 iterations (returned FTOL_REACHED) -#elapsed time: 47.213517434 seconds (80265788 bytes allocated, 0.16% gc time) -#(1.2568189823957448e-12,[0.0999998,2.0,1.0,0.999941,1.0]) - -# with ftol,xtol -# ht = [0.2,0.0,2.0,2.0,2.0] -#got 0.00256 at [0.10406,2.0465,1.00637,3.33775,0.98475] after 58 iterations (returned XTOL_REACHED) -#elapsed time: 0.034984668 seconds (5205128 bytes allocated) - -# with old ftol, xtol -#got 0.00034 at [0.1034,2.03863,1.00652,3.5823,0.94937] after 201 iterations (returned FTOL_REACHED) -#elapsed time: 0.094041162 seconds (17632400 bytes allocated) - -@time optBL!(net,d2,false,1e-5,1e-6,1e-3,1e-4) -# ht = [0.2,0.0,2.0,2.0,2.0] -#got 0.00043 at [0.10287,2.02652,1.00596,5.93267,0.96293] after 51 iterations (returned SUCCESS) -#elapsed time: 0.027747237 seconds (4558192 bytes allocated) - -@time 
optBL!(net,d2,false,1e-5,1e-6,1e-5,1e-6) -#got 0.00036 at [0.10311,2.03708,1.00602,3.43898,0.95781] after 35 iterations (returned FTOL_REACHED) -#elapsed time: 0.018274282 seconds (3167744 bytes allocated) - -# -------------------5taxon tree------------------ -## include("../tree_example_read.jl"); -## printEdges(net) -## q1 = Quartet(1,["6","7","4","8"],[0.5,0.4,0.1]); -## q2 = Quartet(2,["6","7","10","8"],[0.5,0.4,0.1]); -## q3 = Quartet(3,["10","7","4","8"],[0.5,0.4,0.1]); -## q4 = Quartet(4,["6","10","4","8"],[0.5,0.4,0.1]); -## q5 = Quartet(5,["6","7","4","10"],[0.5,0.4,0.1]); - -## d = DataCF([q1,q2,q3,q4,q5]); -## extractQuartet!(net,d) - -## df = writeExpCF(d.quartet) -## writetable("Tree_output.csv",df) - - -include("../src/types.jl") -include("../src/functions.jl") - -df = readtable("Tree_output.csv") -d2 = readTableCF(df) - -# starting tree: -ht = [1.0,1.0] -tree = string("(((6:0.1,4:1.5)1:",string(ht[1]),",7:0.2)5:",string(ht[2]),",8:0.1,10:0.1);") # normal tree -f = open("prueba_tree.txt","w") -write(f,tree) -close(f) -net = readTopologyUpdate("prueba_tree.txt"); - - -net.ht -realht = [0.2,0.1] - -@time optBL!(net,d2) -#got 5.34957 at [0.2,0.1] after 28 iterations (returned FTOL_REACHED) -#elapsed time: 5.533522804 seconds (54162200 bytes allocated, 0.41% gc time) -#(5.349567420518451,[0.2,0.1]) - -#with ftol, xtol -#got 0.0 at [0.19999,0.09999] after 20 iterations (returned XTOL_REACHED) -#elapsed time: 0.007440742 seconds (1234840 bytes allocated) - -#============================================================================================== -#================ Debugging optBL ============================================================ -# compare expCF from wrong estimates with real estimates -# =========================================================================================== - -# test optBL with Case I Bad Diamond II -# does not yield correct ht for one starting point - -include("../src/types.jl") -include("../src/functions.jl") - -df = 
readtable("CaseI_output.csv") -d2 = readTableCF(df) - -# starting ht (gamma,t4,t6,t9,t10) -wronght = [0.1,1.0,1.0,3.6,1.0] -ht = wronght -realht = [0.1,2.0,1.0,1.0,1.0] - -tree = string("((((8,10):",string(ht[2]),")#H1:::",string(1-ht[1]),",7):",string(ht[3]),",6,(4,#H1:",string(ht[4]),"::",string(ht[1]),"):",string(ht[5]),");") # Case I Bad diamond II -f = open("prueba_tree.txt","w") -write(f,tree) -close(f) -net = readTopologyUpdate("prueba_tree.txt"); -printEdges(net) - -net.ht - -q1 = Quartet(1,["6","7","4","8"],[0.5,0.4,0.1]); -q2 = Quartet(2,["6","7","10","8"],[0.5,0.4,0.1]); -q3 = Quartet(3,["10","7","4","8"],[0.5,0.4,0.1]); -q4 = Quartet(4,["6","10","4","8"],[0.5,0.4,0.1]); -q5 = Quartet(5,["6","7","4","10"],[0.5,0.4,0.1]); - -d = DataCF([q1,q2,q3,q4,q5]); -extractQuartet!(net,d) - -wrongdf = writeExpCF(d.quartet) -writetable("CaseI_output_wrong.csv",wrongdf) - - -# test optBL with Case G -# does not yield correct ht for one starting point -include("../src/types.jl") -include("../src/functions.jl") - -df = readtable("CaseG_output.csv") -d2 = readTableCF(df) - -# starting ht (gamma,t3,t6,t9) -wronght = [0.14,0.2,0.1,0.6] -ht = wronght -realht = [0.1,0.2,0.1,1.0] - -tree = string("((((6,4)1:",string(ht[2]),",(7)11#H1:::",string(1-ht[1]),")5:",string(ht[3]),",(11#H1:::",string(ht[1]),",8):",string(ht[4]),"),10);") # Case G different starting branch lengths -f = open("prueba_tree.txt","w") -write(f,tree) -close(f) -net = readTopologyUpdate("prueba_tree.txt"); -printEdges(net - -net.ht - -q1 = Quartet(1,["6","7","4","8"],[0.5,0.4,0.1]); -q2 = Quartet(2,["6","7","10","8"],[0.5,0.4,0.1]); -q3 = Quartet(3,["10","7","4","8"],[0.5,0.4,0.1]); -q4 = Quartet(4,["6","10","4","8"],[0.5,0.4,0.1]); -q5 = Quartet(5,["6","7","4","10"],[0.5,0.4,0.1]); - -d = DataCF([q1,q2,q3,q4,q5]); -extractQuartet!(net,d) - -wrongdf = writeExpCF(d.quartet) -writetable("CaseG _output_wrong.csv",wrongdf) diff --git a/test/test_optBL_sticr_data.jl b/test/test_optBL_sticr_data.jl deleted file 
mode 100644 index 034c7df49..000000000 --- a/test/test_optBL_sticr_data.jl +++ /dev/null @@ -1,20 +0,0 @@ -# test of optBL with the example in STICR panmixia test -# Claudia January 2015 - -include("types.jl") -include("functions.jl") - -df = readtable("/Users/Clauberry/Documents/phylo/software/CFimplementation/sticr/quartetCF.csv") -d2 = readTableCF(df) - -net = readTopologyUpdate("/Users/Clauberry/Documents/phylo/software/CFimplementation/sticr/tree.tre") -# fixit: cannot read _ inside the taxon name, need to change readsubtree -printEdges(net) - - -net.ht -realht = [0.1,0.2,0.1,1.0] - -@time fmin,xmin=optBL(net,d2) - - diff --git a/test/test_optTopLevel.jl b/test/test_optTopLevel.jl deleted file mode 100644 index 3383ca306..000000000 --- a/test/test_optTopLevel.jl +++ /dev/null @@ -1,144 +0,0 @@ -# test for whole optimization on the space of topologies -# with the same number of hybridizations -# Claudia February 2015 - -# -------------------5taxon tree------------------ - -include("../src/types.jl") -include("../src/functions.jl") - -df = readtable("Tree_output.csv") -d = readTableCF(df) - -# starting tree: -tree = "((6,4),(7,8),10);" -tree = "((((6,4),7),8),10);" #true tree -f = open("prueba_tree.txt","w") -write(f,tree) -close(f) -currT = readTopologyUpdate("prueba_tree.txt"); -printEdges(currT) - - -@time optTopLevel!(currT,d,0) -net = snaq(currT,d,hmax=0); - -#old: -#@time optTopLevel!(currT,M,N,d,0); -#got 5.34957 at [0.2,0.1] after 28 iterations (returned FTOL_reached) -#loglik_1 = 5.34957 -#found minimizer topology at step 1 with -loglik=5.34957 and ht_min=[0.2,0.1] -#elapsed time: 8.910952599 seconds (91065584 bytes allocated, 0.59% gc time) -printEdges(net) -# forgot to copy, but true tree! 
- - - -# ------------------5taxon network 1 hybridization: Case H----------------- -# starting topology: Case G -include("../examples/case_g_example.jl"); -currT = deepcopy(net); -printEdges(currT) - -# real network: Case H -df = readtable("CaseH_output.csv") -d = readTableCF(df) - - -@time newT = optTopLevel!(currT,d,1) -# with original optBL -# elapsed time: 347.527745834 seconds (372704328 bytes allocated, 0.06% gc time) -# did not find right network (Case H), came back to starting point (Case G) - -# with new added inequality -#elapsed time: 58.711593728 seconds (498732864 bytes allocated, 0.49% gc time) -# did not find the right network (Case H), stopped in bad diamond II case - -# with afterOptBLAll -# elapsed time: 6.321085645 seconds (340867164 bytes allocated, 3.52% gc time) -# found correct network!! - -# with ftol,xtol in optBL -#got 0.11429 at [0.67712,0.92599,0.13123,0.23364] after 219 iterations (returned FTOL_REACHED) -#WARNING: newT.loglik 0.11428632002947303 not really close to 0.0, you might need to redo with another starting point -#END optTopLevel: found minimizer topology at step 100 with -loglik=0.18708 and ht_min=[0.1995,0.62389,0.44779,0.07295] - -# with old ftol, xtol -#got 73.2177 at [0.42152,1.0016,0.64937,0.0] after 91 iterations (returned FTOL_REACHED) -#before comparing: newT.loglik 73.21769826979121, currT.loglik 0.09699788946913745 -#ends while for 100 with delta 0.7803748966954892 -#WARNING: newT.loglik 0.09699788946913745 not really close to 0.0, you might need to redo with another starting point -#END optTopLevel: found minimizer topology at step 100 with -loglik=0.097 and ht_min=[0.96515,0.06804,0.0846] - -tree = "(1,2,((4,#H-6::0.29974323628759736):0.0,((8)#H-6:0.2532215227803048::0.7002567637124026,7):0.1265313400455008):0.9651457506039909);" -f = open("prueba_tree.txt","w") -write(f,tree) -close(f) -newT = readTopologyUpdate("prueba_tree.txt"); -printEdges(newT) - - -newT.ht - -q1 = Quartet(1,["6","7","4","8"],[0.5,0.4,0.1]); 
-q2 = Quartet(2,["6","7","10","8"],[0.5,0.4,0.1]); -q3 = Quartet(3,["10","7","4","8"],[0.5,0.4,0.1]); -q4 = Quartet(4,["6","10","4","8"],[0.5,0.4,0.1]); -q5 = Quartet(5,["6","7","4","10"],[0.5,0.4,0.1]); - -q1 = Quartet(1,["1","4","2","7"],[0.5,0.4,0.1]); -q2 = Quartet(2,["1","4","8","7"],[0.5,0.4,0.1]); -q3 = Quartet(3,["8","4","2","7"],[0.5,0.4,0.1]); -q4 = Quartet(4,["1","8","2","7"],[0.5,0.4,0.1]); -q5 = Quartet(5,["1","4","2","8"],[0.5,0.4,0.1]); - -d = DataCF([q1,q2,q3,q4,q5]); -extractQuartet!(newT,d) - -wrongdf = writeExpCF(d.quartet) -writetable("CaseH_output_wrong_optTop_startCaseG.csv",wrongdf) - -# ------------------ -# starting topology: Case F -include("../examples/case_f_example.jl"); -currT = deepcopy(net); -printEdges(currT) - -# real network: Case H -df = readtable("CaseH_output.csv") -d = readTableCF(df) - -@time optTopLevel!(currT,d,1) - -printEdges(newT) - - -# ------------------5taxon network 1 hybridization: Case F----------------- -# starting topology: Case G -include("../examples/case_g_example.jl"); -currT = deepcopy(net); -printEdges(currT) - -# real network: Case F -df = readtable("CaseF_output.csv") -d = readTableCF(df) - - -@time optTopLevel!(currT,d,1) - -printEdges(newT) - - -# starting topology: Case H -include("../examples/case_h_example.jl"); -currT = deepcopy(net); -printEdges(currT) - -# real network: Case F -df = readtable("CaseF_output.csv") -d = readTableCF(df) - - -@time optTopLevel!(currT,d,1) - -printEdges(newT) diff --git a/test/test_optTopLevel2.jl b/test/test_optTopLevel2.jl deleted file mode 100644 index 32c540e9c..000000000 --- a/test/test_optTopLevel2.jl +++ /dev/null @@ -1,168 +0,0 @@ -# test for whole optimization on the space of topologies -# for h<= hmax -# Claudia March 2015 -# based on test_optTopLevel.jl, but now we use a more identifiable Case G, H to begin with -# and now we use afterOptBLALL - -# -------------------5taxon tree------------------ - -include("../src/types.jl") -include("../src/functions.jl") - 
-df = readtable("Tree_output.csv") -d = readTableCF(df) - -# starting tree: -tree = "((6,4),(7,8),10);" -f = open("prueba_tree.txt","w") -write(f,tree) -close(f) -currT = readTopologyUpdate("prueba_tree.txt"); -printEdges(currT) - -epsilon = eps() -N = 100 - -@time newT = optTopLevel!(currT,epsilon,d,0); -#got 5.34957 at [0.2,0.1] after 28 iterations (returned FTOL_reached) -#loglik_1 = 5.34957 -#found minimizer topology at step 1 with -loglik=5.34957 and ht_min=[0.2,0.1] -#elapsed time: 8.910952599 seconds (91065584 bytes allocated, 0.59% gc time) -printEdges(newT) -# forgot to copy, but true tree! - - - -# ------------------5taxon network 1 hybridization: Case H----------------- -# starting topology: Case G - -## include("../examples/case_h_example2.jl"); -## q1 = Quartet(1,["6","7","4","8"],[0.5,0.4,0.1]); -## q2 = Quartet(2,["6","7","10","8"],[0.5,0.4,0.1]); -## q3 = Quartet(3,["10","7","4","8"],[0.5,0.4,0.1]); -## q4 = Quartet(4,["6","10","4","8"],[0.5,0.4,0.1]); -## q5 = Quartet(5,["6","7","4","10"],[0.5,0.4,0.1]); - -## d = DataCF([q1,q2,q3,q4,q5]); -## extractQuartet!(net,d) - -## df = writeExpCF(d.quartet) -## writetable("CaseH_output2.csv",df) - -include("../examples/case_g_example.jl"); -currT = deepcopy(net); -printEdges(currT) - -# real network: Case H -df = readtable("CaseH_output2.csv") -d = readTableCF(df) - -@time optTopLevel!(currT,d,1) - - -tree = string("(1,2,((7,(8)#H5:5.814544267883624):0.9977876663423212,(#H5:1.0,4):0.0):1.9430580774498776);") -f = open("prueba_tree.txt","w") -write(f,tree) -close(f) -newT = readTopologyUpdate("prueba_tree.txt"); - -printEdges(newT) - -newT.ht - -q1 = Quartet(1,["1","4","2","7"],[0.5,0.4,0.1]); -q2 = Quartet(2,["1","4","8","7"],[0.5,0.4,0.1]); -q3 = Quartet(3,["8","4","2","7"],[0.5,0.4,0.1]); -q4 = Quartet(4,["1","8","2","7"],[0.5,0.4,0.1]); -q5 = Quartet(5,["1","4","2","8"],[0.5,0.4,0.1]); - -d = DataCF([q1,q2,q3,q4,q5]); -extractQuartet!(newT,d) - -wrongdf = writeExpCF(d.quartet) 
-writetable("CaseH_output_wrong_optTop_startCaseG.csv",wrongdf) - -# ------------------ -# starting topology: Case F -include("../examples/case_f_example.jl"); -currT = deepcopy(net); -printEdges(currT) - -# real network: Case H -df = readtable("CaseH_output2.csv") -d = readTableCF(df) - -@time optTopLevel!(currT,d,1) - -tree = string("(4,6,(#H2:0.7392085405544356::0.046179825120885414,(7,(10,(8)#H2:0.0::0.9538201748791146):0.9803511144374873):2.212878358589699):0.00000073506);") -f = open("prueba_tree.txt","w") -write(f,tree) -close(f) -newT = readTopologyUpdate("prueba_tree.txt"); - -printEdges(newT) - -#include("../examples/case_h_example2.jl"); -newT.ht - -q1 = Quartet(1,["6","7","4","8"],[0.5,0.4,0.1]); -q2 = Quartet(2,["6","7","10","8"],[0.5,0.4,0.1]); -q3 = Quartet(3,["10","7","4","8"],[0.5,0.4,0.1]); -q4 = Quartet(4,["6","10","4","8"],[0.5,0.4,0.1]); -q5 = Quartet(5,["6","7","4","10"],[0.5,0.4,0.1]); - -dd = DataCF([q1,q2,q3,q4,q5]); -extractQuartet!(newT,dd) - -wrongdf = writeExpCF(d.quartet) -writetable("CaseH2_output_wrong_optTop_startCaseF.csv",wrongdf) - -#elapsed time: 4.236802192 seconds (274215268 bytes allocated, 4.26% gc time) -#WARNING: newT.loglik 1.4887847814495139 not really close to 0.0, you might need to redo with another starting point - - -# ------------------5taxon network 1 hybridization: Case F----------------- -# starting topology: Case G -include("../examples/case_g_example.jl"); -currT = deepcopy(net); -printEdges(currT) - -## include("../examples/case_f_example2.jl"); -## parameters!(net) -## q1 = Quartet(1,["6","7","4","8"],[0.5,0.4,0.1]); -## q2 = Quartet(2,["6","7","10","8"],[0.5,0.4,0.1]); -## q3 = Quartet(3,["10","7","4","8"],[0.5,0.4,0.1]); -## q4 = Quartet(4,["6","10","4","8"],[0.5,0.4,0.1]); -## q5 = Quartet(5,["6","7","4","10"],[0.5,0.4,0.1]); - -## d = DataCF([q1,q2,q3,q4,q5]); -## extractQuartet!(net,d) - -## df = writeExpCF(d.quartet) -## writetable("CaseF_output2.csv",df) - -# real network: Case F -df = 
readtable("CaseF_output.csv") -df = readtable("CaseF_output2.csv") -d = readTableCF(df) - - -@time optTopLevel!(currT,d,1) - -printEdges(newT) - - -# starting topology: Case H -include("../examples/case_h_example.jl"); -currT = deepcopy(net); -printEdges(currT) - -# real network: Case F -df = readtable("CaseF_output.csv") -df = readtable("CaseF_output2.csv") -d = readTableCF(df) - - -@time optTopLevel!(currT,d,1) - -printEdges(newT) diff --git a/test/test_optTopLevelparts.jl b/test/test_optTopLevelparts.jl deleted file mode 100644 index fa6ecd230..000000000 --- a/test/test_optTopLevelparts.jl +++ /dev/null @@ -1,107 +0,0 @@ -# test the parts in optTopLevel -# Claudia February 2015 - -# ------------------5taxon network 1 hybridization----------------- -# starting topology: Case G -include("../examples/case_g_example.jl"); -currT = deepcopy(net); -printEdges(currT) - -# real network: Case H -df = readtable("CaseH_output.csv") -d = readDataCF(df) -#df2 = readtable("CaseG_output.csv") - -optBL!(currT,d) -newT = deepcopy(currT); -count = 0 -N = 100 -move = whichMove(currT) -move = :CHdir -move = :MVorigin -move = :MVtarget -move = :nni - -flag = proposedTop!(move,newT,true,count,N, zeros(Int,18), zeros(Int,6)) -printEdges(newT) -printNodes(newT) -count([e.hybrid for e in newT.edge]) == 2 || error("there are not 2 hybrid edges") -newT.hybrid[1].k - -optBL!(newT,d) -newloglik - currloglik -changeDirectionUpdate!(newT.node[5],newT,false) - -currT = deepcopy(newT); -currloglik = newloglik -currxmin = newxmin - -# ------------------5taxon network 1 hybridization----------------- -# starting topology: Case F -include("../case_f_example.jl"); -currT = deepcopy(net); -printEdges(currT) - -# real network: Case H -df = readtable("CaseH_output.csv") -d = readDataCF(df) - -currloglik,currxmin = optBL!(currT,d) -updateParameters!(currT) -updateLik!(currT,currloglik) -newT = deepcopy(currT); -count = 0 -N = 100 -move = whichMove(currT) -move = :CHdir -move = :MVorigin -move = 
:MVtarget -move = :nni - -flag = proposedTop!(move,newT,true,count,N) -printEdges(newT) -printNodes(newT) -count([e.hybrid for e in newT.edge]) == 2 || error("there are not 2 hybrid edges") -newT.hybrid[1].k - -newloglik, newxmin = optBL!(newT,d) -newloglik - currloglik - -currT = deepcopy(newT); -currloglik = newloglik -currxmin = newxmin - -# -------------------5taxon tree------------------ - -include("../src/types.jl") -include("../src/functions.jl") - -df = readtable("Tree_output.csv") -d = readDataCF(df) - -# starting tree: -tree = "((6,4),(7,8),10);" -f = open("prueba_tree.txt","w") -write(f,tree) -close(f) -currT = readTopologyUpdate("prueba_tree.txt"); -printEdges(currT) - - -optBL!(currT,d) -newT = deepcopy(currT); -count = 0 -N = 10 -move = :nni - -flag = proposedTop!(move,newT,true,count,N, zeros(Int,18), zeros(Int,6)) -flag -printEdges(newT) -printNodes(newT) - -newloglik, newxmin = optBL!(newT,d) -newloglik - currloglik -` -currT = deepcopy(newT); -currloglik = newloglik -currxmin = newxmin diff --git a/test/test_optTopParts.jl b/test/test_optTopParts.jl deleted file mode 100644 index cd008ef14..000000000 --- a/test/test_optTopParts.jl +++ /dev/null @@ -1,3 +0,0 @@ -# tests to check the parts of the whole optimization that includes -# optTopLevel (search in the same level of hybrids) and -# addHybridizationUpdate(net) that will add a new hybrid if needed, and deleteHybridizationUpdate() diff --git a/test/test_readInputData.jl b/test/test_readInputData.jl index f433056aa..2c432f63b 100644 --- a/test/test_readInputData.jl +++ b/test/test_readInputData.jl @@ -26,8 +26,15 @@ ## end -nexusfile = joinpath(@__DIR__, "..", "examples", "test.nex") @testset "test: reading nexus file" begin - vnet = readNexusTrees(nexusfile) - @test length(vnet) == 10 +nexusfile = joinpath(@__DIR__, "..", "examples", "test.nex") +# nexusfile = joinpath(dirname(pathof(PhyloNetworks)), "..","examples","test.nex") +vnet = readNexusTrees(nexusfile); +@test length(vnet) == 10 +@test 
length(vnet[10].edge) == 10 +@test vnet[10].edge[7].length ≈ 0.00035 +vnet = readNexusTrees(nexusfile, PhyloNetworks.readTopologyUpdate, false, false); +@test length(vnet) == 10 +@test length(vnet[10].edge) == 9 +@test vnet[10].edge[7].length ≈ 0.00035 end diff --git a/test/test_readTopology.jl b/test/test_readTopology.jl deleted file mode 100644 index ae0b4034b..000000000 --- a/test/test_readTopology.jl +++ /dev/null @@ -1,140 +0,0 @@ -# initial tests for readTopology -# reads only a tree and with taxon number, not taxon names -# Claudia October 2014 -######################################################### - -include("../src/types.jl") -include("../src/functions.jl") - -using Base.Collections # for updateInCycle with priority queue - -# good trees --------------------------- -f = open("prueba_tree.txt","w") -tree = "((1,2),(3,4));" -tree = "((11,22),(33,44));" -tree = "((Ant,Bear),(Cat,Dog));" -tree = "((Ant1,Bear2),(Cat3,Dog4));" -tree = "((1Ant,2Bear),(3Cat,4Dog));" -tree = "((1,2),3,4);" -tree = "(1,2,(3,4));" -tree = "(Ant,Bear,(Cat,Dog));" -tree = "(A,B,(C,D));" -tree = "(A:0.1,B:0.2,(C:0.3,D:0.4):0.5);" -tree = "((((((1,2),3),4),5),6),7,8);" # yeast data tree0 -write(f,tree) -close(f) - -net = readTopology("prueba_tree.txt"); -printEdges(net) -printNodes(net) -net.names - -cleanAfterRead!(net) - -# bad trees -------------------------- -f = open("prueba_tree.txt","w") -tree = "(((1,2),(3,4)));" # extra parenthesis -tree = "((1,2,(3,4)));" # extra parenthesis -tree = "(((1,2),3,4));" # extra parenthesis -tree = "((1),2,3,4,5);" # not tree -tree = "((1,2),3,4,5);" # polytomy -tree = "((1,*),(3,4));" # not letter/number taxon name -tree = "(1,2::,(3,4));" #double : -tree = "(1,2:::,(3,4));" #triple : -tree = "(1,2:0.2:,(3,4));" #no 2nd : -tree = "(1,2:0.2:0.2:,(3,4));" #no 3rd : -tree = "(1,2:,(3,4));" #no 1st: -tree = "((1,2),(3,4))" # no ; -write(f,tree) -close(f) - -net = readTopology("prueba_tree.txt") -printEdges(net) -printNodes(net) -net.names - 
-cleanAfterRead!(net) - - -# trees with additional info ----------------- -f = open("prueba_tree.txt","w") -tree = "((1:1.2,2:0.3),3:1.8,4);" -tree = "((1:1.2,2:0.3):1.2:0.6:0.3,3:1.8,4);" -tree = "((1:1.2,2:0.3):1.2::0.6,3:1.8,4);" # no bootstrap -tree = "((1:1.2,2:0.3):::0.6,3:1.8,4);" # no length nor bootstrap -tree = "((1:1.2,2:0.3):1.2:2.5:1.6,3:1.8,4);" # wrong value gamma -tree = "((1:1.2,2:0.3):1.2:2.5:,3:1.8,4);" # missing value gamma -write(f,tree) -close(f) - -net = readTopology("prueba_tree.txt") -printEdges(net) -printNodes(net) -net.names - -# beginning of networks --------------------------- -include("../src/types.jl") -include("../src/functions.jl") - -using Base.Collections # for updateInCycle with priority queue - - -f = open("prueba_tree.txt","w") -tree = "(((3,4)Z#H1,1),(Z#H1,2));" # expand child, no gammas -tree = "((Z#H1,2),((3,4)Z#H1,1));" # expand child, no gammas, leaf read first -tree = "(((3,4)Z#H1:::0.9,1),(Z#H1:::0.2,2));" # gammas do not sum up to 1 -tree = "(((3,4)Z#H1:5.0::0.9,1),(Z#H1:::0.2,2));" -tree = "(1,2,(3,4)A);" -tree = "(1,2,(3,4)A:0.8);" -tree = "((1,2),#H1,4);" #only one hybrid -tree = "((1,2),#H1,(3,#H2));" #two hybrids but different -tree = "((1,2)#H1,(3,4)#H1);" #error: both hybrid have childre -tree = "(((1,#H1),2),#H1,3);" # both hybrid leaves -tree = "(((1,4)#H1,2),#H1,(5,6)#H1);" # hybrid polytomy -write(f,tree) -close(f) - -net = readTopology("prueba_tree.txt"); -printEdges(net) -printNodes(net) -net.names -net.numHybrids - -updateAllReadTopology!(net) - -readTopologyUpdate("prueba_tree.txt") - -#====================================================== - -include("../src/types.jl") -include("../src/functions.jl") - -using Base.Collections # for updateInCycle with priority queue - -# good trees --------------------------- -f = open("prueba_tree.txt","w") -tree = "((1,2),(3,4));" -tree = "((11,22),(33,44));" -tree = "((Ant,Bear),(Cat,Dog));" -tree = "((Ant1,Bear2),(Cat3,Dog4));" -tree = "((1Ant,2Bear),(3Cat,4Dog));" 
-tree = "((1,2),3,4);" -tree = "(1,2,(3,4));" -tree = "(Ant,Bear,(Cat,Dog));" -tree = "(A,B,(C,D));" -tree = "(A:0.1,B:0.2,(C:0.3,D:0.4):0.5);" -tree = "((((((1,2),3),4),5),6),7,8);" # yeast data tree0 -tree = "(Scer,((Smik,(Skud,Sbay)),Spar));" # yeast data astral output -tree = "(1,2,3,4,5);" -write(f,tree) -close(f) - -net = readTopologyUpdate("prueba_tree.txt"); -printEdges(net) -printNodes(net) -net.names - -net2 = readTopologyUpdate(tree); -printEdges(net2) -printNodes(net2) -net2.names diff --git a/test/test_redundanteCycle.jl b/test/test_redundanteCycle.jl deleted file mode 100644 index 1f84a6d74..000000000 --- a/test/test_redundanteCycle.jl +++ /dev/null @@ -1,37 +0,0 @@ -# test of code to remove redundante cycles after extracting quartet -# Claudia May 2015 - - -include("/Users/Clauberry/Documents/phylo/software/CFimplementation/julia/git_laptop/CFnetworks/types.jl") -include("/Users/Clauberry/Documents/phylo/software/CFimplementation/julia/git_laptop/CFnetworks/functions.jl") - -tree = "(((((((1,2),3),4),5),(6,7)),(8,9)),10);" - - -seed = 2738 -currT0 = readTopologyUpdate(tree); -Random.seed!(seed) -besttree = deepcopy(currT0); -success,hybrid,flag,nocycle,flag2,flag3 = addHybridizationUpdate!(besttree); -success -success,hybrid,flag,nocycle,flag2,flag3 = addHybridizationUpdate!(besttree); -success -printEdges(besttree) -writeTopologyLevel1(besttree,true) -net = deepcopy(besttree); - -q1 = Quartet(1,["1","2","3","4"],[0.5,0.4,0.1]); -extractQuartet!(net,q1); -qnet=deepcopy(q1.qnet); -printEdges(qnet) -printNodes(qnet) - -## redundantCycle!(qnet,qnet.node[14]) -## redundantCycle!(qnet,qnet.node[8]) -## printEdges(qnet) -## printNodes(qnet) -identifyQuartet!(qnet) -eliminateHybridization!(qnet) -updateSplit!(qnet) -updateFormula!(qnet) -calculateExpCF!(qnet) diff --git a/test/test_relaxed_reading.jl b/test/test_relaxed_reading.jl index 44e4e3984..6bc4beacd 100644 --- a/test/test_relaxed_reading.jl +++ b/test/test_relaxed_reading.jl @@ -31,13 +31,20 @@ end 
@test_logs PhyloNetworks.printEverything(net) redirect_stdout(originalstdout) end -@testset "internal nodes" begin +@testset "internal nodes, writemulti" begin @test writeTopology(readTopology("(a,b):0.5;")) == "(a,b);" @test writeTopology(readTopology("((a,(b)#H1)i1,(#H1,c))r;")) == "((a,(b)#H1)i1,(#H1,c))r;" @test writeTopology(readTopology("((a,(b)#H1)i1,(#H1,c))r;"), internallabel=false) == "((a,(b)#H1),(#H1,c));" - @test_logs readTopology("((a,(b)#H1)i1,(#H1,c)i2)root:0.5;"); - @test_logs readTopology("(((a,(b)#H1)i1,(#H1,c)i2)root:0.5);"); # root edge was deleted + n11 = (@test_logs readTopology("((a,(b)#H1)i1,(#H1,c)i2)root:0.5;")); + n12 = (@test_logs readTopology("(((a,(b)#H2)i1,(#H2,c)i2)root:0.5);")); # root edge was deleted # writeTopology(net) == "((a,(b)#H1)i1,(#H1,c)i2);" # readTopology("((((a,(b)#H1)i1,(#H1,c)i2)root:0.5));"); still has 1 (of the 2) root edges + # writeMultiTopology([n1,n2], stdout) + n11.root = 2 # below the hybrid node: will trigger RootMismatch and message below + originalstdout = stdout + redirect_stdout(open("/dev/null", "w")) + writeMultiTopology([n11,n12], "test_relaxedreading.net") + rm("test_relaxedreading.net") + redirect_stdout(originalstdout) end end diff --git a/test/test_simplenet.jl b/test/test_simplenet.jl deleted file mode 100644 index a16bada96..000000000 --- a/test/test_simplenet.jl +++ /dev/null @@ -1,12 +0,0 @@ -# strange error in simple network -# Claudia November 2015 - -include("../src/types.jl") -include("../src/functions.jl") - -tree= "(A,((B,#H1),(C,(D)#H1)));" #fails -tree= "(A,((C,(D)#H1),(B,#H1)));" #works -net = readTopology(tree) -net = readTopologyUpdate(tree) -printEdges(net) -printNodes(net) diff --git a/test/test_traitLikDiscrete.jl b/test/test_traitLikDiscrete.jl index d4229920d..43cf892c2 100644 --- a/test/test_traitLikDiscrete.jl +++ b/test/test_traitLikDiscrete.jl @@ -224,14 +224,14 @@ d = DataFrame(species=["D","C","B","A"], x1=[1,1,1,1], x2=[1,2,2,1], x3=[2,2,2,2 x11=[1,2,1,1], 
x12=[2,2,1,2], x13=[2,1,1,2], x14=[1,2,2,2], x15=[1,1,1,2], x16=[2,1,2,2]) lik = Float64[] for i in 1:16 - fit = fitdiscrete(net, m1, d[[:species, Symbol("x",i)]]; optimizeQ=false, optimizeRVAS=false) + fit = fitdiscrete(net, m1, d[!,[:species, Symbol("x",i)]]; optimizeQ=false, optimizeRVAS=false) push!(lik, fit.loglik) end @test lik ≈ [-1.6218387598967712, -3.008066347196894, -4.3943604143403245, -3.008199100743402, -3.70121329832901, -3.0081981601869483, -2.315051933868397, -2.314985711030534, -3.0081988850020873, -3.0081983709272504, -2.3150512090547584, -3.70134532205944, -3.008132923628349, -3.7012134632082083, -2.3149859724945876, -3.7013460518770915] -fit1 = fitdiscrete(net, m1, d[[:species, :x6]]; optimizeRVAS=false) +fit1 = fitdiscrete(net, m1, d[!,[:species, :x6]]; optimizeRVAS=false) # with parameter estimation net = readTopology("(((A:2.0,(B:1.0)#H1:0.1::0.9):1.5,(C:0.6,#H1:1.0::0.1):1.0):0.5,D:2.0);") @@ -261,11 +261,11 @@ fit1.model.rate[2] = 0.34981109618902395; @test_throws ErrorException ancestralStateReconstruction(fit1, 4) # 1 trait, not 4: error asr = ancestralStateReconstruction(fit1) @test names(asr) == [:nodenumber, :nodelabel, :lo, :hi] -@test asr[:nodenumber] == collect(1:9) -@test asr[:nodelabel] == ["A","B","C","D","5","6","7","8","H1"] -@test asr[:lo] ≈ [1.,1.,0.,0., 0.28602239466671175, 0.31945742289603263, +@test asr[!,:nodenumber] == collect(1:9) +@test asr[!,:nodelabel] == ["A","B","C","D","5","6","7","8","H1"] +@test asr[!,:lo] ≈ [1.,1.,0.,0., 0.28602239466671175, 0.31945742289603263, 0.16855042517785512, 0.7673588716207436, 0.7827758475866091] atol=1e-5 -@test asr[:hi] ≈ [0.,0.,1.,1.,0.713977605333288, 0.6805425771039674, +@test asr[!,:hi] ≈ [0.,0.,1.,1.,0.713977605333288, 0.6805425771039674, 0.8314495748221447, 0.23264112837925616, 0.21722415241339132] atol=1e-5 @test fit1.postltw ≈ [-0.08356534477069566, -2.5236181051014333] atol=1e-5 end # end of testset, fixed topology @@ -365,7 +365,7 @@ fitHKY85 = fitdiscrete(net, mHKY85, 
tips; optimizeQ=false); #with optimization (confirmed with ape ace() function) mHKY85 = HKY85([0.5, 0.1], [0.25, 0.25, 0.25, 0.25], false); #absolute -@time fitHKY85 = fitdiscrete(net, mHKY85, tips; optimizeQ=true) +fitHKY85 = fitdiscrete(net, mHKY85, tips; optimizeQ=true) @test fitHKY85.model.rate[1] ≈ 1.4975887229148119 atol = 2e-4 @test loglikelihood(fitHKY85) ≈ -3.3569474489525244 atol = 2e-8 @@ -502,10 +502,10 @@ HKY85_1 = HKY85([0.5, 0.5], [0.2, 0.3, 0.25, 0.25], false) # test empiricalDNAfrequencies with string type # Bayesian correction by default: more stable and avoids zeros -dna_String = view(DataFrame(A = ["s1", "s2"], site1 = ["A", "A"], site2 = ["G", "T"]), 2:3) +dna_String = view(DataFrame(A = ["s1", "s2"], site1 = ["A", "A"], site2 = ["G", "T"]), :, 2:3) @test PhyloNetworks.empiricalDNAfrequencies(dna_String, [1, 1]) ≈ [3,1,2,2]/(4+4) # with char type -dna_Char = view(DataFrame(A = ["s1", "s2"], site1 = ['A', 'A'], site2 = ['G', 'T']), 2:3) +dna_Char = view(DataFrame(A = ["s1", "s2"], site1 = ['A', 'A'], site2 = ['G', 'T']), :, 2:3) @test PhyloNetworks.empiricalDNAfrequencies(dna_Char, [1, 1]) ≈ [3,1,2,2]/(4+4) # uncorrected estimate @test PhyloNetworks.empiricalDNAfrequencies(dna_Char, [1, 1], false) ≈ [2,0,1,1]/4 @@ -517,10 +517,10 @@ dna_Char = DataFrame(site1 = ['A','A','Y'], site2 = ['G','T','V']) #fastafile = abspath(joinpath(dirname(Base.find_package("PhyloNetworks")), "..", "examples", "test_8_withrepeatingsites.aln")) fastafile = joinpath(@__DIR__, "..", "examples", "test_8_withrepeatingsites.aln") dat, weights = readfastatodna(fastafile, true); -@test PhyloNetworks.empiricalDNAfrequencies(view(dat, 2:6), weights) ≈ [0.21153846153846154, 0.3076923076923077, 0.40384615384615385, 0.07692307692307693] atol=1e-9 +@test PhyloNetworks.empiricalDNAfrequencies(view(dat, :, 2:6), weights) ≈ [0.21153846153846154, 0.3076923076923077, 0.40384615384615385, 0.07692307692307693] atol=1e-9 #test PhyloNetworks.empiricalDNAfrequencies with bad type -dna_bad 
= view(DataFrame(A = ["s1", "s2"], trait1 = ["hi", "lo"], trait2 = ["lo", "hi"]), 2:3) +dna_bad = view(DataFrame(A = ["s1", "s2"], trait1 = ["hi", "lo"], trait2 = ["lo", "hi"]), :, 2:3) @test_throws ErrorException PhyloNetworks.empiricalDNAfrequencies(dna_bad, [1, 1]) end #testing stationary and empiricalDNAfrequencies functions diff --git a/test/test_tree2Matrix.jl b/test/test_tree2Matrix.jl deleted file mode 100644 index 9da6d795d..000000000 --- a/test/test_tree2Matrix.jl +++ /dev/null @@ -1,9 +0,0 @@ -# test for tree2Matrix -# Claudia October 2015 - -include("../src/types.jl") -include("../src/functions.jl") - -T = readTopology("(A,(B,(C,D)));"); -S = ["A","B","C","D"] -M = tree2Matrix(T,S) diff --git a/test/test_updateBL.jl b/test/test_updateBL.jl deleted file mode 100644 index 5f056a73c..000000000 --- a/test/test_updateBL.jl +++ /dev/null @@ -1,26 +0,0 @@ -# test for updateBL function -# Claudia April 2015 - -include("../examples/tree_example.jl"); -printEdges(net) - -parts = edgesParts(net); -[isInternalEdge(e)?e.number:0 for e in net.edge] -i = 1 -[n.number for n in parts[i].part1] -[n.number for n in parts[i].part2] -[n.number for n in parts[i].part3] -[n.number for n in parts[i].part4] - - -include("../src/types.jl") -include("../src/functions.jl") -net=readTopologyUpdate("1_astral.out"); -printEdges(net) -net.names -df0 = readtable("HGT_truenet_expCF.csv") -d = readTableCF(df0); #expCF - -parts = edgesParts(net); -df = makeTable(net,parts,d) -x = updateBL!(net,d) diff --git a/test/tests_5taxon.jl b/test/tests_5taxon.jl deleted file mode 100644 index 6bda27cb6..000000000 --- a/test/tests_5taxon.jl +++ /dev/null @@ -1,46 +0,0 @@ -# tests for addHybridizationUpdate! for the 5 taxon network -# we have in the ipad summary -# Claudia September 2014 -########################################################## - -@warn "to run tests_5taxon.jl, you need to set updateGammaz to return true always. 
this is because we have bad triangles in here" -# types in "types.jl" -include("../src/types.jl") - -# needed modules: -using Base.Collections # for updateInCycle with priority queue - -# test functions -include("test_functions_5taxon.jl") - -@warn "BUG IN CASE C: sometimes it shows errors, but if Julia is closed and reopened, no more error shown" - -tests = ["F","G","H","J","I"]; -wrong = String[]; - -for t in tests - include("../src/functions.jl") - include("tree_example.jl"); - tp = string("add_hybrid_case","$(t).jl"); - println("running $(tp)-----"); - try - include(tp) - catch - println("error in $(tp)"); - push!(wrong,t); - end -end - -if(!isempty(wrong)) - for t in wrong - include("../src/functions.jl") - include("tree_example.jl"); - tp = string("add_hybrid_case","$(t).jl"); - println("running $(tp)"); - include(tp) - end -else - println("----------NO ERRORS!----------"); -end - - diff --git a/test/tests_5taxon_delete.jl b/test/tests_5taxon_delete.jl deleted file mode 100644 index c0922c426..000000000 --- a/test/tests_5taxon_delete.jl +++ /dev/null @@ -1,43 +0,0 @@ -# tests for deleteHybridizationUpdate! for the 5 taxon network -# we have in the ipad summary -# Claudia September 2014 -########################################################## - -@warn "to run tests_5taxon_delete.jl, you need to set updateGammaz to return true always. 
this is because we have bad triangles in here" -# types in "types.jl" -include("../src/types.jl") - -# needed modules: -using Base.Collections # for updateInCycle with priority queue - -# test functions -include("test_functions_5taxon.jl") - -tests = ["C","F","G","H","J","D","E","I"]; -wrong = String[]; -t="C" -for t in tests - include("../src/functions.jl") - include("tree_example.jl"); - tp = string("delete_case","$(t).jl"); - println("running $(tp)"); - try - include(tp) - catch - println("error in $(tp)"); - push!(wrong,t); - end -end - -if(!isempty(wrong)) - for t in wrong - include("../src/functions.jl") - include("tree_example.jl"); - tp = string("delete_case","$(t).jl"); - println("running $(tp)"); - include(tp) - end -else - println("----------NO ERRORS!----------"); -end - diff --git a/test/tree.tre b/test/tree.tre deleted file mode 100644 index 07bc30092..000000000 --- a/test/tree.tre +++ /dev/null @@ -1 +0,0 @@ -(((((((((Bsch_0:0.1,Is_0:0.1):0.263402303,(Hey_1:0.1,Tha_1:0.1):0.0885790387):0.0431603383,(Mnz_0:0.1,Wt_5:0.1):0.2234951756):0.01602997012,Co_1:0.1):0.02485171616,Tu_0:0.1):0.03651249593,((((((Dra_0:0.1,Da1_12:0.1):2.751479759,Uod_1:0.1):0.1039832594,Wa_1:0.1):0.008859126433,Jm_0:0.1):0.04874719634,Hau_0:0.1):0.02528284356,(Cnt_1:0.1,Uk_1:0.1):0.08423424506):0.01046528485):0.01970797162,((((((Ha_0:0.1,Nw_0:0.1):0.04140086574,Kro_0:0.1):0.03638473833,En_2:0.1):0.02383386683,(Yo_0:0.1,Pna_17:0.1):0.1777818768):0.2151362609,((Vind_1:0.1,Van_0:0.1):0.09217620694,Rome_1:0.1):0.1015450525):0.08514947504,Ragl_1:0.1):0.0578056821):0.01258906707,(Et_0:0.1,Pla_0:0.1):0.09601331156):0.1644778605,A_Lyr:0.1,Qar_8a:0.1); diff --git a/test/tree_example.jl b/test/tree_example.jl deleted file mode 100644 index e087c5da6..000000000 --- a/test/tree_example.jl +++ /dev/null @@ -1,38 +0,0 @@ -# example of 5 taxa tree in ipad notes -# used to test createHybrid! 
-# Claudia September 2014 -# -# in julia: include("tree_example.jl") - -ed11=Edge(11,1.5); -ed12=Edge(12,0.2); -ed3=Edge(3,0.9); -ed4=Edge(4,0.1); -ed5=Edge(5,0.2); -ed6=Edge(6,0.1); -ed7=Edge(7,0.1); -ed8=Edge(8,0.1); -ed9=Edge(9,0.1); -ed10=Edge(10,0.1); -ed13=Edge(13,0.1); - - -n1=Node(1,false,false,[ed11,ed5,ed6]); -n4=Node(4,true,false,[ed11]); -n5=Node(5,false,false,[ed12,ed5,ed9]); -n6=Node(6,true, false,[ed6]); -n7=Node(7,true, false,[ed12]); -n8=Node(8,true, false,[ed8]); -n9=Node(9,false,false,[ed8,ed9,ed10]); -n10=Node(10,true, false,[ed10]); - - -setNode!(ed5,[n5,n1]); -setNode!(ed6,[n1,n6]); -setNode!(ed8,[n8,n9]); -setNode!(ed9,[n5,n9]); -setNode!(ed10,[n9,n10]); -setNode!(ed11,[n1,n4]); -setNode!(ed12,[n5,n7]); - -net=HybridNetwork([n1,n4,n5,n6,n7,n8,n9,n10],[ed5,ed6,ed8,ed9,ed10,ed11,ed12]); diff --git a/test/truenetwork.txt b/test/truenetwork.txt deleted file mode 100644 index fbc8fa6b5..000000000 --- a/test/truenetwork.txt +++ /dev/null @@ -1 +0,0 @@ -((((1,2),((3,4))#H1),(#H1,5)),6); \ No newline at end of file diff --git a/test/try.txt b/test/try.txt deleted file mode 100644 index 9600296bd..000000000 --- a/test/try.txt +++ /dev/null @@ -1,16 +0,0 @@ -"t1","t2","t3","t4","CF1234","CF1324","CF1423" -"6","1","5","4",0.4,0.6,0.0 -"6","1","5","2",0.2,0.8,0.0 -"6","1","5","3",0.5,0.5,0.0 -"6","1","4","2",0.4,0.6,0.0 -"6","1","4","3",0.4,0.2,0.4 -"6","1","2","3",0.2,0.1,0.7 -"6","5","4","2",0.8,0.0,0.2 -"6","5","4","3",0.4,0.4,0.2 -"6","5","2","3",0.6,0.4,0.0 -"6","4","2","3",0.3,0.2,0.5 -"1","5","4","2",0.4,0.0,0.6 -"1","5","4","3",0.3,0.5,0.2 -"1","5","2","3",0.1,0.8,0.1 -"1","4","2","3",0.1,0.5,0.4 -"5","4","2","3",0.3,0.2,0.5 diff --git a/test/try4.txt b/test/try4.txt deleted file mode 100644 index 9600296bd..000000000 --- a/test/try4.txt +++ /dev/null @@ -1,16 +0,0 @@ -"t1","t2","t3","t4","CF1234","CF1324","CF1423" -"6","1","5","4",0.4,0.6,0.0 -"6","1","5","2",0.2,0.8,0.0 -"6","1","5","3",0.5,0.5,0.0 -"6","1","4","2",0.4,0.6,0.0 
-"6","1","4","3",0.4,0.2,0.4 -"6","1","2","3",0.2,0.1,0.7 -"6","5","4","2",0.8,0.0,0.2 -"6","5","4","3",0.4,0.4,0.2 -"6","5","2","3",0.6,0.4,0.0 -"6","4","2","3",0.3,0.2,0.5 -"1","5","4","2",0.4,0.0,0.6 -"1","5","4","3",0.3,0.5,0.2 -"1","5","2","3",0.1,0.8,0.1 -"1","4","2","3",0.1,0.5,0.4 -"5","4","2","3",0.3,0.2,0.5