From e7b2f486ced2aedbaee235a20755fb3ed87fce93 Mon Sep 17 00:00:00 2001 From: Jennifer Chang Date: Thu, 31 Oct 2024 09:32:50 -0700 Subject: [PATCH 01/11] Annotation from Lanciotti et al 1999 Lanciotti, et al, 1999. Origin of the West Nile virus responsible for an outbreak of encephalitis in the northeastern United States. Science, 286(5448), pp.2333-2337. https://www.science.org/doi/epdf/10.1126/science.286.5448.2333 --- ingest/defaults/annotations.tsv | 2 ++ 1 file changed, 2 insertions(+) diff --git a/ingest/defaults/annotations.tsv b/ingest/defaults/annotations.tsv index 6315093..16990f4 100644 --- a/ingest/defaults/annotations.tsv +++ b/ingest/defaults/annotations.tsv @@ -294,3 +294,5 @@ KT163243 date 1968-XX-XX AF260968 date 1951-XX-XX AF260968 region Africa AF260968 country Egypt +AF196835 host Phoenicopterus chilensis +AF196835 date 1999-XX-XX \ No newline at end of file From 5df7f1b03b7c2910e98c1eba31d5edbbf07924d2 Mon Sep 17 00:00:00 2001 From: Jennifer Chang Date: Thu, 31 Oct 2024 09:51:48 -0700 Subject: [PATCH 02/11] Annotation from Bakonyi et al, 2006 Bakonyi, et al, 2006. Lineage 1 and 2 strains of encephalitic West Nile virus, central Europe. Emerging infectious diseases, 12(4), p.618. https://wwwnc.cdc.gov/eid/article/12/4/05-1379_article --- ingest/defaults/annotations.tsv | 1 + 1 file changed, 1 insertion(+) diff --git a/ingest/defaults/annotations.tsv b/ingest/defaults/annotations.tsv index 16990f4..d88c1c3 100644 --- a/ingest/defaults/annotations.tsv +++ b/ingest/defaults/annotations.tsv @@ -294,5 +294,6 @@ KT163243 date 1968-XX-XX AF260968 date 1951-XX-XX AF260968 region Africa AF260968 country Egypt +AF260968 host Homo sapiens AF196835 host Phoenicopterus chilensis AF196835 date 1999-XX-XX \ No newline at end of file From 91ad8abf8ca2f0332d825597509f5b8324ae6a5d Mon Sep 17 00:00:00 2001 From: Jennifer Chang Date: Thu, 31 Oct 2024 13:17:12 -0700 Subject: [PATCH 03/11] Annotation from Bakonyi et al, 2005 for lineage 3 Bakonyi, et al, 2005.
Novel flavivirus or new lineage of West Nile virus, central Europe. Emerging infectious diseases, 11(2), p.225. https://wwwnc.cdc.gov/eid/article/11/2/04-1028_article --- ingest/defaults/annotations.tsv | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/ingest/defaults/annotations.tsv b/ingest/defaults/annotations.tsv index d88c1c3..b9642fb 100644 --- a/ingest/defaults/annotations.tsv +++ b/ingest/defaults/annotations.tsv @@ -296,4 +296,7 @@ AF260968 region Africa AF260968 country Egypt AF260968 host Homo sapiens AF196835 host Phoenicopterus chilensis -AF196835 date 1999-XX-XX \ No newline at end of file +AF196835 date 1999-XX-XX +AY765264 date 1997-XX-XX +AY765264 country Czech Republic +AY765264 region Europe From 22b4544c506011f12241fc81f0a81e65df00eccf Mon Sep 17 00:00:00 2001 From: Jennifer Chang Date: Thu, 31 Oct 2024 13:28:59 -0700 Subject: [PATCH 04/11] Annotation from Chaintoutis, et al, 2013 for lineage 2 Chaintoutis, et al, 2013. West Nile virus lineage 2 strain in Greece, 2012. Emerging infectious diseases, 19(5), p.827. https://wwwnc.cdc.gov/eid/article/19/5/12-1418_article --- ingest/defaults/annotations.tsv | 2 ++ 1 file changed, 2 insertions(+) diff --git a/ingest/defaults/annotations.tsv b/ingest/defaults/annotations.tsv index b9642fb..9f547ca 100644 --- a/ingest/defaults/annotations.tsv +++ b/ingest/defaults/annotations.tsv @@ -300,3 +300,5 @@ AF196835 date 1999-XX-XX AY765264 date 1997-XX-XX AY765264 country Czech Republic AY765264 region Europe +DQ318020 date 1972-XX-XX +DQ318020 host Culex tigripes \ No newline at end of file From f42f9184d3a3f50c45fb0d6e5581ae833eb3f173 Mon Sep 17 00:00:00 2001 From: Jennifer Chang Date: Thu, 31 Oct 2024 13:43:47 -0700 Subject: [PATCH 05/11] Annotation from Platonov, et al, 2001 for Kunjin lineage 2 Platonov, et al, 2001. Outbreak of West Nile virus infection, Volgograd Region, Russia, 1999. Emerging infectious diseases, 7(1), p.128. 
https://wwwnc.cdc.gov/eid/article/7/1/70-0128_article --- ingest/defaults/annotations.tsv | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/ingest/defaults/annotations.tsv b/ingest/defaults/annotations.tsv index 9f547ca..a5f7a2a 100644 --- a/ingest/defaults/annotations.tsv +++ b/ingest/defaults/annotations.tsv @@ -301,4 +301,6 @@ AY765264 date 1997-XX-XX AY765264 country Czech Republic AY765264 region Europe DQ318020 date 1972-XX-XX -DQ318020 host Culex tigripes \ No newline at end of file +DQ318020 host Culex tigripes +D00246 country Australia +D00246 date 1960-XX-XX \ No newline at end of file From c2369402ece41f738181d96de48d2520cbddbbaf Mon Sep 17 00:00:00 2001 From: Jennifer Chang Date: Thu, 31 Oct 2024 22:13:37 -0700 Subject: [PATCH 06/11] Drop lab strain Sarafend --- phylogenetic/defaults/exclude.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/phylogenetic/defaults/exclude.txt b/phylogenetic/defaults/exclude.txt index 49028e0..7de99da 100644 --- a/phylogenetic/defaults/exclude.txt +++ b/phylogenetic/defaults/exclude.txt @@ -53,3 +53,4 @@ OM202905 # Clusters below PAT FV537222 OM202932 # Clusters below PAT FV537222 FV537223 # Clusters below PAT FV537222 FV537222 # Clusters below PAT FV537222 +AY688948 # Laboratory strain Sarafend based on https://pmc.ncbi.nlm.nih.gov/articles/PMC3320449/ \ No newline at end of file From 8aca398916804f0a8a6a8e865280301c2ec9d20d Mon Sep 17 00:00:00 2001 From: Jennifer Chang Date: Thu, 31 Oct 2024 22:14:41 -0700 Subject: [PATCH 07/11] Force include representative sequences from smaller lineages --- phylogenetic/defaults/include.txt | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) diff --git a/phylogenetic/defaults/include.txt b/phylogenetic/defaults/include.txt index 8161a2a..e3d7d60 100644 --- a/phylogenetic/defaults/include.txt +++ b/phylogenetic/defaults/include.txt @@ -68,3 +68,24 @@ KJ501222 # SW03 MG004537 # SW03 MF175866 # SW03 MG004540 # SW03 +MW383507 # Lineage 2 +HM147822 # Lineage 2 
+GQ903680 # Lineage 2 +DQ176636 # Lineage 2 +KU978767 # Lineage 2 +HM147823 # Lineage 2 +PP445212 # Lineage 3 +AY765264 # Lineage 3 +AY277251 # Lineage 4 +FJ159131 # Lineage 4 +FJ159129 # Lineage 4 +FJ159130 # Lineage 4 +KJ831223 # Lineage 4 +KU978770 # Lineage 5 +DQ256376 # Lineage 5 +JX041632 # Lineage 5 +GQ851604 # Lineage 5 +GQ851605 # Lineage 5 +KY703855 # Lineage 7 +OP846972 # Lineage 7 +KY703856 # Lineage 8 From 2c0d3ec57e202028aba9ecd8bb294634f52e0981 Mon Sep 17 00:00:00 2001 From: Jennifer Chang Date: Thu, 31 Oct 2024 22:16:39 -0700 Subject: [PATCH 08/11] Midpoint the global tree --- phylogenetic/defaults/config.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/phylogenetic/defaults/config.yaml b/phylogenetic/defaults/config.yaml index a86b515..d8c50ff 100644 --- a/phylogenetic/defaults/config.yaml +++ b/phylogenetic/defaults/config.yaml @@ -2,7 +2,7 @@ strain_id_field: "accession" # Use 'Egypt 1951' as the reference and root, following Mencattelli et al, 2023 # https://www.nature.com/articles/s41467-023-42185-7 reference: "defaults/reference_global.gb" -root: "AF260968" +root: "mid_point" # Sequences must be FASTA and metadata must be TSV # Both files must be zstd compressed From f34b75572c1de782eec3aeb0f36cc990f54ce990 Mon Sep 17 00:00:00 2001 From: Jennifer Chang Date: Thu, 31 Oct 2024 22:17:22 -0700 Subject: [PATCH 09/11] Set clock rate based on literature search MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit From May et al, 2011 "Phylogeography of West Nile Virus: From the Cradle of Evolution in Africa to Eurasia, Australia, and the Americas" https://journals.asm.org/doi/10.1128/jvi.01963-10 "The mean rate of nucleotide substitution among all isolates of WNV is 7.55 x 10^-4 substitutions/site/year...This rate is comparable to that previously estimated for other flaviviruses...WNV show some heterogeneity among different genotypes, but all groups have various rates of between 2.24 x 10^-4 
substitutions/site/year ... and 1.06 x 10^-3." Fixing the clock-rate at 0.000755 results in MRCA in auspice tree between 1600-1700 which matches the literature from Fall et al, 2017 "Biological and phylogenetic characteristics of West African lineages of West Nile virus" https://journals.plos.org/plosntds/article?id=10.1371/journal.pntd.0006078 "The tMRCA of WNV is predicted to have originated in the late 16th/early 17th century (95%HPD: 1476–1765)" --- phylogenetic/defaults/config.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/phylogenetic/defaults/config.yaml b/phylogenetic/defaults/config.yaml index d8c50ff..ef9c5c5 100644 --- a/phylogenetic/defaults/config.yaml +++ b/phylogenetic/defaults/config.yaml @@ -69,7 +69,7 @@ subsampling: force_include: --exclude-all --include defaults/include.txt refine: - treetime_params: --coalescent opt --date-inference marginal --date-confidence + treetime_params: --coalescent opt --date-inference marginal --date-confidence --keep-polytomies --clock-rate 0.000755 traits: metadata_columns: [ From 0354f128ab3fb9be0fbe41d713a5f8157d19bfef Mon Sep 17 00:00:00 2001 From: Jennifer Chang Date: Fri, 1 Nov 2024 01:18:39 -0700 Subject: [PATCH 10/11] Revert AF260968_REF to AF260968 --- phylogenetic/defaults/exclude.txt | 1 + phylogenetic/defaults/include.txt | 2 +- phylogenetic/defaults/reference_global.gb | 4 ++-- 3 files changed, 4 insertions(+), 3 deletions(-) diff --git a/phylogenetic/defaults/exclude.txt b/phylogenetic/defaults/exclude.txt index 7de99da..602d881 100644 --- a/phylogenetic/defaults/exclude.txt +++ b/phylogenetic/defaults/exclude.txt @@ -1,3 +1,4 @@ +AF260968 # Egypt 1951 will be added back in during augur align reference HW816192 # 11029 bp PAT 27-MAY-2015 CS543188 # 11029 bp PAT 20-APR-2007 CS568914 # 11029 bp PAT 18-MAY-2007 diff --git a/phylogenetic/defaults/include.txt b/phylogenetic/defaults/include.txt index e3d7d60..bbef20d 100644 --- a/phylogenetic/defaults/include.txt +++ 
b/phylogenetic/defaults/include.txt @@ -1,4 +1,4 @@ -AF260968 # Egypt 1951 +# AF260968 # Egypt 1951 will be used as augur align reference NC_001563 # Lineage 2 reference NC_009942 # Lineage 1 reference HM051416 # Isreal 1953 diff --git a/phylogenetic/defaults/reference_global.gb b/phylogenetic/defaults/reference_global.gb index 61356c7..8e4fc84 100644 --- a/phylogenetic/defaults/reference_global.gb +++ b/phylogenetic/defaults/reference_global.gb @@ -1,6 +1,6 @@ -LOCUS AF260968_REF 11029 bp RNA linear VRL 27-AUG-2000 +LOCUS AF260968 11029 bp RNA linear VRL 27-AUG-2000 DEFINITION West Nile virus strain Eg101, complete genome. -ACCESSION AF260968_REF +ACCESSION AF260968 VERSION AF260968.1 KEYWORDS . SOURCE West Nile virus (WNV) From bf0ed7c7435c6ae3c82cebfade123f4a4b1ddf22 Mon Sep 17 00:00:00 2001 From: Jennifer Chang Date: Fri, 1 Nov 2024 01:48:57 -0700 Subject: [PATCH 11/11] Set state-based clock rate based on literature search From May et al, 2011 "Phylogeography of West Nile Virus: From the Cradle of Evolution in Africa to Eurasia, Australia, and the Americas" https://journals.asm.org/doi/10.1128/jvi.01963-10 Table 1, IS98-STD and North America 6.53E-04 --- phylogenetic/build-configs/washington-state/config.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/phylogenetic/build-configs/washington-state/config.yaml b/phylogenetic/build-configs/washington-state/config.yaml index 6268f41..a243cb7 100644 --- a/phylogenetic/build-configs/washington-state/config.yaml +++ b/phylogenetic/build-configs/washington-state/config.yaml @@ -20,7 +20,7 @@ subsampling: force_include: --exclude-all --include ../nextclade/defaults/include.txt refine: - treetime_params: --coalescent opt --clock-filter-iqd 4 --date-inference marginal --date-confidence + treetime_params: --coalescent opt --clock-filter-iqd 4 --date-inference marginal --date-confidence --clock-rate 0.000653 traits: metadata_columns: [