Skip to content

Commit

Permalink
Add "Host Genus" and "Host Type" coloring to the phylogenetic tree #42
Browse files Browse the repository at this point in the history
  • Loading branch information
j23414 authored Nov 18, 2024
2 parents 042d799 + 9c96add commit 68d122c
Show file tree
Hide file tree
Showing 7 changed files with 373 additions and 3 deletions.
2 changes: 2 additions & 0 deletions ingest/defaults/config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -112,6 +112,8 @@ curate:
'location',
'state',
'host',
'host_genus',
'host_type',
'is_lab_host',
#'date_submitted',
#'sra_accession',
Expand Down
286 changes: 286 additions & 0 deletions ingest/defaults/host_hostgenus_hosttype_map.tsv
Original file line number Diff line number Diff line change
@@ -0,0 +1,286 @@
host host_genus host_type
Homo sapiens Homo Human
Aedeomyia madagascarica Aedeomyia Mosquito
Aedes albopictus Aedes Mosquito
Aedes cinereus Aedes Mosquito
Aedes dalzieli Aedes Mosquito
Aedes japonicus Aedes Mosquito
Aedes rossicus Aedes Mosquito
Aedes sp. Aedes Mosquito
Aedes vexans Aedes Mosquito
Anopheles atroparvus Anopheles Mosquito
Anopheles bancroftii Anopheles Mosquito
Anopheles coustani Anopheles Mosquito
Anopheles farauti Anopheles Mosquito
Anopheles hyrcanus Anopheles Mosquito
Anopheles maculipennis Anopheles Mosquito
Anopheles messeae Anopheles Mosquito
Anopheles pauliani Anopheles Mosquito
Anopheles plumbeus Anopheles Mosquito
Anopheles sp. Anopheles Mosquito
Anopheles stephensi Anopheles Mosquito
Coquillettidia Coquillettidia Mosquito
Coquillettidia perturbans Coquillettidia Mosquito
Coquillettidia richiardii Coquillettidia Mosquito
Culex Culex Mosquito
Culex annulirostris Culex Mosquito
Culex antennatus Culex Mosquito
Culex bitaeniorhynchus Culex Mosquito
Culex erraticus Culex Mosquito
Culex erythrothorax Culex Mosquito
Culex gelidus Culex Mosquito
Culex interrogator Culex Mosquito
Culex modestus Culex Mosquito
Culex neavei Culex Mosquito
Culex nigripalpus Culex Mosquito
Culex perexiguus Culex Mosquito
Culex perfuscus Culex Mosquito
Culex pipiens Culex Mosquito
Culex pipiens complex Culex Mosquito
Culex pipiens pipiens Culex Mosquito
Culex pipiens sensu lato Culex Mosquito
Culex poicilipes Culex Mosquito
Culex pseudovishnui Culex Mosquito
Culex pullus Culex Mosquito
Culex quinquefasciatus Culex Mosquito
Culex restuans Culex Mosquito
Culex salinarius Culex Mosquito
Culex sitiens Culex Mosquito
Culex squamosus Culex Mosquito
Culex stigmatosoma Culex Mosquito
Culex tarsalis Culex Mosquito
Culex theileri Culex Mosquito
Culex tritaeniorhynchus Culex Mosquito
Culex univittatus Culex Mosquito
Culex vishnui Culex Mosquito
Culex whitmorei Culex Mosquito
Culicidae Culicidae Mosquito
Culiseta Culiseta Mosquito
Culiseta inornata Culiseta Mosquito
Culiseta longiareolata Culiseta Mosquito
Culiseta melanura Culiseta Mosquito
Culiseta morsitans Culiseta Mosquito
Mansonia uniformis Mansonia Mosquito
Ochlerotatus canadensis Ochlerotatus Mosquito
Ochlerotatus cantator Ochlerotatus Mosquito
Ochlerotatus caspius Ochlerotatus Mosquito
Ochlerotatus communis Ochlerotatus Mosquito
Ochlerotatus dorsalis Ochlerotatus Mosquito
Ochlerotatus flavescens Ochlerotatus Mosquito
Ochlerotatus sollicitans Ochlerotatus Mosquito
Ochlerotatus spencerii Ochlerotatus Mosquito
Ochlerotatus sticticus Ochlerotatus Mosquito
Ochlerotatus taeniorhynchus Ochlerotatus Mosquito
Ochlerotatus triseriatus Ochlerotatus Mosquito
Ochlerotatus trivittatus Ochlerotatus Mosquito
Psorophora Psorophora Mosquito
Psorophora ferox Psorophora Mosquito
Uranotaenia Uranotaenia Mosquito
Uranotaenia unguiculata Uranotaenia Mosquito
Accipiter Accipiter Bird
Accipiter cooperii Accipiter Bird
Accipiter gentilis Accipiter Bird
Accipiter nisus Accipiter Bird
Accipiter striatus Accipiter Bird
Accipitridae Accipitridae Bird
Acrocephalus dumetorum Acrocephalus Bird
Actitis macularius Actitis Bird
Aegithalos caudatus Aegithalos Bird
Aegypius monachus Aegypius Bird
Agelaius phoeniceus Agelaius Bird
Alopochen aegyptiaca Alopochen Bird
Anas platyrhynchos Anas Bird
Anatidae Anatidae Bird
Anser Anser Bird
Aphelocoma Aphelocoma Bird
Aphelocoma californica Aphelocoma Bird
Apus apus Apus Bird
Aquila adalberti Aquila Bird
Aquila chrysaetos Aquila Bird
Aquila fasciata Aquila Bird
Ardeidae Ardeidae Bird
Asio flammeus Asio Bird
Asio otus Asio Bird
Athene noctua Athene Bird
Aves Aves Bird
Baeolophus bicolor Baeolophus Bird
Baeolophus inornatus Baeolophus Bird
Bombycilla garrulus Bombycilla Bird
Bonasa umbellus Bonasa Bird
Branta canadensis Branta Bird
Bubo scandiacus Bubo Bird
Bubo virginianus Bubo Bird
Bubulcus ibis Bubulcus Bird
Buteo Buteo Bird
Buteo buteo Buteo Bird
Buteo jamaicensis Buteo Bird
Buteo lineatus Buteo Bird
Buteo regalis Buteo Bird
Buteo swainsoni Buteo Bird
Butorides virescens Butorides Bird
Calidris alba Calidris Bird
Calypte costae Calypte Bird
Cardinalis Cardinalis Bird
Cardinalis cardinalis Cardinalis Bird
Charadrius melodus Charadrius Bird
Chroicocephalus ridibundus Chroicocephalus Bird
Ciconiidae Ciconiidae Bird
Clanga pomarina Clanga Bird
Colaptes auratus Colaptes Bird
Coloeus monedula Coloeus Bird
Columba livia Columba Bird
Columba palumbus Columba Bird
Columbidae Columbidae Bird
Coracopsis vasa Coracopsis Bird
Coragyps atratus Coragyps Bird
Corvidae Corvidae Bird
Corvus Corvus Bird
Corvus brachyrhynchos Corvus Bird
Corvus corax Corvus Bird
Corvus cornix Corvus Bird
Corvus corone Corvus Bird
Corvus frugilegus Corvus Bird
Corvus ossifragus Corvus Bird
Cuculus canorus Cuculus Bird
Curruca conspicillata Curruca Bird
Cyanistes caeruleus Cyanistes Bird
Cyanocitta cristata Cyanocitta Bird
Cygnus buccinator Cygnus Bird
Cygnus olor Cygnus Bird
Dumetella Dumetella Bird
Egretta garzetta Egretta Bird
Euphagus cyanocephalus Euphagus Bird
Falco Falco Bird
Falco columbarius Falco Bird
Falco peregrinus Falco Bird
Falco punctatus Falco Bird
Falco sparverius Falco Bird
Falco tinnunculus Falco Bird
Fulica Fulica Bird
Galliformes Galliformes Bird
Gallus gallus Gallus Bird
Garrulus glandarius Garrulus Bird
Gymnogyps californianus Gymnogyps Bird
Haemorhous mexicanus Haemorhous Bird
Haliaeetus albicilla Haliaeetus Bird
Haliaeetus leucocephalus Haliaeetus Bird
Hirundinidae Hirundinidae Bird
Hirundo rustica Hirundo Bird
Hylocichla mustelina Hylocichla Bird
Ichthyaetus leucophthalmus Ichthyaetus Bird
Ictinia mississippiensis Ictinia Bird
Lanius ludovicianus Lanius Bird
Larus crassirostris Larus Bird
Larus delawarensis Larus Bird
Larus michahellis Larus Bird
Larus smithsonianus Larus Bird
Lathamus discolor Lathamus Bird
Loriini Loriini Bird
Meleagris gallopavo Meleagris Bird
Mergus squamatus Mergus Bird
Mimus Mimus Bird
Mimus polyglottos Mimus Bird
Molothrus ater Molothrus Bird
Nestor notabilis Nestor Bird
Oena capensis Oena Bird
Oriolus flavocinctus Oriolus Bird
Pandion haliaetus Pandion Bird
Parulidae Parulidae Bird
Parus major Parus Bird
Passer domesticus Passer Bird
Passer sp. Passer Bird
Passeridae Passeridae Bird
Pelecanus Pelecanus Bird
Pelecanus erythrorhynchos Pelecanus Bird
Pelecanus occidentalis Pelecanus Bird
Phalacrocoracidae Phalacrocoracidae Bird
Phalacrocorax auritus Phalacrocorax Bird
Phalacrocorax carbo Phalacrocorax Bird
Phasianinae Phasianinae Bird
Phasianus colchicus Phasianus Bird
Pheucticus melanocephalus Pheucticus Bird
Phoenicoparrus andinus Phoenicoparrus Bird
Phoenicopterus chilensis Phoenicopterus Bird
Phoenicopterus roseus Phoenicopterus Bird
Phoenicopterus ruber Phoenicopterus Bird
Phoenicopterus sp. Phoenicopterus Bird
Phylloscopus collybita Phylloscopus Bird
Pica hudsonia Pica Bird
Pica nuttalli Pica Bird
Pica pica Pica Bird
Pluvialis apricaria Pluvialis Bird
Podiceps cristatus Podiceps Bird
Poecile atricapillus Poecile Bird
Poecile carolinensis Poecile Bird
Prunella modularis Prunella Bird
Pyrrhocorax graculus Pyrrhocorax Bird
Quelea quelea Quelea Bird
Quiscalus Quiscalus Bird
Quiscalus major Quiscalus Bird
Quiscalus quiscula Quiscalus Bird
Rallus aquaticus Rallus Bird
Serinus canaria Serinus Bird
Spatula querquedula Spatula Bird
Spheniscus humboldti Spheniscus Bird
Spinus tristis Spinus Bird
Sternula Sternula Bird
Sternula antillarum Sternula Bird
Streptopelia capicola Streptopelia Bird
Streptopelia decaocto Streptopelia Bird
Strigidae Strigidae Bird
Strix aluco Strix Bird
Strix nebulosa Strix Bird
Strix nebulosa lapponica Strix Bird
Sturnidae Sturnidae Bird
Sturnus vulgaris Sturnus Bird
Sylvia atricapilla Sylvia Bird
Toxostoma rufum Toxostoma Bird
Trichoglossus haematodus Trichoglossus Bird
Trichoglossus moluccanus Trichoglossus Bird
Turdus merula Turdus Bird
Turdus migratorius Turdus Bird
Turdus philomelos Turdus Bird
Tyto alba Tyto Bird
Zenaida macroura Zenaida Bird
Equus caballus Equus Horse
Equus caballus x Equus asinus Equus Horse
Alectorobius capensis Alectorobius Tick
Dermacentor marginatus Dermacentor Tick
Hyalomma Hyalomma Tick
Hyalomma marginatum Hyalomma Tick
Hyalomma marginatum marginatum Hyalomma Tick
Hyalomma plumbeum plumbeum Hyalomma Tick
Hyalomma scupense Hyalomma Tick
Ixodoidea Ixodoidea Tick
Rhipicephalus guilhoni Rhipicephalus Tick
Rhipicephalus pulchellus Rhipicephalus Tick
Bos taurus Bos Other
Camelus bactrianus Camelus Other
Camelus dromedarius Camelus Other
Canis lupus familiaris Canis Other
Chiroptera Chiroptera Other
Cricetinae Cricetinae Other
Crocodylus moreletii Crocodylus Other
Crocodylus niloticus Crocodylus Other
Crocodylus porosus Crocodylus Other
Equus Equus Other
Equus asinus Equus Other
Equus ferus Equus Other
Giraffa giraffa Giraffa Other
Laridae Laridae Bird
Mastomys erythroleucus Mastomys Other
Mephitis mephitis Mephitis Other
Mesocricetus auratus Mesocricetus Other
Mus musculus Mus Other
Orcinus orca Orcinus Other
Ovis aries Ovis Other
Panthera leo Panthera Other
Pelophylax ridibundus Pelophylax Other
Platycercus Platycercus Bird
Rodentia Rodentia Other
Rousettus leschenaultii Rousettus Other
Sciuridae Sciuridae Other
Sciurus carolinensis Sciurus Other
Sciurus niger Sciurus Other
Syncerus caffer Syncerus Other
Vicugna pacos Vicugna Other
7 changes: 7 additions & 0 deletions ingest/rules/curate.smk
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,7 @@ rule curate:
sequences_ndjson="data/genbank.ndjson",
all_geolocation_rules="data/all-geolocation-rules.tsv",
annotations=config["curate"]["annotations"],
manual_mapping="defaults/host_hostgenus_hosttype_map.tsv",
output:
metadata="data/raw_metadata_curated.tsv",
sequences="results/sequences.fasta",
Expand Down Expand Up @@ -98,6 +99,12 @@ rule curate:
| ./scripts/post_process_metadata.py \
| ./scripts/add-field-names \
--metadata-columns {params.metadata_columns} \
| ./scripts/transform-new-fields \
--map-tsv {input.manual_mapping} \
--map-id host \
--metadata-id host \
--map-fields host_genus host_type \
--pass-through true \
| augur curate apply-record-annotations \
--annotations {input.annotations} \
--id-field {params.annotations_id} \
Expand Down
50 changes: 50 additions & 0 deletions ingest/scripts/transform-new-fields
Original file line number Diff line number Diff line change
@@ -0,0 +1,50 @@
#! /usr/bin/env python3

import argparse
import json
import csv
from sys import stdin, stdout

def _parse_bool(value):
    """Convert a command-line string such as "true" or "false" to a bool.

    Raises argparse.ArgumentTypeError for anything that is not a recognised
    boolean spelling, so typos fail loudly instead of silently enabling the
    option.
    """
    if isinstance(value, bool):
        return value
    normalized = value.strip().lower()
    if normalized in {"true", "t", "yes", "y", "1"}:
        return True
    if normalized in {"false", "f", "no", "n", "0", ""}:
        return False
    raise argparse.ArgumentTypeError(
        f"expected a boolean value (true/false), got {value!r}"
    )


def parse_args():
    """Parse the command-line arguments of the script.

    Returns:
        argparse.Namespace with the attributes ``map_tsv``, ``map_id``,
        ``metadata_id``, ``map_fields`` (a list of one or more column names)
        and ``pass_through`` (a bool).
    """
    parser = argparse.ArgumentParser(
        description="Transform JSON data by applying a TSV mapping and adding new columns."
    )
    parser.add_argument("--map-tsv", required=True,
                        help="Path to the TSV mapping file.")
    parser.add_argument("--map-id", required=True,
                        help="Column name in the map TSV to use as the mapping key.")
    parser.add_argument("--metadata-id", required=True,
                        help="Column name in the metadata JSON to use as the mapping key.")
    parser.add_argument("--map-fields", nargs="+", required=True,
                        help="Columns to add from the mapping file.")
    # Without a type converter every supplied value is a non-empty string and
    # therefore truthy, so "--pass-through false" used to enable the option.
    # nargs="?" with const=True additionally allows the bare flag form.
    parser.add_argument("--pass-through", nargs="?", const=True, default=False,
                        type=_parse_bool,
                        help="Pass through the original value when no mapping is found. "
                             "Accepts an optional true/false value; the bare flag means true.")
    return parser.parse_args()

def load_mapping(map_tsv, map_id, map_fields):
    """Load a TSV file into a lookup table keyed by one of its columns.

    Args:
        map_tsv: Path to the tab-separated mapping file; its first row is the header.
        map_id: Name of the column whose values become the lookup keys.
        map_fields: Names of the columns to keep for each key.

    Returns:
        dict mapping each ``map_id`` value to a ``{field: value}`` dict holding
        the requested ``map_fields``. When a key occurs on several rows, the
        last row wins.

    Raises:
        KeyError: if ``map_id`` or any of ``map_fields`` is not a column of the file.
    """
    mapping = {}
    # newline='' leaves line-ending handling to the csv module, as its
    # documentation requires for files passed to a reader.
    with open(map_tsv, 'r', encoding='utf-8', newline='') as f:
        reader = csv.DictReader(f, delimiter='\t')
        header = reader.fieldnames
        # Validate the requested columns up front so that a misspelled column
        # name is reported clearly, even when the file has no data rows.
        # A completely empty file (no header) still yields an empty mapping.
        if header is not None:
            missing = [col for col in (map_id, *map_fields) if col not in header]
            if missing:
                raise KeyError(
                    f"column(s) {missing} not found in {map_tsv}; "
                    f"available columns: {list(header)}"
                )
        for row in reader:
            mapping[row[map_id]] = {col: row[col] for col in map_fields}
    return mapping

def main():
    """Annotate NDJSON records from stdin with columns looked up in a TSV map.

    Each input line is parsed as one JSON record. The record's value for the
    ``--metadata-id`` field is looked up in the mapping: on a hit, the mapped
    columns are merged into the record; on a miss, and only when pass-through
    is enabled, every ``--map-fields`` column is set to that original value.
    Each record is then written to stdout as one JSON line.
    """
    options = parse_args()
    lookup = load_mapping(options.map_tsv, options.map_id, options.map_fields)

    for raw_line in stdin:
        record = json.loads(raw_line)
        source_value = record.get(options.metadata_id, '')

        # Mapping values are always dicts, so None can only mean "no entry".
        mapped_columns = lookup.get(source_value)
        if mapped_columns is not None:
            record.update(mapped_columns)
        elif options.pass_through:
            # No mapping found: reuse the original value for every new column.
            record.update(dict.fromkeys(options.map_fields, source_value))

        stdout.write(f"{json.dumps(record)}\n")


if __name__ == "__main__":
    main()
12 changes: 11 additions & 1 deletion phylogenetic/defaults/auspice_config.json
Original file line number Diff line number Diff line change
@@ -1,5 +1,11 @@
{
"title": "Washington Focused West Nile Virus Build",
"data_provenance": [
{
"name": "GenBank",
"url": "https://www.ncbi.nlm.nih.gov/genbank/"
}
],
"colorings": [
{"key": "gt", "title": "Genotype", "type": "categorical"},
{"key": "num_date", "title": "Sampling Date", "type": "continuous"},
Expand All @@ -10,7 +16,9 @@
{"key": "lineage", "title": "Pathoplexus lineage", "type": "categorical"},
{"key": "clade_membership", "title": "Clade", "type": "categorical"},
{"key": "author", "title": "Authors", "type": "categorical"},
{"key": "host", "title": "Host Species", "type": "categorical"}
{"key": "host", "title": "Host Species", "type": "categorical"},
{"key": "host_genus", "title": "Host Genus", "type": "categorical"},
{"key": "host_type", "title": "Host Type", "type": "categorical"}
],
"geo_resolutions": [
"state",
Expand All @@ -29,6 +37,8 @@
"author",
"clade_membership",
"host",
"host_genus",
"host_type",
"lineage"
],
"display_defaults": {
Expand Down
Loading

0 comments on commit 68d122c

Please sign in to comment.