Skip to content

Commit

Permalink
Ingest: fetch and append pathoplexus global lineage calls #40
Browse files Browse the repository at this point in the history
  • Loading branch information
j23414 authored Nov 13, 2024
2 parents 20237ec + a347e7c commit 3556b10
Show file tree
Hide file tree
Showing 4 changed files with 81 additions and 4 deletions.
7 changes: 6 additions & 1 deletion ingest/defaults/config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -131,4 +131,9 @@ curate:

nextclade:
nextclade_dataset_path: '../nextclade/dataset'
nextclade_field: 'clade_membership'
nextclade_field: 'clade_membership'

# Settings used by the ingest rules that fetch lineage calls from Pathoplexus.
pathoplexus:
  # Endpoint requested with curl by the `pathoplexus_classify` rule; the rule
  # appends its own query string (dataFormat, downloadAsFile, fields).
  URL: 'https://lapis.pathoplexus.org/west-nile/sample/details'
  # Comma-separated column names sent as the `fields` query parameter.
  fields: 'insdcAccessionBase,lineage'
  # Column of the downloaded TSV that the rule renames to
  # `curate.output_id_field`.
  accession_field: 'insdcAccessionBase'
74 changes: 72 additions & 2 deletions ingest/rules/nextclade.smk
Original file line number Diff line number Diff line change
Expand Up @@ -13,10 +13,55 @@ like to customize the rules:
https://docs.nextstrain.org/projects/nextclade/page/user/nextclade-cli.html
"""

rule pathoplexus_classify:
    """
    Download global lineage calls from the Pathoplexus API as a TSV.

    Only the columns listed in config["pathoplexus"]["fields"] are requested.
    Duplicate rows are dropped, and the accession column is renamed to the
    curated metadata id column (config["curate"]["output_id_field"]).
    """
    output:
        pathoplexus_tsv="data/pathoplexus_results/global_lineages.tsv",
    params:
        URL=config["pathoplexus"]["URL"],
        fields=config["pathoplexus"]["fields"],
        accession_field=config["pathoplexus"]["accession_field"],
        id_field=config["curate"]["output_id_field"],
    shell:
        """
        # --fail: exit non-zero on an HTTP error instead of piping the error
        # body downstream as if it were the TSV.
        # awk '!seen[$0]++': drop every repeated line, not only adjacent ones
        # (plain `uniq` misses non-adjacent duplicates); the header line and
        # the original row order are preserved.
        curl --fail --silent --show-error --location \
            "{params.URL}?dataFormat=TSV&downloadAsFile=false&fields={params.fields}" \
          | awk '!seen[$0]++' \
          | csvtk -t rename -f {params.accession_field} -n {params.id_field} \
          > {output.pathoplexus_tsv}
        """

rule select_USA_potential_samples:
    """
    Select the sequences whose Pathoplexus lineage could still be 1A.

    Rows of the Pathoplexus TSV are kept unless their `lineage` value contains
    "1B" or any of the digits 2-8, which leaves 1A and unassigned records.
    The sequences are then subset to the ids present in that filtered table.

    NOTE(review): nothing in this rule filters by country; the "USA" in the
    rule name presumably relies on the downstream Nextclade dataset -- confirm.
    """
    input:
        sequences="results/sequences.fasta",
        pathoplexus_tsv="data/pathoplexus_results/global_lineages.tsv",
    output:
        potential_1A_samples="data/pathoplexus_results/potential_1A_samples.tsv",
        sequences="data/potential_1A_sequences.fasta",
    params:
        id_field=config["curate"]["output_id_field"],
    shell:
        """
        # [2-8] is a range; the former [2,3,4,5,6,7,8] also matched a literal
        # comma, because commas are ordinary members of a bracket expression.
        tsv-filter -H \
            --not-regex 'lineage:1B|[2-8]' \
            {input.pathoplexus_tsv} \
          > {output.potential_1A_samples}

        augur filter \
            --sequences {input.sequences} \
            --metadata {output.potential_1A_samples} \
            --metadata-id-column {params.id_field} \
            --output-sequences {output.sequences}
        """

rule nextclade_classify:
#Classifies sequences into clades using Nextclade
input:
sequences="results/sequences.fasta",
sequences="data/potential_1A_sequences.fasta",
dataset=config["nextclade"]["nextclade_dataset_path"],
output:
nextclade_tsv="data/nextclade_results/nextclade.tsv",
Expand Down Expand Up @@ -55,7 +100,7 @@ rule append_nextclade_columns:
metadata="data/raw_metadata.tsv",
nextclade_subtypes="data/nextclade_clades.tsv",
output:
metadata_all="results/metadata.tsv",
metadata_all="data/metadata_nextclade.tsv",
params:
id_field=config["curate"]["output_id_field"],
nextclade_field=config["nextclade"]["nextclade_field"],
Expand All @@ -69,3 +114,28 @@ rule append_nextclade_columns:
{input.metadata} \
> {output.metadata_all}
"""

rule append_pathoplexus_columns:
    """
    Merge the Pathoplexus lineage table into the Nextclade-annotated metadata
    with `augur merge`, writing the final results/metadata.tsv.
    """
    input:
        metadata="data/metadata_nextclade.tsv",
        pathoplexus_tsv="data/pathoplexus_results/global_lineages.tsv",
    output:
        metadata="results/metadata.tsv",
    params:
        id_field=config["curate"]["output_id_field"],
        # Same column name as id_field: pathoplexus_classify already renames
        # the Pathoplexus accession column to curate.output_id_field.
        pathoplexus_field=config["curate"]["output_id_field"],
    shell:
        r"""
        augur merge \
        --metadata \
        metadata={input.metadata:q} \
        pathoplexus={input.pathoplexus_tsv:q} \
        --metadata-id-columns \
        metadata={params.id_field:q} \
        pathoplexus={params.pathoplexus_field:q} \
        --output-metadata {output.metadata:q} \
        --no-source-columns
        """
2 changes: 1 addition & 1 deletion phylogenetic/defaults/auspice_config.json
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
{"key": "state", "title": "State", "type": "categorical"},
{"key": "division", "title": "Division", "type": "categorical"},
{"key": "location", "title": "Location", "type": "categorical"},
{"key": "lineage", "title": "Strain", "type": "categorical"},
{"key": "lineage", "title": "Pathoplexus lineage", "type": "categorical"},
{"key": "clade_membership", "title": "Clade", "type": "categorical"},
{"key": "author", "title": "Authors", "type": "categorical"},
{"key": "host", "title": "Host Species", "type": "categorical"}
Expand Down
2 changes: 2 additions & 0 deletions phylogenetic/defaults/auspice_config_global.json
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,8 @@
{"key": "num_date", "title": "Sampling Date", "type": "continuous"},
{"key": "region", "title": "Region", "type": "categorical"},
{"key": "country", "title": "Country", "type": "categorical"},
{"key": "lineage", "title": "Pathoplexus lineage", "type": "categorical"},
{"key": "clade_membership", "title": "Clade", "type": "categorical"},
{"key": "author", "title": "Authors", "type": "categorical"},
{"key": "host", "title": "Host Species", "type": "categorical"}
],
Expand Down

0 comments on commit 3556b10

Please sign in to comment.