Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Filter transcripts #105

Open
wants to merge 3 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion common/schemas/gene.graphql
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ type Gene {
name: String
so_term: String!
transcripts: [Transcript!]!
transcripts_page(page: Int!, per_page: Int!): TranscriptsPage!
transcripts_page(page: Int!, per_page: Int!, filters: TranscriptFilter): TranscriptsPage!
slice: Slice!
external_references: [ExternalReference!]!
alternative_symbols: [String!]!
Expand Down
7 changes: 6 additions & 1 deletion common/schemas/query.graphql
Original file line number Diff line number Diff line change
Expand Up @@ -45,4 +45,9 @@ type Locus {
input RegionNameInput {
genome_id: String!
name: String!
}
}

# Optional filters accepted by Gene.transcripts_page.
# Each field lists the accepted values for one transcript metadata attribute;
# the resolver matches a field against `metadata.<field>.value` using `$in`,
# and all supplied fields are merged into a single query.
input TranscriptFilter {
# Accepted biotype values, e.g. "protein_coding".
biotype: [String!]
# Accepted tsl values, e.g. "tsl1" (presumably transcript support level - confirm).
tsl: [String!]
}
17 changes: 16 additions & 1 deletion graphql_service/resolver/gene_model.py
Original file line number Diff line number Diff line change
Expand Up @@ -218,14 +218,26 @@ async def resolve_gene_transcripts(gene: Dict, info: GraphQLResolveInfo) -> List

@GENE_TYPE.field("transcripts_page")
async def resolve_gene_transcripts_page(
    gene: Dict,
    _: GraphQLResolveInfo,
    page: int,
    per_page: int,
    filters=None,
) -> Dict:
    """Pass the fields required by the transcripts-page child resolvers down to them.

    Nothing is loaded here: the returned dict is the parent value from which
    the child resolvers build their own database queries.

    Args:
        gene: Parent gene document; only ``gene_primary_key`` is read.
        _: Resolver info (unused).
        page: Requested page number.
        per_page: Requested page size.
        filters: Optional ``TranscriptFilter`` input, mapping a metadata
            attribute name (e.g. ``biotype``) to its list of accepted values.

    Returns:
        A dict with ``gene_primary_key``, ``page``, ``per_page`` and
        ``transcript_filter_queries``. The latter maps
        ``metadata.<name>.value`` to ``{"$in": [...]}`` and is empty when no
        filters were supplied.
    """
    # Filter fields are nullable in the schema, so a client may send an
    # explicit null (e.g. ``filters: {biotype: null}``). ``$in`` requires an
    # array, so null entries are treated as "no filter on this attribute"
    # instead of being forwarded to the database.
    transcript_filter_queries = {
        f"metadata.{filter_name}.value": {"$in": filter_values}
        for filter_name, filter_values in (filters or {}).items()
        if filter_values is not None
    }

    return {
        "gene_primary_key": gene["gene_primary_key"],
        "page": page,
        "per_page": per_page,
        "transcript_filter_queries": transcript_filter_queries,
    }


Expand All @@ -234,9 +246,11 @@ async def resolve_transcripts_page_transcripts(
transcripts_page: Dict, info: GraphQLResolveInfo
) -> List[Dict]:
"Load a page of transcripts"

query = {
"type": "Transcript",
"gene_foreign_key": transcripts_page["gene_primary_key"],
**transcripts_page["transcript_filter_queries"],
}
page, per_page = transcripts_page["page"], transcripts_page["per_page"]
collection = info.context["mongo_db"]
Expand All @@ -256,6 +270,7 @@ async def resolve_transcripts_page_metadata(
query = {
"type": "Transcript",
"gene_foreign_key": transcripts_page["gene_primary_key"],
**transcripts_page["transcript_filter_queries"],
}
collection = info.context["mongo_db"]
return {
Expand Down
94 changes: 89 additions & 5 deletions graphql_service/resolver/tests/test_resolvers.py
Original file line number Diff line number Diff line change
Expand Up @@ -95,6 +95,7 @@ def fixture_transcript_data():
}
],
"gene_foreign_key": "1_ENSG001.1",
"metadata": {"biotype": {"value": "protein_coding"}},
},
{
"genome_id": "1",
Expand All @@ -105,6 +106,7 @@ def fixture_transcript_data():
"gene": "ENSG001.1",
"product_generating_contexts": [],
"gene_foreign_key": "1_ENSG001.1",
"metadata": {"biotype": {"value": "nonsense_mediated_decay"}},
},
{
"genome_id": "1",
Expand Down Expand Up @@ -862,15 +864,36 @@ async def test_resolve_gene_transcripts_page():
"unversioned_stable_id": "ENSG001",
"gene_primary_key": "1_ENSG001.1",
}
result = await model.resolve_gene_transcripts_page(gene, None, 1, 2)
assert result == {"gene_primary_key": "1_ENSG001.1", "page": 1, "per_page": 2}
transcript_filter = {
"biotype": ["protein_coding", "nonsense_mediated_decay"],
"tsl": ["tsl1", "tsl2", "tsl3"],
}
result = await model.resolve_gene_transcripts_page(
gene, None, 1, 2, transcript_filter
)
assert result == {
"gene_primary_key": "1_ENSG001.1",
"page": 1,
"per_page": 2,
"transcript_filter_queries": {
"metadata.biotype.value": {
"$in": ["protein_coding", "nonsense_mediated_decay"]
},
"metadata.tsl.value": {"$in": ["tsl1", "tsl2", "tsl3"]},
},
}


@pytest.mark.asyncio
async def test_resolve_transcripts_page_transcripts(transcript_data):
info = create_info(transcript_data)

transcripts_page = {"gene_primary_key": "1_ENSG001.1", "page": 2, "per_page": 1}
transcripts_page = {
"gene_primary_key": "1_ENSG001.1",
"page": 2,
"per_page": 1,
"transcript_filter_queries": {},
}
result = await model.resolve_transcripts_page_transcripts(transcripts_page, info)
assert remove_ids(result) == [
{
Expand All @@ -882,6 +905,41 @@ async def test_resolve_transcripts_page_transcripts(transcript_data):
"symbol": "grape",
"type": "Transcript",
"unversioned_stable_id": "ENST002",
"metadata": {"biotype": {"value": "nonsense_mediated_decay"}},
}
]


@pytest.mark.asyncio
async def test_resolve_transcripts_page_transcripts_filters(transcript_data):
    "A biotype filter restricts the loaded page to the matching transcripts"
    # Only the protein_coding transcript of the fixture gene should come back.
    expected_transcripts = [
        {
            "gene": "ENSG001.1",
            "gene_foreign_key": "1_ENSG001.1",
            "genome_id": "1",
            "metadata": {"biotype": {"value": "protein_coding"}},
            "product_generating_contexts": [
                {
                    "product_foreign_key": "1_ENSP001.1",
                    "product_id": "ENSP001.1",
                    "product_type": "Protein",
                }
            ],
            "stable_id": "ENST001.1",
            "symbol": "kumquat",
            "type": "Transcript",
            "unversioned_stable_id": "ENST001",
        }
    ]

    info = create_info(transcript_data)
    biotype_query = {"metadata.biotype.value": {"$in": ["protein_coding"]}}
    page_request = {
        "gene_primary_key": "1_ENSG001.1",
        "page": 1,
        "per_page": 2,
        "transcript_filter_queries": biotype_query,
    }

    loaded = await model.resolve_transcripts_page_transcripts(page_request, info)

    assert remove_ids(loaded) == expected_transcripts

Expand All @@ -890,7 +948,12 @@ async def test_resolve_transcripts_page_transcripts(transcript_data):
async def test_resolve_transcripts_page_transcripts_no_transcripts(transcript_data):
info = create_info(transcript_data)

transcripts_page = {"gene_primary_key": "1_ENSG001.1", "page": 3, "per_page": 1}
transcripts_page = {
"gene_primary_key": "1_ENSG001.1",
"page": 3,
"per_page": 1,
"transcript_filter_queries": {},
}
result = await model.resolve_transcripts_page_transcripts(transcripts_page, info)
assert result == []

Expand All @@ -899,11 +962,32 @@ async def test_resolve_transcripts_page_transcripts_no_transcripts(transcript_da
async def test_resolve_transcripts_page_metadata(transcript_data):
info = create_info(transcript_data)

transcripts_page = {"gene_primary_key": "1_ENSG001.1", "page": 2, "per_page": 1}
transcripts_page = {
"gene_primary_key": "1_ENSG001.1",
"page": 2,
"per_page": 1,
"transcript_filter_queries": {},
}
result = await model.resolve_transcripts_page_metadata(transcripts_page, info)
assert result == {"page": 2, "per_page": 1, "total_count": 2}


@pytest.mark.asyncio
async def test_resolve_transcripts_page_metadata_filters(transcript_data):
    "The total count in the page metadata only counts transcripts passing the filter"
    info = create_info(transcript_data)
    biotype_query = {"metadata.biotype.value": {"$in": ["protein_coding"]}}
    page_request = {
        "gene_primary_key": "1_ENSG001.1",
        "page": 1,
        "per_page": 1,
        "transcript_filter_queries": biotype_query,
    }

    page_metadata = await model.resolve_transcripts_page_metadata(page_request, info)

    # Expect a total of one: the fixture's protein_coding transcript.
    expected_metadata = {"page": 1, "per_page": 1, "total_count": 1}
    assert page_metadata == expected_metadata


def remove_ids(test_output):
if isinstance(test_output, dict):
del test_output["_id"]
Expand Down
1 change: 1 addition & 0 deletions graphql_service/tests/fixtures/human_brca2.py
Original file line number Diff line number Diff line change
Expand Up @@ -73,6 +73,7 @@ def build_transcripts():
"so_term": "protein_coding",
"genome_id": "homo_sapiens_GCA_000001405_28",
"exons": build_exons(),
"metadata": {"biotype": {"value": "protein_coding"}},
"slice": {
"region_id": "homo_sapiens_GCA_000001405_28_13_chromosome",
"location": {
Expand Down
80 changes: 43 additions & 37 deletions graphql_service/tests/snapshots/snap_test_gene_retrieval.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,5 @@
# -*- coding: utf-8 -*-
# snapshottest: v1 - https://goo.gl/zC4yUc

"""Test data for gene retrieval tests"""

from __future__ import unicode_literals

from snapshottest import Snapshot
Expand All @@ -12,35 +9,55 @@

snapshots["test_gene_retrieval_by_id_camel_case 1"] = {
"gene": {
"metadata": {
"biotype": {
"definition": "Transcipt that contains an open reading frame (ORF).",
"description": None,
"label": "Protein coding",
"value": "protein_coding",
},
"name": {
"accession_id": "HGNC:1101",
"source": {
"description": "HUGO Genome Nomenclature Committee",
"id": "HGNC",
"name": "HGNC Symbol",
"release": None,
"url": "https://www.genenames.org",
},
"url": "https://www.genenames.org/data/gene-symbol-report/#!/hgnc_id/HGNC:1101",
"value": "BRCA2 DNA repair associated",
},
},
"name": "BRCA2 DNA repair associated [Source:HGNC Symbol;Acc:HGNC:1101]",
"slice": {
"location": {"end": 32400266, "start": 32315086},
"region": {
"name": "13",
"length": 114364328,
"code": "chromosome",
"topology": "linear",
"assembly": {
"id": "GRCh38.p13",
"default": True,
"name": "GRCh38",
"accession_id": "GCA_000001405.28",
"accessioning_body": "EGA",
"default": True,
"id": "GRCh38.p13",
"name": "GRCh38",
},
"code": "chromosome",
"length": 114364328,
"metadata": {
"ontology_terms": [
{
"accession_id": "SO:0000340",
"value": "chromosome",
"url": "www.sequenceontology.org/browser/current_release/term/SO:0000340",
"source": {
"description": "The Sequence Ontology is a set of terms and relationships used to describe the features and attributes of biological sequence. ",
"name": "Sequence Ontology",
"url": "www.sequenceontology.org",
"description": "The Sequence Ontology is a set of terms and relationships used to describe the features and attributes of biological sequence. ",
},
"url": "www.sequenceontology.org/browser/current_release/term/SO:0000340",
"value": "chromosome",
}
]
},
"name": "13",
"topology": "linear",
},
"strand": {"code": "forward"},
},
Expand All @@ -53,37 +70,17 @@
],
"unversioned_stable_id": "ENSG00000139618",
"version": 15,
"metadata": {
"biotype": {
"label": "Protein coding",
"definition": "Transcipt that contains an open reading frame (ORF).",
"description": None,
"value": "protein_coding",
},
"name": {
"accession_id": "HGNC:1101",
"value": "BRCA2 DNA repair associated",
"url": "https://www.genenames.org/data/gene-symbol-report/#!/hgnc_id/HGNC:1101",
"source": {
"id": "HGNC",
"name": "HGNC Symbol",
"description": "HUGO Genome Nomenclature Committee",
"url": "https://www.genenames.org",
"release": None,
},
},
},
}
}

expected_id_and_symbol = {
snapshots["test_gene_retrieval_by_id_snake_case 1"] = {
"stable_id": "ENSG00000139618.15",
"symbol": "BRCA2",
}

snapshots["test_gene_retrieval_by_id_snake_case 1"] = expected_id_and_symbol

snapshots["test_gene_retrieval_by_symbol 1"] = [expected_id_and_symbol]
snapshots["test_gene_retrieval_by_symbol 1"] = [
{"stable_id": "ENSG00000139618.15", "symbol": "BRCA2"}
]

snapshots["test_transcript_pagination 1"] = {
"gene": {
Expand All @@ -93,3 +90,12 @@
}
}
}

snapshots["test_transcript_pagination_filters 1"] = {
"gene": {
"transcripts_page": {
"page_metadata": {"page": 1, "per_page": 2, "total_count": 1},
"transcripts": [{"stable_id": "ENST00000380152.7"}],
}
}
}
32 changes: 32 additions & 0 deletions graphql_service/tests/test_gene_retrieval.py
Original file line number Diff line number Diff line change
Expand Up @@ -167,3 +167,35 @@ async def test_transcript_pagination(snapshot):
)
assert success
snapshot.assert_match(result["data"])


@pytest.mark.asyncio
async def test_transcript_pagination_filters(snapshot):
    """
    Request a filtered page of transcripts for gene ENSG00000139618.15
    (biotype restricted to protein_coding) and compare the response data
    with the stored snapshot
    """
    filtered_page_query = """
{
gene(by_id:{
genome_id:"homo_sapiens_GCA_000001405_28",
stable_id: "ENSG00000139618.15"
}) {
transcripts_page(page: 1, per_page:2, filters: {biotype: ["protein_coding"]}) {
transcripts {
stable_id
}
page_metadata {
total_count
page
per_page
}
}
}
}
"""
    outcome = await graphql(
        executable_schema,
        {"query": filtered_page_query},
        context_value=add_loaders_to_context(context),
    )
    success, result = outcome

    assert success
    snapshot.assert_match(result["data"])