From 24969d7f3bb41587f7c4f827a69dbf467e973907 Mon Sep 17 00:00:00 2001 From: hunterckx <118154470+hunterckx@users.noreply.github.com> Date: Sun, 17 Nov 2024 18:33:14 -0800 Subject: [PATCH] feat: add tags to genome list (#177) --- .../brc-analytics-catalog/common/entities.ts | 1 + app/components/index.ts | 1 + .../common/viewModelBuilders.ts | 14 +++++++ files/build-catalog.ts | 19 +++++++--- files/entities.ts | 7 ++++ files/out/genomes.json | 38 +++++++++++++++++++ site-config/brc-analytics/category.ts | 2 + .../local/index/genomeEntityConfig.ts | 13 +++++++ 8 files changed, 90 insertions(+), 5 deletions(-) diff --git a/app/apis/catalog/brc-analytics-catalog/common/entities.ts b/app/apis/catalog/brc-analytics-catalog/common/entities.ts index 28d0b20..e61b319 100644 --- a/app/apis/catalog/brc-analytics-catalog/common/entities.ts +++ b/app/apis/catalog/brc-analytics-catalog/common/entities.ts @@ -23,6 +23,7 @@ export interface BRCDataCatalogGenome { scaffoldCount: number; scaffoldL50: number; scaffoldN50: number; + tags: string[]; taxon: string; ucscBrowserUrl: string | null; } diff --git a/app/components/index.ts b/app/components/index.ts index bf3c465..d0441eb 100644 --- a/app/components/index.ts +++ b/app/components/index.ts @@ -21,6 +21,7 @@ export { export { Logo } from "@databiosphere/findable-ui/lib/components/Layout/components/Header/components/Content/components/Logo/logo"; export { Link } from "@databiosphere/findable-ui/lib/components/Links/components/Link/link"; export { BasicCell } from "@databiosphere/findable-ui/lib/components/Table/components/TableCell/components/BasicCell/basicCell"; +export { NTagCell } from "@databiosphere/findable-ui/lib/components/Table/components/TableCell/components/NTagCell/nTagCell"; export { CopyText } from "./common/CopyText/copyText"; export { AnalysisMethod } from "./Entity/components/AnalysisMethod/analysisMethod"; export { AnalysisMethods } from "./Entity/components/AnalysisMethods/analysisMethods"; diff --git a/app/viewModelBuilders/catalog/brc-analytics-catalog/common/viewModelBuilders.ts b/app/viewModelBuilders/catalog/brc-analytics-catalog/common/viewModelBuilders.ts index d960147..72b346f 100644 --- a/app/viewModelBuilders/catalog/brc-analytics-catalog/common/viewModelBuilders.ts +++ b/app/viewModelBuilders/catalog/brc-analytics-catalog/common/viewModelBuilders.ts @@ -201,6 +201,20 @@ export const buildScaffoldN50 = ( }; }; +/** + * Build props for the tags cell. + * @param genome - Genome entity. + * @returns Props to be used for the cell. + */ +export const buildTags = ( + genome: BRCDataCatalogGenome +): ComponentProps => { + return { + label: "Tags", + values: genome.tags, + }; +}; + /** * Build props for the taxonomy ID cell. * @param genome - Genome entity. diff --git a/files/build-catalog.ts b/files/build-catalog.ts index ff24ba7..4c8a323 100644 --- a/files/build-catalog.ts +++ b/files/build-catalog.ts @@ -1,8 +1,9 @@ import { parse as parseCsv } from "csv-parse/sync"; import fsp from "fs/promises"; import { BRCDataCatalogGenome } from "../app/apis/catalog/brc-analytics-catalog/common/entities"; -import { SourceGenome } from "./entities"; +import { SourceGenome, SourceOrganism } from "./entities"; +const SOURCE_PATH_ORGANISMS = "files/source/organisms-from-ncbi.tsv"; const SOURCE_PATH_GENOMES = "files/source/genomes-from-ncbi.tsv"; buildCatalog(); @@ -17,9 +18,16 @@ async function buildCatalog(): Promise { } async function buildGenomes(): Promise { + const sourceOrganismRows = await readValuesFile( + SOURCE_PATH_ORGANISMS + ); + const sourceOrganismsByTaxon = new Map( + sourceOrganismRows.map((row) => [row.taxon, row]) + ); const sourceRows = await readValuesFile(SOURCE_PATH_GENOMES); - const mappedRows = sourceRows.map( - (row): BRCDataCatalogGenome => ({ + const mappedRows = sourceRows.map((row): BRCDataCatalogGenome => { + const tagsString = sourceOrganismsByTaxon.get(row.taxon)?.CustomTags; + return { accession: row.accession, annotationStatus: parseStringOrNull(row.annotationStatus), chromosomes: parseNumberOrNull(row.chromosomeCount), @@ -33,10 +41,11 @@ async function buildGenomes(): Promise { scaffoldCount: parseNumber(row.scaffoldCount), scaffoldL50: parseNumber(row.scaffoldL50), scaffoldN50: parseNumber(row.scaffoldN50), + tags: tagsString ? [tagsString] : [], taxon: row.taxon, ucscBrowserUrl: parseStringOrNull(row.ucscBrowser), - }) - ); + }; + }); return mappedRows.sort((a, b) => a.accession.localeCompare(b.accession)); } diff --git a/files/entities.ts b/files/entities.ts index 3aa6edd..40c1774 100644 --- a/files/entities.ts +++ b/files/entities.ts @@ -15,3 +15,10 @@ export interface SourceGenome { taxonomyId: string; ucscBrowser: string; } + +export interface SourceOrganism { + assemblyCount: string; + CustomTags: string; + taxon: string; + taxonomyId: string; +} diff --git a/files/out/genomes.json b/files/out/genomes.json index 771cac1..b8c9c76 100644 --- a/files/out/genomes.json +++ b/files/out/genomes.json @@ -13,6 +13,9 @@ "scaffoldCount": 2747, "scaffoldL50": 6, "scaffoldN50": 1678596, + "tags": [ + "VEuPathDb" + ], "taxon": "Plasmodium vivax", "ucscBrowserUrl": "https://genome.ucsc.edu/h/GCF_000002415.2" }, @@ -30,6 +33,7 @@ "scaffoldCount": 12, "scaffoldL50": 4, "scaffoldN50": 2481190, + "tags": [], "taxon": "Trypanosoma brucei brucei TREU927", "ucscBrowserUrl": "https://genome.ucsc.edu/h/GCF_000002445.2" }, @@ -47,6 +51,7 @@ "scaffoldCount": 36, "scaffoldL50": 11, "scaffoldN50": 1091540, + "tags": [], "taxon": "Leishmania major strain Friedlin", "ucscBrowserUrl": "https://genome.ucsc.edu/h/GCF_000002725.2" }, @@ -64,6 +69,7 @@ "scaffoldCount": 14, "scaffoldL50": 5, "scaffoldN50": 1687656, + "tags": [], "taxon": "Plasmodium falciparum 3D7", "ucscBrowserUrl": "https://genome.ucsc.edu/h/GCF_000002765.5" }, @@ -81,6 +87,7 @@ "scaffoldCount": 138, "scaffoldL50": 11, "scaffoldN50": 992961, + "tags": [], "taxon": "Leishmania braziliensis MHOM/BR/75/M2904", "ucscBrowserUrl": "https://genome.ucsc.edu/h/GCF_000002845.2" }, @@ -98,6 +105,7 @@ "scaffoldCount": 2276, "scaffoldL50": 6, "scaffoldN50": 4973582, + "tags": [], "taxon": "Toxoplasma gondii ME49", "ucscBrowserUrl": "https://genome.ucsc.edu/h/GCF_000006565.2" }, @@ -115,6 +123,7 @@ "scaffoldCount": 6, "scaffoldL50": 3, "scaffoldN50": 4323945, + "tags": [], "taxon": "Coccidioides immitis RS", "ucscBrowserUrl": "https://genome.ucsc.edu/h/GCF_000149335.2" }, @@ -132,6 +141,7 @@ "scaffoldCount": 1, "scaffoldL50": 1, "scaffoldN50": 4411532, + "tags": [], "taxon": "Mycobacterium tuberculosis H37Rv", "ucscBrowserUrl": null }, @@ -149,6 +159,9 @@ "scaffoldCount": 29495, "scaffoldL50": 212, "scaffoldN50": 88624, + "tags": [ + "VEuPathDb" + ], "taxon": "Trypanosoma cruzi", "ucscBrowserUrl": "https://genome.ucsc.edu/h/GCF_000209065.1" }, @@ -166,6 +179,9 @@ "scaffoldCount": 36, "scaffoldL50": 11, "scaffoldN50": 1024085, + "tags": [ + "VEuPathDb" + ], "taxon": "Leishmania donovani", "ucscBrowserUrl": "https://genome.ucsc.edu/h/GCF_000227135.1" }, @@ -183,6 +199,7 @@ "scaffoldCount": 1, "scaffoldL50": 1, "scaffoldN50": 4411709, + "tags": [], "taxon": "Mycobacterium tuberculosis H37Rv", "ucscBrowserUrl": null }, @@ -200,6 +217,9 @@ "scaffoldCount": 1, "scaffoldL50": 1, "scaffoldN50": 196858, + "tags": [ + "Virus" + ], "taxon": "Monkeypox virus", "ucscBrowserUrl": null }, @@ -217,6 +237,9 @@ "scaffoldCount": 1, "scaffoldL50": 1, "scaffoldN50": 29903, + "tags": [ + "Virus" + ], "taxon": "Severe acute respiratory syndrome coronavirus 2", "ucscBrowserUrl": null }, @@ -234,6 +257,7 @@ "scaffoldCount": 289, "scaffoldL50": 2, "scaffoldN50": 186194774, + "tags": [], "taxon": "Culex pipiens pallens", "ucscBrowserUrl": null }, @@ -251,6 +275,7 @@ "scaffoldCount": 9, "scaffoldL50": 2, "scaffoldN50": 8079863, + "tags": [], "taxon": "Coccidioides posadasii str. Silveira", "ucscBrowserUrl": "https://genome.ucsc.edu/h/GCA_018416015.2" }, @@ -268,6 +293,9 @@ "scaffoldCount": 1, "scaffoldL50": 1, "scaffoldN50": 4516435, + "tags": [ + "Bact" + ], "taxon": "Mycobacterium tuberculosis", "ucscBrowserUrl": null }, @@ -285,6 +313,9 @@ "scaffoldCount": 14, "scaffoldL50": 5, "scaffoldN50": 2046250, + "tags": [ + "VEuPathDb" + ], "taxon": "Plasmodium yoelii", "ucscBrowserUrl": "https://genome.ucsc.edu/h/GCF_900002385.2" }, @@ -302,6 +333,7 @@ "scaffoldCount": 14, "scaffoldL50": 5, "scaffoldN50": 1692345, + "tags": [], "taxon": "Plasmodium vinckei vinckei", "ucscBrowserUrl": "https://genome.ucsc.edu/h/GCF_900681995.1" }, @@ -319,6 +351,9 @@ "scaffoldCount": 190, "scaffoldL50": 2, "scaffoldN50": 99149756, + "tags": [ + "VEuPathDb" + ], "taxon": "Anopheles gambiae", "ucscBrowserUrl": "https://genome.ucsc.edu/h/GCF_943734735.2" }, @@ -336,6 +371,9 @@ "scaffoldCount": 1, "scaffoldL50": 1, "scaffoldN50": 4469156, + "tags": [ + "Bact" + ], "taxon": "Mycobacterium tuberculosis", "ucscBrowserUrl": null } diff --git a/site-config/brc-analytics/category.ts b/site-config/brc-analytics/category.ts index 92fc0f1..edca097 100644 --- a/site-config/brc-analytics/category.ts +++ b/site-config/brc-analytics/category.ts @@ -11,6 +11,7 @@ export const BRC_DATA_CATALOG_CATEGORY_KEY = { SCAFFOLD_COUNT: "scaffoldCount", SCAFFOLD_L50: "scaffoldL50", SCAFFOLD_N50: "scaffoldN50", + TAGS: "tags", TAXON: "taxon", TAXONOMY_ID: "ncbiTaxonomyId", UCSC_BROWSER_URL: "ucscBrowserUrl", @@ -29,6 +30,7 @@ export const BRC_DATA_CATALOG_CATEGORY_LABEL = { SCAFFOLD_COUNT: "Scaffolds", SCAFFOLD_L50: "Scaffold L50", SCAFFOLD_N50: "Scaffold N50", + TAGS: "Tags", TAXON: "Taxon", TAXONOMY_ID: "Taxonomy ID", UCSC_BROWSER_URL: "UCSC Browser", diff --git a/site-config/brc-analytics/local/index/genomeEntityConfig.ts b/site-config/brc-analytics/local/index/genomeEntityConfig.ts index 208d2a9..612e3be 100644 --- a/site-config/brc-analytics/local/index/genomeEntityConfig.ts +++ b/site-config/brc-analytics/local/index/genomeEntityConfig.ts @@ -61,6 +61,10 @@ export const genomeEntityConfig: BRCEntityConfig = { key: BRC_DATA_CATALOG_CATEGORY_KEY.ANNOTATION_STATUS, label: BRC_DATA_CATALOG_CATEGORY_LABEL.ANNOTATION_STATUS, }, + { + key: BRC_DATA_CATALOG_CATEGORY_KEY.TAGS, + label: BRC_DATA_CATALOG_CATEGORY_LABEL.TAGS, + }, ], }, ], @@ -214,6 +218,15 @@ export const genomeEntityConfig: BRCEntityConfig = { id: BRC_DATA_CATALOG_CATEGORY_KEY.ANNOTATION_STATUS, width: { max: "0.5fr", min: "142px" }, }, + { + componentConfig: { + component: C.NTagCell, + viewBuilder: V.buildTags, + } as ComponentConfig, + header: BRC_DATA_CATALOG_CATEGORY_LABEL.TAGS, + id: BRC_DATA_CATALOG_CATEGORY_KEY.TAGS, + width: { max: "0.5fr", min: "142px" }, + }, ], defaultSort: { desc: SORT_DIRECTION.ASCENDING,