From 37a5cef39bdc606b00e2443fdff3592a067a5d14 Mon Sep 17 00:00:00 2001 From: hunterckx <118154470+hunterckx@users.noreply.github.com> Date: Mon, 18 Nov 2024 18:15:31 -0800 Subject: [PATCH] feat: add organism list (#177) --- .../brc-analytics-catalog/common/entities.ts | 7 + .../brc-analytics-catalog/common/utils.ts | 6 +- .../common/viewModelBuilders.ts | 24 +++- files/build-catalog.ts | 40 ++++-- files/out/organisms.json | 136 ++++++++++++++++++ routes/constants.ts | 1 + site-config/brc-analytics/category.ts | 2 + site-config/brc-analytics/local/config.ts | 14 +- .../local/index/organismEntityConfig.ts | 102 +++++++++++++ 9 files changed, 316 insertions(+), 16 deletions(-) create mode 100644 files/out/organisms.json create mode 100644 site-config/brc-analytics/local/index/organismEntityConfig.ts diff --git a/app/apis/catalog/brc-analytics-catalog/common/entities.ts b/app/apis/catalog/brc-analytics-catalog/common/entities.ts index e61b319..7e713f5 100644 --- a/app/apis/catalog/brc-analytics-catalog/common/entities.ts +++ b/app/apis/catalog/brc-analytics-catalog/common/entities.ts @@ -28,6 +28,13 @@ export interface BRCDataCatalogGenome { ucscBrowserUrl: string | null; } +export interface BRCDataCatalogOrganism { + assemblyCount: number; + ncbiTaxonomyId: string; + tags: string[]; + taxon: string; +} + export interface EntitiesResponse { hits: R[]; pagination: EntitiesResponsePagination; diff --git a/app/apis/catalog/brc-analytics-catalog/common/utils.ts b/app/apis/catalog/brc-analytics-catalog/common/utils.ts index 0baded0..45fc474 100644 --- a/app/apis/catalog/brc-analytics-catalog/common/utils.ts +++ b/app/apis/catalog/brc-analytics-catalog/common/utils.ts @@ -1,4 +1,4 @@ -import { BRCDataCatalogGenome } from "./entities"; +import { BRCDataCatalogGenome, BRCDataCatalogOrganism } from "./entities"; export function getGenomeId(genome: BRCDataCatalogGenome): string { return sanitizeEntityId(genome.accession); @@ -9,6 +9,10 @@ export function getGenomeTitle(genome?: BRCDataCatalogGenome): string { return `${genome.taxon}`; } +export function getOrganismId(organism: BRCDataCatalogOrganism): string { + return sanitizeEntityId(organism.ncbiTaxonomyId); +} + export function sanitizeEntityId(entityId?: string): string { if (!entityId) return ""; return entityId.replace(/\./g, "_"); diff --git a/app/viewModelBuilders/catalog/brc-analytics-catalog/common/viewModelBuilders.ts b/app/viewModelBuilders/catalog/brc-analytics-catalog/common/viewModelBuilders.ts index 72b346f..b7f6e85 100644 --- a/app/viewModelBuilders/catalog/brc-analytics-catalog/common/viewModelBuilders.ts +++ b/app/viewModelBuilders/catalog/brc-analytics-catalog/common/viewModelBuilders.ts @@ -6,7 +6,10 @@ import { import { ViewContext } from "@databiosphere/findable-ui/lib/config/entities"; import { ComponentProps } from "react"; import { ROUTES } from "../../../../../routes/constants"; -import { BRCDataCatalogGenome } from "../../../../apis/catalog/brc-analytics-catalog/common/entities"; +import { + BRCDataCatalogGenome, + BRCDataCatalogOrganism, +} from "../../../../apis/catalog/brc-analytics-catalog/common/entities"; import * as C from "../../../../components"; import { GENOME_BROWSER, NCBI_DATASETS_URL } from "./constants"; @@ -71,6 +74,19 @@ export const buildAnnotationStatus = ( }; }; +/** + * Build props for the assemblies cell. + * @param organism - Genome entity. + * @returns Props to be used for the cell. + */ +export const buildAssemblyCount = ( + organism: BRCDataCatalogOrganism +): ComponentProps => { + return { + value: organism.assemblyCount, + }; +}; + /** * Build props for the chromosomes cell. * @param genome - Genome entity. @@ -155,7 +171,7 @@ export const buildLevel = ( * @returns Props to be used for the cell. */ export const buildTaxon = ( - genome: BRCDataCatalogGenome + genome: BRCDataCatalogOrganism | BRCDataCatalogGenome ): ComponentProps => { return { value: genome.taxon, @@ -207,7 +223,7 @@ export const buildScaffoldN50 = ( * @returns Props to be used for the cell. */ export const buildTags = ( - genome: BRCDataCatalogGenome + genome: BRCDataCatalogOrganism | BRCDataCatalogGenome ): ComponentProps => { return { label: "Tags", @@ -221,7 +237,7 @@ export const buildTags = ( * @returns Props to be used for the cell. */ export const buildTaxonomyId = ( - genome: BRCDataCatalogGenome + genome: BRCDataCatalogOrganism | BRCDataCatalogGenome ): ComponentProps => { return { value: genome.ncbiTaxonomyId, diff --git a/files/build-catalog.ts b/files/build-catalog.ts index 4c8a323..6c881f2 100644 --- a/files/build-catalog.ts +++ b/files/build-catalog.ts @@ -1,6 +1,9 @@ import { parse as parseCsv } from "csv-parse/sync"; import fsp from "fs/promises"; -import { BRCDataCatalogGenome } from "../app/apis/catalog/brc-analytics-catalog/common/entities"; +import { + BRCDataCatalogGenome, + BRCDataCatalogOrganism, +} from "../app/apis/catalog/brc-analytics-catalog/common/entities"; import { SourceGenome, SourceOrganism } from "./entities"; const SOURCE_PATH_ORGANISMS = "files/source/organisms-from-ncbi.tsv"; @@ -9,7 +12,16 @@ const SOURCE_PATH_GENOMES = "files/source/genomes-from-ncbi.tsv"; buildCatalog(); async function buildCatalog(): Promise { - const genomes = await buildGenomes(); + const organisms = await buildOrganisms(); + + const organismsByTaxon = new Map( + organisms.map((organism) => [organism.taxon, organism]) + ); + + const genomes = await buildGenomes(organismsByTaxon); + + console.log("Organisms:", genomes.length); + await saveJson("files/out/organisms.json", organisms); console.log("Genomes:", genomes.length); await saveJson("files/out/genomes.json", genomes); @@ -17,16 +29,28 @@ async function buildCatalog(): Promise { console.log("Done"); } -async function buildGenomes(): Promise { - const sourceOrganismRows = await readValuesFile( +async function buildOrganisms(): Promise { + const sourceRows = await readValuesFile( SOURCE_PATH_ORGANISMS ); - const sourceOrganismsByTaxon = new Map( - sourceOrganismRows.map((row) => [row.taxon, row]) + const mappedRows = sourceRows.map((row): BRCDataCatalogOrganism => { + return { + assemblyCount: parseNumber(row.assemblyCount), + ncbiTaxonomyId: row.taxonomyId, + tags: row.CustomTags ? [row.CustomTags] : [], + taxon: row.taxon, + }; + }); + return mappedRows.sort((a, b) => + a.ncbiTaxonomyId.localeCompare(b.ncbiTaxonomyId) ); +} + +async function buildGenomes( + organismsByTaxon: Map +): Promise { const sourceRows = await readValuesFile(SOURCE_PATH_GENOMES); const mappedRows = sourceRows.map((row): BRCDataCatalogGenome => { - const tagsString = sourceOrganismsByTaxon.get(row.taxon)?.CustomTags; return { accession: row.accession, annotationStatus: parseStringOrNull(row.annotationStatus), @@ -41,7 +65,7 @@ async function buildGenomes(): Promise { scaffoldCount: parseNumber(row.scaffoldCount), scaffoldL50: parseNumber(row.scaffoldL50), scaffoldN50: parseNumber(row.scaffoldN50), - tags: tagsString ? [tagsString] : [], + tags: organismsByTaxon.get(row.taxon)?.tags ?? [], taxon: row.taxon, ucscBrowserUrl: parseStringOrNull(row.ucscBrowser), }; diff --git a/files/out/organisms.json b/files/out/organisms.json new file mode 100644 index 0000000..4ceb6ba --- /dev/null +++ b/files/out/organisms.json @@ -0,0 +1,136 @@ +[ + { + "assemblyCount": 6911, + "ncbiTaxonomyId": "10244", + "tags": [ + "Virus" + ], + "taxon": "Monkeypox virus" + }, + { + "assemblyCount": 7823, + "ncbiTaxonomyId": "1773", + "tags": [ + "Bact" + ], + "taxon": "Mycobacterium tuberculosis" + }, + { + "assemblyCount": 13, + "ncbiTaxonomyId": "199306", + "tags": [ + "VEuPathDb" + ], + "taxon": "Coccidioides posadasii" + }, + { + "assemblyCount": 92, + "ncbiTaxonomyId": "2697049", + "tags": [ + "Virus" + ], + "taxon": "Severe acute respiratory syndrome coronavirus 2" + }, + { + "assemblyCount": 5, + "ncbiTaxonomyId": "5501", + "tags": [], + "taxon": "Coccidioides immitis" + }, + { + "assemblyCount": 11, + "ncbiTaxonomyId": "5660", + "tags": [ + "VEuPathDb" + ], + "taxon": "Leishmania braziliensis" + }, + { + "assemblyCount": 12, + "ncbiTaxonomyId": "5661", + "tags": [ + "VEuPathDb" + ], + "taxon": "Leishmania donovani" + }, + { + "assemblyCount": 7, + "ncbiTaxonomyId": "5664", + "tags": [ + "VEuPathDb" + ], + "taxon": "Leishmania major" + }, + { + "assemblyCount": 5, + "ncbiTaxonomyId": "5691", + "tags": [ + "VEuPathDb" + ], + "taxon": "Trypanosoma brucei" + }, + { + "assemblyCount": 44, + "ncbiTaxonomyId": "5693", + "tags": [ + "VEuPathDb" + ], + "taxon": "Trypanosoma cruzi" + }, + { + "assemblyCount": 29, + "ncbiTaxonomyId": "5811", + "tags": [ + "VEuPathDb" + ], + "taxon": "Toxoplasma gondii" + }, + { + "assemblyCount": 67, + "ncbiTaxonomyId": "5833", + "tags": [ + "VEuPathDb" + ], + "taxon": "Plasmodium falciparum" + }, + { + "assemblyCount": 19, + "ncbiTaxonomyId": "5855", + "tags": [ + "VEuPathDb" + ], + "taxon": "Plasmodium vivax" + }, + { + "assemblyCount": 10, + "ncbiTaxonomyId": "5860", + "tags": [ + "VEuPathDb" + ], + "taxon": "Plasmodium vinckei" + }, + { + "assemblyCount": 15, + "ncbiTaxonomyId": "5861", + "tags": [ + "VEuPathDb" + ], + "taxon": "Plasmodium yoelii" + }, + { + "assemblyCount": 7, + "ncbiTaxonomyId": "7165", + "tags": [ + "VEuPathDb" + ], + "taxon": "Anopheles gambiae" + }, + { + "assemblyCount": 5, + "ncbiTaxonomyId": "7175", + "tags": [ + "VEuPathDb" + ], + "taxon": "Culex pipiens" + } +] diff --git a/routes/constants.ts b/routes/constants.ts index 05eb80a..b1f6088 100644 --- a/routes/constants.ts +++ b/routes/constants.ts @@ -1,5 +1,6 @@ export const ROUTES = { ABOUT: "/about", GENOMES: "/data/genomes", + ORGANISMS: "/data/organisms", ROADMAP: "/roadmap", }; diff --git a/site-config/brc-analytics/category.ts b/site-config/brc-analytics/category.ts index edca097..122bf19 100644 --- a/site-config/brc-analytics/category.ts +++ b/site-config/brc-analytics/category.ts @@ -2,6 +2,7 @@ export const BRC_DATA_CATALOG_CATEGORY_KEY = { ACCESSION: "accession", ANALYZE_GENOME: "analyzeGenome", ANNOTATION_STATUS: "annotationStatus", + ASSEMBLY_COUNT: "assemblyCount", CHROMOSOMES: "chromosomes", COVERAGE: "coverage", GC_PERCENT: "gcPercent", @@ -21,6 +22,7 @@ export const BRC_DATA_CATALOG_CATEGORY_LABEL = { ACCESSION: "Accession", ANALYZE_GENOME: "Action", ANNOTATION_STATUS: "Annotation Status", + ASSEMBLY_COUNT: "Assemblies", CHROMOSOMES: "Chromosomes", COVERAGE: "Coverage", GC_PERCENT: "GC%", diff --git a/site-config/brc-analytics/local/config.ts b/site-config/brc-analytics/local/config.ts index 159fd80..5d419e3 100644 --- a/site-config/brc-analytics/local/config.ts +++ b/site-config/brc-analytics/local/config.ts @@ -1,10 +1,14 @@ import { SiteConfig } from "@databiosphere/findable-ui/lib/config/entities"; import { EntityConfig } from "@databiosphere/findable-ui/src/config/entities"; -import { BRCDataCatalogGenome } from "../../../app/apis/catalog/brc-analytics-catalog/common/entities"; +import { + BRCDataCatalogGenome, + BRCDataCatalogOrganism, +} from "../../../app/apis/catalog/brc-analytics-catalog/common/entities"; import * as C from "../../../app/components"; import { ROUTES } from "../../../routes/constants"; import { floating } from "./floating/floating"; import { genomeEntityConfig } from "./index/genomeEntityConfig"; +import { organismEntityConfig } from "./index/organismEntityConfig"; const LOCALHOST = "http://localhost:3000"; const APP_TITLE = "BRC Analytics"; @@ -32,7 +36,10 @@ export function makeConfig(browserUrl: string): SiteConfig { dataSource: { url: "", }, - entities: [genomeEntityConfig as EntityConfig], + entities: [ + organismEntityConfig as EntityConfig, + genomeEntityConfig as EntityConfig, + ], explorerTitle: APP_TITLE, layout: { floating, @@ -50,7 +57,8 @@ export function makeConfig(browserUrl: string): SiteConfig { undefined, [ { label: "About", url: ROUTES.ABOUT }, - { label: "Datasets", url: ROUTES.GENOMES }, + { label: "Organisms", url: ROUTES.ORGANISMS }, + { label: "Genomes", url: ROUTES.GENOMES }, { label: "Roadmap", url: ROUTES.ROADMAP }, ], undefined, diff --git a/site-config/brc-analytics/local/index/organismEntityConfig.ts b/site-config/brc-analytics/local/index/organismEntityConfig.ts new file mode 100644 index 0000000..1a39944 --- /dev/null +++ b/site-config/brc-analytics/local/index/organismEntityConfig.ts @@ -0,0 +1,102 @@ +import { + ComponentConfig, + ListConfig, + SORT_DIRECTION, +} from "@databiosphere/findable-ui/lib/config/entities"; +import { EXPLORE_MODE } from "@databiosphere/findable-ui/lib/hooks/useExploreMode"; +import { BRCDataCatalogOrganism } from "../../../../app/apis/catalog/brc-analytics-catalog/common/entities"; +import { getOrganismId } from "../../../../app/apis/catalog/brc-analytics-catalog/common/utils"; +import * as C from "../../../../app/components"; +import * as V from "../../../../app/viewModelBuilders/catalog/brc-analytics-catalog/common/viewModelBuilders"; +import { BRCEntityConfig } from "../../../common/entities"; +import { + BRC_DATA_CATALOG_CATEGORY_KEY, + BRC_DATA_CATALOG_CATEGORY_LABEL, +} from "../../category"; + +/** + * Entity config object responsible to config anything related to the /genomes route. + */ +export const organismEntityConfig: BRCEntityConfig = { + categoryGroupConfig: { + categoryGroups: [ + { + categoryConfigs: [ + { + key: BRC_DATA_CATALOG_CATEGORY_KEY.TAXON, + label: BRC_DATA_CATALOG_CATEGORY_LABEL.TAXON, + }, + { + key: BRC_DATA_CATALOG_CATEGORY_KEY.TAXONOMY_ID, + label: BRC_DATA_CATALOG_CATEGORY_LABEL.TAXONOMY_ID, + }, + { + key: BRC_DATA_CATALOG_CATEGORY_KEY.TAGS, + label: BRC_DATA_CATALOG_CATEGORY_LABEL.TAGS, + }, + ], + }, + ], + key: "organisms", + }, + detail: { + detailOverviews: [], + staticLoad: true, + tabs: [], + }, + exploreMode: EXPLORE_MODE.CS_FETCH_CS_FILTERING, + explorerTitle: "Organisms", + getId: getOrganismId, + label: "Organisms", + list: { + columns: [ + { + componentConfig: { + component: C.BasicCell, + viewBuilder: V.buildTaxon, + } as ComponentConfig, + header: BRC_DATA_CATALOG_CATEGORY_LABEL.TAXON, + id: BRC_DATA_CATALOG_CATEGORY_KEY.TAXON, + width: "auto", + }, + { + componentConfig: { + component: C.BasicCell, + viewBuilder: V.buildTaxonomyId, + } as ComponentConfig, + header: BRC_DATA_CATALOG_CATEGORY_LABEL.TAXONOMY_ID, + id: BRC_DATA_CATALOG_CATEGORY_KEY.TAXONOMY_ID, + width: { max: "0.5fr", min: "164px" }, + }, + { + componentConfig: { + component: C.BasicCell, + viewBuilder: V.buildAssemblyCount, + } as ComponentConfig, + header: BRC_DATA_CATALOG_CATEGORY_LABEL.ASSEMBLY_COUNT, + id: BRC_DATA_CATALOG_CATEGORY_KEY.ASSEMBLY_COUNT, + width: { max: "0.5fr", min: "164px" }, + }, + { + componentConfig: { + component: C.NTagCell, + viewBuilder: V.buildTags, + } as ComponentConfig, + header: BRC_DATA_CATALOG_CATEGORY_LABEL.TAGS, + id: BRC_DATA_CATALOG_CATEGORY_KEY.TAGS, + width: { max: "0.5fr", min: "142px" }, + }, + ], + defaultSort: { + desc: SORT_DIRECTION.ASCENDING, + id: BRC_DATA_CATALOG_CATEGORY_KEY.TAXON, + }, + } as ListConfig, + listView: { + disablePagination: true, + enableDownload: true, + enableTab: false, + }, + route: "organisms", + staticLoadFile: "files/out/organisms.json", +};