Skip to content

Commit

Permalink
feat: add organism list (#177)
Browse files Browse the repository at this point in the history
  • Loading branch information
hunterckx committed Nov 19, 2024
1 parent 24969d7 commit 37a5cef
Show file tree
Hide file tree
Showing 9 changed files with 316 additions and 16 deletions.
7 changes: 7 additions & 0 deletions app/apis/catalog/brc-analytics-catalog/common/entities.ts
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,13 @@ export interface BRCDataCatalogGenome {
ucscBrowserUrl: string | null;
}

/**
 * Organism entry of the BRC data catalog.
 */
export interface BRCDataCatalogOrganism {
// Number of assemblies for the organism.
assemblyCount: number;
// NCBI taxonomy ID, kept as a string; getOrganismId derives the organism's entity ID from it.
ncbiTaxonomyId: string;
// Tags associated with the organism; may be empty.
tags: string[];
// Taxon name, e.g. "Plasmodium falciparum".
taxon: string;
}

export interface EntitiesResponse<R> {
hits: R[];
pagination: EntitiesResponsePagination;
Expand Down
6 changes: 5 additions & 1 deletion app/apis/catalog/brc-analytics-catalog/common/utils.ts
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
import { BRCDataCatalogGenome } from "./entities";
import { BRCDataCatalogGenome, BRCDataCatalogOrganism } from "./entities";

export function getGenomeId(genome: BRCDataCatalogGenome): string {
return sanitizeEntityId(genome.accession);
Expand All @@ -9,6 +9,10 @@ export function getGenomeTitle(genome?: BRCDataCatalogGenome): string {
return `${genome.taxon}`;
}

/**
 * Returns the entity ID of the given organism, derived from its NCBI taxonomy ID.
 * @param organism - Organism entity.
 * @returns Sanitized organism ID.
 */
export function getOrganismId(organism: BRCDataCatalogOrganism): string {
  const { ncbiTaxonomyId } = organism;
  return sanitizeEntityId(ncbiTaxonomyId);
}

export function sanitizeEntityId(entityId?: string): string {
if (!entityId) return "";
return entityId.replace(/\./g, "_");
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,10 @@ import {
import { ViewContext } from "@databiosphere/findable-ui/lib/config/entities";
import { ComponentProps } from "react";
import { ROUTES } from "../../../../../routes/constants";
import { BRCDataCatalogGenome } from "../../../../apis/catalog/brc-analytics-catalog/common/entities";
import {
BRCDataCatalogGenome,
BRCDataCatalogOrganism,
} from "../../../../apis/catalog/brc-analytics-catalog/common/entities";
import * as C from "../../../../components";
import { GENOME_BROWSER, NCBI_DATASETS_URL } from "./constants";

Expand Down Expand Up @@ -71,6 +74,19 @@ export const buildAnnotationStatus = (
};
};

/**
 * Build props for the assemblies cell.
 * @param organism - Organism entity.
 * @returns Props to be used for the cell.
 */
export const buildAssemblyCount = (
  organism: BRCDataCatalogOrganism
): ComponentProps<typeof C.BasicCell> => {
  const { assemblyCount } = organism;
  return { value: assemblyCount };
};

/**
* Build props for the chromosomes cell.
* @param genome - Genome entity.
Expand Down Expand Up @@ -155,7 +171,7 @@ export const buildLevel = (
* @returns Props to be used for the cell.
*/
export const buildTaxon = (
genome: BRCDataCatalogGenome
genome: BRCDataCatalogOrganism | BRCDataCatalogGenome
): ComponentProps<typeof C.BasicCell> => {
return {
value: genome.taxon,
Expand Down Expand Up @@ -207,7 +223,7 @@ export const buildScaffoldN50 = (
* @returns Props to be used for the cell.
*/
export const buildTags = (
genome: BRCDataCatalogGenome
genome: BRCDataCatalogOrganism | BRCDataCatalogGenome
): ComponentProps<typeof C.NTagCell> => {
return {
label: "Tags",
Expand All @@ -221,7 +237,7 @@ export const buildTags = (
* @returns Props to be used for the cell.
*/
export const buildTaxonomyId = (
genome: BRCDataCatalogGenome
genome: BRCDataCatalogOrganism | BRCDataCatalogGenome
): ComponentProps<typeof C.BasicCell> => {
return {
value: genome.ncbiTaxonomyId,
Expand Down
40 changes: 32 additions & 8 deletions files/build-catalog.ts
Original file line number Diff line number Diff line change
@@ -1,6 +1,9 @@
import { parse as parseCsv } from "csv-parse/sync";
import fsp from "fs/promises";
import { BRCDataCatalogGenome } from "../app/apis/catalog/brc-analytics-catalog/common/entities";
import {
BRCDataCatalogGenome,
BRCDataCatalogOrganism,
} from "../app/apis/catalog/brc-analytics-catalog/common/entities";
import { SourceGenome, SourceOrganism } from "./entities";

const SOURCE_PATH_ORGANISMS = "files/source/organisms-from-ncbi.tsv";
Expand All @@ -9,24 +12,45 @@ const SOURCE_PATH_GENOMES = "files/source/genomes-from-ncbi.tsv";
buildCatalog();

/**
 * Builds the organism and genome catalogs and saves each one to its output JSON path.
 * Organisms are built first because the genomes are built from a taxon-keyed
 * lookup of the organisms.
 * @returns Promise that resolves once both catalogs have been saved.
 */
async function buildCatalog(): Promise<void> {
  const organisms = await buildOrganisms();

  // Index the organisms by taxon so buildGenomes can look up the organism of each genome.
  const organismsByTaxon = new Map(
    organisms.map((organism) => [organism.taxon, organism])
  );

  const genomes = await buildGenomes(organismsByTaxon);

  // Report the organism count (not the genome count) under the "Organisms" label.
  console.log("Organisms:", organisms.length);
  await saveJson("files/out/organisms.json", organisms);

  console.log("Genomes:", genomes.length);
  await saveJson("files/out/genomes.json", genomes);

  console.log("Done");
}

/**
 * Builds the catalog organisms from the rows of the source organisms file.
 * @returns Organisms, sorted by NCBI taxonomy ID. The IDs are strings and are
 * compared with localeCompare, so the order is textual rather than numeric
 * (e.g. "10244" sorts before "1773").
 */
async function buildOrganisms(): Promise<BRCDataCatalogOrganism[]> {
  const sourceRows = await readValuesFile<SourceOrganism>(
    SOURCE_PATH_ORGANISMS
  );
  const mappedRows = sourceRows.map((row): BRCDataCatalogOrganism => {
    return {
      assemblyCount: parseNumber(row.assemblyCount),
      ncbiTaxonomyId: row.taxonomyId,
      // The source row carries at most one tag value; wrap it so tags is always an array.
      tags: row.CustomTags ? [row.CustomTags] : [],
      taxon: row.taxon,
    };
  });
  // mappedRows is a fresh array created by map above, so sorting it in place is safe.
  return mappedRows.sort((a, b) =>
    a.ncbiTaxonomyId.localeCompare(b.ncbiTaxonomyId)
  );
}

async function buildGenomes(
organismsByTaxon: Map<string, BRCDataCatalogOrganism>
): Promise<BRCDataCatalogGenome[]> {
const sourceRows = await readValuesFile<SourceGenome>(SOURCE_PATH_GENOMES);
const mappedRows = sourceRows.map((row): BRCDataCatalogGenome => {
const tagsString = sourceOrganismsByTaxon.get(row.taxon)?.CustomTags;
return {
accession: row.accession,
annotationStatus: parseStringOrNull(row.annotationStatus),
Expand All @@ -41,7 +65,7 @@ async function buildGenomes(): Promise<BRCDataCatalogGenome[]> {
scaffoldCount: parseNumber(row.scaffoldCount),
scaffoldL50: parseNumber(row.scaffoldL50),
scaffoldN50: parseNumber(row.scaffoldN50),
tags: tagsString ? [tagsString] : [],
tags: organismsByTaxon.get(row.taxon)?.tags ?? [],
taxon: row.taxon,
ucscBrowserUrl: parseStringOrNull(row.ucscBrowser),
};
Expand Down
136 changes: 136 additions & 0 deletions files/out/organisms.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,136 @@
[
{
"assemblyCount": 6911,
"ncbiTaxonomyId": "10244",
"tags": [
"Virus"
],
"taxon": "Monkeypox virus"
},
{
"assemblyCount": 7823,
"ncbiTaxonomyId": "1773",
"tags": [
"Bact"
],
"taxon": "Mycobacterium tuberculosis"
},
{
"assemblyCount": 13,
"ncbiTaxonomyId": "199306",
"tags": [
"VEuPathDb"
],
"taxon": "Coccidioides posadasii"
},
{
"assemblyCount": 92,
"ncbiTaxonomyId": "2697049",
"tags": [
"Virus"
],
"taxon": "Severe acute respiratory syndrome coronavirus 2"
},
{
"assemblyCount": 5,
"ncbiTaxonomyId": "5501",
"tags": [],
"taxon": "Coccidioides immitis"
},
{
"assemblyCount": 11,
"ncbiTaxonomyId": "5660",
"tags": [
"VEuPathDb"
],
"taxon": "Leishmania braziliensis"
},
{
"assemblyCount": 12,
"ncbiTaxonomyId": "5661",
"tags": [
"VEuPathDb"
],
"taxon": "Leishmania donovani"
},
{
"assemblyCount": 7,
"ncbiTaxonomyId": "5664",
"tags": [
"VEuPathDb"
],
"taxon": "Leishmania major"
},
{
"assemblyCount": 5,
"ncbiTaxonomyId": "5691",
"tags": [
"VEuPathDb"
],
"taxon": "Trypanosoma brucei"
},
{
"assemblyCount": 44,
"ncbiTaxonomyId": "5693",
"tags": [
"VEuPathDb"
],
"taxon": "Trypanosoma cruzi"
},
{
"assemblyCount": 29,
"ncbiTaxonomyId": "5811",
"tags": [
"VEuPathDb"
],
"taxon": "Toxoplasma gondii"
},
{
"assemblyCount": 67,
"ncbiTaxonomyId": "5833",
"tags": [
"VEuPathDb"
],
"taxon": "Plasmodium falciparum"
},
{
"assemblyCount": 19,
"ncbiTaxonomyId": "5855",
"tags": [
"VEuPathDb"
],
"taxon": "Plasmodium vivax"
},
{
"assemblyCount": 10,
"ncbiTaxonomyId": "5860",
"tags": [
"VEuPathDb"
],
"taxon": "Plasmodium vinckei"
},
{
"assemblyCount": 15,
"ncbiTaxonomyId": "5861",
"tags": [
"VEuPathDb"
],
"taxon": "Plasmodium yoelii"
},
{
"assemblyCount": 7,
"ncbiTaxonomyId": "7165",
"tags": [
"VEuPathDb"
],
"taxon": "Anopheles gambiae"
},
{
"assemblyCount": 5,
"ncbiTaxonomyId": "7175",
"tags": [
"VEuPathDb"
],
"taxon": "Culex pipiens"
}
]
1 change: 1 addition & 0 deletions routes/constants.ts
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
/**
 * Site route paths, keyed by page.
 */
export const ROUTES = {
ABOUT: "/about",
GENOMES: "/data/genomes",
ORGANISMS: "/data/organisms",
ROADMAP: "/roadmap",
};
2 changes: 2 additions & 0 deletions site-config/brc-analytics/category.ts
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@ export const BRC_DATA_CATALOG_CATEGORY_KEY = {
ACCESSION: "accession",
ANALYZE_GENOME: "analyzeGenome",
ANNOTATION_STATUS: "annotationStatus",
ASSEMBLY_COUNT: "assemblyCount",
CHROMOSOMES: "chromosomes",
COVERAGE: "coverage",
GC_PERCENT: "gcPercent",
Expand All @@ -21,6 +22,7 @@ export const BRC_DATA_CATALOG_CATEGORY_LABEL = {
ACCESSION: "Accession",
ANALYZE_GENOME: "Action",
ANNOTATION_STATUS: "Annotation Status",
ASSEMBLY_COUNT: "Assemblies",
CHROMOSOMES: "Chromosomes",
COVERAGE: "Coverage",
GC_PERCENT: "GC%",
Expand Down
14 changes: 11 additions & 3 deletions site-config/brc-analytics/local/config.ts
Original file line number Diff line number Diff line change
@@ -1,10 +1,14 @@
import { SiteConfig } from "@databiosphere/findable-ui/lib/config/entities";
import { EntityConfig } from "@databiosphere/findable-ui/src/config/entities";
import { BRCDataCatalogGenome } from "../../../app/apis/catalog/brc-analytics-catalog/common/entities";
import {
BRCDataCatalogGenome,
BRCDataCatalogOrganism,
} from "../../../app/apis/catalog/brc-analytics-catalog/common/entities";
import * as C from "../../../app/components";
import { ROUTES } from "../../../routes/constants";
import { floating } from "./floating/floating";
import { genomeEntityConfig } from "./index/genomeEntityConfig";
import { organismEntityConfig } from "./index/organismEntityConfig";

const LOCALHOST = "http://localhost:3000";
const APP_TITLE = "BRC Analytics";
Expand Down Expand Up @@ -32,7 +36,10 @@ export function makeConfig(browserUrl: string): SiteConfig {
dataSource: {
url: "",
},
entities: [genomeEntityConfig as EntityConfig<BRCDataCatalogGenome>],
entities: [
organismEntityConfig as EntityConfig<BRCDataCatalogOrganism>,
genomeEntityConfig as EntityConfig<BRCDataCatalogGenome>,
],
explorerTitle: APP_TITLE,
layout: {
floating,
Expand All @@ -50,7 +57,8 @@ export function makeConfig(browserUrl: string): SiteConfig {
undefined,
[
{ label: "About", url: ROUTES.ABOUT },
{ label: "Datasets", url: ROUTES.GENOMES },
{ label: "Organisms", url: ROUTES.ORGANISMS },
{ label: "Genomes", url: ROUTES.GENOMES },
{ label: "Roadmap", url: ROUTES.ROADMAP },
],
undefined,
Expand Down
Loading

0 comments on commit 37a5cef

Please sign in to comment.