Skip to content

Commit

Permalink
feat: add tags to genome list (#177)
Browse files Browse the repository at this point in the history
  • Loading branch information
hunterckx committed Nov 18, 2024
1 parent 910ffd2 commit 24969d7
Show file tree
Hide file tree
Showing 8 changed files with 90 additions and 5 deletions.
1 change: 1 addition & 0 deletions app/apis/catalog/brc-analytics-catalog/common/entities.ts
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@ export interface BRCDataCatalogGenome {
scaffoldCount: number;
scaffoldL50: number;
scaffoldN50: number;
tags: string[];
taxon: string;
ucscBrowserUrl: string | null;
}
Expand Down
1 change: 1 addition & 0 deletions app/components/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@ export {
export { Logo } from "@databiosphere/findable-ui/lib/components/Layout/components/Header/components/Content/components/Logo/logo";
export { Link } from "@databiosphere/findable-ui/lib/components/Links/components/Link/link";
export { BasicCell } from "@databiosphere/findable-ui/lib/components/Table/components/TableCell/components/BasicCell/basicCell";
export { NTagCell } from "@databiosphere/findable-ui/lib/components/Table/components/TableCell/components/NTagCell/nTagCell";
export { CopyText } from "./common/CopyText/copyText";
export { AnalysisMethod } from "./Entity/components/AnalysisMethod/analysisMethod";
export { AnalysisMethods } from "./Entity/components/AnalysisMethods/analysisMethods";
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -201,6 +201,20 @@ export const buildScaffoldN50 = (
};
};

/**
 * Produce the props for the genome list's tags cell.
 * The cell is labelled "Tags" and receives the genome's tag list as its values.
 * @param genome - Genome entity.
 * @returns Props to be used for the cell.
 */
export const buildTags = (
  genome: BRCDataCatalogGenome
): ComponentProps<typeof C.NTagCell> => {
  // The tag array is handed through as-is (same reference, no copy).
  const { tags } = genome;
  return { label: "Tags", values: tags };
};

/**
* Build props for the taxonomy ID cell.
* @param genome - Genome entity.
Expand Down
19 changes: 14 additions & 5 deletions files/build-catalog.ts
Original file line number Diff line number Diff line change
@@ -1,8 +1,9 @@
import { parse as parseCsv } from "csv-parse/sync";
import fsp from "fs/promises";
import { BRCDataCatalogGenome } from "../app/apis/catalog/brc-analytics-catalog/common/entities";
import { SourceGenome } from "./entities";
import { SourceGenome, SourceOrganism } from "./entities";

const SOURCE_PATH_ORGANISMS = "files/source/organisms-from-ncbi.tsv";
const SOURCE_PATH_GENOMES = "files/source/genomes-from-ncbi.tsv";

buildCatalog();
Expand All @@ -17,9 +18,16 @@ async function buildCatalog(): Promise<void> {
}

async function buildGenomes(): Promise<BRCDataCatalogGenome[]> {
const sourceOrganismRows = await readValuesFile<SourceOrganism>(
SOURCE_PATH_ORGANISMS
);
const sourceOrganismsByTaxon = new Map(
sourceOrganismRows.map((row) => [row.taxon, row])
);
const sourceRows = await readValuesFile<SourceGenome>(SOURCE_PATH_GENOMES);
const mappedRows = sourceRows.map(
(row): BRCDataCatalogGenome => ({
const mappedRows = sourceRows.map((row): BRCDataCatalogGenome => {
const tagsString = sourceOrganismsByTaxon.get(row.taxon)?.CustomTags;
return {
accession: row.accession,
annotationStatus: parseStringOrNull(row.annotationStatus),
chromosomes: parseNumberOrNull(row.chromosomeCount),
Expand All @@ -33,10 +41,11 @@ async function buildGenomes(): Promise<BRCDataCatalogGenome[]> {
scaffoldCount: parseNumber(row.scaffoldCount),
scaffoldL50: parseNumber(row.scaffoldL50),
scaffoldN50: parseNumber(row.scaffoldN50),
tags: tagsString ? [tagsString] : [],
taxon: row.taxon,
ucscBrowserUrl: parseStringOrNull(row.ucscBrowser),
})
);
};
});
return mappedRows.sort((a, b) => a.accession.localeCompare(b.accession));
}

Expand Down
7 changes: 7 additions & 0 deletions files/entities.ts
Original file line number Diff line number Diff line change
Expand Up @@ -15,3 +15,10 @@ export interface SourceGenome {
taxonomyId: string;
ucscBrowser: string;
}

/**
 * Row shape for the organisms source file read by the catalog build script.
 * Every field is typed as a string.
 */
export interface SourceOrganism {
assemblyCount: string;
// Read by the catalog build to fill a genome's `tags`: a non-empty value becomes a single-element array, an empty or missing value becomes an empty array.
// NOTE(review): the capitalisation presumably mirrors the source file's column header — confirm before renaming.
CustomTags: string;
// Used by the catalog build as the lookup key when matching an organism to a genome's `taxon`.
taxon: string;
taxonomyId: string;
}
38 changes: 38 additions & 0 deletions files/out/genomes.json
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,9 @@
"scaffoldCount": 2747,
"scaffoldL50": 6,
"scaffoldN50": 1678596,
"tags": [
"VEuPathDb"
],
"taxon": "Plasmodium vivax",
"ucscBrowserUrl": "https://genome.ucsc.edu/h/GCF_000002415.2"
},
Expand All @@ -30,6 +33,7 @@
"scaffoldCount": 12,
"scaffoldL50": 4,
"scaffoldN50": 2481190,
"tags": [],
"taxon": "Trypanosoma brucei brucei TREU927",
"ucscBrowserUrl": "https://genome.ucsc.edu/h/GCF_000002445.2"
},
Expand All @@ -47,6 +51,7 @@
"scaffoldCount": 36,
"scaffoldL50": 11,
"scaffoldN50": 1091540,
"tags": [],
"taxon": "Leishmania major strain Friedlin",
"ucscBrowserUrl": "https://genome.ucsc.edu/h/GCF_000002725.2"
},
Expand All @@ -64,6 +69,7 @@
"scaffoldCount": 14,
"scaffoldL50": 5,
"scaffoldN50": 1687656,
"tags": [],
"taxon": "Plasmodium falciparum 3D7",
"ucscBrowserUrl": "https://genome.ucsc.edu/h/GCF_000002765.5"
},
Expand All @@ -81,6 +87,7 @@
"scaffoldCount": 138,
"scaffoldL50": 11,
"scaffoldN50": 992961,
"tags": [],
"taxon": "Leishmania braziliensis MHOM/BR/75/M2904",
"ucscBrowserUrl": "https://genome.ucsc.edu/h/GCF_000002845.2"
},
Expand All @@ -98,6 +105,7 @@
"scaffoldCount": 2276,
"scaffoldL50": 6,
"scaffoldN50": 4973582,
"tags": [],
"taxon": "Toxoplasma gondii ME49",
"ucscBrowserUrl": "https://genome.ucsc.edu/h/GCF_000006565.2"
},
Expand All @@ -115,6 +123,7 @@
"scaffoldCount": 6,
"scaffoldL50": 3,
"scaffoldN50": 4323945,
"tags": [],
"taxon": "Coccidioides immitis RS",
"ucscBrowserUrl": "https://genome.ucsc.edu/h/GCF_000149335.2"
},
Expand All @@ -132,6 +141,7 @@
"scaffoldCount": 1,
"scaffoldL50": 1,
"scaffoldN50": 4411532,
"tags": [],
"taxon": "Mycobacterium tuberculosis H37Rv",
"ucscBrowserUrl": null
},
Expand All @@ -149,6 +159,9 @@
"scaffoldCount": 29495,
"scaffoldL50": 212,
"scaffoldN50": 88624,
"tags": [
"VEuPathDb"
],
"taxon": "Trypanosoma cruzi",
"ucscBrowserUrl": "https://genome.ucsc.edu/h/GCF_000209065.1"
},
Expand All @@ -166,6 +179,9 @@
"scaffoldCount": 36,
"scaffoldL50": 11,
"scaffoldN50": 1024085,
"tags": [
"VEuPathDb"
],
"taxon": "Leishmania donovani",
"ucscBrowserUrl": "https://genome.ucsc.edu/h/GCF_000227135.1"
},
Expand All @@ -183,6 +199,7 @@
"scaffoldCount": 1,
"scaffoldL50": 1,
"scaffoldN50": 4411709,
"tags": [],
"taxon": "Mycobacterium tuberculosis H37Rv",
"ucscBrowserUrl": null
},
Expand All @@ -200,6 +217,9 @@
"scaffoldCount": 1,
"scaffoldL50": 1,
"scaffoldN50": 196858,
"tags": [
"Virus"
],
"taxon": "Monkeypox virus",
"ucscBrowserUrl": null
},
Expand All @@ -217,6 +237,9 @@
"scaffoldCount": 1,
"scaffoldL50": 1,
"scaffoldN50": 29903,
"tags": [
"Virus"
],
"taxon": "Severe acute respiratory syndrome coronavirus 2",
"ucscBrowserUrl": null
},
Expand All @@ -234,6 +257,7 @@
"scaffoldCount": 289,
"scaffoldL50": 2,
"scaffoldN50": 186194774,
"tags": [],
"taxon": "Culex pipiens pallens",
"ucscBrowserUrl": null
},
Expand All @@ -251,6 +275,7 @@
"scaffoldCount": 9,
"scaffoldL50": 2,
"scaffoldN50": 8079863,
"tags": [],
"taxon": "Coccidioides posadasii str. Silveira",
"ucscBrowserUrl": "https://genome.ucsc.edu/h/GCA_018416015.2"
},
Expand All @@ -268,6 +293,9 @@
"scaffoldCount": 1,
"scaffoldL50": 1,
"scaffoldN50": 4516435,
"tags": [
"Bact"
],
"taxon": "Mycobacterium tuberculosis",
"ucscBrowserUrl": null
},
Expand All @@ -285,6 +313,9 @@
"scaffoldCount": 14,
"scaffoldL50": 5,
"scaffoldN50": 2046250,
"tags": [
"VEuPathDb"
],
"taxon": "Plasmodium yoelii",
"ucscBrowserUrl": "https://genome.ucsc.edu/h/GCF_900002385.2"
},
Expand All @@ -302,6 +333,7 @@
"scaffoldCount": 14,
"scaffoldL50": 5,
"scaffoldN50": 1692345,
"tags": [],
"taxon": "Plasmodium vinckei vinckei",
"ucscBrowserUrl": "https://genome.ucsc.edu/h/GCF_900681995.1"
},
Expand All @@ -319,6 +351,9 @@
"scaffoldCount": 190,
"scaffoldL50": 2,
"scaffoldN50": 99149756,
"tags": [
"VEuPathDb"
],
"taxon": "Anopheles gambiae",
"ucscBrowserUrl": "https://genome.ucsc.edu/h/GCF_943734735.2"
},
Expand All @@ -336,6 +371,9 @@
"scaffoldCount": 1,
"scaffoldL50": 1,
"scaffoldN50": 4469156,
"tags": [
"Bact"
],
"taxon": "Mycobacterium tuberculosis",
"ucscBrowserUrl": null
}
Expand Down
2 changes: 2 additions & 0 deletions site-config/brc-analytics/category.ts
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ export const BRC_DATA_CATALOG_CATEGORY_KEY = {
SCAFFOLD_COUNT: "scaffoldCount",
SCAFFOLD_L50: "scaffoldL50",
SCAFFOLD_N50: "scaffoldN50",
TAGS: "tags",
TAXON: "taxon",
TAXONOMY_ID: "ncbiTaxonomyId",
UCSC_BROWSER_URL: "ucscBrowserUrl",
Expand All @@ -29,6 +30,7 @@ export const BRC_DATA_CATALOG_CATEGORY_LABEL = {
SCAFFOLD_COUNT: "Scaffolds",
SCAFFOLD_L50: "Scaffold L50",
SCAFFOLD_N50: "Scaffold N50",
TAGS: "Tags",
TAXON: "Taxon",
TAXONOMY_ID: "Taxonomy ID",
UCSC_BROWSER_URL: "UCSC Browser",
Expand Down
13 changes: 13 additions & 0 deletions site-config/brc-analytics/local/index/genomeEntityConfig.ts
Original file line number Diff line number Diff line change
Expand Up @@ -61,6 +61,10 @@ export const genomeEntityConfig: BRCEntityConfig<BRCDataCatalogGenome> = {
key: BRC_DATA_CATALOG_CATEGORY_KEY.ANNOTATION_STATUS,
label: BRC_DATA_CATALOG_CATEGORY_LABEL.ANNOTATION_STATUS,
},
{
key: BRC_DATA_CATALOG_CATEGORY_KEY.TAGS,
label: BRC_DATA_CATALOG_CATEGORY_LABEL.TAGS,
},
],
},
],
Expand Down Expand Up @@ -214,6 +218,15 @@ export const genomeEntityConfig: BRCEntityConfig<BRCDataCatalogGenome> = {
id: BRC_DATA_CATALOG_CATEGORY_KEY.ANNOTATION_STATUS,
width: { max: "0.5fr", min: "142px" },
},
{
componentConfig: {
component: C.NTagCell,
viewBuilder: V.buildTags,
} as ComponentConfig<typeof C.NTagCell, BRCDataCatalogGenome>,
header: BRC_DATA_CATALOG_CATEGORY_LABEL.TAGS,
id: BRC_DATA_CATALOG_CATEGORY_KEY.TAGS,
width: { max: "0.5fr", min: "142px" },
},
],
defaultSort: {
desc: SORT_DIRECTION.ASCENDING,
Expand Down

0 comments on commit 24969d7

Please sign in to comment.