diff --git a/.bumpversion.cfg b/.bumpversion.cfg index 82983d2..d7f2e2f 100644 --- a/.bumpversion.cfg +++ b/.bumpversion.cfg @@ -1,5 +1,5 @@ [bumpversion] -current_version = 1.0.37 +current_version = 1.1.0 commit = True tag = False parse = (?P\d+)\.(?P\d+)\.(?P\d+)(?:-(?P[0-9A-Za-z-]+(?:\.[0-9A-Za-z-]+)*))?(?:\+(?P[0-9A-Za-z-]+(?:\.[0-9A-Za-z-]+)*))? diff --git a/ebel/__init__.py b/ebel/__init__.py index 5cae1ea..97d20d5 100755 --- a/ebel/__init__.py +++ b/ebel/__init__.py @@ -1,8 +1,8 @@ """Root init for eBEL.""" -from . import cache, constants, errors, parser, transformers -from .manager.orientdb.biodbs.bel import Bel +from ebel import cache, constants, errors, parser, transformers +from ebel.manager.orientdb.biodbs.bel import Bel -__version__ = "1.0.37" +__version__ = "1.1.0" __title__ = "e(BE:L)" __description__ = "Validation and extension of biomedical knowledge graphs" @@ -12,7 +12,7 @@ __email__ = "christian.ebeling@scai.fraunhofer.de" __license__ = "?" -__copyright__ = """Copyright (c) 2021 Christian Ebeling, Fraunhofer Institute for Algorithms and Scientific +__copyright__ = """Copyright (c) 2023 Christian Ebeling, Fraunhofer Institute for Algorithms and Scientific Computing SCAI, Schloss Birlinghoven, 53754 Sankt Augustin, Germany""" project_name = __title__ diff --git a/ebel/constants.py b/ebel/constants.py index 4d8c28b..bb8a088 100755 --- a/ebel/constants.py +++ b/ebel/constants.py @@ -2,38 +2,36 @@ # -*- coding: utf-8 -*- import os +from pathlib import Path -THIS_DIR = os.path.dirname(__file__) +THIS_DIR = Path(__file__).parent PROJECT_NAME = "ebel" -HOME = os.path.expanduser("~") +HOME = Path.home() LIBRARY_NAME = PROJECT_NAME # Path to folder -PROJECT_DIR = os.path.join(HOME, f".{PROJECT_NAME}") -if not os.path.exists(PROJECT_DIR): - os.mkdir(PROJECT_DIR) +PROJECT_DIR = Path(HOME, f".{PROJECT_NAME}") +PROJECT_DIR.mkdir(parents=True, exist_ok=True) # Path to data folder -DATA_DIR = os.path.join(PROJECT_DIR, "data") -if not os.path.exists(DATA_DIR): - os.mkdir(DATA_DIR) +DATA_DIR = Path(PROJECT_DIR, "data") +DATA_DIR.mkdir(parents=True, exist_ok=True) # Path to logs folder -LOG_DIR = os.path.join(PROJECT_DIR, "logs") -if not os.path.exists(LOG_DIR): - os.mkdir(LOG_DIR) +LOG_DIR = Path(PROJECT_DIR, "logs") +LOG_DIR.mkdir(parents=True, exist_ok=True) # Default database name and location -DB_NAME = "{}.db".format(PROJECT_NAME) -DB_PATH = os.path.join(DATA_DIR, DB_NAME) +DB_NAME = f"{PROJECT_NAME}.db" +DB_PATH = Path(DATA_DIR, DB_NAME) GRAMMAR_BEL_PATH = { - "2": os.path.join(THIS_DIR, "grammar", "grammar_bel_2.bnf"), - "2_1": os.path.join(THIS_DIR, "grammar", "grammar_bel_2_1.bnf"), + "2": THIS_DIR.joinpath("grammar", "grammar_bel_2.bnf"), + "2_1": THIS_DIR.joinpath("grammar", "grammar_bel_2_1.bnf"), } -GRAMMAR_NS_ANNO_PATH = os.path.join(THIS_DIR, "grammar", "grammar_belns_belanno_1__2.bnf") +GRAMMAR_NS_ANNO_PATH = THIS_DIR.joinpath("grammar", "grammar_belns_belanno_1__2.bnf") GRAMMAR_START_NS = "belns" GRAMMAR_START_ANNO = "belanno" GRAMMAR_START_LINE = "script_line_by_line" diff --git a/ebel/database.py b/ebel/database.py index 7a006ec..a972cbe 100644 --- a/ebel/database.py +++ b/ebel/database.py @@ -6,9 +6,11 @@ import pymysql from pyorientdb import OrientDB -from pyorientdb.exceptions import (PyOrientCommandException, - PyOrientConnectionException, - PyOrientSecurityAccessException) +from pyorientdb.exceptions import ( + PyOrientCommandException, + PyOrientConnectionException, + PyOrientSecurityAccessException, +) from ebel.config import get_config_as_dict, write_to_config from ebel.constants import TerminalFormatting as TF diff --git a/ebel/defaults.py b/ebel/defaults.py index 566da81..aa8260e 100755 --- a/ebel/defaults.py +++ b/ebel/defaults.py @@ -4,9 +4,8 @@ import logging import logging.handlers as handlers -import os -from .constants import DATA_DIR, LOG_DIR, PROJECT_DIR +from ebel.constants import DATA_DIR, LOG_DIR, PROJECT_DIR ############################################################################### # UNIPROT taxonomy IDs to import @@ -24,16 +23,16 @@ SQLITE_DATABASE_NAME = "ebel.db" SQLITE_TEST_DATABASE_NAME = "ebel_test.db" -DATABASE_LOCATION = os.path.join(DATA_DIR, SQLITE_DATABASE_NAME) -DEFAULT_TEST_DATABASE_LOCATION = os.path.join(DATA_DIR, SQLITE_TEST_DATABASE_NAME) +DATABASE_LOCATION = DATA_DIR.joinpath(SQLITE_DATABASE_NAME) +DEFAULT_TEST_DATABASE_LOCATION = DATA_DIR.joinpath(SQLITE_TEST_DATABASE_NAME) ############################################################################### # SQLAlchemy connection strings # ============================= # SQLite # ------ -CONN_STR_DEFAULT = "sqlite:///" + DATABASE_LOCATION -CONN_STR_TESTS = "sqlite:///" + SQLITE_TEST_DATABASE_NAME +CONN_STR_DEFAULT = "sqlite:///" + DATABASE_LOCATION.name +CONN_STR_TESTS = "sqlite:///" + DEFAULT_TEST_DATABASE_LOCATION.name # MySQL # ----- CONN_STR_MYSQL_PREFIX = "mysql+pymysql://ebel:ebel@localhost/" @@ -42,22 +41,24 @@ ############################################################################### # Config -config_file_path = os.path.join(PROJECT_DIR, "config.ini") +config_file_path = PROJECT_DIR.joinpath("config.ini") ############################################################################### # Log Handling logHandler = handlers.RotatingFileHandler( - filename=os.path.join(LOG_DIR, "ebel.log"), + filename=LOG_DIR.joinpath("ebel.log"), mode="a", maxBytes=4098 * 10, # 4MB file max - backupCount=0, + backupCount=3, ) logh_format = logging.Formatter("%(asctime)s - %(name)s - %(levelname)s - %(message)s") logHandler.setFormatter(logh_format) logHandler.setLevel(logging.DEBUG) # Console Handler -ch = logging.StreamHandler() -ch_format = logging.Formatter("%(asctime)s - %(levelname)s - %(message)s") -ch.setFormatter(ch_format) -ch.setLevel(logging.WARNING) +streamHandler = logging.StreamHandler() +stream_format = logging.Formatter("%(asctime)s - %(levelname)s - %(message)s") +streamHandler.setFormatter(stream_format) +streamHandler.setLevel(logging.WARNING) + +logging.basicConfig(level=logging.INFO, handlers=[logHandler, streamHandler]) diff --git a/ebel/manager/models.py b/ebel/manager/models.py index 719860b..98fe552 100755 --- a/ebel/manager/models.py +++ b/ebel/manager/models.py @@ -13,15 +13,14 @@ import requests import sqlalchemy from lark import Lark, Token, Tree -from sqlalchemy import Boolean, Column, ForeignKey, Index, Integer, String +from sqlalchemy import Boolean, ForeignKey, Index, Integer, String from sqlalchemy.ext.declarative import declarative_base, declared_attr -from sqlalchemy.orm import relationship +from sqlalchemy.orm import mapped_column, relationship from sqlalchemy.sql.expression import func from sqlalchemy_utils import create_database, database_exists from tqdm import tqdm -from ebel.constants import (FILE, GRAMMAR_NS_ANNO_PATH, GRAMMAR_START_ANNO, - GRAMMAR_START_NS, URL) +from ebel.constants import FILE, GRAMMAR_NS_ANNO_PATH, GRAMMAR_START_ANNO, GRAMMAR_START_NS, URL from ebel.tools import BelRdb Base = declarative_base() @@ -43,6 +42,7 @@ def reset_tables(engine: sqlalchemy.engine.Engine, force_new_db: bool) -> None: if force_new_db: Base.metadata.drop_all(bind=engine) + Base.metadata.create_all(bind=engine, checkfirst=True) @@ -55,7 +55,7 @@ def foreign_key_to(table_name): :rtype: sqlalchemy.Column """ foreign_column = table_name + ".id" - return Column(Integer, ForeignKey(foreign_column)) + return mapped_column(Integer, ForeignKey(foreign_column)) class MasterModel(object): @@ -71,7 +71,7 @@ def __tablename__(self): __mapper_args__ = {"always_refresh": True} - id = Column(Integer, primary_key=True) + id = mapped_column(Integer, primary_key=True) def _to_dict(self): """Protected method for converting values to dictionary.""" @@ -94,10 +94,10 @@ class Namespace(Base, MasterModel): __tablename__ = "namespace" __table_args__ = (Index("idx_url", "url", mysql_length=100),) - url = Column(String(2048), nullable=False) - keyword = Column(String(255), index=True) - cacheable = Column(Boolean) - case_sensitive = Column(Boolean) + url = mapped_column(String(2048), nullable=False) + keyword = mapped_column(String(255), index=True) + cacheable = mapped_column(Boolean) + case_sensitive = mapped_column(Boolean) entries = relationship("NamespaceEntry", back_populates="namespace") @@ -108,8 +108,8 @@ class NamespaceEntry(Base, MasterModel): __tablename__ = "namespace_entry" __table_args__ = (Index("idx_name", "name", mysql_length=100),) - name = Column(String(2048), nullable=True) - encoding = Column(String(8), nullable=True) + name = mapped_column(String(2048), nullable=True) + encoding = mapped_column(String(8), nullable=True) namespace__id = foreign_key_to("namespace") namespace = relationship("Namespace", back_populates="entries") @@ -121,10 +121,10 @@ class Annotation(Base, MasterModel): __tablename__ = "annotation" __table_args__ = (Index("idx_url2", "url", mysql_length=100),) - url = Column(String(2048), nullable=False) - keyword = Column(String(255), index=True) - cacheable = Column(Boolean) - case_sensitive = Column(Boolean) + url = mapped_column(String(2048), nullable=False) + keyword = mapped_column(String(255), index=True) + cacheable = mapped_column(Boolean) + case_sensitive = mapped_column(Boolean) entries = relationship("AnnotationEntry", back_populates="annotation", cascade="all, delete-orphan") @@ -135,8 +135,8 @@ class AnnotationEntry(Base, MasterModel): __tablename__ = "annotation_entry" __table_args__ = (Index("idx_identifier", "identifier", mysql_length=100),) - name = Column(String(2048), nullable=True) - identifier = Column(String(255), nullable=True) + name = mapped_column(String(2048), nullable=True) + identifier = mapped_column(String(255), nullable=True) annotation__id = foreign_key_to("annotation") annotation = relationship("Annotation", back_populates="entries") diff --git a/ebel/manager/orientdb/biodbs/bel.py b/ebel/manager/orientdb/biodbs/bel.py index 10c5106..ed7241c 100644 --- a/ebel/manager/orientdb/biodbs/bel.py +++ b/ebel/manager/orientdb/biodbs/bel.py @@ -30,14 +30,29 @@ from ebel.manager.orientdb.biodbs.reactome import Reactome from ebel.manager.orientdb.biodbs.stringdb import StringDb from ebel.manager.orientdb.biodbs.uniprot import UniProt -from ebel.manager.orientdb.constants import (BIOGRID, CHEBI, CLINICAL_TRIALS, - CLINVAR, DISGENET, DRUGBANK, - ENSEMBL, EXPRESSION_ATLAS, - GWAS_CATALOG, HGNC, INTACT, - IUPHAR, KEGG, MIRTARBASE, NCBI, - NSIDES, PATHWAY_COMMONS, - PROTEIN_ATLAS, REACTOME, STRINGDB, - UNIPROT) +from ebel.manager.orientdb.constants import ( + BIOGRID, + CHEBI, + CLINICAL_TRIALS, + CLINVAR, + DISGENET, + DRUGBANK, + ENSEMBL, + EXPRESSION_ATLAS, + GWAS_CATALOG, + HGNC, + INTACT, + IUPHAR, + KEGG, + MIRTARBASE, + NCBI, + NSIDES, + PATHWAY_COMMONS, + PROTEIN_ATLAS, + REACTOME, + STRINGDB, + UNIPROT, +) from ebel.manager.orientdb.importer import _BelImporter from ebel.manager.orientdb.odb_defaults import bel_func_short from ebel.manager.orientdb.odb_meta import Graph diff --git a/ebel/manager/orientdb/biodbs/biogrid.py b/ebel/manager/orientdb/biodbs/biogrid.py index c2e7e1c..850016c 100644 --- a/ebel/manager/orientdb/biodbs/biogrid.py +++ b/ebel/manager/orientdb/biodbs/biogrid.py @@ -1,5 +1,5 @@ """BioGrid.""" - +import logging import typing from enum import Enum from typing import Dict, Tuple @@ -7,6 +7,8 @@ import numpy as np import pandas as pd from pyorientdb import OrientDB +from sqlalchemy import Integer, cast, func, select +from sqlalchemy.orm import aliased from tqdm import tqdm from ebel import tools @@ -18,6 +20,9 @@ STANDARD_NAMESPACES = {9606: "HGNC", 10090: "MGI", 10116: "RGD"} +logger = logging.getLogger(__name__) + + class BioGridNode: """Custom class definition for BioGRID nodes.""" @@ -282,7 +287,7 @@ def insert_data(self) -> Dict[str, int]: } # main table - df = pd.read_csv(self.file_path, usecols=use_columns.keys(), sep="\t", low_memory=False) + df = pd.read_csv(self.file_path, usecols=list(use_columns.keys()), sep="\t", low_memory=False) df.rename(columns=use_columns, inplace=True) df.replace("-", np.nan, inplace=True) @@ -311,6 +316,8 @@ def insert_data(self) -> Dict[str, int]: df.index += 1 df.index.rename("id", inplace=True) + logger.info("Insert BIOGRID data") + df.to_sql(biogrid.Biogrid.__tablename__, self.engine, if_exists="append") return {self.biodb_name: df.shape[0]} @@ -453,29 +460,32 @@ def _create_source_table(self, df: pd.DataFrame) -> pd.DataFrame: def get_uniprot_modification_pairs(self): """Return all UniProt modification pairs.""" - # TODO: sql as sqlalchemy query - sql = """Select - ia.symbol as subject_symbol, - ia.uniprot as subject_uniprot, - ia.taxonomy_id as subject_taxonomy_id, - ib.symbol as object_symbol, - ib.uniprot as object_uniprot, - ib.taxonomy_id as object_taxonomy_id - from - biogrid b - inner join biogrid_interactor ia on (b.biogrid_a_id=ia.biogrid_id) - inner join biogrid_interactor ib on (b.biogrid_b_id=ib.biogrid_id) - inner join biogrid_modification m on (m.id=b.modification_id) - where - m.modification != 'No Modification' and ia.uniprot IS NOT NULL and ib.uniprot IS NOT NULL - group by - ia.symbol, - ia.uniprot, - ia.taxonomy_id, - ib.symbol, - ib.uniprot, - ib.taxonomy_id""" - return [dict(x) for x in self.engine.execute(sql).fetchall()] + b = biogrid.Biogrid + ia = aliased(biogrid.Interactor) + ib = aliased(biogrid.Interactor) + m = biogrid.Modification + + sql = ( + ( + select( + ia.symbol.label("subject_symbol"), + ia.uniprot.label("subject_uniprot"), + ia.taxonomy_id.label("subject_taxonomy_id"), + ib.symbol.label("object_symbol"), + ib.uniprot.label("object_uniprot"), + ib.taxonomy_id.label("object_taxonomy_id"), + ) + .select_from(b) + .join(ia, b.biogrid_a_id == ia.biogrid_id) + .join(ib, b.biogrid_b_id == ib.biogrid_id) + .join(m, b.modification_id == m.id) + ) + .where(m.modification == "No Modification") + .where(ia.uniprot.isnot(None)) + .group_by(ia.symbol, ia.uniprot, ia.taxonomy_id, ib.symbol, ib.uniprot, ib.taxonomy_id) + ) + results = self.session.execute(sql).fetchall() + return [x._asdict() for x in results] def get_create_pure_protein_rid_by_uniprot(self, taxonomy_id, symbol, uniprot): """Get pure protein rid by UniProt accession ID if the protein is involved in a BEL statement.""" @@ -498,40 +508,12 @@ def get_create_pure_protein_rid_by_uniprot(self, taxonomy_id, symbol, uniprot): def update_interactions(self) -> int: """Updates all BioGrid interactions.""" - # TODO: sql_temp as sqlalchemy query - sql_temp = """ - Select - ia.symbol as subject_symbol, - ia.uniprot as subject_uniprot, - ia.taxonomy_id as subject_taxonomy_id, - m.modification, - ib.symbol as object_symbol, - ib.uniprot as object_uniprot, - ib.taxonomy_id as object_taxonomy_id, - es.experimental_system, - group_concat( distinct b.biogrid_id) as biogrid_ids, - group_concat( distinct if(p.source='PUBMED',CAST(p.source_identifier AS UNSIGNED),NULL)) as pmids, - count(distinct p.source_identifier) as num_pubs, - group_concat( distinct if(p.source='DOI',CAST(p.source_identifier AS UNSIGNED),NULL)) as dois - from - biogrid b - inner join biogrid_interactor ia on (b.biogrid_a_id=ia.biogrid_id) - inner join biogrid_interactor ib on (b.biogrid_b_id=ib.biogrid_id) - inner join biogrid_modification m on (m.id=b.modification_id) - inner join biogrid_publication p on (b.publication_id=p.id) - inner join biogrid_experimental_system es on (b.experimental_system_id=es.id) - where - (ia.uniprot = '{subject_uniprot}' and ib.uniprot = '{object_uniprot}') and - m.modification != 'No Modification' - group by - ia.symbol, - ia.uniprot, - ia.taxonomy_id, - m.modification, - ib.symbol, - ib.uniprot, - ib.taxonomy_id, - es.experimental_system""" + b = biogrid.Biogrid + ia = aliased(biogrid.Interactor) + ib = aliased(biogrid.Interactor) + m = biogrid.Modification + p = biogrid.Publication + es = biogrid.ExperimentalSystem uniprots_in_bel_set = self.get_pure_uniprots_in_bel_context() uniprot_modification_pairs = self.get_uniprot_modification_pairs() @@ -539,6 +521,10 @@ def update_interactions(self) -> int: counter = 0 self.clear_edges() + if_func = func.iif if self.engine.dialect.name == "sqlite" else func.IF + + logger.info("Update BioGRID") + for e in tqdm( uniprot_modification_pairs, desc=f"Update {self.biodb_name.upper()} interactions", @@ -556,68 +542,67 @@ def update_interactions(self) -> int: uniprot=e["object_uniprot"], ) - sql = sql_temp.format( - subject_uniprot=e["subject_uniprot"], - object_uniprot=e["object_uniprot"], + subject_uniprot = e["subject_uniprot"] + object_uniprot = e["object_uniprot"] + + sql = ( + select( + ia.symbol.label("subject_symbol"), + ia.uniprot.label("subject_uniprot"), + ia.taxonomy_id.label("subject_taxonomy_id"), + m.modification, + ib.symbol.label("object_symbol"), + ib.uniprot.label("object_uniprot"), + ib.taxonomy_id.label("object_taxonomy_id"), + es.experimental_system, + func.group_concat(b.biogrid_id.distinct()).label("biogrid_ids"), + func.group_concat( + if_func(p.source == "PUBMED", cast(p.source_identifier, Integer), None).distinct() + ).label("pmids"), + func.count(p.source_identifier.distinct()).label("num_pubs"), + func.group_concat( + if_func(p.source == "DOI", cast(p.source_identifier, Integer), None).distinct() + ).label("dois"), + ) + .join(ia, b.biogrid_a_id == ia.biogrid_id) + .join(ib, b.biogrid_b_id == ib.biogrid_id) + .join(m, m.id == b.modification_id) + .join(p, b.publication_id == p.id) + .join(es, b.experimental_system_id == es.id) + .where(ia.uniprot == subject_uniprot) + .where(ib.uniprot == object_uniprot) + .where(m.modification != "No Modification") ) - for row in self.engine.execute(sql).fetchall(): - row_dict = dict(row) - be = BioGridEdge(subject_rid=subj_pure_rid, object_rid=obj_pure_rid, **row_dict) - edge_value_dict = be.get_edge_value_dict() - - if be.modConfig.bg_mod_name == "Proteolytic Processing": - self.create_edge( - "decreases_bg", - from_rid=subj_pure_rid, - to_rid=obj_pure_rid, - value_dict=edge_value_dict, - ) - counter += 1 - else: - obj_pmod_value_dict = be.obj.get_pmod_protein_as_value_dict() - pmod_protein_rid = self.node_exists("protein", obj_pmod_value_dict, check_for="bel") - if not pmod_protein_rid: - pmod_protein_rid = self.get_create_rid("protein", obj_pmod_value_dict, check_for="bel") - self.create_edge("has_modified_protein", obj_pure_rid, pmod_protein_rid) - pmod_rid = self.insert_record("pmod", be.get_pmod_as_value_dict()) - self.create_edge("has__pmod", pmod_protein_rid, pmod_rid) - self.create_edge( - be.edge_name, - subj_pure_rid, - pmod_protein_rid, - edge_value_dict, - ) - counter += 1 + results = self.session.execute(sql).fetchall() + + for row in results: + if row[0] is not None: # No results for uniprot ID combo + row_dict = row._asdict() # If no modification then no results were returned + be = BioGridEdge(subject_rid=subj_pure_rid, object_rid=obj_pure_rid, **row_dict) + edge_value_dict = be.get_edge_value_dict() + + if be.modConfig.bg_mod_name == "Proteolytic Processing": + self.create_edge( + "decreases_bg", + from_rid=subj_pure_rid, + to_rid=obj_pure_rid, + value_dict=edge_value_dict, + ) + counter += 1 + else: + obj_pmod_value_dict = be.obj.get_pmod_protein_as_value_dict() + pmod_protein_rid = self.node_exists("protein", obj_pmod_value_dict, check_for="bel") + if not pmod_protein_rid: + pmod_protein_rid = self.get_create_rid("protein", obj_pmod_value_dict, check_for="bel") + self.create_edge("has_modified_protein", obj_pure_rid, pmod_protein_rid) + pmod_rid = self.insert_record("pmod", be.get_pmod_as_value_dict()) + self.create_edge("has__pmod", pmod_protein_rid, pmod_rid) + self.create_edge( + be.edge_name, + subj_pure_rid, + pmod_protein_rid, + edge_value_dict, + ) + counter += 1 return counter - - def create_view(self): - """Create SQL view of BioGRID data.""" - sql = """create view if not exists biogrid_view as - select - b.biogrid_id, - ia.symbol as symbol_a, - ia.uniprot as uniprot_a, - ta.taxonomy_id as tax_id_a, - ta.organism_name as organism_a, - ib.symbol as symbol_b, - ib.uniprot as uniprot_b, - tb.taxonomy_id as tax_id_b, - tb.organism_name as organism_b, - es.experimental_system, - m.modification, - s.source, - b.qualification, - p.source as publication_source, - p.source_identifier as publication_identifier - from - biogrid b inner join - biogrid_interactor ia on (ia.biogrid_id=b.biogrid_a_id) inner join - biogrid_interactor ib on (ib.biogrid_id=b.biogrid_b_id) inner join - biogrid_taxonomy ta on (ia.taxonomy_id=ta.taxonomy_id) inner join - biogrid_taxonomy tb on (ib.taxonomy_id=tb.taxonomy_id) left join - biogrid_experimental_system es on (b.experimental_system_id=es.id) left join - biogrid_modification m on (m.id=b.modification_id) left join - biogrid_source s on (s.id=b.source_id) left join - biogrid_publication p on (p.id=b.publication_id)""" - self.engine.execute(sql) diff --git a/ebel/manager/orientdb/biodbs/clinical_trials.py b/ebel/manager/orientdb/biodbs/clinical_trials.py index 008c567..148eba5 100644 --- a/ebel/manager/orientdb/biodbs/clinical_trials.py +++ b/ebel/manager/orientdb/biodbs/clinical_trials.py @@ -47,7 +47,7 @@ def __contains__(self, item): def add_link_to_drugbank(self, data_dict: dict, trial_rid: str): """Create LINKSET in drugbank table for associated clinical trials.""" - # Can't check synonyms untils OrientDB 3.0, need to be able to index on collections + # Can't check synonyms until OrientDB 3.0, need to be able to index on collections # update_sql = 'UPDATE drugbank ADD clinical_trials = {} WHERE name = "{}" OR "{}" in synonyms' # TODO index drugbank.synonyms diff --git a/ebel/manager/orientdb/biodbs/clinvar.py b/ebel/manager/orientdb/biodbs/clinvar.py index a5d0f47..ca9dc7c 100644 --- a/ebel/manager/orientdb/biodbs/clinvar.py +++ b/ebel/manager/orientdb/biodbs/clinvar.py @@ -5,9 +5,11 @@ import pandas as pd from pyorientdb import OrientDB +from sqlalchemy import select, text from tqdm import tqdm from ebel.manager.orientdb import odb_meta, odb_structure, urls +from ebel.manager.orientdb.biodbs.ensembl import Ensembl from ebel.manager.orientdb.constants import CLINVAR from ebel.manager.rdbms.models import clinvar from ebel.tools import get_disease_trait_keywords_from_config, get_file_path @@ -17,7 +19,6 @@ Snp = namedtuple( "Snp", ( - "keyword", "phenotype", "rs_number", "hgnc_id", @@ -56,14 +57,20 @@ def __contains__(self, item): def insert_data(self) -> Dict[str, int]: """Insert data.""" inserted = {} + logger.info("Insert data for ClinVar") + + # Depends on Ensembl + Ensembl().update() + self.recreate_tables() + df = pd.read_csv(self.file_path, sep="\t", low_memory=False) self._standardize_dataframe(df) df.index += 1 df.index.rename("id", inplace=True) - df.drop(columns=["phenotype_ids", "phenotype_list", "other_ids"]).to_sql( - self.biodb_name, self.engine, if_exists="append", chunksize=10000 - ) + + df_base = df.drop(columns=["phenotype_ids", "phenotype_list", "other_ids"]) + df_base.to_sql(clinvar.Clinvar.__tablename__, con=self.engine, if_exists="append", chunksize=10000) df_clinvar__phenotype = ( df["phenotype_list"] @@ -158,26 +165,26 @@ def get_disease_snps_dict(self) -> Dict[str, List[Snp]]: """Get a dictionary {'disease':[snp,snp,... ]} by disease names.""" disease_keywords = get_disease_trait_keywords_from_config() - sql_temp = """Select - '{keyword}', - phenotype, - rs_db_snp as rs_number, - hgnc_id, - chromosome, - start as position, - clinical_significance - from clinvar c inner join - clinvar__phenotype cp on (c.id=cp.clinvar_id) inner JOIN - clinvar_phenotype p on (cp.clinvar_phenotype_id=p.id) - where - p.phenotype like '%%{keyword}%%' - and rs_db_snp != -1""" + cv = clinvar.Clinvar + cp = clinvar.ClinvarPhenotype results = dict() for kwd in disease_keywords: - sql = sql_temp.format(keyword=kwd) - rows = self.engine.execute(sql) - results[kwd] = [Snp(*x) for x in rows.fetchall()] + sql = ( + select( + cp.phenotype, + cv.rs_db_snp.label("rs_number"), + cv.hgnc_id, + cv.chromosome, + cv.start.label("position"), + cv.clinical_significance, + ) + .join(cp, cv.phenotypes) + .where(cv.rs_db_snp != -1) + .where(cp.phenotype.like(f"%{kwd}%")) + ) + rows = self.session.execute(sql).fetchall() + results[kwd] = [Snp(*x) for x in rows] return results @@ -197,6 +204,7 @@ def update_interactions(self) -> int: for snp in tqdm(rows, desc=f"Add has_X_snp_cv edges to BEL for {disease}"): if snp.hgnc_id in hgnc_id_gene_rid_cache: gene_mapped_rid = hgnc_id_gene_rid_cache[snp.hgnc_id] + else: gene_mapped_rid = self._get_set_gene_rid(hgnc_id=snp.hgnc_id) hgnc_id_gene_rid_cache[snp.hgnc_id] = gene_mapped_rid @@ -206,7 +214,7 @@ def update_interactions(self) -> int: value_dict = { "clinical_significance": snp.clinical_significance, "phenotype": snp.phenotype, - "keyword": snp.keyword, + "keyword": disease, } self.create_edge( class_name="has_mapped_snp_cv", diff --git a/ebel/manager/orientdb/biodbs/disgenet.py b/ebel/manager/orientdb/biodbs/disgenet.py index c1d27f7..4d86467 100644 --- a/ebel/manager/orientdb/biodbs/disgenet.py +++ b/ebel/manager/orientdb/biodbs/disgenet.py @@ -4,9 +4,11 @@ import pandas as pd from pyorientdb import OrientDB +from sqlalchemy import select, text from tqdm import tqdm from ebel.manager.orientdb import odb_meta, odb_structure, urls +from ebel.manager.orientdb.biodbs.ensembl import Ensembl from ebel.manager.orientdb.constants import DISGENET from ebel.manager.rdbms.models import disgenet from ebel.tools import get_disease_trait_keywords_from_config, get_file_path @@ -50,6 +52,12 @@ def __repr__(self) -> str: def insert_data(self) -> Dict[str, int]: """Insert data into database.""" logger.info(f"Import {self.biodb_name.upper()}") + + # Update EnSembl first since DisGeNet is dependent on it + ens = Ensembl() + ens.update() + + # Insert data inserted = dict() inserted["sources"] = self._insert_sources() inserted["gene_symbols"] = self._insert_gene_symbols() @@ -73,8 +81,8 @@ def file_path_variant(self): return self.__get_file_for_model(disgenet.DisgenetVariant) def _insert_sources(self): - df_g = pd.read_csv(self.file_path_gene, sep="\t", usecols=["source"]).drop_duplicates() - df_v = pd.read_csv(self.file_path_variant, sep="\t", usecols=["source"]).drop_duplicates() + df_g = pd.read_csv(self.file_path_gene, sep="\t", usecols=["source"]) + df_v = pd.read_csv(self.file_path_variant, sep="\t", usecols=["source"]) df = pd.concat([df_g, df_v]).drop_duplicates() df.reset_index(inplace=True, drop=True) df.index += 1 @@ -115,9 +123,10 @@ def _insert_gene_symbols(self) -> int: return df.shape[0] def _merge_with_source(self, df): - df_sources = pd.read_sql_table(disgenet.DisgenetSource.__tablename__, self.engine).rename( - columns={"id": "source_id"} - ) + with self.engine.connect() as conn: + stmt = select(disgenet.DisgenetSource) + df_sources = pd.read_sql(stmt, conn).rename(columns={"id": "source_id"}) + return pd.merge(df, df_sources, on="source").drop(columns=["source"]) def _insert_gene_disease_pmid_associations(self) -> int: @@ -174,35 +183,23 @@ def update_snps(self) -> int: "downstream": "upstream", "upstream": "downstream", } - # TODO: replace SQL with SQL Alchemy statement - sql_temp = """Select - snp_id, - chromosome, - position, - disease_name, - pmid, - score, - source - FROM - disgenet_variant v INNER JOIN - disgenet_source s on (v.source_id=s.id) INNER JOIN - disgenet_disease d on (v.disease_id=d.disease_id) - WHERE - disease_name like '%%{}%%' and - source!='BEFREE' - GROUP BY - snp_id, - chromosome, - position, - disease_name, - pmid, - score, - source""" + + dv = disgenet.DisgenetVariant + ds = disgenet.DisgenetSource + dd = disgenet.DisgenetDisease results = dict() for kwd in self.disease_keywords: - sql = sql_temp.format(kwd) - rows = self.engine.execute(sql) + sql = ( + select(dv.snp_id, dv.chromosome, dv.position, dd.disease_name, dv.pmid, dv.score, ds.source) + .join(ds) + .join(dd) + .where(dd.disease_name.like(f"%{kwd}%")) + .where(ds.source != "BEFREE") + .group_by(dv.snp_id, dv.chromosome, dv.position, dd.disease_name, dv.pmid, dv.score, ds.source) + ) + + rows = self.session.execute(sql).fetchall() results[kwd] = rows inserted = 0 @@ -213,7 +210,7 @@ def update_snps(self) -> int: for r in tqdm( kwd_disease_results, desc=f"Update DisGeNET variant interactions for {trait}", - total=kwd_disease_results.rowcount, + total=len(kwd_disease_results), ): snp_id, chromosome, position, disease_name, pmid, score, source = r diff --git a/ebel/manager/orientdb/biodbs/expression_atlas.py b/ebel/manager/orientdb/biodbs/expression_atlas.py index cdafe88..8fd8fd9 100644 --- a/ebel/manager/orientdb/biodbs/expression_atlas.py +++ b/ebel/manager/orientdb/biodbs/expression_atlas.py @@ -341,7 +341,7 @@ def get_idf(self, experiment_name: str) -> Optional[pd.DataFrame]: values = [x.strip() for x in line_splitted[1:] if x.strip()] rows.append((key_name, values)) - df = pd.DataFrame(rows, columns=("key_name", "value")).explode("value") + df = pd.DataFrame(rows, columns=["key_name", "value"]).explode("value") return df def get_sdrf_condensed(self, experiment_name: str) -> Optional[pd.DataFrame]: diff --git a/ebel/manager/orientdb/biodbs/hgnc.py b/ebel/manager/orientdb/biodbs/hgnc.py index 8140544..0e505e0 100644 --- a/ebel/manager/orientdb/biodbs/hgnc.py +++ b/ebel/manager/orientdb/biodbs/hgnc.py @@ -8,12 +8,27 @@ import numpy as np import pandas as pd -from pyorientdb import OrientDB +from pyorientdb import OrientDB, OrientRecord +from sqlalchemy import select from tqdm import tqdm from ebel.manager.orientdb import odb_meta, odb_structure, urls from ebel.manager.orientdb.constants import HGNC from ebel.manager.rdbms.models import hgnc +from ebel.manager.rdbms.models.hgnc import AliasName, AliasSymbol, Ccds, Ena, Enzyme, GeneGroupId, GeneGroupName +from ebel.manager.rdbms.models.hgnc import Hgnc as HgncDb +from ebel.manager.rdbms.models.hgnc import ( + Lsdb, + Mgd, + Omim, + PrevName, + PrevSymbol, + PubMed, + RefSeq, + Rgd, + RnaCentral, + UniProt, +) from ebel.tools import get_file_path logger = logging.getLogger(__name__) @@ -117,28 +132,28 @@ def import_hgnc_into_rdbms(self) -> int: df["id"] = pd.to_numeric(df.hgnc_id.str.split(":").str[1]) df.set_index("id", inplace=True) - df[columns].to_sql(hgnc.Hgnc.__tablename__, self.engine, if_exists="append") + df[columns].to_sql(HgncDb.__tablename__, self.engine, if_exists="append") df.hgnc_id = pd.to_numeric(df.hgnc_id.str.split(":").str[1]) for df_col, model, m_col in ( - ("prev_symbol", hgnc.PrevSymbol, None), - ("alias_symbol", hgnc.AliasSymbol, None), - ("alias_name", hgnc.AliasName, None), - ("ccds_id", hgnc.Ccds, "identifier"), - ("ena", hgnc.Ena, "identifier"), - ("enzyme_id", hgnc.Enzyme, "ec_number"), - ("gene_group", hgnc.GeneGroupName, "name"), - ("gene_group_id", hgnc.GeneGroupId, "identifier"), - ("uniprot_ids", hgnc.UniProt, "accession"), - ("rna_central_id", hgnc.RnaCentral, "identifier"), - ("rgd_id", hgnc.Rgd, "identifier"), - ("refseq_accession", hgnc.RefSeq, "accession"), - ("pubmed_id", hgnc.PubMed, "pmid"), - ("prev_name", hgnc.PrevName, None), - ("omim_id", hgnc.Omim, "identifier"), - ("mgd_id", hgnc.Mgd, "identifier"), - ("lsdb", hgnc.Lsdb, "identifier"), + ("prev_symbol", PrevSymbol, None), + ("alias_symbol", AliasSymbol, None), + ("alias_name", AliasName, None), + ("ccds_id", Ccds, "identifier"), + ("ena", Ena, "identifier"), + ("enzyme_id", Enzyme, "ec_number"), + ("gene_group", GeneGroupName, "name"), + ("gene_group_id", GeneGroupId, "identifier"), + ("uniprot_ids", UniProt, "accession"), + ("rna_central_id", RnaCentral, "identifier"), + ("rgd_id", Rgd, "identifier"), + ("refseq_accession", RefSeq, "accession"), + ("pubmed_id", PubMed, "pmid"), + ("prev_name", PrevName, None), + ("omim_id", Omim, "identifier"), + ("mgd_id", Mgd, "identifier"), + ("lsdb", Lsdb, "identifier"), ): df_1n_table = df[[df_col, "hgnc_id"]].explode(df_col).dropna() if m_col: @@ -252,32 +267,34 @@ def get_location(location: str) -> dict: location_dict = {"unknown_schema": location} return location_dict - def get_bel_symbols_without_hgnc_link(self): + def get_bel_symbols_without_hgnc_link(self) -> set: """Return set of all gene symbols in database without a link to HGNC.""" sql_symbols = "Select distinct(name) as symbol from bio_object where namespace='HGNC' and hgnc IS NULL" return {x.oRecordData["symbol"] for x in self.execute(sql_symbols)} - def get_bel_symbols_all(self): + def get_bel_symbols_all(self) -> set: """Return set of all gene symbols in database.""" sql_symbols = "Select distinct(name) as symbol from bio_object where namespace='HGNC'" return {x.oRecordData["symbol"] for x in self.execute(sql_symbols)} - def get_correct_symbol(self, symbol: str): + def get_correct_symbol(self, symbol: str) -> str: """Checks if symbol is valid otherwise checks previsous symbols.""" - result_in_symbol = self.session.query(hgnc.Hgnc).filter(hgnc.Hgnc.symbol == symbol).first() + symbol_query = select(HgncDb).where(HgncDb.symbol == symbol) + result_in_symbol = self.session.execute(symbol_query).first() if not result_in_symbol: - result_in_prev_symbol = ( - self.session.query(hgnc.PrevSymbol).filter(hgnc.PrevSymbol.prev_symbol == symbol).first() - ) + result_in_prev_symbol = self.session.query(PrevSymbol).filter(PrevSymbol.prev_symbol == symbol).first() + if result_in_prev_symbol: symbol = result_in_prev_symbol.hgnc.symbol + else: symbol = None + return symbol def correct_wrong_symbol(self, symbol, bel_symbols_all: set): """Corrects the symbol of the node and relinks all edges to existing node if needed.""" - result = self.session.query(hgnc.PrevSymbol).filter_by(prev_symbol=symbol).first() + result = self.session.query(PrevSymbol).filter_by(prev_symbol=symbol).first() if result: correct_symbol = result.hgnc.symbol if correct_symbol not in bel_symbols_all: @@ -300,7 +317,10 @@ def update_bel(self) -> int: bel_symbols_all = self.get_bel_symbols_all() symbols_without_hgnc = self.get_bel_symbols_without_hgnc_link() - hgnc_symbols = {x[0] for x in self.session.query(hgnc.Hgnc.symbol).all()} + + symbol_query = select(HgncDb.symbol) + symbol_results = self.session.execute(symbol_query).all() + hgnc_symbols = {x[0] for x in symbol_results} for wrong_symbol in symbols_without_hgnc - hgnc_symbols: self.correct_wrong_symbol(wrong_symbol, bel_symbols_all) @@ -320,7 +340,7 @@ def update_gene( location: str, hgnc_symbol: str, suggested_corrections: str, - ) -> int: + ) -> OrientRecord: """Update genes in OrientDB and returns number of updates.""" suggest = ( ", suggested_corrections={{'wrong name': {}}}".format(suggested_corrections) @@ -341,7 +361,7 @@ def update_gene( ) return self.execute(sql)[0] - def update_rna(self, hgnc_rid: str, label: str, hgnc_symbol: str, suggested_corrections: str) -> int: + def update_rna(self, hgnc_rid: str, label: str, hgnc_symbol: str, suggested_corrections: str) -> OrientRecord: """Update RNAs in OrientDB and returns number of updates.""" suggest = ( ", suggested_corrections={{'wrong name': {}}}".format(suggested_corrections) @@ -358,7 +378,7 @@ def update_rna(self, hgnc_rid: str, label: str, hgnc_symbol: str, suggested_corr ) return self.execute(sql)[0] - def update_protein(self, hgnc_rid: str, label: str, hgnc_symbol: str, suggested_corrections: str) -> int: + def update_protein(self, hgnc_rid: str, label: str, hgnc_symbol: str, suggested_corrections: str) -> OrientRecord: """Update proteins in OrientDB and returns number of updates.""" suggest = ( ", suggested_corrections={{'wrong name': {}}}".format(suggested_corrections) @@ -377,28 +397,28 @@ def update_protein(self, hgnc_rid: str, label: str, hgnc_symbol: str, suggested_ def update_nodes_by_symbol(self, symbol) -> dict: """Update all nodes by HGNC symbol.""" - hgnc = self.get_basic_entry_by_symbol(symbol) + hgnc_results = self.get_basic_entry_by_symbol(symbol) - if hgnc: - suggest = json.dumps(hgnc.suggested_corrections) if hgnc.suggested_corrections else None + if hgnc_results: + suggest = json.dumps(hgnc_results.suggested_corrections) if hgnc_results.suggested_corrections else None num_update_genes = self.update_gene( - hgnc_symbol=hgnc.symbol, - hgnc_rid=hgnc.hgnc_rid, - label=hgnc.label, - location=hgnc.location, + hgnc_symbol=hgnc_results.symbol, + hgnc_rid=hgnc_results.hgnc_rid, + label=hgnc_results.label, + location=hgnc_results.location, suggested_corrections=suggest, ) num_update_rnas = self.update_rna( - hgnc_symbol=hgnc.symbol, - hgnc_rid=hgnc.hgnc_rid, - label=hgnc.label, + hgnc_symbol=hgnc_results.symbol, + hgnc_rid=hgnc_results.hgnc_rid, + label=hgnc_results.label, suggested_corrections=suggest, ) num_update_proteins = self.update_protein( - hgnc_symbol=hgnc.symbol, - hgnc_rid=hgnc.hgnc_rid, - label=hgnc.label, + hgnc_symbol=hgnc_results.symbol, + hgnc_rid=hgnc_results.hgnc_rid, + label=hgnc_results.label, suggested_corrections=suggest, ) return { @@ -409,7 +429,7 @@ def update_nodes_by_symbol(self, symbol) -> dict: def get_symbol_entrez_dict(self) -> Dict[str, int]: """Return dictionary with gene symbols as keys and entrez IDs as values.""" - query = self.session.query(hgnc.Hgnc.symbol, hgnc.Hgnc.entrez_id).filter(hgnc.Hgnc.entrez_id.isnot(None)) + query = self.session.query(HgncDb.symbol, HgncDb.entrez_id).filter(HgncDb.entrez_id.isnot(None)) return {r.symbol: r.entrez_id for r in query.all()} def update_interactions(self) -> int: diff --git a/ebel/manager/orientdb/biodbs/intact.py b/ebel/manager/orientdb/biodbs/intact.py index 39f8bc9..e9fde67 100644 --- a/ebel/manager/orientdb/biodbs/intact.py +++ b/ebel/manager/orientdb/biodbs/intact.py @@ -1,10 +1,13 @@ """IntAct module.""" import logging +import time import zipfile from typing import Dict +import numpy as np import pandas as pd from pyorientdb import OrientDB +from sqlalchemy import or_, select from tqdm import tqdm from ebel.manager.orientdb import odb_meta, odb_structure, urls @@ -34,6 +37,13 @@ def __init__(self, client: OrientDB = None, condition_keyword="Alzheimer"): biodb_name=self.biodb_name, ) + # up = UniProt() + # up.update() + + self.uniprot_rid_dict = self.get_pure_uniprot_rid_dict_in_bel_context() + self.bel_rid_dict = self.get_pure_bel_rid_dict() + self.acc_nn = self.get_uniprot_accession_namespaces() + def __len__(self): return self.number_of_generics @@ -74,20 +84,24 @@ def insert_data(self) -> Dict[str, int]: "Interaction detection method(s)": "dm", } - df = pd.read_csv(zf.open("intact.txt"), sep="\t", usecols=usecols.keys()) + df = pd.read_csv(zf.open("intact.txt"), sep="\t", usecols=list(usecols.keys())) df.rename(columns=usecols, inplace=True) + regex_accession = r"uniprotkb:([OPQ][0-9][A-Z0-9]{3}[0-9]|[A-NR-Z][0-9]([A-Z][A-Z0-9]{2}[0-9]){1,2})" df.int_a_uniprot_id = df.int_a_uniprot_id.str.extract(regex_accession)[0] df.int_b_uniprot_id = df.int_b_uniprot_id.str.extract(regex_accession)[0] df = df[(pd.notnull(df.int_a_uniprot_id) & pd.notnull(df.int_b_uniprot_id))] + regex_detection_method = r"psi-mi:\"MI:0*(?P\d+)\"\((?P[^)]+)\)" df = df.join(df.dm.str.extract(regex_detection_method), how="left") df.drop(columns=["dm"], inplace=True) df.pmid = df.pmid.str.extract(r"pubmed:(\d+)") + regex_interaction_type = r"psi-mi:\"MI:0*(?P\d+)\"\((?P[^)]+)\)" df = df.join(df.it.str.extract(regex_interaction_type), how="left") df.drop(columns=["it"], inplace=True) df.confidence_value = df.confidence_value.str.extract(r"intact-miscore:(\d+(\.\d+)?)")[0] + df.index += 1 df.index.rename("id", inplace=True) @@ -97,34 +111,41 @@ def insert_data(self) -> Dict[str, int]: return {self.biodb_name: df.shape[0]} - def get_create_rid_by_uniprot(self, uniprot_accession: str, uniprot_rid_dict: dict) -> str: + def get_create_rid_by_uniprot(self, uniprot_accession: str) -> str: """Create or get rID entry for a given UniProt ID. Parameters ---------- uniprot_accession: str UniProt accession number. - uniprot_rid_dict: dict - Entry parameters matching those of the desired rID entry. Returns ------- str UniProt accession ID. """ - if uniprot_accession not in uniprot_rid_dict: + if uniprot_accession not in self.uniprot_rid_dict: nn = self.get_namespace_name_by_uniprot(uniprot_accession) if nn: namespace, name = nn - value_dict = { - "name": name, - "namespace": namespace, - "pure": True, - "bel": f'p({namespace}:"{name}")', - "uniprot": uniprot_accession, - } - uniprot_rid_dict[uniprot_accession] = self.get_create_rid("protein", value_dict, check_for="bel") - return uniprot_rid_dict.get(uniprot_accession) + bel = f'p({namespace}:"{name}")' + + if bel in self.bel_rid_dict: + self.uniprot_rid_dict[uniprot_accession] = self.bel_rid_dict[bel] + + else: + value_dict = { + "name": name, + "namespace": namespace, + "pure": True, + "bel": bel, + "uniprot": uniprot_accession, + } + new_rid = self.insert_record("protein", value_dict=value_dict) + self.bel_rid_dict[bel] = new_rid + self.uniprot_rid_dict[uniprot_accession] = new_rid + + return self.uniprot_rid_dict.get(uniprot_accession) def get_namespace_name_by_uniprot(self, uniprot_accession: str) -> tuple: """Get the namespace of a given UniProt ID. @@ -139,60 +160,58 @@ def get_namespace_name_by_uniprot(self, uniprot_accession: str) -> tuple: tuple namespace, value """ - return_value = () - sql = f"""Select s.symbol, u.taxid from uniprot u inner join uniprot_gene_symbol s - on (u.id=s.uniprot_id) where u.accession='{uniprot_accession}' limit 1""" - result = self.engine.execute(sql).fetchone() - taxid_to_namespace = {9606: "HGNC", 10090: "MGI", 10116: "RGD"} - if result: - name, taxid = result - namespace = taxid_to_namespace.get(taxid, "UNIPROT") - return_value = (namespace, name) + if uniprot_accession in self.acc_nn: + return self.acc_nn[uniprot_accession] + else: - if self.session.query(uniprot.Uniprot).filter(uniprot.Uniprot.accession == uniprot_accession).first(): - return_value = ("UNIPROT", uniprot_accession) - return return_value + up_r = self.session.query(uniprot.Uniprot).filter(uniprot.Uniprot.accession == uniprot_accession).first() + + if up_r: + return_value = "UNIPROT", uniprot_accession + + else: + return_value = () + + self.acc_nn[uniprot_accession] = return_value + return return_value def update_interactions(self) -> int: """Update intact interactions to graph.""" - uniprot = UniProt(self.client) - uniprot.update() - - uniprot_rid_dict = uniprot.get_pure_uniprot_rid_dict_in_bel_context() - - sql_temp = """SELECT - int_a_uniprot_id, - int_b_uniprot_id, - pmid, - interaction_ids, - interaction_type, - interaction_type_psimi_id, - detection_method, - detection_method_psimi_id, - confidence_value - FROM - intact - WHERE - int_a_uniprot_id = '{uniprot_accession}' or int_b_uniprot_id = '{uniprot_accession}' - GROUP BY - int_a_uniprot_id, - int_b_uniprot_id, - pmid, - interaction_ids, - interaction_type, - interaction_type_psimi_id, - detection_method, - detection_method_psimi_id, - confidence_value""" + logger.info("Update IntAct interactions") updated = 0 - uniprot_accessions = tuple(uniprot_rid_dict.keys()) + uniprot_accessions = tuple(self.uniprot_rid_dict.keys()) + it = intact.Intact + + sql = select( + it.int_a_uniprot_id, + it.int_b_uniprot_id, + it.pmid, + it.interaction_ids, + it.interaction_type, + it.interaction_type_psimi_id, + it.detection_method, + it.detection_method_psimi_id, + it.confidence_value, + ).group_by( + it.int_a_uniprot_id, + it.int_b_uniprot_id, + it.pmid, + it.interaction_ids, + it.interaction_type, + it.interaction_type_psimi_id, + it.detection_method, + it.detection_method_psimi_id, + it.confidence_value, + ) + + intact_df = pd.read_sql(sql, self.engine).replace({np.nan: None}) for uniprot_accession in tqdm(uniprot_accessions, desc="Update IntAct interactions"): - sql = sql_temp.format(uniprot_accession=uniprot_accession) - result = self.engine.execute(sql) - + filtered_df = intact_df[ + (intact_df.int_a_uniprot_id == uniprot_accession) | (intact_df.int_b_uniprot_id == uniprot_accession) + ] for ( up_a, up_b, @@ -203,9 +222,9 @@ def update_interactions(self) -> int: d_method, d_method_id, c_value, - ) in result.fetchall(): - from_rid = self.get_create_rid_by_uniprot(up_a, uniprot_rid_dict) - to_rid = self.get_create_rid_by_uniprot(up_b, uniprot_rid_dict) + ) in filtered_df.itertuples(index=False): + from_rid = self.get_create_rid_by_uniprot(up_a) + to_rid = self.get_create_rid_by_uniprot(up_b) if from_rid and to_rid: value_dict = { diff --git a/ebel/manager/orientdb/biodbs/iuphar.py b/ebel/manager/orientdb/biodbs/iuphar.py index 5f03d50..3691a41 100644 --- a/ebel/manager/orientdb/biodbs/iuphar.py +++ b/ebel/manager/orientdb/biodbs/iuphar.py @@ -6,6 +6,7 @@ import numpy as np import pandas as pd from pyorientdb import OrientDB +from sqlalchemy import select from tqdm import tqdm from ebel.manager.orientdb import odb_meta, odb_structure, urls @@ -132,13 +133,30 @@ def update_interactions(self) -> int: "Gating inhibitor": "inhibits_gating__iu", } - sql = """select i.pubmed_id, i.assay_description, i.affinity_units, i.affinity_low, i.affinity_median, - i.affinity_high, i.type, - i.action,i.target_uniprot, l.name as ligand_name, l.pubchem_sid, i.ligand_gene_symbol, i.ligand_species - from iuphar_interaction as i inner join iuphar_ligand as l - on (i.ligand_id=l.id) where i.target_uniprot IS NOT NULL and pubchem_sid IS NOT NULL""" + i_int = iuphar.IupharInteraction + lig = iuphar.IupharLigand + sql = ( + select( + i_int.pubmed_id, + i_int.assay_description, + i_int.affinity_units, + i_int.affinity_low, + i_int.affinity_median, + i_int.affinity_high, + i_int.type, + i_int.action, + i_int.target_uniprot, + lig.name.label("ligand_name"), + lig.pubchem_sid, + ) + .join(lig) + .where(i_int.target_uniprot.isnot(None)) + .where(lig.pubchem_sid.isnot(None)) + ) + + with self.engine.connect() as conn: + df_iuphar = pd.read_sql(sql, conn).replace({np.nan: None}) - df_iuphar = pd.read_sql(sql, self.engine).replace({np.nan: None}) df_iuphar.set_index("target_uniprot", inplace=True) df_graph = pd.DataFrame( uniprot.get_pure_uniprot_rid_dict_in_bel_context().items(), @@ -152,7 +170,11 @@ def update_interactions(self) -> int: total=df_join.shape[0], desc=f"Update {self.biodb_name.upper()} interactions", ): - if data.ligand_gene_symbol and data.ligand_species and "Human" in data.ligand_species: + if ( + "ligand_gene_symbol" in data.index + and "ligand_species" in data.index + and "Human" in data.ligand_species + ): symbol = data.ligand_gene_symbol.split("|")[0] # human seems to always the first a_value_dict = { "pure": True, @@ -161,6 +183,7 @@ def update_interactions(self) -> int: "name": symbol, } a_class = "protein" + else: a_value_dict = { "pure": True, @@ -170,6 +193,7 @@ def update_interactions(self) -> int: "label": data.ligand_name, } a_class = "abundance" + a_rid = self.get_create_rid(a_class, value_dict=a_value_dict, check_for="bel") i_value_dict = { @@ -185,7 +209,4 @@ def update_interactions(self) -> int: edge_class = iuphar_edge_type_mapper.get(data.type, "iuphar_interaction") self.create_edge(edge_class, from_rid=a_rid, to_rid=data.rid, value_dict=i_value_dict) - # not sure if this is really needed - # Hgnc(self.client).update_bel() - return df_join.shape[0] diff --git a/ebel/manager/orientdb/biodbs/kegg.py b/ebel/manager/orientdb/biodbs/kegg.py index 10ad6f9..585b16d 100644 --- a/ebel/manager/orientdb/biodbs/kegg.py +++ b/ebel/manager/orientdb/biodbs/kegg.py @@ -9,6 +9,7 @@ import pandas as pd import requests from pyorientdb import OrientDB +from sqlalchemy import or_, select from tqdm import tqdm from ebel.config import get_config_value @@ -277,33 +278,33 @@ def update_interactions(self) -> int: "phosphorylation": ("pho", "increases", BelPmod.PHO), "ubiquitination": ("ubi", "increases", BelPmod.UBI), } - post_translational_modifications = ",".join([f"'{x}'" for x in pmods.keys()]) - - species_ids = ",".join([f"'{x}'" for x in self.species]) - - sql_temp = f"""Select - interaction_type, - pathway_identifier, - pathway_name, - gene_symbol_a, - gene_symbol_b, - kegg_species_id - from - kegg - where - (gene_symbol_a='{{symbol}}' or gene_symbol_a='{{symbol}}') and - kegg_species_id in ({species_ids}) and - interaction_type in ({{interaction_types}}) - group by - interaction_type, - pathway_identifier, - pathway_name, - gene_symbol_a, - gene_symbol_b, - kegg_species_id""" + kg = kegg.Kegg for symbol, rid in tqdm(symbol_rids_dict.items(), desc="Update KEGG posttranslational modifications"): - sql = sql_temp.format(symbol=symbol, interaction_types=post_translational_modifications) + # sql = sql_temp.format(symbol=symbol, interaction_types=post_translational_modifications) + + sql = ( + select( + kg.interaction_type, + kg.pathway_identifier, + kg.pathway_name, + kg.gene_symbol_a, + kg.gene_symbol_b, + kg.kegg_species_id, + ) + .where(or_(kg.gene_symbol_a == symbol, kg.gene_symbol_b == symbol)) + .where(kg.kegg_species_id.in_(self.species)) + .where(kg.interaction_type.in_(list(pmods.keys()))) + .group_by( + kg.interaction_type, + kg.pathway_identifier, + kg.pathway_name, + kg.gene_symbol_a, + kg.gene_symbol_b, + kg.kegg_species_id, + ) + ) + df = pd.read_sql(sql, self.engine) keys = ( "interaction_type", diff --git a/ebel/manager/orientdb/biodbs/mirtarbase.py b/ebel/manager/orientdb/biodbs/mirtarbase.py index 14eec42..2703586 100644 --- a/ebel/manager/orientdb/biodbs/mirtarbase.py +++ b/ebel/manager/orientdb/biodbs/mirtarbase.py @@ -3,6 +3,7 @@ import pandas as pd from pyorientdb import OrientDB +from sqlalchemy import select, text from tqdm import tqdm from ebel.manager.orientdb import odb_meta, odb_structure, urls @@ -36,7 +37,7 @@ def __contains__(self, item) -> bool: def insert_data(self) -> Dict[str, int]: """Insert mirtarbase data into database.""" - # TODO Fix download error - + # TODO: Fix download error - # ssl.SSLError: [SSL: SSLV3_ALERT_HANDSHAKE_FAILURE] sslv3 alert handshake failure (_ssl.c:997) df = pd.read_excel(self.file_path) df.columns = self._standardize_column_names(df.columns) @@ -57,20 +58,22 @@ def update_interactions(self) -> int: self.clear_edges() df_symbol_rid = self.get_pure_symbol_rid_df_in_bel_context(class_name="rna", namespace="HGNC") - sql = f"""Select - mi_rna, - target_gene as symbol, - support_type, - references_pmid as pmid, - experiments - from - {mirtarbase.Mirtarbase.__tablename__} - where - species_mi_rna='Homo sapiens' and - species_target_gene='Homo sapiens' and - support_type in ('Functional MTI', 'Non-Functional MTI')""" + mtb = mirtarbase.Mirtarbase + sql = ( + select( + mtb.mi_rna, + mtb.target_gene.label("symbol"), + mtb.support_type, + mtb.references_pmid.label("pmid"), + mtb.experiments, + ) + .where(mtb.species_mi_rna == "Homo sapiens") + .where(mtb.species_target_gene == "Homo sapiens") + .where(mtb.support_type.in_(["Functional MTI", "Non-Functional MTI"])) + ) + cols = ["mi_rna", "symbol", "support_type", "pmid", "experiments"] - df_mirtarbase = pd.DataFrame(self.engine.execute(sql).fetchall(), columns=cols) + df_mirtarbase = pd.DataFrame(self.session.execute(sql).fetchall(), columns=cols) df_mirtarbase.experiments = df_mirtarbase.experiments.str.split("//") df_join = df_mirtarbase.set_index("symbol").join(df_symbol_rid.set_index("symbol"), how="inner") diff --git a/ebel/manager/orientdb/biodbs/ncbi.py b/ebel/manager/orientdb/biodbs/ncbi.py index c395fc9..eb50d44 100644 --- a/ebel/manager/orientdb/biodbs/ncbi.py +++ b/ebel/manager/orientdb/biodbs/ncbi.py @@ -274,6 +274,7 @@ def _insert_info(self, chunksize: int = 1000000) -> int: "type_of_gene", "dbXrefs", } + for df in tqdm( pd.read_csv( file_path, @@ -298,3 +299,8 @@ def _insert_info(self, chunksize: int = 1000000) -> int: def update_interactions(self) -> int: """Abstract method.""" pass + + +if __name__ == "__main__": + n = Ncbi() + n._insert_medgen() diff --git a/ebel/manager/orientdb/biodbs/nsides.py b/ebel/manager/orientdb/biodbs/nsides.py index 50b5909..ef16c77 100644 --- a/ebel/manager/orientdb/biodbs/nsides.py +++ b/ebel/manager/orientdb/biodbs/nsides.py @@ -7,12 +7,13 @@ import pandas as pd from pyorientdb import OrientDB +from sqlalchemy import or_, select, text from tqdm import tqdm from ebel.constants import RID from ebel.manager.orientdb import odb_meta, odb_structure, urls from ebel.manager.orientdb.constants import OFFSIDES, ONSIDES -from ebel.manager.rdbms.models import nsides +from ebel.manager.rdbms.models import drugbank, nsides from ebel.tools import get_file_path logger = logging.getLogger(__name__) @@ -142,26 +143,6 @@ def update_bel(self) -> int: self.delete_nodes_with_no_edges("side_effect") self.delete_nodes_with_no_edges("drug") - # TODO: Translate to sqlalchemy query - sql_temp = """Select - o.condition_meddra_id, - o.condition_concept_name, - o.prr, - o.mean_reporting_frequency - from - drugbank as d inner join - drugbank_external_identifier as dei on (d.id=dei.drugbank_id) inner join - nsides as o on (dei.identifier=o.drug_rxnorn_id) - where - d.drugbank_id='{}' and resource='RxCUI' - and (mean_reporting_frequency>=0.01 OR mean_reporting_frequency is NULL) - group by - o.condition_meddra_id, - o.condition_concept_name, - o.prr, - o.mean_reporting_frequency - """ - drugbank_ids = self.query_class("drug", columns=["drugbank_id"], drugbank_id="notnull") drugbank_id_rids = {d["drugbank_id"]: d[RID] for d in drugbank_ids} @@ -170,8 +151,26 @@ def update_bel(self) -> int: updated = 0 + d = drugbank.Drugbank + dei = drugbank.ExternalIdentifier + o = nsides.Nsides + for drugbank_id, drugbank_rid in tqdm(drugbank_id_rids.items(), desc=f"Update {self.biodb_name.upper()}"): - for r in self.engine.execute(sql_temp.format(drugbank_id)): + sql = ( + ( + select(o.condition_meddra_id, o.condition_concept_name, o.prr, o.mean_reporting_frequency) + .join(dei, dei.identifier == o.drug_rxnorn_id) + .join(d, d.id == dei.drugbank_id) + ) + .where(d.drugbank_id == drugbank_id) + .where(dei.resource == "RxCUI") + .where(or_(o.mean_reporting_frequency >= 0.01, o.mean_reporting_frequency.is_(None))) + .group_by(o.condition_meddra_id, o.condition_concept_name, o.prr, o.mean_reporting_frequency) + ) + + results = self.session.execute(sql) + + for r in results: ( condition_meddra_id, condition_concept_name, diff --git a/ebel/manager/orientdb/biodbs/pathway_commons.py b/ebel/manager/orientdb/biodbs/pathway_commons.py index 1fcc12b..9a44aa2 100644 --- a/ebel/manager/orientdb/biodbs/pathway_commons.py +++ b/ebel/manager/orientdb/biodbs/pathway_commons.py @@ -5,6 +5,7 @@ import pandas as pd from pyorientdb import OrientDB +from sqlalchemy import select from tqdm import tqdm from ebel.constants import RID @@ -62,12 +63,12 @@ def insert_data(self) -> Dict[str, int]: "INTERACTION_PUBMED_ID", "PATHWAY_NAMES", ] - df = pd.read_csv(self.file_path, sep="\t", low_memory=True, usecols=usecols) # Because 2 tables are in file, we have to identify where second table starts and slice the dataframe df = df.iloc[: df[df["PARTICIPANT_A"] == "PARTICIPANT"].index[0]] df.columns = self._standardize_column_names(df.columns) + df.pathway_names = df.pathway_names.str.split(";") df.interaction_data_source = df.interaction_data_source.str.split(";") df.interaction_pubmed_id = df.interaction_pubmed_id.str.split(";") @@ -103,14 +104,17 @@ def create_pmids_table(self, df): columns={"id": "pathway_commons_id", "interaction_pubmed_id": "pmid"}, inplace=True, ) - df_pmids.pmid = pd.to_numeric(df_pmids.pmid, errors="coerce") - df_pmids.to_sql( - pc.Pmid.__tablename__, - con=self.engine, - index=False, - if_exists="append", - chunksize=10000, - ) + df_pmids.pmid = pd.to_numeric(df_pmids.pmid, errors="coerce", downcast="integer") + df_pmids = df_pmids[df_pmids.pmid.notna()] + + with self.engine.connect() as conn: + df_pmids.to_sql( + pc.Pmid.__tablename__, + con=conn, + index=False, + if_exists="append", + chunksize=10000, + ) del df_pmids def create_joining_table_names(self, df, df_pc_names): @@ -212,12 +216,17 @@ def update_interactions(self) -> Dict[str, int]: inserted = {} pc_pathway_name_rid_dict = self.get_pathway_name_rid_dict() + + # Update HGNC in case not in DB + self.hgnc.update() valid_hgnc_symbols = {x[0] for x in self.session.query(hgnc.Hgnc).with_entities(hgnc.Hgnc.symbol).all()} + pure_symbol_rids_dict = self.get_pure_symbol_rids_dict() + symbol_rids_bel_context_dict = self.get_pure_symbol_rids_dict_in_bel_context() + cols = ["symbol", "rid"] - pure_symbol_rids_dict = self.hgnc.get_pure_symbol_rids_dict() df_all = pd.DataFrame(pure_symbol_rids_dict.items(), columns=cols) - df_bel = pd.DataFrame(self.hgnc.get_pure_symbol_rids_dict_in_bel_context().items(), columns=cols) + df_bel = pd.DataFrame(symbol_rids_bel_context_dict.items(), columns=cols) # skip here if there is no pure symbols with or without BEL context if any([df_all.empty, df_bel.empty]): @@ -232,22 +241,25 @@ def update_interactions(self) -> Dict[str, int]: for edge_type in edge_types: inserted[edge_type] = 0 - sql = f"""Select id, participant_a, participant_b from - pathway_commons where interaction_type='{edge_type}'""" + sql = select(pc.PathwayCommons.id, pc.PathwayCommons.participant_a, pc.PathwayCommons.participant_b).where( + pc.PathwayCommons.interaction_type == edge_type + ) + df_ppi_of = pd.read_sql(sql, self.engine) + df_join = ( df_ppi_of.set_index("participant_a") .join(df_all.set_index("symbol")) .rename(columns={"rid": "rid_a_all"}) .join(df_bel.set_index("symbol")) .reset_index() - .rename(columns={"rid": "rid_a_bel", "index": "a"}) + .rename(columns={"rid": "rid_a_bel", "participant_a": "a"}) .set_index("participant_b") .join(df_all.set_index("symbol")) .rename(columns={"rid": "rid_b_all"}) .join(df_bel.set_index("symbol")) .reset_index() - .rename(columns={"rid": "rid_b_bel", "index": "b"}) + .rename(columns={"rid": "rid_b_bel", "participant_b": "b"}) .set_index("id") ) @@ -289,8 +301,14 @@ def update_interactions(self) -> Dict[str, int]: def get_pathway_pmids_sources(self, pc_id, pc_pathway_name_rid_dict) -> tuple: """Return all pathway, PMIDs, and their sources.""" - pc_obj = self.session.query(pc.PathwayCommons).get(pc_id) + pc_obj = self.session.get(pc.PathwayCommons, pc_id) sources = [x.source for x in pc_obj.sources] pmids = [x.pmid for x in pc_obj.pmids] pathways = [pc_pathway_name_rid_dict[x.name] for x in pc_obj.pathway_names] return pathways, pmids, sources + + +if __name__ == "__main__": + p = PathwayCommons() + foo = p.get_pure_symbol_rids_dict() + a = 2 diff --git a/ebel/manager/orientdb/biodbs/protein_atlas.py b/ebel/manager/orientdb/biodbs/protein_atlas.py index 53481b5..efd77eb 100644 --- a/ebel/manager/orientdb/biodbs/protein_atlas.py +++ b/ebel/manager/orientdb/biodbs/protein_atlas.py @@ -170,7 +170,9 @@ def update_interactions(self) -> int: location_rid_cache = {x["bel"]: x["rid"] for x in self.query_class("location", columns=["bel"])} - for ensembl_gene_id, data in tqdm(rid_ensembl_gene_ids.items()): + for ensembl_gene_id, data in tqdm( + rid_ensembl_gene_ids.items(), desc=f"Update {self.biodb_name.upper()} interactions" + ): ns_location = "PROTEIN_ATLAS" pure_protein = data.oRecordData ns = pure_protein["namespace"] diff --git a/ebel/manager/orientdb/biodbs/stringdb.py b/ebel/manager/orientdb/biodbs/stringdb.py index a92c005..68219b6 100644 --- a/ebel/manager/orientdb/biodbs/stringdb.py +++ b/ebel/manager/orientdb/biodbs/stringdb.py @@ -6,6 +6,7 @@ import numpy as np import pandas as pd from pyorientdb import OrientDB +from sqlalchemy import and_, or_, select, text from tqdm import tqdm from ebel.manager.orientdb import odb_meta, odb_structure, urls @@ -39,6 +40,9 @@ def __init__(self, client: OrientDB = None): biodb_name=self.biodb_name, ) + self.symbol_rid_dict = self.get_pure_symbol_rids_dict_in_bel_context(namespace="HGNC") + self.bel_rid_dict = self.get_pure_bel_rid_dict() + def __len__(self) -> dict: """Get number of 'biogrid_interaction' graph edges.""" pass @@ -159,13 +163,20 @@ def insert_action_data(self) -> int: def get_stringdb_action_hgnc_set(self): """Get unique HGNC symbols from stringdb_actions table.""" - sql = f"""(Select distinct( symbol1 ) from {self.table_action}) - union (Select distinct( symbol2 ) from {self.table_action})""" - return set([x[0] for x in self.engine.execute(sql).fetchall()]) + # sql = f"""(Select distinct( symbol1 ) from {self.table_action}) + # union (Select distinct( symbol2 ) from {self.table_action})""" + + stmt1 = select(stringdb.StringDbAction.symbol1).distinct() + stmt2 = select(stringdb.StringDbAction.symbol2).distinct() + sql = stmt1.union(stmt2).alias("combined") + print(sql) + + return set([x[0] for x in self.session.execute(sql).fetchall()]) def update_interactions(self) -> Dict[str, int]: """Update the edges with StringDB metadata.""" hgnc = Hgnc(self.client) + hgnc.update() # If users haven't run Hgnc yet updated = dict() updated["interactions"] = self.update_stringdb_interactions(hgnc) updated["actions"] = self.update_action_interactions(hgnc) @@ -194,10 +205,9 @@ def update_stringdb_interactions(self, hgnc: Hgnc) -> int: "combined_score", ) - bel_hgnc_rid_dict = self.get_pure_symbol_rids_dict_in_bel_context(namespace="HGNC") - bel_hgncs = set(bel_hgnc_rid_dict.keys()) + symbols = set(self.symbol_rid_dict.keys()) strdb_hgncs = self.get_stringdb_symbols() - shared_hgncs = bel_hgncs & strdb_hgncs + shared_hgncs = symbols & strdb_hgncs updated = 0 already_inserted = set() @@ -214,8 +224,8 @@ def update_stringdb_interactions(self, hgnc: Hgnc) -> int: if sorted_combi not in already_inserted: value_dict = {k: v for k, v in row.__dict__.items() if k in columns} - from_rid = self.get_create_rid_by_symbol(row.symbol1, bel_hgnc_rid_dict, hgnc) - to_rid = self.get_create_rid_by_symbol(row.symbol2, bel_hgnc_rid_dict, hgnc) + from_rid = self.get_create_rid_by_symbol(row.symbol1, hgnc) + to_rid = self.get_create_rid_by_symbol(row.symbol2, hgnc) if from_rid and to_rid: self.create_edge( @@ -229,15 +239,13 @@ def update_stringdb_interactions(self, hgnc: Hgnc) -> int: return updated - def get_create_rid_by_symbol(self, symbol: str, symbol_rid_dict: dict, hgnc: Hgnc) -> str: + def get_create_rid_by_symbol(self, symbol: str, hgnc: Hgnc) -> str: """Create or get rID entry for a given gene symbol. Parameters ---------- symbol: str Gene symbol. - symbol_rid_dict: dict - Entry parameters matching those of the desired rID entry. hgnc: Hgnc Hgnc model definition. @@ -246,17 +254,26 @@ def get_create_rid_by_symbol(self, symbol: str, symbol_rid_dict: dict, hgnc: Hgn str rID. """ - if symbol not in symbol_rid_dict: + if symbol not in self.symbol_rid_dict: symbol = hgnc.get_correct_symbol(symbol) if symbol: - value_dict = { - "name": symbol, - "namespace": "HGNC", - "pure": True, - "bel": f'p(HGNC:"{symbol}")', - } - symbol_rid_dict[symbol] = self.get_create_rid("protein", value_dict, check_for="bel") - return symbol_rid_dict.get(symbol) + bel = f'p(HGNC:"{symbol}")' + + if bel in self.bel_rid_dict: + self.symbol_rid_dict[symbol] = self.bel_rid_dict[bel] + + else: + value_dict = { + "name": symbol, + "namespace": "HGNC", + "pure": True, + "bel": bel, + } + new_rid = self.insert_record("protein", value_dict) + self.symbol_rid_dict[symbol] = new_rid + self.bel_rid_dict[bel] = new_rid + + return self.symbol_rid_dict.get(symbol) def update_action_interactions(self, hgnc: Hgnc) -> int: """Iterate through BEL proteins and add stringdb_action edges to existing proteins in KG. @@ -279,30 +296,33 @@ def update_action_interactions(self, hgnc: Hgnc) -> int: ("inhibition", "inhibition"): "inhibits_st", } + sdbaction = stringdb.StringDbAction Action = namedtuple("Action", ("symbol1", "symbol2", "mode", "action", "score")) - columns = ", ".join(Action._fields) - sql_temp = f"""Select {columns} from {self.table_action} - where mode in ('activation', 'inhibition', 'ptmod', 'expression') - and (symbol1='{{symbol}}' or symbol2='{{symbol}}') - and is_directional=1 and a_is_acting=1""" + modes = ("activation", "inhibition", "ptmod", "expression") - symbols_rid_dict = self.get_pure_symbol_rids_dict_in_bel_context(namespace="HGNC") - symbols = tuple(symbols_rid_dict.keys()) + symbols = tuple(self.symbol_rid_dict.keys()) already_inserted = set() updated = 0 for symbol in tqdm(symbols, desc="Update has_action_st edges"): - rows = self.engine.execute(sql_temp.format(symbol=symbol)) + sql = ( + select(sdbaction.symbol1, sdbaction.symbol2, sdbaction.mode, sdbaction.action, sdbaction.score) + .where(sdbaction.mode.in_(modes)) + .where(or_(sdbaction.symbol1 == symbol, sdbaction.symbol2 == symbol)) + .where(sdbaction.is_directional == 1) + .where(sdbaction.a_is_acting == 1) + ) + rows = self.session.execute(sql) for row in rows.fetchall(): action = Action(*row) sorted_combi = tuple(sorted([action.symbol1, action.symbol2])) if sorted_combi not in already_inserted: - from_rid = self.get_create_rid_by_symbol(action.symbol1, symbols_rid_dict, hgnc) - to_rid = self.get_create_rid_by_symbol(action.symbol2, symbols_rid_dict, hgnc) + from_rid = self.get_create_rid_by_symbol(action.symbol1, hgnc) + to_rid = self.get_create_rid_by_symbol(action.symbol2, hgnc) if from_rid and to_rid: class_name = translator[(action.mode, action.action)] diff --git a/ebel/manager/orientdb/biodbs/uniprot.py b/ebel/manager/orientdb/biodbs/uniprot.py index bfdfe1d..9fbc0c1 100644 --- a/ebel/manager/orientdb/biodbs/uniprot.py +++ b/ebel/manager/orientdb/biodbs/uniprot.py @@ -4,11 +4,13 @@ import os import re from collections import namedtuple +from pathlib import Path from typing import Dict, List, Tuple, Union import pandas as pd from lxml.etree import iterparse from pyorientdb import OrientDB +from sqlalchemy import select, text from tqdm import tqdm from ebel.defaults import default_tax_ids @@ -145,14 +147,14 @@ def insert_data(self) -> Dict[str, int]: """Insert UniProt data depending on NCBI taxonomy identifier.""" dialect = self.session.bind.dialect.name if dialect == "mysql": - self.engine.execute("SET FOREIGN_KEY_CHECKS=0") + self.session.execute(text("SET FOREIGN_KEY_CHECKS=0")) inserted = self.insert_uniprot() self.add_gene_symbols() self.session.commit() if dialect == "mysql": - self.engine.execute("SET FOREIGN_KEY_CHECKS=1") + self.session.execute(text("SET FOREIGN_KEY_CHECKS=1")) return {self.biodb_name: inserted} @@ -306,12 +308,13 @@ def _get_accesssion_recname(self, taxid, gene_symbol) -> Union[Tuple[str, str], If this has no result it tries uniprot by gene symbol and NCBI taxonomy ID. """ # TODO: This is in general a dangerous method because it selects the first accession number, but there could - # be more than one sql = ( - f"Select accession, recommended_name from uniprot as u inner join uniprot_gene_symbol as gs " - f'on (u.id=gs.uniprot_id) where u.taxid={taxid} and gs.symbol="{gene_symbol}" limit 1' + select(up.Uniprot.accession, up.Uniprot.recommended_name) + .join(up.GeneSymbol) + .where(up.Uniprot.taxid == taxid) + .where(up.GeneSymbol.symbol == gene_symbol) ) - results = self.engine.execute(sql) + results = self.session.execute(sql) return results.fetchone() if results else None def _update_proteins(self, namespace, taxid) -> int: @@ -337,33 +340,36 @@ def _update_proteins(self, namespace, taxid) -> int: def _get_recname_taxid_by_accession_from_uniprot_api(self, accession) -> Tuple[str, int]: """Fetch uniprot entry by accession and adds to the database. Returns recommended name.""" - sql = f"Select recommended_name,taxid from uniprot where accession='{accession}' limit 1" - result = self.engine.execute(sql).fetchone() + # sql = f"Select recommended_name,taxid from uniprot where accession='{accession}' limit 1" + sql = select(up.Uniprot.recommended_name, up.Uniprot.taxid).where(up.Uniprot.accession == accession) + result = self.session.execute(sql).fetchone() if result: return result def _update_uniprot_proteins(self) -> int: """Update all proteins using UNIPROT as namespace. Returns number of updated proteins.""" updated = 0 - sql_temp = "Select recommended_name, taxid from uniprot where accession='{}' limit 1" + sql_uniprot = 'SELECT distinct(name) as accession from protein WHERE namespace="UNIPROT"' sql_update = ( 'Update protein set uniprot = name, label = "{}", species = {} ' 'where namespace = "UNIPROT" and name = "{}"' ) + for protein in self.query(sql_uniprot).itertuples(index=False): - sql = sql_temp.format(protein.accession) - found = self.engine.execute(sql).fetchone() + found = self._get_recname_taxid_by_accession_from_uniprot_api(protein.accession) if found: recommended_name, taxid = found num_updated = self.execute(sql_update.format(recommended_name, taxid, protein.accession))[0] updated += num_updated + else: recname_taxid = self._get_recname_taxid_by_accession_from_uniprot_api(protein.accession) if recname_taxid: recommended_name, taxid = recname_taxid num_updated = self.execute(sql_update.format(recommended_name, taxid, protein.accession))[0] updated += num_updated + return updated def __read_linked_tables( @@ -478,10 +484,13 @@ def insert_uniprot(self) -> int: logger.info("Drop and create Uniprot table in RDBMS") logger.info("Insert data linked to Uniprot entry into RDBMS") - # avoid to use old gunzipped file - if os.path.exists(self.file_path_gunzipped): - os.remove(self.file_path_gunzipped) - if not os.path.exists(self.file_path_gunzipped): + + gunzipped_file = Path(self.file_path_gunzipped) + # Remove previous gunzipped file if present + if gunzipped_file.is_file(): + gunzipped_file.unlink() + + if not gunzipped_file.is_file(): # Gunzip compressed uniprot file gunzip(self.file_path, self.file_path_gunzipped) ( @@ -495,9 +504,9 @@ def insert_uniprot(self) -> int: self.__insert_linked_data(keywords, hosts, xrefs, functions, sclocations) inserted = self.__insert_uniprot_data(xrefs, functions, sclocations, number_of_entries) - # save storage space - if os.path.exists(self.file_path_gunzipped): - os.remove(self.file_path_gunzipped) + # save storage space by deleting uncompressed XML file + if gunzipped_file.is_file(): + gunzipped_file.unlink() # return number_of_entries return inserted diff --git a/ebel/manager/orientdb/odb_meta.py b/ebel/manager/orientdb/odb_meta.py index cb9b95a..15fa146 100644 --- a/ebel/manager/orientdb/odb_meta.py +++ b/ebel/manager/orientdb/odb_meta.py @@ -21,21 +21,28 @@ import sqlalchemy as sqla import xmltodict from pyorientdb import OrientDB, orient -from pyorientdb.exceptions import (PyOrientCommandException, - PyOrientIndexException, - PyOrientSecurityAccessException) +from pyorientdb.exceptions import ( + PyOrientBadMethodCallException, + PyOrientCommandException, + PyOrientIndexException, + PyOrientSecurityAccessException, + PyOrientSecurityException, +) from pyorientdb.otypes import OrientRecord +from sqlalchemy import func, select, text from sqlalchemy.sql.schema import Table from sqlalchemy_utils import create_database, database_exists from tqdm import tqdm import ebel.database +import ebel.defaults from ebel.cache import set_mysql_interactive from ebel.config import get_config_as_dict, get_config_value, write_to_config from ebel.constants import DEFAULT_ODB, RID from ebel.manager.orientdb import urls as default_urls -from ebel.manager.orientdb.odb_structure import (Edge, Generic, Node, OClass, - OIndex, OProperty) +from ebel.manager.orientdb.odb_structure import Edge, Generic, Node, OClass, OIndex, OProperty +from ebel.manager.rdbms.models import uniprot +from ebel.manager.rdbms.models.ensembl import Ensembl as ens from ebel.tools import BelRdb, chunks, get_file_path, get_standard_name type_map_inverse = {v: k for k, v in orient.type_map.items()} @@ -96,12 +103,15 @@ def __init__( self.engine = rdb.engine self.session = rdb.session - if not (get_config_value("DATABASE", "sqlalchemy_connection_string") or database_exists(self.engine.url)): - if str(self.engine.url).startswith("mysql"): + conn = get_config_value("DATABASE", "sqlalchemy_connection_string") + + if not conn: + dialect = self.session.bind.dialect.name + if dialect == "mysql": set_mysql_interactive() - else: - create_database(self.engine.url) + if not database_exists(self.engine.url): + create_database(self.engine.url) def __config_params_check(self, overwrite_config: bool = False): """Go through passed/available configuration params.""" @@ -157,15 +167,18 @@ def execute(self, command_str: str) -> List[OrientRecord]: try: return self.client.command(command_str) - # TODO: following exceptions seems not to cover connection error - # except (PyOrientCommandException, PyOrientSecurityAccessException): - except: + except ( + PyOrientCommandException, + PyOrientSecurityAccessException, + PyOrientBadMethodCallException, + PyOrientSecurityException, + ) as e: + logger.error(e) # Try to reconnect self.client.close() self.client = self.get_client() - # self.client.db_open(self.odb_name, self.odb_user, self.odb_password) # print(command_str) - return self.client.command(command_str) + return self.execute(command_str) def set_configuration_parameters(self): """Set configuration for OrientDB database client instance using configuration file or passed params.""" @@ -240,12 +253,12 @@ def clear_and_import_data(self) -> Dict[str, int]: return inserted def create_index_rdbms(self, table_name: str, columns): - """Creates index on column(s) in RDBMS.""" + """Creates index on mapped_column(s) in RDBMS.""" if isinstance(columns, str): columns = [columns] sql_columns = ",".join(columns) index_name = f"idx_{table_name}_" + "_".join(columns) - self.engine.execute(f"CREATE INDEX {index_name} ON {table_name} ({sql_columns})") + self.session.execute(f"CREATE INDEX {index_name} ON {table_name} ({sql_columns})") def clear_edges_by_bel_doc_rid(self, bel_document_rid: str, even_if_other_doc_rids_exists=True): """Delete all edges linked to a specified BEL document rID.""" @@ -400,7 +413,7 @@ def query_class( if distinct and len(cols) == 1: sql_cols = "distinct({})".format(sql_cols) - sql_temp = "SELECT {sql_cols} FROM `{class_name}` {where} {group_by} {sql_limit} {sql_skip}" + sql_temp = "SELECT {sql_cols} FROM {class_name} {where} {group_by} {sql_limit} {sql_skip}" sql = sql_temp.format( sql_cols=sql_cols, @@ -818,8 +831,9 @@ def number_of_generics(self) -> Dict[str, int]: if self.tables_base: for table_name, table in self.tables_base.metadata.tables.items(): if self.table_exists(table_name): - sql = f"Select count(*) from `{table_name}`" - numbers[table_name] = self.engine.execute(sql).fetchone()[0] + # sql = f"Select count(*) from `{table_name}`" + sql = select(func.count(table_name)) + numbers[table_name] = self.session.execute(sql).fetchone()[0] else: numbers[table_name] = 0 elif self.generic_classes: @@ -835,16 +849,21 @@ def __get_sql_where_part(params, where_list: Tuple[str] = ()): for column, value in params.items(): if isinstance(value, (str, list, dict)): if value == "notnull": - where_list.append("`{}` IS NOT NULL".format(column)) + where_list.append("{} IS NOT NULL".format(column)) + else: - where_list.append("`{}` = {}".format(column, json.dumps(value))) + where_list.append("{} = {}".format(column, json.dumps(value))) + elif isinstance(value, (int, float)): - where_list.append("`{}` = {}".format(column, value)) + where_list.append("{} = {}".format(column, value)) + elif value is None: - where_list.append("`{}` IS NULL".format(column)) + where_list.append("{` IS NULL".format(column)) + where = "" if where_list: - where = " WHERE " + " AND ".join(where_list) + where = "WHERE " + " AND ".join(where_list) + return where def get_number_of_class(self, class_name, distinct_column_name: str = None, **params): @@ -940,7 +959,9 @@ def node_exists( if check_for: check_for = [check_for] if isinstance(check_for, str) else check_for check_for_dict = {k: v for k, v in check_for_dict.items() if k in check_for} - result = self.query_class(class_name=class_name, limit=1, print_sql=print_sql, **check_for_dict) + result = self.query_class( + class_name=class_name, columns=[], limit=1, with_rid=True, print_sql=print_sql, **check_for_dict + ) if result: return result[0][RID] @@ -980,8 +1001,10 @@ def get_create_rid(self, class_name: str, value_dict: dict, check_for=None, prin check_for=check_for, print_sql=print_sql, ) + if not rid: rid = self.insert_record(class_name=class_name, value_dict=value_dict, print_sql=print_sql) + return rid def update_correlative_edges(self) -> List[str]: @@ -1319,36 +1342,57 @@ def get_set_gene_rids_by_position( gene_rids = defaultdict(list) sqls = dict() - sqls[ - "mapped" - ] = f"""Select symbol - from ensembl - where - start < {position} and - stop > {position} and - chromosome='{chromosome}' group by symbol""" - - sqls[ - "downstream" - ] = f"""Select symbol - from ensembl - where - start > {position} and - chromosome='{chromosome}' - order by start limit 1""" - - sqls[ - "upstream" - ] = f"""Select symbol - from ensembl - where - stop < {position} and - chromosome='{chromosome}' - order by stop desc limit 1""" + # sqls[ + # "mapped" + # ] = f"""Select symbol + # from ensembl + # where + # start < {position} and + # stop > {position} and + # chromosome='{chromosome}' group by symbol""" + sqls["mapped"] = ( + select(ens.symbol) + .where(ens.start < position) + .where(ens.stop > position) + .where(ens.chromosome == chromosome) + .group_by(ens.symbol) + ) + + # sqls[ + # "downstream" + # ] = f"""Select symbol + # from ensembl + # where + # start > {position} and + # chromosome='{chromosome}' + # order by start limit 1""" + sqls["downstream"] = ( + select(ens.symbol) + .where(ens.start > position) + .where(ens.chromosome == chromosome) + .limit(1) + .order_by(ens.start.asc()) + ) + + # sqls[ + # "upstream" + # ] = f"""Select symbol + # from ensembl + # where + # stop < {position} and + # chromosome='{chromosome}' + # order by stop desc limit 1""" + sqls["upstream"] = ( + select(ens.symbol) + .where(ens.stop < position) + .where(ens.chromosome == chromosome) + .limit(1) + .order_by(ens.stop.desc()) + ) for gene_type, sql in sqls.items(): if gene_type in gene_types: - results = self.engine.execute(sql) + results = self.session.execute(sql) for (symbol,) in results.fetchall(): bel = f'g(HGNC:"{symbol}")' data = { @@ -1438,6 +1482,7 @@ def delete_nodes_with_no_edges(self, class_name=None) -> int: ) logger.warning(wtext) return 0 + else: class_name = class_name if class_name is not None else "V" return self.execute(f"Delete VERTEX {class_name} where both().size() = 0")[0] @@ -1480,7 +1525,7 @@ def get_pure_symbol_rid_df_in_bel_context(self, class_name="protein", namespace= def get_pure_symbol_rids_dict(self, class_name="protein", namespace="HGNC") -> Dict[str, str]: """Return dictionary with protein name as keys and node rIDs as values.""" - results = self.query_class(class_name, pure=True, namespace=namespace) + results = self.query_class(class_name, pure=True, namespace=namespace, columns=["name"], with_rid=True) return {r["name"]: r["rid"] for r in results} def get_pure_rid_by_uniprot(self, uniprot: str): @@ -1499,14 +1544,33 @@ def get_pure_uniprot_rid_dict_in_bel_context(self) -> Dict[str, str]: # only include proteins which are also part of a BEL statement to avoid explosion of graph sql = """Select uniprot, @rid.asString() as rid from protein where pure=true and uniprot in ( - Select unionall(uniprot_list).asSet() as all_uniprots from (select unionall(in.uniprot, out.uniprot).asSet() as - uniprot_list from bel_relation where document IS NOT NULL - and (in.uniprot IS NOT NULL or out.uniprot IS NOT NULL)))""" + select set(unionall(in.uniprot, out.uniprot)) as all_uniprots from bel_relation where document IS NOT NULL)""" + # sql = "select uniprot, @rid.asString() as rid from protein where pure = true and uniprot is not null" return {r["uniprot"]: r["rid"] for r in self.query_get_dict(sql)} + def get_pure_bel_rid_dict(self) -> Dict[str, str]: + """Return a dictionary of pure bel representation and it's rid.""" + sql = "SELECT bel, @rid.asString() as rid from protein where pure=true" + results = self.query_get_dict(sql) + return {r["bel"]: r["rid"] for r in results} + def get_pure_uniprot_rids_dict(self): """Return dictionary with UniProt IDs as keys and node rIDs as values.""" sql = "Select uniprot, @rid.asString() as rid from protein where uniprot IS NOT NULL and pure=true" results = self.query_get_dict(sql) return {r["uniprot"]: r["rid"] for r in results} + + def get_uniprot_accession_namespaces(self) -> Dict[str, Tuple[str, str]]: + """Return a dictionary of uniprot accession keys and namespace and values.""" + sql = select(uniprot.Uniprot.accession, uniprot.GeneSymbol.symbol, uniprot.Uniprot.taxid).join(uniprot.Uniprot) + results = self.session.execute(sql).fetchall() + + acc_dict = dict() + taxid_to_namespace = {9606: "HGNC", 10090: "MGI", 10116: "RGD"} + for r in results: + accession, name, taxid = r + namespace = taxid_to_namespace.get(taxid, "UNIPROT") + acc_dict[accession] = (namespace, name) + + return acc_dict diff --git a/ebel/manager/orientdb/odb_structure.py b/ebel/manager/orientdb/odb_structure.py index a0fceb3..8e09ec0 100755 --- a/ebel/manager/orientdb/odb_structure.py +++ b/ebel/manager/orientdb/odb_structure.py @@ -9,8 +9,7 @@ from enum import Enum from typing import Dict, List, Optional, Tuple -from ebel.manager.orientdb.odb_defaults import (ODataType, OIndexType, - normalized_pmod) +from ebel.manager.orientdb.odb_defaults import ODataType, OIndexType, normalized_pmod class OClassType(Enum): @@ -463,8 +462,8 @@ def __init__(self, odb_class: OClass, columns: Tuple[str, ...], index_type: OInd bel_indices = ( OIndex(bel, ("bel",), OIndexType.NOTUNIQUE_HASH_INDEX), - OIndex(bel, ("involved_genes",), OIndexType.NOTUNIQUE_HASH_INDEX), - OIndex(bel, ("involved_other",), OIndexType.NOTUNIQUE_HASH_INDEX), + # OIndex(bel, ("involved_genes",), OIndexType.NOTUNIQUE_HASH_INDEX), + # OIndex(bel, ("involved_other",), OIndexType.NOTUNIQUE_HASH_INDEX), OIndex(bel_relation, ("evidence",), OIndexType.NOTUNIQUE_HASH_INDEX), OIndex(protein, ("uniprot",), OIndexType.NOTUNIQUE_HASH_INDEX), OIndex(bel_relation, ("annotation",), OIndexType.DICTIONARY), diff --git a/ebel/manager/orientdb/urls.py b/ebel/manager/orientdb/urls.py index cbf84f5..a83b81b 100755 --- a/ebel/manager/orientdb/urls.py +++ b/ebel/manager/orientdb/urls.py @@ -1,9 +1,9 @@ """URLs to download files.""" # HGNC # -HGNC_JSON = "ftp://ftp.ebi.ac.uk/pub/databases/genenames/new/json/hgnc_complete_set.json" -HGNC_TSV = "ftp://ftp.ebi.ac.uk/pub/databases/genenames/hgnc/tsv/hgnc_complete_set.txt" -HCOP_GZIP = "ftp://ftp.ebi.ac.uk/pub/databases/genenames/hcop/human_all_hcop_sixteen_column.txt.gz" +HGNC_JSON = "https://ftp.ebi.ac.uk/pub/databases/genenames/new/json/hgnc_complete_set.json" +HGNC_TSV = "https://ftp.ebi.ac.uk/pub/databases/genenames/hgnc/tsv/hgnc_complete_set.txt" +HCOP_GZIP = "https://ftp.ebi.ac.uk/pub/databases/genenames/hcop/human_all_hcop_sixteen_column.txt.gz" # UniProt # UNIPROT_SPROT = ( @@ -32,7 +32,7 @@ BIOGRID = ( "https://downloads.thebiogrid.org/Download/BioGRID/Release-Archive/BIOGRID-4.4.215/BIOGRID-ALL-4.4.215.tab3.zip" ) -INTACT = "ftp://ftp.ebi.ac.uk/pub/databases/intact/current/psimitab/intact.zip" +INTACT = "https://ftp.ebi.ac.uk/pub/databases/intact/current/psimitab/intact.zip" STITCH = "http://stitch.embl.de/download/protein_chemical.links.transfer.v5.0.tsv.gz" # String # @@ -52,15 +52,15 @@ WIKIPATHWAYS = "http://data.wikipathways.org/20180710/gpml/wikipathways-20180710-gpml-Homo_sapiens.zip" # Ensembl # -ENSEMBL_FASTA_PEP = "ftp://ftp.ensembl.org/pub/release-94/fasta/homo_sapiens/pep/Homo_sapiens.GRCh38.pep.all.fa.gz" -ENSEMBL_CDS = "ftp://ftp.ensembl.org/pub/release-96/fasta/homo_sapiens/cds/Homo_sapiens.GRCh38.cds.all.fa.gz" +ENSEMBL_FASTA_PEP = "https://ftp.ensembl.org/pub/release-94/fasta/homo_sapiens/pep/Homo_sapiens.GRCh38.pep.all.fa.gz" +ENSEMBL_CDS = "https://ftp.ensembl.org/pub/release-96/fasta/homo_sapiens/cds/Homo_sapiens.GRCh38.cds.all.fa.gz" # SIDER # SIDER_ATC = "http://sideeffects.embl.de/media/download/drug_atc.tsv" SIDER_SE = "http://sideeffects.embl.de/media/download/meddra_all_se.tsv.gz" # Expression Atlas # -EXPRESSION_ATLAS_BASE = "ftp://ftp.ebi.ac.uk/pub/databases/microarray/data/atlas/experiments/" +EXPRESSION_ATLAS_BASE = "https://ftp.ebi.ac.uk/pub/databases/microarray/data/atlas/experiments/" EXPRESSION_ATLAS_EXPERIMENTS = EXPRESSION_ATLAS_BASE + "atlas-latest-data.tar.gz" # DisGeNet # @@ -79,7 +79,7 @@ IUPHAR_LIGANDS = "https://www.guidetopharmacology.org/DATA/ligands.csv" # CHEBI # -CHEBI_BASE = "ftp://ftp.ebi.ac.uk/pub/databases/chebi/Flat_file_tab_delimited/" +CHEBI_BASE = "https://ftp.ebi.ac.uk/pub/databases/chebi/Flat_file_tab_delimited/" CHEBI_CHEMICALDATA = f"{CHEBI_BASE}chemical_data.tsv" CHEBI_COMMENT = f"{CHEBI_BASE}comments.tsv" CHEBI_COMPOUND = f"{CHEBI_BASE}compounds.tsv.gz" diff --git a/ebel/manager/rdbms/models/biogrid.py b/ebel/manager/rdbms/models/biogrid.py index c3af157..dd0ab2a 100644 --- a/ebel/manager/rdbms/models/biogrid.py +++ b/ebel/manager/rdbms/models/biogrid.py @@ -1,7 +1,8 @@ """BioGRID RDBMS model definition.""" -from sqlalchemy import Column, Float, ForeignKey, Integer, String, Text +from sqlalchemy import Float, ForeignKey, Integer, String, Text, select from sqlalchemy.ext.declarative import declarative_base -from sqlalchemy.orm import relationship +from sqlalchemy.orm import Mapped, aliased, mapped_column, relationship +from sqlalchemy_utils import create_view from ebel.manager.rdbms.models import object_as_dict @@ -12,26 +13,28 @@ class Biogrid(Base): """Class definition for the biogrid table.""" __tablename__ = "biogrid" - id = Column(Integer, primary_key=True) - - biogrid_a_id = Column(Integer, ForeignKey("biogrid_interactor.biogrid_id")) - biogrid_a = relationship("Interactor", foreign_keys=[biogrid_a_id]) - biogrid_b_id = Column(Integer, ForeignKey("biogrid_interactor.biogrid_id")) - biogrid_b = relationship("Interactor", foreign_keys=[biogrid_b_id]) - biogrid_id = Column(Integer, nullable=True) - experimental_system_id = Column(Integer, ForeignKey("biogrid_experimental_system.id")) - experimental_system = relationship("ExperimentalSystem", foreign_keys=[experimental_system_id]) - throughput_id = Column(Integer, ForeignKey("biogrid_throughput.id")) - throughput = relationship("Throughput", foreign_keys=[throughput_id]) - score = Column(Float, nullable=True) - modification_id = Column(Integer, ForeignKey("biogrid_modification.id")) - modification = relationship("Modification", foreign_keys=[modification_id]) - qualifications = Column(String(255), nullable=True) - source_id = Column(Integer, ForeignKey("biogrid_source.id")) - source = relationship("Source", foreign_keys=[source_id]) - publication_id = Column(Integer, ForeignKey("biogrid_publication.id")) - publication = relationship("Publication", foreign_keys=[publication_id]) - qualification = Column(Text, nullable=True) + id = mapped_column(Integer, primary_key=True) + + biogrid_a_id: Mapped[int] = mapped_column(ForeignKey("biogrid_interactor.biogrid_id")) + biogrid_a: Mapped["Interactor"] = relationship("Interactor", foreign_keys=[biogrid_a_id]) + biogrid_b_id: Mapped[int] = mapped_column(ForeignKey("biogrid_interactor.biogrid_id")) + biogrid_b: Mapped["Interactor"] = relationship("Interactor", foreign_keys=[biogrid_b_id]) + biogrid_id: Mapped[int] = mapped_column(nullable=True) + experimental_system_id: Mapped[int] = mapped_column(ForeignKey("biogrid_experimental_system.id")) + experimental_system: Mapped["ExperimentalSystem"] = relationship( + "ExperimentalSystem", foreign_keys=[experimental_system_id] + ) + throughput_id: Mapped[int] = mapped_column(ForeignKey("biogrid_throughput.id")) + throughput: Mapped["Throughput"] = relationship("Throughput", foreign_keys=[throughput_id]) + score: Mapped[float] = mapped_column(nullable=True) + modification_id: Mapped[int] = mapped_column(ForeignKey("biogrid_modification.id"), nullable=True) + modification: Mapped["Modification"] = relationship("Modification", foreign_keys=[modification_id]) + qualifications: Mapped[str] = mapped_column(String(255), nullable=True) + source_id: Mapped[int] = mapped_column(ForeignKey("biogrid_source.id")) + source: Mapped["Source"] = relationship("Source", foreign_keys=[source_id]) + publication_id: Mapped[int] = mapped_column(ForeignKey("biogrid_publication.id")) + publication: Mapped["Publication"] = relationship("Publication", foreign_keys=[publication_id]) + qualification: Mapped[str] = mapped_column(Text, nullable=True) def as_dict(self): """Convert object values to dictionary.""" @@ -53,11 +56,11 @@ class Publication(Base): """Class definition for the biogrid_publication table.""" __tablename__ = "biogrid_publication" - id = Column(Integer, primary_key=True) - author_name = Column(String(255), nullable=True) - publication_year = Column(Integer, nullable=True) - source = Column(String(255), nullable=True) - source_identifier = Column(String(255), nullable=True) + id: Mapped[int] = mapped_column(primary_key=True) + author_name: Mapped[str] = mapped_column(String(255), nullable=True) + publication_year: Mapped[int] = mapped_column(nullable=True) + source: Mapped[str] = mapped_column(String(255), nullable=True) + source_identifier: Mapped[str] = mapped_column(String(255), nullable=True) def as_dict(self): """Convert object values to dictionary.""" @@ -68,9 +71,9 @@ class Throughput(Base): """Class definition for the biogrid_throughput table.""" __tablename__ = "biogrid_throughput" - id = Column(Integer, primary_key=True) - throughput = Column(String(255)) - frequency = Column(Integer) + id: Mapped[int] = mapped_column(primary_key=True) + throughput: Mapped[str] = mapped_column(String(255)) + count: Mapped[int] = mapped_column() def as_dict(self): """Convert object values to dictionary.""" @@ -81,8 +84,8 @@ class Taxonomy(Base): """Class definition for the biogrid_taxonomy table.""" __tablename__ = "biogrid_taxonomy" - taxonomy_id = Column(Integer, primary_key=True) # == NCBI Taxonomy ID - organism_name = Column(String(1000)) + taxonomy_id: Mapped[int] = mapped_column(primary_key=True) # == NCBI Taxonomy ID + organism_name: Mapped[str] = mapped_column(String(1000)) def as_dict(self): """Convert object values to dictionary.""" @@ -93,10 +96,10 @@ class ExperimentalSystem(Base): """Class definition for the biogrid_experimental_system table.""" __tablename__ = "biogrid_experimental_system" - id = Column(Integer, primary_key=True) - experimental_system = Column(String(255), nullable=True) - experimental_system_type = Column(String(255), nullable=True) - frequency = Column(Integer) + id: Mapped[int] = mapped_column(primary_key=True) + experimental_system: Mapped[str] = mapped_column(String(255), nullable=True) + experimental_system_type: Mapped[str] = mapped_column(String(255), nullable=True) + count: Mapped[int] = mapped_column() def as_dict(self): """Convert object values to dictionary.""" @@ -107,15 +110,15 @@ class Interactor(Base): """Class definition for the biogrid_interactor table.""" __tablename__ = "biogrid_interactor" - biogrid_id = Column(Integer, primary_key=True) + biogrid_id: Mapped[int] = mapped_column(primary_key=True) - entrez = Column(Integer, nullable=True, index=True) - systematic_name = Column(String(255), nullable=True, index=True) - symbol = Column(String(255), nullable=True, index=True) - taxonomy_id = Column(Integer, ForeignKey("biogrid_taxonomy.taxonomy_id")) - taxonomy = relationship("Taxonomy", foreign_keys=[taxonomy_id]) - uniprot = Column(String(255), nullable=True, index=True) - trembl = Column(String(1000), nullable=True) + entrez: Mapped[int] = mapped_column(nullable=True, index=True) + systematic_name: Mapped[str] = mapped_column(String(255), nullable=True, index=True) + symbol: Mapped[str] = mapped_column(String(255), nullable=True, index=True) + taxonomy_id: Mapped[int] = mapped_column(ForeignKey("biogrid_taxonomy.taxonomy_id")) + taxonomy: Mapped["Taxonomy"] = relationship("Taxonomy", foreign_keys=[taxonomy_id]) + uniprot: Mapped[str] = mapped_column(String(255), nullable=True, index=True) + trembl: Mapped[str] = mapped_column(String(1000), nullable=True) def as_dict(self): """Convert object values to dictionary.""" @@ -133,8 +136,8 @@ class Source(Base): """Class definition for the biogrid_source table.""" __tablename__ = "biogrid_source" - id = Column(Integer, primary_key=True) - source = Column(String(255), nullable=True) + id: Mapped[int] = mapped_column(primary_key=True) + source: Mapped[str] = mapped_column(String(255), nullable=True) def as_dict(self): """Convert object values to dictionary.""" @@ -145,10 +148,56 @@ class Modification(Base): """Class definition for the biogrid_modification table.""" __tablename__ = "biogrid_modification" - id = Column(Integer, primary_key=True) - modification = Column(String(255), nullable=True) - frequency = Column(Integer) + id: Mapped[int] = mapped_column(primary_key=True) + modification: Mapped[str] = mapped_column(String(255), nullable=True) + count: Mapped[int] = mapped_column() def as_dict(self): """Convert object values to dictionary.""" return object_as_dict(self, exclude=["id"]) + + +class BiogridView(Base): + """SQL view for Biogrid.""" + + b = Biogrid + ia = aliased(Interactor) + ib = aliased(Interactor) + m = Modification + p = Publication + es = ExperimentalSystem + ta = aliased(Taxonomy) + tb = aliased(Taxonomy) + s = Source + + stmt = ( + select( + b.biogrid_id, + ia.symbol.label("symbol_a"), + ia.uniprot.label("uniprot_a"), + ta.taxonomy_id.label("tax_id_a"), + ta.organism_name.label("organism_a"), + ib.symbol.label("symbol_b"), + ib.uniprot.label("uniprot_b"), + tb.taxonomy_id.label("tax_id_b"), + tb.organism_name.label("organism_b"), + es.experimental_system, + m.modification, + s.source, + b.qualification, + p.source.label("publication_source"), + p.source_identifier.label("publication_identifier"), + ) + .join(ia, b.biogrid_a_id == ia.biogrid_id) + .join(ib, b.biogrid_b_id == ib.biogrid_id) + .join(ta, ia.taxonomy_id == ta.taxonomy_id) + .join(tb, ib.taxonomy_id == tb.taxonomy_id) + .join(es, b.experimental_system_id == es.id, isouter=True) + .join(m, m.id == b.modification_id, isouter=True) + .join(s, b.source_id == s.id, isouter=True) + .join(p, b.publication_id == p.id, isouter=True) + ) + + view = create_view(name="biogrid_view", selectable=stmt, metadata=Base.metadata) + + __table__ = view diff --git a/ebel/manager/rdbms/models/chebi.py b/ebel/manager/rdbms/models/chebi.py index 28ea3ce..365968e 100644 --- a/ebel/manager/rdbms/models/chebi.py +++ b/ebel/manager/rdbms/models/chebi.py @@ -1,9 +1,10 @@ """CHEBI RDBMS model definition.""" +import datetime +from typing import List, Optional -from sqlalchemy import (Column, DateTime, ForeignKey, Index, Integer, String, - Text) +from sqlalchemy import DateTime, ForeignKey, Index, Integer, String, Text from sqlalchemy.ext.declarative import declarative_base -from sqlalchemy.orm import relationship +from sqlalchemy.orm import Mapped, mapped_column, relationship Base = declarative_base() @@ -12,14 +13,14 @@ class ChemicalData(Base): """Class definition for the chebi_chemical_data table.""" __tablename__ = "chebi_chemical_data" - id = Column(Integer, primary_key=True) + id: Mapped[int] = mapped_column(primary_key=True) - chemical_data = Column(Text, nullable=True) - source = Column(Text, nullable=False) - type = Column(Text, nullable=False) + chemical_data: Mapped[Optional[str]] = mapped_column(Text) + source: Mapped[str] = mapped_column(Text, nullable=False) + type: Mapped[str] = mapped_column(Text, nullable=False) - compound_id = Column(Integer, ForeignKey("chebi_compound.id")) - compounds = relationship("Compound", back_populates="chemicalData") + compound_id: Mapped[int] = mapped_column(ForeignKey("chebi_compound.id")) + compounds: Mapped["Compound"] = relationship("Compound", back_populates="chemicalData") def __str__(self): """Class string definition.""" @@ -38,15 +39,15 @@ class Comment(Base): """Class definition for the chebi_comment table.""" __tablename__ = "chebi_comment" - id = Column(Integer, primary_key=True) + id: Mapped[int] = mapped_column(primary_key=True) - text = Column(Text, nullable=False) - created_on = Column(DateTime, nullable=False) - datatype = Column(String(80)) - datatype_id = Column(Integer, nullable=False) + text: Mapped[str] = mapped_column(Text, nullable=False) + created_on: Mapped[datetime.datetime] = mapped_column(DateTime, nullable=False) + datatype: Mapped[str] = mapped_column(String(80)) + datatype_id: Mapped[int] = mapped_column(nullable=False) - compound_id = Column(Integer, ForeignKey("chebi_compound.id")) - compounds = relationship("Compound", back_populates="comments") + compound_id: Mapped[int] = mapped_column(ForeignKey("chebi_compound.id")) + compounds: Mapped["Compound"] = relationship("Compound", back_populates="comments") def __str__(self): """Class string definition.""" @@ -64,27 +65,29 @@ class Compound(Base): """Class definition for the chebi_compound table.""" __tablename__ = "chebi_compound" - id = Column(Integer, primary_key=True) - - name = Column(String(2000)) - source = Column(String(32), nullable=False) - parent_id = Column(Integer) - chebi_accession = Column(String(30), nullable=False) - status = Column(String(1), nullable=False) - definition = Column(Text) - star = Column(Integer, nullable=False) - modified_on = Column(Text) - created_by = Column(Text) - - chemicalData = relationship("ChemicalData", back_populates="compounds") - comments = relationship("Comment", back_populates="compounds") - database_accessions = relationship("DatabaseAccession", back_populates="compounds") - names = relationship("Name", back_populates="compounds") - references = relationship("Reference", back_populates="compounds") + id: Mapped[int] = mapped_column(primary_key=True) + + name: Mapped[Optional[str]] = mapped_column(String(2000)) + source: Mapped[str] = mapped_column(String(32), nullable=False) + parent_id: Mapped[Optional[int]] = mapped_column() + chebi_accession: Mapped[str] = mapped_column(String(30), nullable=False) + status: Mapped[str] = mapped_column(String(1), nullable=False) + definition: Mapped[Optional[str]] = mapped_column(Text) + star: Mapped[int] = mapped_column(nullable=False) + modified_on: Mapped[Optional[str]] = mapped_column(Text) + created_by: Mapped[Optional[str]] = mapped_column(Text) + + chemicalData: Mapped[List["ChemicalData"]] = relationship("ChemicalData", back_populates="compounds") + comments: Mapped[List["Comment"]] = relationship("Comment", back_populates="compounds") + database_accessions: Mapped[List["DatabaseAccession"]] = relationship( + "DatabaseAccession", back_populates="compounds" + ) + names: Mapped[List["Name"]] = relationship("Name", back_populates="compounds") + references: Mapped[List["Reference"]] = relationship("Reference", back_populates="compounds") # final_id_relations = relationship("Relation", back_populates="final_id_compounds") # init_id_relations = relationship("Relation", back_populates="init_id_compounds") - structures = relationship("Structure", back_populates="compounds") - inchis = relationship("Inchi", back_populates="compounds") + structures: Mapped[List["Structure"]] = relationship("Structure", back_populates="compounds") + inchis: Mapped[List["Inchi"]] = relationship("Inchi", back_populates="compounds") def __str__(self): return self.name @@ -111,12 +114,12 @@ class Inchi(Base): """Class definition for the chebi_inchi table.""" __tablename__ = "chebi_inchi" - id = Column(Integer, primary_key=True) + id: Mapped[int] = mapped_column(primary_key=True) - inchi = Column(Text) + inchi: Mapped[str] = mapped_column(Text) - compound_id = Column(Integer, ForeignKey("chebi_compound.id")) - compounds = relationship("Compound", back_populates="inchis") + compound_id: Mapped[int] = mapped_column(ForeignKey("chebi_compound.id")) + compounds: Mapped[List["Compound"]] = relationship("Compound", back_populates="inchis") def __str__(self): return self.inchi @@ -130,14 +133,14 @@ class DatabaseAccession(Base): """Class definition for the chebi_database_accession table.""" __tablename__ = "chebi_database_accession" - id = Column(Integer, primary_key=True) + id: Mapped[int] = mapped_column(primary_key=True) - accession_number = Column(String(255), nullable=True) - type = Column(Text, nullable=False) - source = Column(Text, nullable=False) + accession_number: Mapped[Optional[str]] = mapped_column(String(255)) + type: Mapped[str] = mapped_column(Text, nullable=False) + source: Mapped[str] = mapped_column(Text, nullable=False) - compound_id = Column(Integer, ForeignKey("chebi_compound.id")) - compounds = relationship("Compound", back_populates="database_accessions") + compound_id: Mapped[int] = mapped_column(ForeignKey("chebi_compound.id")) + compounds: Mapped[List["Compound"]] = relationship("Compound", back_populates="database_accessions") def __str__(self): return self.accession_number @@ -155,16 +158,16 @@ class Name(Base): """Class definition for the chebi_name table.""" __tablename__ = "chebi_name" - id = Column(Integer, primary_key=True) + id: Mapped[int] = mapped_column(primary_key=True) - name = Column(Text, nullable=True) - type = Column(Text, nullable=False) - source = Column(Text, nullable=False) - adapted = Column(Text, nullable=False) - language = Column(Text, nullable=False) + name: Mapped[Optional[str]] = mapped_column(Text) + type: Mapped[str] = mapped_column(Text, nullable=False) + source: Mapped[str] = mapped_column(Text, nullable=False) + adapted: Mapped[str] = mapped_column(Text, nullable=False) + language: Mapped[str] = mapped_column(Text, nullable=False) - compound_id = Column(Integer, ForeignKey("chebi_compound.id")) - compounds = relationship("Compound", back_populates="names") + compound_id: Mapped[int] = mapped_column(ForeignKey("chebi_compound.id")) + compounds: Mapped[List["Compound"]] = relationship("Compound", back_populates="names") def __str__(self): return self.name @@ -185,15 +188,15 @@ class Reference(Base): __tablename__ = "chebi_reference" - id = Column(Integer, primary_key=True) + id: Mapped[int] = mapped_column(primary_key=True) - reference_id = Column(String(60), nullable=False, index=True) - reference_db_name = Column(String(60), nullable=False, index=True) - location_in_ref = Column(String(90), index=True) - reference_name = Column(String(1024)) + reference_id: Mapped[str] = mapped_column(String(60), nullable=False, index=True) + reference_db_name: Mapped[str] = mapped_column(String(60), nullable=False, index=True) + location_in_ref: Mapped[Optional[str]] = mapped_column(String(90), index=True) + reference_name: Mapped[Optional[str]] = mapped_column(String(1024)) - compound_id = Column(Integer, ForeignKey("chebi_compound.id")) - compounds = relationship("Compound", back_populates="references") + compound_id: Mapped[int] = mapped_column(ForeignKey("chebi_compound.id")) + compounds: Mapped[List["Compound"]] = relationship("Compound", back_populates="references") __table_args__ = (Index("ix_chebi_reference__reference_name", reference_name, mysql_length=500),) @@ -224,16 +227,16 @@ class Relation(Base): """Class definition for the chebi_relation table.""" __tablename__ = "chebi_relation" - id = Column(Integer, primary_key=True) + id: Mapped[int] = mapped_column(primary_key=True) - type = Column(Text, nullable=False) - status = Column(String(1), nullable=False) + type: Mapped[str] = mapped_column(Text, nullable=False) + status: Mapped[str] = mapped_column(String(1), nullable=False) - final_id = Column(Integer, ForeignKey("chebi_compound.id")) - init_id = Column(Integer, ForeignKey("chebi_compound.id")) + final_id: Mapped[int] = mapped_column(ForeignKey("chebi_compound.id")) + init_id: Mapped[int] = mapped_column(ForeignKey("chebi_compound.id")) - final_id_compounds = relationship("Compound", foreign_keys=[final_id]) - init_id_compounds = relationship("Compound", foreign_keys=[init_id]) + final_id_compounds: Mapped[List["Compound"]] = relationship("Compound", foreign_keys=[final_id]) + init_id_compounds: Mapped[List["Compound"]] = relationship("Compound", foreign_keys=[init_id]) def __str__(self): return f"{self.type} - {self.status}" @@ -252,16 +255,16 @@ class Structure(Base): """Class definition for the chebi_structure table.""" __tablename__ = "chebi_structure" - id = Column(Integer, primary_key=True) + id: Mapped[int] = mapped_column(primary_key=True) - structure = Column(Text, nullable=False) - type = Column(Text, nullable=False) - dimension = Column(Text, nullable=False) - default_structure = Column(String(1), nullable=False) - autogen_structure = Column(String(1), nullable=False) + structure: Mapped[str] = mapped_column(Text, nullable=False) + type: Mapped[str] = mapped_column(Text, nullable=False) + dimension: Mapped[str] = mapped_column(Text, nullable=False) + default_structure: Mapped[str] = mapped_column(String(1), nullable=False) + autogen_structure: Mapped[str] = mapped_column(String(1), nullable=False) - compound_id = Column(Integer, ForeignKey("chebi_compound.id")) - compounds = relationship("Compound", back_populates="structures") + compound_id: Mapped[int] = mapped_column(ForeignKey("chebi_compound.id")) + compounds: Mapped[List["Compound"]] = relationship("Compound", back_populates="structures") def __str__(self): return self.structure diff --git a/ebel/manager/rdbms/models/clinical_trials_gov.py b/ebel/manager/rdbms/models/clinical_trials_gov.py index f2f02ba..c56daef 100644 --- a/ebel/manager/rdbms/models/clinical_trials_gov.py +++ b/ebel/manager/rdbms/models/clinical_trials_gov.py @@ -1,9 +1,10 @@ """ClinicalTrials.gov RDBMS model definition.""" import re +from typing import List, Optional from sqlalchemy import Column, ForeignKey, Integer, String, Table, Text from sqlalchemy.ext.declarative import declarative_base -from sqlalchemy.orm import relationship +from sqlalchemy.orm import Mapped, mapped_column, relationship from ebel.manager.rdbms.models import object_as_dict @@ -83,49 +84,49 @@ class ClinicalTrialGov(Base): __tablename__ = "clinical_trials_gov" - id = Column(Integer, primary_key=True) - nct_id = Column(String(100), index=True) - org_study_id = Column(Text) - brief_title = Column(Text) - official_title = Column(Text) - is_fda_regulated_drug = Column(Text) - brief_summary = Column(Text) - detailed_description = Column(Text) - overall_status = Column(Text) - start_date = Column(Text) - completion_date = Column(Text) - phase = Column(Text) - study_type = Column(Text) - study_design_intervention_model = Column(Text) - study_design_primary_purpose = Column(Text) - study_design_masking = Column(Text) + id: Mapped[int] = mapped_column(primary_key=True) + nct_id = mapped_column(String(100), index=True) + org_study_id: Mapped[Optional[str]] = mapped_column(Text) + brief_title: Mapped[Optional[str]] = mapped_column(Text) + official_title: Mapped[Optional[str]] = mapped_column(Text) + is_fda_regulated_drug: Mapped[Optional[str]] = mapped_column(Text) + brief_summary: Mapped[Optional[str]] = mapped_column(Text) + detailed_description: Mapped[Optional[str]] = mapped_column(Text) + overall_status: Mapped[Optional[str]] = mapped_column(Text) + start_date: Mapped[Optional[str]] = mapped_column(Text) + completion_date: Mapped[Optional[str]] = mapped_column(Text) + phase: Mapped[Optional[str]] = mapped_column(Text) + study_type: Mapped[Optional[str]] = mapped_column(Text) + study_design_intervention_model: Mapped[Optional[str]] = mapped_column(Text) + study_design_primary_purpose: Mapped[Optional[str]] = mapped_column(Text) + study_design_masking: Mapped[Optional[str]] = mapped_column(Text) # primary_outcomes # secondary_outcomes - patient_data_sharing_ipd = Column(Text) - patient_data_ipd_description = Column(Text) + patient_data_sharing_ipd: Mapped[Optional[str]] = mapped_column(Text) + patient_data_ipd_description: Mapped[Optional[str]] = mapped_column(Text) - keywords = relationship( + keywords: Mapped[List["Keyword"]] = relationship( "Keyword", secondary=ctg_keyword_n2m, back_populates="trials", cascade="save-update", ) - conditions = relationship( + conditions: Mapped[List["Condition"]] = relationship( "Condition", secondary=ctg_condition_n2m, back_populates="trials", cascade="save-update", ) - mesh_terms = relationship( + mesh_terms: Mapped[List["MeshTerm"]] = relationship( "MeshTerm", secondary=ctg_mesh_term_n2m, back_populates="trials", cascade="save-update", ) - interventions = relationship( + interventions: Mapped[List["Intervention"]] = relationship( "Intervention", secondary=ctg_intervention_n2m, back_populates="trials", @@ -157,9 +158,11 @@ class Keyword(Base): """Class definition for the clinical_trials_gov_keyword table.""" __tablename__ = "clinical_trials_gov_keyword" - id = Column(Integer, primary_key=True) - keyword = Column(String(255), index=True) - trials = relationship("ClinicalTrialGov", secondary=ctg_keyword_n2m, back_populates="keywords") + id: Mapped[int] = mapped_column(primary_key=True) + keyword: Mapped[str] = mapped_column(String(255), index=True) + trials: Mapped[List["ClinicalTrialGov"]] = relationship( + "ClinicalTrialGov", secondary=ctg_keyword_n2m, back_populates="keywords" + ) def as_dict(self): """Convert object values to dictionary.""" @@ -170,9 +173,11 @@ class Condition(Base): """Class definition for the clinical_trials_gov_condition table.""" __tablename__ = "clinical_trials_gov_condition" - id = Column(Integer, primary_key=True) - condition = Column(Text) - trials = relationship("ClinicalTrialGov", secondary=ctg_condition_n2m, back_populates="conditions") + id: Mapped[int] = mapped_column(primary_key=True) + condition: Mapped[str] = mapped_column(Text) + trials: Mapped[List["ClinicalTrialGov"]] = relationship( + "ClinicalTrialGov", secondary=ctg_condition_n2m, back_populates="conditions" + ) def as_dict(self): """Convert object values to dictionary.""" @@ -183,9 +188,11 @@ class MeshTerm(Base): """Class definition for the clinical_trials_gov_mesh_term table.""" __tablename__ = "clinical_trials_gov_mesh_term" - id = Column(Integer, primary_key=True) - mesh_term = Column(String(100), unique=True) - trials = relationship("ClinicalTrialGov", secondary=ctg_mesh_term_n2m, back_populates="mesh_terms") + id: Mapped[int] = mapped_column(primary_key=True) + mesh_term: Mapped[str] = mapped_column(String(100), unique=True) + trials: Mapped[List["ClinicalTrialGov"]] = relationship( + "ClinicalTrialGov", secondary=ctg_mesh_term_n2m, back_populates="mesh_terms" + ) def as_dict(self): """Convert object values to dictionary.""" @@ -200,10 +207,10 @@ class Intervention(Base): """Class definition for the clinical_trials_gov_intervention table.""" __tablename__ = "clinical_trials_gov_intervention" - id = Column(Integer, primary_key=True) - intervention_type = Column(String(100), index=True) - intervention_name = Column(String(255), index=True) - trials = relationship( + id: Mapped[int] = mapped_column(primary_key=True) + intervention_type: Mapped[str] = mapped_column(String(100), index=True) + intervention_name: Mapped[str] = mapped_column(String(255), index=True) + trials: Mapped[List["ClinicalTrialGov"]] = relationship( "ClinicalTrialGov", secondary=ctg_intervention_n2m, back_populates="interventions", diff --git a/ebel/manager/rdbms/models/clinvar.py b/ebel/manager/rdbms/models/clinvar.py index a7995ab..23ffd65 100644 --- a/ebel/manager/rdbms/models/clinvar.py +++ b/ebel/manager/rdbms/models/clinvar.py @@ -1,7 +1,9 @@ """ClinVar RDBMS model definition.""" +from typing import List, Optional + from sqlalchemy import Column, ForeignKey, Index, Integer, String, Table, Text from sqlalchemy.ext.declarative import declarative_base -from sqlalchemy.orm import relationship +from sqlalchemy.orm import Mapped, mapped_column, relationship from ebel.manager.rdbms.models import object_as_dict @@ -19,23 +21,23 @@ class ClinvarPhenotypeMedgen(Base): """Class definition for the clinvar_phenotype_medgen table.""" __tablename__ = "clinvar_phenotype_medgen" - id = Column(Integer, primary_key=True) + id: Mapped[int] = mapped_column(primary_key=True) - identifier = Column(String(100), index=True) - clinvar_id = Column(Integer, ForeignKey("clinvar.id")) - clinvar = relationship("Clinvar", foreign_keys=[clinvar_id], viewonly=True) + identifier: Mapped[str] = mapped_column(String(100), index=True) + clinvar_id: Mapped[int] = mapped_column(Integer, ForeignKey("clinvar.id")) + clinvar: Mapped["Clinvar"] = relationship("Clinvar", foreign_keys=[clinvar_id], viewonly=True) class ClinvarOtherIdentifier(Base): """Class definition for the clinvar_other_identifier table.""" __tablename__ = "clinvar_other_identifier" - id = Column(Integer, primary_key=True) + id: Mapped[int] = mapped_column(primary_key=True) - db = Column(String(100), index=True) - identifier = Column(String(100), index=True) - clinvar_id = Column(Integer, ForeignKey("clinvar.id")) - clinvar = relationship("Clinvar", foreign_keys=[clinvar_id], viewonly=True) + db: Mapped[str] = mapped_column(String(100), index=True) + identifier: Mapped[str] = mapped_column(String(100), index=True) + clinvar_id: Mapped[int] = mapped_column(ForeignKey("clinvar.id")) + clinvar: Mapped["Clinvar"] = relationship("Clinvar", foreign_keys=[clinvar_id], viewonly=True) def as_dict(self): """Convert object values to dictionary.""" @@ -46,44 +48,50 @@ class Clinvar(Base): """Class definition for the clinvar table.""" __tablename__ = "clinvar" - id = Column(Integer, primary_key=True) - - allele_id = Column(Integer) - type = Column(String(100)) - name = Column(String(1000)) - gene_id = Column(Integer, index=True) - gene_symbol = Column(String(1000)) - hgnc_id = Column(String(100)) - clinical_significance = Column(String(100)) - clin_sig_simple = Column(Integer) - last_evaluated = Column(String(100)) - rs_db_snp = Column(Integer, index=True) - nsv_esv_db_var = Column(String(100)) - rcvaccession = Column(String(1000)) - origin = Column(Text) - origin_simple = Column(Text) - assembly = Column(String(100), index=True) - chromosome_accession = Column(Text) - chromosome = Column(Text) - start = Column(Integer) - stop = Column(Integer) - reference_allele = Column(Text) - alternate_allele = Column(Text) - cytogenetic = Column(Text) - review_status = Column(Text) - number_submitters = Column(Integer) - guidelines = Column(Text) - tested_in_gtr = Column(Text) - submitter_categories = Column(Integer) - variation_id = Column(Integer) - position_vcf = Column(Integer) - reference_allele_vcf = Column(Text(100000)) - alternate_allele_vcf = Column(Text(100000)) - - phenotypeMedgens = relationship("ClinvarPhenotypeMedgen", foreign_keys=[ClinvarPhenotypeMedgen.clinvar_id]) - otherIdentifiers = relationship("ClinvarOtherIdentifier", foreign_keys=[ClinvarOtherIdentifier.clinvar_id]) - - phenotypes = relationship("ClinvarPhenotype", secondary=clinvar__clinvar_phenotype) + id: Mapped[int] = mapped_column(primary_key=True) + + allele_id: Mapped[int] = mapped_column() + type: Mapped[str] = mapped_column(String(100)) + name: Mapped[str] = mapped_column(String(1000)) + gene_id: Mapped[int] = mapped_column(index=True) + gene_symbol: Mapped[Optional[str]] = mapped_column(String(1000)) + hgnc_id: Mapped[Optional[str]] = mapped_column(String(100)) + clinical_significance: Mapped[str] = mapped_column(String(100)) + clin_sig_simple: Mapped[int] = mapped_column() + last_evaluated: Mapped[Optional[str]] = mapped_column(String(100)) + rs_db_snp: Mapped[int] = mapped_column(index=True) + nsv_esv_db_var: Mapped[Optional[str]] = mapped_column(String(100)) + rcvaccession: Mapped[Optional[str]] = mapped_column(String(1000)) + origin: Mapped[str] = mapped_column(Text) + origin_simple: Mapped[str] = mapped_column(Text) + assembly: Mapped[str] = mapped_column(String(100), index=True) + chromosome_accession: Mapped[str] = mapped_column(Text) + chromosome: Mapped[str] = mapped_column(Text) + start: Mapped[int] = mapped_column() + stop: Mapped[int] = mapped_column() + reference_allele: Mapped[Optional[str]] = mapped_column(Text) + alternate_allele: Mapped[Optional[str]] = mapped_column(Text) + cytogenetic: Mapped[Optional[str]] = mapped_column(Text) + review_status: Mapped[str] = mapped_column(Text) + number_submitters: Mapped[int] = mapped_column() + guidelines: Mapped[Optional[str]] = mapped_column(Text) + tested_in_gtr: Mapped[str] = mapped_column(Text) + submitter_categories: Mapped[int] = mapped_column() + variation_id: Mapped[int] = mapped_column() + position_vcf: Mapped[int] = mapped_column() + reference_allele_vcf: Mapped[Optional[str]] = mapped_column(Text(100000)) + alternate_allele_vcf: Mapped[Optional[str]] = mapped_column(Text(100000)) + + phenotypeMedgens: Mapped[List["ClinvarPhenotypeMedgen"]] = relationship( + "ClinvarPhenotypeMedgen", foreign_keys=[ClinvarPhenotypeMedgen.clinvar_id] + ) + otherIdentifiers: Mapped[List["ClinvarOtherIdentifier"]] = relationship( + "ClinvarOtherIdentifier", foreign_keys=[ClinvarOtherIdentifier.clinvar_id] + ) + + phenotypes: Mapped[List["ClinvarPhenotype"]] = relationship( + "ClinvarPhenotype", secondary=clinvar__clinvar_phenotype + ) __table_args__ = (Index("ix_clinvar__gene_symbol", gene_symbol, mysql_length=500),) @@ -99,8 +107,8 @@ class ClinvarPhenotype(Base): """Class definition for the clinvar_phenotype table.""" __tablename__ = "clinvar_phenotype" - id = Column(Integer, primary_key=True) - phenotype = Column(Text) + id = mapped_column(Integer, primary_key=True) + phenotype = mapped_column(Text) clinvars = relationship("Clinvar", secondary=clinvar__clinvar_phenotype, back_populates="phenotypes") diff --git a/ebel/manager/rdbms/models/disgenet.py b/ebel/manager/rdbms/models/disgenet.py index cd32bcf..7959b9a 100644 --- a/ebel/manager/rdbms/models/disgenet.py +++ b/ebel/manager/rdbms/models/disgenet.py @@ -1,7 +1,9 @@ """DisGeNet RDBMS model definition.""" -from sqlalchemy import BigInteger, Column, Float, ForeignKey, Integer, String +from typing import List, Optional + +from sqlalchemy import BigInteger, Float, ForeignKey, Integer, String from sqlalchemy.ext.declarative import declarative_base -from sqlalchemy.orm import relationship +from sqlalchemy.orm import Mapped, mapped_column, relationship from ebel.manager.rdbms.models import object_as_dict @@ -12,16 +14,18 @@ class DisgenetGene(Base): """Class definition for the disgenet_gene table.""" __tablename__ = "disgenet_gene" - id = Column(Integer, primary_key=True) - - gene_id = Column(Integer, ForeignKey("disgenet_gene_symbol.gene_id")) - gene_symbol = relationship("DisgenetGeneSymbol", back_populates="gene_disease_pmid_associations") - disease_id = Column(String(100), ForeignKey("disgenet_disease.disease_id")) - disease = relationship("DisgenetDisease", foreign_keys=[disease_id]) - score = Column(Float) - pmid = Column(BigInteger) - source_id = Column(Integer, ForeignKey("disgenet_source.id")) - source = relationship("DisgenetSource", foreign_keys=[source_id]) + id: Mapped[int] = mapped_column(primary_key=True) + + gene_id: Mapped[int] = mapped_column(ForeignKey("disgenet_gene_symbol.gene_id")) + gene_symbol: Mapped["DisgenetGeneSymbol"] = relationship( + "DisgenetGeneSymbol", back_populates="gene_disease_pmid_associations" + ) + disease_id: Mapped[str] = mapped_column(String(100), ForeignKey("disgenet_disease.disease_id")) + disease: Mapped["DisgenetDisease"] = relationship("DisgenetDisease", foreign_keys=[disease_id]) + score: Mapped[float] = mapped_column() + pmid: Mapped[Optional[int]] = mapped_column() + source_id: Mapped[Optional[int]] = mapped_column(ForeignKey("disgenet_source.id")) + source: Mapped["DisgenetSource"] = relationship("DisgenetSource", foreign_keys=[source_id]) def as_dict(self): """Convert object values to dictionary.""" @@ -40,10 +44,12 @@ class DisgenetGeneSymbol(Base): """Class definition for the disgenet_gene_symbol table.""" __tablename__ = "disgenet_gene_symbol" - gene_id = Column(Integer, primary_key=True) - gene_symbol = Column(String(50), index=True) + gene_id: Mapped[int] = mapped_column(primary_key=True) + gene_symbol: Mapped[str] = mapped_column(String(50), index=True) - gene_disease_pmid_associations = relationship("DisgenetGene", back_populates="gene_symbol") + gene_disease_pmid_associations: Mapped[List["DisgenetGene"]] = relationship( + "DisgenetGene", back_populates="gene_symbol" + ) def as_dict(self): """Convert object values to dictionary.""" @@ -54,17 +60,17 @@ class DisgenetVariant(Base): """Class definition for the disgenet_variant table.""" __tablename__ = "disgenet_variant" - id = Column(Integer, primary_key=True) - - snp_id = Column(String(20), index=True) - chromosome = Column(String(2)) - position = Column(BigInteger) - disease_id = Column(String(100), ForeignKey("disgenet_disease.disease_id")) - disease = relationship("DisgenetDisease", foreign_keys=[disease_id]) - score = Column(Float) - pmid = Column(BigInteger, index=True) - source_id = Column(Integer, ForeignKey("disgenet_source.id")) - source = relationship("DisgenetSource", foreign_keys=[source_id]) + id: Mapped[int] = mapped_column(primary_key=True) + + snp_id: Mapped[str] = mapped_column(String(20), index=True) + chromosome: Mapped[str] = mapped_column(String(2)) + position: Mapped[int] = mapped_column() + disease_id: Mapped[str] = mapped_column(String(100), ForeignKey("disgenet_disease.disease_id")) + disease: Mapped["DisgenetDisease"] = relationship("DisgenetDisease", foreign_keys=[disease_id]) + score: Mapped[float] = mapped_column() + pmid: Mapped[Optional[int]] = mapped_column(index=True) + source_id: Mapped[Optional[int]] = mapped_column(ForeignKey("disgenet_source.id")) + source: Mapped["DisgenetSource"] = relationship("DisgenetSource", foreign_keys=[source_id]) def as_dict(self): """Convert object values to dictionary.""" @@ -77,8 +83,8 @@ class DisgenetDisease(Base): """Class definition for the disgenet_disease table.""" __tablename__ = "disgenet_disease" - disease_id = Column(String(100), primary_key=True) - disease_name = Column(String(255), index=True) + disease_id: Mapped[str] = mapped_column(String(100), primary_key=True) + disease_name: Mapped[str] = mapped_column(String(255), index=True) def as_dict(self): """Convert object values to dictionary.""" @@ -89,8 +95,8 @@ class DisgenetSource(Base): """Class definition for the disgenet_source table.""" __tablename__ = "disgenet_source" - id = Column(Integer, primary_key=True) - source = Column(String(100), index=True) + id: Mapped[int] = mapped_column(primary_key=True) + source: Mapped[str] = mapped_column(String(100), index=True) def as_dict(self): """Convert object values to dictionary.""" diff --git a/ebel/manager/rdbms/models/drugbank.py b/ebel/manager/rdbms/models/drugbank.py index e045bba..3f3800f 100644 --- a/ebel/manager/rdbms/models/drugbank.py +++ b/ebel/manager/rdbms/models/drugbank.py @@ -1,8 +1,10 @@ """DrugBank RDBMS model definition.""" +import datetime +from typing import List, Optional from sqlalchemy import Column, Date, ForeignKey, Integer, String, Text from sqlalchemy.ext.declarative import declarative_base -from sqlalchemy.orm import relationship +from sqlalchemy.orm import Mapped, mapped_column, relationship Base = declarative_base() @@ -11,34 +13,41 @@ class Drugbank(Base): """Class definition for the drugbank table.""" __tablename__ = "drugbank" - id = Column(Integer, primary_key=True) - drugbank_id = Column(String(10), index=True) - name = Column(String(255)) - description = Column(Text) - cas_number = Column(String(20)) - unii = Column(String(20)) - state = Column(String(20)) - indication = Column(Text) - pharmacodynamics = Column(Text) - toxicity = Column(Text) - metabolism = Column(Text) - absorption = Column(Text) - half_life = Column(Text) - route_of_elimination = Column(Text) - volume_of_distribution = Column(Text) - clearance = Column(Text) - mechanism_of_action = Column(Text) - fda_label = Column(Text) - - references = relationship("Reference", back_populates="drugbank", cascade="save-update") - synonyms = relationship("Synonym", back_populates="drugbank", cascade="save-update") - targets = relationship("Target", back_populates="drugbank", cascade="save-update") - external_identifiers = relationship("ExternalIdentifier", back_populates="drugbank", cascade="save-update") - product_names = relationship("ProductName", back_populates="drugbank", cascade="save-update") - drug_interactions = relationship("DrugInteraction", back_populates="drugbank", cascade="save-update") - statuses = relationship("Status", back_populates="drugbank", cascade="save-update") - patents = relationship("Patent", back_populates="drugbank", cascade="save-update") - pathways = relationship("Pathway", back_populates="drugbank", cascade="save-update") + + id: Mapped[int] = mapped_column(primary_key=True) + drugbank_id: Mapped[str] = mapped_column(String(10), index=True) + name: Mapped[str] = mapped_column(String(255)) + description: Mapped[Optional[str]] = mapped_column(Text) + cas_number: Mapped[Optional[str]] = mapped_column(String(20)) + unii: Mapped[Optional[str]] = mapped_column(String(20)) + state: Mapped[Optional[str]] = mapped_column(String(20)) + indication: Mapped[Optional[str]] = mapped_column(Text) + pharmacodynamics: Mapped[Optional[str]] = mapped_column(Text) + toxicity: Mapped[Optional[str]] = mapped_column(Text) + metabolism: Mapped[Optional[str]] = mapped_column(Text) + absorption: Mapped[Optional[str]] = mapped_column(Text) + half_life: Mapped[Optional[str]] = mapped_column(Text) + route_of_elimination: Mapped[Optional[str]] = mapped_column(Text) + volume_of_distribution: Mapped[Optional[str]] = mapped_column(Text) + clearance: Mapped[Optional[str]] = mapped_column(Text) + mechanism_of_action: Mapped[Optional[str]] = mapped_column(Text) + fda_label: Mapped[Optional[str]] = mapped_column(Text) + + references: Mapped[List["Reference"]] = relationship("Reference", back_populates="drugbank", cascade="save-update") + synonyms: Mapped[List["Synonym"]] = relationship("Synonym", back_populates="drugbank", cascade="save-update") + targets: Mapped[List["Target"]] = relationship("Target", back_populates="drugbank", cascade="save-update") + external_identifiers: Mapped[List["ExternalIdentifier"]] = relationship( + "ExternalIdentifier", back_populates="drugbank", cascade="save-update" + ) + product_names: Mapped[List["ProductName"]] = relationship( + "ProductName", back_populates="drugbank", cascade="save-update" + ) + drug_interactions: Mapped[List["DrugInteraction"]] = relationship( + "DrugInteraction", back_populates="drugbank", cascade="save-update" + ) + statuses: Mapped[List["Status"]] = relationship("Status", back_populates="drugbank", cascade="save-update") + patents: Mapped[List["Patent"]] = relationship("Patent", back_populates="drugbank", cascade="save-update") + pathways: Mapped[List["Pathway"]] = relationship("Pathway", back_populates="drugbank", cascade="save-update") def __str__(self): """Class string definition.""" @@ -77,11 +86,12 @@ class Pathway(Base): """Class definition for the drugbank_pathway table.""" __tablename__ = "drugbank_pathway" - id = Column(Integer, primary_key=True) - smpdb_id = Column(String(255)) - drugbank_id = Column(Integer, ForeignKey("drugbank.id")) - drugbank = relationship("Drugbank", back_populates="pathways") + id: Mapped[int] = mapped_column(primary_key=True) + smpdb_id: Mapped[str] = mapped_column(String(255)) + + drugbank_id: Mapped[str] = mapped_column(ForeignKey("drugbank.id")) + drugbank: Mapped["Drugbank"] = relationship("Drugbank", back_populates="pathways") def __str__(self): return self.smpdb_id @@ -95,15 +105,16 @@ class Patent(Base): """Class definition for the drugbank_patent table.""" __tablename__ = "drugbank_patent" - id = Column(Integer, primary_key=True) - number = Column(String(255)) - country = Column(String(255)) - approved = Column(Date) - expires = Column(Date) - pediatric_extension = Column(String(255)) - drugbank_id = Column(Integer, ForeignKey("drugbank.id")) - drugbank = relationship("Drugbank", back_populates="patents") + id: Mapped[int] = mapped_column(primary_key=True) + number: Mapped[str] = mapped_column(String(255)) + country: Mapped[str] = mapped_column(String(255)) + approved: Mapped[datetime.date] = mapped_column(Date) + expires: Mapped[datetime.date] = mapped_column(Date) + pediatric_extension: Mapped[str] = mapped_column(String(255)) + + drugbank_id: Mapped[int] = mapped_column(ForeignKey("drugbank.id")) + drugbank: Mapped[Drugbank] = relationship("Drugbank", back_populates="patents") def __str__(self): return self.number @@ -124,11 +135,12 @@ class Status(Base): """Class definition for the drugbank_status table.""" __tablename__ = "drugbank_status" - id = Column(Integer, primary_key=True) - status = Column(String(20), index=True) - drugbank_id = Column(Integer, ForeignKey("drugbank.id")) - drugbank = relationship("Drugbank", back_populates="statuses") + id: Mapped[int] = mapped_column(primary_key=True) + status: Mapped[str] = mapped_column(String(20), index=True) + + drugbank_id: Mapped[int] = mapped_column(ForeignKey("drugbank.id")) + drugbank: Mapped[Drugbank] = relationship("Drugbank", back_populates="statuses") def __str__(self): return self.status @@ -142,12 +154,13 @@ class ExternalIdentifier(Base): """Class definition for the drugbank_external_identifier table.""" __tablename__ = "drugbank_external_identifier" - id = Column(Integer, primary_key=True) - resource = Column(String(255), index=True) - identifier = Column(String(255), index=True) - drugbank_id = Column(Integer, ForeignKey("drugbank.id")) - drugbank = relationship("Drugbank", back_populates="external_identifiers") + id: Mapped[int] = mapped_column(primary_key=True) + resource: Mapped[str] = mapped_column(String(255), index=True) + identifier: Mapped[str] = mapped_column(String(255), index=True) + + drugbank_id: Mapped[int] = mapped_column(ForeignKey("drugbank.id")) + drugbank: Mapped[Drugbank] = relationship("Drugbank", back_populates="external_identifiers") def __str__(self): return self.identifier @@ -165,11 +178,12 @@ class Reference(Base): """Class definition for the drugbank_reference table.""" __tablename__ = "drugbank_reference" - id = Column(Integer, primary_key=True) - pmid = Column(Integer) - drugbank_id = Column(Integer, ForeignKey("drugbank.id")) - drugbank = relationship("Drugbank", back_populates="references") + id: Mapped[int] = mapped_column(primary_key=True) + pmid: Mapped[int] = mapped_column() + + drugbank_id: Mapped[int] = mapped_column(ForeignKey("drugbank.id")) + drugbank: Mapped[Drugbank] = relationship("Drugbank", back_populates="references") def __str__(self): return self.pmid @@ -183,13 +197,14 @@ class Target(Base): """Class definition for the drugbank_target table.""" __tablename__ = "drugbank_target" - id = Column(Integer, primary_key=True) - uniprot = Column(String(20), index=True) - action = Column(String(50), index=True) - known_action = Column(String(20), index=True) - drugbank_id = Column(Integer, ForeignKey("drugbank.id")) - drugbank = relationship("Drugbank", back_populates="targets") + id: Mapped[int] = mapped_column(primary_key=True) + uniprot: Mapped[str] = mapped_column(String(20), index=True) + action: Mapped[Optional[str]] = mapped_column(String(50), index=True) + known_action: Mapped[str] = mapped_column(String(20), index=True) + + drugbank_id: Mapped[int] = mapped_column(ForeignKey("drugbank.id")) + drugbank: Mapped[Drugbank] = relationship("Drugbank", back_populates="targets") def __str__(self): return self.uniprot @@ -208,13 +223,14 @@ class DrugInteraction(Base): """Class definition for the drugbank_drug_interaction table.""" __tablename__ = "drugbank_drug_interaction" - id = Column(Integer, primary_key=True) - drugbank_id = Column(String(10), index=True) - name = Column(Text) - description = Column(Text) - db_id = Column(Integer, ForeignKey("drugbank.id")) # exception because drugbank_id is already a field - drugbank = relationship("Drugbank", back_populates="drug_interactions") + id: Mapped[int] = mapped_column(primary_key=True) + drugbank_id: Mapped[str] = mapped_column(String(10), index=True) + name: Mapped[str] = mapped_column(Text) + description: Mapped[str] = mapped_column(Text) + + db_id: Mapped[str] = mapped_column(ForeignKey("drugbank.id")) # exception because drugbank_id is already a field + drugbank: Mapped[Drugbank] = relationship("Drugbank", back_populates="drug_interactions") def __str__(self): return self.drugbank_id @@ -233,11 +249,12 @@ class ProductName(Base): """Class definition for the drugbank_product_name table.""" __tablename__ = "drugbank_product_name" - id = Column(Integer, primary_key=True) - name = Column(Text) - drugbank_id = Column(Integer, ForeignKey("drugbank.id")) - drugbank = relationship("Drugbank", back_populates="product_names") + id: Mapped[int] = mapped_column(primary_key=True) + name: Mapped[str] = mapped_column(Text) + + drugbank_id: Mapped[int] = mapped_column(ForeignKey("drugbank.id")) + drugbank: Mapped[Drugbank] = relationship("Drugbank", back_populates="product_names") def __str__(self): return self.name @@ -251,11 +268,12 @@ class Synonym(Base): """Class definition for the drugbank_synonym table.""" __tablename__ = "drugbank_synonym" - id = Column(Integer, primary_key=True) - synonym = Column(Text) - drugbank_id = Column(Integer, ForeignKey("drugbank.id")) - drugbank = relationship("Drugbank", back_populates="synonyms") + id: Mapped[int] = mapped_column(primary_key=True) + synonym: Mapped[str] = mapped_column(Text) + + drugbank_id: Mapped[int] = mapped_column(ForeignKey("drugbank.id")) + drugbank: Mapped[Drugbank] = relationship("Drugbank", back_populates="synonyms") def __str__(self): return self.synonym diff --git a/ebel/manager/rdbms/models/ensembl.py b/ebel/manager/rdbms/models/ensembl.py index 6c7115e..5d386ec 100644 --- a/ebel/manager/rdbms/models/ensembl.py +++ b/ebel/manager/rdbms/models/ensembl.py @@ -2,6 +2,7 @@ from sqlalchemy import Column, Integer, String from sqlalchemy.ext.declarative import declarative_base +from sqlalchemy.orm import Mapped, mapped_column from ebel.manager.rdbms.models import object_as_dict @@ -12,17 +13,17 @@ class Ensembl(Base): """Class definition for the ensembl table.""" __tablename__ = "ensembl" - id = Column(Integer, primary_key=True) - enst = Column(String(20), index=True) - version = Column(Integer) - chromosome = Column(String(10), index=True) - start = Column(Integer, index=True) - stop = Column(Integer, index=True) - orientation = Column(Integer) - gene_id = Column(String(255)) - gene_id_short = Column(String(255)) - hgnc_id = Column(String(255), index=True) - symbol = Column(String(50), index=True) + id: Mapped[int] = mapped_column(primary_key=True) + enst: Mapped[str] = mapped_column(String(20), index=True) + version: Mapped[int] = mapped_column() + chromosome: Mapped[str] = mapped_column(String(10), index=True) + start: Mapped[int] = mapped_column(index=True) + stop: Mapped[int] = mapped_column(index=True) + orientation: Mapped[int] = mapped_column() + gene_id: Mapped[str] = mapped_column(String(255)) + gene_id_short: Mapped[str] = mapped_column(String(255)) + hgnc_id: Mapped[str] = mapped_column(String(255), index=True) + symbol: Mapped[str] = mapped_column(String(50), index=True) def as_dict(self): """Convert object values to dictionary.""" diff --git a/ebel/manager/rdbms/models/expression_atlas.py b/ebel/manager/rdbms/models/expression_atlas.py index ce70217..8e38c5e 100644 --- a/ebel/manager/rdbms/models/expression_atlas.py +++ b/ebel/manager/rdbms/models/expression_atlas.py @@ -1,7 +1,9 @@ """Expression Atlas RDBMS model definition.""" +from typing import List + from sqlalchemy import Column, Float, ForeignKey, Integer, String, Text from sqlalchemy.ext.declarative import declarative_base -from sqlalchemy.orm import relationship +from sqlalchemy.orm import Mapped, mapped_column, relationship from ebel.manager.rdbms.models import object_as_dict @@ -13,14 +15,14 @@ class Experiment(Base): __tablename__ = "expression_atlas_experiment" - id = Column(Integer, primary_key=True) + id: Mapped[int] = mapped_column(primary_key=True) - name = Column(String(100), index=True) - title = Column(Text) + name: Mapped[str] = mapped_column(String(100), index=True) + title: Mapped[str] = mapped_column(Text) - idfs = relationship("Idf", back_populates="experiment") - group_comparisons = relationship("GroupComparison", back_populates="experiment") - sdrf_condenseds = relationship("SdrfCondensed", back_populates="experiment") + idfs: Mapped[List["Idf"]] = relationship("Idf", back_populates="experiment") + group_comparisons: Mapped[List["GroupComparison"]] = relationship("GroupComparison", back_populates="experiment") + sdrf_condenseds: Mapped[List["SdrfCondensed"]] = relationship("SdrfCondensed", back_populates="experiment") def as_dict(self): """Convert object values to dictionary.""" @@ -36,13 +38,13 @@ class Idf(Base): __tablename__ = "expression_atlas_idf" - id = Column(Integer, primary_key=True) + id: Mapped[int] = mapped_column(primary_key=True) - key_name = Column(Text, nullable=False) - value = Column(Text, nullable=False) + key_name: Mapped[str] = mapped_column(Text, nullable=False) + value: Mapped[str] = mapped_column(Text, nullable=False) - experiment_id = Column(Integer, ForeignKey("expression_atlas_experiment.id")) - experiment = relationship("Experiment", back_populates="idfs") + experiment_id: Mapped[int] = mapped_column(ForeignKey("expression_atlas_experiment.id")) + experiment: Mapped[Experiment] = relationship("Experiment", back_populates="idfs") def as_dict(self): """Convert object values to dictionary.""" @@ -54,16 +56,16 @@ class GroupComparison(Base): __tablename__ = "expression_atlas_group_comparison" - id = Column(Integer, primary_key=True) + id: Mapped[int] = mapped_column(primary_key=True) - experiment_id = Column(Integer, ForeignKey("expression_atlas_experiment.id")) - experiment = relationship("Experiment", back_populates="group_comparisons") + experiment_id: Mapped[int] = mapped_column(ForeignKey("expression_atlas_experiment.id")) + experiment: Mapped[Experiment] = relationship("Experiment", back_populates="group_comparisons") - group_comparison = Column(String(100)) - name = Column(Text) + group_comparison: Mapped[str] = mapped_column(String(100)) + name: Mapped[str] = mapped_column(Text) - fold_changes = relationship("FoldChange", back_populates="group_comparison") - gseas = relationship("Gsea", back_populates="group_comparison") + fold_changes: Mapped[List["FoldChange"]] = relationship("FoldChange", back_populates="group_comparison") + gseas: Mapped[List["Gsea"]] = relationship("Gsea", back_populates="group_comparison") def as_dict(self): """Convert object values to dictionary.""" @@ -75,16 +77,16 @@ class FoldChange(Base): __tablename__ = "expression_atlas_foldchange" - id = Column(Integer, primary_key=True) + id: Mapped[int] = mapped_column(primary_key=True) - gene_id = Column(String(255)) - gene_name = Column(String(100), index=True) - log2foldchange = Column(Float, index=True) - p_value = Column(Float, index=True) - t_statistic = Column(Float) + gene_id: Mapped[str] = mapped_column(String(255)) + gene_name: Mapped[str] = mapped_column(String(100), index=True) + log2foldchange: Mapped[float] = mapped_column(index=True) + p_value: Mapped[float] = mapped_column(index=True) + t_statistic: Mapped[float] = mapped_column() - group_comparison_id = Column(Integer, ForeignKey("expression_atlas_group_comparison.id")) - group_comparison = relationship("GroupComparison", back_populates="fold_changes") + group_comparison_id: Mapped[int] = mapped_column(ForeignKey("expression_atlas_group_comparison.id")) + group_comparison: Mapped[GroupComparison] = relationship("GroupComparison", back_populates="fold_changes") def as_dict(self): """Convert object values to dictionary.""" @@ -96,17 +98,17 @@ class SdrfCondensed(Base): __tablename__ = "expression_atlas_sdrf_condensed" - id = Column(Integer, primary_key=True) + id: Mapped[int] = mapped_column(primary_key=True) - experiment_id = Column(Integer, ForeignKey("expression_atlas_experiment.id")) - experiment = relationship("Experiment", back_populates="sdrf_condenseds") + experiment_id: Mapped[int] = mapped_column(ForeignKey("expression_atlas_experiment.id")) + experiment: Mapped[Experiment] = relationship("Experiment", back_populates="sdrf_condenseds") - method = Column(String(255)) - sample = Column(String(255)) - parameter_type = Column(String(255)) - parameter = Column(String(255)) - value = Column(String(255)) - url = Column(String(255)) + method: Mapped[str] = mapped_column(String(255)) + sample: Mapped[str] = mapped_column(String(255)) + parameter_type: Mapped[str] = mapped_column(String(255)) + parameter: Mapped[str] = mapped_column(String(255)) + value: Mapped[str] = mapped_column(String(255)) + url: Mapped[str] = mapped_column(String(255)) def as_dict(self): """Convert object values to dictionary.""" @@ -118,22 +120,22 @@ class Gsea(Base): __tablename__ = "expression_atlas_gsea" - id = Column(Integer, primary_key=True) - - group_comparison_id = Column(Integer, ForeignKey("expression_atlas_group_comparison.id")) - group_comparison = relationship("GroupComparison", back_populates="gseas") - - term = Column(String(255), index=True) - accession = Column(String(255)) - genes_tot = Column(Integer) - stat_non_dir_p = Column(Float) - p_adj_non_dir = Column(Float, index=True) - significant_in_gene_set = Column(Integer) - non_significant_in_gene_set = Column(Integer) - significant_not_in_gene_set = Column(Integer) - non_significant_not_in_gene_set = Column(Integer) - effect_size = Column(Float) - gsea_type = Column(String(100)) + id: Mapped[int] = mapped_column(primary_key=True) + + group_comparison_id: Mapped[int] = mapped_column(ForeignKey("expression_atlas_group_comparison.id")) + group_comparison: Mapped[GroupComparison] = relationship("GroupComparison", back_populates="gseas") + + term: Mapped[str] = mapped_column(String(255), index=True) + accession: Mapped[str] = mapped_column(String(255)) + genes_tot: Mapped[int] = mapped_column() + stat_non_dir_p: Mapped[float] = mapped_column() + p_adj_non_dir: Mapped[float] = mapped_column(index=True) + significant_in_gene_set: Mapped[int] = mapped_column() + non_significant_in_gene_set: Mapped[int] = mapped_column() + significant_not_in_gene_set: Mapped[int] = mapped_column() + non_significant_not_in_gene_set: Mapped[int] = mapped_column() + effect_size: Mapped[float] = mapped_column() + gsea_type: Mapped[str] = mapped_column(String(100)) def as_dict(self): """Convert object values to dictionary.""" diff --git a/ebel/manager/rdbms/models/gwas_catalog.py b/ebel/manager/rdbms/models/gwas_catalog.py index 97a6f8c..bcd336e 100644 --- a/ebel/manager/rdbms/models/gwas_catalog.py +++ b/ebel/manager/rdbms/models/gwas_catalog.py @@ -1,7 +1,9 @@ """GWAS Catalog RDBMS model definition.""" +from typing import List, Optional + from sqlalchemy import Column, Float, ForeignKey, Integer, String, Text from sqlalchemy.ext.declarative import declarative_base -from sqlalchemy.orm import relationship +from sqlalchemy.orm import Mapped, mapped_column, relationship from ebel.manager.rdbms.models import object_as_dict @@ -12,42 +14,42 @@ class GwasCatalog(Base): """Class definition for the gwascatalog table.""" __tablename__ = "gwascatalog" - id = Column(Integer, primary_key=True) - date_added_to_catalog = Column(String(255)) - pubmedid = Column(Integer) - first_author = Column(String(255)) - date = Column(String(255)) - journal = Column(String(255)) - link = Column(String(255)) - study = Column(Text) - disease_trait = Column(String(255)) - initial_sample_size = Column(Text) - replication_sample_size = Column(Text) - region = Column(String(50)) - chr_id = Column(Text) - chr_pos = Column(Text) - reported_gene_s = Column(Text) - mapped_gene = Column(Text) - upstream_gene_id = Column(String(50)) - downstream_gene_id = Column(String(50)) - upstream_gene_distance = Column(Integer) - downstream_gene_distance = Column(Integer) - strongest_snp_risk_allele = Column(Text) - snp = Column(Text) - merged = Column(Integer) - snp_id_current = Column(Text) - context = Column(Text) - intergenic = Column(Integer) - risk_allele_frequency = Column(Text) - p_value = Column(Float) - pvalue_mlog = Column(Float) - p_value_text = Column(Text) - or_or_beta = Column(Float) - _95_ci_text = Column(Text) - platform_snps_passing_qc = Column(Text) - cnv = Column(Text) + id: Mapped[int] = mapped_column(primary_key=True) + date_added_to_catalog: Mapped[str] = mapped_column(String(255)) + pubmedid: Mapped[int] = mapped_column() + first_author: Mapped[str] = mapped_column(String(255)) + date: Mapped[str] = mapped_column(String(255)) + journal: Mapped[str] = mapped_column(String(255)) + link: Mapped[str] = mapped_column(String(255)) + study: Mapped[str] = mapped_column(Text) + disease_trait: Mapped[str] = mapped_column(String(255)) + initial_sample_size: Mapped[Optional[str]] = mapped_column(Text) + replication_sample_size: Mapped[Optional[str]] = mapped_column(Text) + region: Mapped[Optional[str]] = mapped_column(String(50)) + chr_id: Mapped[Optional[str]] = mapped_column(Text) + chr_pos: Mapped[Optional[str]] = mapped_column(Text) + reported_gene_s: Mapped[Optional[str]] = mapped_column(Text) + mapped_gene: Mapped[Optional[str]] = mapped_column(Text) + upstream_gene_id: Mapped[Optional[str]] = mapped_column(String(50)) + downstream_gene_id: Mapped[Optional[str]] = mapped_column(String(50)) + upstream_gene_distance: Mapped[Optional[int]] = mapped_column() + downstream_gene_distance: Mapped[Optional[int]] = mapped_column() + strongest_snp_risk_allele: Mapped[Optional[int]] = mapped_column(Text) + snp: Mapped[Optional[int]] = mapped_column(Text) + merged: Mapped[Optional[int]] = mapped_column() + snp_id_current: Mapped[Optional[str]] = mapped_column(Text) + context: Mapped[Optional[str]] = mapped_column(Text) + intergenic: Mapped[Optional[int]] = mapped_column() + risk_allele_frequency: Mapped[Optional[str]] = mapped_column(Text) + p_value: Mapped[Optional[float]] = mapped_column() + pvalue_mlog: Mapped[Optional[float]] = mapped_column() + p_value_text: Mapped[Optional[str]] = mapped_column(Text) + or_or_beta: Mapped[Optional[float]] = mapped_column() + _95_ci_text: Mapped[Optional[str]] = mapped_column(Text) + platform_snps_passing_qc: Mapped[Optional[str]] = mapped_column(Text) + cnv: Mapped[Optional[str]] = mapped_column(Text) - snp_genes = relationship("SnpGene", back_populates="gwascatalog") + snp_genes: Mapped[List["SnpGene"]] = relationship("SnpGene", back_populates="gwascatalog") def as_dict(self): """Convert object values to dictionary.""" @@ -60,7 +62,7 @@ class SnpGene(Base): """Class definition for the gwascatalog_snpgene table.""" __tablename__ = "gwascatalog_snpgene" - id = Column(Integer, primary_key=True) - ensembl_identifier = Column(String(100), nullable=False, index=True) - gwascatalog_id = Column(Integer, ForeignKey("gwascatalog.id")) - gwascatalog = relationship("GwasCatalog", back_populates="snp_genes") + id: Mapped[int] = mapped_column(primary_key=True) + ensembl_identifier: Mapped[str] = mapped_column(String(100), nullable=False, index=True) + gwascatalog_id: Mapped[int] = mapped_column(ForeignKey("gwascatalog.id")) + gwascatalog: Mapped[GwasCatalog] = relationship("GwasCatalog", back_populates="snp_genes") diff --git a/ebel/manager/rdbms/models/hgnc.py b/ebel/manager/rdbms/models/hgnc.py index da21ff4..e6f5d1e 100644 --- a/ebel/manager/rdbms/models/hgnc.py +++ b/ebel/manager/rdbms/models/hgnc.py @@ -1,8 +1,10 @@ """HGNC RDBMS model definition.""" -from sqlalchemy import (BigInteger, Column, Date, ForeignKey, Integer, String, - Text) +import datetime +from typing import List, Optional + +from sqlalchemy import BigInteger, Date, ForeignKey, Integer, String, Text from sqlalchemy.ext.declarative import declarative_base -from sqlalchemy.orm import relationship +from sqlalchemy.orm import Mapped, mapped_column, relationship from ebel.manager.rdbms.models import object_as_dict @@ -13,59 +15,60 @@ class Hgnc(Base): """Class definition for the hgnc table.""" __tablename__ = "hgnc" - id = Column(Integer, primary_key=True) - hgnc_id = Column(String(20)) - version = Column(BigInteger) - bioparadigms_slc = Column(String(20)) - cd = Column(String(20)) - cosmic = Column(String(50)) - date_approved_reserved = Column(Date) - date_modified = Column(Date) - date_name_changed = Column(Date) - date_symbol_changed = Column(Date) - ensembl_gene_id = Column(String(20)) - entrez_id = Column(Integer) - homeodb = Column(Integer) - horde_id = Column(String(50)) - imgt = Column(String(50)) - iuphar = Column(String(50)) - kznf_gene_catalog = Column(Integer) - lncipedia = Column(String(50)) - lncrnadb = Column(String(50)) - location = Column(String(100)) - location_sortable = Column(String(100)) - locus_group = Column(String(50)) - locus_type = Column(String(50)) - merops = Column(String(20)) - mirbase = Column(String(20)) - name = Column(String(255)) - orphanet = Column(Integer) - snornabase = Column(String(20)) - status = Column(String(50)) - symbol = Column(String(100), index=True) - ucsc_id = Column(String(50)) - uuid = Column(String(50)) - vega_id = Column(String(50)) - agr = Column(String(50)) - kznf_gene_catalog = Column(Text) - - pre_symbols = relationship("PrevSymbol", back_populates="hgnc") - alias_names = relationship("AliasName", back_populates="hgnc") - alias_symbols = relationship("AliasSymbol", back_populates="hgnc") - ccdss = relationship("Ccds", back_populates="hgnc") - enas = relationship("Ena", back_populates="hgnc") - enzymes = relationship("Enzyme", back_populates="hgnc") - gene_group_names = relationship("GeneGroupName", back_populates="hgnc") - gene_group_ids = relationship("GeneGroupId", back_populates="hgnc") - uniprots = relationship("UniProt", back_populates="hgnc") - rna_centrals = relationship("RnaCentral", back_populates="hgnc") - rgds = relationship("Rgd", back_populates="hgnc") - refseqs = relationship("RefSeq", back_populates="hgnc") - pubmeds = relationship("PubMed", back_populates="hgnc") - prev_names = relationship("PrevName", back_populates="hgnc") - omims = relationship("Omim", back_populates="hgnc") - mgds = relationship("Mgd", back_populates="hgnc") - lsdbs = relationship("Lsdb", back_populates="hgnc") + + id: Mapped[int] = mapped_column(primary_key=True) + hgnc_id: Mapped[str] = mapped_column(String(20)) + version: Mapped[int] = mapped_column(BigInteger) + bioparadigms_slc: Mapped[Optional[str]] = mapped_column(String(20)) + cd: Mapped[Optional[str]] = mapped_column(String(20)) + cosmic: Mapped[Optional[str]] = mapped_column(String(50)) + date_approved_reserved: Mapped[datetime.date] = mapped_column(Date) + date_modified: Mapped[Optional[datetime.date]] = mapped_column(Date) + date_name_changed: Mapped[Optional[datetime.date]] = mapped_column(Date) + date_symbol_changed: Mapped[Optional[datetime.date]] = mapped_column(Date) + ensembl_gene_id: Mapped[Optional[str]] = mapped_column(String(20)) + entrez_id: Mapped[Optional[int]] = mapped_column() + homeodb: Mapped[Optional[int]] = mapped_column() + horde_id: Mapped[Optional[str]] = mapped_column(String(50)) + imgt: Mapped[Optional[str]] = mapped_column(String(50)) + iuphar: Mapped[Optional[str]] = mapped_column(String(50)) + kznf_gene_catalog: Mapped[int] = mapped_column() + lncipedia: Mapped[Optional[str]] = mapped_column(String(50)) + lncrnadb: Mapped[Optional[str]] = mapped_column(String(50)) + location: Mapped[Optional[str]] = mapped_column(String(100)) + location_sortable: Mapped[Optional[str]] = mapped_column(String(100)) + locus_group: Mapped[str] = mapped_column(String(50)) + locus_type: Mapped[str] = mapped_column(String(50)) + merops: Mapped[Optional[str]] = mapped_column(String(20)) + mirbase: Mapped[Optional[str]] = mapped_column(String(20)) + name: Mapped[str] = mapped_column(String(255)) + orphanet: Mapped[Optional[int]] = mapped_column() + snornabase: Mapped[Optional[str]] = mapped_column(String(20)) + status: Mapped[str] = mapped_column(String(50)) + symbol: Mapped[str] = mapped_column(String(100), index=True) + ucsc_id: Mapped[Optional[str]] = mapped_column(String(50)) + uuid: Mapped[str] = mapped_column(String(50)) + vega_id: Mapped[Optional[str]] = mapped_column(String(50)) + agr: Mapped[Optional[str]] = mapped_column(String(50)) + kznf_gene_catalog: Mapped[Optional[str]] = mapped_column(Text) + + pre_symbols: Mapped[List["PrevSymbol"]] = relationship("PrevSymbol", back_populates="hgnc") + alias_names: Mapped[List["AliasName"]] = relationship("AliasName", back_populates="hgnc") + alias_symbols: Mapped[List["AliasSymbol"]] = relationship("AliasSymbol", back_populates="hgnc") + ccdss: Mapped[List["Ccds"]] = relationship("Ccds", back_populates="hgnc") + enas: Mapped[List["Ena"]] = relationship("Ena", back_populates="hgnc") + enzymes: Mapped[List["Enzyme"]] = relationship("Enzyme", back_populates="hgnc") + gene_group_names: Mapped[List["GeneGroupName"]] = relationship("GeneGroupName", back_populates="hgnc") + gene_group_ids: Mapped[List["GeneGroupId"]] = relationship("GeneGroupId", back_populates="hgnc") + uniprots: Mapped[List["UniProt"]] = relationship("UniProt", back_populates="hgnc") + rna_centrals: Mapped[List["RnaCentral"]] = relationship("RnaCentral", back_populates="hgnc") + rgds: Mapped[List["Rgd"]] = relationship("Rgd", back_populates="hgnc") + refseqs: Mapped[List["RefSeq"]] = relationship("RefSeq", back_populates="hgnc") + pubmeds: Mapped[List["PubMed"]] = relationship("PubMed", back_populates="hgnc") + prev_names: Mapped[List["PrevName"]] = relationship("PrevName", back_populates="hgnc") + omims: Mapped[List["Omim"]] = relationship("Omim", back_populates="hgnc") + mgds: Mapped[List["Mgd"]] = relationship("Mgd", back_populates="hgnc") + lsdbs: Mapped[List["Lsdb"]] = relationship("Lsdb", back_populates="hgnc") def as_dict(self): """Convert object values to dictionary.""" @@ -127,12 +130,12 @@ class PrevSymbol(Base): """Class definition for the hgnc_prev_symbol table.""" __tablename__ = "hgnc_prev_symbol" - id = Column(Integer, primary_key=True) + id: Mapped[int] = mapped_column(primary_key=True) - prev_symbol = Column(String(50), index=True) + prev_symbol: Mapped[str] = mapped_column(String(50), index=True) - hgnc_id = Column(Integer, ForeignKey("hgnc.id")) - hgnc = relationship("Hgnc", back_populates="pre_symbols") + hgnc_id: Mapped[int] = mapped_column(ForeignKey("hgnc.id")) + hgnc: Mapped[Hgnc] = relationship("Hgnc", back_populates="pre_symbols") def __str__(self): return self.prev_symbol @@ -142,12 +145,12 @@ class AliasName(Base): """Class definition for the hgnc_alias_name table.""" __tablename__ = "hgnc_alias_name" - id = Column(Integer, primary_key=True) + id: Mapped[int] = mapped_column(primary_key=True) - alias_name = Column(String(255)) + alias_name: Mapped[str] = mapped_column(String(255)) - hgnc_id = Column(Integer, ForeignKey("hgnc.id")) - hgnc = relationship("Hgnc", back_populates="alias_names") + hgnc_id: Mapped[int] = mapped_column(ForeignKey("hgnc.id")) + hgnc: Mapped[Hgnc] = relationship("Hgnc", back_populates="alias_names") def __str__(self): return self.alias_name @@ -157,12 +160,12 @@ class AliasSymbol(Base): """Class definition for the hgnc_alias_symbol table.""" __tablename__ = "hgnc_alias_symbol" - id = Column(Integer, primary_key=True) + id: Mapped[int] = mapped_column(primary_key=True) - alias_symbol = Column(String(50), index=True) + alias_symbol: Mapped[str] = mapped_column(String(50), index=True) - hgnc_id = Column(Integer, ForeignKey("hgnc.id")) - hgnc = relationship("Hgnc", back_populates="alias_symbols") + hgnc_id: Mapped[int] = mapped_column(ForeignKey("hgnc.id")) + hgnc: Mapped[Hgnc] = relationship("Hgnc", back_populates="alias_symbols") def __str__(self): return self.alias_symbol @@ -172,12 +175,12 @@ class Ccds(Base): """Class definition for the hgnc_ccds table.""" __tablename__ = "hgnc_ccds" - id = Column(Integer, primary_key=True) + id: Mapped[int] = mapped_column(primary_key=True) - identifier = Column(String(50), index=True) + identifier: Mapped[str] = mapped_column(String(50), index=True) - hgnc_id = Column(Integer, ForeignKey("hgnc.id")) - hgnc = relationship("Hgnc", back_populates="ccdss") + hgnc_id: Mapped[int] = mapped_column(ForeignKey("hgnc.id")) + hgnc: Mapped[Hgnc] = relationship("Hgnc", back_populates="ccdss") def __str__(self): return self.identifier @@ -187,12 +190,12 @@ class Ena(Base): """Class definition for the hgnc_ena table.""" __tablename__ = "hgnc_ena" - id = Column(Integer, primary_key=True) + id: Mapped[int] = mapped_column(primary_key=True) - identifier = Column(String(50), index=True) + identifier: Mapped[str] = mapped_column(String(50), index=True) - hgnc_id = Column(Integer, ForeignKey("hgnc.id")) - hgnc = relationship("Hgnc", back_populates="enas") + hgnc_id: Mapped[int] = mapped_column(ForeignKey("hgnc.id")) + hgnc: Mapped[Hgnc] = relationship("Hgnc", back_populates="enas") def __str__(self): return self.identifier @@ -202,11 +205,11 @@ class Enzyme(Base): """Class definition for the hgnc_enzyme table.""" __tablename__ = "hgnc_enzyme" - id = Column(Integer, primary_key=True) + id: Mapped[int] = mapped_column(primary_key=True) - ec_number = Column(String(50), index=True) + ec_number = mapped_column(String(50), index=True) - hgnc_id = Column(Integer, ForeignKey("hgnc.id")) + hgnc_id: Mapped[int] = mapped_column(ForeignKey("hgnc.id")) hgnc = relationship("Hgnc", back_populates="enzymes") def __str__(self): @@ -217,11 +220,11 @@ class GeneGroupName(Base): """Class definition for the hgnc_gene_group_name table.""" __tablename__ = "hgnc_gene_group_name" - id = Column(Integer, primary_key=True) + id: Mapped[int] = mapped_column(primary_key=True) - name = Column(String(255)) + name = mapped_column(String(255)) - hgnc_id = Column(Integer, ForeignKey("hgnc.id")) + hgnc_id: Mapped[int] = mapped_column(ForeignKey("hgnc.id")) hgnc = relationship("Hgnc", back_populates="gene_group_names") def __str__(self): @@ -236,11 +239,11 @@ class GeneGroupId(Base): """Class definition for the hgnc_gene_group_id table.""" __tablename__ = "hgnc_gene_group_id" - id = Column(Integer, primary_key=True) + id: Mapped[int] = mapped_column(primary_key=True) - identifier = Column(Integer) + identifier = mapped_column(Integer) - hgnc_id = Column(Integer, ForeignKey("hgnc.id")) + hgnc_id: Mapped[int] = mapped_column(ForeignKey("hgnc.id")) hgnc = relationship("Hgnc", back_populates="gene_group_ids") def __str__(self): @@ -251,11 +254,11 @@ class UniProt(Base): """Class definition for the hgnc_uniprot table.""" __tablename__ = "hgnc_uniprot" - id = Column(Integer, primary_key=True) + id: Mapped[int] = mapped_column(primary_key=True) - accession = Column(String(50), index=True) + accession = mapped_column(String(50), index=True) - hgnc_id = Column(Integer, ForeignKey("hgnc.id")) + hgnc_id: Mapped[int] = mapped_column(ForeignKey("hgnc.id")) hgnc = relationship("Hgnc", back_populates="uniprots") def __str__(self): @@ -266,11 +269,11 @@ class RnaCentral(Base): """Class definition for the hgnc_rna_central table.""" __tablename__ = "hgnc_rna_central" - id = Column(Integer, primary_key=True) + id: Mapped[int] = mapped_column(primary_key=True) - identifier = Column(String(50), index=True) + identifier = mapped_column(String(50), index=True) - hgnc_id = Column(Integer, ForeignKey("hgnc.id")) + hgnc_id: Mapped[int] = mapped_column(ForeignKey("hgnc.id")) hgnc = relationship("Hgnc", back_populates="rna_centrals") def __str__(self): @@ -281,11 +284,11 @@ class Rgd(Base): """Class definition for the hgnc_rgd table.""" __tablename__ = "hgnc_rgd" - id = Column(Integer, primary_key=True) + id: Mapped[int] = mapped_column(primary_key=True) - identifier = Column(String(50), index=True) + identifier = mapped_column(String(50), index=True) - hgnc_id = Column(Integer, ForeignKey("hgnc.id")) + hgnc_id: Mapped[int] = mapped_column(ForeignKey("hgnc.id")) hgnc = relationship("Hgnc", back_populates="rgds") def __str__(self): @@ -296,11 +299,11 @@ class RefSeq(Base): """Class definition for the hgnc_refseq table.""" __tablename__ = "hgnc_refseq" - id = Column(Integer, primary_key=True) + id: Mapped[int] = mapped_column(primary_key=True) - accession = Column(String(50), index=True) + accession = mapped_column(String(50), index=True) - hgnc_id = Column(Integer, ForeignKey("hgnc.id")) + hgnc_id: Mapped[int] = mapped_column(ForeignKey("hgnc.id")) hgnc = relationship("Hgnc", back_populates="refseqs") def __str__(self): @@ -311,11 +314,11 @@ class PubMed(Base): """Class definition for the hgnc_pubmed table.""" __tablename__ = "hgnc_pubmed" - id = Column(Integer, primary_key=True) + id: Mapped[int] = mapped_column(primary_key=True) - pmid = Column(Integer, index=True) + pmid = mapped_column(Integer, index=True) - hgnc_id = Column(Integer, ForeignKey("hgnc.id")) + hgnc_id: Mapped[int] = mapped_column(ForeignKey("hgnc.id")) hgnc = relationship("Hgnc", back_populates="pubmeds") def __str__(self): @@ -326,11 +329,11 @@ class PrevName(Base): """Class definition for the hgnc_prev_name table.""" __tablename__ = "hgnc_prev_name" - id = Column(Integer, primary_key=True) + id: Mapped[int] = mapped_column(primary_key=True) - prev_name = Column(String(255)) + prev_name = mapped_column(String(255)) - hgnc_id = Column(Integer, ForeignKey("hgnc.id")) + hgnc_id: Mapped[int] = mapped_column(ForeignKey("hgnc.id")) hgnc = relationship("Hgnc", back_populates="prev_names") def __str__(self): @@ -341,11 +344,11 @@ class Omim(Base): """Class definition for the hgnc_omim table.""" __tablename__ = "hgnc_omim" - id = Column(Integer, primary_key=True) + id: Mapped[int] = mapped_column(primary_key=True) - identifier = Column(Integer, index=True) + identifier = mapped_column(Integer, index=True) - hgnc_id = Column(Integer, ForeignKey("hgnc.id")) + hgnc_id: Mapped[int] = mapped_column(ForeignKey("hgnc.id")) hgnc = relationship("Hgnc", back_populates="omims") def __str__(self): @@ -356,11 +359,11 @@ class Mgd(Base): """Class definition for the hgnc_mgd table.""" __tablename__ = "hgnc_mgd" - id = Column(Integer, primary_key=True) + id: Mapped[int] = mapped_column(primary_key=True) - identifier = Column(String(50), index=True) + identifier = mapped_column(String(50), index=True) - hgnc_id = Column(Integer, ForeignKey("hgnc.id")) + hgnc_id: Mapped[int] = mapped_column(ForeignKey("hgnc.id")) hgnc = relationship("Hgnc", back_populates="mgds") def __str__(self): @@ -371,11 +374,11 @@ class Lsdb(Base): """Class definition for the hgnc_lsdb table.""" __tablename__ = "hgnc_lsdb" - id = Column(Integer, primary_key=True) + id: Mapped[int] = mapped_column(primary_key=True) - identifier = Column(Text) + identifier: Mapped[str] = mapped_column(Text) - hgnc_id = Column(Integer, ForeignKey("hgnc.id")) + hgnc_id: Mapped[int] = mapped_column(ForeignKey("hgnc.id")) hgnc = relationship("Hgnc", back_populates="lsdbs") def __str__(self): diff --git a/ebel/manager/rdbms/models/human_ortholog.py b/ebel/manager/rdbms/models/human_ortholog.py index a1ccd37..1b1ab01 100644 --- a/ebel/manager/rdbms/models/human_ortholog.py +++ b/ebel/manager/rdbms/models/human_ortholog.py @@ -1,6 +1,7 @@ """HGNC Human Ortholog RDBMS model definition.""" from sqlalchemy import Column, Integer, String, Text from sqlalchemy.ext.declarative import declarative_base +from sqlalchemy.orm import Mapped, mapped_column from ebel.manager.rdbms.models import object_as_dict @@ -12,18 +13,18 @@ class HumanOrtholog(Base): __tablename__ = "human_ortholog" - id = Column(Integer, primary_key=True) - - hgnc_id = Column(String(20), index=True) - human_entrez_gene = Column(Integer) - human_ensembl_gene = Column(String(20)) - human_symbol = Column(String(50), index=True) - ortholog_species = Column(Integer, index=True) - ortholog_species_entrez_gene = Column(Integer) - ortholog_species_ensembl_gene = Column(String(50)) - ortholog_species_db_id = Column(String(50)) - ortholog_species_symbol = Column(String(50), index=True) - support = Column(Text) + id: Mapped[int] = mapped_column(primary_key=True) + + hgnc_id: Mapped[str] = mapped_column(String(20), index=True) + human_entrez_gene: Mapped[int] = mapped_column() + human_ensembl_gene: Mapped[str] = mapped_column(String(20)) + human_symbol: Mapped[str] = mapped_column(String(50), index=True) + ortholog_species: Mapped[int] = mapped_column(index=True) + ortholog_species_entrez_gene: Mapped[int] = mapped_column() + ortholog_species_ensembl_gene: Mapped[str] = mapped_column(String(50)) + ortholog_species_db_id: Mapped[str] = mapped_column(String(50)) + ortholog_species_symbol: Mapped[str] = mapped_column(String(50), index=True) + support: Mapped[str] = mapped_column(Text) def as_dict(self): """Convert object values to dictionary.""" diff --git a/ebel/manager/rdbms/models/intact.py b/ebel/manager/rdbms/models/intact.py index ab5ac33..62c6062 100644 --- a/ebel/manager/rdbms/models/intact.py +++ b/ebel/manager/rdbms/models/intact.py @@ -1,6 +1,9 @@ """IntAct RDBMS model definition.""" +from typing import Optional + from sqlalchemy import Column, Float, Integer, String, Text from sqlalchemy.ext.declarative import declarative_base +from sqlalchemy.orm import Mapped, mapped_column from ebel.manager.rdbms.models import object_as_dict @@ -11,16 +14,16 @@ class Intact(Base): """Class definition for the intact table.""" __tablename__ = "intact" - id = Column(Integer, primary_key=True) - confidence_value = Column(Float, index=True) - detection_method = Column(String(100), index=True) - detection_method_psimi_id = Column(Integer) - int_a_uniprot_id = Column(String(50), index=True) - int_b_uniprot_id = Column(String(50), index=True) - interaction_ids = Column(Text) - interaction_type = Column(String(100), index=True) - interaction_type_psimi_id = Column(Integer) - pmid = Column(Integer) + id: Mapped[int] = mapped_column(primary_key=True) + confidence_value: Mapped[float] = mapped_column(index=True) + detection_method: Mapped[str] = mapped_column(String(100), index=True) + detection_method_psimi_id: Mapped[int] = mapped_column() + int_a_uniprot_id: Mapped[str] = mapped_column(String(50), index=True) + int_b_uniprot_id: Mapped[str] = mapped_column(String(50), index=True) + interaction_ids: Mapped[str] = mapped_column(Text) + interaction_type: Mapped[str] = mapped_column(String(100), index=True) + interaction_type_psimi_id: Mapped[int] = mapped_column() + pmid: Mapped[Optional[int]] = mapped_column() def as_dict(self): """Convert object values to dictionary.""" diff --git a/ebel/manager/rdbms/models/iuphar.py b/ebel/manager/rdbms/models/iuphar.py index 790a929..57a8b36 100644 --- a/ebel/manager/rdbms/models/iuphar.py +++ b/ebel/manager/rdbms/models/iuphar.py @@ -1,8 +1,9 @@ """IUPHAR RDBMS model definition.""" -from sqlalchemy import (BigInteger, Boolean, Column, ForeignKey, Integer, - Numeric, String, Text) +from typing import List, Optional + +from sqlalchemy import BigInteger, Boolean, Column, ForeignKey, Integer, Numeric, String, Text from sqlalchemy.ext.declarative import declarative_base -from sqlalchemy.orm import relationship +from sqlalchemy.orm import Mapped, mapped_column, relationship from ebel.manager.rdbms.models import object_as_dict @@ -13,34 +14,36 @@ class IupharLigand(Base): """Class definition for the iuphar_ligand table.""" __tablename__ = "iuphar_ligand" - id = Column(Integer, primary_key=True) + id: Mapped[int] = mapped_column(primary_key=True) - name = Column(Text) - species = Column(Text) - type = Column(Text) - approved = Column(Boolean) - withdrawn = Column(Boolean) - labelled = Column(Boolean) - radioactive = Column(Boolean) - pubchem_sid = Column(BigInteger) - pubchem_cid = Column(Text) # TODO: This is a integer, but for import reasons this changed to text - uniprot_id = Column(Text) - ensembl_id = Column(Text) - ligand_subunit_ids = Column(Text) - ligand_subunit_name = Column(Text) - ligand_subunit_uni_prot_ids = Column(Text) - ligand_subunit_ensembl_ids = Column(Text) - iupac_name = Column(Text) - inn = Column(Text) - synonyms = Column(Text) - smiles = Column(Text) - inchi_key = Column(Text) - inchi = Column(Text) - gto_immu_pdb = Column(Boolean) - gto_mpdb = Column(Boolean) - antibacterial = Column(Boolean) + name: Mapped[str] = mapped_column(Text) + species: Mapped[Optional[str]] = mapped_column(Text) + type: Mapped[str] = mapped_column(Text) + approved: Mapped[Optional[bool]] = mapped_column() + withdrawn: Mapped[Optional[bool]] = mapped_column() + labelled: Mapped[Optional[bool]] = mapped_column() + radioactive: Mapped[Optional[bool]] = mapped_column() + pubchem_sid: Mapped[Optional[int]] = mapped_column() + pubchem_cid: Mapped[Optional[int]] = mapped_column( + Text + ) # TODO: This is a integer, but for import reasons this changed to text + uniprot_id: Mapped[Optional[str]] = mapped_column(Text) + ensembl_id: Mapped[Optional[str]] = mapped_column(Text) + ligand_subunit_ids: Mapped[Optional[str]] = mapped_column(Text) + ligand_subunit_name: Mapped[Optional[str]] = mapped_column(Text) + ligand_subunit_uni_prot_ids: Mapped[Optional[str]] = mapped_column(Text) + ligand_subunit_ensembl_ids: Mapped[Optional[str]] = mapped_column(Text) + iupac_name: Mapped[Optional[str]] = mapped_column(Text) + inn: Mapped[Optional[str]] = mapped_column(Text) + synonyms: Mapped[Optional[str]] = mapped_column(Text) + smiles: Mapped[Optional[str]] = mapped_column(Text) + inchi_key: Mapped[Optional[str]] = mapped_column(Text) + inchi: Mapped[Optional[str]] = mapped_column(Text) + gto_immu_pdb: Mapped[Optional[bool]] = mapped_column() + gto_mpdb: Mapped[Optional[bool]] = mapped_column() + antibacterial: Mapped[Optional[bool]] = mapped_column() - interactions = relationship("IupharInteraction") + interactions: Mapped[List["IupharInteraction"]] = relationship("IupharInteraction") def as_dict(self): """Convert object values to dictionary.""" @@ -51,50 +54,50 @@ class IupharInteraction(Base): """Class definition for the iuphar_interaction table.""" __tablename__ = "iuphar_interaction" - id = Column(Integer, primary_key=True) + id = mapped_column(Integer, primary_key=True) - target = Column(String(255)) - target_id = Column(Integer) - target_subunit_ids = Column(Text) - target_gene_symbol = Column(String(100)) - target_uniprot = Column(String(100)) - target_ensembl_gene_id = Column(String(200)) - target_ligand = Column(String(100)) - target_ligand_id = Column(Integer) - target_ligand_subunit_ids = Column(Text) - target_ligand_gene_symbol = Column(String(50)) - target_ligand_uniprot_id = Column(String(200)) - target_ligand_ensembl_gene_id = Column(String(50)) - target_ligand_pubchem_sid = Column(Integer) - target_species = Column(String(100)) - ligand = Column(String(255)) - ligand_id = Column(Integer, ForeignKey("iuphar_ligand.id"), index=True) - ligand_subunit_ids = Column(Text) - ligand_gene_symbol = Column(String(50)) - ligand_species = Column(String(50)) - ligand_pubchem_sid = Column(Integer) - ligand_type = Column(Text) - approved = Column(Boolean) - type = Column(String(100)) - action = Column(String(100)) - action_comment = Column(String(255)) - selectivity = Column(String(50)) - endogenous = Column(Boolean) - primary_target = Column(Boolean) - concentration_range = Column(String(50)) - affinity_units = Column(String(10)) - affinity_high = Column(Numeric(6, 2)) - affinity_median = Column(Numeric(6, 2)) - affinity_low = Column(Numeric(6, 2)) - original_affinity_units = Column(String(10)) - original_affinity_low_nm = Column(Numeric(12, 3)) - original_affinity_median_nm = Column(Numeric(12, 3)) - original_affinity_high_nm = Column(Numeric(12, 3)) - original_affinity_relation = Column(String(1)) - assay_description = Column(Text) - receptor_site = Column(String(100)) - ligand_context = Column(String(50)) - pubmed_id = Column(Text) + target: Mapped[Optional[str]] = mapped_column(String(255)) + target_id: Mapped[Optional[int]] = mapped_column() + target_subunit_ids: Mapped[Optional[str]] = mapped_column(Text) + target_gene_symbol: Mapped[Optional[str]] = mapped_column(String(100)) + target_uniprot: Mapped[Optional[str]] = mapped_column(String(100)) + target_ensembl_gene_id: Mapped[Optional[str]] = mapped_column(String(200)) + target_ligand: Mapped[Optional[str]] = mapped_column(String(100)) + target_ligand_id: Mapped[Optional[str]] = mapped_column(String(100)) + target_ligand_subunit_ids: Mapped[Optional[str]] = mapped_column(Text) + target_ligand_gene_symbol: Mapped[Optional[str]] = mapped_column(String(50)) + target_ligand_uniprot_id: Mapped[Optional[str]] = mapped_column(String(200)) + target_ligand_ensembl_gene_id: Mapped[Optional[str]] = mapped_column(String(50)) + target_ligand_pubchem_sid: Mapped[Optional[str]] = mapped_column(String(100)) + target_species: Mapped[Optional[str]] = mapped_column(String(100)) + ligand: Mapped[str] = mapped_column(String(255)) + ligand_id: Mapped[int] = mapped_column(ForeignKey("iuphar_ligand.id"), index=True) + ligand_subunit_ids: Mapped[Optional[str]] = mapped_column(Text) + ligand_gene_symbol: Mapped[Optional[str]] = mapped_column(String(50)) + ligand_species: Mapped[Optional[str]] = mapped_column(String(50)) + ligand_pubchem_sid: Mapped[Optional[int]] = mapped_column() + ligand_type: Mapped[str] = mapped_column(Text) + approved: Mapped[bool] = mapped_column() + type: Mapped[Optional[str]] = mapped_column(String(100)) + action: Mapped[Optional[str]] = mapped_column(String(100)) + action_comment: Mapped[Optional[str]] = mapped_column(String(255)) + selectivity: Mapped[Optional[str]] = mapped_column(String(50)) + endogenous: Mapped[bool] = mapped_column() + primary_target: Mapped[bool] = mapped_column() + concentration_range: Mapped[Optional[str]] = mapped_column(String(50)) + affinity_units: Mapped[str] = mapped_column(String(10)) + affinity_high: Mapped[Optional[float]] = mapped_column(Numeric(6, 2)) + affinity_median: Mapped[Optional[float]] = mapped_column(Numeric(6, 2)) + affinity_low: Mapped[Optional[float]] = mapped_column(Numeric(6, 2)) + original_affinity_units: Mapped[Optional[str]] = mapped_column(String(10)) + original_affinity_low_nm: Mapped[Optional[float]] = mapped_column(Numeric(12, 3)) + original_affinity_median_nm: Mapped[Optional[float]] = mapped_column(Numeric(12, 3)) + original_affinity_high_nm: Mapped[Optional[float]] = mapped_column(Numeric(12, 3)) + original_affinity_relation: Mapped[Optional[str]] = mapped_column(String(1)) + assay_description: Mapped[Optional[str]] = mapped_column(Text) + receptor_site: Mapped[Optional[str]] = mapped_column(String(100)) + ligand_context: Mapped[Optional[str]] = mapped_column(String(50)) + pubmed_id: Mapped[Optional[str]] = mapped_column(Text) def as_dict(self): """Convert object values to dictionary.""" diff --git a/ebel/manager/rdbms/models/kegg.py b/ebel/manager/rdbms/models/kegg.py index c5b07ee..d0975e0 100644 --- a/ebel/manager/rdbms/models/kegg.py +++ b/ebel/manager/rdbms/models/kegg.py @@ -1,6 +1,7 @@ """KEGG RDBMS model definition.""" from sqlalchemy import Column, Integer, String from sqlalchemy.ext.declarative import declarative_base +from sqlalchemy.orm import Mapped, mapped_column from ebel.manager.rdbms.models import object_as_dict @@ -11,17 +12,17 @@ class Kegg(Base): """Class definition for the kegg table.""" __tablename__ = "kegg" - id = Column(Integer, primary_key=True) + id: Mapped[int] = mapped_column(primary_key=True) - pathway_identifier = Column(String(100)) - pathway_name = Column(String(1000)) - kegg_species_id = Column(String(100)) - kegg_gene_id_a = Column(String(100)) - gene_symbol_a = Column(String(100), index=True) - kegg_gene_id_b = Column(String(100)) - gene_symbol_b = Column(String(100), index=True) - kegg_int_type = Column(String(100)) - interaction_type = Column(String(50), index=True) + pathway_identifier: Mapped[str] = mapped_column(String(100)) + pathway_name: Mapped[str] = mapped_column(String(1000)) + kegg_species_id: Mapped[str] = mapped_column(String(100)) + kegg_gene_id_a: Mapped[str] = mapped_column(String(100)) + gene_symbol_a: Mapped[str] = mapped_column(String(100), index=True) + kegg_gene_id_b: Mapped[str] = mapped_column(String(100)) + gene_symbol_b: Mapped[str] = mapped_column(String(100), index=True) + kegg_int_type: Mapped[str] = mapped_column(String(100)) + interaction_type: Mapped[str] = mapped_column(String(50), index=True) def as_dict(self): """Convert object values to dictionary.""" diff --git a/ebel/manager/rdbms/models/mirtarbase.py b/ebel/manager/rdbms/models/mirtarbase.py index 6f5014e..f44aaaf 100644 --- a/ebel/manager/rdbms/models/mirtarbase.py +++ b/ebel/manager/rdbms/models/mirtarbase.py @@ -1,6 +1,9 @@ """KEGG RDBMS model definition.""" +from typing import Optional + from sqlalchemy import Column, Integer, String, Text from sqlalchemy.ext.declarative import declarative_base +from sqlalchemy.orm import Mapped, mapped_column from ebel.manager.rdbms.models import object_as_dict @@ -11,17 +14,17 @@ class Mirtarbase(Base): """Class definition for the mirtarbase table.""" __tablename__ = "mirtarbase" - id = Column(Integer, primary_key=True) - - mi_rtar_base_id = Column(String(20)) - mi_rna = Column(String(50)) - species_mi_rna = Column(String(50), index=True) - target_gene = Column(String(50), index=True) - target_gene_entrez_id = Column(Integer) - species_target_gene = Column(String(50), index=True) - experiments = Column(Text) - support_type = Column(String(50), index=True) - references_pmid = Column(Integer) + id: Mapped[int] = mapped_column(primary_key=True) + + mi_rtar_base_id: Mapped[str] = mapped_column(String(20)) + mi_rna: Mapped[str] = mapped_column(String(50)) + species_mi_rna: Mapped[str] = mapped_column(String(50), index=True) + target_gene: Mapped[str] = mapped_column(String(50), index=True) + target_gene_entrez_id: Mapped[int] = mapped_column() + species_target_gene: Mapped[str] = mapped_column(String(50), index=True) + experiments: Mapped[str] = mapped_column(Text) + support_type: Mapped[Optional[str]] = mapped_column(String(50), index=True) + references_pmid: Mapped[int] = mapped_column() def as_dict(self): """Convert object values to dictionary.""" diff --git a/ebel/manager/rdbms/models/ncbi.py b/ebel/manager/rdbms/models/ncbi.py index 59a56f6..fb0231c 100644 --- a/ebel/manager/rdbms/models/ncbi.py +++ b/ebel/manager/rdbms/models/ncbi.py @@ -1,7 +1,9 @@ """NCBI RDBMS model definition.""" +from typing import List, Optional + from sqlalchemy import Column, ForeignKey, Integer, String, Text from sqlalchemy.ext.declarative import declarative_base -from sqlalchemy.orm import relationship +from sqlalchemy.orm import Mapped, mapped_column, relationship from . import object_as_dict @@ -12,27 +14,35 @@ class NcbiGeneInfo(Base): """Class definition for the ncbi_gene_info table.""" __tablename__ = "ncbi_gene_info" - gene_id = Column(Integer, primary_key=True) - - tax_id = Column(Integer, index=True) - symbol = Column(String(100), index=True) - type_of_gene = Column(String(100), index=True) - locus_tag = Column(String(100)) - chromosome = Column(String(100)) - map_location = Column(String(100)) - description_id = Column(Integer, ForeignKey("ncbi_gene_info_description.id")) - description = relationship("NcbiGeneInfoDescription", foreign_keys=[description_id]) - xrefs = relationship("NcbiGeneInfoXref", back_populates="gene") - mims = relationship("NcbiGeneMim", foreign_keys="NcbiGeneMim.gene_id", back_populates="gene") - orthologs = relationship( + gene_id: Mapped[int] = mapped_column(primary_key=True) + + tax_id: Mapped[int] = mapped_column(index=True) + symbol: Mapped[Optional[str]] = mapped_column(String(100), index=True) + type_of_gene: Mapped[Optional[str]] = mapped_column(String(100), index=True) + locus_tag: Mapped[Optional[str]] = mapped_column(String(100)) + chromosome: Mapped[Optional[str]] = mapped_column(String(100)) + map_location: Mapped[Optional[str]] = mapped_column(String(100)) + description_id: Mapped[Optional[int]] = mapped_column(ForeignKey("ncbi_gene_info_description.id")) + description: Mapped["NcbiGeneInfoDescription"] = relationship( + "NcbiGeneInfoDescription", foreign_keys=[description_id] + ) + xrefs: Mapped[List["NcbiGeneInfoXref"]] = relationship("NcbiGeneInfoXref", back_populates="gene") + mims: Mapped[List["NcbiGeneMim"]] = relationship( + "NcbiGeneMim", foreign_keys="NcbiGeneMim.gene_id", back_populates="gene" + ) + orthologs: Mapped[List["NcbiGeneOrtholog"]] = relationship( "NcbiGeneOrtholog", foreign_keys="NcbiGeneOrtholog.gene_id", back_populates="gene", ) - ensembl_ids = relationship("NcbiGeneEnsembl", back_populates="genes") - gene_ids_right = relationship("NcbiGeneOnRight", foreign_keys="NcbiGeneOnRight.gene_id", back_populates="gene") - gene_ids_left = relationship("NcbiGeneOnLeft", foreign_keys="NcbiGeneOnLeft.gene_id", back_populates="gene") - gene_ids_overlapping = relationship( + ensembl_ids: Mapped[List["NcbiGeneEnsembl"]] = relationship("NcbiGeneEnsembl", back_populates="genes") + gene_ids_right: Mapped["NcbiGeneOnRight"] = relationship( + "NcbiGeneOnRight", foreign_keys="NcbiGeneOnRight.gene_id", back_populates="gene" + ) + gene_ids_left: Mapped["NcbiGeneOnLeft"] = relationship( + "NcbiGeneOnLeft", foreign_keys="NcbiGeneOnLeft.gene_id", back_populates="gene" + ) + gene_ids_overlapping: Mapped["NcbiGeneOverlapping"] = relationship( "NcbiGeneOverlapping", foreign_keys="NcbiGeneOverlapping.gene_id", back_populates="gene", @@ -60,76 +70,76 @@ class NcbiGeneInfoDescription(Base): """Class definition for the ncbi_gene_info_description table.""" __tablename__ = "ncbi_gene_info_description" - id = Column(Integer, primary_key=True, autoincrement=True) - description = Column(Text) + id: Mapped[int] = mapped_column(primary_key=True, autoincrement=True) + description: Mapped[str] = mapped_column(Text) class NcbiGeneOnRight(Base): """Class definition for the ncbi_gene_on_right table.""" __tablename__ = "ncbi_gene_on_right" - id = Column(Integer, primary_key=True, autoincrement=True) - gene_id = Column(Integer, ForeignKey("ncbi_gene_info.gene_id")) - gene_id_on_right = Column(Integer, ForeignKey("ncbi_gene_info.gene_id")) + id: Mapped[int] = mapped_column(primary_key=True, autoincrement=True) + gene_id: Mapped[int] = mapped_column(ForeignKey("ncbi_gene_info.gene_id")) + gene_id_on_right: Mapped[int] = mapped_column(ForeignKey("ncbi_gene_info.gene_id")) - gene = relationship("NcbiGeneInfo", foreign_keys=[gene_id]) + gene: Mapped[NcbiGeneInfo] = relationship("NcbiGeneInfo", foreign_keys=[gene_id]) class NcbiGeneOnLeft(Base): """Class definition for the ncbi_gene_on_left table.""" __tablename__ = "ncbi_gene_on_left" - id = Column(Integer, primary_key=True, autoincrement=True) - gene_id = Column(Integer, ForeignKey("ncbi_gene_info.gene_id")) - gene_id_on_left = Column(Integer, ForeignKey("ncbi_gene_info.gene_id")) + id: Mapped[int] = mapped_column(primary_key=True, autoincrement=True) + gene_id: Mapped[int] = mapped_column(ForeignKey("ncbi_gene_info.gene_id")) + gene_id_on_left: Mapped[int] = mapped_column(ForeignKey("ncbi_gene_info.gene_id")) - gene = relationship("NcbiGeneInfo", foreign_keys=[gene_id]) + gene: Mapped[NcbiGeneInfo] = relationship("NcbiGeneInfo", foreign_keys=[gene_id]) class NcbiGeneOverlapping(Base): """Class definition for the ncbi_gene_overlapping table.""" __tablename__ = "ncbi_gene_overlapping" - id = Column(Integer, primary_key=True, autoincrement=True) - gene_id = gene_id = Column(Integer, ForeignKey("ncbi_gene_info.gene_id")) - overlapping_gene_id = Column(Integer, ForeignKey("ncbi_gene_info.gene_id")) + id: Mapped[int] = mapped_column(primary_key=True, autoincrement=True) + gene_id: Mapped[int] = mapped_column(ForeignKey("ncbi_gene_info.gene_id")) + overlapping_gene_id: Mapped[int] = mapped_column(ForeignKey("ncbi_gene_info.gene_id")) - gene = relationship("NcbiGeneInfo", foreign_keys=[gene_id]) + gene: Mapped[NcbiGeneInfo] = relationship("NcbiGeneInfo", foreign_keys=[gene_id]) class NcbiGeneOrtholog(Base): """Class definition for the ncbi_gene_ortholog table.""" __tablename__ = "ncbi_gene_ortholog" - id = Column(Integer, primary_key=True, autoincrement=True) - tax_id = Column(Integer, index=True) - gene_id = Column(Integer, ForeignKey("ncbi_gene_info.gene_id")) - other_tax_id = Column(Integer, index=True) - other_gene_id = Column(Integer, ForeignKey("ncbi_gene_info.gene_id")) + id: Mapped[int] = mapped_column(primary_key=True, autoincrement=True) + tax_id: Mapped[int] = mapped_column(index=True) + gene_id: Mapped[int] = mapped_column(ForeignKey("ncbi_gene_info.gene_id")) + other_tax_id: Mapped[int] = mapped_column(index=True) + other_gene_id: Mapped[int] = mapped_column(ForeignKey("ncbi_gene_info.gene_id")) - gene = relationship("NcbiGeneInfo", foreign_keys=[gene_id]) + gene: Mapped[NcbiGeneInfo] = relationship("NcbiGeneInfo", foreign_keys=[gene_id]) class NcbiGenePubmed(Base): """Class definition for the ncbi_gene_pubmed table.""" __tablename__ = "ncbi_gene_pubmed" - id = Column(Integer, primary_key=True) + id: Mapped[int] = mapped_column(primary_key=True) - tax_id = Column(Integer, index=True) - gene_id = Column(Integer, ForeignKey("ncbi_gene_info.gene_id")) - pub_med_id = Column(Integer) + tax_id: Mapped[int] = mapped_column(index=True) + gene_id: Mapped[int] = mapped_column(ForeignKey("ncbi_gene_info.gene_id")) + pub_med_id: Mapped[int] = mapped_column() class NcbiGeneInfoXref(Base): """Class definition for the ncbi_gene_info_xref table.""" __tablename__ = "ncbi_gene_info_xref" - id = Column(Integer, primary_key=True, autoincrement=True) + id: Mapped[int] = mapped_column(primary_key=True, autoincrement=True) - db = Column(String(100), index=True) - dbid = Column(String(100), index=True) - gene_id = Column(Integer, ForeignKey("ncbi_gene_info.gene_id")) + db: Mapped[str] = mapped_column(String(100), index=True) + dbid: Mapped[str] = mapped_column(String(100), index=True) + gene_id: Mapped[int] = mapped_column(ForeignKey("ncbi_gene_info.gene_id")) gene = relationship("NcbiGeneInfo", back_populates="xrefs") @@ -138,16 +148,16 @@ class NcbiGeneMim(Base): """Class definition for the ncbi_gene_mim table.""" __tablename__ = "ncbi_gene_mim" - id = Column(Integer, primary_key=True, autoincrement=True) + id: Mapped[int] = mapped_column(primary_key=True, autoincrement=True) - mim_number = Column(Integer) - gene_id = Column(Integer, ForeignKey("ncbi_gene_info.gene_id")) - type = Column(String(100)) - source = Column(String(100)) - med_gen_cui = Column(String(100), index=True) - comment = Column(String(100)) + mim_number: Mapped[int] = mapped_column() + gene_id: Mapped[int] = mapped_column(ForeignKey("ncbi_gene_info.gene_id")) + type: Mapped[str] = mapped_column(String(100)) + source: Mapped[str] = mapped_column(String(100)) + med_gen_cui: Mapped[str] = mapped_column(String(100), index=True) + comment: Mapped[str] = mapped_column(String(100)) - gene = relationship("NcbiGeneInfo", back_populates="mims") + gene: Mapped[NcbiGeneInfo] = relationship("NcbiGeneInfo", back_populates="mims") def as_dict(self): """Convert object values to dictionary.""" @@ -165,17 +175,17 @@ class NcbiGeneEnsembl(Base): """Class definition for the ncbi_gene_ensembl table.""" __tablename__ = "ncbi_gene_ensembl" - id = Column(Integer, primary_key=True, autoincrement=True) + id: Mapped[int] = mapped_column(primary_key=True, autoincrement=True) - tax_id = Column(Integer, index=True) - gene_id = Column(Integer, ForeignKey("ncbi_gene_info.gene_id")) - ensembl_gene_identifier = Column(String(100)) - rna_nucleotide_accession_version = Column(String(100)) - ensembl_rna_identifier = Column(String(100)) - protein_accession_version = Column(String(100)) - ensembl_protein_identifier = Column(String(100)) + tax_id: Mapped[int] = mapped_column(index=True) + gene_id: Mapped[int] = mapped_column(ForeignKey("ncbi_gene_info.gene_id")) + ensembl_gene_identifier: Mapped[str] = mapped_column(String(100)) + rna_nucleotide_accession_version: Mapped[Optional[str]] = mapped_column(String(100)) + ensembl_rna_identifier: Mapped[Optional[str]] = mapped_column(String(100)) + protein_accession_version: Mapped[Optional[str]] = mapped_column(String(100)) + ensembl_protein_identifier: Mapped[Optional[str]] = mapped_column(String(100)) - genes = relationship("NcbiGeneInfo", back_populates="ensembl_ids") + genes: Mapped[NcbiGeneInfo] = relationship("NcbiGeneInfo", back_populates="ensembl_ids") def as_dict(self): """Convert object values to dictionary.""" @@ -194,15 +204,15 @@ class NcbiGeneGo(Base): """Class definition for the ncbi_gene_go table.""" __tablename__ = "ncbi_gene_go" - id = Column(Integer, primary_key=True, autoincrement=True) + id: Mapped[int] = mapped_column(primary_key=True, autoincrement=True) - tax_id = Column(Integer, index=True) - gene_id = Column(Integer, ForeignKey("ncbi_gene_info.gene_id")) - go_id = Column(String(100), index=True) - evidence = Column(String(10)) - qualifier = Column(String(100)) - go_term = Column(String(255)) - category = Column(String(10)) + tax_id: Mapped[int] = mapped_column(index=True) + gene_id: Mapped[int] = mapped_column(ForeignKey("ncbi_gene_info.gene_id")) + go_id: Mapped[str] = mapped_column(String(100), index=True) + evidence: Mapped[str] = mapped_column(String(10)) + qualifier: Mapped[str] = mapped_column(String(100)) + go_term: Mapped[str] = mapped_column(String(255)) + category: Mapped[str] = mapped_column(String(10)) pmids = relationship("NcbiGeneGoPmid", back_populates="gos") @@ -224,25 +234,25 @@ class NcbiGeneGoPmid(Base): """Class definition for the ncbi_gene_go_pmid table.""" __tablename__ = "ncbi_gene_go_pmid" - id = Column(Integer, primary_key=True, autoincrement=True) + id: Mapped[int] = mapped_column(primary_key=True, autoincrement=True) - ncbi_gene_go_id = Column(Integer, ForeignKey("ncbi_gene_go.id")) - pmid = Column(Integer) + ncbi_gene_go_id: Mapped[int] = mapped_column(ForeignKey("ncbi_gene_go.id")) + pmid: Mapped[int] = mapped_column() - gos = relationship("NcbiGeneGo", back_populates="pmids") + gos: Mapped[List[NcbiGeneGo]] = relationship("NcbiGeneGo", back_populates="pmids") class NcbiMedGenName(Base): """Class definition for the ncbi_medgen_name table.""" __tablename__ = "ncbi_medgen_name" - id = Column(Integer, primary_key=True, autoincrement=True) + id: Mapped[int] = mapped_column(primary_key=True, autoincrement=True) - cui = Column(String(100)) - name = Column(Text) - source = Column(String(100)) - suppress = Column(String(1)) - pmids = relationship("NcbiMedGenPmid", back_populates="med_gen_name") + cui: Mapped[str] = mapped_column(String(100)) + name: Mapped[str] = mapped_column(Text) + source: Mapped[str] = mapped_column(String(100)) + suppress: Mapped[str] = mapped_column(String(1)) + pmids: Mapped[List["NcbiMedGenPmid"]] = relationship("NcbiMedGenPmid", back_populates="med_gen_name") def as_dict(self): """Convert object values to dictionary.""" @@ -255,10 +265,10 @@ class NcbiMedGenPmid(Base): """Class definition for the ncbi_medgen_pmid table.""" __tablename__ = "ncbi_medgen_pmid" - id = Column(Integer, primary_key=True, autoincrement=True) + id: Mapped[int] = mapped_column(primary_key=True, autoincrement=True) - ncbi_medgen_name_id = Column(Integer, ForeignKey("ncbi_medgen_name.id")) - pmid = Column(Integer, index=True) + ncbi_medgen_name_id: Mapped[int] = mapped_column(ForeignKey("ncbi_medgen_name.id")) + pmid: Mapped[int] = mapped_column(index=True) med_gen_name = relationship("NcbiMedGenName", back_populates="pmids") diff --git a/ebel/manager/rdbms/models/nsides.py b/ebel/manager/rdbms/models/nsides.py index ef5da64..577c09f 100644 --- a/ebel/manager/rdbms/models/nsides.py +++ b/ebel/manager/rdbms/models/nsides.py @@ -2,6 +2,7 @@ from sqlalchemy import Column, Float, Index, Integer, String from sqlalchemy.ext.declarative import declarative_base +from sqlalchemy.orm import Mapped, mapped_column from ebel.manager.rdbms.models import object_as_dict @@ -21,24 +22,26 @@ class Nsides(Base): "mean_reporting_frequency", ), ) - id = Column(Integer, primary_key=True) + id: Mapped[int] = mapped_column(primary_key=True) - drug_rxnorn_id = Column(String(20), index=True) # This has to be a String because of mapping to drugbank ids - drug_concept_name = Column(String(255), index=True) + drug_rxnorn_id: Mapped[str] = mapped_column( + String(20), index=True + ) # This has to be a String because of mapping to drugbank ids + drug_concept_name: Mapped[str] = mapped_column(String(255), index=True) - source = Column(String(10), index=True) + source: Mapped[str] = mapped_column(String(10), index=True) - condition_meddra_id = Column(Integer) - condition_concept_name = Column(String(255), index=True) + condition_meddra_id: Mapped[int] = mapped_column() + condition_concept_name: Mapped[str] = mapped_column(String(255), index=True) # OFFSIDES specific - a = Column(Integer) - b = Column(Integer) - c = Column(Integer) - d = Column(Integer) - prr = Column(Float) - prr_error = Column(Float) - mean_reporting_frequency = Column(Float, index=True) + a = mapped_column(Integer) + b = mapped_column(Integer) + c = mapped_column(Integer) + d = mapped_column(Integer) + prr = mapped_column(Float) + prr_error = mapped_column(Float) + mean_reporting_frequency = mapped_column(Float, index=True) def as_dict(self): """Convert object values to dictionary.""" diff --git a/ebel/manager/rdbms/models/pathway_commons.py b/ebel/manager/rdbms/models/pathway_commons.py index 5478aed..ef5207c 100644 --- a/ebel/manager/rdbms/models/pathway_commons.py +++ b/ebel/manager/rdbms/models/pathway_commons.py @@ -1,7 +1,9 @@ """Pathway Commons RDBMS model definition.""" +from typing import List, Optional + from sqlalchemy import BigInteger, Column, ForeignKey, Integer, String, Table from sqlalchemy.ext.declarative import declarative_base -from sqlalchemy.orm import relationship +from sqlalchemy.orm import Mapped, mapped_column, relationship from ebel.manager.rdbms.models import object_as_dict @@ -36,21 +38,23 @@ class PathwayCommons(Base): """Class definition for the pathway_commons table.""" __tablename__ = "pathway_commons" - id = Column(Integer, primary_key=True) + id: Mapped[int] = mapped_column(primary_key=True) - participant_a = Column(String(50), index=True) - interaction_type = Column(String(50), index=True) - participant_b = Column(String(50), index=True) + participant_a: Mapped[str] = mapped_column(String(50), index=True) + interaction_type: Mapped[str] = mapped_column(String(50), index=True) + participant_b: Mapped[str] = mapped_column(String(50), index=True) - pmids = relationship("Pmid", back_populates="pathway_commons") + pmids: Mapped[List["Pmid"]] = relationship("Pmid", back_populates="pathway_commons") - pathway_names = relationship( + pathway_names: Mapped[List["PathwayName"]] = relationship( "PathwayName", secondary=pathway_commons__pathway_name, back_populates="pathway_commonses", ) - sources = relationship("Source", secondary=pathway_commons__source, back_populates="pathway_commonses") + sources: Mapped[List["Source"]] = relationship( + "Source", secondary=pathway_commons__source, back_populates="pathway_commonses" + ) def __str__(self): return f"{self.participant_a} {self.interaction_type} {self.participant_b}" @@ -68,11 +72,11 @@ class PathwayName(Base): """Class definition for the pathway_commons_pathway_name table.""" __tablename__ = "pathway_commons_pathway_name" - id = Column(Integer, primary_key=True) + id: Mapped[int] = mapped_column(primary_key=True) - name = Column(String(255), index=True) + name: Mapped[str] = mapped_column(String(255), index=True) - pathway_commonses = relationship( + pathway_commonses: Mapped[List[PathwayCommons]] = relationship( "PathwayCommons", secondary=pathway_commons__pathway_name, back_populates="pathway_names", @@ -87,12 +91,12 @@ class Pmid(Base): """Class definition for the pathway_commons_pmid table.""" __tablename__ = "pathway_commons_pmid" - id = Column(Integer, primary_key=True) + id: Mapped[int] = mapped_column(primary_key=True) - pmid = Column(BigInteger, index=True) + pmid: Mapped[int] = mapped_column(BigInteger, index=True) - pathway_commons_id = Column(Integer, ForeignKey("pathway_commons.id"), index=True) - pathway_commons = relationship("PathwayCommons", back_populates="pmids") + pathway_commons_id: Mapped[int] = mapped_column(ForeignKey("pathway_commons.id"), index=True) + pathway_commons: Mapped[List[PathwayCommons]] = relationship("PathwayCommons", back_populates="pmids") def __str__(self): """Class string definition.""" @@ -103,11 +107,13 @@ class Source(Base): """Class definition for the pathway_commons_source table.""" __tablename__ = "pathway_commons_source" - id = Column(Integer, primary_key=True) + id: Mapped[int] = mapped_column(primary_key=True) - source = Column(String(50)) + source: Mapped[str] = mapped_column(String(50)) - pathway_commonses = relationship("PathwayCommons", secondary=pathway_commons__source, back_populates="sources") + pathway_commonses: Mapped[List[PathwayCommons]] = relationship( + "PathwayCommons", secondary=pathway_commons__source, back_populates="sources" + ) def __str__(self): """Class string definition.""" diff --git a/ebel/manager/rdbms/models/protein_atlas.py b/ebel/manager/rdbms/models/protein_atlas.py index 167a33a..0857120 100644 --- a/ebel/manager/rdbms/models/protein_atlas.py +++ b/ebel/manager/rdbms/models/protein_atlas.py @@ -1,6 +1,9 @@ """Protein Atlas RDBMS model definition.""" +from typing import Optional + from sqlalchemy import Column, Integer, Numeric, String, Text from sqlalchemy.ext.declarative import declarative_base +from sqlalchemy.orm import Mapped, mapped_column Base = declarative_base() @@ -9,14 +12,14 @@ class ProteinAtlasNormalTissue(Base): """Class definition for the protein_atlas_normal_tissue table.""" __tablename__ = "protein_atlas_normal_tissue" - id = Column(Integer, primary_key=True) + id: Mapped[int] = mapped_column(primary_key=True) - gene = Column(String(100), index=True) - gene_name = Column(String(100)) - tissue = Column(String(100)) - cell_type = Column(String(100)) - level = Column(String(100), index=True) - reliability = Column(String(100), index=True) + gene: Mapped[str] = mapped_column(String(100), index=True) + gene_name: Mapped[str] = mapped_column(String(100)) + tissue: Mapped[Optional[str]] = mapped_column(String(100)) + cell_type: Mapped[Optional[str]] = mapped_column(String(100)) + level: Mapped[Optional[str]] = mapped_column(String(100), index=True) + reliability: Mapped[str] = mapped_column(String(100), index=True) def as_dict(self): """Convert object values to dictionary.""" @@ -34,22 +37,22 @@ class ProteinAtlasSubcellularLocation(Base): """Class definition for the protein_atlas_subcellular_location table.""" __tablename__ = "protein_atlas_subcellular_location" - id = Column(Integer, primary_key=True) - - gene = Column(String(100)) - gene_name = Column(String(100)) - reliability = Column(String(100)) - main_location = Column(String(100)) - additional_location = Column(String(100)) - extracellular_location = Column(String(100)) - enhanced = Column(String(100)) - supported = Column(String(100)) - approved = Column(String(100)) - uncertain = Column(String(100)) - single_cell_variation_intensity = Column(String(100)) - single_cell_variation_spatial = Column(String(100)) - cell_cycle_dependency = Column(Text) - go_id = Column(Text) + id: Mapped[int] = mapped_column(primary_key=True) + + gene: Mapped[str] = mapped_column(String(100)) + gene_name: Mapped[str] = mapped_column(String(100)) + reliability: Mapped[str] = mapped_column(String(100)) + main_location: Mapped[Optional[str]] = mapped_column(String(100)) + additional_location: Mapped[Optional[str]] = mapped_column(String(100)) + extracellular_location: Mapped[Optional[str]] = mapped_column(String(100)) + enhanced: Mapped[Optional[str]] = mapped_column(String(100)) + supported: Mapped[Optional[str]] = mapped_column(String(100)) + approved: Mapped[Optional[str]] = mapped_column(String(100)) + uncertain: Mapped[Optional[str]] = mapped_column(String(100)) + single_cell_variation_intensity: Mapped[Optional[str]] = mapped_column(String(100)) + single_cell_variation_spatial: Mapped[Optional[str]] = mapped_column(String(100)) + cell_cycle_dependency: Mapped[Optional[str]] = mapped_column(Text) + go_id: Mapped[str] = mapped_column(Text) def as_dict(self): """Convert object values to dictionary.""" @@ -75,12 +78,12 @@ class ProteinAtlasRnaTissueConsensus(Base): """Class definition for the protein_atlas_rna_tissue_consensus table.""" __tablename__ = "protein_atlas_rna_tissue_consensus" - id = Column(Integer, primary_key=True) + id: Mapped[int] = mapped_column(primary_key=True) - gene = Column(String(100), index=True) - gene_name = Column(String(100), index=True) - tissue = Column(String(100), index=True) - n_tpm = Column(Numeric(8, 1)) + gene: Mapped[str] = mapped_column(String(100), index=True) + gene_name: Mapped[str] = mapped_column(String(100), index=True) + tissue: Mapped[str] = mapped_column(String(100), index=True) + n_tpm: Mapped[float] = mapped_column(Numeric(8, 1)) def as_dict(self): """Convert object values to dictionary.""" @@ -96,14 +99,14 @@ class ProteinAtlasRnaBrainGtex(Base): """Class definition for the protein_atlas_rna_brain_gtex table.""" __tablename__ = "protein_atlas_rna_brain_gtex" - id = Column(Integer, primary_key=True) + id: Mapped[int] = mapped_column(primary_key=True) - gene = Column(String(100), index=True) - gene_name = Column(String(100), index=True) - brain_region = Column(String(100), index=True) - tpm = Column(Numeric(8, 1)) - p_tpm = Column(Numeric(8, 1)) - n_tpm = Column(Numeric(8, 1)) + gene: Mapped[str] = mapped_column(String(100), index=True) + gene_name: Mapped[str] = mapped_column(String(100), index=True) + brain_region: Mapped[str] = mapped_column(String(100), index=True) + tpm: Mapped[float] = mapped_column(Numeric(8, 1)) + p_tpm: Mapped[float] = mapped_column(Numeric(8, 1)) + n_tpm: Mapped[float] = mapped_column(Numeric(8, 1)) def as_dict(self): """Convert object values to dictionary.""" @@ -121,14 +124,14 @@ class ProteinAtlasRnaBrainFantom(Base): """Class definition for the protein_atlas_rna_brain_fantom table.""" __tablename__ = "protein_atlas_rna_brain_fantom" - id = Column(Integer, primary_key=True) + id: Mapped[int] = mapped_column(primary_key=True) - gene = Column(String(100)) - gene_name = Column(String(100)) - brain_region = Column(String(100)) - tags_per_million = Column(String(100)) - scaled_tags_per_million = Column(String(100)) - n_tpm = Column(String(100)) + gene: Mapped[str] = mapped_column(String(100)) + gene_name: Mapped[str] = mapped_column(String(100)) + brain_region: Mapped[str] = mapped_column(String(100)) + tags_per_million: Mapped[str] = mapped_column(String(100)) + scaled_tags_per_million: Mapped[str] = mapped_column(String(100)) + n_tpm: Mapped[str] = mapped_column(String(100)) def as_dict(self): """Convert object values to dictionary.""" @@ -146,12 +149,12 @@ class ProteinAtlasRnaMouseBrainAllen(Base): """Class definition for the protein_atlas_rna_mouse_brain_allen table.""" __tablename__ = "protein_atlas_rna_mouse_brain_allen" - id = Column(Integer, primary_key=True) + id: Mapped[int] = mapped_column(primary_key=True) - gene = Column(String(100)) - gene_name = Column(String(100)) - brain_region = Column(String(100)) - expression_energy = Column(Numeric(8, 1)) + gene: Mapped[str] = mapped_column(String(100)) + gene_name: Mapped[str] = mapped_column(String(100)) + brain_region: Mapped[str] = mapped_column(String(100)) + expression_energy: Mapped[float] = mapped_column(Numeric(8, 1)) def as_dict(self): """Convert object values to dictionary.""" diff --git a/ebel/manager/rdbms/models/reactome.py b/ebel/manager/rdbms/models/reactome.py index 0624899..42f5b68 100644 --- a/ebel/manager/rdbms/models/reactome.py +++ b/ebel/manager/rdbms/models/reactome.py @@ -1,6 +1,7 @@ """Reactome RDBMS model definition.""" from sqlalchemy import Column, Integer, String from sqlalchemy.ext.declarative import declarative_base +from sqlalchemy.orm import Mapped, mapped_column from ebel.manager.rdbms.models import object_as_dict @@ -11,12 +12,12 @@ class Reactome(Base): """Class definition for the reactome table.""" __tablename__ = "reactome" - id = Column(Integer, primary_key=True) - identifier = Column(String(50), index=True) - uniprot_accession = Column(String(50), index=True) - organism = Column(String(255)) - name = Column(String(255)) - evidence_type = Column(String(255)) + id: Mapped[int] = mapped_column(primary_key=True) + identifier: Mapped[str] = mapped_column(String(50), index=True) + uniprot_accession: Mapped[str] = mapped_column(String(50), index=True) + organism: Mapped[str] = mapped_column(String(255)) + name: Mapped[str] = mapped_column(String(255)) + evidence_type: Mapped[str] = mapped_column(String(255)) def as_dict(self): """Convert object values to dictionary.""" diff --git a/ebel/manager/rdbms/models/stringdb.py b/ebel/manager/rdbms/models/stringdb.py index 31842a5..1d1992f 100644 --- a/ebel/manager/rdbms/models/stringdb.py +++ b/ebel/manager/rdbms/models/stringdb.py @@ -1,7 +1,9 @@ """StringDB RDBMS model definition.""" +from typing import Optional from sqlalchemy import Boolean, Column, Integer, SmallInteger, String from sqlalchemy.ext.declarative import declarative_base +from sqlalchemy.orm import Mapped, mapped_column from ebel.manager.rdbms.models import object_as_dict @@ -13,26 +15,26 @@ class StringDb(Base): __tablename__ = "stringdb" - id = Column(Integer, primary_key=True) - - protein1 = Column(String(50), nullable=False) - protein2 = Column(String(50), nullable=False) - symbol1 = Column(String(50), nullable=False, index=True) - symbol2 = Column(String(50), nullable=False, index=True) - neighborhood = Column(Integer) - neighborhood_transferred = Column(SmallInteger) - fusion = Column(SmallInteger) - cooccurence = Column(SmallInteger) - homology = Column(SmallInteger) - coexpression = Column(SmallInteger) - coexpression_transferred = Column(SmallInteger) - experiments = Column(SmallInteger, index=True) - experiments_transferred = Column(SmallInteger) - database = Column(Integer) - database_transferred = Column(SmallInteger) - textmining = Column(SmallInteger) - textmining_transferred = Column(SmallInteger) - combined_score = Column(SmallInteger) + id: Mapped[int] = mapped_column(primary_key=True) + + protein1: Mapped[str] = mapped_column(String(50), nullable=False) + protein2: Mapped[str] = mapped_column(String(50), nullable=False) + symbol1: Mapped[str] = mapped_column(String(50), nullable=False, index=True) + symbol2: Mapped[str] = mapped_column(String(50), nullable=False, index=True) + neighborhood: Mapped[int] = mapped_column() + neighborhood_transferred: Mapped[int] = mapped_column(SmallInteger) + fusion: Mapped[int] = mapped_column(SmallInteger) + cooccurence: Mapped[int] = mapped_column(SmallInteger) + homology: Mapped[int] = mapped_column(SmallInteger) + coexpression: Mapped[int] = mapped_column(SmallInteger) + coexpression_transferred: Mapped[int] = mapped_column(SmallInteger) + experiments: Mapped[int] = mapped_column(SmallInteger, index=True) + experiments_transferred: Mapped[int] = mapped_column(SmallInteger) + database: Mapped[int] = mapped_column() + database_transferred: Mapped[int] = mapped_column(SmallInteger) + textmining: Mapped[int] = mapped_column(SmallInteger) + textmining_transferred: Mapped[int] = mapped_column(SmallInteger) + combined_score: Mapped[int] = mapped_column(SmallInteger) def as_dict(self): """Convert object values to dictionary.""" @@ -44,9 +46,9 @@ class StringDbProtein(Base): __tablename__ = "stringdb_protein" - id = Column(Integer, primary_key=True) - string_protein_id = Column(String(50), nullable=False, index=True) - preferred_name = Column(String(50), nullable=False, index=True) + id: Mapped[int] = mapped_column(primary_key=True) + string_protein_id: Mapped[str] = mapped_column(String(50), nullable=False, index=True) + preferred_name: Mapped[str] = mapped_column(String(50), nullable=False, index=True) def as_dict(self): """Convert object values to dictionary.""" @@ -57,16 +59,16 @@ class StringDbAction(Base): """Class definition for the stringdb_action table.""" __tablename__ = "stringdb_action" - id = Column(Integer, primary_key=True) - item_id_a = Column(String(50), nullable=False) - item_id_b = Column(String(50), nullable=False) - symbol1 = Column(String(50), nullable=False, index=True) - symbol2 = Column(String(50), nullable=False, index=True) - mode = Column(String(20), nullable=False, index=True) - action = Column(String(20)) - is_directional = Column(Boolean, nullable=False, index=True) - a_is_acting = Column(Boolean, nullable=False, index=True) - score = Column(SmallInteger) + id: Mapped[int] = mapped_column(primary_key=True) + item_id_a: Mapped[str] = mapped_column(String(50), nullable=False) + item_id_b: Mapped[str] = mapped_column(String(50), nullable=False) + symbol1: Mapped[str] = mapped_column(String(50), nullable=False, index=True) + symbol2: Mapped[str] = mapped_column(String(50), nullable=False, index=True) + mode: Mapped[str] = mapped_column(String(20), nullable=False, index=True) + action: Mapped[Optional[str]] = mapped_column(String(20)) + is_directional: Mapped[bool] = mapped_column(Boolean, nullable=False, index=True) + a_is_acting: Mapped[bool] = mapped_column(Boolean, nullable=False, index=True) + score: Mapped[int] = mapped_column(SmallInteger) def as_dict(self): """Convert object values to dictionary.""" diff --git a/ebel/manager/rdbms/models/uniprot.py b/ebel/manager/rdbms/models/uniprot.py index 39a86f3..50e3170 100644 --- a/ebel/manager/rdbms/models/uniprot.py +++ b/ebel/manager/rdbms/models/uniprot.py @@ -1,9 +1,10 @@ """UniProt RDBMS model definition.""" from collections import defaultdict +from typing import List from sqlalchemy import Column, ForeignKey, Integer, String, Table, Text from sqlalchemy.ext.declarative import declarative_base -from sqlalchemy.orm import relationship +from sqlalchemy.orm import Mapped, mapped_column, relationship Base = declarative_base() @@ -45,29 +46,33 @@ class Uniprot(Base): __tablename__ = "uniprot" - id = Column(Integer, primary_key=True) + id: Mapped[int] = mapped_column(primary_key=True) - accession = Column(String(20), unique=True) - name = Column(String(100), nullable=False, unique=True) - recommended_name = Column(String(255), nullable=True) + accession: Mapped[str] = mapped_column(String(20), unique=True) + name: Mapped[str] = mapped_column(String(100), nullable=False, unique=True) + recommended_name: Mapped[str] = mapped_column(String(255), nullable=True) - taxid = Column(Integer, ForeignKey("uniprot_organism.taxid"), nullable=False, index=True) - organism = relationship("Organism") + taxid: Mapped[int] = mapped_column(ForeignKey("uniprot_organism.taxid"), nullable=False, index=True) + organism: Mapped["Organism"] = relationship("Organism") - function_id = Column(Integer, ForeignKey("uniprot_function.id"), nullable=True) - function = relationship("Function") + function_id: Mapped[int] = mapped_column(ForeignKey("uniprot_function.id"), nullable=True) + function: Mapped["Function"] = relationship("Function") - gene_names = relationship("Gene", back_populates="uniprot") + gene_names: Mapped[List["Gene"]] = relationship("Gene", back_populates="uniprot") - gene_symbol = relationship("GeneSymbol", uselist=False, back_populates="uniprot") + gene_symbol: Mapped["GeneSymbol"] = relationship("GeneSymbol", uselist=False, back_populates="uniprot") - keywords = relationship("Keyword", secondary=uniprot__uniprot_keyword, back_populates="uniprots") + keywords: Mapped[List["Keyword"]] = relationship( + "Keyword", secondary=uniprot__uniprot_keyword, back_populates="uniprots" + ) - hosts = relationship("Organism", secondary=uniprot__uniprot_host, back_populates="uniprots") + hosts: Mapped[List["Organism"]] = relationship( + "Organism", secondary=uniprot__uniprot_host, back_populates="uniprots" + ) - xrefs = relationship("Xref", secondary=uniprot__uniprot_xref, back_populates="uniprots") + xrefs: Mapped[List["Xref"]] = relationship("Xref", secondary=uniprot__uniprot_xref, back_populates="uniprots") - subcellular_locations = relationship( + subcellular_locations: Mapped[List["SubcellularLocation"]] = relationship( "SubcellularLocation", secondary=uniprot__uniprot_subcellular_location, back_populates="uniprots", @@ -103,10 +108,10 @@ class GeneSymbol(Base): """Class definition for the uniprot_gene_symbol table.""" __tablename__ = "uniprot_gene_symbol" - id = Column(Integer, primary_key=True) - symbol = Column(String(100), nullable=False, index=True) - uniprot_id = Column(Integer, ForeignKey("uniprot.id")) - uniprot = relationship("Uniprot", back_populates="gene_symbol") + id: Mapped[int] = mapped_column(primary_key=True) + symbol: Mapped[str] = mapped_column(String(100), nullable=False, index=True) + uniprot_id: Mapped[int] = mapped_column(ForeignKey("uniprot.id")) + uniprot: Mapped[Uniprot] = relationship("Uniprot", back_populates="gene_symbol") def __repr__(self): """Define repr.""" @@ -117,10 +122,10 @@ class Gene(Base): """Class definition for the uniprot_gene table.""" __tablename__ = "uniprot_gene" - id = Column(Integer, primary_key=True) - name = Column(String(100), nullable=False, index=True) - uniprot_id = Column(Integer, ForeignKey("uniprot.id")) - uniprot = relationship("Uniprot", back_populates="gene_names") + id: Mapped[int] = mapped_column(primary_key=True) + name: Mapped[str] = mapped_column(String(100), nullable=False, index=True) + uniprot_id: Mapped[int] = mapped_column(ForeignKey("uniprot.id")) + uniprot: Mapped[Uniprot] = relationship("Uniprot", back_populates="gene_names") class Keyword(Base): @@ -128,10 +133,12 @@ class Keyword(Base): __tablename__ = "uniprot_keyword" - keywordid = Column(Integer, primary_key=True) - keyword_name = Column(String(100), index=True) + keywordid: Mapped[int] = mapped_column(primary_key=True) + keyword_name: Mapped[str] = mapped_column(String(100), index=True) - uniprots = relationship("Uniprot", secondary=uniprot__uniprot_keyword, back_populates="keywords") + uniprots: Mapped[List[Uniprot]] = relationship( + "Uniprot", secondary=uniprot__uniprot_keyword, back_populates="keywords" + ) def __repr__(self): """Define repr.""" @@ -143,10 +150,10 @@ class Organism(Base): __tablename__ = "uniprot_organism" - taxid = Column(Integer, primary_key=True) - scientific_name = Column(String(255)) # TODO:Check if index=True with is possible + taxid: Mapped[int] = mapped_column(primary_key=True) + scientific_name: Mapped[str] = mapped_column(String(255)) # TODO:Check if index=True with is possible - uniprots = relationship("Uniprot", secondary=uniprot__uniprot_host, back_populates="hosts") + uniprots: Mapped[List[Uniprot]] = relationship("Uniprot", secondary=uniprot__uniprot_host, back_populates="hosts") class SubcellularLocation(Base): @@ -154,11 +161,10 @@ class SubcellularLocation(Base): __tablename__ = "uniprot_subcellular_location" - id = Column(Integer, primary_key=True) - - name = Column(String(100), index=True) + id: Mapped[int] = mapped_column(primary_key=True) + name: Mapped[str] = mapped_column(String(100), index=True) - uniprots = relationship( + uniprots: Mapped[List[Uniprot]] = relationship( "Uniprot", secondary=uniprot__uniprot_subcellular_location, back_populates="subcellular_locations", @@ -170,12 +176,12 @@ class Xref(Base): __tablename__ = "uniprot_xref" - id = Column(Integer, primary_key=True) + id: Mapped[int] = mapped_column(primary_key=True) - db = Column(String(50), index=True) - identifier = Column(String(100), index=True) + db: Mapped[str] = mapped_column(String(50), index=True) + identifier: Mapped[str] = mapped_column(String(100), index=True) - uniprots = relationship("Uniprot", secondary=uniprot__uniprot_xref, back_populates="xrefs") + uniprots: Mapped[List[Uniprot]] = relationship("Uniprot", secondary=uniprot__uniprot_xref, back_populates="xrefs") class Function(Base): @@ -183,8 +189,8 @@ class Function(Base): __tablename__ = "uniprot_function" - id = Column(Integer, primary_key=True) + id: Mapped[int] = mapped_column(primary_key=True) - description = Column(Text) + description: Mapped[str] = mapped_column(Text) - uniprots = relationship("Uniprot", back_populates="function") + uniprots: Mapped[List[Uniprot]] = relationship("Uniprot", back_populates="function") diff --git a/ebel/tools.py b/ebel/tools.py index 62fba6f..309a57d 100644 --- a/ebel/tools.py +++ b/ebel/tools.py @@ -5,6 +5,7 @@ import os.path import re import shutil +from os import PathLike from types import GeneratorType from typing import Iterable, List, Union @@ -93,7 +94,7 @@ def md5(file_path): return hash_md5.hexdigest() -def get_file_path(url: str, biodb: str): +def get_file_path(url: str, biodb: str) -> str: """Get standard file path by file_name and DATADIR.""" file_name = os.path.basename(url) bio_db_dir = os.path.join(DATA_DIR, biodb) diff --git a/ebel/web/api/ebel/v1/bel.py b/ebel/web/api/ebel/v1/bel.py index 23c8d72..e14c76b 100644 --- a/ebel/web/api/ebel/v1/bel.py +++ b/ebel/web/api/ebel/v1/bel.py @@ -17,8 +17,7 @@ from graphviz import Digraph from ebel import Bel -from ebel.manager.orientdb.odb_structure import (get_columns, - get_node_view_labels) +from ebel.manager.orientdb.odb_structure import get_columns, get_node_view_labels from ebel.validate import validate_bel_file from ebel.web.api.ebel.v1 import DataType, OrientDbSqlOperator, _get_pagination diff --git a/ebel/web/api/ebel/v1/bel_against_expression.py b/ebel/web/api/ebel/v1/bel_against_expression.py index 7ee4e39..da7696f 100644 --- a/ebel/web/api/ebel/v1/bel_against_expression.py +++ b/ebel/web/api/ebel/v1/bel_against_expression.py @@ -8,8 +8,7 @@ from sqlalchemy.sql import func from ebel import Bel -from ebel.manager.rdbms.models.expression_atlas import (Experiment, FoldChange, - GroupComparison) +from ebel.manager.rdbms.models.expression_atlas import Experiment, FoldChange, GroupComparison from ebel.web.api.ebel.v1 import _get_pagination Relation = namedtuple( diff --git a/ebel/web/api/ebel/v1/biogrid.py b/ebel/web/api/ebel/v1/biogrid.py index ebc6563..25b43d3 100644 --- a/ebel/web/api/ebel/v1/biogrid.py +++ b/ebel/web/api/ebel/v1/biogrid.py @@ -9,9 +9,15 @@ from ebel import Bel from ebel.manager.orientdb.biodbs.biogrid import MODIFICATIONS -from ebel.manager.rdbms.models.biogrid import (Biogrid, ExperimentalSystem, - Interactor, Modification, - Publication, Source, Taxonomy) +from ebel.manager.rdbms.models.biogrid import ( + Biogrid, + ExperimentalSystem, + Interactor, + Modification, + Publication, + Source, + Taxonomy, +) from ebel.web.api import RDBMS from ebel.web.api.ebel.v1 import _get_data diff --git a/ebel/web/api/ebel/v1/clinical_trials_gov.py b/ebel/web/api/ebel/v1/clinical_trials_gov.py index 88d91d2..5332ab9 100644 --- a/ebel/web/api/ebel/v1/clinical_trials_gov.py +++ b/ebel/web/api/ebel/v1/clinical_trials_gov.py @@ -5,8 +5,7 @@ from ebel.manager.rdbms.models import clinical_trials_gov as ct from ebel.web.api import RDBMS -from ebel.web.api.ebel.v1 import (_get_paginated_query_result, - _get_terms_from_model_starts_with) +from ebel.web.api.ebel.v1 import _get_paginated_query_result, _get_terms_from_model_starts_with def get_ct_by_nct_id(): diff --git a/ebel/web/api/ebel/v1/clinvar.py b/ebel/web/api/ebel/v1/clinvar.py index 46feeac..5502e7c 100644 --- a/ebel/web/api/ebel/v1/clinvar.py +++ b/ebel/web/api/ebel/v1/clinvar.py @@ -7,9 +7,12 @@ from ebel import Bel from ebel.manager.rdbms.models import clinvar from ebel.web.api import RDBMS -from ebel.web.api.ebel.v1 import (_get_data, _get_paginated_query_result, - _get_pagination, - _get_terms_from_model_starts_with) +from ebel.web.api.ebel.v1 import ( + _get_data, + _get_paginated_query_result, + _get_pagination, + _get_terms_from_model_starts_with, +) def get_clinvar(): diff --git a/ebel/web/api/ebel/v1/disgenet.py b/ebel/web/api/ebel/v1/disgenet.py index 71dbab4..62ac705 100644 --- a/ebel/web/api/ebel/v1/disgenet.py +++ b/ebel/web/api/ebel/v1/disgenet.py @@ -3,9 +3,11 @@ from ebel.manager.rdbms.models import disgenet from ebel.web.api import RDBMS -from ebel.web.api.ebel.v1 import (_get_paginated_ebel_query_result, - _get_paginated_query_result, - _get_terms_from_model_starts_with) +from ebel.web.api.ebel.v1 import ( + _get_paginated_ebel_query_result, + _get_paginated_query_result, + _get_terms_from_model_starts_with, +) def get_sources(): diff --git a/ebel/web/api/ebel/v1/drugbank.py b/ebel/web/api/ebel/v1/drugbank.py index 6b55fa9..fd1205c 100644 --- a/ebel/web/api/ebel/v1/drugbank.py +++ b/ebel/web/api/ebel/v1/drugbank.py @@ -4,8 +4,7 @@ from ebel.manager.rdbms.models import drugbank from ebel.web.api import RDBMS -from ebel.web.api.ebel.v1 import (_get_data, _get_paginated_ebel_query_result, - _get_paginated_query_result) +from ebel.web.api.ebel.v1 import _get_data, _get_paginated_ebel_query_result, _get_paginated_query_result def get_by_id(): diff --git a/ebel/web/api/ebel/v1/expression_atlas.py b/ebel/web/api/ebel/v1/expression_atlas.py index 9302eaa..e1c9c23 100644 --- a/ebel/web/api/ebel/v1/expression_atlas.py +++ b/ebel/web/api/ebel/v1/expression_atlas.py @@ -8,9 +8,14 @@ from sqlalchemy import inspect from ebel import Bel -from ebel.manager.rdbms.models.expression_atlas import (Experiment, FoldChange, - GroupComparison, Gsea, - Idf, SdrfCondensed) +from ebel.manager.rdbms.models.expression_atlas import ( + Experiment, + FoldChange, + GroupComparison, + Gsea, + Idf, + SdrfCondensed, +) from ebel.web.api import RDBMS from ebel.web.api.ebel.v1 import _get_data diff --git a/ebel/web/api/ebel/v1/intact.py b/ebel/web/api/ebel/v1/intact.py index f625920..75d8d5d 100644 --- a/ebel/web/api/ebel/v1/intact.py +++ b/ebel/web/api/ebel/v1/intact.py @@ -5,8 +5,7 @@ from ebel.manager.orientdb.odb_structure import intact_edges from ebel.manager.rdbms.models.intact import Intact from ebel.web.api import RDBMS -from ebel.web.api.ebel.v1 import (_get_data, _get_paginated_ebel_query_result, - _get_paginated_query_result) +from ebel.web.api.ebel.v1 import _get_data, _get_paginated_ebel_query_result, _get_paginated_query_result def get_intact(): diff --git a/ebel/web/api/ebel/v1/kegg.py b/ebel/web/api/ebel/v1/kegg.py index e0bd768..f2be69b 100644 --- a/ebel/web/api/ebel/v1/kegg.py +++ b/ebel/web/api/ebel/v1/kegg.py @@ -5,8 +5,7 @@ from ebel.manager.rdbms.models.kegg import Kegg from ebel.web.api import RDBMS -from ebel.web.api.ebel.v1 import (_get_data, _get_paginated_ebel_query_result, - _get_paginated_query_result) +from ebel.web.api.ebel.v1 import _get_data, _get_paginated_ebel_query_result, _get_paginated_query_result def get_kegg(): diff --git a/ebel/web/api/ebel/v1/pathway_commons.py b/ebel/web/api/ebel/v1/pathway_commons.py index 3f5d1f4..6c68070 100644 --- a/ebel/web/api/ebel/v1/pathway_commons.py +++ b/ebel/web/api/ebel/v1/pathway_commons.py @@ -3,12 +3,14 @@ from flask import request from sqlalchemy import or_ -from ebel.manager.rdbms.models.pathway_commons import ( - PathwayCommons, PathwayName, Pmid, pathway_commons__pathway_name) +from ebel.manager.rdbms.models.pathway_commons import PathwayCommons, PathwayName, Pmid, pathway_commons__pathway_name from ebel.web.api import RDBMS -from ebel.web.api.ebel.v1 import (_get_data, _get_paginated_ebel_query_result, - _get_paginated_query_result, - _get_terms_from_model_starts_with) +from ebel.web.api.ebel.v1 import ( + _get_data, + _get_paginated_ebel_query_result, + _get_paginated_query_result, + _get_terms_from_model_starts_with, +) def get_pathway_commons(): diff --git a/ebel/web/api/ebel/v1/uniprot.py b/ebel/web/api/ebel/v1/uniprot.py index 0f231e5..51858c3 100644 --- a/ebel/web/api/ebel/v1/uniprot.py +++ b/ebel/web/api/ebel/v1/uniprot.py @@ -6,8 +6,7 @@ from ebel import Bel from ebel.manager.rdbms.models import uniprot from ebel.web.api import RDBMS -from ebel.web.api.ebel.v1 import (_get_paginated_query_result, - _get_terms_from_model_starts_with) +from ebel.web.api.ebel.v1 import _get_paginated_query_result, _get_terms_from_model_starts_with from . import add_query_filters diff --git a/mkdocs.yml b/mkdocs.yml index 8be399d..bd87227 100644 --- a/mkdocs.yml +++ b/mkdocs.yml @@ -8,7 +8,7 @@ repo_url: https://github.com/e-bel/ebel theme: readthedocs extra: - version: 1.0.37 + version: 1.1.0 nav: # - Home: index.md diff --git a/pyproject.toml b/pyproject.toml index 030a032..56014ba 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -4,7 +4,7 @@ build-backend = "poetry.core.masonry.api" [tool.poetry] name = "ebel" -version = "1.0.37" +version = "1.1.0" description = "e(BE:L) - validation and extension of BEL networks." authors = [ "Bruce Schultz ", @@ -39,27 +39,27 @@ Issues = 'https://github.com/e-bel/ebel/issues' Documentation = 'https://ebel.readthedocs.io/en/latest/' [tool.poetry.dependencies] -lark-parser = "^0.11.2" -click = "^7.1.2" -requests = "^2.25.1" -tqdm = "^4.59.0" -pandas = "^1.2.4" -sqlalchemy = "^1.4.46" -SQLAlchemy-Utils = "^0.37.7" +lark-parser = "^0.11.3" +click = "^8.1.7" +requests = "^2.31.0" +tqdm = "^4.66.1" +pandas = "^2.1.1" +sqlalchemy = "^2.0.20" +SQLAlchemy-Utils = "^0.41.1" xlwt = "^1.3.0" xlrd = "^2.0.1" -xlsxwriter = "^1.3.8" +xlsxwriter = "^1.4.5" xmltodict = "^0.12.0" -GitPython = "^3.1.14" -lxml = "^4.6.5" -flask = "^2.0.1" +GitPython = "^3.1.36" +lxml = "^4.9.3" +flask = "^2.2.5" flask_cors = "^3.0.10" -connexion = {version = "^2.14.1", extras = ["swagger-ui"]} -cryptography = "^3.4.7" -openpyxl = "^3.0.10" +connexion = {version = "^2.14.2", extras = ["swagger-ui"]} +cryptography = "^3.4.8" +openpyxl = "^3.1.2" graphviz = "0.20" pyorientdb = "^1.0.0" -PyMySQL = "^1.0.2" +PyMySQL = "^1.1.0" python = "^3.9" mkdocstrings = {version = "^0.18", extras = ["python"]} @@ -117,4 +117,4 @@ source = [ ] [tool.coverage.html] -directory = "coverage_html_report" \ No newline at end of file +directory = "coverage_html_report" diff --git a/requirements.txt b/requirements.txt index f00f628..206a5d6 100755 --- a/requirements.txt +++ b/requirements.txt @@ -1,21 +1,21 @@ -lark-parser==0.11.2 -click>=7.1.2 -requests>=2.25.1 -tqdm>=4.59.0 -pandas>=1.2.4 -sqlalchemy>=1.4.15 -SQLAlchemy-Utils==0.37.7 +lark-parser==0.11.3 +click>=8.1.7 +requests>=2.31.0 +tqdm>=4.66.1 +pandas>=2.1.1 +sqlalchemy>=2.0.20 +SQLAlchemy-Utils==0.41.1 xlwt==1.3.0 xlrd==2.0.1 -xlsxwriter==1.3.8 -pymysql==1.0.2 +xlsxwriter==1.4.5 +pymysql==1.1.0 xmltodict==0.12.0 -GitPython==3.1.14 -lxml>=4.6.5 -flask==2.0.1 +GitPython==3.1.36 +lxml>=4.9.3 +flask==2.2.5 flask_cors==3.0.10 -connexion[swagger-ui]==2.14.1 -cryptography==3.4.7 -openpyxl==3.0.7 -graphviz -pyorientdb \ No newline at end of file +connexion[swagger-ui]==2.14.2 +cryptography==3.4.8 +openpyxl==3.1.2 +graphviz==0.20 +pyorientdb==1.0.0