From 35a07a4804bc2a37005472fa22c84db8d15fa561 Mon Sep 17 00:00:00 2001
From: Bruce Schultz <bruce.schultz@scai.fraunhofer.de>
Date: Fri, 15 Sep 2023 12:02:59 +0200
Subject: [PATCH 01/58] feat: begin update to sqlalchemy2, update models and
 reqs

---
 ebel/manager/models.py                        |  32 +--
 ebel/manager/orientdb/biodbs/bel.py           |   4 +
 ebel/manager/orientdb/biodbs/biogrid.py       |  28 ++-
 ebel/manager/orientdb/biodbs/clinvar.py       |   3 +-
 ebel/manager/orientdb/biodbs/disgenet.py      |   3 +-
 ebel/manager/orientdb/biodbs/intact.py        |   5 +-
 ebel/manager/orientdb/biodbs/mirtarbase.py    |   3 +-
 ebel/manager/orientdb/biodbs/nsides.py        |   4 +-
 ebel/manager/orientdb/biodbs/stringdb.py      |   6 +-
 ebel/manager/orientdb/biodbs/uniprot.py       |  11 +-
 ebel/manager/orientdb/odb_meta.py             |   9 +-
 ebel/manager/orientdb/urls.py                 |   2 +-
 ebel/manager/rdbms/models/biogrid.py          | 100 ++++----
 ebel/manager/rdbms/models/chebi.py            | 152 ++++++------
 .../rdbms/models/clinical_trials_gov.py       |  81 ++++---
 ebel/manager/rdbms/models/clinvar.py          | 110 +++++----
 ebel/manager/rdbms/models/disgenet.py         |  66 +++---
 ebel/manager/rdbms/models/drugbank.py         | 150 ++++++------
 ebel/manager/rdbms/models/ensembl.py          |  23 +-
 ebel/manager/rdbms/models/expression_atlas.py | 106 +++++----
 ebel/manager/rdbms/models/gwas_catalog.py     |  82 +++----
 ebel/manager/rdbms/models/hgnc.py             | 223 +++++++++---------
 ebel/manager/rdbms/models/human_ortholog.py   |  25 +-
 ebel/manager/rdbms/models/intact.py           |  21 +-
 ebel/manager/rdbms/models/iuphar.py           | 142 +++++------
 ebel/manager/rdbms/models/kegg.py             |  21 +-
 ebel/manager/rdbms/models/mirtarbase.py       |  21 +-
 ebel/manager/rdbms/models/ncbi.py             | 178 +++++++-------
 ebel/manager/rdbms/models/nsides.py           |  27 ++-
 ebel/manager/rdbms/models/pathway_commons.py  |  42 ++--
 ebel/manager/rdbms/models/protein_atlas.py    |  95 ++++----
 ebel/manager/rdbms/models/reactome.py         |  13 +-
 ebel/manager/rdbms/models/stringdb.py         |  67 +++---
 ebel/manager/rdbms/models/uniprot.py          |  48 ++--
 pyproject.toml                                |  28 +--
 requirements.txt                              |  32 +--
 36 files changed, 1023 insertions(+), 940 deletions(-)

diff --git a/ebel/manager/models.py b/ebel/manager/models.py
index 719860b..1ab5587 100755
--- a/ebel/manager/models.py
+++ b/ebel/manager/models.py
@@ -13,9 +13,9 @@
 import requests
 import sqlalchemy
 from lark import Lark, Token, Tree
-from sqlalchemy import Boolean, Column, ForeignKey, Index, Integer, String
+from sqlalchemy import Boolean, ForeignKey, Index, Integer, String
 from sqlalchemy.ext.declarative import declarative_base, declared_attr
-from sqlalchemy.orm import relationship
+from sqlalchemy.orm import relationship, mapped_column
 from sqlalchemy.sql.expression import func
 from sqlalchemy_utils import create_database, database_exists
 from tqdm import tqdm
@@ -55,7 +55,7 @@ def foreign_key_to(table_name):
     :rtype: sqlalchemy.Column
     """
     foreign_column = table_name + ".id"
-    return Column(Integer, ForeignKey(foreign_column))
+    return mapped_column(Integer, ForeignKey(foreign_column))
 
 
 class MasterModel(object):
@@ -71,7 +71,7 @@ def __tablename__(self):
 
     __mapper_args__ = {"always_refresh": True}
 
-    id = Column(Integer, primary_key=True)
+    id = mapped_column(Integer, primary_key=True)
 
     def _to_dict(self):
         """Protected method for converting values to dictionary."""
@@ -94,10 +94,10 @@ class Namespace(Base, MasterModel):
     __tablename__ = "namespace"
     __table_args__ = (Index("idx_url", "url", mysql_length=100),)
 
-    url = Column(String(2048), nullable=False)
-    keyword = Column(String(255), index=True)
-    cacheable = Column(Boolean)
-    case_sensitive = Column(Boolean)
+    url = mapped_column(String(2048), nullable=False)
+    keyword = mapped_column(String(255), index=True)
+    cacheable = mapped_column(Boolean)
+    case_sensitive = mapped_column(Boolean)
 
     entries = relationship("NamespaceEntry", back_populates="namespace")
 
@@ -108,8 +108,8 @@ class NamespaceEntry(Base, MasterModel):
     __tablename__ = "namespace_entry"
     __table_args__ = (Index("idx_name", "name", mysql_length=100),)
 
-    name = Column(String(2048), nullable=True)
-    encoding = Column(String(8), nullable=True)
+    name = mapped_column(String(2048), nullable=True)
+    encoding = mapped_column(String(8), nullable=True)
 
     namespace__id = foreign_key_to("namespace")
     namespace = relationship("Namespace", back_populates="entries")
@@ -121,10 +121,10 @@ class Annotation(Base, MasterModel):
     __tablename__ = "annotation"
     __table_args__ = (Index("idx_url2", "url", mysql_length=100),)
 
-    url = Column(String(2048), nullable=False)
-    keyword = Column(String(255), index=True)
-    cacheable = Column(Boolean)
-    case_sensitive = Column(Boolean)
+    url = mapped_column(String(2048), nullable=False)
+    keyword = mapped_column(String(255), index=True)
+    cacheable = mapped_column(Boolean)
+    case_sensitive = mapped_column(Boolean)
 
     entries = relationship("AnnotationEntry", back_populates="annotation", cascade="all, delete-orphan")
 
@@ -135,8 +135,8 @@ class AnnotationEntry(Base, MasterModel):
     __tablename__ = "annotation_entry"
     __table_args__ = (Index("idx_identifier", "identifier", mysql_length=100),)
 
-    name = Column(String(2048), nullable=True)
-    identifier = Column(String(255), nullable=True)
+    name = mapped_column(String(2048), nullable=True)
+    identifier = mapped_column(String(255), nullable=True)
 
     annotation__id = foreign_key_to("annotation")
     annotation = relationship("Annotation", back_populates="entries")
diff --git a/ebel/manager/orientdb/biodbs/bel.py b/ebel/manager/orientdb/biodbs/bel.py
index 10c5106..d5a0f20 100644
--- a/ebel/manager/orientdb/biodbs/bel.py
+++ b/ebel/manager/orientdb/biodbs/bel.py
@@ -680,3 +680,7 @@ def insert_data(self) -> Dict[str, int]:
     def update_interactions(self) -> int:
         """Abstract method."""
         pass
+
+if __name__ == "__main__":
+    b = Bel()
+    b.clinical_trials.update()
\ No newline at end of file
diff --git a/ebel/manager/orientdb/biodbs/biogrid.py b/ebel/manager/orientdb/biodbs/biogrid.py
index c2e7e1c..63df5a5 100644
--- a/ebel/manager/orientdb/biodbs/biogrid.py
+++ b/ebel/manager/orientdb/biodbs/biogrid.py
@@ -1,5 +1,5 @@
 """BioGrid."""
-
+import logging
 import typing
 from enum import Enum
 from typing import Dict, Tuple
@@ -7,6 +7,7 @@
 import numpy as np
 import pandas as pd
 from pyorientdb import OrientDB
+from sqlalchemy import text
 from tqdm import tqdm
 
 from ebel import tools
@@ -18,6 +19,8 @@
 STANDARD_NAMESPACES = {9606: "HGNC", 10090: "MGI", 10116: "RGD"}
 
 
+logger = logging.getLogger(__name__)
+
 class BioGridNode:
     """Custom class definition for BioGRID nodes."""
 
@@ -311,6 +314,8 @@ def insert_data(self) -> Dict[str, int]:
         df.index += 1
         df.index.rename("id", inplace=True)
 
+        logger.info("Insert BIOGRID data")
+
         df.to_sql(biogrid.Biogrid.__tablename__, self.engine, if_exists="append")
 
         return {self.biodb_name: df.shape[0]}
@@ -469,13 +474,14 @@ def get_uniprot_modification_pairs(self):
         where
             m.modification != 'No Modification' and ia.uniprot IS NOT NULL and ib.uniprot IS NOT NULL
         group by
-            ia.symbol,
-            ia.uniprot,
-            ia.taxonomy_id,
-            ib.symbol,
-            ib.uniprot,
-            ib.taxonomy_id"""
-        return [dict(x) for x in self.engine.execute(sql).fetchall()]
+            subject_symbol,
+            subject_uniprot,
+            subject_taxonomy_id,
+            object_symbol,
+            object_uniprot,
+            object_taxonomy_id"""
+        results = self.session.execute(text(sql)).fetchall()
+        return [x._asdict() for x in results]
 
     def get_create_pure_protein_rid_by_uniprot(self, taxonomy_id, symbol, uniprot):
         """Get pure protein rid by UniProt accession ID if the protein is involved in a BEL statement."""
@@ -561,8 +567,8 @@ def update_interactions(self) -> int:
                     object_uniprot=e["object_uniprot"],
                 )
 
-                for row in self.engine.execute(sql).fetchall():
-                    row_dict = dict(row)
+                for row in self.session.execute(text(sql)).fetchall():
+                    row_dict = row._asdict()
                     be = BioGridEdge(subject_rid=subj_pure_rid, object_rid=obj_pure_rid, **row_dict)
                     edge_value_dict = be.get_edge_value_dict()
 
@@ -620,4 +626,4 @@ def create_view(self):
                 biogrid_modification m on (m.id=b.modification_id) left join
                 biogrid_source s on (s.id=b.source_id) left join
                 biogrid_publication p on (p.id=b.publication_id)"""
-        self.engine.execute(sql)
+        self.session.execute(text(sql))
diff --git a/ebel/manager/orientdb/biodbs/clinvar.py b/ebel/manager/orientdb/biodbs/clinvar.py
index a5d0f47..ef8f237 100644
--- a/ebel/manager/orientdb/biodbs/clinvar.py
+++ b/ebel/manager/orientdb/biodbs/clinvar.py
@@ -5,6 +5,7 @@
 
 import pandas as pd
 from pyorientdb import OrientDB
+from sqlalchemy import text
 from tqdm import tqdm
 
 from ebel.manager.orientdb import odb_meta, odb_structure, urls
@@ -176,7 +177,7 @@ def get_disease_snps_dict(self) -> Dict[str, List[Snp]]:
         results = dict()
         for kwd in disease_keywords:
             sql = sql_temp.format(keyword=kwd)
-            rows = self.engine.execute(sql)
+            rows = self.session.execute(text(sql))
             results[kwd] = [Snp(*x) for x in rows.fetchall()]
 
         return results
diff --git a/ebel/manager/orientdb/biodbs/disgenet.py b/ebel/manager/orientdb/biodbs/disgenet.py
index c1d27f7..d884350 100644
--- a/ebel/manager/orientdb/biodbs/disgenet.py
+++ b/ebel/manager/orientdb/biodbs/disgenet.py
@@ -4,6 +4,7 @@
 
 import pandas as pd
 from pyorientdb import OrientDB
+from sqlalchemy import text
 from tqdm import tqdm
 
 from ebel.manager.orientdb import odb_meta, odb_structure, urls
@@ -202,7 +203,7 @@ def update_snps(self) -> int:
         results = dict()
         for kwd in self.disease_keywords:
             sql = sql_temp.format(kwd)
-            rows = self.engine.execute(sql)
+            rows = self.session.execute(text(sql))
             results[kwd] = rows
 
         inserted = 0
diff --git a/ebel/manager/orientdb/biodbs/intact.py b/ebel/manager/orientdb/biodbs/intact.py
index 39f8bc9..0a4d57d 100644
--- a/ebel/manager/orientdb/biodbs/intact.py
+++ b/ebel/manager/orientdb/biodbs/intact.py
@@ -5,6 +5,7 @@
 
 import pandas as pd
 from pyorientdb import OrientDB
+from sqlalchemy import text
 from tqdm import tqdm
 
 from ebel.manager.orientdb import odb_meta, odb_structure, urls
@@ -142,7 +143,7 @@ def get_namespace_name_by_uniprot(self, uniprot_accession: str) -> tuple:
         return_value = ()
         sql = f"""Select s.symbol, u.taxid from uniprot u inner join uniprot_gene_symbol s
                   on (u.id=s.uniprot_id) where u.accession='{uniprot_accession}' limit 1"""
-        result = self.engine.execute(sql).fetchone()
+        result = self.session.execute(text(sql)).fetchone()
         taxid_to_namespace = {9606: "HGNC", 10090: "MGI", 10116: "RGD"}
         if result:
             name, taxid = result
@@ -191,7 +192,7 @@ def update_interactions(self) -> int:
 
         for uniprot_accession in tqdm(uniprot_accessions, desc="Update IntAct interactions"):
             sql = sql_temp.format(uniprot_accession=uniprot_accession)
-            result = self.engine.execute(sql)
+            result = self.session.execute(text(sql))
 
             for (
                 up_a,
diff --git a/ebel/manager/orientdb/biodbs/mirtarbase.py b/ebel/manager/orientdb/biodbs/mirtarbase.py
index 14eec42..4085b2a 100644
--- a/ebel/manager/orientdb/biodbs/mirtarbase.py
+++ b/ebel/manager/orientdb/biodbs/mirtarbase.py
@@ -3,6 +3,7 @@
 
 import pandas as pd
 from pyorientdb import OrientDB
+from sqlalchemy import text
 from tqdm import tqdm
 
 from ebel.manager.orientdb import odb_meta, odb_structure, urls
@@ -70,7 +71,7 @@ def update_interactions(self) -> int:
                 species_target_gene='Homo sapiens' and
                 support_type in ('Functional MTI', 'Non-Functional MTI')"""
         cols = ["mi_rna", "symbol", "support_type", "pmid", "experiments"]
-        df_mirtarbase = pd.DataFrame(self.engine.execute(sql).fetchall(), columns=cols)
+        df_mirtarbase = pd.DataFrame(self.session.execute(text(sql)).fetchall(), columns=cols)
         df_mirtarbase.experiments = df_mirtarbase.experiments.str.split("//")
         df_join = df_mirtarbase.set_index("symbol").join(df_symbol_rid.set_index("symbol"), how="inner")
 
diff --git a/ebel/manager/orientdb/biodbs/nsides.py b/ebel/manager/orientdb/biodbs/nsides.py
index 50b5909..f9cb0a7 100644
--- a/ebel/manager/orientdb/biodbs/nsides.py
+++ b/ebel/manager/orientdb/biodbs/nsides.py
@@ -7,6 +7,7 @@
 
 import pandas as pd
 from pyorientdb import OrientDB
+from sqlalchemy import text
 from tqdm import tqdm
 
 from ebel.constants import RID
@@ -171,7 +172,8 @@ def update_bel(self) -> int:
         updated = 0
 
         for drugbank_id, drugbank_rid in tqdm(drugbank_id_rids.items(), desc=f"Update {self.biodb_name.upper()}"):
-            for r in self.engine.execute(sql_temp.format(drugbank_id)):
+            sql = sql_temp.format(drugbank_id)
+            for r in self.session.execute(text(sql)):
                 (
                     condition_meddra_id,
                     condition_concept_name,
diff --git a/ebel/manager/orientdb/biodbs/stringdb.py b/ebel/manager/orientdb/biodbs/stringdb.py
index a92c005..27272e9 100644
--- a/ebel/manager/orientdb/biodbs/stringdb.py
+++ b/ebel/manager/orientdb/biodbs/stringdb.py
@@ -6,6 +6,7 @@
 import numpy as np
 import pandas as pd
 from pyorientdb import OrientDB
+from sqlalchemy import text
 from tqdm import tqdm
 
 from ebel.manager.orientdb import odb_meta, odb_structure, urls
@@ -161,7 +162,7 @@ def get_stringdb_action_hgnc_set(self):
         """Get unique HGNC symbols from stringdb_actions table."""
         sql = f"""(Select distinct( symbol1 ) from {self.table_action})
                 union (Select distinct( symbol2 ) from {self.table_action})"""
-        return set([x[0] for x in self.engine.execute(sql).fetchall()])
+        return set([x[0] for x in self.session.execute(text(sql)).fetchall()])
 
     def update_interactions(self) -> Dict[str, int]:
         """Update the edges with StringDB metadata."""
@@ -294,7 +295,8 @@ def update_action_interactions(self, hgnc: Hgnc) -> int:
 
         updated = 0
         for symbol in tqdm(symbols, desc="Update has_action_st edges"):
-            rows = self.engine.execute(sql_temp.format(symbol=symbol))
+            sql = sql_temp.format(symbol=symbol)
+            rows = self.session.execute(text(sql))
             for row in rows.fetchall():
                 action = Action(*row)
 
diff --git a/ebel/manager/orientdb/biodbs/uniprot.py b/ebel/manager/orientdb/biodbs/uniprot.py
index bfdfe1d..4e4c131 100644
--- a/ebel/manager/orientdb/biodbs/uniprot.py
+++ b/ebel/manager/orientdb/biodbs/uniprot.py
@@ -9,6 +9,7 @@
 import pandas as pd
 from lxml.etree import iterparse
 from pyorientdb import OrientDB
+from sqlalchemy import text
 from tqdm import tqdm
 
 from ebel.defaults import default_tax_ids
@@ -145,14 +146,14 @@ def insert_data(self) -> Dict[str, int]:
         """Insert UniProt data depending on NCBI taxonomy identifier."""
         dialect = self.session.bind.dialect.name
         if dialect == "mysql":
-            self.engine.execute("SET FOREIGN_KEY_CHECKS=0")
+            self.session.execute(text("SET FOREIGN_KEY_CHECKS=0"))
 
         inserted = self.insert_uniprot()
         self.add_gene_symbols()
         self.session.commit()
 
         if dialect == "mysql":
-            self.engine.execute("SET FOREIGN_KEY_CHECKS=1")
+            self.session.execute(text("SET FOREIGN_KEY_CHECKS=1"))
 
         return {self.biodb_name: inserted}
 
@@ -311,7 +312,7 @@ def _get_accesssion_recname(self, taxid, gene_symbol) -> Union[Tuple[str, str],
             f"Select accession, recommended_name from uniprot as u inner join uniprot_gene_symbol as gs "
             f'on (u.id=gs.uniprot_id) where u.taxid={taxid} and gs.symbol="{gene_symbol}" limit 1'
         )
-        results = self.engine.execute(sql)
+        results = self.session.execute(text(sql))
         return results.fetchone() if results else None
 
     def _update_proteins(self, namespace, taxid) -> int:
@@ -338,7 +339,7 @@ def _update_proteins(self, namespace, taxid) -> int:
     def _get_recname_taxid_by_accession_from_uniprot_api(self, accession) -> Tuple[str, int]:
         """Fetch uniprot entry by accession and adds to the database. Returns recommended name."""
         sql = f"Select recommended_name,taxid from uniprot where accession='{accession}' limit 1"
-        result = self.engine.execute(sql).fetchone()
+        result = self.session.execute(text(sql)).fetchone()
         if result:
             return result
 
@@ -353,7 +354,7 @@ def _update_uniprot_proteins(self) -> int:
         )
         for protein in self.query(sql_uniprot).itertuples(index=False):
             sql = sql_temp.format(protein.accession)
-            found = self.engine.execute(sql).fetchone()
+            found = self.session.execute(text(sql)).fetchone()
             if found:
                 recommended_name, taxid = found
                 num_updated = self.execute(sql_update.format(recommended_name, taxid, protein.accession))[0]
diff --git a/ebel/manager/orientdb/odb_meta.py b/ebel/manager/orientdb/odb_meta.py
index cb9b95a..10c1972 100644
--- a/ebel/manager/orientdb/odb_meta.py
+++ b/ebel/manager/orientdb/odb_meta.py
@@ -25,6 +25,7 @@
                                    PyOrientIndexException,
                                    PyOrientSecurityAccessException)
 from pyorientdb.otypes import OrientRecord
+from sqlalchemy import text
 from sqlalchemy.sql.schema import Table
 from sqlalchemy_utils import create_database, database_exists
 from tqdm import tqdm
@@ -240,12 +241,12 @@ def clear_and_import_data(self) -> Dict[str, int]:
         return inserted
 
     def create_index_rdbms(self, table_name: str, columns):
-        """Creates index on column(s) in RDBMS."""
+        """Create an index on one or more columns of a table in the RDBMS."""
         if isinstance(columns, str):
             columns = [columns]
         sql_columns = ",".join(columns)
         index_name = f"idx_{table_name}_" + "_".join(columns)
-        self.engine.execute(f"CREATE INDEX {index_name} ON {table_name} ({sql_columns})")
+        self.session.execute(text(f"CREATE INDEX {index_name} ON {table_name} ({sql_columns})"))
 
     def clear_edges_by_bel_doc_rid(self, bel_document_rid: str, even_if_other_doc_rids_exists=True):
         """Delete all edges linked to a specified BEL document rID."""
@@ -819,7 +820,7 @@ def number_of_generics(self) -> Dict[str, int]:
             for table_name, table in self.tables_base.metadata.tables.items():
                 if self.table_exists(table_name):
                     sql = f"Select count(*) from `{table_name}`"
-                    numbers[table_name] = self.engine.execute(sql).fetchone()[0]
+                    numbers[table_name] = self.session.execute(text(sql)).fetchone()[0]
                 else:
                     numbers[table_name] = 0
         elif self.generic_classes:
@@ -1348,7 +1349,7 @@ def get_set_gene_rids_by_position(
 
         for gene_type, sql in sqls.items():
             if gene_type in gene_types:
-                results = self.engine.execute(sql)
+                results = self.session.execute(text(sql))
                 for (symbol,) in results.fetchall():
                     bel = f'g(HGNC:"{symbol}")'
                     data = {
diff --git a/ebel/manager/orientdb/urls.py b/ebel/manager/orientdb/urls.py
index cbf84f5..a6c31d2 100755
--- a/ebel/manager/orientdb/urls.py
+++ b/ebel/manager/orientdb/urls.py
@@ -79,7 +79,7 @@
 IUPHAR_LIGANDS = "https://www.guidetopharmacology.org/DATA/ligands.csv"
 
 # CHEBI #
-CHEBI_BASE = "ftp://ftp.ebi.ac.uk/pub/databases/chebi/Flat_file_tab_delimited/"
+CHEBI_BASE = "https://ftp.ebi.ac.uk/pub/databases/chebi/Flat_file_tab_delimited/"
 CHEBI_CHEMICALDATA = f"{CHEBI_BASE}chemical_data.tsv"
 CHEBI_COMMENT = f"{CHEBI_BASE}comments.tsv"
 CHEBI_COMPOUND = f"{CHEBI_BASE}compounds.tsv.gz"
diff --git a/ebel/manager/rdbms/models/biogrid.py b/ebel/manager/rdbms/models/biogrid.py
index c3af157..d552d56 100644
--- a/ebel/manager/rdbms/models/biogrid.py
+++ b/ebel/manager/rdbms/models/biogrid.py
@@ -1,7 +1,7 @@
 """BioGRID RDBMS model definition."""
-from sqlalchemy import Column, Float, ForeignKey, Integer, String, Text
+from sqlalchemy import Float, ForeignKey, Integer, String, Text
 from sqlalchemy.ext.declarative import declarative_base
-from sqlalchemy.orm import relationship
+from sqlalchemy.orm import relationship, mapped_column, Mapped
 
 from ebel.manager.rdbms.models import object_as_dict
 
@@ -12,26 +12,28 @@ class Biogrid(Base):
     """Class definition for the biogrid table."""
 
     __tablename__ = "biogrid"
-    id = Column(Integer, primary_key=True)
-
-    biogrid_a_id = Column(Integer, ForeignKey("biogrid_interactor.biogrid_id"))
-    biogrid_a = relationship("Interactor", foreign_keys=[biogrid_a_id])
-    biogrid_b_id = Column(Integer, ForeignKey("biogrid_interactor.biogrid_id"))
-    biogrid_b = relationship("Interactor", foreign_keys=[biogrid_b_id])
-    biogrid_id = Column(Integer, nullable=True)
-    experimental_system_id = Column(Integer, ForeignKey("biogrid_experimental_system.id"))
-    experimental_system = relationship("ExperimentalSystem", foreign_keys=[experimental_system_id])
-    throughput_id = Column(Integer, ForeignKey("biogrid_throughput.id"))
-    throughput = relationship("Throughput", foreign_keys=[throughput_id])
-    score = Column(Float, nullable=True)
-    modification_id = Column(Integer, ForeignKey("biogrid_modification.id"))
-    modification = relationship("Modification", foreign_keys=[modification_id])
-    qualifications = Column(String(255), nullable=True)
-    source_id = Column(Integer, ForeignKey("biogrid_source.id"))
-    source = relationship("Source", foreign_keys=[source_id])
-    publication_id = Column(Integer, ForeignKey("biogrid_publication.id"))
-    publication = relationship("Publication", foreign_keys=[publication_id])
-    qualification = Column(Text, nullable=True)
+    id = mapped_column(Integer, primary_key=True)
+
+    biogrid_a_id: Mapped[int] = mapped_column(ForeignKey("biogrid_interactor.biogrid_id"))
+    biogrid_a: Mapped["Interactor"] = relationship("Interactor", foreign_keys=[biogrid_a_id])
+    biogrid_b_id: Mapped[int] = mapped_column(ForeignKey("biogrid_interactor.biogrid_id"))
+    biogrid_b: Mapped["Interactor"] = relationship("Interactor", foreign_keys=[biogrid_b_id])
+    biogrid_id: Mapped[int] = mapped_column(nullable=True)
+    experimental_system_id: Mapped[int] = mapped_column(ForeignKey("biogrid_experimental_system.id"))
+    experimental_system: Mapped["ExperimentalSystem"] = relationship(
+        "ExperimentalSystem", foreign_keys=[experimental_system_id]
+    )
+    throughput_id: Mapped[int] = mapped_column(ForeignKey("biogrid_throughput.id"))
+    throughput: Mapped["Throughput"] = relationship("Throughput", foreign_keys=[throughput_id])
+    score: Mapped[float] = mapped_column(nullable=True)
+    modification_id: Mapped[int] = mapped_column(ForeignKey("biogrid_modification.id"), nullable=True)
+    modification: Mapped["Modification"] = relationship("Modification", foreign_keys=[modification_id])
+    qualifications: Mapped[str] = mapped_column(String(255), nullable=True)
+    source_id: Mapped[int] = mapped_column(ForeignKey("biogrid_source.id"))
+    source: Mapped["Source"] = relationship("Source", foreign_keys=[source_id])
+    publication_id: Mapped[int] = mapped_column(ForeignKey("biogrid_publication.id"))
+    publication: Mapped["Publication"] = relationship("Publication", foreign_keys=[publication_id])
+    qualification: Mapped[str] = mapped_column(Text, nullable=True)
 
     def as_dict(self):
         """Convert object values to dictionary."""
@@ -53,11 +55,11 @@ class Publication(Base):
     """Class definition for the biogrid_publication table."""
 
     __tablename__ = "biogrid_publication"
-    id = Column(Integer, primary_key=True)
-    author_name = Column(String(255), nullable=True)
-    publication_year = Column(Integer, nullable=True)
-    source = Column(String(255), nullable=True)
-    source_identifier = Column(String(255), nullable=True)
+    id: Mapped[int] = mapped_column(primary_key=True)
+    author_name: Mapped[str] = mapped_column(String(255), nullable=True)
+    publication_year: Mapped[int] = mapped_column(nullable=True)
+    source: Mapped[str] = mapped_column(String(255), nullable=True)
+    source_identifier: Mapped[str] = mapped_column(String(255), nullable=True)
 
     def as_dict(self):
         """Convert object values to dictionary."""
@@ -68,9 +70,9 @@ class Throughput(Base):
     """Class definition for the biogrid_throughput table."""
 
     __tablename__ = "biogrid_throughput"
-    id = Column(Integer, primary_key=True)
-    throughput = Column(String(255))
-    frequency = Column(Integer)
+    id: Mapped[int] = mapped_column(primary_key=True)
+    throughput: Mapped[str] = mapped_column(String(255))
+    frequency: Mapped[int] = mapped_column()
 
     def as_dict(self):
         """Convert object values to dictionary."""
@@ -81,8 +83,8 @@ class Taxonomy(Base):
     """Class definition for the biogrid_taxonomy table."""
 
     __tablename__ = "biogrid_taxonomy"
-    taxonomy_id = Column(Integer, primary_key=True)  # == NCBI Taxonomy ID
-    organism_name = Column(String(1000))
+    taxonomy_id: Mapped[int] = mapped_column(primary_key=True)  # == NCBI Taxonomy ID
+    organism_name: Mapped[str] = mapped_column(String(1000))
 
     def as_dict(self):
         """Convert object values to dictionary."""
@@ -93,10 +95,10 @@ class ExperimentalSystem(Base):
     """Class definition for the biogrid_experimental_system table."""
 
     __tablename__ = "biogrid_experimental_system"
-    id = Column(Integer, primary_key=True)
-    experimental_system = Column(String(255), nullable=True)
-    experimental_system_type = Column(String(255), nullable=True)
-    frequency = Column(Integer)
+    id: Mapped[int] = mapped_column(primary_key=True)
+    experimental_system: Mapped[str] = mapped_column(String(255), nullable=True)
+    experimental_system_type: Mapped[str] = mapped_column(String(255), nullable=True)
+    frequency: Mapped[int] = mapped_column()
 
     def as_dict(self):
         """Convert object values to dictionary."""
@@ -107,15 +109,15 @@ class Interactor(Base):
     """Class definition for the biogrid_interactor table."""
 
     __tablename__ = "biogrid_interactor"
-    biogrid_id = Column(Integer, primary_key=True)
+    biogrid_id: Mapped[int] = mapped_column(primary_key=True)
 
-    entrez = Column(Integer, nullable=True, index=True)
-    systematic_name = Column(String(255), nullable=True, index=True)
-    symbol = Column(String(255), nullable=True, index=True)
-    taxonomy_id = Column(Integer, ForeignKey("biogrid_taxonomy.taxonomy_id"))
-    taxonomy = relationship("Taxonomy", foreign_keys=[taxonomy_id])
-    uniprot = Column(String(255), nullable=True, index=True)
-    trembl = Column(String(1000), nullable=True)
+    entrez: Mapped[int] = mapped_column(nullable=True, index=True)
+    systematic_name: Mapped[str] = mapped_column(String(255), nullable=True, index=True)
+    symbol: Mapped[str] = mapped_column(String(255), nullable=True, index=True)
+    taxonomy_id: Mapped[int] = mapped_column(ForeignKey("biogrid_taxonomy.taxonomy_id"))
+    taxonomy: Mapped["Taxonomy"] = relationship("Taxonomy", foreign_keys=[taxonomy_id])
+    uniprot: Mapped[str] = mapped_column(String(255), nullable=True, index=True)
+    trembl: Mapped[str] = mapped_column(String(1000), nullable=True)
 
     def as_dict(self):
         """Convert object values to dictionary."""
@@ -133,8 +135,8 @@ class Source(Base):
     """Class definition for the biogrid_source table."""
 
     __tablename__ = "biogrid_source"
-    id = Column(Integer, primary_key=True)
-    source = Column(String(255), nullable=True)
+    id: Mapped[int] = mapped_column(primary_key=True)
+    source: Mapped[str] = mapped_column(String(255), nullable=True)
 
     def as_dict(self):
         """Convert object values to dictionary."""
@@ -145,9 +147,9 @@ class Modification(Base):
     """Class definition for the biogrid_modification table."""
 
     __tablename__ = "biogrid_modification"
-    id = Column(Integer, primary_key=True)
-    modification = Column(String(255), nullable=True)
-    frequency = Column(Integer)
+    id: Mapped[int] = mapped_column(primary_key=True)
+    modification: Mapped[str] = mapped_column(String(255), nullable=True)
+    frequency: Mapped[int] = mapped_column()
 
     def as_dict(self):
         """Convert object values to dictionary."""
diff --git a/ebel/manager/rdbms/models/chebi.py b/ebel/manager/rdbms/models/chebi.py
index 28ea3ce..99876ff 100644
--- a/ebel/manager/rdbms/models/chebi.py
+++ b/ebel/manager/rdbms/models/chebi.py
@@ -1,9 +1,11 @@
 """CHEBI RDBMS model definition."""
+import datetime
+from typing import List
 
-from sqlalchemy import (Column, DateTime, ForeignKey, Index, Integer, String,
+from sqlalchemy import (DateTime, ForeignKey, Index, Integer, String,
                         Text)
 from sqlalchemy.ext.declarative import declarative_base
-from sqlalchemy.orm import relationship
+from sqlalchemy.orm import relationship, mapped_column, Mapped
 
 Base = declarative_base()
 
@@ -12,14 +14,14 @@ class ChemicalData(Base):
     """Class definition for the chebi_chemical_data table."""
 
     __tablename__ = "chebi_chemical_data"
-    id = Column(Integer, primary_key=True)
+    id: Mapped[int] = mapped_column(primary_key=True)
 
-    chemical_data = Column(Text, nullable=True)
-    source = Column(Text, nullable=False)
-    type = Column(Text, nullable=False)
+    chemical_data: Mapped[str] = mapped_column(Text, nullable=True)
+    source: Mapped[str] = mapped_column(Text, nullable=False)
+    type: Mapped[str] = mapped_column(Text, nullable=False)
 
-    compound_id = Column(Integer, ForeignKey("chebi_compound.id"))
-    compounds = relationship("Compound", back_populates="chemicalData")
+    compound_id: Mapped[int] = mapped_column(ForeignKey("chebi_compound.id"))
+    compounds: Mapped["Compound"] = relationship("Compound", back_populates="chemicalData")
 
     def __str__(self):
         """Class string definition."""
@@ -38,15 +40,15 @@ class Comment(Base):
     """Class definition for the chebi_comment table."""
 
     __tablename__ = "chebi_comment"
-    id = Column(Integer, primary_key=True)
+    id: Mapped[int] = mapped_column(primary_key=True)
 
-    text = Column(Text, nullable=False)
-    created_on = Column(DateTime, nullable=False)
-    datatype = Column(String(80))
-    datatype_id = Column(Integer, nullable=False)
+    text: Mapped[str] = mapped_column(Text, nullable=False)
+    created_on: Mapped[datetime.datetime] = mapped_column(DateTime, nullable=False)
+    datatype: Mapped[str] = mapped_column(String(80))
+    datatype_id: Mapped[int] = mapped_column(nullable=False)
 
-    compound_id = Column(Integer, ForeignKey("chebi_compound.id"))
-    compounds = relationship("Compound", back_populates="comments")
+    compound_id: Mapped[int] = mapped_column(ForeignKey("chebi_compound.id"))
+    compounds: Mapped["Compound"] = relationship("Compound", back_populates="comments")
 
     def __str__(self):
         """Class string definition."""
@@ -64,27 +66,29 @@ class Compound(Base):
     """Class definition for the chebi_compound table."""
 
     __tablename__ = "chebi_compound"
-    id = Column(Integer, primary_key=True)
-
-    name = Column(String(2000))
-    source = Column(String(32), nullable=False)
-    parent_id = Column(Integer)
-    chebi_accession = Column(String(30), nullable=False)
-    status = Column(String(1), nullable=False)
-    definition = Column(Text)
-    star = Column(Integer, nullable=False)
-    modified_on = Column(Text)
-    created_by = Column(Text)
-
-    chemicalData = relationship("ChemicalData", back_populates="compounds")
-    comments = relationship("Comment", back_populates="compounds")
-    database_accessions = relationship("DatabaseAccession", back_populates="compounds")
-    names = relationship("Name", back_populates="compounds")
-    references = relationship("Reference", back_populates="compounds")
+    id: Mapped[int] = mapped_column(primary_key=True)
+
+    name: Mapped[str] = mapped_column(String(2000), nullable=True)
+    source: Mapped[str] = mapped_column(String(32), nullable=False)
+    parent_id: Mapped[int] = mapped_column(nullable=True)
+    chebi_accession: Mapped[str] = mapped_column(String(30), nullable=False)
+    status: Mapped[str] = mapped_column(String(1), nullable=False)
+    definition: Mapped[str] = mapped_column(Text, nullable=True)
+    star: Mapped[int] = mapped_column(nullable=False)
+    modified_on: Mapped[str] = mapped_column(Text, nullable=True)
+    created_by: Mapped[str] = mapped_column(Text, nullable=True)  # Text column, so str (not int)
+
+    chemicalData: Mapped[List["ChemicalData"]] = relationship("ChemicalData", back_populates="compounds")
+    comments: Mapped[List["Comment"]] = relationship("Comment", back_populates="compounds")
+    database_accessions: Mapped[List["DatabaseAccession"]] = relationship(
+        "DatabaseAccession", back_populates="compounds"
+    )
+    names: Mapped[List["Name"]] = relationship("Name", back_populates="compounds")
+    references: Mapped[List["Reference"]] = relationship("Reference", back_populates="compounds")
     # final_id_relations = relationship("Relation", back_populates="final_id_compounds")
     # init_id_relations = relationship("Relation", back_populates="init_id_compounds")
-    structures = relationship("Structure", back_populates="compounds")
-    inchis = relationship("Inchi", back_populates="compounds")
+    structures: Mapped[List["Structure"]] = relationship("Structure", back_populates="compounds")
+    inchis: Mapped[List["Inchi"]] = relationship("Inchi", back_populates="compounds")
 
     def __str__(self):
         return self.name
@@ -111,12 +115,12 @@ class Inchi(Base):
     """Class definition for the chebi_inchi table."""
 
     __tablename__ = "chebi_inchi"
-    id = Column(Integer, primary_key=True)
+    id: Mapped[int] = mapped_column(primary_key=True)
 
-    inchi = Column(Text)
+    inchi: Mapped[str] = mapped_column(Text, nullable=True)
 
-    compound_id = Column(Integer, ForeignKey("chebi_compound.id"))
-    compounds = relationship("Compound", back_populates="inchis")
+    compound_id: Mapped[int] = mapped_column(ForeignKey("chebi_compound.id"), nullable=True)
+    compounds: Mapped["Compound"] = relationship("Compound", back_populates="inchis")  # many-to-one: scalar
 
     def __str__(self):
         return self.inchi
@@ -130,14 +134,14 @@ class DatabaseAccession(Base):
     """Class definition for the chebi_database_accession table."""
 
     __tablename__ = "chebi_database_accession"
-    id = Column(Integer, primary_key=True)
+    id: Mapped[int] = mapped_column(primary_key=True)
 
-    accession_number = Column(String(255), nullable=True)
-    type = Column(Text, nullable=False)
-    source = Column(Text, nullable=False)
+    accession_number: Mapped[str] = mapped_column(String(255), nullable=True)
+    type: Mapped[str] = mapped_column(Text, nullable=False)
+    source: Mapped[str] = mapped_column(Text, nullable=False)
 
-    compound_id = Column(Integer, ForeignKey("chebi_compound.id"))
-    compounds = relationship("Compound", back_populates="database_accessions")
+    compound_id: Mapped[int] = mapped_column(ForeignKey("chebi_compound.id"), nullable=True)
+    compounds: Mapped["Compound"] = relationship("Compound", back_populates="database_accessions")
 
     def __str__(self):
         return self.accession_number
@@ -155,16 +159,16 @@ class Name(Base):
     """Class definition for the chebi_name table."""
 
     __tablename__ = "chebi_name"
-    id = Column(Integer, primary_key=True)
+    id: Mapped[int] = mapped_column(primary_key=True)
 
-    name = Column(Text, nullable=True)
-    type = Column(Text, nullable=False)
-    source = Column(Text, nullable=False)
-    adapted = Column(Text, nullable=False)
-    language = Column(Text, nullable=False)
+    name: Mapped[str] = mapped_column(Text, nullable=True)
+    type: Mapped[str] = mapped_column(Text, nullable=False)
+    source: Mapped[str] = mapped_column(Text, nullable=False)
+    adapted: Mapped[str] = mapped_column(Text, nullable=False)
+    language: Mapped[str] = mapped_column(Text, nullable=False)
 
-    compound_id = Column(Integer, ForeignKey("chebi_compound.id"))
-    compounds = relationship("Compound", back_populates="names")
+    compound_id: Mapped[int] = mapped_column(ForeignKey("chebi_compound.id"), nullable=True)
+    compounds: Mapped["Compound"] = relationship("Compound", back_populates="names")  # many-to-one: scalar
 
     def __str__(self):
         return self.name
@@ -185,15 +189,15 @@ class Reference(Base):
 
     __tablename__ = "chebi_reference"
 
-    id = Column(Integer, primary_key=True)
+    id: Mapped[int] = mapped_column(primary_key=True)
 
-    reference_id = Column(String(60), nullable=False, index=True)
-    reference_db_name = Column(String(60), nullable=False, index=True)
-    location_in_ref = Column(String(90), index=True)
-    reference_name = Column(String(1024))
+    reference_id: Mapped[str] = mapped_column(String(60), nullable=False, index=True)
+    reference_db_name: Mapped[str] = mapped_column(String(60), nullable=False, index=True)
+    location_in_ref: Mapped[str] = mapped_column(String(90), nullable=True, index=True)
+    reference_name: Mapped[str] = mapped_column(String(1024), nullable=True)
 
-    compound_id = Column(Integer, ForeignKey("chebi_compound.id"))
-    compounds = relationship("Compound", back_populates="references")
+    compound_id: Mapped[int] = mapped_column(ForeignKey("chebi_compound.id"), nullable=True)
+    compounds: Mapped["Compound"] = relationship("Compound", back_populates="references")
 
     __table_args__ = (Index("ix_chebi_reference__reference_name", reference_name, mysql_length=500),)
 
@@ -224,16 +228,16 @@ class Relation(Base):
     """Class definition for the chebi_relation table."""
 
     __tablename__ = "chebi_relation"
-    id = Column(Integer, primary_key=True)
+    id: Mapped[int] = mapped_column(primary_key=True)
 
-    type = Column(Text, nullable=False)
-    status = Column(String(1), nullable=False)
+    type: Mapped[str] = mapped_column(Text, nullable=False)
+    status: Mapped[str] = mapped_column(String(1), nullable=False)
 
-    final_id = Column(Integer, ForeignKey("chebi_compound.id"))
-    init_id = Column(Integer, ForeignKey("chebi_compound.id"))
+    final_id: Mapped[int] = mapped_column(ForeignKey("chebi_compound.id"), nullable=True)
+    init_id: Mapped[int] = mapped_column(ForeignKey("chebi_compound.id"), nullable=True)
 
-    final_id_compounds = relationship("Compound", foreign_keys=[final_id])
-    init_id_compounds = relationship("Compound", foreign_keys=[init_id])
+    final_id_compounds: Mapped["Compound"] = relationship("Compound", foreign_keys=[final_id])
+    init_id_compounds: Mapped["Compound"] = relationship("Compound", foreign_keys=[init_id])
 
     def __str__(self):
         return f"{self.type} - {self.status}"
@@ -252,16 +256,16 @@ class Structure(Base):
     """Class definition for the chebi_structure table."""
 
     __tablename__ = "chebi_structure"
-    id = Column(Integer, primary_key=True)
+    id: Mapped[int] = mapped_column(primary_key=True)
 
-    structure = Column(Text, nullable=False)
-    type = Column(Text, nullable=False)
-    dimension = Column(Text, nullable=False)
-    default_structure = Column(String(1), nullable=False)
-    autogen_structure = Column(String(1), nullable=False)
+    structure: Mapped[str] = mapped_column(Text, nullable=False)
+    type: Mapped[str] = mapped_column(Text, nullable=False)
+    dimension: Mapped[str] = mapped_column(Text, nullable=False)
+    default_structure: Mapped[str] = mapped_column(String(1), nullable=False)
+    autogen_structure: Mapped[str] = mapped_column(String(1), nullable=False)
 
-    compound_id = Column(Integer, ForeignKey("chebi_compound.id"))
-    compounds = relationship("Compound", back_populates="structures")
+    compound_id: Mapped[int] = mapped_column(ForeignKey("chebi_compound.id"), nullable=True)
+    compounds: Mapped["Compound"] = relationship("Compound", back_populates="structures")
 
     def __str__(self):
         return self.structure
diff --git a/ebel/manager/rdbms/models/clinical_trials_gov.py b/ebel/manager/rdbms/models/clinical_trials_gov.py
index f2f02ba..a94ff4f 100644
--- a/ebel/manager/rdbms/models/clinical_trials_gov.py
+++ b/ebel/manager/rdbms/models/clinical_trials_gov.py
@@ -1,9 +1,10 @@
 """ClinicalTrials.gov RDBMS model definition."""
 import re
+from typing import List
 
-from sqlalchemy import Column, ForeignKey, Integer, String, Table, Text
+from sqlalchemy import ForeignKey, Integer, String, Table, Text, Column
 from sqlalchemy.ext.declarative import declarative_base
-from sqlalchemy.orm import relationship
+from sqlalchemy.orm import relationship, mapped_column, Mapped
 
 from ebel.manager.rdbms.models import object_as_dict
 
@@ -83,49 +84,49 @@ class ClinicalTrialGov(Base):
 
     __tablename__ = "clinical_trials_gov"
 
-    id = Column(Integer, primary_key=True)
-    nct_id = Column(String(100), index=True)
-    org_study_id = Column(Text)
-    brief_title = Column(Text)
-    official_title = Column(Text)
-    is_fda_regulated_drug = Column(Text)
-    brief_summary = Column(Text)
-    detailed_description = Column(Text)
-    overall_status = Column(Text)
-    start_date = Column(Text)
-    completion_date = Column(Text)
-    phase = Column(Text)
-    study_type = Column(Text)
-    study_design_intervention_model = Column(Text)
-    study_design_primary_purpose = Column(Text)
-    study_design_masking = Column(Text)
+    id: Mapped[int] = mapped_column(primary_key=True)
+    nct_id: Mapped[str] = mapped_column(String(100), index=True, nullable=True)
+    org_study_id: Mapped[str] = mapped_column(Text, nullable=True)
+    brief_title: Mapped[str] = mapped_column(Text, nullable=True)
+    official_title: Mapped[str] = mapped_column(Text, nullable=True)
+    is_fda_regulated_drug: Mapped[str] = mapped_column(Text, nullable=True)
+    brief_summary: Mapped[str] = mapped_column(Text, nullable=True)
+    detailed_description: Mapped[str] = mapped_column(Text, nullable=True)
+    overall_status: Mapped[str] = mapped_column(Text, nullable=True)
+    start_date: Mapped[str] = mapped_column(Text, nullable=True)
+    completion_date: Mapped[str] = mapped_column(Text, nullable=True)
+    phase: Mapped[str] = mapped_column(Text, nullable=True)
+    study_type: Mapped[str] = mapped_column(Text, nullable=True)
+    study_design_intervention_model: Mapped[str] = mapped_column(Text, nullable=True)
+    study_design_primary_purpose: Mapped[str] = mapped_column(Text, nullable=True)
+    study_design_masking: Mapped[str] = mapped_column(Text, nullable=True)
     # primary_outcomes
     # secondary_outcomes
-    patient_data_sharing_ipd = Column(Text)
-    patient_data_ipd_description = Column(Text)
+    patient_data_sharing_ipd: Mapped[str] = mapped_column(Text, nullable=True)
+    patient_data_ipd_description: Mapped[str] = mapped_column(Text, nullable=True)
 
-    keywords = relationship(
+    keywords: Mapped[List["Keyword"]] = relationship(
         "Keyword",
         secondary=ctg_keyword_n2m,
         back_populates="trials",
         cascade="save-update",
     )
 
-    conditions = relationship(
+    conditions: Mapped[List["Condition"]] = relationship(
         "Condition",
         secondary=ctg_condition_n2m,
         back_populates="trials",
         cascade="save-update",
     )
 
-    mesh_terms = relationship(
+    mesh_terms: Mapped[List["MeshTerm"]] = relationship(
         "MeshTerm",
         secondary=ctg_mesh_term_n2m,
         back_populates="trials",
         cascade="save-update",
     )
 
-    interventions = relationship(
+    interventions: Mapped[List["Intervention"]] = relationship(
         "Intervention",
         secondary=ctg_intervention_n2m,
         back_populates="trials",
@@ -157,9 +158,11 @@ class Keyword(Base):
     """Class definition for the clinical_trials_gov_keyword table."""
 
     __tablename__ = "clinical_trials_gov_keyword"
-    id = Column(Integer, primary_key=True)
-    keyword = Column(String(255), index=True)
-    trials = relationship("ClinicalTrialGov", secondary=ctg_keyword_n2m, back_populates="keywords")
+    id: Mapped[int] = mapped_column(primary_key=True)
+    keyword: Mapped[str] = mapped_column(String(255), index=True, nullable=True)
+    trials: Mapped[List["ClinicalTrialGov"]] = relationship(
+        "ClinicalTrialGov", secondary=ctg_keyword_n2m, back_populates="keywords"
+    )
 
     def as_dict(self):
         """Convert object values to dictionary."""
@@ -170,9 +173,11 @@ class Condition(Base):
     """Class definition for the clinical_trials_gov_condition table."""
 
     __tablename__ = "clinical_trials_gov_condition"
-    id = Column(Integer, primary_key=True)
-    condition = Column(Text)
-    trials = relationship("ClinicalTrialGov", secondary=ctg_condition_n2m, back_populates="conditions")
+    id: Mapped[int] = mapped_column(primary_key=True)
+    condition: Mapped[str] = mapped_column(Text, nullable=True)
+    trials: Mapped[List["ClinicalTrialGov"]] = relationship(
+        "ClinicalTrialGov", secondary=ctg_condition_n2m, back_populates="conditions"
+    )
 
     def as_dict(self):
         """Convert object values to dictionary."""
@@ -183,9 +188,11 @@ class MeshTerm(Base):
     """Class definition for the clinical_trials_gov_mesh_term table."""
 
     __tablename__ = "clinical_trials_gov_mesh_term"
-    id = Column(Integer, primary_key=True)
-    mesh_term = Column(String(100), unique=True)
-    trials = relationship("ClinicalTrialGov", secondary=ctg_mesh_term_n2m, back_populates="mesh_terms")
+    id: Mapped[int] = mapped_column(primary_key=True)
+    mesh_term: Mapped[str] = mapped_column(String(100), unique=True, nullable=True)
+    trials: Mapped[List["ClinicalTrialGov"]] = relationship(
+        "ClinicalTrialGov", secondary=ctg_mesh_term_n2m, back_populates="mesh_terms"
+    )
 
     def as_dict(self):
         """Convert object values to dictionary."""
@@ -200,10 +207,10 @@ class Intervention(Base):
     """Class definition for the clinical_trials_gov_intervention table."""
 
     __tablename__ = "clinical_trials_gov_intervention"
-    id = Column(Integer, primary_key=True)
-    intervention_type = Column(String(100), index=True)
-    intervention_name = Column(String(255), index=True)
-    trials = relationship(
+    id: Mapped[int] = mapped_column(primary_key=True)
+    intervention_type: Mapped[str] = mapped_column(String(100), index=True, nullable=True)
+    intervention_name: Mapped[str] = mapped_column(String(255), index=True, nullable=True)
+    trials: Mapped[List["ClinicalTrialGov"]] = relationship(
         "ClinicalTrialGov",
         secondary=ctg_intervention_n2m,
         back_populates="interventions",
diff --git a/ebel/manager/rdbms/models/clinvar.py b/ebel/manager/rdbms/models/clinvar.py
index a7995ab..3d91d16 100644
--- a/ebel/manager/rdbms/models/clinvar.py
+++ b/ebel/manager/rdbms/models/clinvar.py
@@ -1,7 +1,9 @@
 """ClinVar RDBMS model definition."""
-from sqlalchemy import Column, ForeignKey, Index, Integer, String, Table, Text
+from typing import List
+
+from sqlalchemy import ForeignKey, Index, Integer, String, Table, Text, Column
 from sqlalchemy.ext.declarative import declarative_base
-from sqlalchemy.orm import relationship
+from sqlalchemy.orm import relationship, mapped_column, Mapped
 
 from ebel.manager.rdbms.models import object_as_dict
 
@@ -19,23 +21,23 @@ class ClinvarPhenotypeMedgen(Base):
     """Class definition for the clinvar_phenotype_medgen table."""
 
     __tablename__ = "clinvar_phenotype_medgen"
-    id = Column(Integer, primary_key=True)
+    id: Mapped[int] = mapped_column(primary_key=True)
 
-    identifier = Column(String(100), index=True)
-    clinvar_id = Column(Integer, ForeignKey("clinvar.id"))
-    clinvar = relationship("Clinvar", foreign_keys=[clinvar_id], viewonly=True)
+    identifier: Mapped[str] = mapped_column(String(100), index=True, nullable=True)
+    clinvar_id: Mapped[int] = mapped_column(Integer, ForeignKey("clinvar.id"), nullable=True)
+    clinvar: Mapped["Clinvar"] = relationship("Clinvar", foreign_keys=[clinvar_id], viewonly=True)
 
 
 class ClinvarOtherIdentifier(Base):
     """Class definition for the clinvar_other_identifier table."""
 
     __tablename__ = "clinvar_other_identifier"
-    id = Column(Integer, primary_key=True)
+    id: Mapped[int] = mapped_column(primary_key=True)
 
-    db = Column(String(100), index=True)
-    identifier = Column(String(100), index=True)
-    clinvar_id = Column(Integer, ForeignKey("clinvar.id"))
-    clinvar = relationship("Clinvar", foreign_keys=[clinvar_id], viewonly=True)
+    db: Mapped[str] = mapped_column(String(100), index=True, nullable=True)
+    identifier: Mapped[str] = mapped_column(String(100), index=True, nullable=True)
+    clinvar_id: Mapped[int] = mapped_column(ForeignKey("clinvar.id"), nullable=True)
+    clinvar: Mapped["Clinvar"] = relationship("Clinvar", foreign_keys=[clinvar_id], viewonly=True)
 
     def as_dict(self):
         """Convert object values to dictionary."""
@@ -46,44 +48,50 @@ class Clinvar(Base):
     """Class definition for the clinvar table."""
 
     __tablename__ = "clinvar"
-    id = Column(Integer, primary_key=True)
-
-    allele_id = Column(Integer)
-    type = Column(String(100))
-    name = Column(String(1000))
-    gene_id = Column(Integer, index=True)
-    gene_symbol = Column(String(1000))
-    hgnc_id = Column(String(100))
-    clinical_significance = Column(String(100))
-    clin_sig_simple = Column(Integer)
-    last_evaluated = Column(String(100))
-    rs_db_snp = Column(Integer, index=True)
-    nsv_esv_db_var = Column(String(100))
-    rcvaccession = Column(String(1000))
-    origin = Column(Text)
-    origin_simple = Column(Text)
-    assembly = Column(String(100), index=True)
-    chromosome_accession = Column(Text)
-    chromosome = Column(Text)
-    start = Column(Integer)
-    stop = Column(Integer)
-    reference_allele = Column(Text)
-    alternate_allele = Column(Text)
-    cytogenetic = Column(Text)
-    review_status = Column(Text)
-    number_submitters = Column(Integer)
-    guidelines = Column(Text)
-    tested_in_gtr = Column(Text)
-    submitter_categories = Column(Integer)
-    variation_id = Column(Integer)
-    position_vcf = Column(Integer)
-    reference_allele_vcf = Column(Text(100000))
-    alternate_allele_vcf = Column(Text(100000))
-
-    phenotypeMedgens = relationship("ClinvarPhenotypeMedgen", foreign_keys=[ClinvarPhenotypeMedgen.clinvar_id])
-    otherIdentifiers = relationship("ClinvarOtherIdentifier", foreign_keys=[ClinvarOtherIdentifier.clinvar_id])
-
-    phenotypes = relationship("ClinvarPhenotype", secondary=clinvar__clinvar_phenotype)
+    id: Mapped[int] = mapped_column(primary_key=True)
+
+    allele_id: Mapped[int] = mapped_column(nullable=True)
+    type: Mapped[str] = mapped_column(String(100), nullable=True)
+    name: Mapped[str] = mapped_column(String(1000), nullable=True)
+    gene_id: Mapped[int] = mapped_column(index=True, nullable=True)
+    gene_symbol: Mapped[str] = mapped_column(String(1000), nullable=True)
+    hgnc_id: Mapped[str] = mapped_column(String(100), nullable=True)
+    clinical_significance: Mapped[str] = mapped_column(String(100), nullable=True)
+    clin_sig_simple: Mapped[int] = mapped_column(nullable=True)
+    last_evaluated: Mapped[str] = mapped_column(String(100), nullable=True)
+    rs_db_snp: Mapped[int] = mapped_column(index=True, nullable=True)
+    nsv_esv_db_var: Mapped[str] = mapped_column(String(100), nullable=True)
+    rcvaccession: Mapped[str] = mapped_column(String(1000), nullable=True)
+    origin: Mapped[str] = mapped_column(Text, nullable=True)
+    origin_simple: Mapped[str] = mapped_column(Text, nullable=True)
+    assembly: Mapped[str] = mapped_column(String(100), index=True, nullable=True)
+    chromosome_accession: Mapped[str] = mapped_column(Text, nullable=True)
+    chromosome: Mapped[str] = mapped_column(Text, nullable=True)
+    start: Mapped[int] = mapped_column(nullable=True)
+    stop: Mapped[int] = mapped_column(nullable=True)
+    reference_allele: Mapped[str] = mapped_column(Text, nullable=True)
+    alternate_allele: Mapped[str] = mapped_column(Text, nullable=True)
+    cytogenetic: Mapped[str] = mapped_column(Text, nullable=True)
+    review_status: Mapped[str] = mapped_column(Text, nullable=True)
+    number_submitters: Mapped[int] = mapped_column(nullable=True)
+    guidelines: Mapped[str] = mapped_column(Text, nullable=True)
+    tested_in_gtr: Mapped[str] = mapped_column(Text, nullable=True)
+    submitter_categories: Mapped[int] = mapped_column(nullable=True)
+    variation_id: Mapped[int] = mapped_column(nullable=True)
+    position_vcf: Mapped[int] = mapped_column(nullable=True)
+    reference_allele_vcf: Mapped[str] = mapped_column(Text(100000), nullable=True)
+    alternate_allele_vcf: Mapped[str] = mapped_column(Text(100000), nullable=True)
+
+    phenotypeMedgens: Mapped[List["ClinvarPhenotypeMedgen"]] = relationship(
+        "ClinvarPhenotypeMedgen", foreign_keys=[ClinvarPhenotypeMedgen.clinvar_id]
+    )
+    otherIdentifiers: Mapped[List["ClinvarOtherIdentifier"]] = relationship(
+        "ClinvarOtherIdentifier", foreign_keys=[ClinvarOtherIdentifier.clinvar_id]
+    )
+
+    phenotypes: Mapped[List["ClinvarPhenotype"]] = relationship(
+        "ClinvarPhenotype", secondary=clinvar__clinvar_phenotype
+    )
 
     __table_args__ = (Index("ix_clinvar__gene_symbol", gene_symbol, mysql_length=500),)
 
@@ -99,8 +107,8 @@ class ClinvarPhenotype(Base):
     """Class definition for the clinvar_phenotype table."""
 
     __tablename__ = "clinvar_phenotype"
-    id = Column(Integer, primary_key=True)
-    phenotype = Column(Text)
+    id: Mapped[int] = mapped_column(primary_key=True)
+    phenotype: Mapped[str] = mapped_column(Text, nullable=True)
 
     clinvars = relationship("Clinvar", secondary=clinvar__clinvar_phenotype, back_populates="phenotypes")
 
diff --git a/ebel/manager/rdbms/models/disgenet.py b/ebel/manager/rdbms/models/disgenet.py
index cd32bcf..3127fd4 100644
--- a/ebel/manager/rdbms/models/disgenet.py
+++ b/ebel/manager/rdbms/models/disgenet.py
@@ -1,7 +1,9 @@
 """DisGeNet RDBMS model definition."""
-from sqlalchemy import BigInteger, Column, Float, ForeignKey, Integer, String
+from typing import List
+
+from sqlalchemy import BigInteger, Float, ForeignKey, Integer, String
 from sqlalchemy.ext.declarative import declarative_base
-from sqlalchemy.orm import relationship
+from sqlalchemy.orm import relationship, mapped_column, Mapped
 
 from ebel.manager.rdbms.models import object_as_dict
 
@@ -12,16 +14,18 @@ class DisgenetGene(Base):
     """Class definition for the disgenet_gene table."""
 
     __tablename__ = "disgenet_gene"
-    id = Column(Integer, primary_key=True)
-
-    gene_id = Column(Integer, ForeignKey("disgenet_gene_symbol.gene_id"))
-    gene_symbol = relationship("DisgenetGeneSymbol", back_populates="gene_disease_pmid_associations")
-    disease_id = Column(String(100), ForeignKey("disgenet_disease.disease_id"))
-    disease = relationship("DisgenetDisease", foreign_keys=[disease_id])
-    score = Column(Float)
-    pmid = Column(BigInteger)
-    source_id = Column(Integer, ForeignKey("disgenet_source.id"))
-    source = relationship("DisgenetSource", foreign_keys=[source_id])
+    id: Mapped[int] = mapped_column(primary_key=True)
+
+    gene_id: Mapped[int] = mapped_column(ForeignKey("disgenet_gene_symbol.gene_id"), nullable=True)
+    gene_symbol: Mapped["DisgenetGeneSymbol"] = relationship(
+        "DisgenetGeneSymbol", back_populates="gene_disease_pmid_associations"
+    )
+    disease_id: Mapped[str] = mapped_column(String(100), ForeignKey("disgenet_disease.disease_id"), nullable=True)
+    disease: Mapped["DisgenetDisease"] = relationship("DisgenetDisease", foreign_keys=[disease_id])
+    score: Mapped[float] = mapped_column(Float, nullable=True)
+    pmid: Mapped[int] = mapped_column(BigInteger, nullable=True)  # Mapped[int] alone would map to Integer
+    source_id: Mapped[int] = mapped_column(ForeignKey("disgenet_source.id"), nullable=True)
+    source: Mapped["DisgenetSource"] = relationship("DisgenetSource", foreign_keys=[source_id])
 
     def as_dict(self):
         """Convert object values to dictionary."""
@@ -40,10 +44,12 @@ class DisgenetGeneSymbol(Base):
     """Class definition for the disgenet_gene_symbol table."""
 
     __tablename__ = "disgenet_gene_symbol"
-    gene_id = Column(Integer, primary_key=True)
-    gene_symbol = Column(String(50), index=True)
+    gene_id: Mapped[int] = mapped_column(primary_key=True)
+    gene_symbol: Mapped[str] = mapped_column(String(50), index=True, nullable=True)
 
-    gene_disease_pmid_associations = relationship("DisgenetGene", back_populates="gene_symbol")
+    gene_disease_pmid_associations: Mapped[List["DisgenetGene"]] = relationship(
+        "DisgenetGene", back_populates="gene_symbol"
+    )
 
     def as_dict(self):
         """Convert object values to dictionary."""
@@ -54,17 +60,17 @@ class DisgenetVariant(Base):
     """Class definition for the disgenet_variant table."""
 
     __tablename__ = "disgenet_variant"
-    id = Column(Integer, primary_key=True)
-
-    snp_id = Column(String(20), index=True)
-    chromosome = Column(String(2))
-    position = Column(BigInteger)
-    disease_id = Column(String(100), ForeignKey("disgenet_disease.disease_id"))
-    disease = relationship("DisgenetDisease", foreign_keys=[disease_id])
-    score = Column(Float)
-    pmid = Column(BigInteger, index=True)
-    source_id = Column(Integer, ForeignKey("disgenet_source.id"))
-    source = relationship("DisgenetSource", foreign_keys=[source_id])
+    id: Mapped[int] = mapped_column(primary_key=True)
+
+    snp_id: Mapped[str] = mapped_column(String(20), index=True, nullable=True)
+    chromosome: Mapped[str] = mapped_column(String(2), nullable=True)
+    position: Mapped[int] = mapped_column(BigInteger, nullable=True)  # Mapped[int] alone would map to Integer
+    disease_id: Mapped[str] = mapped_column(String(100), ForeignKey("disgenet_disease.disease_id"), nullable=True)
+    disease: Mapped["DisgenetDisease"] = relationship("DisgenetDisease", foreign_keys=[disease_id])
+    score: Mapped[float] = mapped_column(Float, nullable=True)
+    pmid: Mapped[int] = mapped_column(BigInteger, index=True, nullable=True)
+    source_id: Mapped[int] = mapped_column(ForeignKey("disgenet_source.id"), nullable=True)
+    source: Mapped["DisgenetSource"] = relationship("DisgenetSource", foreign_keys=[source_id])
 
     def as_dict(self):
         """Convert object values to dictionary."""
@@ -77,8 +83,8 @@ class DisgenetDisease(Base):
     """Class definition for the disgenet_disease table."""
 
     __tablename__ = "disgenet_disease"
-    disease_id = Column(String(100), primary_key=True)
-    disease_name = Column(String(255), index=True)
+    disease_id: Mapped[str] = mapped_column(String(100), primary_key=True)
+    disease_name: Mapped[str] = mapped_column(String(255), index=True, nullable=True)
 
     def as_dict(self):
         """Convert object values to dictionary."""
@@ -89,8 +95,8 @@ class DisgenetSource(Base):
     """Class definition for the disgenet_source table."""
 
     __tablename__ = "disgenet_source"
-    id = Column(Integer, primary_key=True)
-    source = Column(String(100), index=True)
+    id: Mapped[int] = mapped_column(primary_key=True)
+    source: Mapped[str] = mapped_column(String(100), index=True, nullable=True)
 
     def as_dict(self):
         """Convert object values to dictionary."""
diff --git a/ebel/manager/rdbms/models/drugbank.py b/ebel/manager/rdbms/models/drugbank.py
index e045bba..c0f1ba6 100644
--- a/ebel/manager/rdbms/models/drugbank.py
+++ b/ebel/manager/rdbms/models/drugbank.py
@@ -1,8 +1,10 @@
 """DrugBank RDBMS model definition."""
+import datetime
+from typing import List
 
 from sqlalchemy import Column, Date, ForeignKey, Integer, String, Text
 from sqlalchemy.ext.declarative import declarative_base
-from sqlalchemy.orm import relationship
+from sqlalchemy.orm import relationship, mapped_column, Mapped
 
 Base = declarative_base()
 
@@ -11,34 +13,34 @@ class Drugbank(Base):
     """Class definition for the drugbank table."""
 
     __tablename__ = "drugbank"
-    id = Column(Integer, primary_key=True)
-    drugbank_id = Column(String(10), index=True)
-    name = Column(String(255))
-    description = Column(Text)
-    cas_number = Column(String(20))
-    unii = Column(String(20))
-    state = Column(String(20))
-    indication = Column(Text)
-    pharmacodynamics = Column(Text)
-    toxicity = Column(Text)
-    metabolism = Column(Text)
-    absorption = Column(Text)
-    half_life = Column(Text)
-    route_of_elimination = Column(Text)
-    volume_of_distribution = Column(Text)
-    clearance = Column(Text)
-    mechanism_of_action = Column(Text)
-    fda_label = Column(Text)
-
-    references = relationship("Reference", back_populates="drugbank", cascade="save-update")
-    synonyms = relationship("Synonym", back_populates="drugbank", cascade="save-update")
-    targets = relationship("Target", back_populates="drugbank", cascade="save-update")
-    external_identifiers = relationship("ExternalIdentifier", back_populates="drugbank", cascade="save-update")
-    product_names = relationship("ProductName", back_populates="drugbank", cascade="save-update")
-    drug_interactions = relationship("DrugInteraction", back_populates="drugbank", cascade="save-update")
-    statuses = relationship("Status", back_populates="drugbank", cascade="save-update")
-    patents = relationship("Patent", back_populates="drugbank", cascade="save-update")
-    pathways = relationship("Pathway", back_populates="drugbank", cascade="save-update")
+    id: Mapped[int] = mapped_column(primary_key=True)
+    drugbank_id: Mapped[str] = mapped_column(String(10), index=True, nullable=True)
+    name: Mapped[str] = mapped_column(String(255), nullable=True)
+    description: Mapped[str] = mapped_column(Text, nullable=True)
+    cas_number: Mapped[str] = mapped_column(String(20), nullable=True)
+    unii: Mapped[str] = mapped_column(String(20), nullable=True)
+    state: Mapped[str] = mapped_column(String(20), nullable=True)
+    indication: Mapped[str] = mapped_column(Text, nullable=True)
+    pharmacodynamics: Mapped[str] = mapped_column(Text, nullable=True)
+    toxicity: Mapped[str] = mapped_column(Text, nullable=True)
+    metabolism: Mapped[str] = mapped_column(Text, nullable=True)
+    absorption: Mapped[str] = mapped_column(Text, nullable=True)
+    half_life: Mapped[str] = mapped_column(Text, nullable=True)
+    route_of_elimination: Mapped[str] = mapped_column(Text, nullable=True)
+    volume_of_distribution: Mapped[str] = mapped_column(Text, nullable=True)
+    clearance: Mapped[str] = mapped_column(Text, nullable=True)
+    mechanism_of_action: Mapped[str] = mapped_column(Text, nullable=True)
+    fda_label: Mapped[str] = mapped_column(Text, nullable=True)
+
+    references: Mapped[List["Reference"]] = relationship("Reference", back_populates="drugbank", cascade="save-update")
+    synonyms: Mapped[List["Synonym"]] = relationship("Synonym", back_populates="drugbank", cascade="save-update")
+    targets: Mapped[List["Target"]] = relationship("Target", back_populates="drugbank", cascade="save-update")
+    external_identifiers: Mapped[List["ExternalIdentifier"]] = relationship("ExternalIdentifier", back_populates="drugbank", cascade="save-update")
+    product_names: Mapped[List["ProductName"]] = relationship("ProductName", back_populates="drugbank", cascade="save-update")
+    drug_interactions: Mapped[List["DrugInteraction"]] = relationship("DrugInteraction", back_populates="drugbank", cascade="save-update")
+    statuses: Mapped[List["Status"]] = relationship("Status", back_populates="drugbank", cascade="save-update")
+    patents: Mapped[List["Patent"]] = relationship("Patent", back_populates="drugbank", cascade="save-update")
+    pathways: Mapped[List["Pathway"]] = relationship("Pathway", back_populates="drugbank", cascade="save-update")
 
     def __str__(self):
         """Class string definition."""
@@ -77,11 +79,11 @@ class Pathway(Base):
     """Class definition for the drugbank_pathway table."""
 
     __tablename__ = "drugbank_pathway"
-    id = Column(Integer, primary_key=True)
-    smpdb_id = Column(String(255))
+    id: Mapped[int] = mapped_column(primary_key=True)
+    smpdb_id: Mapped[str] = mapped_column(String(255), nullable=True)  # keep nullable: Mapped[T] alone implies NOT NULL
 
-    drugbank_id = Column(Integer, ForeignKey("drugbank.id"))
-    drugbank = relationship("Drugbank", back_populates="pathways")
+    drugbank_id: Mapped[int] = mapped_column(ForeignKey("drugbank.id"), nullable=True)  # int keeps the FK column INTEGER
+    drugbank: Mapped["Drugbank"] = relationship("Drugbank", back_populates="pathways")
 
     def __str__(self):
         return self.smpdb_id
@@ -95,15 +97,15 @@ class Patent(Base):
     """Class definition for the drugbank_patent table."""
 
     __tablename__ = "drugbank_patent"
-    id = Column(Integer, primary_key=True)
-    number = Column(String(255))
-    country = Column(String(255))
-    approved = Column(Date)
-    expires = Column(Date)
-    pediatric_extension = Column(String(255))
+    id: Mapped[int] = mapped_column(primary_key=True)
+    number: Mapped[str] = mapped_column(String(255), nullable=True)  # keep nullable: Mapped[T] alone implies NOT NULL
+    country: Mapped[str] = mapped_column(String(255), nullable=True)
+    approved: Mapped[datetime.date] = mapped_column(Date, nullable=True)
+    expires: Mapped[datetime.date] = mapped_column(Date, nullable=True)
+    pediatric_extension: Mapped[str] = mapped_column(String(255), nullable=True)
 
-    drugbank_id = Column(Integer, ForeignKey("drugbank.id"))
-    drugbank = relationship("Drugbank", back_populates="patents")
+    drugbank_id: Mapped[int] = mapped_column(ForeignKey("drugbank.id"), nullable=True)
+    drugbank: Mapped[Drugbank] = relationship("Drugbank", back_populates="patents")
 
     def __str__(self):
         return self.number
@@ -124,11 +126,11 @@ class Status(Base):
     """Class definition for the drugbank_status table."""
 
     __tablename__ = "drugbank_status"
-    id = Column(Integer, primary_key=True)
-    status = Column(String(20), index=True)
+    id: Mapped[int] = mapped_column(primary_key=True)
+    status: Mapped[str] = mapped_column(String(20), index=True, nullable=True)  # keep nullable: Mapped[T] alone implies NOT NULL
 
-    drugbank_id = Column(Integer, ForeignKey("drugbank.id"))
-    drugbank = relationship("Drugbank", back_populates="statuses")
+    drugbank_id: Mapped[int] = mapped_column(ForeignKey("drugbank.id"), nullable=True)
+    drugbank: Mapped[Drugbank] = relationship("Drugbank", back_populates="statuses")
 
     def __str__(self):
         return self.status
@@ -142,12 +144,12 @@ class ExternalIdentifier(Base):
     """Class definition for the drugbank_external_identifier table."""
 
     __tablename__ = "drugbank_external_identifier"
-    id = Column(Integer, primary_key=True)
-    resource = Column(String(255), index=True)
-    identifier = Column(String(255), index=True)
+    id: Mapped[int] = mapped_column(primary_key=True)
+    resource: Mapped[str] = mapped_column(String(255), index=True, nullable=True)  # keep nullable: Mapped[T] alone implies NOT NULL
+    identifier: Mapped[str] = mapped_column(String(255), index=True, nullable=True)
 
-    drugbank_id = Column(Integer, ForeignKey("drugbank.id"))
-    drugbank = relationship("Drugbank", back_populates="external_identifiers")
+    drugbank_id: Mapped[int] = mapped_column(ForeignKey("drugbank.id"), nullable=True)
+    drugbank: Mapped[Drugbank] = relationship("Drugbank", back_populates="external_identifiers")
 
     def __str__(self):
         return self.identifier
@@ -165,11 +167,11 @@ class Reference(Base):
     """Class definition for the drugbank_reference table."""
 
     __tablename__ = "drugbank_reference"
-    id = Column(Integer, primary_key=True)
-    pmid = Column(Integer)
+    id: Mapped[int] = mapped_column(primary_key=True)
+    pmid: Mapped[int] = mapped_column(nullable=True)  # keep nullable: Mapped[T] alone implies NOT NULL
 
-    drugbank_id = Column(Integer, ForeignKey("drugbank.id"))
-    drugbank = relationship("Drugbank", back_populates="references")
+    drugbank_id: Mapped[int] = mapped_column(ForeignKey("drugbank.id"), nullable=True)
+    drugbank: Mapped[Drugbank] = relationship("Drugbank", back_populates="references")
 
     def __str__(self):
         return self.pmid
@@ -183,13 +185,13 @@ class Target(Base):
     """Class definition for the drugbank_target table."""
 
     __tablename__ = "drugbank_target"
-    id = Column(Integer, primary_key=True)
-    uniprot = Column(String(20), index=True)
-    action = Column(String(50), index=True)
-    known_action = Column(String(20), index=True)
+    id: Mapped[int] = mapped_column(primary_key=True)
+    uniprot: Mapped[str] = mapped_column(String(20), index=True, nullable=True)  # keep nullable: Mapped[T] alone implies NOT NULL
+    action: Mapped[str] = mapped_column(String(50), index=True, nullable=True)
+    known_action: Mapped[str] = mapped_column(String(20), index=True, nullable=True)
 
-    drugbank_id = Column(Integer, ForeignKey("drugbank.id"))
-    drugbank = relationship("Drugbank", back_populates="targets")
+    drugbank_id: Mapped[int] = mapped_column(ForeignKey("drugbank.id"), nullable=True)
+    drugbank: Mapped[Drugbank] = relationship("Drugbank", back_populates="targets")
 
     def __str__(self):
         return self.uniprot
@@ -208,13 +210,13 @@ class DrugInteraction(Base):
     """Class definition for the drugbank_drug_interaction table."""
 
     __tablename__ = "drugbank_drug_interaction"
-    id = Column(Integer, primary_key=True)
-    drugbank_id = Column(String(10), index=True)
-    name = Column(Text)
-    description = Column(Text)
+    id: Mapped[int] = mapped_column(primary_key=True)
+    drugbank_id: Mapped[str] = mapped_column(String(10), index=True, nullable=True)  # keep nullable: Mapped[T] alone implies NOT NULL
+    name: Mapped[str] = mapped_column(Text, nullable=True)
+    description: Mapped[str] = mapped_column(Text, nullable=True)
 
-    db_id = Column(Integer, ForeignKey("drugbank.id"))  # exception because drugbank_id is already a field
-    drugbank = relationship("Drugbank", back_populates="drug_interactions")
+    db_id: Mapped[int] = mapped_column(ForeignKey("drugbank.id"), nullable=True)  # exception because drugbank_id is already a field; int keeps the FK INTEGER
+    drugbank: Mapped[Drugbank] = relationship("Drugbank", back_populates="drug_interactions")
 
     def __str__(self):
         return self.drugbank_id
@@ -233,11 +235,11 @@ class ProductName(Base):
     """Class definition for the drugbank_product_name table."""
 
     __tablename__ = "drugbank_product_name"
-    id = Column(Integer, primary_key=True)
-    name = Column(Text)
+    id: Mapped[int] = mapped_column(primary_key=True)
+    name: Mapped[str] = mapped_column(Text, nullable=True)  # keep nullable: Mapped[T] alone implies NOT NULL
 
-    drugbank_id = Column(Integer, ForeignKey("drugbank.id"))
-    drugbank = relationship("Drugbank", back_populates="product_names")
+    drugbank_id: Mapped[int] = mapped_column(ForeignKey("drugbank.id"), nullable=True)
+    drugbank: Mapped[Drugbank] = relationship("Drugbank", back_populates="product_names")
 
     def __str__(self):
         return self.name
@@ -251,11 +253,11 @@ class Synonym(Base):
     """Class definition for the drugbank_synonym table."""
 
     __tablename__ = "drugbank_synonym"
-    id = Column(Integer, primary_key=True)
-    synonym = Column(Text)
+    id: Mapped[int] = mapped_column(primary_key=True)
+    synonym: Mapped[str] = mapped_column(Text, nullable=True)  # keep nullable: Mapped[T] alone implies NOT NULL
 
-    drugbank_id = Column(Integer, ForeignKey("drugbank.id"))
-    drugbank = relationship("Drugbank", back_populates="synonyms")
+    drugbank_id: Mapped[int] = mapped_column(ForeignKey("drugbank.id"), nullable=True)
+    drugbank: Mapped[Drugbank] = relationship("Drugbank", back_populates="synonyms")
 
     def __str__(self):
         return self.synonym
diff --git a/ebel/manager/rdbms/models/ensembl.py b/ebel/manager/rdbms/models/ensembl.py
index 6c7115e..6d88a66 100644
--- a/ebel/manager/rdbms/models/ensembl.py
+++ b/ebel/manager/rdbms/models/ensembl.py
@@ -2,6 +2,7 @@
 
 from sqlalchemy import Column, Integer, String
 from sqlalchemy.ext.declarative import declarative_base
+from sqlalchemy.orm import mapped_column, Mapped
 
 from ebel.manager.rdbms.models import object_as_dict
 
@@ -12,17 +13,17 @@ class Ensembl(Base):
     """Class definition for the ensembl table."""
 
     __tablename__ = "ensembl"
-    id = Column(Integer, primary_key=True)
-    enst = Column(String(20), index=True)
-    version = Column(Integer)
-    chromosome = Column(String(10), index=True)
-    start = Column(Integer, index=True)
-    stop = Column(Integer, index=True)
-    orientation = Column(Integer)
-    gene_id = Column(String(255))
-    gene_id_short = Column(String(255))
-    hgnc_id = Column(String(255), index=True)
-    symbol = Column(String(50), index=True)
+    id: Mapped[int] = mapped_column(primary_key=True)
+    enst: Mapped[str] = mapped_column(String(20), index=True, nullable=True)  # keep nullable: Mapped[T] alone implies NOT NULL
+    version: Mapped[int] = mapped_column(nullable=True)
+    chromosome: Mapped[str] = mapped_column(String(10), index=True, nullable=True)
+    start: Mapped[int] = mapped_column(index=True, nullable=True)
+    stop: Mapped[int] = mapped_column(index=True, nullable=True)
+    orientation: Mapped[int] = mapped_column(nullable=True)
+    gene_id: Mapped[str] = mapped_column(String(255), nullable=True)
+    gene_id_short: Mapped[str] = mapped_column(String(255), nullable=True)
+    hgnc_id: Mapped[str] = mapped_column(String(255), index=True, nullable=True)
+    symbol: Mapped[str] = mapped_column(String(50), index=True, nullable=True)
 
     def as_dict(self):
         """Convert object values to dictionary."""
diff --git a/ebel/manager/rdbms/models/expression_atlas.py b/ebel/manager/rdbms/models/expression_atlas.py
index ce70217..cf7afaf 100644
--- a/ebel/manager/rdbms/models/expression_atlas.py
+++ b/ebel/manager/rdbms/models/expression_atlas.py
@@ -1,7 +1,9 @@
 """Expression Atlas RDBMS model definition."""
+from typing import List
+
 from sqlalchemy import Column, Float, ForeignKey, Integer, String, Text
 from sqlalchemy.ext.declarative import declarative_base
-from sqlalchemy.orm import relationship
+from sqlalchemy.orm import relationship, mapped_column, Mapped
 
 from ebel.manager.rdbms.models import object_as_dict
 
@@ -13,14 +15,14 @@ class Experiment(Base):
 
     __tablename__ = "expression_atlas_experiment"
 
-    id = Column(Integer, primary_key=True)
+    id: Mapped[int] = mapped_column(primary_key=True)
 
-    name = Column(String(100), index=True)
-    title = Column(Text)
+    name: Mapped[str] = mapped_column(String(100), index=True, nullable=True)  # keep nullable: Mapped[T] alone implies NOT NULL
+    title: Mapped[str] = mapped_column(Text, nullable=True)
 
-    idfs = relationship("Idf", back_populates="experiment")
-    group_comparisons = relationship("GroupComparison", back_populates="experiment")
-    sdrf_condenseds = relationship("SdrfCondensed", back_populates="experiment")
+    idfs: Mapped[List["Idf"]] = relationship("Idf", back_populates="experiment")
+    group_comparisons: Mapped[List["GroupComparison"]] = relationship("GroupComparison", back_populates="experiment")
+    sdrf_condenseds: Mapped[List["SdrfCondensed"]] = relationship("SdrfCondensed", back_populates="experiment")
 
     def as_dict(self):
         """Convert object values to dictionary."""
@@ -36,13 +38,13 @@ class Idf(Base):
 
     __tablename__ = "expression_atlas_idf"
 
-    id = Column(Integer, primary_key=True)
+    id: Mapped[int] = mapped_column(primary_key=True)
 
-    key_name = Column(Text, nullable=False)
-    value = Column(Text, nullable=False)
+    key_name: Mapped[str] = mapped_column(Text, nullable=False)
+    value: Mapped[str] = mapped_column(Text, nullable=False)
 
-    experiment_id = Column(Integer, ForeignKey("expression_atlas_experiment.id"))
-    experiment = relationship("Experiment", back_populates="idfs")
+    experiment_id: Mapped[int] = mapped_column(ForeignKey("expression_atlas_experiment.id"), nullable=True)  # keep nullable: Mapped[T] alone implies NOT NULL
+    experiment: Mapped[Experiment] = relationship("Experiment", back_populates="idfs")
 
     def as_dict(self):
         """Convert object values to dictionary."""
@@ -54,16 +56,16 @@ class GroupComparison(Base):
 
     __tablename__ = "expression_atlas_group_comparison"
 
-    id = Column(Integer, primary_key=True)
+    id: Mapped[int] = mapped_column(primary_key=True)
 
-    experiment_id = Column(Integer, ForeignKey("expression_atlas_experiment.id"))
-    experiment = relationship("Experiment", back_populates="group_comparisons")
+    experiment_id: Mapped[int] = mapped_column(ForeignKey("expression_atlas_experiment.id"), nullable=True)  # keep nullable: Mapped[T] alone implies NOT NULL
+    experiment: Mapped[Experiment] = relationship("Experiment", back_populates="group_comparisons")
 
-    group_comparison = Column(String(100))
-    name = Column(Text)
+    group_comparison: Mapped[str] = mapped_column(String(100), nullable=True)
+    name: Mapped[str] = mapped_column(Text, nullable=True)
 
-    fold_changes = relationship("FoldChange", back_populates="group_comparison")
-    gseas = relationship("Gsea", back_populates="group_comparison")
+    fold_changes: Mapped[List["FoldChange"]] = relationship("FoldChange", back_populates="group_comparison")
+    gseas: Mapped[List["Gsea"]] = relationship("Gsea", back_populates="group_comparison")
 
     def as_dict(self):
         """Convert object values to dictionary."""
@@ -75,16 +77,16 @@ class FoldChange(Base):
 
     __tablename__ = "expression_atlas_foldchange"
 
-    id = Column(Integer, primary_key=True)
+    id: Mapped[int] = mapped_column(primary_key=True)
 
-    gene_id = Column(String(255))
-    gene_name = Column(String(100), index=True)
-    log2foldchange = Column(Float, index=True)
-    p_value = Column(Float, index=True)
-    t_statistic = Column(Float)
+    gene_id: Mapped[str] = mapped_column(String(255), nullable=True)  # keep nullable: Mapped[T] alone implies NOT NULL
+    gene_name: Mapped[str] = mapped_column(String(100), index=True, nullable=True)
+    log2foldchange: Mapped[float] = mapped_column(index=True, nullable=True)
+    p_value: Mapped[float] = mapped_column(index=True, nullable=True)
+    t_statistic: Mapped[float] = mapped_column(nullable=True)
 
-    group_comparison_id = Column(Integer, ForeignKey("expression_atlas_group_comparison.id"))
-    group_comparison = relationship("GroupComparison", back_populates="fold_changes")
+    group_comparison_id: Mapped[int] = mapped_column(ForeignKey("expression_atlas_group_comparison.id"), nullable=True)
+    group_comparison: Mapped[GroupComparison] = relationship("GroupComparison", back_populates="fold_changes")
 
     def as_dict(self):
         """Convert object values to dictionary."""
@@ -96,17 +98,17 @@ class SdrfCondensed(Base):
 
     __tablename__ = "expression_atlas_sdrf_condensed"
 
-    id = Column(Integer, primary_key=True)
+    id: Mapped[int] = mapped_column(primary_key=True)
 
-    experiment_id = Column(Integer, ForeignKey("expression_atlas_experiment.id"))
-    experiment = relationship("Experiment", back_populates="sdrf_condenseds")
+    experiment_id: Mapped[int] = mapped_column(ForeignKey("expression_atlas_experiment.id"), nullable=True)  # keep nullable: Mapped[T] alone implies NOT NULL
+    experiment: Mapped[Experiment] = relationship("Experiment", back_populates="sdrf_condenseds")
 
-    method = Column(String(255))
-    sample = Column(String(255))
-    parameter_type = Column(String(255))
-    parameter = Column(String(255))
-    value = Column(String(255))
-    url = Column(String(255))
+    method: Mapped[str] = mapped_column(String(255), nullable=True)
+    sample: Mapped[str] = mapped_column(String(255), nullable=True)
+    parameter_type: Mapped[str] = mapped_column(String(255), nullable=True)
+    parameter: Mapped[str] = mapped_column(String(255), nullable=True)
+    value: Mapped[str] = mapped_column(String(255), nullable=True)
+    url: Mapped[str] = mapped_column(String(255), nullable=True)
 
     def as_dict(self):
         """Convert object values to dictionary."""
@@ -118,22 +120,22 @@ class Gsea(Base):
 
     __tablename__ = "expression_atlas_gsea"
 
-    id = Column(Integer, primary_key=True)
-
-    group_comparison_id = Column(Integer, ForeignKey("expression_atlas_group_comparison.id"))
-    group_comparison = relationship("GroupComparison", back_populates="gseas")
-
-    term = Column(String(255), index=True)
-    accession = Column(String(255))
-    genes_tot = Column(Integer)
-    stat_non_dir_p = Column(Float)
-    p_adj_non_dir = Column(Float, index=True)
-    significant_in_gene_set = Column(Integer)
-    non_significant_in_gene_set = Column(Integer)
-    significant_not_in_gene_set = Column(Integer)
-    non_significant_not_in_gene_set = Column(Integer)
-    effect_size = Column(Float)
-    gsea_type = Column(String(100))
+    id: Mapped[int] = mapped_column(primary_key=True)
+
+    group_comparison_id: Mapped[int] = mapped_column(ForeignKey("expression_atlas_group_comparison.id"), nullable=True)  # keep nullable: Mapped[T] alone implies NOT NULL
+    group_comparison: Mapped[GroupComparison] = relationship("GroupComparison", back_populates="gseas")
+
+    term: Mapped[str] = mapped_column(String(255), index=True, nullable=True)
+    accession: Mapped[str] = mapped_column(String(255), nullable=True)
+    genes_tot: Mapped[int] = mapped_column(nullable=True)
+    stat_non_dir_p: Mapped[float] = mapped_column(nullable=True)
+    p_adj_non_dir: Mapped[float] = mapped_column(index=True, nullable=True)
+    significant_in_gene_set: Mapped[int] = mapped_column(nullable=True)
+    non_significant_in_gene_set: Mapped[int] = mapped_column(nullable=True)
+    significant_not_in_gene_set: Mapped[int] = mapped_column(nullable=True)
+    non_significant_not_in_gene_set: Mapped[int] = mapped_column(nullable=True)
+    effect_size: Mapped[float] = mapped_column(nullable=True)
+    gsea_type: Mapped[str] = mapped_column(String(100), nullable=True)
 
     def as_dict(self):
         """Convert object values to dictionary."""
diff --git a/ebel/manager/rdbms/models/gwas_catalog.py b/ebel/manager/rdbms/models/gwas_catalog.py
index 97a6f8c..6c2c9a9 100644
--- a/ebel/manager/rdbms/models/gwas_catalog.py
+++ b/ebel/manager/rdbms/models/gwas_catalog.py
@@ -1,7 +1,9 @@
 """GWAS Catalog RDBMS model definition."""
+from typing import List
+
 from sqlalchemy import Column, Float, ForeignKey, Integer, String, Text
 from sqlalchemy.ext.declarative import declarative_base
-from sqlalchemy.orm import relationship
+from sqlalchemy.orm import relationship, mapped_column, Mapped
 
 from ebel.manager.rdbms.models import object_as_dict
 
@@ -12,42 +14,42 @@ class GwasCatalog(Base):
     """Class definition for the gwascatalog table."""
 
     __tablename__ = "gwascatalog"
-    id = Column(Integer, primary_key=True)
-    date_added_to_catalog = Column(String(255))
-    pubmedid = Column(Integer)
-    first_author = Column(String(255))
-    date = Column(String(255))
-    journal = Column(String(255))
-    link = Column(String(255))
-    study = Column(Text)
-    disease_trait = Column(String(255))
-    initial_sample_size = Column(Text)
-    replication_sample_size = Column(Text)
-    region = Column(String(50))
-    chr_id = Column(Text)
-    chr_pos = Column(Text)
-    reported_gene_s = Column(Text)
-    mapped_gene = Column(Text)
-    upstream_gene_id = Column(String(50))
-    downstream_gene_id = Column(String(50))
-    upstream_gene_distance = Column(Integer)
-    downstream_gene_distance = Column(Integer)
-    strongest_snp_risk_allele = Column(Text)
-    snp = Column(Text)
-    merged = Column(Integer)
-    snp_id_current = Column(Text)
-    context = Column(Text)
-    intergenic = Column(Integer)
-    risk_allele_frequency = Column(Text)
-    p_value = Column(Float)
-    pvalue_mlog = Column(Float)
-    p_value_text = Column(Text)
-    or_or_beta = Column(Float)
-    _95_ci_text = Column(Text)
-    platform_snps_passing_qc = Column(Text)
-    cnv = Column(Text)
+    id: Mapped[int] = mapped_column(primary_key=True)
+    date_added_to_catalog: Mapped[str] = mapped_column(String(255), nullable=True)  # keep nullable: Mapped[T] alone implies NOT NULL
+    pubmedid: Mapped[int] = mapped_column(nullable=True)
+    first_author: Mapped[str] = mapped_column(String(255), nullable=True)
+    date: Mapped[str] = mapped_column(String(255), nullable=True)
+    journal: Mapped[str] = mapped_column(String(255), nullable=True)
+    link: Mapped[str] = mapped_column(String(255), nullable=True)
+    study: Mapped[str] = mapped_column(Text, nullable=True)
+    disease_trait: Mapped[str] = mapped_column(String(255), nullable=True)
+    initial_sample_size: Mapped[str] = mapped_column(Text, nullable=True)
+    replication_sample_size: Mapped[str] = mapped_column(Text, nullable=True)
+    region: Mapped[str] = mapped_column(String(50), nullable=True)
+    chr_id: Mapped[str] = mapped_column(Text, nullable=True)
+    chr_pos: Mapped[str] = mapped_column(Text, nullable=True)
+    reported_gene_s: Mapped[str] = mapped_column(Text, nullable=True)
+    mapped_gene: Mapped[str] = mapped_column(Text, nullable=True)
+    upstream_gene_id: Mapped[str] = mapped_column(String(50), nullable=True)
+    downstream_gene_id: Mapped[str] = mapped_column(String(50), nullable=True)
+    upstream_gene_distance: Mapped[int] = mapped_column(nullable=True)
+    downstream_gene_distance: Mapped[int] = mapped_column(nullable=True)
+    strongest_snp_risk_allele: Mapped[str] = mapped_column(Text, nullable=True)
+    snp: Mapped[str] = mapped_column(Text, nullable=True)
+    merged: Mapped[int] = mapped_column(nullable=True)
+    snp_id_current: Mapped[str] = mapped_column(Text, nullable=True)
+    context: Mapped[str] = mapped_column(Text, nullable=True)
+    intergenic: Mapped[int] = mapped_column(nullable=True)
+    risk_allele_frequency: Mapped[str] = mapped_column(Text, nullable=True)
+    p_value: Mapped[float] = mapped_column(nullable=True)
+    pvalue_mlog: Mapped[float] = mapped_column(nullable=True)
+    p_value_text: Mapped[str] = mapped_column(Text, nullable=True)
+    or_or_beta: Mapped[float] = mapped_column(nullable=True)
+    _95_ci_text: Mapped[str] = mapped_column(Text, nullable=True)
+    platform_snps_passing_qc: Mapped[str] = mapped_column(Text, nullable=True)
+    cnv: Mapped[str] = mapped_column(Text, nullable=True)
 
-    snp_genes = relationship("SnpGene", back_populates="gwascatalog")
+    snp_genes: Mapped[List["SnpGene"]] = relationship("SnpGene", back_populates="gwascatalog")
 
     def as_dict(self):
         """Convert object values to dictionary."""
@@ -60,7 +62,7 @@ class SnpGene(Base):
     """Class definition for the gwascatalog_snpgene table."""
 
     __tablename__ = "gwascatalog_snpgene"
-    id = Column(Integer, primary_key=True)
-    ensembl_identifier = Column(String(100), nullable=False, index=True)
-    gwascatalog_id = Column(Integer, ForeignKey("gwascatalog.id"))
-    gwascatalog = relationship("GwasCatalog", back_populates="snp_genes")
+    id: Mapped[int] = mapped_column(primary_key=True)
+    ensembl_identifier: Mapped[str] = mapped_column(String(100), nullable=False, index=True)
+    gwascatalog_id: Mapped[int] = mapped_column(ForeignKey("gwascatalog.id"), nullable=True)  # keep nullable: Mapped[T] alone implies NOT NULL
+    gwascatalog: Mapped[GwasCatalog] = relationship("GwasCatalog", back_populates="snp_genes")
diff --git a/ebel/manager/rdbms/models/hgnc.py b/ebel/manager/rdbms/models/hgnc.py
index da21ff4..26c5a20 100644
--- a/ebel/manager/rdbms/models/hgnc.py
+++ b/ebel/manager/rdbms/models/hgnc.py
@@ -1,8 +1,11 @@
 """HGNC RDBMS model definition."""
+import datetime
+from typing import List
+
 from sqlalchemy import (BigInteger, Column, Date, ForeignKey, Integer, String,
                         Text)
 from sqlalchemy.ext.declarative import declarative_base
-from sqlalchemy.orm import relationship
+from sqlalchemy.orm import relationship, mapped_column, Mapped
 
 from ebel.manager.rdbms.models import object_as_dict
 
@@ -13,59 +16,59 @@ class Hgnc(Base):
     """Class definition for the hgnc table."""
 
     __tablename__ = "hgnc"
-    id = Column(Integer, primary_key=True)
-    hgnc_id = Column(String(20))
-    version = Column(BigInteger)
-    bioparadigms_slc = Column(String(20))
-    cd = Column(String(20))
-    cosmic = Column(String(50))
-    date_approved_reserved = Column(Date)
-    date_modified = Column(Date)
-    date_name_changed = Column(Date)
-    date_symbol_changed = Column(Date)
-    ensembl_gene_id = Column(String(20))
-    entrez_id = Column(Integer)
-    homeodb = Column(Integer)
-    horde_id = Column(String(50))
-    imgt = Column(String(50))
-    iuphar = Column(String(50))
-    kznf_gene_catalog = Column(Integer)
-    lncipedia = Column(String(50))
-    lncrnadb = Column(String(50))
-    location = Column(String(100))
-    location_sortable = Column(String(100))
-    locus_group = Column(String(50))
-    locus_type = Column(String(50))
-    merops = Column(String(20))
-    mirbase = Column(String(20))
-    name = Column(String(255))
-    orphanet = Column(Integer)
-    snornabase = Column(String(20))
-    status = Column(String(50))
-    symbol = Column(String(100), index=True)
-    ucsc_id = Column(String(50))
-    uuid = Column(String(50))
-    vega_id = Column(String(50))
-    agr = Column(String(50))
-    kznf_gene_catalog = Column(Text)
-
-    pre_symbols = relationship("PrevSymbol", back_populates="hgnc")
-    alias_names = relationship("AliasName", back_populates="hgnc")
-    alias_symbols = relationship("AliasSymbol", back_populates="hgnc")
-    ccdss = relationship("Ccds", back_populates="hgnc")
-    enas = relationship("Ena", back_populates="hgnc")
-    enzymes = relationship("Enzyme", back_populates="hgnc")
-    gene_group_names = relationship("GeneGroupName", back_populates="hgnc")
-    gene_group_ids = relationship("GeneGroupId", back_populates="hgnc")
-    uniprots = relationship("UniProt", back_populates="hgnc")
-    rna_centrals = relationship("RnaCentral", back_populates="hgnc")
-    rgds = relationship("Rgd", back_populates="hgnc")
-    refseqs = relationship("RefSeq", back_populates="hgnc")
-    pubmeds = relationship("PubMed", back_populates="hgnc")
-    prev_names = relationship("PrevName", back_populates="hgnc")
-    omims = relationship("Omim", back_populates="hgnc")
-    mgds = relationship("Mgd", back_populates="hgnc")
-    lsdbs = relationship("Lsdb", back_populates="hgnc")
+    id: Mapped[int] = mapped_column(primary_key=True)
+    hgnc_id: Mapped[str] = mapped_column(String(20), nullable=True)  # keep nullable: Mapped[T] alone implies NOT NULL
+    version: Mapped[int] = mapped_column(BigInteger, nullable=True)  # explicit BigInteger: a bare int annotation maps to Integer
+    bioparadigms_slc: Mapped[str] = mapped_column(String(20), nullable=True)
+    cd: Mapped[str] = mapped_column(String(20), nullable=True)
+    cosmic: Mapped[str] = mapped_column(String(50), nullable=True)
+    date_approved_reserved: Mapped[datetime.date] = mapped_column(Date, nullable=True)
+    date_modified: Mapped[datetime.date] = mapped_column(Date, nullable=True)
+    date_name_changed: Mapped[datetime.date] = mapped_column(Date, nullable=True)
+    date_symbol_changed: Mapped[datetime.date] = mapped_column(Date, nullable=True)
+    ensembl_gene_id: Mapped[str] = mapped_column(String(20), nullable=True)
+    entrez_id: Mapped[int] = mapped_column(nullable=True)
+    homeodb: Mapped[int] = mapped_column(nullable=True)
+    horde_id: Mapped[str] = mapped_column(String(50), nullable=True)
+    imgt: Mapped[str] = mapped_column(String(50), nullable=True)
+    iuphar: Mapped[str] = mapped_column(String(50), nullable=True)
+    # kznf_gene_catalog is declared once, as Text, at the end of the column list
+    lncipedia: Mapped[str] = mapped_column(String(50), nullable=True)
+    lncrnadb: Mapped[str] = mapped_column(String(50), nullable=True)
+    location: Mapped[str] = mapped_column(String(100), nullable=True)
+    location_sortable: Mapped[str] = mapped_column(String(100), nullable=True)
+    locus_group: Mapped[str] = mapped_column(String(50), nullable=True)
+    locus_type: Mapped[str] = mapped_column(String(50), nullable=True)
+    merops: Mapped[str] = mapped_column(String(20), nullable=True)
+    mirbase: Mapped[str] = mapped_column(String(20), nullable=True)
+    name: Mapped[str] = mapped_column(String(255), nullable=True)
+    orphanet: Mapped[int] = mapped_column(nullable=True)
+    snornabase: Mapped[str] = mapped_column(String(20), nullable=True)
+    status: Mapped[str] = mapped_column(String(50), nullable=True)
+    symbol: Mapped[str] = mapped_column(String(100), index=True, nullable=True)
+    ucsc_id: Mapped[str] = mapped_column(String(50), nullable=True)
+    uuid: Mapped[str] = mapped_column(String(50), nullable=True)
+    vega_id: Mapped[str] = mapped_column(String(50), nullable=True)
+    agr: Mapped[str] = mapped_column(String(50), nullable=True)
+    kznf_gene_catalog: Mapped[str] = mapped_column(Text, nullable=True)
+
+    pre_symbols: Mapped[List["PrevSymbol"]] = relationship("PrevSymbol", back_populates="hgnc")
+    alias_names: Mapped[List["AliasName"]] = relationship("AliasName", back_populates="hgnc")
+    alias_symbols: Mapped[List["AliasSymbol"]] = relationship("AliasSymbol", back_populates="hgnc")
+    ccdss: Mapped[List["Ccds"]] = relationship("Ccds", back_populates="hgnc")
+    enas: Mapped[List["Ena"]] = relationship("Ena", back_populates="hgnc")
+    enzymes: Mapped[List["Enzyme"]] = relationship("Enzyme", back_populates="hgnc")
+    gene_group_names: Mapped[List["GeneGroupName"]] = relationship("GeneGroupName", back_populates="hgnc")
+    gene_group_ids: Mapped[List["GeneGroupId"]] = relationship("GeneGroupId", back_populates="hgnc")
+    uniprots: Mapped[List["UniProt"]] = relationship("UniProt", back_populates="hgnc")
+    rna_centrals: Mapped[List["RnaCentral"]] = relationship("RnaCentral", back_populates="hgnc")
+    rgds: Mapped[List["Rgd"]] = relationship("Rgd", back_populates="hgnc")
+    refseqs: Mapped[List["RefSeq"]] = relationship("RefSeq", back_populates="hgnc")
+    pubmeds: Mapped[List["PubMed"]] = relationship("PubMed", back_populates="hgnc")
+    prev_names: Mapped[List["PrevName"]] = relationship("PrevName", back_populates="hgnc")
+    omims: Mapped[List["Omim"]] = relationship("Omim", back_populates="hgnc")
+    mgds: Mapped[List["Mgd"]] = relationship("Mgd", back_populates="hgnc")
+    lsdbs: Mapped[List["Lsdb"]] = relationship("Lsdb", back_populates="hgnc")
 
     def as_dict(self):
         """Convert object values to dictionary."""
@@ -127,12 +130,12 @@ class PrevSymbol(Base):
     """Class definition for the hgnc_prev_symbol table."""
 
     __tablename__ = "hgnc_prev_symbol"
-    id = Column(Integer, primary_key=True)
+    id: Mapped[int] = mapped_column(primary_key=True)
 
-    prev_symbol = Column(String(50), index=True)
+    prev_symbol: Mapped[str] = mapped_column(String(50), index=True, nullable=True)  # keep nullable: Mapped[T] alone implies NOT NULL
 
-    hgnc_id = Column(Integer, ForeignKey("hgnc.id"))
-    hgnc = relationship("Hgnc", back_populates="pre_symbols")
+    hgnc_id: Mapped[int] = mapped_column(ForeignKey("hgnc.id"), nullable=True)
+    hgnc: Mapped[Hgnc] = relationship("Hgnc", back_populates="pre_symbols")
 
     def __str__(self):
         return self.prev_symbol
@@ -142,12 +145,12 @@ class AliasName(Base):
     """Class definition for the hgnc_alias_name table."""
 
     __tablename__ = "hgnc_alias_name"
-    id = Column(Integer, primary_key=True)
+    id: Mapped[int] = mapped_column(primary_key=True)
 
-    alias_name = Column(String(255))
+    alias_name: Mapped[str] = mapped_column(String(255), nullable=True)  # keep nullable: Mapped[T] alone implies NOT NULL
 
-    hgnc_id = Column(Integer, ForeignKey("hgnc.id"))
-    hgnc = relationship("Hgnc", back_populates="alias_names")
+    hgnc_id: Mapped[int] = mapped_column(ForeignKey("hgnc.id"), nullable=True)
+    hgnc: Mapped[Hgnc] = relationship("Hgnc", back_populates="alias_names")
 
     def __str__(self):
         return self.alias_name
@@ -157,12 +160,12 @@ class AliasSymbol(Base):
     """Class definition for the hgnc_alias_symbol table."""
 
     __tablename__ = "hgnc_alias_symbol"
-    id = Column(Integer, primary_key=True)
+    id: Mapped[int] = mapped_column(primary_key=True)
 
-    alias_symbol = Column(String(50), index=True)
+    alias_symbol: Mapped[str] = mapped_column(String(50), index=True, nullable=True)  # keep nullable: Mapped[T] alone implies NOT NULL
 
-    hgnc_id = Column(Integer, ForeignKey("hgnc.id"))
-    hgnc = relationship("Hgnc", back_populates="alias_symbols")
+    hgnc_id: Mapped[int] = mapped_column(ForeignKey("hgnc.id"), nullable=True)
+    hgnc: Mapped[Hgnc] = relationship("Hgnc", back_populates="alias_symbols")
 
     def __str__(self):
         return self.alias_symbol
@@ -172,12 +175,12 @@ class Ccds(Base):
     """Class definition for the hgnc_ccds table."""
 
     __tablename__ = "hgnc_ccds"
-    id = Column(Integer, primary_key=True)
+    id: Mapped[int] = mapped_column(primary_key=True)
 
-    identifier = Column(String(50), index=True)
+    identifier: Mapped[str] = mapped_column(String(50), index=True, nullable=True)  # keep nullable: Mapped[T] alone implies NOT NULL
 
-    hgnc_id = Column(Integer, ForeignKey("hgnc.id"))
-    hgnc = relationship("Hgnc", back_populates="ccdss")
+    hgnc_id: Mapped[int] = mapped_column(ForeignKey("hgnc.id"), nullable=True)
+    hgnc: Mapped[Hgnc] = relationship("Hgnc", back_populates="ccdss")
 
     def __str__(self):
         return self.identifier
@@ -187,12 +190,12 @@ class Ena(Base):
     """Class definition for the hgnc_ena table."""
 
     __tablename__ = "hgnc_ena"
-    id = Column(Integer, primary_key=True)
+    id: Mapped[int] = mapped_column(primary_key=True)
 
-    identifier = Column(String(50), index=True)
+    identifier: Mapped[str] = mapped_column(String(50), index=True, nullable=True)  # keep nullable: Mapped[T] alone implies NOT NULL
 
-    hgnc_id = Column(Integer, ForeignKey("hgnc.id"))
-    hgnc = relationship("Hgnc", back_populates="enas")
+    hgnc_id: Mapped[int] = mapped_column(ForeignKey("hgnc.id"), nullable=True)
+    hgnc: Mapped[Hgnc] = relationship("Hgnc", back_populates="enas")
 
     def __str__(self):
         return self.identifier
@@ -202,11 +205,11 @@ class Enzyme(Base):
     """Class definition for the hgnc_enzyme table."""
 
     __tablename__ = "hgnc_enzyme"
-    id = Column(Integer, primary_key=True)
+    id: Mapped[int] = mapped_column(primary_key=True)
 
-    ec_number = Column(String(50), index=True)
+    ec_number: Mapped[str] = mapped_column(String(50), index=True, nullable=True)  # annotated like sibling models; stays nullable
 
-    hgnc_id = Column(Integer, ForeignKey("hgnc.id"))
+    hgnc_id: Mapped[int] = mapped_column(ForeignKey("hgnc.id"), nullable=True)  # keep nullable: Mapped[T] alone implies NOT NULL
     hgnc = relationship("Hgnc", back_populates="enzymes")
 
     def __str__(self):
@@ -217,11 +220,11 @@ class GeneGroupName(Base):
     """Class definition for the hgnc_gene_group_name table."""
 
     __tablename__ = "hgnc_gene_group_name"
-    id = Column(Integer, primary_key=True)
+    id: Mapped[int] = mapped_column(primary_key=True)
 
-    name = Column(String(255))
+    name: Mapped[str] = mapped_column(String(255), nullable=True)  # keep column NULL-able
 
-    hgnc_id = Column(Integer, ForeignKey("hgnc.id"))
+    hgnc_id: Mapped[int] = mapped_column(ForeignKey("hgnc.id"), nullable=True)
     hgnc = relationship("Hgnc", back_populates="gene_group_names")
 
     def __str__(self):
@@ -236,11 +239,11 @@ class GeneGroupId(Base):
     """Class definition for the hgnc_gene_group_id table."""
 
     __tablename__ = "hgnc_gene_group_id"
-    id = Column(Integer, primary_key=True)
+    id: Mapped[int] = mapped_column(primary_key=True)
 
-    identifier = Column(Integer)
+    identifier: Mapped[int] = mapped_column(nullable=True)  # keep column NULL-able
 
-    hgnc_id = Column(Integer, ForeignKey("hgnc.id"))
+    hgnc_id: Mapped[int] = mapped_column(ForeignKey("hgnc.id"), nullable=True)
     hgnc = relationship("Hgnc", back_populates="gene_group_ids")
 
     def __str__(self):
@@ -251,11 +254,11 @@ class UniProt(Base):
     """Class definition for the hgnc_uniprot table."""
 
     __tablename__ = "hgnc_uniprot"
-    id = Column(Integer, primary_key=True)
+    id: Mapped[int] = mapped_column(primary_key=True)
 
-    accession = Column(String(50), index=True)
+    accession: Mapped[str] = mapped_column(String(50), index=True, nullable=True)  # keep column NULL-able
 
-    hgnc_id = Column(Integer, ForeignKey("hgnc.id"))
+    hgnc_id: Mapped[int] = mapped_column(ForeignKey("hgnc.id"), nullable=True)
     hgnc = relationship("Hgnc", back_populates="uniprots")
 
     def __str__(self):
@@ -266,11 +269,11 @@ class RnaCentral(Base):
     """Class definition for the hgnc_rna_central table."""
 
     __tablename__ = "hgnc_rna_central"
-    id = Column(Integer, primary_key=True)
+    id: Mapped[int] = mapped_column(primary_key=True)
 
-    identifier = Column(String(50), index=True)
+    identifier: Mapped[str] = mapped_column(String(50), index=True, nullable=True)  # keep column NULL-able
 
-    hgnc_id = Column(Integer, ForeignKey("hgnc.id"))
+    hgnc_id: Mapped[int] = mapped_column(ForeignKey("hgnc.id"), nullable=True)
     hgnc = relationship("Hgnc", back_populates="rna_centrals")
 
     def __str__(self):
@@ -281,11 +284,11 @@ class Rgd(Base):
     """Class definition for the hgnc_rgd table."""
 
     __tablename__ = "hgnc_rgd"
-    id = Column(Integer, primary_key=True)
+    id: Mapped[int] = mapped_column(primary_key=True)
 
-    identifier = Column(String(50), index=True)
+    identifier: Mapped[str] = mapped_column(String(50), index=True, nullable=True)  # keep column NULL-able
 
-    hgnc_id = Column(Integer, ForeignKey("hgnc.id"))
+    hgnc_id: Mapped[int] = mapped_column(ForeignKey("hgnc.id"), nullable=True)
     hgnc = relationship("Hgnc", back_populates="rgds")
 
     def __str__(self):
@@ -296,11 +299,11 @@ class RefSeq(Base):
     """Class definition for the hgnc_refseq table."""
 
     __tablename__ = "hgnc_refseq"
-    id = Column(Integer, primary_key=True)
+    id: Mapped[int] = mapped_column(primary_key=True)
 
-    accession = Column(String(50), index=True)
+    accession: Mapped[str] = mapped_column(String(50), index=True, nullable=True)  # keep column NULL-able
 
-    hgnc_id = Column(Integer, ForeignKey("hgnc.id"))
+    hgnc_id: Mapped[int] = mapped_column(ForeignKey("hgnc.id"), nullable=True)
     hgnc = relationship("Hgnc", back_populates="refseqs")
 
     def __str__(self):
@@ -311,11 +314,11 @@ class PubMed(Base):
     """Class definition for the hgnc_pubmed table."""
 
     __tablename__ = "hgnc_pubmed"
-    id = Column(Integer, primary_key=True)
+    id: Mapped[int] = mapped_column(primary_key=True)
 
-    pmid = Column(Integer, index=True)
+    pmid: Mapped[int] = mapped_column(index=True, nullable=True)  # keep column NULL-able
 
-    hgnc_id = Column(Integer, ForeignKey("hgnc.id"))
+    hgnc_id: Mapped[int] = mapped_column(ForeignKey("hgnc.id"), nullable=True)
     hgnc = relationship("Hgnc", back_populates="pubmeds")
 
     def __str__(self):
@@ -326,11 +329,11 @@ class PrevName(Base):
     """Class definition for the hgnc_prev_name table."""
 
     __tablename__ = "hgnc_prev_name"
-    id = Column(Integer, primary_key=True)
+    id: Mapped[int] = mapped_column(primary_key=True)
 
-    prev_name = Column(String(255))
+    prev_name: Mapped[str] = mapped_column(String(255), nullable=True)  # keep column NULL-able
 
-    hgnc_id = Column(Integer, ForeignKey("hgnc.id"))
+    hgnc_id: Mapped[int] = mapped_column(ForeignKey("hgnc.id"), nullable=True)
     hgnc = relationship("Hgnc", back_populates="prev_names")
 
     def __str__(self):
@@ -341,11 +344,11 @@ class Omim(Base):
     """Class definition for the hgnc_omim table."""
 
     __tablename__ = "hgnc_omim"
-    id = Column(Integer, primary_key=True)
+    id: Mapped[int] = mapped_column(primary_key=True)
 
-    identifier = Column(Integer, index=True)
+    identifier: Mapped[int] = mapped_column(index=True, nullable=True)  # keep column NULL-able
 
-    hgnc_id = Column(Integer, ForeignKey("hgnc.id"))
+    hgnc_id: Mapped[int] = mapped_column(ForeignKey("hgnc.id"), nullable=True)
     hgnc = relationship("Hgnc", back_populates="omims")
 
     def __str__(self):
@@ -356,11 +359,11 @@ class Mgd(Base):
     """Class definition for the hgnc_mgd table."""
 
     __tablename__ = "hgnc_mgd"
-    id = Column(Integer, primary_key=True)
+    id: Mapped[int] = mapped_column(primary_key=True)
 
-    identifier = Column(String(50), index=True)
+    identifier: Mapped[str] = mapped_column(String(50), index=True, nullable=True)  # keep column NULL-able
 
-    hgnc_id = Column(Integer, ForeignKey("hgnc.id"))
+    hgnc_id: Mapped[int] = mapped_column(ForeignKey("hgnc.id"), nullable=True)
     hgnc = relationship("Hgnc", back_populates="mgds")
 
     def __str__(self):
@@ -371,11 +374,11 @@ class Lsdb(Base):
     """Class definition for the hgnc_lsdb table."""
 
     __tablename__ = "hgnc_lsdb"
-    id = Column(Integer, primary_key=True)
+    id: Mapped[int] = mapped_column(primary_key=True)
 
-    identifier = Column(Text)
+    identifier: Mapped[str] = mapped_column(Text, nullable=True)  # keep column NULL-able
 
-    hgnc_id = Column(Integer, ForeignKey("hgnc.id"))
+    hgnc_id: Mapped[int] = mapped_column(ForeignKey("hgnc.id"), nullable=True)
     hgnc = relationship("Hgnc", back_populates="lsdbs")
 
     def __str__(self):
diff --git a/ebel/manager/rdbms/models/human_ortholog.py b/ebel/manager/rdbms/models/human_ortholog.py
index a1ccd37..565d88c 100644
--- a/ebel/manager/rdbms/models/human_ortholog.py
+++ b/ebel/manager/rdbms/models/human_ortholog.py
@@ -1,6 +1,7 @@
 """HGNC Human Ortholog RDBMS model definition."""
 from sqlalchemy import Column, Integer, String, Text
 from sqlalchemy.ext.declarative import declarative_base
+from sqlalchemy.orm import mapped_column, Mapped
 
 from ebel.manager.rdbms.models import object_as_dict
 
@@ -12,18 +13,18 @@ class HumanOrtholog(Base):
 
     __tablename__ = "human_ortholog"
 
-    id = Column(Integer, primary_key=True)
-
-    hgnc_id = Column(String(20), index=True)
-    human_entrez_gene = Column(Integer)
-    human_ensembl_gene = Column(String(20))
-    human_symbol = Column(String(50), index=True)
-    ortholog_species = Column(Integer, index=True)
-    ortholog_species_entrez_gene = Column(Integer)
-    ortholog_species_ensembl_gene = Column(String(50))
-    ortholog_species_db_id = Column(String(50))
-    ortholog_species_symbol = Column(String(50), index=True)
-    support = Column(Text)
+    id: Mapped[int] = mapped_column(primary_key=True)
+
+    hgnc_id: Mapped[str] = mapped_column(String(20), index=True, nullable=True)  # nullable=True keeps pre-2.0 DDL
+    human_entrez_gene: Mapped[int] = mapped_column(nullable=True)
+    human_ensembl_gene: Mapped[str] = mapped_column(String(20), nullable=True)
+    human_symbol: Mapped[str] = mapped_column(String(50), index=True, nullable=True)
+    ortholog_species: Mapped[int] = mapped_column(index=True, nullable=True)
+    ortholog_species_entrez_gene: Mapped[int] = mapped_column(nullable=True)
+    ortholog_species_ensembl_gene: Mapped[str] = mapped_column(String(50), nullable=True)
+    ortholog_species_db_id: Mapped[str] = mapped_column(String(50), nullable=True)
+    ortholog_species_symbol: Mapped[str] = mapped_column(String(50), index=True, nullable=True)
+    support: Mapped[str] = mapped_column(Text, nullable=True)
 
     def as_dict(self):
         """Convert object values to dictionary."""
diff --git a/ebel/manager/rdbms/models/intact.py b/ebel/manager/rdbms/models/intact.py
index ab5ac33..7067f37 100644
--- a/ebel/manager/rdbms/models/intact.py
+++ b/ebel/manager/rdbms/models/intact.py
@@ -1,6 +1,7 @@
 """IntAct RDBMS model definition."""
 from sqlalchemy import Column, Float, Integer, String, Text
 from sqlalchemy.ext.declarative import declarative_base
+from sqlalchemy.orm import mapped_column, Mapped
 
 from ebel.manager.rdbms.models import object_as_dict
 
@@ -11,16 +12,16 @@ class Intact(Base):
     """Class definition for the intact table."""
 
     __tablename__ = "intact"
-    id = Column(Integer, primary_key=True)
-    confidence_value = Column(Float, index=True)
-    detection_method = Column(String(100), index=True)
-    detection_method_psimi_id = Column(Integer)
-    int_a_uniprot_id = Column(String(50), index=True)
-    int_b_uniprot_id = Column(String(50), index=True)
-    interaction_ids = Column(Text)
-    interaction_type = Column(String(100), index=True)
-    interaction_type_psimi_id = Column(Integer)
-    pmid = Column(Integer)
+    id: Mapped[int] = mapped_column(primary_key=True)
+    confidence_value: Mapped[float] = mapped_column(index=True, nullable=True)  # nullable=True keeps pre-2.0 DDL
+    detection_method: Mapped[str] = mapped_column(String(100), index=True, nullable=True)
+    detection_method_psimi_id: Mapped[int] = mapped_column(nullable=True)
+    int_a_uniprot_id: Mapped[str] = mapped_column(String(50), index=True, nullable=True)
+    int_b_uniprot_id: Mapped[str] = mapped_column(String(50), index=True, nullable=True)
+    interaction_ids: Mapped[str] = mapped_column(Text, nullable=True)
+    interaction_type: Mapped[str] = mapped_column(String(100), index=True, nullable=True)
+    interaction_type_psimi_id: Mapped[int] = mapped_column(nullable=True)
+    pmid: Mapped[int] = mapped_column(nullable=True)
 
     def as_dict(self):
         """Convert object values to dictionary."""
diff --git a/ebel/manager/rdbms/models/iuphar.py b/ebel/manager/rdbms/models/iuphar.py
index 790a929..cc1c1b8 100644
--- a/ebel/manager/rdbms/models/iuphar.py
+++ b/ebel/manager/rdbms/models/iuphar.py
@@ -1,8 +1,10 @@
 """IUPHAR RDBMS model definition."""
+from typing import List
+
 from sqlalchemy import (BigInteger, Boolean, Column, ForeignKey, Integer,
                         Numeric, String, Text)
 from sqlalchemy.ext.declarative import declarative_base
-from sqlalchemy.orm import relationship
+from sqlalchemy.orm import relationship, mapped_column, Mapped
 
 from ebel.manager.rdbms.models import object_as_dict
 
@@ -13,34 +15,34 @@ class IupharLigand(Base):
     """Class definition for the iuphar_ligand table."""
 
     __tablename__ = "iuphar_ligand"
-    id = Column(Integer, primary_key=True)
+    id: Mapped[int] = mapped_column(primary_key=True)
 
-    name = Column(Text)
-    species = Column(Text)
-    type = Column(Text)
-    approved = Column(Boolean)
-    withdrawn = Column(Boolean)
-    labelled = Column(Boolean)
-    radioactive = Column(Boolean)
-    pubchem_sid = Column(BigInteger)
-    pubchem_cid = Column(Text)  # TODO: This is a integer, but for import reasons this changed to text
-    uniprot_id = Column(Text)
-    ensembl_id = Column(Text)
-    ligand_subunit_ids = Column(Text)
-    ligand_subunit_name = Column(Text)
-    ligand_subunit_uni_prot_ids = Column(Text)
-    ligand_subunit_ensembl_ids = Column(Text)
-    iupac_name = Column(Text)
-    inn = Column(Text)
-    synonyms = Column(Text)
-    smiles = Column(Text)
-    inchi_key = Column(Text)
-    inchi = Column(Text)
-    gto_immu_pdb = Column(Boolean)
-    gto_mpdb = Column(Boolean)
-    antibacterial = Column(Boolean)
+    name: Mapped[str] = mapped_column(Text, nullable=True)  # nullable=True keeps pre-2.0 DDL
+    species: Mapped[str] = mapped_column(Text, nullable=True)
+    type: Mapped[str] = mapped_column(Text, nullable=True)
+    approved: Mapped[bool] = mapped_column(nullable=True)
+    withdrawn: Mapped[bool] = mapped_column(nullable=True)
+    labelled: Mapped[bool] = mapped_column(nullable=True)
+    radioactive: Mapped[bool] = mapped_column(nullable=True)
+    pubchem_sid: Mapped[int] = mapped_column(BigInteger, nullable=True)  # was BigInteger; bare int maps to Integer
+    pubchem_cid: Mapped[str] = mapped_column(Text, nullable=True)  # TODO: This is a integer, but for import reasons this changed to text
+    uniprot_id: Mapped[str] = mapped_column(Text, nullable=True)
+    ensembl_id: Mapped[str] = mapped_column(Text, nullable=True)
+    ligand_subunit_ids: Mapped[str] = mapped_column(Text, nullable=True)
+    ligand_subunit_name: Mapped[str] = mapped_column(Text, nullable=True)
+    ligand_subunit_uni_prot_ids: Mapped[str] = mapped_column(Text, nullable=True)
+    ligand_subunit_ensembl_ids: Mapped[str] = mapped_column(Text, nullable=True)
+    iupac_name: Mapped[str] = mapped_column(Text, nullable=True)
+    inn: Mapped[str] = mapped_column(Text, nullable=True)
+    synonyms: Mapped[str] = mapped_column(Text, nullable=True)
+    smiles: Mapped[str] = mapped_column(Text, nullable=True)
+    inchi_key: Mapped[str] = mapped_column(Text, nullable=True)
+    inchi: Mapped[str] = mapped_column(Text, nullable=True)
+    gto_immu_pdb: Mapped[bool] = mapped_column(nullable=True)
+    gto_mpdb: Mapped[bool] = mapped_column(nullable=True)
+    antibacterial: Mapped[bool] = mapped_column(nullable=True)
 
-    interactions = relationship("IupharInteraction")
+    interactions: Mapped[List["IupharInteraction"]] = relationship("IupharInteraction")
 
     def as_dict(self):
         """Convert object values to dictionary."""
@@ -51,50 +53,50 @@ class IupharInteraction(Base):
     """Class definition for the iuphar_interaction table."""
 
     __tablename__ = "iuphar_interaction"
-    id = Column(Integer, primary_key=True)
+    id: Mapped[int] = mapped_column(primary_key=True)
 
-    target = Column(String(255))
-    target_id = Column(Integer)
-    target_subunit_ids = Column(Text)
-    target_gene_symbol = Column(String(100))
-    target_uniprot = Column(String(100))
-    target_ensembl_gene_id = Column(String(200))
-    target_ligand = Column(String(100))
-    target_ligand_id = Column(Integer)
-    target_ligand_subunit_ids = Column(Text)
-    target_ligand_gene_symbol = Column(String(50))
-    target_ligand_uniprot_id = Column(String(200))
-    target_ligand_ensembl_gene_id = Column(String(50))
-    target_ligand_pubchem_sid = Column(Integer)
-    target_species = Column(String(100))
-    ligand = Column(String(255))
-    ligand_id = Column(Integer, ForeignKey("iuphar_ligand.id"), index=True)
-    ligand_subunit_ids = Column(Text)
-    ligand_gene_symbol = Column(String(50))
-    ligand_species = Column(String(50))
-    ligand_pubchem_sid = Column(Integer)
-    ligand_type = Column(Text)
-    approved = Column(Boolean)
-    type = Column(String(100))
-    action = Column(String(100))
-    action_comment = Column(String(255))
-    selectivity = Column(String(50))
-    endogenous = Column(Boolean)
-    primary_target = Column(Boolean)
-    concentration_range = Column(String(50))
-    affinity_units = Column(String(10))
-    affinity_high = Column(Numeric(6, 2))
-    affinity_median = Column(Numeric(6, 2))
-    affinity_low = Column(Numeric(6, 2))
-    original_affinity_units = Column(String(10))
-    original_affinity_low_nm = Column(Numeric(12, 3))
-    original_affinity_median_nm = Column(Numeric(12, 3))
-    original_affinity_high_nm = Column(Numeric(12, 3))
-    original_affinity_relation = Column(String(1))
-    assay_description = Column(Text)
-    receptor_site = Column(String(100))
-    ligand_context = Column(String(50))
-    pubmed_id = Column(Text)
+    target: Mapped[str] = mapped_column(String(255), nullable=True)  # nullable=True keeps pre-2.0 DDL
+    target_id: Mapped[int] = mapped_column(nullable=True)
+    target_subunit_ids: Mapped[str] = mapped_column(Text, nullable=True)
+    target_gene_symbol: Mapped[str] = mapped_column(String(100), nullable=True)
+    target_uniprot: Mapped[str] = mapped_column(String(100), nullable=True)
+    target_ensembl_gene_id: Mapped[str] = mapped_column(String(200), nullable=True)
+    target_ligand: Mapped[str] = mapped_column(String(100), nullable=True)
+    target_ligand_id: Mapped[int] = mapped_column(nullable=True)
+    target_ligand_subunit_ids: Mapped[str] = mapped_column(Text, nullable=True)
+    target_ligand_gene_symbol: Mapped[str] = mapped_column(String(50), nullable=True)
+    target_ligand_uniprot_id: Mapped[str] = mapped_column(String(200), nullable=True)
+    target_ligand_ensembl_gene_id: Mapped[str] = mapped_column(String(50), nullable=True)
+    target_ligand_pubchem_sid: Mapped[int] = mapped_column(nullable=True)
+    target_species: Mapped[str] = mapped_column(String(100), nullable=True)
+    ligand: Mapped[str] = mapped_column(String(255), nullable=True)
+    ligand_id: Mapped[int] = mapped_column(ForeignKey("iuphar_ligand.id"), index=True, nullable=True)
+    ligand_subunit_ids: Mapped[str] = mapped_column(Text, nullable=True)
+    ligand_gene_symbol: Mapped[str] = mapped_column(String(50), nullable=True)
+    ligand_species: Mapped[str] = mapped_column(String(50), nullable=True)
+    ligand_pubchem_sid: Mapped[int] = mapped_column(nullable=True)
+    ligand_type: Mapped[str] = mapped_column(Text, nullable=True)
+    approved: Mapped[bool] = mapped_column(nullable=True)
+    type: Mapped[str] = mapped_column(String(100), nullable=True)
+    action: Mapped[str] = mapped_column(String(100), nullable=True)
+    action_comment: Mapped[str] = mapped_column(String(255), nullable=True)
+    selectivity: Mapped[str] = mapped_column(String(50), nullable=True)
+    endogenous: Mapped[bool] = mapped_column(nullable=True)
+    primary_target: Mapped[bool] = mapped_column(nullable=True)
+    concentration_range: Mapped[str] = mapped_column(String(50), nullable=True)
+    affinity_units: Mapped[str] = mapped_column(String(10), nullable=True)
+    affinity_high: Mapped[float] = mapped_column(Numeric(6, 2), nullable=True)
+    affinity_median: Mapped[float] = mapped_column(Numeric(6, 2), nullable=True)
+    affinity_low: Mapped[float] = mapped_column(Numeric(6, 2), nullable=True)
+    original_affinity_units: Mapped[str] = mapped_column(String(10), nullable=True)
+    original_affinity_low_nm: Mapped[float] = mapped_column(Numeric(12, 3), nullable=True)
+    original_affinity_median_nm: Mapped[float] = mapped_column(Numeric(12, 3), nullable=True)
+    original_affinity_high_nm: Mapped[float] = mapped_column(Numeric(12, 3), nullable=True)
+    original_affinity_relation: Mapped[str] = mapped_column(String(1), nullable=True)
+    assay_description: Mapped[str] = mapped_column(Text, nullable=True)
+    receptor_site: Mapped[str] = mapped_column(String(100), nullable=True)
+    ligand_context: Mapped[str] = mapped_column(String(50), nullable=True)
+    pubmed_id: Mapped[str] = mapped_column(Text, nullable=True)
 
     def as_dict(self):
         """Convert object values to dictionary."""
diff --git a/ebel/manager/rdbms/models/kegg.py b/ebel/manager/rdbms/models/kegg.py
index c5b07ee..d26d78d 100644
--- a/ebel/manager/rdbms/models/kegg.py
+++ b/ebel/manager/rdbms/models/kegg.py
@@ -1,6 +1,7 @@
 """KEGG RDBMS model definition."""
 from sqlalchemy import Column, Integer, String
 from sqlalchemy.ext.declarative import declarative_base
+from sqlalchemy.orm import mapped_column, Mapped
 
 from ebel.manager.rdbms.models import object_as_dict
 
@@ -11,17 +12,17 @@ class Kegg(Base):
     """Class definition for the kegg table."""
 
     __tablename__ = "kegg"
-    id = Column(Integer, primary_key=True)
+    id: Mapped[int] = mapped_column(primary_key=True)
 
-    pathway_identifier = Column(String(100))
-    pathway_name = Column(String(1000))
-    kegg_species_id = Column(String(100))
-    kegg_gene_id_a = Column(String(100))
-    gene_symbol_a = Column(String(100), index=True)
-    kegg_gene_id_b = Column(String(100))
-    gene_symbol_b = Column(String(100), index=True)
-    kegg_int_type = Column(String(100))
-    interaction_type = Column(String(50), index=True)
+    pathway_identifier: Mapped[str] = mapped_column(String(100), nullable=True)  # nullable=True keeps pre-2.0 DDL
+    pathway_name: Mapped[str] = mapped_column(String(1000), nullable=True)
+    kegg_species_id: Mapped[str] = mapped_column(String(100), nullable=True)
+    kegg_gene_id_a: Mapped[str] = mapped_column(String(100), nullable=True)
+    gene_symbol_a: Mapped[str] = mapped_column(String(100), index=True, nullable=True)
+    kegg_gene_id_b: Mapped[str] = mapped_column(String(100), nullable=True)
+    gene_symbol_b: Mapped[str] = mapped_column(String(100), index=True, nullable=True)
+    kegg_int_type: Mapped[str] = mapped_column(String(100), nullable=True)
+    interaction_type: Mapped[str] = mapped_column(String(50), index=True, nullable=True)
 
     def as_dict(self):
         """Convert object values to dictionary."""
diff --git a/ebel/manager/rdbms/models/mirtarbase.py b/ebel/manager/rdbms/models/mirtarbase.py
index 6f5014e..700543f 100644
--- a/ebel/manager/rdbms/models/mirtarbase.py
+++ b/ebel/manager/rdbms/models/mirtarbase.py
@@ -1,6 +1,7 @@
 """KEGG RDBMS model definition."""
 from sqlalchemy import Column, Integer, String, Text
 from sqlalchemy.ext.declarative import declarative_base
+from sqlalchemy.orm import mapped_column, Mapped
 
 from ebel.manager.rdbms.models import object_as_dict
 
@@ -11,17 +12,17 @@ class Mirtarbase(Base):
     """Class definition for the mirtarbase table."""
 
     __tablename__ = "mirtarbase"
-    id = Column(Integer, primary_key=True)
+    id: Mapped[int] = mapped_column(primary_key=True)
 
-    mi_rtar_base_id = Column(String(20))
-    mi_rna = Column(String(50))
-    species_mi_rna = Column(String(50), index=True)
-    target_gene = Column(String(50), index=True)
-    target_gene_entrez_id = Column(Integer)
-    species_target_gene = Column(String(50), index=True)
-    experiments = Column(Text)
-    support_type = Column(String(50), index=True)
-    references_pmid = Column(Integer)
+    mi_rtar_base_id: Mapped[str] = mapped_column(String(20), nullable=True)  # nullable=True keeps pre-2.0 DDL
+    mi_rna: Mapped[str] = mapped_column(String(50), nullable=True)
+    species_mi_rna: Mapped[str] = mapped_column(String(50), index=True, nullable=True)
+    target_gene: Mapped[str] = mapped_column(String(50), index=True, nullable=True)
+    target_gene_entrez_id: Mapped[int] = mapped_column(nullable=True)
+    species_target_gene: Mapped[str] = mapped_column(String(50), index=True, nullable=True)
+    experiments: Mapped[str] = mapped_column(Text, nullable=True)
+    support_type: Mapped[str] = mapped_column(String(50), index=True, nullable=True)
+    references_pmid: Mapped[int] = mapped_column(nullable=True)
 
     def as_dict(self):
         """Convert object values to dictionary."""
diff --git a/ebel/manager/rdbms/models/ncbi.py b/ebel/manager/rdbms/models/ncbi.py
index 59a56f6..ade3a7b 100644
--- a/ebel/manager/rdbms/models/ncbi.py
+++ b/ebel/manager/rdbms/models/ncbi.py
@@ -1,7 +1,9 @@
 """NCBI RDBMS model definition."""
+from typing import List
+
 from sqlalchemy import Column, ForeignKey, Integer, String, Text
 from sqlalchemy.ext.declarative import declarative_base
-from sqlalchemy.orm import relationship
+from sqlalchemy.orm import relationship, mapped_column, Mapped
 
 from . import object_as_dict
 
@@ -12,27 +14,35 @@ class NcbiGeneInfo(Base):
     """Class definition for the ncbi_gene_info table."""
 
     __tablename__ = "ncbi_gene_info"
-    gene_id = Column(Integer, primary_key=True)
-
-    tax_id = Column(Integer, index=True)
-    symbol = Column(String(100), index=True)
-    type_of_gene = Column(String(100), index=True)
-    locus_tag = Column(String(100))
-    chromosome = Column(String(100))
-    map_location = Column(String(100))
-    description_id = Column(Integer, ForeignKey("ncbi_gene_info_description.id"))
-    description = relationship("NcbiGeneInfoDescription", foreign_keys=[description_id])
-    xrefs = relationship("NcbiGeneInfoXref", back_populates="gene")
-    mims = relationship("NcbiGeneMim", foreign_keys="NcbiGeneMim.gene_id", back_populates="gene")
-    orthologs = relationship(
+    gene_id: Mapped[int] = mapped_column(primary_key=True)
+
+    tax_id: Mapped[int] = mapped_column(index=True, nullable=True)  # nullable=True keeps pre-2.0 DDL
+    symbol: Mapped[str] = mapped_column(String(100), index=True, nullable=True)
+    type_of_gene: Mapped[str] = mapped_column(String(100), index=True, nullable=True)
+    locus_tag: Mapped[str] = mapped_column(String(100), nullable=True)
+    chromosome: Mapped[str] = mapped_column(String(100), nullable=True)
+    map_location: Mapped[str] = mapped_column(String(100), nullable=True)
+    description_id: Mapped[int] = mapped_column(ForeignKey("ncbi_gene_info_description.id"), nullable=True)
+    description: Mapped["NcbiGeneInfoDescription"] = relationship(
+        "NcbiGeneInfoDescription", foreign_keys=[description_id]
+    )
+    xrefs: Mapped[List["NcbiGeneInfoXref"]] = relationship("NcbiGeneInfoXref", back_populates="gene")
+    mims: Mapped[List["NcbiGeneMim"]] = relationship(
+        "NcbiGeneMim", foreign_keys="NcbiGeneMim.gene_id", back_populates="gene"
+    )
+    orthologs: Mapped[List["NcbiGeneOrtholog"]] = relationship(
         "NcbiGeneOrtholog",
         foreign_keys="NcbiGeneOrtholog.gene_id",
         back_populates="gene",
     )
-    ensembl_ids = relationship("NcbiGeneEnsembl", back_populates="genes")
-    gene_ids_right = relationship("NcbiGeneOnRight", foreign_keys="NcbiGeneOnRight.gene_id", back_populates="gene")
-    gene_ids_left = relationship("NcbiGeneOnLeft", foreign_keys="NcbiGeneOnLeft.gene_id", back_populates="gene")
-    gene_ids_overlapping = relationship(
+    ensembl_ids: Mapped[List["NcbiGeneEnsembl"]] = relationship("NcbiGeneEnsembl", back_populates="genes")
+    gene_ids_right: Mapped[List["NcbiGeneOnRight"]] = relationship(  # one-to-many: scalar type forces uselist=False
+        "NcbiGeneOnRight", foreign_keys="NcbiGeneOnRight.gene_id", back_populates="gene"
+    )
+    gene_ids_left: Mapped[List["NcbiGeneOnLeft"]] = relationship(
+        "NcbiGeneOnLeft", foreign_keys="NcbiGeneOnLeft.gene_id", back_populates="gene"
+    )
+    gene_ids_overlapping: Mapped[List["NcbiGeneOverlapping"]] = relationship(
         "NcbiGeneOverlapping",
         foreign_keys="NcbiGeneOverlapping.gene_id",
         back_populates="gene",
@@ -60,76 +70,76 @@ class NcbiGeneInfoDescription(Base):
     """Class definition for the ncbi_gene_info_description table."""
 
     __tablename__ = "ncbi_gene_info_description"
-    id = Column(Integer, primary_key=True, autoincrement=True)
-    description = Column(Text)
+    id: Mapped[int] = mapped_column(primary_key=True, autoincrement=True)
+    description: Mapped[str] = mapped_column(Text, nullable=True)  # keep column NULL-able
 
 
 class NcbiGeneOnRight(Base):
     """Class definition for the ncbi_gene_on_right table."""
 
     __tablename__ = "ncbi_gene_on_right"
-    id = Column(Integer, primary_key=True, autoincrement=True)
-    gene_id = Column(Integer, ForeignKey("ncbi_gene_info.gene_id"))
-    gene_id_on_right = Column(Integer, ForeignKey("ncbi_gene_info.gene_id"))
+    id: Mapped[int] = mapped_column(primary_key=True, autoincrement=True)
+    gene_id: Mapped[int] = mapped_column(ForeignKey("ncbi_gene_info.gene_id"), nullable=True)  # keep NULL-able
+    gene_id_on_right: Mapped[int] = mapped_column(ForeignKey("ncbi_gene_info.gene_id"), nullable=True)
 
-    gene = relationship("NcbiGeneInfo", foreign_keys=[gene_id])
+    gene: Mapped[NcbiGeneInfo] = relationship("NcbiGeneInfo", foreign_keys=[gene_id])
 
 
 class NcbiGeneOnLeft(Base):
     """Class definition for the ncbi_gene_on_left table."""
 
     __tablename__ = "ncbi_gene_on_left"
-    id = Column(Integer, primary_key=True, autoincrement=True)
-    gene_id = Column(Integer, ForeignKey("ncbi_gene_info.gene_id"))
-    gene_id_on_left = Column(Integer, ForeignKey("ncbi_gene_info.gene_id"))
+    id: Mapped[int] = mapped_column(primary_key=True, autoincrement=True)
+    gene_id: Mapped[int] = mapped_column(ForeignKey("ncbi_gene_info.gene_id"), nullable=True)  # keep NULL-able
+    gene_id_on_left: Mapped[int] = mapped_column(ForeignKey("ncbi_gene_info.gene_id"), nullable=True)
 
-    gene = relationship("NcbiGeneInfo", foreign_keys=[gene_id])
+    gene: Mapped[NcbiGeneInfo] = relationship("NcbiGeneInfo", foreign_keys=[gene_id])
 
 
 class NcbiGeneOverlapping(Base):
     """Class definition for the ncbi_gene_overlapping table."""
 
     __tablename__ = "ncbi_gene_overlapping"
-    id = Column(Integer, primary_key=True, autoincrement=True)
-    gene_id = gene_id = Column(Integer, ForeignKey("ncbi_gene_info.gene_id"))
-    overlapping_gene_id = Column(Integer, ForeignKey("ncbi_gene_info.gene_id"))
+    id: Mapped[int] = mapped_column(primary_key=True, autoincrement=True)
+    gene_id: Mapped[int] = mapped_column(ForeignKey("ncbi_gene_info.gene_id"), nullable=True)  # keep NULL-able
+    overlapping_gene_id: Mapped[int] = mapped_column(ForeignKey("ncbi_gene_info.gene_id"), nullable=True)
 
-    gene = relationship("NcbiGeneInfo", foreign_keys=[gene_id])
+    gene: Mapped[NcbiGeneInfo] = relationship("NcbiGeneInfo", foreign_keys=[gene_id])
 
 
 class NcbiGeneOrtholog(Base):
     """Class definition for the ncbi_gene_ortholog table."""
 
     __tablename__ = "ncbi_gene_ortholog"
-    id = Column(Integer, primary_key=True, autoincrement=True)
-    tax_id = Column(Integer, index=True)
-    gene_id = Column(Integer, ForeignKey("ncbi_gene_info.gene_id"))
-    other_tax_id = Column(Integer, index=True)
-    other_gene_id = Column(Integer, ForeignKey("ncbi_gene_info.gene_id"))
+    id: Mapped[int] = mapped_column(primary_key=True, autoincrement=True)
+    tax_id: Mapped[int] = mapped_column(index=True, nullable=True)  # nullable=True keeps pre-2.0 DDL
+    gene_id: Mapped[int] = mapped_column(ForeignKey("ncbi_gene_info.gene_id"), nullable=True)
+    other_tax_id: Mapped[int] = mapped_column(index=True, nullable=True)
+    other_gene_id: Mapped[int] = mapped_column(ForeignKey("ncbi_gene_info.gene_id"), nullable=True)
 
-    gene = relationship("NcbiGeneInfo", foreign_keys=[gene_id])
+    gene: Mapped[NcbiGeneInfo] = relationship("NcbiGeneInfo", foreign_keys=[gene_id])
 
 
 class NcbiGenePubmed(Base):
     """Class definition for the ncbi_gene_pubmed table."""
 
     __tablename__ = "ncbi_gene_pubmed"
-    id = Column(Integer, primary_key=True)
+    id: Mapped[int] = mapped_column(primary_key=True)
 
-    tax_id = Column(Integer, index=True)
-    gene_id = Column(Integer, ForeignKey("ncbi_gene_info.gene_id"))
-    pub_med_id = Column(Integer)
+    tax_id: Mapped[int] = mapped_column(index=True, nullable=True)  # nullable=True keeps pre-2.0 DDL
+    gene_id: Mapped[int] = mapped_column(ForeignKey("ncbi_gene_info.gene_id"), nullable=True)
+    pub_med_id: Mapped[int] = mapped_column(nullable=True)
 
 
 class NcbiGeneInfoXref(Base):
     """Class definition for the ncbi_gene_info_xref table."""
 
     __tablename__ = "ncbi_gene_info_xref"
-    id = Column(Integer, primary_key=True, autoincrement=True)
+    id: Mapped[int] = mapped_column(primary_key=True, autoincrement=True)
 
-    db = Column(String(100), index=True)
-    dbid = Column(String(100), index=True)
-    gene_id = Column(Integer, ForeignKey("ncbi_gene_info.gene_id"))
+    db: Mapped[str] = mapped_column(String(100), index=True, nullable=True)  # nullable=True keeps pre-2.0 DDL
+    dbid: Mapped[str] = mapped_column(String(100), index=True, nullable=True)
+    gene_id: Mapped[int] = mapped_column(ForeignKey("ncbi_gene_info.gene_id"), nullable=True)
 
     gene = relationship("NcbiGeneInfo", back_populates="xrefs")
 
@@ -138,16 +148,16 @@ class NcbiGeneMim(Base):
     """Class definition for the ncbi_gene_mim table."""
 
     __tablename__ = "ncbi_gene_mim"
-    id = Column(Integer, primary_key=True, autoincrement=True)
+    id: Mapped[int] = mapped_column(primary_key=True, autoincrement=True)
 
-    mim_number = Column(Integer)
-    gene_id = Column(Integer, ForeignKey("ncbi_gene_info.gene_id"))
-    type = Column(String(100))
-    source = Column(String(100))
-    med_gen_cui = Column(String(100), index=True)
-    comment = Column(String(100))
+    mim_number: Mapped[int] = mapped_column(nullable=True)  # nullable=True keeps pre-2.0 DDL
+    gene_id: Mapped[int] = mapped_column(ForeignKey("ncbi_gene_info.gene_id"), nullable=True)
+    type: Mapped[str] = mapped_column(String(100), nullable=True)
+    source: Mapped[str] = mapped_column(String(100), nullable=True)
+    med_gen_cui: Mapped[str] = mapped_column(String(100), index=True, nullable=True)
+    comment: Mapped[str] = mapped_column(String(100), nullable=True)
 
-    gene = relationship("NcbiGeneInfo", back_populates="mims")
+    gene: Mapped[NcbiGeneInfo] = relationship("NcbiGeneInfo", back_populates="mims")
 
     def as_dict(self):
         """Convert object values to dictionary."""
@@ -165,17 +175,17 @@ class NcbiGeneEnsembl(Base):
     """Class definition for the ncbi_gene_ensembl table."""
 
     __tablename__ = "ncbi_gene_ensembl"
-    id = Column(Integer, primary_key=True, autoincrement=True)
+    id: Mapped[int] = mapped_column(primary_key=True, autoincrement=True)
 
-    tax_id = Column(Integer, index=True)
-    gene_id = Column(Integer, ForeignKey("ncbi_gene_info.gene_id"))
-    ensembl_gene_identifier = Column(String(100))
-    rna_nucleotide_accession_version = Column(String(100))
-    ensembl_rna_identifier = Column(String(100))
-    protein_accession_version = Column(String(100))
-    ensembl_protein_identifier = Column(String(100))
+    tax_id: Mapped[int] = mapped_column(index=True, nullable=True)  # nullable=True keeps pre-2.0 DDL
+    gene_id: Mapped[int] = mapped_column(ForeignKey("ncbi_gene_info.gene_id"), nullable=True)
+    ensembl_gene_identifier: Mapped[str] = mapped_column(String(100), nullable=True)
+    rna_nucleotide_accession_version: Mapped[str] = mapped_column(String(100), nullable=True)
+    ensembl_rna_identifier: Mapped[str] = mapped_column(String(100), nullable=True)
+    protein_accession_version: Mapped[str] = mapped_column(String(100), nullable=True)
+    ensembl_protein_identifier: Mapped[str] = mapped_column(String(100), nullable=True)
 
-    genes = relationship("NcbiGeneInfo", back_populates="ensembl_ids")
+    genes: Mapped[NcbiGeneInfo] = relationship("NcbiGeneInfo", back_populates="ensembl_ids")
 
     def as_dict(self):
         """Convert object values to dictionary."""
@@ -194,15 +204,15 @@ class NcbiGeneGo(Base):
     """Class definition for the ncbi_gene_go table."""
 
     __tablename__ = "ncbi_gene_go"
-    id = Column(Integer, primary_key=True, autoincrement=True)
+    id: Mapped[int] = mapped_column(primary_key=True, autoincrement=True)
 
-    tax_id = Column(Integer, index=True)
-    gene_id = Column(Integer, ForeignKey("ncbi_gene_info.gene_id"))
-    go_id = Column(String(100), index=True)
-    evidence = Column(String(10))
-    qualifier = Column(String(100))
-    go_term = Column(String(255))
-    category = Column(String(10))
+    tax_id: Mapped[int] = mapped_column(index=True)
+    gene_id: Mapped[int] = mapped_column(ForeignKey("ncbi_gene_info.gene_id"))
+    go_id: Mapped[str] = mapped_column(String(100), index=True)
+    evidence: Mapped[str] = mapped_column(String(10))
+    qualifier: Mapped[str] = mapped_column(String(100))
+    go_term: Mapped[str] = mapped_column(String(255))
+    category: Mapped[str] = mapped_column(String(10))
 
     pmids = relationship("NcbiGeneGoPmid", back_populates="gos")
 
@@ -224,25 +234,25 @@ class NcbiGeneGoPmid(Base):
     """Class definition for the ncbi_gene_go_pmid table."""
 
     __tablename__ = "ncbi_gene_go_pmid"
-    id = Column(Integer, primary_key=True, autoincrement=True)
+    id: Mapped[int] = mapped_column(primary_key=True, autoincrement=True)
 
-    ncbi_gene_go_id = Column(Integer, ForeignKey("ncbi_gene_go.id"))
-    pmid = Column(Integer)
+    ncbi_gene_go_id: Mapped[int] = mapped_column(ForeignKey("ncbi_gene_go.id"))
+    pmid: Mapped[int] = mapped_column()
 
-    gos = relationship("NcbiGeneGo", back_populates="pmids")
+    gos: Mapped[List[NcbiGeneGo]] = relationship("NcbiGeneGo", back_populates="pmids")
 
 
 class NcbiMedGenName(Base):
     """Class definition for the ncbi_medgen_name table."""
 
     __tablename__ = "ncbi_medgen_name"
-    id = Column(Integer, primary_key=True, autoincrement=True)
+    id: Mapped[int] = mapped_column(primary_key=True, autoincrement=True)
 
-    cui = Column(String(100))
-    name = Column(Text)
-    source = Column(String(100))
-    suppress = Column(String(1))
-    pmids = relationship("NcbiMedGenPmid", back_populates="med_gen_name")
+    cui: Mapped[str] = mapped_column(String(100))
+    name: Mapped[str] = mapped_column(Text)
+    source: Mapped[str] = mapped_column(String(100))
+    suppress: Mapped[str] = mapped_column(String(1))
+    pmids: Mapped[List["NcbiMedGenPmid"]] = relationship("NcbiMedGenPmid", back_populates="med_gen_name")
 
     def as_dict(self):
         """Convert object values to dictionary."""
@@ -255,10 +265,10 @@ class NcbiMedGenPmid(Base):
     """Class definition for the ncbi_medgen_pmid table."""
 
     __tablename__ = "ncbi_medgen_pmid"
-    id = Column(Integer, primary_key=True, autoincrement=True)
+    id: Mapped[int] = mapped_column(primary_key=True, autoincrement=True)
 
-    ncbi_medgen_name_id = Column(Integer, ForeignKey("ncbi_medgen_name.id"))
-    pmid = Column(Integer, index=True)
+    ncbi_medgen_name_id: Mapped[int] = mapped_column(ForeignKey("ncbi_medgen_name.id"))
+    pmid: Mapped[int] = mapped_column(index=True)
 
     med_gen_name = relationship("NcbiMedGenName", back_populates="pmids")
 
diff --git a/ebel/manager/rdbms/models/nsides.py b/ebel/manager/rdbms/models/nsides.py
index ef5da64..2d92762 100644
--- a/ebel/manager/rdbms/models/nsides.py
+++ b/ebel/manager/rdbms/models/nsides.py
@@ -2,6 +2,7 @@
 
 from sqlalchemy import Column, Float, Index, Integer, String
 from sqlalchemy.ext.declarative import declarative_base
+from sqlalchemy.orm import mapped_column, Mapped
 
 from ebel.manager.rdbms.models import object_as_dict
 
@@ -21,24 +22,24 @@ class Nsides(Base):
             "mean_reporting_frequency",
         ),
     )
-    id = Column(Integer, primary_key=True)
+    id: Mapped[int] = mapped_column(primary_key=True)
 
-    drug_rxnorn_id = Column(String(20), index=True)  # This has to be a String because of mapping to drugbank ids
-    drug_concept_name = Column(String(255), index=True)
+    drug_rxnorn_id: Mapped[str] = mapped_column(String(20), index=True)  # This has to be a String because of mapping to drugbank ids
+    drug_concept_name: Mapped[str] = mapped_column(String(255), index=True)
 
-    source = Column(String(10), index=True)
+    source: Mapped[str] = mapped_column(String(10), index=True)
 
-    condition_meddra_id = Column(Integer)
-    condition_concept_name = Column(String(255), index=True)
+    condition_meddra_id: Mapped[int] = mapped_column()
+    condition_concept_name: Mapped[str] = mapped_column(String(255), index=True)
 
     # OFFSIDES specific
-    a = Column(Integer)
-    b = Column(Integer)
-    c = Column(Integer)
-    d = Column(Integer)
-    prr = Column(Float)
-    prr_error = Column(Float)
-    mean_reporting_frequency = Column(Float, index=True)
+    a = mapped_column(Integer)
+    b = mapped_column(Integer)
+    c = mapped_column(Integer)
+    d = mapped_column(Integer)
+    prr = mapped_column(Float)
+    prr_error = mapped_column(Float)
+    mean_reporting_frequency = mapped_column(Float, index=True)
 
     def as_dict(self):
         """Convert object values to dictionary."""
diff --git a/ebel/manager/rdbms/models/pathway_commons.py b/ebel/manager/rdbms/models/pathway_commons.py
index 5478aed..fde22a0 100644
--- a/ebel/manager/rdbms/models/pathway_commons.py
+++ b/ebel/manager/rdbms/models/pathway_commons.py
@@ -1,7 +1,9 @@
 """Pathway Commons RDBMS model definition."""
+from typing import List
+
 from sqlalchemy import BigInteger, Column, ForeignKey, Integer, String, Table
 from sqlalchemy.ext.declarative import declarative_base
-from sqlalchemy.orm import relationship
+from sqlalchemy.orm import relationship, mapped_column, Mapped
 
 from ebel.manager.rdbms.models import object_as_dict
 
@@ -36,21 +38,23 @@ class PathwayCommons(Base):
     """Class definition for the pathway_commons table."""
 
     __tablename__ = "pathway_commons"
-    id = Column(Integer, primary_key=True)
+    id: Mapped[int] = mapped_column(primary_key=True)
 
-    participant_a = Column(String(50), index=True)
-    interaction_type = Column(String(50), index=True)
-    participant_b = Column(String(50), index=True)
+    participant_a: Mapped[str] = mapped_column(String(50), index=True)
+    interaction_type: Mapped[str] = mapped_column(String(50), index=True)
+    participant_b: Mapped[str] = mapped_column(String(50), index=True)
 
-    pmids = relationship("Pmid", back_populates="pathway_commons")
+    pmids: Mapped[List["Pmid"]] = relationship("Pmid", back_populates="pathway_commons")
 
-    pathway_names = relationship(
+    pathway_names: Mapped[List["PathwayName"]] = relationship(
         "PathwayName",
         secondary=pathway_commons__pathway_name,
         back_populates="pathway_commonses",
     )
 
-    sources = relationship("Source", secondary=pathway_commons__source, back_populates="pathway_commonses")
+    sources: Mapped[List["Source"]] = relationship(
+        "Source", secondary=pathway_commons__source, back_populates="pathway_commonses"
+    )
 
     def __str__(self):
         return f"{self.participant_a} {self.interaction_type} {self.participant_b}"
@@ -68,11 +72,11 @@ class PathwayName(Base):
     """Class definition for the pathway_commons_pathway_name table."""
 
     __tablename__ = "pathway_commons_pathway_name"
-    id = Column(Integer, primary_key=True)
+    id: Mapped[int] = mapped_column(primary_key=True)
 
-    name = Column(String(255), index=True)
+    name: Mapped[str] = mapped_column(String(255), index=True)
 
-    pathway_commonses = relationship(
+    pathway_commonses: Mapped[List[PathwayCommons]] = relationship(
         "PathwayCommons",
         secondary=pathway_commons__pathway_name,
         back_populates="pathway_names",
@@ -87,12 +91,12 @@ class Pmid(Base):
     """Class definition for the pathway_commons_pmid table."""
 
     __tablename__ = "pathway_commons_pmid"
-    id = Column(Integer, primary_key=True)
+    id: Mapped[int] = mapped_column(primary_key=True)
 
-    pmid = Column(BigInteger, index=True)
+    pmid: Mapped[int] = mapped_column(index=True)
 
-    pathway_commons_id = Column(Integer, ForeignKey("pathway_commons.id"), index=True)
-    pathway_commons = relationship("PathwayCommons", back_populates="pmids")
+    pathway_commons_id: Mapped[int] = mapped_column(ForeignKey("pathway_commons.id"), index=True)
+    pathway_commons: Mapped[List[PathwayCommons]] = relationship("PathwayCommons", back_populates="pmids")
 
     def __str__(self):
         """Class string definition."""
@@ -103,11 +107,13 @@ class Source(Base):
     """Class definition for the pathway_commons_source table."""
 
     __tablename__ = "pathway_commons_source"
-    id = Column(Integer, primary_key=True)
+    id: Mapped[int] = mapped_column(primary_key=True)
 
-    source = Column(String(50))
+    source: Mapped[str] = mapped_column(String(50))
 
-    pathway_commonses = relationship("PathwayCommons", secondary=pathway_commons__source, back_populates="sources")
+    pathway_commonses: Mapped[List[PathwayCommons]] = relationship(
+        "PathwayCommons", secondary=pathway_commons__source, back_populates="sources"
+    )
 
     def __str__(self):
         """Class string definition."""
diff --git a/ebel/manager/rdbms/models/protein_atlas.py b/ebel/manager/rdbms/models/protein_atlas.py
index 167a33a..a1a57e7 100644
--- a/ebel/manager/rdbms/models/protein_atlas.py
+++ b/ebel/manager/rdbms/models/protein_atlas.py
@@ -1,6 +1,7 @@
 """Protein Atlas RDBMS model definition."""
 from sqlalchemy import Column, Integer, Numeric, String, Text
 from sqlalchemy.ext.declarative import declarative_base
+from sqlalchemy.orm import mapped_column
 
 Base = declarative_base()
 
@@ -9,14 +10,14 @@ class ProteinAtlasNormalTissue(Base):
     """Class definition for the protein_atlas_normal_tissue table."""
 
     __tablename__ = "protein_atlas_normal_tissue"
-    id = Column(Integer, primary_key=True)
+    id = mapped_column(Integer, primary_key=True)
 
-    gene = Column(String(100), index=True)
-    gene_name = Column(String(100))
-    tissue = Column(String(100))
-    cell_type = Column(String(100))
-    level = Column(String(100), index=True)
-    reliability = Column(String(100), index=True)
+    gene = mapped_column(String(100), index=True)
+    gene_name = mapped_column(String(100))
+    tissue = mapped_column(String(100))
+    cell_type = mapped_column(String(100))
+    level = mapped_column(String(100), index=True)
+    reliability = mapped_column(String(100), index=True)
 
     def as_dict(self):
         """Convert object values to dictionary."""
@@ -34,22 +35,22 @@ class ProteinAtlasSubcellularLocation(Base):
     """Class definition for the protein_atlas_subcellular_location table."""
 
     __tablename__ = "protein_atlas_subcellular_location"
-    id = Column(Integer, primary_key=True)
-
-    gene = Column(String(100))
-    gene_name = Column(String(100))
-    reliability = Column(String(100))
-    main_location = Column(String(100))
-    additional_location = Column(String(100))
-    extracellular_location = Column(String(100))
-    enhanced = Column(String(100))
-    supported = Column(String(100))
-    approved = Column(String(100))
-    uncertain = Column(String(100))
-    single_cell_variation_intensity = Column(String(100))
-    single_cell_variation_spatial = Column(String(100))
-    cell_cycle_dependency = Column(Text)
-    go_id = Column(Text)
+    id = mapped_column(Integer, primary_key=True)
+
+    gene = mapped_column(String(100))
+    gene_name = mapped_column(String(100))
+    reliability = mapped_column(String(100))
+    main_location = mapped_column(String(100))
+    additional_location = mapped_column(String(100))
+    extracellular_location = mapped_column(String(100))
+    enhanced = mapped_column(String(100))
+    supported = mapped_column(String(100))
+    approved = mapped_column(String(100))
+    uncertain = mapped_column(String(100))
+    single_cell_variation_intensity = mapped_column(String(100))
+    single_cell_variation_spatial = mapped_column(String(100))
+    cell_cycle_dependency = mapped_column(Text)
+    go_id = mapped_column(Text)
 
     def as_dict(self):
         """Convert object values to dictionary."""
@@ -75,12 +76,12 @@ class ProteinAtlasRnaTissueConsensus(Base):
     """Class definition for the protein_atlas_rna_tissue_consensus table."""
 
     __tablename__ = "protein_atlas_rna_tissue_consensus"
-    id = Column(Integer, primary_key=True)
+    id = mapped_column(Integer, primary_key=True)
 
-    gene = Column(String(100), index=True)
-    gene_name = Column(String(100), index=True)
-    tissue = Column(String(100), index=True)
-    n_tpm = Column(Numeric(8, 1))
+    gene = mapped_column(String(100), index=True)
+    gene_name = mapped_column(String(100), index=True)
+    tissue = mapped_column(String(100), index=True)
+    n_tpm = mapped_column(Numeric(8, 1))
 
     def as_dict(self):
         """Convert object values to dictionary."""
@@ -96,14 +97,14 @@ class ProteinAtlasRnaBrainGtex(Base):
     """Class definition for the protein_atlas_rna_brain_gtex table."""
 
     __tablename__ = "protein_atlas_rna_brain_gtex"
-    id = Column(Integer, primary_key=True)
+    id = mapped_column(Integer, primary_key=True)
 
-    gene = Column(String(100), index=True)
-    gene_name = Column(String(100), index=True)
-    brain_region = Column(String(100), index=True)
-    tpm = Column(Numeric(8, 1))
-    p_tpm = Column(Numeric(8, 1))
-    n_tpm = Column(Numeric(8, 1))
+    gene = mapped_column(String(100), index=True)
+    gene_name = mapped_column(String(100), index=True)
+    brain_region = mapped_column(String(100), index=True)
+    tpm = mapped_column(Numeric(8, 1))
+    p_tpm = mapped_column(Numeric(8, 1))
+    n_tpm = mapped_column(Numeric(8, 1))
 
     def as_dict(self):
         """Convert object values to dictionary."""
@@ -121,14 +122,14 @@ class ProteinAtlasRnaBrainFantom(Base):
     """Class definition for the protein_atlas_rna_brain_fantom table."""
 
     __tablename__ = "protein_atlas_rna_brain_fantom"
-    id = Column(Integer, primary_key=True)
+    id = mapped_column(Integer, primary_key=True)
 
-    gene = Column(String(100))
-    gene_name = Column(String(100))
-    brain_region = Column(String(100))
-    tags_per_million = Column(String(100))
-    scaled_tags_per_million = Column(String(100))
-    n_tpm = Column(String(100))
+    gene = mapped_column(String(100))
+    gene_name = mapped_column(String(100))
+    brain_region = mapped_column(String(100))
+    tags_per_million = mapped_column(String(100))
+    scaled_tags_per_million = mapped_column(String(100))
+    n_tpm = mapped_column(String(100))
 
     def as_dict(self):
         """Convert object values to dictionary."""
@@ -146,12 +147,12 @@ class ProteinAtlasRnaMouseBrainAllen(Base):
     """Class definition for the protein_atlas_rna_mouse_brain_allen table."""
 
     __tablename__ = "protein_atlas_rna_mouse_brain_allen"
-    id = Column(Integer, primary_key=True)
+    id = mapped_column(Integer, primary_key=True)
 
-    gene = Column(String(100))
-    gene_name = Column(String(100))
-    brain_region = Column(String(100))
-    expression_energy = Column(Numeric(8, 1))
+    gene = mapped_column(String(100))
+    gene_name = mapped_column(String(100))
+    brain_region = mapped_column(String(100))
+    expression_energy = mapped_column(Numeric(8, 1))
 
     def as_dict(self):
         """Convert object values to dictionary."""
diff --git a/ebel/manager/rdbms/models/reactome.py b/ebel/manager/rdbms/models/reactome.py
index 0624899..e986cb1 100644
--- a/ebel/manager/rdbms/models/reactome.py
+++ b/ebel/manager/rdbms/models/reactome.py
@@ -1,6 +1,7 @@
 """Reactome RDBMS model definition."""
 from sqlalchemy import Column, Integer, String
 from sqlalchemy.ext.declarative import declarative_base
+from sqlalchemy.orm import mapped_column
 
 from ebel.manager.rdbms.models import object_as_dict
 
@@ -11,12 +12,12 @@ class Reactome(Base):
     """Class definition for the reactome table."""
 
     __tablename__ = "reactome"
-    id = Column(Integer, primary_key=True)
-    identifier = Column(String(50), index=True)
-    uniprot_accession = Column(String(50), index=True)
-    organism = Column(String(255))
-    name = Column(String(255))
-    evidence_type = Column(String(255))
+    id = mapped_column(Integer, primary_key=True)
+    identifier = mapped_column(String(50), index=True)
+    uniprot_accession = mapped_column(String(50), index=True)
+    organism = mapped_column(String(255))
+    name = mapped_column(String(255))
+    evidence_type = mapped_column(String(255))
 
     def as_dict(self):
         """Convert object values to dictionary."""
diff --git a/ebel/manager/rdbms/models/stringdb.py b/ebel/manager/rdbms/models/stringdb.py
index 31842a5..df1fedf 100644
--- a/ebel/manager/rdbms/models/stringdb.py
+++ b/ebel/manager/rdbms/models/stringdb.py
@@ -2,6 +2,7 @@
 
 from sqlalchemy import Boolean, Column, Integer, SmallInteger, String
 from sqlalchemy.ext.declarative import declarative_base
+from sqlalchemy.orm import mapped_column
 
 from ebel.manager.rdbms.models import object_as_dict
 
@@ -13,26 +14,26 @@ class StringDb(Base):
 
     __tablename__ = "stringdb"
 
-    id = Column(Integer, primary_key=True)
-
-    protein1 = Column(String(50), nullable=False)
-    protein2 = Column(String(50), nullable=False)
-    symbol1 = Column(String(50), nullable=False, index=True)
-    symbol2 = Column(String(50), nullable=False, index=True)
-    neighborhood = Column(Integer)
-    neighborhood_transferred = Column(SmallInteger)
-    fusion = Column(SmallInteger)
-    cooccurence = Column(SmallInteger)
-    homology = Column(SmallInteger)
-    coexpression = Column(SmallInteger)
-    coexpression_transferred = Column(SmallInteger)
-    experiments = Column(SmallInteger, index=True)
-    experiments_transferred = Column(SmallInteger)
-    database = Column(Integer)
-    database_transferred = Column(SmallInteger)
-    textmining = Column(SmallInteger)
-    textmining_transferred = Column(SmallInteger)
-    combined_score = Column(SmallInteger)
+    id = mapped_column(Integer, primary_key=True)
+
+    protein1 = mapped_column(String(50), nullable=False)
+    protein2 = mapped_column(String(50), nullable=False)
+    symbol1 = mapped_column(String(50), nullable=False, index=True)
+    symbol2 = mapped_column(String(50), nullable=False, index=True)
+    neighborhood = mapped_column(Integer)
+    neighborhood_transferred = mapped_column(SmallInteger)
+    fusion = mapped_column(SmallInteger)
+    cooccurence = mapped_column(SmallInteger)
+    homology = mapped_column(SmallInteger)
+    coexpression = mapped_column(SmallInteger)
+    coexpression_transferred = mapped_column(SmallInteger)
+    experiments = mapped_column(SmallInteger, index=True)
+    experiments_transferred = mapped_column(SmallInteger)
+    database = mapped_column(Integer)
+    database_transferred = mapped_column(SmallInteger)
+    textmining = mapped_column(SmallInteger)
+    textmining_transferred = mapped_column(SmallInteger)
+    combined_score = mapped_column(SmallInteger)
 
     def as_dict(self):
         """Convert object values to dictionary."""
@@ -44,9 +45,9 @@ class StringDbProtein(Base):
 
     __tablename__ = "stringdb_protein"
 
-    id = Column(Integer, primary_key=True)
-    string_protein_id = Column(String(50), nullable=False, index=True)
-    preferred_name = Column(String(50), nullable=False, index=True)
+    id = mapped_column(Integer, primary_key=True)
+    string_protein_id = mapped_column(String(50), nullable=False, index=True)
+    preferred_name = mapped_column(String(50), nullable=False, index=True)
 
     def as_dict(self):
         """Convert object values to dictionary."""
@@ -57,16 +58,16 @@ class StringDbAction(Base):
     """Class definition for the stringdb_action table."""
 
     __tablename__ = "stringdb_action"
-    id = Column(Integer, primary_key=True)
-    item_id_a = Column(String(50), nullable=False)
-    item_id_b = Column(String(50), nullable=False)
-    symbol1 = Column(String(50), nullable=False, index=True)
-    symbol2 = Column(String(50), nullable=False, index=True)
-    mode = Column(String(20), nullable=False, index=True)
-    action = Column(String(20))
-    is_directional = Column(Boolean, nullable=False, index=True)
-    a_is_acting = Column(Boolean, nullable=False, index=True)
-    score = Column(SmallInteger)
+    id = mapped_column(Integer, primary_key=True)
+    item_id_a = mapped_column(String(50), nullable=False)
+    item_id_b = mapped_column(String(50), nullable=False)
+    symbol1 = mapped_column(String(50), nullable=False, index=True)
+    symbol2 = mapped_column(String(50), nullable=False, index=True)
+    mode = mapped_column(String(20), nullable=False, index=True)
+    action = mapped_column(String(20))
+    is_directional = mapped_column(Boolean, nullable=False, index=True)
+    a_is_acting = mapped_column(Boolean, nullable=False, index=True)
+    score = mapped_column(SmallInteger)
 
     def as_dict(self):
         """Convert object values to dictionary."""
diff --git a/ebel/manager/rdbms/models/uniprot.py b/ebel/manager/rdbms/models/uniprot.py
index 39a86f3..331ea82 100644
--- a/ebel/manager/rdbms/models/uniprot.py
+++ b/ebel/manager/rdbms/models/uniprot.py
@@ -3,7 +3,7 @@
 
 from sqlalchemy import Column, ForeignKey, Integer, String, Table, Text
 from sqlalchemy.ext.declarative import declarative_base
-from sqlalchemy.orm import relationship
+from sqlalchemy.orm import relationship, mapped_column
 
 Base = declarative_base()
 
@@ -45,16 +45,16 @@ class Uniprot(Base):
 
     __tablename__ = "uniprot"
 
-    id = Column(Integer, primary_key=True)
+    id = mapped_column(Integer, primary_key=True)
 
-    accession = Column(String(20), unique=True)
-    name = Column(String(100), nullable=False, unique=True)
-    recommended_name = Column(String(255), nullable=True)
+    accession = mapped_column(String(20), unique=True)
+    name = mapped_column(String(100), nullable=False, unique=True)
+    recommended_name = mapped_column(String(255), nullable=True)
 
-    taxid = Column(Integer, ForeignKey("uniprot_organism.taxid"), nullable=False, index=True)
+    taxid = mapped_column(Integer, ForeignKey("uniprot_organism.taxid"), nullable=False, index=True)
     organism = relationship("Organism")
 
-    function_id = Column(Integer, ForeignKey("uniprot_function.id"), nullable=True)
+    function_id = mapped_column(Integer, ForeignKey("uniprot_function.id"), nullable=True)
     function = relationship("Function")
 
     gene_names = relationship("Gene", back_populates="uniprot")
@@ -103,9 +103,9 @@ class GeneSymbol(Base):
     """Class definition for the uniprot_gene_symbol table."""
 
     __tablename__ = "uniprot_gene_symbol"
-    id = Column(Integer, primary_key=True)
-    symbol = Column(String(100), nullable=False, index=True)
-    uniprot_id = Column(Integer, ForeignKey("uniprot.id"))
+    id = mapped_column(Integer, primary_key=True)
+    symbol = mapped_column(String(100), nullable=False, index=True)
+    uniprot_id = mapped_column(Integer, ForeignKey("uniprot.id"))
     uniprot = relationship("Uniprot", back_populates="gene_symbol")
 
     def __repr__(self):
@@ -117,9 +117,9 @@ class Gene(Base):
     """Class definition for the uniprot_gene table."""
 
     __tablename__ = "uniprot_gene"
-    id = Column(Integer, primary_key=True)
-    name = Column(String(100), nullable=False, index=True)
-    uniprot_id = Column(Integer, ForeignKey("uniprot.id"))
+    id = mapped_column(Integer, primary_key=True)
+    name = mapped_column(String(100), nullable=False, index=True)
+    uniprot_id = mapped_column(Integer, ForeignKey("uniprot.id"))
     uniprot = relationship("Uniprot", back_populates="gene_names")
 
 
@@ -128,8 +128,8 @@ class Keyword(Base):
 
     __tablename__ = "uniprot_keyword"
 
-    keywordid = Column(Integer, primary_key=True)
-    keyword_name = Column(String(100), index=True)
+    keywordid = mapped_column(Integer, primary_key=True)
+    keyword_name = mapped_column(String(100), index=True)
 
     uniprots = relationship("Uniprot", secondary=uniprot__uniprot_keyword, back_populates="keywords")
 
@@ -143,8 +143,8 @@ class Organism(Base):
 
     __tablename__ = "uniprot_organism"
 
-    taxid = Column(Integer, primary_key=True)
-    scientific_name = Column(String(255))  # TODO:Check if index=True with  is possible
+    taxid = mapped_column(Integer, primary_key=True)
+    scientific_name = mapped_column(String(255))  # TODO:Check if index=True with  is possible
 
     uniprots = relationship("Uniprot", secondary=uniprot__uniprot_host, back_populates="hosts")
 
@@ -154,9 +154,9 @@ class SubcellularLocation(Base):
 
     __tablename__ = "uniprot_subcellular_location"
 
-    id = Column(Integer, primary_key=True)
+    id = mapped_column(Integer, primary_key=True)
 
-    name = Column(String(100), index=True)
+    name = mapped_column(String(100), index=True)
 
     uniprots = relationship(
         "Uniprot",
@@ -170,10 +170,10 @@ class Xref(Base):
 
     __tablename__ = "uniprot_xref"
 
-    id = Column(Integer, primary_key=True)
+    id = mapped_column(Integer, primary_key=True)
 
-    db = Column(String(50), index=True)
-    identifier = Column(String(100), index=True)
+    db = mapped_column(String(50), index=True)
+    identifier = mapped_column(String(100), index=True)
 
     uniprots = relationship("Uniprot", secondary=uniprot__uniprot_xref, back_populates="xrefs")
 
@@ -183,8 +183,8 @@ class Function(Base):
 
     __tablename__ = "uniprot_function"
 
-    id = Column(Integer, primary_key=True)
+    id = mapped_column(Integer, primary_key=True)
 
-    description = Column(Text)
+    description = mapped_column(Text)
 
     uniprots = relationship("Uniprot", back_populates="function")
diff --git a/pyproject.toml b/pyproject.toml
index 030a032..82b66a4 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -39,27 +39,27 @@ Issues = 'https://github.com/e-bel/ebel/issues'
 Documentation = 'https://ebel.readthedocs.io/en/latest/'
 
 [tool.poetry.dependencies]
-lark-parser = "^0.11.2"
+lark-parser = "^0.11.3"
 click = "^7.1.2"
-requests = "^2.25.1"
-tqdm = "^4.59.0"
-pandas = "^1.2.4"
-sqlalchemy = "^1.4.46"
-SQLAlchemy-Utils = "^0.37.7"
+requests = "^2.31.0"
+tqdm = "^4.66.1"
+pandas = "^1.5.3"
+sqlalchemy = "^2.0.20"
+SQLAlchemy-Utils = "^0.37.9"
 xlwt = "^1.3.0"
 xlrd = "^2.0.1"
-xlsxwriter = "^1.3.8"
+xlsxwriter = "^1.4.5"
 xmltodict = "^0.12.0"
-GitPython = "^3.1.14"
-lxml = "^4.6.5"
-flask = "^2.0.1"
+GitPython = "^3.1.36"
+lxml = "^4.9.3"
+flask = "^2.2.5"
 flask_cors = "^3.0.10"
-connexion = {version = "^2.14.1", extras = ["swagger-ui"]}
-cryptography = "^3.4.7"
-openpyxl = "^3.0.10"
+connexion = {version = "^2.14.2", extras = ["swagger-ui"]}
+cryptography = "^3.4.8"
+openpyxl = "^3.1.2"
 graphviz = "0.20"
 pyorientdb = "^1.0.0"
-PyMySQL = "^1.0.2"
+PyMySQL = "^1.1.0"
 python = "^3.9"
 mkdocstrings = {version = "^0.18", extras = ["python"]}
 
diff --git a/requirements.txt b/requirements.txt
index f00f628..920280b 100755
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,21 +1,21 @@
-lark-parser==0.11.2
+lark-parser==0.11.3
 click>=7.1.2
-requests>=2.25.1
-tqdm>=4.59.0
-pandas>=1.2.4
-sqlalchemy>=1.4.15
-SQLAlchemy-Utils==0.37.7
+requests>=2.31.0
+tqdm>=4.66.1
+pandas>=1.5.3
+sqlalchemy>=2.0.20
+SQLAlchemy-Utils==0.37.9
 xlwt==1.3.0
 xlrd==2.0.1
-xlsxwriter==1.3.8
-pymysql==1.0.2
+xlsxwriter==1.4.5
+pymysql==1.1.0
 xmltodict==0.12.0
-GitPython==3.1.14
-lxml>=4.6.5
-flask==2.0.1
+GitPython==3.1.36
+lxml>=4.9.3
+flask==2.2.5
 flask_cors==3.0.10
-connexion[swagger-ui]==2.14.1
-cryptography==3.4.7
-openpyxl==3.0.7
-graphviz
-pyorientdb
\ No newline at end of file
+connexion[swagger-ui]==2.14.2
+cryptography==3.4.8
+openpyxl==3.1.2
+graphviz==0.20
+pyorientdb==1.0.0
\ No newline at end of file

From 86022d86fc8afcdb444327affc58eccc8aeaba83 Mon Sep 17 00:00:00 2001
From: Bruce Schultz <bruce.schultz@scai.fraunhofer.de>
Date: Fri, 15 Sep 2023 13:04:57 +0200
Subject: [PATCH 02/58] feat: finish updating models for sqla2 and blacken code

---
 ebel/database.py                              |  8 +-
 ebel/manager/models.py                        |  3 +-
 ebel/manager/orientdb/biodbs/bel.py           | 35 +++++--
 ebel/manager/orientdb/biodbs/biogrid.py       |  1 +
 ebel/manager/orientdb/odb_meta.py             |  7 +-
 ebel/manager/orientdb/odb_structure.py        |  3 +-
 ebel/manager/rdbms/models/chebi.py            | 25 +++--
 .../rdbms/models/clinical_trials_gov.py       | 34 +++----
 ebel/manager/rdbms/models/drugbank.py         | 12 ++-
 ebel/manager/rdbms/models/hgnc.py             |  3 +-
 ebel/manager/rdbms/models/iuphar.py           |  7 +-
 ebel/manager/rdbms/models/nsides.py           |  4 +-
 ebel/manager/rdbms/models/protein_atlas.py    | 96 +++++++++----------
 ebel/manager/rdbms/models/reactome.py         | 14 +--
 ebel/manager/rdbms/models/stringdb.py         | 68 ++++++-------
 ebel/manager/rdbms/models/uniprot.py          | 86 +++++++++--------
 ebel/web/api/ebel/v1/bel.py                   |  3 +-
 .../web/api/ebel/v1/bel_against_expression.py |  3 +-
 ebel/web/api/ebel/v1/biogrid.py               | 12 ++-
 ebel/web/api/ebel/v1/clinical_trials_gov.py   |  3 +-
 ebel/web/api/ebel/v1/clinvar.py               |  9 +-
 ebel/web/api/ebel/v1/disgenet.py              |  8 +-
 ebel/web/api/ebel/v1/drugbank.py              |  3 +-
 ebel/web/api/ebel/v1/expression_atlas.py      | 11 ++-
 ebel/web/api/ebel/v1/intact.py                |  3 +-
 ebel/web/api/ebel/v1/kegg.py                  |  3 +-
 ebel/web/api/ebel/v1/pathway_commons.py       | 12 ++-
 ebel/web/api/ebel/v1/uniprot.py               |  3 +-
 28 files changed, 259 insertions(+), 220 deletions(-)

diff --git a/ebel/database.py b/ebel/database.py
index 7a006ec..a972cbe 100644
--- a/ebel/database.py
+++ b/ebel/database.py
@@ -6,9 +6,11 @@
 
 import pymysql
 from pyorientdb import OrientDB
-from pyorientdb.exceptions import (PyOrientCommandException,
-                                   PyOrientConnectionException,
-                                   PyOrientSecurityAccessException)
+from pyorientdb.exceptions import (
+    PyOrientCommandException,
+    PyOrientConnectionException,
+    PyOrientSecurityAccessException,
+)
 
 from ebel.config import get_config_as_dict, write_to_config
 from ebel.constants import TerminalFormatting as TF
diff --git a/ebel/manager/models.py b/ebel/manager/models.py
index 1ab5587..010ef16 100755
--- a/ebel/manager/models.py
+++ b/ebel/manager/models.py
@@ -20,8 +20,7 @@
 from sqlalchemy_utils import create_database, database_exists
 from tqdm import tqdm
 
-from ebel.constants import (FILE, GRAMMAR_NS_ANNO_PATH, GRAMMAR_START_ANNO,
-                            GRAMMAR_START_NS, URL)
+from ebel.constants import FILE, GRAMMAR_NS_ANNO_PATH, GRAMMAR_START_ANNO, GRAMMAR_START_NS, URL
 from ebel.tools import BelRdb
 
 Base = declarative_base()
diff --git a/ebel/manager/orientdb/biodbs/bel.py b/ebel/manager/orientdb/biodbs/bel.py
index d5a0f20..126eab6 100644
--- a/ebel/manager/orientdb/biodbs/bel.py
+++ b/ebel/manager/orientdb/biodbs/bel.py
@@ -30,14 +30,29 @@
 from ebel.manager.orientdb.biodbs.reactome import Reactome
 from ebel.manager.orientdb.biodbs.stringdb import StringDb
 from ebel.manager.orientdb.biodbs.uniprot import UniProt
-from ebel.manager.orientdb.constants import (BIOGRID, CHEBI, CLINICAL_TRIALS,
-                                             CLINVAR, DISGENET, DRUGBANK,
-                                             ENSEMBL, EXPRESSION_ATLAS,
-                                             GWAS_CATALOG, HGNC, INTACT,
-                                             IUPHAR, KEGG, MIRTARBASE, NCBI,
-                                             NSIDES, PATHWAY_COMMONS,
-                                             PROTEIN_ATLAS, REACTOME, STRINGDB,
-                                             UNIPROT)
+from ebel.manager.orientdb.constants import (
+    BIOGRID,
+    CHEBI,
+    CLINICAL_TRIALS,
+    CLINVAR,
+    DISGENET,
+    DRUGBANK,
+    ENSEMBL,
+    EXPRESSION_ATLAS,
+    GWAS_CATALOG,
+    HGNC,
+    INTACT,
+    IUPHAR,
+    KEGG,
+    MIRTARBASE,
+    NCBI,
+    NSIDES,
+    PATHWAY_COMMONS,
+    PROTEIN_ATLAS,
+    REACTOME,
+    STRINGDB,
+    UNIPROT,
+)
 from ebel.manager.orientdb.importer import _BelImporter
 from ebel.manager.orientdb.odb_defaults import bel_func_short
 from ebel.manager.orientdb.odb_meta import Graph
@@ -681,6 +696,8 @@ def update_interactions(self) -> int:
         """Abstract method."""
         pass
 
+
 if __name__ == "__main__":
     b = Bel()
-    b.clinical_trials.update()
\ No newline at end of file
+    b.clinical_trials.recreate_tables()
+    b.clinical_trials.update()
diff --git a/ebel/manager/orientdb/biodbs/biogrid.py b/ebel/manager/orientdb/biodbs/biogrid.py
index 63df5a5..277998f 100644
--- a/ebel/manager/orientdb/biodbs/biogrid.py
+++ b/ebel/manager/orientdb/biodbs/biogrid.py
@@ -21,6 +21,7 @@
 
 logger = logging.getLogger(__name__)
 
+
 class BioGridNode:
     """Custom class definition for BioGRID nodes."""
 
diff --git a/ebel/manager/orientdb/odb_meta.py b/ebel/manager/orientdb/odb_meta.py
index 10c1972..db5cff7 100644
--- a/ebel/manager/orientdb/odb_meta.py
+++ b/ebel/manager/orientdb/odb_meta.py
@@ -21,9 +21,7 @@
 import sqlalchemy as sqla
 import xmltodict
 from pyorientdb import OrientDB, orient
-from pyorientdb.exceptions import (PyOrientCommandException,
-                                   PyOrientIndexException,
-                                   PyOrientSecurityAccessException)
+from pyorientdb.exceptions import PyOrientCommandException, PyOrientIndexException, PyOrientSecurityAccessException
 from pyorientdb.otypes import OrientRecord
 from sqlalchemy import text
 from sqlalchemy.sql.schema import Table
@@ -35,8 +33,7 @@
 from ebel.config import get_config_as_dict, get_config_value, write_to_config
 from ebel.constants import DEFAULT_ODB, RID
 from ebel.manager.orientdb import urls as default_urls
-from ebel.manager.orientdb.odb_structure import (Edge, Generic, Node, OClass,
-                                                 OIndex, OProperty)
+from ebel.manager.orientdb.odb_structure import Edge, Generic, Node, OClass, OIndex, OProperty
 from ebel.tools import BelRdb, chunks, get_file_path, get_standard_name
 
 type_map_inverse = {v: k for k, v in orient.type_map.items()}
diff --git a/ebel/manager/orientdb/odb_structure.py b/ebel/manager/orientdb/odb_structure.py
index a0fceb3..e4e830b 100755
--- a/ebel/manager/orientdb/odb_structure.py
+++ b/ebel/manager/orientdb/odb_structure.py
@@ -9,8 +9,7 @@
 from enum import Enum
 from typing import Dict, List, Optional, Tuple
 
-from ebel.manager.orientdb.odb_defaults import (ODataType, OIndexType,
-                                                normalized_pmod)
+from ebel.manager.orientdb.odb_defaults import ODataType, OIndexType, normalized_pmod
 
 
 class OClassType(Enum):
diff --git a/ebel/manager/rdbms/models/chebi.py b/ebel/manager/rdbms/models/chebi.py
index 99876ff..52d3120 100644
--- a/ebel/manager/rdbms/models/chebi.py
+++ b/ebel/manager/rdbms/models/chebi.py
@@ -1,9 +1,8 @@
 """CHEBI RDBMS model definition."""
 import datetime
-from typing import List
+from typing import List, Optional
 
-from sqlalchemy import (DateTime, ForeignKey, Index, Integer, String,
-                        Text)
+from sqlalchemy import DateTime, ForeignKey, Index, Integer, String, Text
 from sqlalchemy.ext.declarative import declarative_base
 from sqlalchemy.orm import relationship, mapped_column, Mapped
 
@@ -16,7 +15,7 @@ class ChemicalData(Base):
     __tablename__ = "chebi_chemical_data"
     id: Mapped[int] = mapped_column(primary_key=True)
 
-    chemical_data: Mapped[str] = mapped_column(Text, nullable=True)
+    chemical_data: Mapped[Optional[str]] = mapped_column(Text)
     source: Mapped[str] = mapped_column(Text, nullable=False)
     type: Mapped[str] = mapped_column(Text, nullable=False)
 
@@ -68,15 +67,15 @@ class Compound(Base):
     __tablename__ = "chebi_compound"
     id: Mapped[int] = mapped_column(primary_key=True)
 
-    name: Mapped[str] = mapped_column(String(2000), nullable=True)
+    name: Mapped[Optional[str]] = mapped_column(String(2000))
     source: Mapped[str] = mapped_column(String(32), nullable=False)
-    parent_id: Mapped[int] = mapped_column(nullable=True)
+    parent_id: Mapped[Optional[int]] = mapped_column()
     chebi_accession: Mapped[str] = mapped_column(String(30), nullable=False)
     status: Mapped[str] = mapped_column(String(1), nullable=False)
-    definition: Mapped[str] = mapped_column(Text, nullable=True)
+    definition: Mapped[Optional[str]] = mapped_column(Text)
     star: Mapped[int] = mapped_column(nullable=False)
-    modified_on: Mapped[str] = mapped_column(Text, nullable=True)
-    created_by: Mapped[int] = mapped_column(Text, nullable=True)
+    modified_on: Mapped[Optional[str]] = mapped_column(Text)
+    created_by: Mapped[Optional[str]] = mapped_column(Text)
 
     chemicalData: Mapped[List["ChemicalData"]] = relationship("ChemicalData", back_populates="compounds")
     comments: Mapped[List["Comment"]] = relationship("Comment", back_populates="compounds")
@@ -136,7 +135,7 @@ class DatabaseAccession(Base):
     __tablename__ = "chebi_database_accession"
     id: Mapped[int] = mapped_column(primary_key=True)
 
-    accession_number: Mapped[str] = mapped_column(String(255), nullable=True)
+    accession_number: Mapped[Optional[str]] = mapped_column(String(255))
     type: Mapped[str] = mapped_column(Text, nullable=False)
     source: Mapped[str] = mapped_column(Text, nullable=False)
 
@@ -161,7 +160,7 @@ class Name(Base):
     __tablename__ = "chebi_name"
     id: Mapped[int] = mapped_column(primary_key=True)
 
-    name: Mapped[str] = mapped_column(Text, nullable=True)
+    name: Mapped[Optional[str]] = mapped_column(Text)
     type: Mapped[str] = mapped_column(Text, nullable=False)
     source: Mapped[str] = mapped_column(Text, nullable=False)
     adapted: Mapped[str] = mapped_column(Text, nullable=False)
@@ -193,8 +192,8 @@ class Reference(Base):
 
     reference_id: Mapped[str] = mapped_column(String(60), nullable=False, index=True)
     reference_db_name: Mapped[str] = mapped_column(String(60), nullable=False, index=True)
-    location_in_ref: Mapped[str] = mapped_column(String(90), nullable=True, index=True)
-    reference_name: Mapped[str] = mapped_column(String(1024), nullable=True)
+    location_in_ref: Mapped[Optional[str]] = mapped_column(String(90), index=True)
+    reference_name: Mapped[Optional[str]] = mapped_column(String(1024))
 
     compound_id: Mapped[int] = mapped_column(ForeignKey("chebi_compound.id"))
     compounds: Mapped[List["Compound"]] = relationship("Compound", back_populates="references")
diff --git a/ebel/manager/rdbms/models/clinical_trials_gov.py b/ebel/manager/rdbms/models/clinical_trials_gov.py
index a94ff4f..c38cf31 100644
--- a/ebel/manager/rdbms/models/clinical_trials_gov.py
+++ b/ebel/manager/rdbms/models/clinical_trials_gov.py
@@ -1,6 +1,6 @@
 """ClinicalTrials.gov RDBMS model definition."""
 import re
-from typing import List
+from typing import List, Optional
 
 from sqlalchemy import ForeignKey, Integer, String, Table, Text, Column
 from sqlalchemy.ext.declarative import declarative_base
@@ -86,24 +86,24 @@ class ClinicalTrialGov(Base):
 
     id: Mapped[int] = mapped_column(primary_key=True)
     nct_id = mapped_column(String(100), index=True)
-    org_study_id: Mapped[str] = mapped_column(Text)
-    brief_title: Mapped[str] = mapped_column(Text)
-    official_title: Mapped[str] = mapped_column(Text)
-    is_fda_regulated_drug: Mapped[str] = mapped_column(Text)
-    brief_summary: Mapped[str] = mapped_column(Text)
-    detailed_description: Mapped[str] = mapped_column(Text)
-    overall_status: Mapped[str] = mapped_column(Text)
-    start_date: Mapped[str] = mapped_column(Text)
-    completion_date: Mapped[str] = mapped_column(Text)
-    phase: Mapped[str] = mapped_column(Text)
-    study_type: Mapped[str] = mapped_column(Text)
-    study_design_intervention_model: Mapped[str] = mapped_column(Text)
-    study_design_primary_purpose: Mapped[str] = mapped_column(Text)
-    study_design_masking: Mapped[str] = mapped_column(Text)
+    org_study_id: Mapped[Optional[str]] = mapped_column(Text)
+    brief_title: Mapped[Optional[str]] = mapped_column(Text)
+    official_title: Mapped[Optional[str]] = mapped_column(Text)
+    is_fda_regulated_drug: Mapped[Optional[str]] = mapped_column(Text)
+    brief_summary: Mapped[Optional[str]] = mapped_column(Text)
+    detailed_description: Mapped[Optional[str]] = mapped_column(Text)
+    overall_status: Mapped[Optional[str]] = mapped_column(Text)
+    start_date: Mapped[Optional[str]] = mapped_column(Text)
+    completion_date: Mapped[Optional[str]] = mapped_column(Text)
+    phase: Mapped[Optional[str]] = mapped_column(Text)
+    study_type: Mapped[Optional[str]] = mapped_column(Text)
+    study_design_intervention_model: Mapped[Optional[str]] = mapped_column(Text)
+    study_design_primary_purpose: Mapped[Optional[str]] = mapped_column(Text)
+    study_design_masking: Mapped[Optional[str]] = mapped_column(Text)
     # primary_outcomes
     # secondary_outcomes
-    patient_data_sharing_ipd: Mapped[str] = mapped_column(Text)
-    patient_data_ipd_description: Mapped[str] = mapped_column(Text)
+    patient_data_sharing_ipd: Mapped[Optional[str]] = mapped_column(Text)
+    patient_data_ipd_description: Mapped[Optional[str]] = mapped_column(Text)
 
     keywords: Mapped[List["Keyword"]] = relationship(
         "Keyword",
diff --git a/ebel/manager/rdbms/models/drugbank.py b/ebel/manager/rdbms/models/drugbank.py
index c0f1ba6..8877527 100644
--- a/ebel/manager/rdbms/models/drugbank.py
+++ b/ebel/manager/rdbms/models/drugbank.py
@@ -35,9 +35,15 @@ class Drugbank(Base):
     references: Mapped[List["Reference"]] = relationship("Reference", back_populates="drugbank", cascade="save-update")
     synonyms: Mapped[List["Synonym"]] = relationship("Synonym", back_populates="drugbank", cascade="save-update")
     targets: Mapped[List["Target"]] = relationship("Target", back_populates="drugbank", cascade="save-update")
-    external_identifiers: Mapped[List["ExternalIdentifier"]] = relationship("ExternalIdentifier", back_populates="drugbank", cascade="save-update")
-    product_names: Mapped[List["ProductName"]] = relationship("ProductName", back_populates="drugbank", cascade="save-update")
-    drug_interactions: Mapped[List["DrugInteraction"]] = relationship("DrugInteraction", back_populates="drugbank", cascade="save-update")
+    external_identifiers: Mapped[List["ExternalIdentifier"]] = relationship(
+        "ExternalIdentifier", back_populates="drugbank", cascade="save-update"
+    )
+    product_names: Mapped[List["ProductName"]] = relationship(
+        "ProductName", back_populates="drugbank", cascade="save-update"
+    )
+    drug_interactions: Mapped[List["DrugInteraction"]] = relationship(
+        "DrugInteraction", back_populates="drugbank", cascade="save-update"
+    )
     statuses: Mapped[List["Status"]] = relationship("Status", back_populates="drugbank", cascade="save-update")
     patents: Mapped[List["Patent"]] = relationship("Patent", back_populates="drugbank", cascade="save-update")
     pathways: Mapped[List["Pathway"]] = relationship("Pathway", back_populates="drugbank", cascade="save-update")
diff --git a/ebel/manager/rdbms/models/hgnc.py b/ebel/manager/rdbms/models/hgnc.py
index 26c5a20..56c0b64 100644
--- a/ebel/manager/rdbms/models/hgnc.py
+++ b/ebel/manager/rdbms/models/hgnc.py
@@ -2,8 +2,7 @@
 import datetime
 from typing import List
 
-from sqlalchemy import (BigInteger, Column, Date, ForeignKey, Integer, String,
-                        Text)
+from sqlalchemy import BigInteger, Column, Date, ForeignKey, Integer, String, Text
 from sqlalchemy.ext.declarative import declarative_base
 from sqlalchemy.orm import relationship, mapped_column, Mapped
 
diff --git a/ebel/manager/rdbms/models/iuphar.py b/ebel/manager/rdbms/models/iuphar.py
index cc1c1b8..11d2c4d 100644
--- a/ebel/manager/rdbms/models/iuphar.py
+++ b/ebel/manager/rdbms/models/iuphar.py
@@ -1,8 +1,7 @@
 """IUPHAR RDBMS model definition."""
 from typing import List
 
-from sqlalchemy import (BigInteger, Boolean, Column, ForeignKey, Integer,
-                        Numeric, String, Text)
+from sqlalchemy import BigInteger, Boolean, Column, ForeignKey, Integer, Numeric, String, Text
 from sqlalchemy.ext.declarative import declarative_base
 from sqlalchemy.orm import relationship, mapped_column, Mapped
 
@@ -25,7 +24,9 @@ class IupharLigand(Base):
     labelled: Mapped[bool] = mapped_column()
     radioactive: Mapped[bool] = mapped_column()
     pubchem_sid: Mapped[int] = mapped_column()
-    pubchem_cid: Mapped[str] = mapped_column(Text)  # TODO: This is a integer, but for import reasons this changed to text
+    pubchem_cid: Mapped[str] = mapped_column(
+        Text
+    )  # TODO: This is a integer, but for import reasons this changed to text
     uniprot_id: Mapped[str] = mapped_column(Text)
     ensembl_id: Mapped[str] = mapped_column(Text)
     ligand_subunit_ids: Mapped[str] = mapped_column(Text)
diff --git a/ebel/manager/rdbms/models/nsides.py b/ebel/manager/rdbms/models/nsides.py
index 2d92762..aceb587 100644
--- a/ebel/manager/rdbms/models/nsides.py
+++ b/ebel/manager/rdbms/models/nsides.py
@@ -24,7 +24,9 @@ class Nsides(Base):
     )
     id: Mapped[int] = mapped_column(primary_key=True)
 
-    drug_rxnorn_id: Mapped[str] = mapped_column(String(20), index=True)  # This has to be a String because of mapping to drugbank ids
+    drug_rxnorn_id: Mapped[str] = mapped_column(
+        String(20), index=True
+    )  # This has to be a String because of mapping to drugbank ids
     drug_concept_name: Mapped[str] = mapped_column(String(255), index=True)
 
     source: Mapped[str] = mapped_column(String(10), index=True)
diff --git a/ebel/manager/rdbms/models/protein_atlas.py b/ebel/manager/rdbms/models/protein_atlas.py
index a1a57e7..cce2936 100644
--- a/ebel/manager/rdbms/models/protein_atlas.py
+++ b/ebel/manager/rdbms/models/protein_atlas.py
@@ -1,7 +1,7 @@
 """Protein Atlas RDBMS model definition."""
 from sqlalchemy import Column, Integer, Numeric, String, Text
 from sqlalchemy.ext.declarative import declarative_base
-from sqlalchemy.orm import mapped_column
+from sqlalchemy.orm import mapped_column, Mapped
 
 Base = declarative_base()
 
@@ -10,14 +10,14 @@ class ProteinAtlasNormalTissue(Base):
     """Class definition for the protein_atlas_normal_tissue table."""
 
     __tablename__ = "protein_atlas_normal_tissue"
-    id = mapped_column(Integer, primary_key=True)
+    id: Mapped[int] = mapped_column(primary_key=True)
 
-    gene = mapped_column(String(100), index=True)
-    gene_name = mapped_column(String(100))
-    tissue = mapped_column(String(100))
-    cell_type = mapped_column(String(100))
-    level = mapped_column(String(100), index=True)
-    reliability = mapped_column(String(100), index=True)
+    gene: Mapped[str] = mapped_column(String(100), index=True, nullable=True)
+    gene_name: Mapped[str] = mapped_column(String(100), nullable=True)
+    tissue: Mapped[str] = mapped_column(String(100), nullable=True)
+    cell_type: Mapped[str] = mapped_column(String(100), nullable=True)
+    level: Mapped[str] = mapped_column(String(100), index=True, nullable=True)
+    reliability: Mapped[str] = mapped_column(String(100), index=True, nullable=True)
 
     def as_dict(self):
         """Convert object values to dictionary."""
@@ -35,22 +35,22 @@ class ProteinAtlasSubcellularLocation(Base):
     """Class definition for the protein_atlas_subcellular_location table."""
 
     __tablename__ = "protein_atlas_subcellular_location"
-    id = mapped_column(Integer, primary_key=True)
-
-    gene = mapped_column(String(100))
-    gene_name = mapped_column(String(100))
-    reliability = mapped_column(String(100))
-    main_location = mapped_column(String(100))
-    additional_location = mapped_column(String(100))
-    extracellular_location = mapped_column(String(100))
-    enhanced = mapped_column(String(100))
-    supported = mapped_column(String(100))
-    approved = mapped_column(String(100))
-    uncertain = mapped_column(String(100))
-    single_cell_variation_intensity = mapped_column(String(100))
-    single_cell_variation_spatial = mapped_column(String(100))
-    cell_cycle_dependency = mapped_column(Text)
-    go_id = mapped_column(Text)
+    id: Mapped[int] = mapped_column(primary_key=True)
+
+    gene: Mapped[str] = mapped_column(String(100), nullable=True)
+    gene_name: Mapped[str] = mapped_column(String(100), nullable=True)
+    reliability: Mapped[str] = mapped_column(String(100), nullable=True)
+    main_location: Mapped[str] = mapped_column(String(100), nullable=True)
+    additional_location: Mapped[str] = mapped_column(String(100), nullable=True)
+    extracellular_location: Mapped[str] = mapped_column(String(100), nullable=True)
+    enhanced: Mapped[str] = mapped_column(String(100), nullable=True)
+    supported: Mapped[str] = mapped_column(String(100), nullable=True)
+    approved: Mapped[str] = mapped_column(String(100), nullable=True)
+    uncertain: Mapped[str] = mapped_column(String(100), nullable=True)
+    single_cell_variation_intensity: Mapped[str] = mapped_column(String(100), nullable=True)
+    single_cell_variation_spatial: Mapped[str] = mapped_column(String(100), nullable=True)
+    cell_cycle_dependency: Mapped[str] = mapped_column(Text, nullable=True)
+    go_id: Mapped[str] = mapped_column(Text, nullable=True)
 
     def as_dict(self):
         """Convert object values to dictionary."""
@@ -76,12 +76,12 @@ class ProteinAtlasRnaTissueConsensus(Base):
     """Class definition for the protein_atlas_rna_tissue_consensus table."""
 
     __tablename__ = "protein_atlas_rna_tissue_consensus"
-    id = mapped_column(Integer, primary_key=True)
+    id: Mapped[int] = mapped_column(primary_key=True)
 
-    gene = mapped_column(String(100), index=True)
-    gene_name = mapped_column(String(100), index=True)
-    tissue = mapped_column(String(100), index=True)
-    n_tpm = mapped_column(Numeric(8, 1))
+    gene: Mapped[str] = mapped_column(String(100), index=True, nullable=True)
+    gene_name: Mapped[str] = mapped_column(String(100), index=True, nullable=True)
+    tissue: Mapped[str] = mapped_column(String(100), index=True, nullable=True)
+    n_tpm: Mapped[float] = mapped_column(Numeric(8, 1), nullable=True)
 
     def as_dict(self):
         """Convert object values to dictionary."""
@@ -97,14 +97,14 @@ class ProteinAtlasRnaBrainGtex(Base):
     """Class definition for the protein_atlas_rna_brain_gtex table."""
 
     __tablename__ = "protein_atlas_rna_brain_gtex"
-    id = mapped_column(Integer, primary_key=True)
+    id: Mapped[int] = mapped_column(primary_key=True)
 
-    gene = mapped_column(String(100), index=True)
-    gene_name = mapped_column(String(100), index=True)
-    brain_region = mapped_column(String(100), index=True)
-    tpm = mapped_column(Numeric(8, 1))
-    p_tpm = mapped_column(Numeric(8, 1))
-    n_tpm = mapped_column(Numeric(8, 1))
+    gene: Mapped[str] = mapped_column(String(100), index=True, nullable=True)
+    gene_name: Mapped[str] = mapped_column(String(100), index=True, nullable=True)
+    brain_region: Mapped[str] = mapped_column(String(100), index=True, nullable=True)
+    tpm: Mapped[float] = mapped_column(Numeric(8, 1), nullable=True)
+    p_tpm: Mapped[float] = mapped_column(Numeric(8, 1), nullable=True)
+    n_tpm: Mapped[float] = mapped_column(Numeric(8, 1), nullable=True)
 
     def as_dict(self):
         """Convert object values to dictionary."""
@@ -122,14 +122,14 @@ class ProteinAtlasRnaBrainFantom(Base):
     """Class definition for the protein_atlas_rna_brain_fantom table."""
 
     __tablename__ = "protein_atlas_rna_brain_fantom"
-    id = mapped_column(Integer, primary_key=True)
+    id: Mapped[int] = mapped_column(primary_key=True)
 
-    gene = mapped_column(String(100))
-    gene_name = mapped_column(String(100))
-    brain_region = mapped_column(String(100))
-    tags_per_million = mapped_column(String(100))
-    scaled_tags_per_million = mapped_column(String(100))
-    n_tpm = mapped_column(String(100))
+    gene: Mapped[str] = mapped_column(String(100), nullable=True)
+    gene_name: Mapped[str] = mapped_column(String(100), nullable=True)
+    brain_region: Mapped[str] = mapped_column(String(100), nullable=True)
+    tags_per_million: Mapped[str] = mapped_column(String(100), nullable=True)
+    scaled_tags_per_million: Mapped[str] = mapped_column(String(100), nullable=True)
+    n_tpm: Mapped[str] = mapped_column(String(100), nullable=True)
 
     def as_dict(self):
         """Convert object values to dictionary."""
@@ -147,12 +147,12 @@ class ProteinAtlasRnaMouseBrainAllen(Base):
     """Class definition for the protein_atlas_rna_mouse_brain_allen table."""
 
     __tablename__ = "protein_atlas_rna_mouse_brain_allen"
-    id = mapped_column(Integer, primary_key=True)
+    id: Mapped[int] = mapped_column(primary_key=True)
 
-    gene = mapped_column(String(100))
-    gene_name = mapped_column(String(100))
-    brain_region = mapped_column(String(100))
-    expression_energy = mapped_column(Numeric(8, 1))
+    gene: Mapped[str] = mapped_column(String(100), nullable=True)
+    gene_name: Mapped[str] = mapped_column(String(100), nullable=True)
+    brain_region: Mapped[str] = mapped_column(String(100), nullable=True)
+    expression_energy: Mapped[float] = mapped_column(Numeric(8, 1), nullable=True)
 
     def as_dict(self):
         """Convert object values to dictionary."""
diff --git a/ebel/manager/rdbms/models/reactome.py b/ebel/manager/rdbms/models/reactome.py
index e986cb1..3852882 100644
--- a/ebel/manager/rdbms/models/reactome.py
+++ b/ebel/manager/rdbms/models/reactome.py
@@ -1,7 +1,7 @@
 """Reactome RDBMS model definition."""
 from sqlalchemy import Column, Integer, String
 from sqlalchemy.ext.declarative import declarative_base
-from sqlalchemy.orm import mapped_column
+from sqlalchemy.orm import mapped_column, Mapped
 
 from ebel.manager.rdbms.models import object_as_dict
 
@@ -12,12 +12,12 @@ class Reactome(Base):
     """Class definition for the reactome table."""
 
     __tablename__ = "reactome"
-    id = mapped_column(Integer, primary_key=True)
-    identifier = mapped_column(String(50), index=True)
-    uniprot_accession = mapped_column(String(50), index=True)
-    organism = mapped_column(String(255))
-    name = mapped_column(String(255))
-    evidence_type = mapped_column(String(255))
+    id: Mapped[int] = mapped_column(primary_key=True)
+    identifier: Mapped[str] = mapped_column(String(50), index=True, nullable=True)
+    uniprot_accession: Mapped[str] = mapped_column(String(50), index=True, nullable=True)
+    organism: Mapped[str] = mapped_column(String(255), nullable=True)
+    name: Mapped[str] = mapped_column(String(255), nullable=True)
+    evidence_type: Mapped[str] = mapped_column(String(255), nullable=True)
 
     def as_dict(self):
         """Convert object values to dictionary."""
diff --git a/ebel/manager/rdbms/models/stringdb.py b/ebel/manager/rdbms/models/stringdb.py
index df1fedf..c56b9f9 100644
--- a/ebel/manager/rdbms/models/stringdb.py
+++ b/ebel/manager/rdbms/models/stringdb.py
@@ -2,7 +2,7 @@
 
 from sqlalchemy import Boolean, Column, Integer, SmallInteger, String
 from sqlalchemy.ext.declarative import declarative_base
-from sqlalchemy.orm import mapped_column
+from sqlalchemy.orm import mapped_column, Mapped
 
 from ebel.manager.rdbms.models import object_as_dict
 
@@ -14,26 +14,26 @@ class StringDb(Base):
 
     __tablename__ = "stringdb"
 
-    id = mapped_column(Integer, primary_key=True)
-
-    protein1 = mapped_column(String(50), nullable=False)
-    protein2 = mapped_column(String(50), nullable=False)
-    symbol1 = mapped_column(String(50), nullable=False, index=True)
-    symbol2 = mapped_column(String(50), nullable=False, index=True)
-    neighborhood = mapped_column(Integer)
-    neighborhood_transferred = mapped_column(SmallInteger)
-    fusion = mapped_column(SmallInteger)
-    cooccurence = mapped_column(SmallInteger)
-    homology = mapped_column(SmallInteger)
-    coexpression = mapped_column(SmallInteger)
-    coexpression_transferred = mapped_column(SmallInteger)
-    experiments = mapped_column(SmallInteger, index=True)
-    experiments_transferred = mapped_column(SmallInteger)
-    database = mapped_column(Integer)
-    database_transferred = mapped_column(SmallInteger)
-    textmining = mapped_column(SmallInteger)
-    textmining_transferred = mapped_column(SmallInteger)
-    combined_score = mapped_column(SmallInteger)
+    id: Mapped[int] = mapped_column(primary_key=True)
+
+    protein1: Mapped[str] = mapped_column(String(50), nullable=False)
+    protein2: Mapped[str] = mapped_column(String(50), nullable=False)
+    symbol1: Mapped[str] = mapped_column(String(50), nullable=False, index=True)
+    symbol2: Mapped[str] = mapped_column(String(50), nullable=False, index=True)
+    neighborhood: Mapped[int] = mapped_column(nullable=True)
+    neighborhood_transferred: Mapped[int] = mapped_column(SmallInteger, nullable=True)
+    fusion: Mapped[int] = mapped_column(SmallInteger, nullable=True)
+    cooccurence: Mapped[int] = mapped_column(SmallInteger, nullable=True)
+    homology: Mapped[int] = mapped_column(SmallInteger, nullable=True)
+    coexpression: Mapped[int] = mapped_column(SmallInteger, nullable=True)
+    coexpression_transferred: Mapped[int] = mapped_column(SmallInteger, nullable=True)
+    experiments: Mapped[int] = mapped_column(SmallInteger, index=True, nullable=True)
+    experiments_transferred: Mapped[int] = mapped_column(SmallInteger, nullable=True)
+    database: Mapped[int] = mapped_column(nullable=True)
+    database_transferred: Mapped[int] = mapped_column(SmallInteger, nullable=True)
+    textmining: Mapped[int] = mapped_column(SmallInteger, nullable=True)
+    textmining_transferred: Mapped[int] = mapped_column(SmallInteger, nullable=True)
+    combined_score: Mapped[int] = mapped_column(SmallInteger, nullable=True)
 
     def as_dict(self):
         """Convert object values to dictionary."""
@@ -45,9 +45,9 @@ class StringDbProtein(Base):
 
     __tablename__ = "stringdb_protein"
 
-    id = mapped_column(Integer, primary_key=True)
-    string_protein_id = mapped_column(String(50), nullable=False, index=True)
-    preferred_name = mapped_column(String(50), nullable=False, index=True)
+    id: Mapped[int] = mapped_column(primary_key=True)
+    string_protein_id: Mapped[str] = mapped_column(String(50), nullable=False, index=True)
+    preferred_name: Mapped[str] = mapped_column(String(50), nullable=False, index=True)
 
     def as_dict(self):
         """Convert object values to dictionary."""
@@ -58,16 +58,16 @@ class StringDbAction(Base):
     """Class definition for the stringdb_action table."""
 
     __tablename__ = "stringdb_action"
-    id = mapped_column(Integer, primary_key=True)
-    item_id_a = mapped_column(String(50), nullable=False)
-    item_id_b = mapped_column(String(50), nullable=False)
-    symbol1 = mapped_column(String(50), nullable=False, index=True)
-    symbol2 = mapped_column(String(50), nullable=False, index=True)
-    mode = mapped_column(String(20), nullable=False, index=True)
-    action = mapped_column(String(20))
-    is_directional = mapped_column(Boolean, nullable=False, index=True)
-    a_is_acting = mapped_column(Boolean, nullable=False, index=True)
-    score = mapped_column(SmallInteger)
+    id: Mapped[int] = mapped_column(primary_key=True)
+    item_id_a: Mapped[str] = mapped_column(String(50), nullable=False)
+    item_id_b: Mapped[str] = mapped_column(String(50), nullable=False)
+    symbol1: Mapped[str] = mapped_column(String(50), nullable=False, index=True)
+    symbol2: Mapped[str] = mapped_column(String(50), nullable=False, index=True)
+    mode: Mapped[str] = mapped_column(String(20), nullable=False, index=True)
+    action: Mapped[str] = mapped_column(String(20), nullable=True)
+    is_directional: Mapped[bool] = mapped_column(Boolean, nullable=False, index=True)
+    a_is_acting: Mapped[bool] = mapped_column(Boolean, nullable=False, index=True)
+    score: Mapped[int] = mapped_column(SmallInteger, nullable=True)
 
     def as_dict(self):
         """Convert object values to dictionary."""
diff --git a/ebel/manager/rdbms/models/uniprot.py b/ebel/manager/rdbms/models/uniprot.py
index 331ea82..e4cfd9a 100644
--- a/ebel/manager/rdbms/models/uniprot.py
+++ b/ebel/manager/rdbms/models/uniprot.py
@@ -1,9 +1,10 @@
 """UniProt RDBMS model definition."""
 from collections import defaultdict
+from typing import List, Optional
 
 from sqlalchemy import Column, ForeignKey, Integer, String, Table, Text
 from sqlalchemy.ext.declarative import declarative_base
-from sqlalchemy.orm import relationship, mapped_column
+from sqlalchemy.orm import relationship, mapped_column, Mapped
 
 Base = declarative_base()
 
@@ -45,29 +46,33 @@ class Uniprot(Base):
 
     __tablename__ = "uniprot"
 
-    id = mapped_column(Integer, primary_key=True)
+    id: Mapped[int] = mapped_column(primary_key=True)
 
-    accession = mapped_column(String(20), unique=True)
-    name = mapped_column(String(100), nullable=False, unique=True)
-    recommended_name = mapped_column(String(255), nullable=True)
+    accession: Mapped[Optional[str]] = mapped_column(String(20), unique=True)
+    name: Mapped[str] = mapped_column(String(100), nullable=False, unique=True)
+    recommended_name: Mapped[Optional[str]] = mapped_column(String(255), nullable=True)
 
-    taxid = mapped_column(Integer, ForeignKey("uniprot_organism.taxid"), nullable=False, index=True)
-    organism = relationship("Organism")
+    taxid: Mapped[int] = mapped_column(ForeignKey("uniprot_organism.taxid"), nullable=False, index=True)
+    organism: Mapped["Organism"] = relationship("Organism")
 
-    function_id = mapped_column(Integer, ForeignKey("uniprot_function.id"), nullable=True)
-    function = relationship("Function")
+    function_id: Mapped[Optional[int]] = mapped_column(ForeignKey("uniprot_function.id"), nullable=True)
+    function: Mapped[Optional["Function"]] = relationship("Function")
 
-    gene_names = relationship("Gene", back_populates="uniprot")
+    gene_names: Mapped[List["Gene"]] = relationship("Gene", back_populates="uniprot")
 
-    gene_symbol = relationship("GeneSymbol", uselist=False, back_populates="uniprot")
+    gene_symbol: Mapped[Optional["GeneSymbol"]] = relationship("GeneSymbol", uselist=False, back_populates="uniprot")
 
-    keywords = relationship("Keyword", secondary=uniprot__uniprot_keyword, back_populates="uniprots")
+    keywords: Mapped[List["Keyword"]] = relationship(
+        "Keyword", secondary=uniprot__uniprot_keyword, back_populates="uniprots"
+    )
 
-    hosts = relationship("Organism", secondary=uniprot__uniprot_host, back_populates="uniprots")
+    hosts: Mapped[List["Organism"]] = relationship(
+        "Organism", secondary=uniprot__uniprot_host, back_populates="uniprots"
+    )
 
-    xrefs = relationship("Xref", secondary=uniprot__uniprot_xref, back_populates="uniprots")
+    xrefs: Mapped[List["Xref"]] = relationship("Xref", secondary=uniprot__uniprot_xref, back_populates="uniprots")
 
-    subcellular_locations = relationship(
+    subcellular_locations: Mapped[List["SubcellularLocation"]] = relationship(
         "SubcellularLocation",
         secondary=uniprot__uniprot_subcellular_location,
         back_populates="uniprots",
@@ -103,10 +108,10 @@ class GeneSymbol(Base):
     """Class definition for the uniprot_gene_symbol table."""
 
     __tablename__ = "uniprot_gene_symbol"
-    id = mapped_column(Integer, primary_key=True)
-    symbol = mapped_column(String(100), nullable=False, index=True)
-    uniprot_id = mapped_column(Integer, ForeignKey("uniprot.id"))
-    uniprot = relationship("Uniprot", back_populates="gene_symbol")
+    id: Mapped[int] = mapped_column(primary_key=True)
+    symbol: Mapped[str] = mapped_column(String(100), nullable=False, index=True)
+    uniprot_id: Mapped[int] = mapped_column(ForeignKey("uniprot.id"), nullable=True)
+    uniprot: Mapped[Uniprot] = relationship("Uniprot", back_populates="gene_symbol")
 
     def __repr__(self):
         """Define repr."""
@@ -117,10 +122,10 @@ class Gene(Base):
     """Class definition for the uniprot_gene table."""
 
     __tablename__ = "uniprot_gene"
-    id = mapped_column(Integer, primary_key=True)
-    name = mapped_column(String(100), nullable=False, index=True)
-    uniprot_id = mapped_column(Integer, ForeignKey("uniprot.id"))
-    uniprot = relationship("Uniprot", back_populates="gene_names")
+    id: Mapped[int] = mapped_column(primary_key=True)
+    name: Mapped[str] = mapped_column(String(100), nullable=False, index=True)
+    uniprot_id: Mapped[int] = mapped_column(ForeignKey("uniprot.id"), nullable=True)
+    uniprot: Mapped[Uniprot] = relationship("Uniprot", back_populates="gene_names")
 
 
 class Keyword(Base):
@@ -128,10 +133,12 @@ class Keyword(Base):
 
     __tablename__ = "uniprot_keyword"
 
-    keywordid = mapped_column(Integer, primary_key=True)
-    keyword_name = mapped_column(String(100), index=True)
+    keywordid: Mapped[int] = mapped_column(primary_key=True)
+    keyword_name: Mapped[str] = mapped_column(String(100), index=True, nullable=True)
 
-    uniprots = relationship("Uniprot", secondary=uniprot__uniprot_keyword, back_populates="keywords")
+    uniprots: Mapped[List[Uniprot]] = relationship(
+        "Uniprot", secondary=uniprot__uniprot_keyword, back_populates="keywords"
+    )
 
     def __repr__(self):
         """Define repr."""
@@ -143,10 +150,10 @@ class Organism(Base):
 
     __tablename__ = "uniprot_organism"
 
-    taxid = mapped_column(Integer, primary_key=True)
-    scientific_name = mapped_column(String(255))  # TODO:Check if index=True with  is possible
+    taxid: Mapped[int] = mapped_column(primary_key=True)
+    scientific_name: Mapped[str] = mapped_column(String(255), nullable=True)  # TODO:Check if index=True with  is possible
 
-    uniprots = relationship("Uniprot", secondary=uniprot__uniprot_host, back_populates="hosts")
+    uniprots: Mapped[List[Uniprot]] = relationship("Uniprot", secondary=uniprot__uniprot_host, back_populates="hosts")
 
 
 class SubcellularLocation(Base):
@@ -154,11 +161,10 @@ class SubcellularLocation(Base):
 
     __tablename__ = "uniprot_subcellular_location"
 
-    id = mapped_column(Integer, primary_key=True)
-
-    name = mapped_column(String(100), index=True)
+    id: Mapped[int] = mapped_column(primary_key=True)
+    name: Mapped[str] = mapped_column(String(100), index=True)
 
-    uniprots = relationship(
+    uniprots: Mapped[List[Uniprot]] = relationship(
         "Uniprot",
         secondary=uniprot__uniprot_subcellular_location,
         back_populates="subcellular_locations",
@@ -170,12 +176,12 @@ class Xref(Base):
 
     __tablename__ = "uniprot_xref"
 
-    id = mapped_column(Integer, primary_key=True)
+    id: Mapped[int] = mapped_column(primary_key=True)
 
-    db = mapped_column(String(50), index=True)
-    identifier = mapped_column(String(100), index=True)
+    db: Mapped[str] = mapped_column(String(50), index=True)
+    identifier: Mapped[str] = mapped_column(String(100), index=True)
 
-    uniprots = relationship("Uniprot", secondary=uniprot__uniprot_xref, back_populates="xrefs")
+    uniprots: Mapped[List[Uniprot]] = relationship("Uniprot", secondary=uniprot__uniprot_xref, back_populates="xrefs")
 
 
 class Function(Base):
@@ -183,8 +189,8 @@ class Function(Base):
 
     __tablename__ = "uniprot_function"
 
-    id = mapped_column(Integer, primary_key=True)
+    id: Mapped[int] = mapped_column(primary_key=True)
 
-    description = mapped_column(Text)
+    description: Mapped[str] = mapped_column(Text)
 
-    uniprots = relationship("Uniprot", back_populates="function")
+    uniprots: Mapped[List[Uniprot]] = relationship("Uniprot", back_populates="function")
diff --git a/ebel/web/api/ebel/v1/bel.py b/ebel/web/api/ebel/v1/bel.py
index 23c8d72..e14c76b 100644
--- a/ebel/web/api/ebel/v1/bel.py
+++ b/ebel/web/api/ebel/v1/bel.py
@@ -17,8 +17,7 @@
 from graphviz import Digraph
 
 from ebel import Bel
-from ebel.manager.orientdb.odb_structure import (get_columns,
-                                                 get_node_view_labels)
+from ebel.manager.orientdb.odb_structure import get_columns, get_node_view_labels
 from ebel.validate import validate_bel_file
 from ebel.web.api.ebel.v1 import DataType, OrientDbSqlOperator, _get_pagination
 
diff --git a/ebel/web/api/ebel/v1/bel_against_expression.py b/ebel/web/api/ebel/v1/bel_against_expression.py
index 7ee4e39..da7696f 100644
--- a/ebel/web/api/ebel/v1/bel_against_expression.py
+++ b/ebel/web/api/ebel/v1/bel_against_expression.py
@@ -8,8 +8,7 @@
 from sqlalchemy.sql import func
 
 from ebel import Bel
-from ebel.manager.rdbms.models.expression_atlas import (Experiment, FoldChange,
-                                                        GroupComparison)
+from ebel.manager.rdbms.models.expression_atlas import Experiment, FoldChange, GroupComparison
 from ebel.web.api.ebel.v1 import _get_pagination
 
 Relation = namedtuple(
diff --git a/ebel/web/api/ebel/v1/biogrid.py b/ebel/web/api/ebel/v1/biogrid.py
index ebc6563..25b43d3 100644
--- a/ebel/web/api/ebel/v1/biogrid.py
+++ b/ebel/web/api/ebel/v1/biogrid.py
@@ -9,9 +9,15 @@
 
 from ebel import Bel
 from ebel.manager.orientdb.biodbs.biogrid import MODIFICATIONS
-from ebel.manager.rdbms.models.biogrid import (Biogrid, ExperimentalSystem,
-                                               Interactor, Modification,
-                                               Publication, Source, Taxonomy)
+from ebel.manager.rdbms.models.biogrid import (
+    Biogrid,
+    ExperimentalSystem,
+    Interactor,
+    Modification,
+    Publication,
+    Source,
+    Taxonomy,
+)
 from ebel.web.api import RDBMS
 from ebel.web.api.ebel.v1 import _get_data
 
diff --git a/ebel/web/api/ebel/v1/clinical_trials_gov.py b/ebel/web/api/ebel/v1/clinical_trials_gov.py
index 88d91d2..5332ab9 100644
--- a/ebel/web/api/ebel/v1/clinical_trials_gov.py
+++ b/ebel/web/api/ebel/v1/clinical_trials_gov.py
@@ -5,8 +5,7 @@
 
 from ebel.manager.rdbms.models import clinical_trials_gov as ct
 from ebel.web.api import RDBMS
-from ebel.web.api.ebel.v1 import (_get_paginated_query_result,
-                                  _get_terms_from_model_starts_with)
+from ebel.web.api.ebel.v1 import _get_paginated_query_result, _get_terms_from_model_starts_with
 
 
 def get_ct_by_nct_id():
diff --git a/ebel/web/api/ebel/v1/clinvar.py b/ebel/web/api/ebel/v1/clinvar.py
index 46feeac..5502e7c 100644
--- a/ebel/web/api/ebel/v1/clinvar.py
+++ b/ebel/web/api/ebel/v1/clinvar.py
@@ -7,9 +7,12 @@
 from ebel import Bel
 from ebel.manager.rdbms.models import clinvar
 from ebel.web.api import RDBMS
-from ebel.web.api.ebel.v1 import (_get_data, _get_paginated_query_result,
-                                  _get_pagination,
-                                  _get_terms_from_model_starts_with)
+from ebel.web.api.ebel.v1 import (
+    _get_data,
+    _get_paginated_query_result,
+    _get_pagination,
+    _get_terms_from_model_starts_with,
+)
 
 
 def get_clinvar():
diff --git a/ebel/web/api/ebel/v1/disgenet.py b/ebel/web/api/ebel/v1/disgenet.py
index 71dbab4..62ac705 100644
--- a/ebel/web/api/ebel/v1/disgenet.py
+++ b/ebel/web/api/ebel/v1/disgenet.py
@@ -3,9 +3,11 @@
 
 from ebel.manager.rdbms.models import disgenet
 from ebel.web.api import RDBMS
-from ebel.web.api.ebel.v1 import (_get_paginated_ebel_query_result,
-                                  _get_paginated_query_result,
-                                  _get_terms_from_model_starts_with)
+from ebel.web.api.ebel.v1 import (
+    _get_paginated_ebel_query_result,
+    _get_paginated_query_result,
+    _get_terms_from_model_starts_with,
+)
 
 
 def get_sources():
diff --git a/ebel/web/api/ebel/v1/drugbank.py b/ebel/web/api/ebel/v1/drugbank.py
index 6b55fa9..fd1205c 100644
--- a/ebel/web/api/ebel/v1/drugbank.py
+++ b/ebel/web/api/ebel/v1/drugbank.py
@@ -4,8 +4,7 @@
 
 from ebel.manager.rdbms.models import drugbank
 from ebel.web.api import RDBMS
-from ebel.web.api.ebel.v1 import (_get_data, _get_paginated_ebel_query_result,
-                                  _get_paginated_query_result)
+from ebel.web.api.ebel.v1 import _get_data, _get_paginated_ebel_query_result, _get_paginated_query_result
 
 
 def get_by_id():
diff --git a/ebel/web/api/ebel/v1/expression_atlas.py b/ebel/web/api/ebel/v1/expression_atlas.py
index 9302eaa..e1c9c23 100644
--- a/ebel/web/api/ebel/v1/expression_atlas.py
+++ b/ebel/web/api/ebel/v1/expression_atlas.py
@@ -8,9 +8,14 @@
 from sqlalchemy import inspect
 
 from ebel import Bel
-from ebel.manager.rdbms.models.expression_atlas import (Experiment, FoldChange,
-                                                        GroupComparison, Gsea,
-                                                        Idf, SdrfCondensed)
+from ebel.manager.rdbms.models.expression_atlas import (
+    Experiment,
+    FoldChange,
+    GroupComparison,
+    Gsea,
+    Idf,
+    SdrfCondensed,
+)
 from ebel.web.api import RDBMS
 from ebel.web.api.ebel.v1 import _get_data
 
diff --git a/ebel/web/api/ebel/v1/intact.py b/ebel/web/api/ebel/v1/intact.py
index f625920..75d8d5d 100644
--- a/ebel/web/api/ebel/v1/intact.py
+++ b/ebel/web/api/ebel/v1/intact.py
@@ -5,8 +5,7 @@
 from ebel.manager.orientdb.odb_structure import intact_edges
 from ebel.manager.rdbms.models.intact import Intact
 from ebel.web.api import RDBMS
-from ebel.web.api.ebel.v1 import (_get_data, _get_paginated_ebel_query_result,
-                                  _get_paginated_query_result)
+from ebel.web.api.ebel.v1 import _get_data, _get_paginated_ebel_query_result, _get_paginated_query_result
 
 
 def get_intact():
diff --git a/ebel/web/api/ebel/v1/kegg.py b/ebel/web/api/ebel/v1/kegg.py
index e0bd768..f2be69b 100644
--- a/ebel/web/api/ebel/v1/kegg.py
+++ b/ebel/web/api/ebel/v1/kegg.py
@@ -5,8 +5,7 @@
 
 from ebel.manager.rdbms.models.kegg import Kegg
 from ebel.web.api import RDBMS
-from ebel.web.api.ebel.v1 import (_get_data, _get_paginated_ebel_query_result,
-                                  _get_paginated_query_result)
+from ebel.web.api.ebel.v1 import _get_data, _get_paginated_ebel_query_result, _get_paginated_query_result
 
 
 def get_kegg():
diff --git a/ebel/web/api/ebel/v1/pathway_commons.py b/ebel/web/api/ebel/v1/pathway_commons.py
index 3f5d1f4..6c68070 100644
--- a/ebel/web/api/ebel/v1/pathway_commons.py
+++ b/ebel/web/api/ebel/v1/pathway_commons.py
@@ -3,12 +3,14 @@
 from flask import request
 from sqlalchemy import or_
 
-from ebel.manager.rdbms.models.pathway_commons import (
-    PathwayCommons, PathwayName, Pmid, pathway_commons__pathway_name)
+from ebel.manager.rdbms.models.pathway_commons import PathwayCommons, PathwayName, Pmid, pathway_commons__pathway_name
 from ebel.web.api import RDBMS
-from ebel.web.api.ebel.v1 import (_get_data, _get_paginated_ebel_query_result,
-                                  _get_paginated_query_result,
-                                  _get_terms_from_model_starts_with)
+from ebel.web.api.ebel.v1 import (
+    _get_data,
+    _get_paginated_ebel_query_result,
+    _get_paginated_query_result,
+    _get_terms_from_model_starts_with,
+)
 
 
 def get_pathway_commons():
diff --git a/ebel/web/api/ebel/v1/uniprot.py b/ebel/web/api/ebel/v1/uniprot.py
index 0f231e5..51858c3 100644
--- a/ebel/web/api/ebel/v1/uniprot.py
+++ b/ebel/web/api/ebel/v1/uniprot.py
@@ -6,8 +6,7 @@
 from ebel import Bel
 from ebel.manager.rdbms.models import uniprot
 from ebel.web.api import RDBMS
-from ebel.web.api.ebel.v1 import (_get_paginated_query_result,
-                                  _get_terms_from_model_starts_with)
+from ebel.web.api.ebel.v1 import _get_paginated_query_result, _get_terms_from_model_starts_with
 
 from . import add_query_filters
 

From b61e876d4aad5e93290bfe9775e8db5b3edc395b Mon Sep 17 00:00:00 2001
From: Bruce Schultz <bruce.schultz@scai.fraunhofer.de>
Date: Fri, 15 Sep 2023 13:56:27 +0200
Subject: [PATCH 03/58] fix: add optional to clinvar props

---
 ebel/manager/orientdb/biodbs/bel.py     | 6 ------
 ebel/manager/orientdb/biodbs/clinvar.py | 1 +
 ebel/manager/rdbms/models/clinvar.py    | 4 ++--
 3 files changed, 3 insertions(+), 8 deletions(-)

diff --git a/ebel/manager/orientdb/biodbs/bel.py b/ebel/manager/orientdb/biodbs/bel.py
index 126eab6..ed7241c 100644
--- a/ebel/manager/orientdb/biodbs/bel.py
+++ b/ebel/manager/orientdb/biodbs/bel.py
@@ -695,9 +695,3 @@ def insert_data(self) -> Dict[str, int]:
     def update_interactions(self) -> int:
         """Abstract method."""
         pass
-
-
-if __name__ == "__main__":
-    b = Bel()
-    b.clinical_trials.recreate_tables()
-    b.clinical_trials.update()
diff --git a/ebel/manager/orientdb/biodbs/clinvar.py b/ebel/manager/orientdb/biodbs/clinvar.py
index ef8f237..613ce5e 100644
--- a/ebel/manager/orientdb/biodbs/clinvar.py
+++ b/ebel/manager/orientdb/biodbs/clinvar.py
@@ -57,6 +57,7 @@ def __contains__(self, item):
     def insert_data(self) -> Dict[str, int]:
         """Insert data."""
         inserted = {}
+        logger.info("Insert data for ClinVar")
         self.recreate_tables()
         df = pd.read_csv(self.file_path, sep="\t", low_memory=False)
         self._standardize_dataframe(df)
diff --git a/ebel/manager/rdbms/models/clinvar.py b/ebel/manager/rdbms/models/clinvar.py
index 3d91d16..e70a738 100644
--- a/ebel/manager/rdbms/models/clinvar.py
+++ b/ebel/manager/rdbms/models/clinvar.py
@@ -1,5 +1,5 @@
 """ClinVar RDBMS model definition."""
-from typing import List
+from typing import List, Optional
 
 from sqlalchemy import ForeignKey, Index, Integer, String, Table, Text, Column
 from sqlalchemy.ext.declarative import declarative_base
@@ -58,7 +58,7 @@ class Clinvar(Base):
     hgnc_id: Mapped[str] = mapped_column(String(100))
     clinical_significance: Mapped[str] = mapped_column(String(100))
     clin_sig_simple: Mapped[int] = mapped_column()
-    last_evaluated: Mapped[str] = mapped_column(String(100))
+    last_evaluated: Mapped[Optional[str]] = mapped_column(String(100))
     rs_db_snp: Mapped[int] = mapped_column(index=True)
     nsv_esv_db_var: Mapped[str] = mapped_column(String(100))
     rcvaccession: Mapped[str] = mapped_column(String(1000))

From f87311af3911f54df41c12f1c6181a1f4d4bc2eb Mon Sep 17 00:00:00 2001
From: Bruce Schultz <bschultz013@gmail.com>
Date: Mon, 18 Sep 2023 10:13:54 +0200
Subject: [PATCH 04/58] build: upgrade click version to work with flask deps

---
 pyproject.toml   | 2 +-
 requirements.txt | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/pyproject.toml b/pyproject.toml
index 82b66a4..a0b9f80 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -40,7 +40,7 @@ Documentation = 'https://ebel.readthedocs.io/en/latest/'
 
 [tool.poetry.dependencies]
 lark-parser = "^0.11.3"
-click = "^7.1.2"
+click = "^8.1.7"
 requests = "^2.31.0"
 tqdm = "^4.66.1"
 pandas = "^1.5.3"
diff --git a/requirements.txt b/requirements.txt
index 920280b..d128e4b 100755
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,5 +1,5 @@
 lark-parser==0.11.3
-click>=7.1.2
+click>=8.1.7
 requests>=2.31.0
 tqdm>=4.66.1
 pandas>=1.5.3

From 05a42148e021f6b9dcfcfcd16d8f22c09121a64a Mon Sep 17 00:00:00 2001
From: Bruce Schultz <bschultz013@gmail.com>
Date: Mon, 18 Sep 2023 10:19:59 +0200
Subject: [PATCH 05/58] fix: disgenet URLs and merge method

---
 ebel/manager/orientdb/biodbs/disgenet.py | 25 ++++++++++++++++++------
 ebel/manager/orientdb/urls.py            |  4 ++--
 ebel/manager/rdbms/models/disgenet.py    |  6 +++---
 3 files changed, 24 insertions(+), 11 deletions(-)

diff --git a/ebel/manager/orientdb/biodbs/disgenet.py b/ebel/manager/orientdb/biodbs/disgenet.py
index d884350..149242c 100644
--- a/ebel/manager/orientdb/biodbs/disgenet.py
+++ b/ebel/manager/orientdb/biodbs/disgenet.py
@@ -4,10 +4,11 @@
 
 import pandas as pd
 from pyorientdb import OrientDB
-from sqlalchemy import text
+from sqlalchemy import text, select
 from tqdm import tqdm
 
 from ebel.manager.orientdb import odb_meta, odb_structure, urls
+from ebel.manager.orientdb.biodbs.ensembl import Ensembl
 from ebel.manager.orientdb.constants import DISGENET
 from ebel.manager.rdbms.models import disgenet
 from ebel.tools import get_disease_trait_keywords_from_config, get_file_path
@@ -51,6 +52,12 @@ def __repr__(self) -> str:
     def insert_data(self) -> Dict[str, int]:
         """Insert data into database."""
         logger.info(f"Import {self.biodb_name.upper()}")
+
+        # Update Ensembl first since DisGeNet is dependent on it
+        ens = Ensembl()
+        ens.update()
+
+        # Insert data
         inserted = dict()
         inserted["sources"] = self._insert_sources()
         inserted["gene_symbols"] = self._insert_gene_symbols()
@@ -74,8 +81,8 @@ def file_path_variant(self):
         return self.__get_file_for_model(disgenet.DisgenetVariant)
 
     def _insert_sources(self):
-        df_g = pd.read_csv(self.file_path_gene, sep="\t", usecols=["source"]).drop_duplicates()
-        df_v = pd.read_csv(self.file_path_variant, sep="\t", usecols=["source"]).drop_duplicates()
+        df_g = pd.read_csv(self.file_path_gene, sep="\t", usecols=["source"])
+        df_v = pd.read_csv(self.file_path_variant, sep="\t", usecols=["source"])
         df = pd.concat([df_g, df_v]).drop_duplicates()
         df.reset_index(inplace=True, drop=True)
         df.index += 1
@@ -116,9 +123,10 @@ def _insert_gene_symbols(self) -> int:
         return df.shape[0]
 
     def _merge_with_source(self, df):
-        df_sources = pd.read_sql_table(disgenet.DisgenetSource.__tablename__, self.engine).rename(
-            columns={"id": "source_id"}
-        )
+        with self.engine.connect() as conn:
+            stmt = select(disgenet.DisgenetSource)
+            df_sources = pd.read_sql(stmt, conn).rename(columns={"id": "source_id"})
+
         return pd.merge(df, df_sources, on="source").drop(columns=["source"])
 
     def _insert_gene_disease_pmid_associations(self) -> int:
@@ -245,3 +253,8 @@ def update_snps(self) -> int:
                 inserted += 1
 
         return inserted
+
+
+if __name__ == "__main__":
+    dis = DisGeNet()
+    dis.update()
diff --git a/ebel/manager/orientdb/urls.py b/ebel/manager/orientdb/urls.py
index a6c31d2..4e1a06f 100755
--- a/ebel/manager/orientdb/urls.py
+++ b/ebel/manager/orientdb/urls.py
@@ -52,8 +52,8 @@
 WIKIPATHWAYS = "http://data.wikipathways.org/20180710/gpml/wikipathways-20180710-gpml-Homo_sapiens.zip"
 
 # Ensembl #
-ENSEMBL_FASTA_PEP = "ftp://ftp.ensembl.org/pub/release-94/fasta/homo_sapiens/pep/Homo_sapiens.GRCh38.pep.all.fa.gz"
-ENSEMBL_CDS = "ftp://ftp.ensembl.org/pub/release-96/fasta/homo_sapiens/cds/Homo_sapiens.GRCh38.cds.all.fa.gz"
+ENSEMBL_FASTA_PEP = "https://ftp.ensembl.org/pub/release-94/fasta/homo_sapiens/pep/Homo_sapiens.GRCh38.pep.all.fa.gz"
+ENSEMBL_CDS = "https://ftp.ensembl.org/pub/release-96/fasta/homo_sapiens/cds/Homo_sapiens.GRCh38.cds.all.fa.gz"
 
 # SIDER #
 SIDER_ATC = "http://sideeffects.embl.de/media/download/drug_atc.tsv"
diff --git a/ebel/manager/rdbms/models/disgenet.py b/ebel/manager/rdbms/models/disgenet.py
index 3127fd4..9ff23fe 100644
--- a/ebel/manager/rdbms/models/disgenet.py
+++ b/ebel/manager/rdbms/models/disgenet.py
@@ -1,5 +1,5 @@
 """DisGeNet RDBMS model definition."""
-from typing import List
+from typing import List, Optional
 
 from sqlalchemy import BigInteger, Float, ForeignKey, Integer, String
 from sqlalchemy.ext.declarative import declarative_base
@@ -23,7 +23,7 @@ class DisgenetGene(Base):
     disease_id: Mapped[str] = mapped_column(String(100), ForeignKey("disgenet_disease.disease_id"))
     disease: Mapped["DisgenetDisease"] = relationship("DisgenetDisease", foreign_keys=[disease_id])
     score: Mapped[float] = mapped_column()
-    pmid: Mapped[int] = mapped_column()
+    pmid: Mapped[Optional[int]] = mapped_column()
     source_id: Mapped[int] = mapped_column(ForeignKey("disgenet_source.id"))
     source: Mapped["DisgenetSource"] = relationship("DisgenetSource", foreign_keys=[source_id])
 
@@ -68,7 +68,7 @@ class DisgenetVariant(Base):
     disease_id: Mapped[str] = mapped_column(String(100), ForeignKey("disgenet_disease.disease_id"))
     disease: Mapped["DisgenetDisease"] = relationship("DisgenetDisease", foreign_keys=[disease_id])
     score: Mapped[float] = mapped_column()
-    pmid: Mapped[int] = mapped_column(index=True)
+    pmid: Mapped[Optional[int]] = mapped_column(index=True)
     source_id: Mapped[int] = mapped_column(ForeignKey("disgenet_source.id"))
     source: Mapped["DisgenetSource"] = relationship("DisgenetSource", foreign_keys=[source_id])
 

From 84da7eb26902e5e4211535bd3a7d989d704a5f3f Mon Sep 17 00:00:00 2001
From: Bruce Schultz <bschultz013@gmail.com>
Date: Mon, 18 Sep 2023 10:47:47 +0200
Subject: [PATCH 06/58] fix: make disgenet source_id nullable; wrap raw SQL in text()

---
 ebel/manager/orientdb/biodbs/disgenet.py | 5 -----
 ebel/manager/orientdb/odb_meta.py        | 2 +-
 ebel/manager/rdbms/models/disgenet.py    | 4 ++--
 3 files changed, 3 insertions(+), 8 deletions(-)

diff --git a/ebel/manager/orientdb/biodbs/disgenet.py b/ebel/manager/orientdb/biodbs/disgenet.py
index 149242c..bb3259d 100644
--- a/ebel/manager/orientdb/biodbs/disgenet.py
+++ b/ebel/manager/orientdb/biodbs/disgenet.py
@@ -253,8 +253,3 @@ def update_snps(self) -> int:
                 inserted += 1
 
         return inserted
-
-
-if __name__ == "__main__":
-    dis = DisGeNet()
-    dis.update()
diff --git a/ebel/manager/orientdb/odb_meta.py b/ebel/manager/orientdb/odb_meta.py
index db5cff7..8773bad 100644
--- a/ebel/manager/orientdb/odb_meta.py
+++ b/ebel/manager/orientdb/odb_meta.py
@@ -1346,7 +1346,7 @@ def get_set_gene_rids_by_position(
 
         for gene_type, sql in sqls.items():
             if gene_type in gene_types:
-                results = self.session.execute(sql)
+                results = self.session.execute(text(sql))
                 for (symbol,) in results.fetchall():
                     bel = f'g(HGNC:"{symbol}")'
                     data = {
diff --git a/ebel/manager/rdbms/models/disgenet.py b/ebel/manager/rdbms/models/disgenet.py
index 9ff23fe..3ee071c 100644
--- a/ebel/manager/rdbms/models/disgenet.py
+++ b/ebel/manager/rdbms/models/disgenet.py
@@ -24,7 +24,7 @@ class DisgenetGene(Base):
     disease: Mapped["DisgenetDisease"] = relationship("DisgenetDisease", foreign_keys=[disease_id])
     score: Mapped[float] = mapped_column()
     pmid: Mapped[Optional[int]] = mapped_column()
-    source_id: Mapped[int] = mapped_column(ForeignKey("disgenet_source.id"))
+    source_id: Mapped[Optional[int]] = mapped_column(ForeignKey("disgenet_source.id"))
     source: Mapped["DisgenetSource"] = relationship("DisgenetSource", foreign_keys=[source_id])
 
     def as_dict(self):
@@ -69,7 +69,7 @@ class DisgenetVariant(Base):
     disease: Mapped["DisgenetDisease"] = relationship("DisgenetDisease", foreign_keys=[disease_id])
     score: Mapped[float] = mapped_column()
     pmid: Mapped[Optional[int]] = mapped_column(index=True)
-    source_id: Mapped[int] = mapped_column(ForeignKey("disgenet_source.id"))
+    source_id: Mapped[Optional[int]] = mapped_column(ForeignKey("disgenet_source.id"))
     source: Mapped["DisgenetSource"] = relationship("DisgenetSource", foreign_keys=[source_id])
 
     def as_dict(self):

From 02142f9248b687c810960d7929cf9db2ff07973a Mon Sep 17 00:00:00 2001
From: Bruce Schultz <bschultz013@gmail.com>
Date: Mon, 18 Sep 2023 11:22:52 +0200
Subject: [PATCH 07/58] fix: hgnc sqla2 import

---
 ebel/manager/orientdb/biodbs/hgnc.py | 105 +++++++++++++++++----------
 ebel/manager/orientdb/urls.py        |   6 +-
 ebel/manager/rdbms/models/hgnc.py    |  51 ++++++-------
 3 files changed, 96 insertions(+), 66 deletions(-)

diff --git a/ebel/manager/orientdb/biodbs/hgnc.py b/ebel/manager/orientdb/biodbs/hgnc.py
index 8140544..13c7598 100644
--- a/ebel/manager/orientdb/biodbs/hgnc.py
+++ b/ebel/manager/orientdb/biodbs/hgnc.py
@@ -9,11 +9,32 @@
 import numpy as np
 import pandas as pd
 from pyorientdb import OrientDB
+from sqlalchemy import select
 from tqdm import tqdm
 
 from ebel.manager.orientdb import odb_meta, odb_structure, urls
 from ebel.manager.orientdb.constants import HGNC
 from ebel.manager.rdbms.models import hgnc
+from ebel.manager.rdbms.models.hgnc import (
+    Hgnc as HgncDb,
+    PrevSymbol,
+    AliasSymbol,
+    AliasName,
+    Ccds,
+    Ena,
+    Enzyme,
+    GeneGroupName,
+    GeneGroupId,
+    UniProt,
+    RnaCentral,
+    Rgd,
+    RefSeq,
+    PubMed,
+    PrevName,
+    Omim,
+    Mgd,
+    Lsdb,
+)
 from ebel.tools import get_file_path
 
 logger = logging.getLogger(__name__)
@@ -117,28 +138,28 @@ def import_hgnc_into_rdbms(self) -> int:
 
         df["id"] = pd.to_numeric(df.hgnc_id.str.split(":").str[1])
         df.set_index("id", inplace=True)
-        df[columns].to_sql(hgnc.Hgnc.__tablename__, self.engine, if_exists="append")
+        df[columns].to_sql(HgncDb.__tablename__, self.engine, if_exists="append")
 
         df.hgnc_id = pd.to_numeric(df.hgnc_id.str.split(":").str[1])
 
         for df_col, model, m_col in (
-            ("prev_symbol", hgnc.PrevSymbol, None),
-            ("alias_symbol", hgnc.AliasSymbol, None),
-            ("alias_name", hgnc.AliasName, None),
-            ("ccds_id", hgnc.Ccds, "identifier"),
-            ("ena", hgnc.Ena, "identifier"),
-            ("enzyme_id", hgnc.Enzyme, "ec_number"),
-            ("gene_group", hgnc.GeneGroupName, "name"),
-            ("gene_group_id", hgnc.GeneGroupId, "identifier"),
-            ("uniprot_ids", hgnc.UniProt, "accession"),
-            ("rna_central_id", hgnc.RnaCentral, "identifier"),
-            ("rgd_id", hgnc.Rgd, "identifier"),
-            ("refseq_accession", hgnc.RefSeq, "accession"),
-            ("pubmed_id", hgnc.PubMed, "pmid"),
-            ("prev_name", hgnc.PrevName, None),
-            ("omim_id", hgnc.Omim, "identifier"),
-            ("mgd_id", hgnc.Mgd, "identifier"),
-            ("lsdb", hgnc.Lsdb, "identifier"),
+            ("prev_symbol", PrevSymbol, None),
+            ("alias_symbol", AliasSymbol, None),
+            ("alias_name", AliasName, None),
+            ("ccds_id", Ccds, "identifier"),
+            ("ena", Ena, "identifier"),
+            ("enzyme_id", Enzyme, "ec_number"),
+            ("gene_group", GeneGroupName, "name"),
+            ("gene_group_id", GeneGroupId, "identifier"),
+            ("uniprot_ids", UniProt, "accession"),
+            ("rna_central_id", RnaCentral, "identifier"),
+            ("rgd_id", Rgd, "identifier"),
+            ("refseq_accession", RefSeq, "accession"),
+            ("pubmed_id", PubMed, "pmid"),
+            ("prev_name", PrevName, None),
+            ("omim_id", Omim, "identifier"),
+            ("mgd_id", Mgd, "identifier"),
+            ("lsdb", Lsdb, "identifier"),
         ):
             df_1n_table = df[[df_col, "hgnc_id"]].explode(df_col).dropna()
             if m_col:
@@ -264,11 +285,10 @@ def get_bel_symbols_all(self):
 
     def get_correct_symbol(self, symbol: str):
         """Checks if symbol is valid otherwise checks previsous symbols."""
-        result_in_symbol = self.session.query(hgnc.Hgnc).filter(hgnc.Hgnc.symbol == symbol).first()
+        symbol_query = select(HgncDb).where(HgncDb.symbol == symbol)
+        result_in_symbol = self.session.execute(symbol_query).first()
         if not result_in_symbol:
-            result_in_prev_symbol = (
-                self.session.query(hgnc.PrevSymbol).filter(hgnc.PrevSymbol.prev_symbol == symbol).first()
-            )
+            result_in_prev_symbol = self.session.query(PrevSymbol).filter(PrevSymbol.prev_symbol == symbol).first()
             if result_in_prev_symbol:
                 symbol = result_in_prev_symbol.hgnc.symbol
             else:
@@ -277,7 +297,7 @@ def get_correct_symbol(self, symbol: str):
 
     def correct_wrong_symbol(self, symbol, bel_symbols_all: set):
         """Corrects the symbol of the node and relinks all edges to existing node if needed."""
-        result = self.session.query(hgnc.PrevSymbol).filter_by(prev_symbol=symbol).first()
+        result = self.session.query(PrevSymbol).filter_by(prev_symbol=symbol).first()
         if result:
             correct_symbol = result.hgnc.symbol
             if correct_symbol not in bel_symbols_all:
@@ -300,7 +320,10 @@ def update_bel(self) -> int:
 
         bel_symbols_all = self.get_bel_symbols_all()
         symbols_without_hgnc = self.get_bel_symbols_without_hgnc_link()
-        hgnc_symbols = {x[0] for x in self.session.query(hgnc.Hgnc.symbol).all()}
+
+        symbol_query = select(HgncDb.symbol)
+        symbol_results = self.session.execute(symbol_query).all()
+        hgnc_symbols = {x[0] for x in symbol_results}
 
         for wrong_symbol in symbols_without_hgnc - hgnc_symbols:
             self.correct_wrong_symbol(wrong_symbol, bel_symbols_all)
@@ -377,28 +400,28 @@ def update_protein(self, hgnc_rid: str, label: str, hgnc_symbol: str, suggested_
 
     def update_nodes_by_symbol(self, symbol) -> dict:
         """Update all nodes by HGNC symbol."""
-        hgnc = self.get_basic_entry_by_symbol(symbol)
+        hgnc_results = self.get_basic_entry_by_symbol(symbol)
 
-        if hgnc:
-            suggest = json.dumps(hgnc.suggested_corrections) if hgnc.suggested_corrections else None
+        if hgnc_results:
+            suggest = json.dumps(hgnc_results.suggested_corrections) if hgnc_results.suggested_corrections else None
 
             num_update_genes = self.update_gene(
-                hgnc_symbol=hgnc.symbol,
-                hgnc_rid=hgnc.hgnc_rid,
-                label=hgnc.label,
-                location=hgnc.location,
+                hgnc_symbol=hgnc_results.symbol,
+                hgnc_rid=hgnc_results.hgnc_rid,
+                label=hgnc_results.label,
+                location=hgnc_results.location,
                 suggested_corrections=suggest,
             )
             num_update_rnas = self.update_rna(
-                hgnc_symbol=hgnc.symbol,
-                hgnc_rid=hgnc.hgnc_rid,
-                label=hgnc.label,
+                hgnc_symbol=hgnc_results.symbol,
+                hgnc_rid=hgnc_results.hgnc_rid,
+                label=hgnc_results.label,
                 suggested_corrections=suggest,
             )
             num_update_proteins = self.update_protein(
-                hgnc_symbol=hgnc.symbol,
-                hgnc_rid=hgnc.hgnc_rid,
-                label=hgnc.label,
+                hgnc_symbol=hgnc_results.symbol,
+                hgnc_rid=hgnc_results.hgnc_rid,
+                label=hgnc_results.label,
                 suggested_corrections=suggest,
             )
             return {
@@ -409,9 +432,15 @@ def update_nodes_by_symbol(self, symbol) -> dict:
 
     def get_symbol_entrez_dict(self) -> Dict[str, int]:
         """Return dictionary with gene symbols as keys and entrez IDs as values."""
-        query = self.session.query(hgnc.Hgnc.symbol, hgnc.Hgnc.entrez_id).filter(hgnc.Hgnc.entrez_id.isnot(None))
+        query = self.session.query(HgncDb.symbol, HgncDb.entrez_id).filter(HgncDb.entrez_id.isnot(None))
         return {r.symbol: r.entrez_id for r in query.all()}
 
     def update_interactions(self) -> int:
         """Abstract method."""
         pass
+
+
+if __name__ == "__main__":
+    hgncdb = Hgnc()
+    hgncdb.recreate_tables()
+    hgncdb.update()
diff --git a/ebel/manager/orientdb/urls.py b/ebel/manager/orientdb/urls.py
index 4e1a06f..0e42a79 100755
--- a/ebel/manager/orientdb/urls.py
+++ b/ebel/manager/orientdb/urls.py
@@ -1,9 +1,9 @@
 """URLs to download files."""
 
 # HGNC #
-HGNC_JSON = "ftp://ftp.ebi.ac.uk/pub/databases/genenames/new/json/hgnc_complete_set.json"
-HGNC_TSV = "ftp://ftp.ebi.ac.uk/pub/databases/genenames/hgnc/tsv/hgnc_complete_set.txt"
-HCOP_GZIP = "ftp://ftp.ebi.ac.uk/pub/databases/genenames/hcop/human_all_hcop_sixteen_column.txt.gz"
+HGNC_JSON = "https://ftp.ebi.ac.uk/pub/databases/genenames/new/json/hgnc_complete_set.json"
+HGNC_TSV = "https://ftp.ebi.ac.uk/pub/databases/genenames/hgnc/tsv/hgnc_complete_set.txt"
+HCOP_GZIP = "https://ftp.ebi.ac.uk/pub/databases/genenames/hcop/human_all_hcop_sixteen_column.txt.gz"
 
 # UniProt #
 UNIPROT_SPROT = (
diff --git a/ebel/manager/rdbms/models/hgnc.py b/ebel/manager/rdbms/models/hgnc.py
index 56c0b64..b356682 100644
--- a/ebel/manager/rdbms/models/hgnc.py
+++ b/ebel/manager/rdbms/models/hgnc.py
@@ -1,6 +1,6 @@
 """HGNC RDBMS model definition."""
 import datetime
-from typing import List
+from typing import List, Optional
 
 from sqlalchemy import BigInteger, Column, Date, ForeignKey, Integer, String, Text
 from sqlalchemy.ext.declarative import declarative_base
@@ -15,41 +15,42 @@ class Hgnc(Base):
     """Class definition for the hgnc table."""
 
     __tablename__ = "hgnc"
+
     id: Mapped[int] = mapped_column(primary_key=True)
     hgnc_id: Mapped[str] = mapped_column(String(20))
     version: Mapped[int] = mapped_column()
-    bioparadigms_slc: Mapped[str] = mapped_column(String(20))
-    cd: Mapped[str] = mapped_column(String(20))
-    cosmic: Mapped[str] = mapped_column(String(50))
+    bioparadigms_slc: Mapped[Optional[str]] = mapped_column(String(20))
+    cd: Mapped[Optional[str]] = mapped_column(String(20))
+    cosmic: Mapped[Optional[str]] = mapped_column(String(50))
     date_approved_reserved: Mapped[datetime.date] = mapped_column(Date)
-    date_modified: Mapped[datetime.date] = mapped_column(Date)
-    date_name_changed: Mapped[datetime.date] = mapped_column(Date)
-    date_symbol_changed: Mapped[datetime.date] = mapped_column(Date)
-    ensembl_gene_id: Mapped[str] = mapped_column(String(20))
-    entrez_id: Mapped[int] = mapped_column()
-    homeodb: Mapped[int] = mapped_column()
-    horde_id: Mapped[str] = mapped_column(String(50))
-    imgt: Mapped[str] = mapped_column(String(50))
-    iuphar: Mapped[str] = mapped_column(String(50))
+    date_modified: Mapped[Optional[datetime.date]] = mapped_column(Date)
+    date_name_changed: Mapped[Optional[datetime.date]] = mapped_column(Date)
+    date_symbol_changed: Mapped[Optional[datetime.date]] = mapped_column(Date)
+    ensembl_gene_id: Mapped[Optional[str]] = mapped_column(String(20))
+    entrez_id: Mapped[Optional[int]] = mapped_column()
+    homeodb: Mapped[Optional[int]] = mapped_column()
+    horde_id: Mapped[Optional[str]] = mapped_column(String(50))
+    imgt: Mapped[Optional[str]] = mapped_column(String(50))
+    iuphar: Mapped[Optional[str]] = mapped_column(String(50))
     kznf_gene_catalog: Mapped[int] = mapped_column()
-    lncipedia: Mapped[str] = mapped_column(String(50))
-    lncrnadb: Mapped[str] = mapped_column(String(50))
-    location: Mapped[str] = mapped_column(String(100))
-    location_sortable: Mapped[str] = mapped_column(String(100))
+    lncipedia: Mapped[Optional[str]] = mapped_column(String(50))
+    lncrnadb: Mapped[Optional[str]] = mapped_column(String(50))
+    location: Mapped[Optional[str]] = mapped_column(String(100))
+    location_sortable: Mapped[Optional[str]] = mapped_column(String(100))
     locus_group: Mapped[str] = mapped_column(String(50))
     locus_type: Mapped[str] = mapped_column(String(50))
-    merops: Mapped[str] = mapped_column(String(20))
-    mirbase: Mapped[str] = mapped_column(String(20))
+    merops: Mapped[Optional[str]] = mapped_column(String(20))
+    mirbase: Mapped[Optional[str]] = mapped_column(String(20))
     name: Mapped[str] = mapped_column(String(255))
-    orphanet: Mapped[int] = mapped_column()
-    snornabase: Mapped[str] = mapped_column(String(20))
+    orphanet: Mapped[Optional[int]] = mapped_column()
+    snornabase: Mapped[Optional[str]] = mapped_column(String(20))
     status: Mapped[str] = mapped_column(String(50))
     symbol: Mapped[str] = mapped_column(String(100), index=True)
-    ucsc_id: Mapped[str] = mapped_column(String(50))
+    ucsc_id: Mapped[Optional[str]] = mapped_column(String(50))
     uuid: Mapped[str] = mapped_column(String(50))
-    vega_id: Mapped[str] = mapped_column(String(50))
-    agr: Mapped[str] = mapped_column(String(50))
-    kznf_gene_catalog: Mapped[str] = mapped_column(Text)
+    vega_id: Mapped[Optional[str]] = mapped_column(String(50))
+    agr: Mapped[Optional[str]] = mapped_column(String(50))
+    kznf_gene_catalog: Mapped[Optional[str]] = mapped_column(Text)
 
     pre_symbols: Mapped[List["PrevSymbol"]] = relationship("PrevSymbol", back_populates="hgnc")
     alias_names: Mapped[List["AliasName"]] = relationship("AliasName", back_populates="hgnc")

From fce52806351870471b46c99d81c96cda6470f813 Mon Sep 17 00:00:00 2001
From: Bruce Schultz <bschultz013@gmail.com>
Date: Mon, 18 Sep 2023 11:39:41 +0200
Subject: [PATCH 08/58] feat: drugbank sqla2 import

---
 ebel/manager/orientdb/biodbs/hgnc.py  |  6 ----
 ebel/manager/rdbms/models/drugbank.py | 44 ++++++++++++++++-----------
 2 files changed, 27 insertions(+), 23 deletions(-)

diff --git a/ebel/manager/orientdb/biodbs/hgnc.py b/ebel/manager/orientdb/biodbs/hgnc.py
index 13c7598..91d1a65 100644
--- a/ebel/manager/orientdb/biodbs/hgnc.py
+++ b/ebel/manager/orientdb/biodbs/hgnc.py
@@ -438,9 +438,3 @@ def get_symbol_entrez_dict(self) -> Dict[str, int]:
     def update_interactions(self) -> int:
         """Abstract method."""
         pass
-
-
-if __name__ == "__main__":
-    hgncdb = Hgnc()
-    hgncdb.recreate_tables()
-    hgncdb.update()
diff --git a/ebel/manager/rdbms/models/drugbank.py b/ebel/manager/rdbms/models/drugbank.py
index 8877527..6cd9b52 100644
--- a/ebel/manager/rdbms/models/drugbank.py
+++ b/ebel/manager/rdbms/models/drugbank.py
@@ -1,6 +1,6 @@
 """DrugBank RDBMS model definition."""
 import datetime
-from typing import List
+from typing import List, Optional
 
 from sqlalchemy import Column, Date, ForeignKey, Integer, String, Text
 from sqlalchemy.ext.declarative import declarative_base
@@ -13,24 +13,25 @@ class Drugbank(Base):
     """Class definition for the drugbank table."""
 
     __tablename__ = "drugbank"
+
     id: Mapped[int] = mapped_column(primary_key=True)
     drugbank_id: Mapped[str] = mapped_column(String(10), index=True)
     name: Mapped[str] = mapped_column(String(255))
-    description: Mapped[str] = mapped_column(Text)
-    cas_number: Mapped[str] = mapped_column(String(20))
-    unii: Mapped[str] = mapped_column(String(20))
-    state: Mapped[str] = mapped_column(String(20))
-    indication: Mapped[str] = mapped_column(Text)
-    pharmacodynamics: Mapped[str] = mapped_column(Text)
-    toxicity: Mapped[str] = mapped_column(Text)
-    metabolism: Mapped[str] = mapped_column(Text)
-    absorption: Mapped[str] = mapped_column(Text)
-    half_life: Mapped[str] = mapped_column(Text)
-    route_of_elimination: Mapped[str] = mapped_column(Text)
-    volume_of_distribution: Mapped[str] = mapped_column(Text)
-    clearance: Mapped[str] = mapped_column(Text)
-    mechanism_of_action: Mapped[str] = mapped_column(Text)
-    fda_label: Mapped[str] = mapped_column(Text)
+    description: Mapped[Optional[str]] = mapped_column(Text)
+    cas_number: Mapped[Optional[str]] = mapped_column(String(20))
+    unii: Mapped[Optional[str]] = mapped_column(String(20))
+    state: Mapped[Optional[str]] = mapped_column(String(20))
+    indication: Mapped[Optional[str]] = mapped_column(Text)
+    pharmacodynamics: Mapped[Optional[str]] = mapped_column(Text)
+    toxicity: Mapped[Optional[str]] = mapped_column(Text)
+    metabolism: Mapped[Optional[str]] = mapped_column(Text)
+    absorption: Mapped[Optional[str]] = mapped_column(Text)
+    half_life: Mapped[Optional[str]] = mapped_column(Text)
+    route_of_elimination: Mapped[Optional[str]] = mapped_column(Text)
+    volume_of_distribution: Mapped[Optional[str]] = mapped_column(Text)
+    clearance: Mapped[Optional[str]] = mapped_column(Text)
+    mechanism_of_action: Mapped[Optional[str]] = mapped_column(Text)
+    fda_label: Mapped[Optional[str]] = mapped_column(Text)
 
     references: Mapped[List["Reference"]] = relationship("Reference", back_populates="drugbank", cascade="save-update")
     synonyms: Mapped[List["Synonym"]] = relationship("Synonym", back_populates="drugbank", cascade="save-update")
@@ -85,6 +86,7 @@ class Pathway(Base):
     """Class definition for the drugbank_pathway table."""
 
     __tablename__ = "drugbank_pathway"
+
     id: Mapped[int] = mapped_column(primary_key=True)
     smpdb_id: Mapped[str] = mapped_column(String(255))
 
@@ -103,6 +105,7 @@ class Patent(Base):
     """Class definition for the drugbank_patent table."""
 
     __tablename__ = "drugbank_patent"
+
     id: Mapped[int] = mapped_column(primary_key=True)
     number: Mapped[str] = mapped_column(String(255))
     country: Mapped[str] = mapped_column(String(255))
@@ -132,6 +135,7 @@ class Status(Base):
     """Class definition for the drugbank_status table."""
 
     __tablename__ = "drugbank_status"
+
     id: Mapped[int] = mapped_column(primary_key=True)
     status: Mapped[str] = mapped_column(String(20), index=True)
 
@@ -150,6 +154,7 @@ class ExternalIdentifier(Base):
     """Class definition for the drugbank_external_identifier table."""
 
     __tablename__ = "drugbank_external_identifier"
+
     id: Mapped[int] = mapped_column(primary_key=True)
     resource: Mapped[str] = mapped_column(String(255), index=True)
     identifier: Mapped[str] = mapped_column(String(255), index=True)
@@ -173,6 +178,7 @@ class Reference(Base):
     """Class definition for the drugbank_reference table."""
 
     __tablename__ = "drugbank_reference"
+
     id: Mapped[int] = mapped_column(primary_key=True)
     pmid: Mapped[int] = mapped_column()
 
@@ -191,9 +197,10 @@ class Target(Base):
     """Class definition for the drugbank_target table."""
 
     __tablename__ = "drugbank_target"
+
     id: Mapped[int] = mapped_column(primary_key=True)
     uniprot: Mapped[str] = mapped_column(String(20), index=True)
-    action: Mapped[str] = mapped_column(String(50), index=True)
+    action: Mapped[Optional[str]] = mapped_column(String(50), index=True)
     known_action: Mapped[str] = mapped_column(String(20), index=True)
 
     drugbank_id: Mapped[int] = mapped_column(ForeignKey("drugbank.id"))
@@ -216,6 +223,7 @@ class DrugInteraction(Base):
     """Class definition for the drugbank_drug_interaction table."""
 
     __tablename__ = "drugbank_drug_interaction"
+
     id: Mapped[int] = mapped_column(primary_key=True)
     drugbank_id: Mapped[str] = mapped_column(String(10), index=True)
     name: Mapped[str] = mapped_column(Text)
@@ -241,6 +249,7 @@ class ProductName(Base):
     """Class definition for the drugbank_product_name table."""
 
     __tablename__ = "drugbank_product_name"
+
     id: Mapped[int] = mapped_column(primary_key=True)
     name: Mapped[str] = mapped_column(Text)
 
@@ -259,6 +268,7 @@ class Synonym(Base):
     """Class definition for the drugbank_synonym table."""
 
     __tablename__ = "drugbank_synonym"
+
     id: Mapped[int] = mapped_column(primary_key=True)
     synonym: Mapped[str] = mapped_column(Text)
 

From dfe6bcd1097dc5defc961729fbcb9736298b7fb5 Mon Sep 17 00:00:00 2001
From: Bruce Schultz <bschultz013@gmail.com>
Date: Mon, 18 Sep 2023 12:00:32 +0200
Subject: [PATCH 09/58] feat: gwas catalog sqla2 import

---
 ebel/manager/rdbms/models/gwas_catalog.py | 40 +++++++++++------------
 1 file changed, 20 insertions(+), 20 deletions(-)

diff --git a/ebel/manager/rdbms/models/gwas_catalog.py b/ebel/manager/rdbms/models/gwas_catalog.py
index 6c2c9a9..c846ead 100644
--- a/ebel/manager/rdbms/models/gwas_catalog.py
+++ b/ebel/manager/rdbms/models/gwas_catalog.py
@@ -1,5 +1,5 @@
 """GWAS Catalog RDBMS model definition."""
-from typing import List
+from typing import List, Optional
 
 from sqlalchemy import Column, Float, ForeignKey, Integer, String, Text
 from sqlalchemy.ext.declarative import declarative_base
@@ -23,29 +23,29 @@ class GwasCatalog(Base):
     link: Mapped[str] = mapped_column(String(255))
     study: Mapped[str] = mapped_column(Text)
     disease_trait: Mapped[str] = mapped_column(String(255))
-    initial_sample_size: Mapped[str] = mapped_column(Text)
-    replication_sample_size: Mapped[str] = mapped_column(Text)
-    region: Mapped[str] = mapped_column(String(50))
-    chr_id: Mapped[str] = mapped_column(Text)
-    chr_pos: Mapped[str] = mapped_column(Text)
-    reported_gene_s: Mapped[str] = mapped_column(Text)
-    mapped_gene: Mapped[str] = mapped_column(Text)
-    upstream_gene_id: Mapped[str] = mapped_column(String(50))
-    downstream_gene_id: Mapped[str] = mapped_column(String(50))
-    upstream_gene_distance: Mapped[int] = mapped_column()
-    downstream_gene_distance: Mapped[int] = mapped_column()
+    initial_sample_size: Mapped[Optional[str]] = mapped_column(Text)
+    replication_sample_size: Mapped[Optional[str]] = mapped_column(Text)
+    region: Mapped[Optional[str]] = mapped_column(String(50))
+    chr_id: Mapped[Optional[str]] = mapped_column(Text)
+    chr_pos: Mapped[Optional[str]] = mapped_column(Text)
+    reported_gene_s: Mapped[Optional[str]] = mapped_column(Text)
+    mapped_gene: Mapped[Optional[str]] = mapped_column(Text)
+    upstream_gene_id: Mapped[Optional[str]] = mapped_column(String(50))
+    downstream_gene_id: Mapped[Optional[str]] = mapped_column(String(50))
+    upstream_gene_distance: Mapped[Optional[int]] = mapped_column()
+    downstream_gene_distance: Mapped[Optional[int]] = mapped_column()
     strongest_snp_risk_allele: Mapped[str] = mapped_column(Text)
     snp: Mapped[str] = mapped_column(Text)
-    merged: Mapped[int] = mapped_column()
-    snp_id_current: Mapped[str] = mapped_column(Text)
-    context: Mapped[str] = mapped_column(Text)
-    intergenic: Mapped[int] = mapped_column()
-    risk_allele_frequency: Mapped[str] = mapped_column(Text)
+    merged: Mapped[Optional[int]] = mapped_column()
+    snp_id_current: Mapped[Optional[str]] = mapped_column(Text)
+    context: Mapped[Optional[str]] = mapped_column(Text)
+    intergenic: Mapped[Optional[int]] = mapped_column()
+    risk_allele_frequency: Mapped[Optional[str]] = mapped_column(Text)
     p_value: Mapped[float] = mapped_column()
     pvalue_mlog: Mapped[float] = mapped_column()
-    p_value_text: Mapped[str] = mapped_column(Text)
-    or_or_beta: Mapped[float] = mapped_column()
-    _95_ci_text: Mapped[str] = mapped_column(Text)
+    p_value_text: Mapped[Optional[str]] = mapped_column(Text)
+    or_or_beta: Mapped[Optional[float]] = mapped_column()
+    _95_ci_text: Mapped[Optional[str]] = mapped_column(Text)
     platform_snps_passing_qc: Mapped[str] = mapped_column(Text)
     cnv: Mapped[str] = mapped_column(Text)
 

From c0baf7171110e0da911985af2b64bfd2c99685f3 Mon Sep 17 00:00:00 2001
From: Bruce Schultz <bschultz013@gmail.com>
Date: Mon, 18 Sep 2023 12:19:34 +0200
Subject: [PATCH 10/58] feat: clinvar sqla2 import

---
 ebel/manager/rdbms/models/clinvar.py | 20 ++++++++++----------
 1 file changed, 10 insertions(+), 10 deletions(-)

diff --git a/ebel/manager/rdbms/models/clinvar.py b/ebel/manager/rdbms/models/clinvar.py
index e70a738..8f84af7 100644
--- a/ebel/manager/rdbms/models/clinvar.py
+++ b/ebel/manager/rdbms/models/clinvar.py
@@ -54,14 +54,14 @@ class Clinvar(Base):
     type: Mapped[str] = mapped_column(String(100))
     name: Mapped[str] = mapped_column(String(1000))
     gene_id: Mapped[int] = mapped_column(index=True)
-    gene_symbol: Mapped[str] = mapped_column(String(1000))
-    hgnc_id: Mapped[str] = mapped_column(String(100))
+    gene_symbol: Mapped[Optional[str]] = mapped_column(String(1000))
+    hgnc_id: Mapped[Optional[str]] = mapped_column(String(100))
     clinical_significance: Mapped[str] = mapped_column(String(100))
     clin_sig_simple: Mapped[int] = mapped_column()
     last_evaluated: Mapped[Optional[str]] = mapped_column(String(100))
     rs_db_snp: Mapped[int] = mapped_column(index=True)
-    nsv_esv_db_var: Mapped[str] = mapped_column(String(100))
-    rcvaccession: Mapped[str] = mapped_column(String(1000))
+    nsv_esv_db_var: Mapped[Optional[str]] = mapped_column(String(100))
+    rcvaccession: Mapped[Optional[str]] = mapped_column(String(1000))
     origin: Mapped[str] = mapped_column(Text)
     origin_simple: Mapped[str] = mapped_column(Text)
     assembly: Mapped[str] = mapped_column(String(100), index=True)
@@ -69,18 +69,18 @@ class Clinvar(Base):
     chromosome: Mapped[str] = mapped_column(Text)
     start: Mapped[int] = mapped_column()
     stop: Mapped[int] = mapped_column()
-    reference_allele: Mapped[str] = mapped_column(Text)
-    alternate_allele: Mapped[str] = mapped_column(Text)
-    cytogenetic: Mapped[str] = mapped_column(Text)
+    reference_allele: Mapped[Optional[str]] = mapped_column(Text)
+    alternate_allele: Mapped[Optional[str]] = mapped_column(Text)
+    cytogenetic: Mapped[Optional[str]] = mapped_column(Text)
     review_status: Mapped[str] = mapped_column(Text)
     number_submitters: Mapped[int] = mapped_column()
-    guidelines: Mapped[str] = mapped_column(Text)
+    guidelines: Mapped[Optional[str]] = mapped_column(Text)
     tested_in_gtr: Mapped[str] = mapped_column(Text)
     submitter_categories: Mapped[int] = mapped_column()
     variation_id: Mapped[int] = mapped_column()
     position_vcf: Mapped[int] = mapped_column()
-    reference_allele_vcf: Mapped[str] = mapped_column(Text(100000))
-    alternate_allele_vcf: Mapped[str] = mapped_column(Text(100000))
+    reference_allele_vcf: Mapped[Optional[str]] = mapped_column(Text(100000))
+    alternate_allele_vcf: Mapped[Optional[str]] = mapped_column(Text(100000))
 
     phenotypeMedgens: Mapped[List["ClinvarPhenotypeMedgen"]] = relationship(
         "ClinvarPhenotypeMedgen", foreign_keys=[ClinvarPhenotypeMedgen.clinvar_id]

From f8502aedb420b42946e503a724621779bb79cb54 Mon Sep 17 00:00:00 2001
From: Bruce Schultz <bschultz013@gmail.com>
Date: Mon, 18 Sep 2023 14:16:13 +0200
Subject: [PATCH 11/58] feat: mirtarbase sqla2 import

---
 ebel/manager/rdbms/models/mirtarbase.py | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/ebel/manager/rdbms/models/mirtarbase.py b/ebel/manager/rdbms/models/mirtarbase.py
index 700543f..15df19a 100644
--- a/ebel/manager/rdbms/models/mirtarbase.py
+++ b/ebel/manager/rdbms/models/mirtarbase.py
@@ -1,4 +1,6 @@
 """KEGG RDBMS model definition."""
+from typing import Optional
+
 from sqlalchemy import Column, Integer, String, Text
 from sqlalchemy.ext.declarative import declarative_base
 from sqlalchemy.orm import mapped_column, Mapped
@@ -21,7 +23,7 @@ class Mirtarbase(Base):
     target_gene_entrez_id: Mapped[int] = mapped_column()
     species_target_gene: Mapped[str] = mapped_column(String(50), index=True)
     experiments: Mapped[str] = mapped_column(Text)
-    support_type: Mapped[str] = mapped_column(String(50), index=True)
+    support_type: Mapped[Optional[str]] = mapped_column(String(50), index=True)
     references_pmid: Mapped[int] = mapped_column()
 
     def as_dict(self):

From 9bf20461b67cb31ea5810ecda98935ed7e872223 Mon Sep 17 00:00:00 2001
From: Bruce Schultz <bschultz013@gmail.com>
Date: Mon, 18 Sep 2023 15:17:31 +0200
Subject: [PATCH 12/58] feat: intact sqla2 import

---
 ebel/manager/orientdb/urls.py       | 2 +-
 ebel/manager/rdbms/models/intact.py | 4 +++-
 2 files changed, 4 insertions(+), 2 deletions(-)

diff --git a/ebel/manager/orientdb/urls.py b/ebel/manager/orientdb/urls.py
index 0e42a79..b5f3036 100755
--- a/ebel/manager/orientdb/urls.py
+++ b/ebel/manager/orientdb/urls.py
@@ -32,7 +32,7 @@
 BIOGRID = (
     "https://downloads.thebiogrid.org/Download/BioGRID/Release-Archive/BIOGRID-4.4.215/BIOGRID-ALL-4.4.215.tab3.zip"
 )
-INTACT = "ftp://ftp.ebi.ac.uk/pub/databases/intact/current/psimitab/intact.zip"
+INTACT = "https://ftp.ebi.ac.uk/pub/databases/intact/current/psimitab/intact.zip"
 STITCH = "http://stitch.embl.de/download/protein_chemical.links.transfer.v5.0.tsv.gz"
 
 # String #
diff --git a/ebel/manager/rdbms/models/intact.py b/ebel/manager/rdbms/models/intact.py
index 7067f37..fbbb3d5 100644
--- a/ebel/manager/rdbms/models/intact.py
+++ b/ebel/manager/rdbms/models/intact.py
@@ -1,4 +1,6 @@
 """IntAct RDBMS model definition."""
+from typing import Optional
+
 from sqlalchemy import Column, Float, Integer, String, Text
 from sqlalchemy.ext.declarative import declarative_base
 from sqlalchemy.orm import mapped_column, Mapped
@@ -21,7 +23,7 @@ class Intact(Base):
     interaction_ids: Mapped[str] = mapped_column(Text)
     interaction_type: Mapped[str] = mapped_column(String(100), index=True)
     interaction_type_psimi_id: Mapped[int] = mapped_column()
-    pmid: Mapped[int] = mapped_column()
+    pmid: Mapped[Optional[int]] = mapped_column()
 
     def as_dict(self):
         """Convert object values to dictionary."""

From 6b060d3916656be17445caf94508f62273256721 Mon Sep 17 00:00:00 2001
From: Bruce Schultz <bschultz013@gmail.com>
Date: Mon, 18 Sep 2023 16:20:26 +0200
Subject: [PATCH 13/58] feat: pc sqla2 import

---
 .../orientdb/biodbs/pathway_commons.py        | 30 ++++++++++++-------
 ebel/manager/rdbms/models/pathway_commons.py  |  2 +-
 2 files changed, 20 insertions(+), 12 deletions(-)

diff --git a/ebel/manager/orientdb/biodbs/pathway_commons.py b/ebel/manager/orientdb/biodbs/pathway_commons.py
index 1fcc12b..34e36a0 100644
--- a/ebel/manager/orientdb/biodbs/pathway_commons.py
+++ b/ebel/manager/orientdb/biodbs/pathway_commons.py
@@ -5,6 +5,7 @@
 
 import pandas as pd
 from pyorientdb import OrientDB
+from sqlalchemy import select
 from tqdm import tqdm
 
 from ebel.constants import RID
@@ -104,13 +105,16 @@ def create_pmids_table(self, df):
             inplace=True,
         )
         df_pmids.pmid = pd.to_numeric(df_pmids.pmid, errors="coerce")
-        df_pmids.to_sql(
-            pc.Pmid.__tablename__,
-            con=self.engine,
-            index=False,
-            if_exists="append",
-            chunksize=10000,
-        )
+        df_pmids = df_pmids[df_pmids.pmid.notna()]
+
+        with self.engine.connect() as conn:
+            df_pmids.to_sql(
+                pc.Pmid.__tablename__,
+                con=conn,
+                index=False,
+                if_exists="append",
+                chunksize=10000,
+            )
         del df_pmids
 
     def create_joining_table_names(self, df, df_pc_names):
@@ -232,9 +236,13 @@ def update_interactions(self) -> Dict[str, int]:
         for edge_type in edge_types:
             inserted[edge_type] = 0
 
-            sql = f"""Select id, participant_a, participant_b from
-                pathway_commons where interaction_type='{edge_type}'"""
-            df_ppi_of = pd.read_sql(sql, self.engine)
+            sql = select(pc.PathwayCommons.id, pc.PathwayCommons.participant_a, pc.PathwayCommons.participant_b).where(
+                pc.PathwayCommons.interaction_type == edge_type
+            )
+
+            with self.engine.connect() as conn:
+                df_ppi_of = pd.read_sql(sql, conn)
+
             df_join = (
                 df_ppi_of.set_index("participant_a")
                 .join(df_all.set_index("symbol"))
@@ -289,7 +297,7 @@ def update_interactions(self) -> Dict[str, int]:
 
     def get_pathway_pmids_sources(self, pc_id, pc_pathway_name_rid_dict) -> tuple:
         """Return all pathway, PMIDs, and their sources."""
-        pc_obj = self.session.query(pc.PathwayCommons).get(pc_id)
+        pc_obj = self.session.get(pc.PathwayCommons, pc_id)
         sources = [x.source for x in pc_obj.sources]
         pmids = [x.pmid for x in pc_obj.pmids]
         pathways = [pc_pathway_name_rid_dict[x.name] for x in pc_obj.pathway_names]
diff --git a/ebel/manager/rdbms/models/pathway_commons.py b/ebel/manager/rdbms/models/pathway_commons.py
index fde22a0..a27b49f 100644
--- a/ebel/manager/rdbms/models/pathway_commons.py
+++ b/ebel/manager/rdbms/models/pathway_commons.py
@@ -1,5 +1,5 @@
 """Pathway Commons RDBMS model definition."""
-from typing import List
+from typing import List, Optional
 
 from sqlalchemy import BigInteger, Column, ForeignKey, Integer, String, Table
 from sqlalchemy.ext.declarative import declarative_base

From 6ab734f9b3e294711629b5ab9c82e334530d325b Mon Sep 17 00:00:00 2001
From: Bruce Schultz <bschultz013@gmail.com>
Date: Mon, 18 Sep 2023 16:37:31 +0200
Subject: [PATCH 14/58] feat: stringdb sqla2 import

---
 ebel/manager/orientdb/biodbs/stringdb.py | 19 +++++++++++--------
 ebel/manager/rdbms/models/stringdb.py    |  3 ++-
 2 files changed, 13 insertions(+), 9 deletions(-)

diff --git a/ebel/manager/orientdb/biodbs/stringdb.py b/ebel/manager/orientdb/biodbs/stringdb.py
index 27272e9..b5c6774 100644
--- a/ebel/manager/orientdb/biodbs/stringdb.py
+++ b/ebel/manager/orientdb/biodbs/stringdb.py
@@ -6,7 +6,7 @@
 import numpy as np
 import pandas as pd
 from pyorientdb import OrientDB
-from sqlalchemy import text
+from sqlalchemy import text, select, or_
 from tqdm import tqdm
 
 from ebel.manager.orientdb import odb_meta, odb_structure, urls
@@ -280,13 +280,10 @@ def update_action_interactions(self, hgnc: Hgnc) -> int:
             ("inhibition", "inhibition"): "inhibits_st",
         }
 
+        sdbaction = stringdb.StringDbAction
         Action = namedtuple("Action", ("symbol1", "symbol2", "mode", "action", "score"))
 
-        columns = ", ".join(Action._fields)
-        sql_temp = f"""Select {columns} from {self.table_action}
-                       where mode in ('activation', 'inhibition', 'ptmod', 'expression')
-                       and (symbol1='{{symbol}}' or symbol2='{{symbol}}')
-                       and is_directional=1 and a_is_acting=1"""
+        modes = ("activation", "inhibition", "ptmod", "expression")
 
         symbols_rid_dict = self.get_pure_symbol_rids_dict_in_bel_context(namespace="HGNC")
         symbols = tuple(symbols_rid_dict.keys())
@@ -295,8 +292,14 @@ def update_action_interactions(self, hgnc: Hgnc) -> int:
 
         updated = 0
         for symbol in tqdm(symbols, desc="Update has_action_st edges"):
-            sql = sql_temp.format(symbol=symbol)
-            rows = self.engine.execute(text(sql))
+            sql = (
+                select(sdbaction.symbol1, sdbaction.symbol2, sdbaction.mode, sdbaction.action, sdbaction.score)
+                .where(sdbaction.mode.in_(modes))
+                .where(or_(sdbaction.symbol1 == symbol, sdbaction.symbol2 == symbol))
+                .where(sdbaction.is_directional == 1)
+                .where(sdbaction.a_is_acting == 1)
+            )
+            rows = self.session.execute(sql)
             for row in rows.fetchall():
                 action = Action(*row)
 
diff --git a/ebel/manager/rdbms/models/stringdb.py b/ebel/manager/rdbms/models/stringdb.py
index c56b9f9..47eccac 100644
--- a/ebel/manager/rdbms/models/stringdb.py
+++ b/ebel/manager/rdbms/models/stringdb.py
@@ -1,4 +1,5 @@
 """StringDB RDBMS model definition."""
+from typing import Optional
 
 from sqlalchemy import Boolean, Column, Integer, SmallInteger, String
 from sqlalchemy.ext.declarative import declarative_base
@@ -64,7 +65,7 @@ class StringDbAction(Base):
     symbol1: Mapped[str] = mapped_column(String(50), nullable=False, index=True)
     symbol2: Mapped[str] = mapped_column(String(50), nullable=False, index=True)
     mode: Mapped[str] = mapped_column(String(20), nullable=False, index=True)
-    action: Mapped[str] = mapped_column(String(20))
+    action: Mapped[Optional[str]] = mapped_column(String(20))
     is_directional: Mapped[bool] = mapped_column(Boolean, nullable=False, index=True)
     a_is_acting: Mapped[bool] = mapped_column(Boolean, nullable=False, index=True)
     score: Mapped[int] = mapped_column(SmallInteger)

From 0dc24ef1d7f7674075342eeebc5436863c252507 Mon Sep 17 00:00:00 2001
From: Bruce Schultz <bschultz013@gmail.com>
Date: Mon, 18 Sep 2023 16:43:57 +0200
Subject: [PATCH 15/58] feat: protein atlas sqla2 import

---
 ebel/manager/orientdb/biodbs/protein_atlas.py |  4 ++-
 ebel/manager/rdbms/models/protein_atlas.py    | 28 ++++++++++---------
 2 files changed, 18 insertions(+), 14 deletions(-)

diff --git a/ebel/manager/orientdb/biodbs/protein_atlas.py b/ebel/manager/orientdb/biodbs/protein_atlas.py
index 53481b5..efd77eb 100644
--- a/ebel/manager/orientdb/biodbs/protein_atlas.py
+++ b/ebel/manager/orientdb/biodbs/protein_atlas.py
@@ -170,7 +170,9 @@ def update_interactions(self) -> int:
 
         location_rid_cache = {x["bel"]: x["rid"] for x in self.query_class("location", columns=["bel"])}
 
-        for ensembl_gene_id, data in tqdm(rid_ensembl_gene_ids.items()):
+        for ensembl_gene_id, data in tqdm(
+            rid_ensembl_gene_ids.items(), desc=f"Update {self.biodb_name.upper()} interactions"
+        ):
             ns_location = "PROTEIN_ATLAS"
             pure_protein = data.oRecordData
             ns = pure_protein["namespace"]
diff --git a/ebel/manager/rdbms/models/protein_atlas.py b/ebel/manager/rdbms/models/protein_atlas.py
index cce2936..07ef8fc 100644
--- a/ebel/manager/rdbms/models/protein_atlas.py
+++ b/ebel/manager/rdbms/models/protein_atlas.py
@@ -1,4 +1,6 @@
 """Protein Atlas RDBMS model definition."""
+from typing import Optional
+
 from sqlalchemy import Column, Integer, Numeric, String, Text
 from sqlalchemy.ext.declarative import declarative_base
 from sqlalchemy.orm import mapped_column, Mapped
@@ -14,9 +16,9 @@ class ProteinAtlasNormalTissue(Base):
 
     gene: Mapped[str] = mapped_column(String(100), index=True)
     gene_name: Mapped[str] = mapped_column(String(100))
-    tissue: Mapped[str] = mapped_column(String(100))
-    cell_type: Mapped[str] = mapped_column(String(100))
-    level: Mapped[str] = mapped_column(String(100), index=True)
+    tissue: Mapped[Optional[str]] = mapped_column(String(100))
+    cell_type: Mapped[Optional[str]] = mapped_column(String(100))
+    level: Mapped[Optional[str]] = mapped_column(String(100), index=True)
     reliability: Mapped[str] = mapped_column(String(100), index=True)
 
     def as_dict(self):
@@ -40,16 +42,16 @@ class ProteinAtlasSubcellularLocation(Base):
     gene: Mapped[str] = mapped_column(String(100))
     gene_name: Mapped[str] = mapped_column(String(100))
     reliability: Mapped[str] = mapped_column(String(100))
-    main_location: Mapped[str] = mapped_column(String(100))
-    additional_location: Mapped[str] = mapped_column(String(100))
-    extracellular_location: Mapped[str] = mapped_column(String(100))
-    enhanced: Mapped[str] = mapped_column(String(100))
-    supported: Mapped[str] = mapped_column(String(100))
-    approved: Mapped[str] = mapped_column(String(100))
-    uncertain: Mapped[str] = mapped_column(String(100))
-    single_cell_variation_intensity: Mapped[str] = mapped_column(String(100))
-    single_cell_variation_spatial: Mapped[str] = mapped_column(String(100))
-    cell_cycle_dependency: Mapped[str] = mapped_column(Text)
+    main_location: Mapped[Optional[str]] = mapped_column(String(100))
+    additional_location: Mapped[Optional[str]] = mapped_column(String(100))
+    extracellular_location: Mapped[Optional[str]] = mapped_column(String(100))
+    enhanced: Mapped[Optional[str]] = mapped_column(String(100))
+    supported: Mapped[Optional[str]] = mapped_column(String(100))
+    approved: Mapped[Optional[str]] = mapped_column(String(100))
+    uncertain: Mapped[Optional[str]] = mapped_column(String(100))
+    single_cell_variation_intensity: Mapped[Optional[str]] = mapped_column(String(100))
+    single_cell_variation_spatial: Mapped[Optional[str]] = mapped_column(String(100))
+    cell_cycle_dependency: Mapped[Optional[str]] = mapped_column(Text)
     go_id: Mapped[str] = mapped_column(Text)
 
     def as_dict(self):

From fb2c89e46d5573859725fd68e90e02668d2346d9 Mon Sep 17 00:00:00 2001
From: Bruce Schultz <bruce.schultz@scai.fraunhofer.de>
Date: Wed, 20 Sep 2023 15:34:55 +0200
Subject: [PATCH 16/58] feat: get ncbi and pc working

---
 ebel/constants.py                             | 30 +++++++++----------
 ebel/defaults.py                              | 12 ++++----
 .../orientdb/biodbs/pathway_commons.py        |  7 +++--
 ebel/manager/orientdb/biodbs/uniprot.py       | 18 ++++++-----
 ebel/manager/rdbms/models/hgnc.py             |  4 +--
 ebel/manager/rdbms/models/ncbi.py             | 10 +++----
 ebel/manager/rdbms/models/uniprot.py          |  2 +-
 7 files changed, 44 insertions(+), 39 deletions(-)

diff --git a/ebel/constants.py b/ebel/constants.py
index 4d8c28b..86be228 100755
--- a/ebel/constants.py
+++ b/ebel/constants.py
@@ -2,38 +2,36 @@
 # -*- coding: utf-8 -*-
 
 import os
+from pathlib import Path
 
-THIS_DIR = os.path.dirname(__file__)
+THIS_DIR = Path(__file__).parent  # directory of this module; grammar/ is resolved relative to it
 PROJECT_NAME = "ebel"
 
-HOME = os.path.expanduser("~")
+HOME = Path.home()
 LIBRARY_NAME = PROJECT_NAME
 
 # Path to folder
-PROJECT_DIR = os.path.join(HOME, f".{PROJECT_NAME}")
-if not os.path.exists(PROJECT_DIR):
-    os.mkdir(PROJECT_DIR)
+PROJECT_DIR = Path(HOME, f".{PROJECT_NAME}")
+PROJECT_DIR.mkdir(parents=True, exist_ok=True)
 
 # Path to data folder
-DATA_DIR = os.path.join(PROJECT_DIR, "data")
-if not os.path.exists(DATA_DIR):
-    os.mkdir(DATA_DIR)
+DATA_DIR = Path(PROJECT_DIR, "data")
+DATA_DIR.mkdir(parents=True, exist_ok=True)
 
 # Path to logs folder
-LOG_DIR = os.path.join(PROJECT_DIR, "logs")
-if not os.path.exists(LOG_DIR):
-    os.mkdir(LOG_DIR)
+LOG_DIR = Path(PROJECT_DIR, "logs")
+LOG_DIR.mkdir(parents=True, exist_ok=True)
 
 # Default database name and location
-DB_NAME = "{}.db".format(PROJECT_NAME)
-DB_PATH = os.path.join(DATA_DIR, DB_NAME)
+DB_NAME = f"{PROJECT_NAME}.db"
+DB_PATH = Path(DATA_DIR, DB_NAME)
 
 GRAMMAR_BEL_PATH = {
-    "2": os.path.join(THIS_DIR, "grammar", "grammar_bel_2.bnf"),
-    "2_1": os.path.join(THIS_DIR, "grammar", "grammar_bel_2_1.bnf"),
+    "2": THIS_DIR.joinpath("grammar", "grammar_bel_2.bnf"),
+    "2_1": THIS_DIR.joinpath("grammar", "grammar_bel_2_1.bnf"),
 }
 
-GRAMMAR_NS_ANNO_PATH = os.path.join(THIS_DIR, "grammar", "grammar_belns_belanno_1__2.bnf")
+GRAMMAR_NS_ANNO_PATH = THIS_DIR.joinpath("grammar", "grammar_belns_belanno_1__2.bnf")
 GRAMMAR_START_NS = "belns"
 GRAMMAR_START_ANNO = "belanno"
 GRAMMAR_START_LINE = "script_line_by_line"
diff --git a/ebel/defaults.py b/ebel/defaults.py
index 566da81..3ff9bb7 100755
--- a/ebel/defaults.py
+++ b/ebel/defaults.py
@@ -24,16 +24,16 @@
 
 SQLITE_DATABASE_NAME = "ebel.db"
 SQLITE_TEST_DATABASE_NAME = "ebel_test.db"
-DATABASE_LOCATION = os.path.join(DATA_DIR, SQLITE_DATABASE_NAME)
-DEFAULT_TEST_DATABASE_LOCATION = os.path.join(DATA_DIR, SQLITE_TEST_DATABASE_NAME)
+DATABASE_LOCATION = DATA_DIR.joinpath(SQLITE_DATABASE_NAME)
+DEFAULT_TEST_DATABASE_LOCATION = DATA_DIR.joinpath(SQLITE_TEST_DATABASE_NAME)
 
 ###############################################################################
 # SQLAlchemy connection strings
 # =============================
 # SQLite
 # ------
-CONN_STR_DEFAULT = "sqlite:///" + DATABASE_LOCATION
-CONN_STR_TESTS = "sqlite:///" + SQLITE_TEST_DATABASE_NAME
+CONN_STR_DEFAULT = "sqlite:///" + str(DATABASE_LOCATION)  # full path; .name would drop DATA_DIR
+CONN_STR_TESTS = "sqlite:///" + DEFAULT_TEST_DATABASE_LOCATION.name
 # MySQL
 # -----
 CONN_STR_MYSQL_PREFIX = "mysql+pymysql://ebel:ebel@localhost/"
@@ -42,12 +42,12 @@
 
 ###############################################################################
 # Config
-config_file_path = os.path.join(PROJECT_DIR, "config.ini")
+config_file_path = PROJECT_DIR.joinpath("config.ini")
 
 ###############################################################################
 # Log Handling
 logHandler = handlers.RotatingFileHandler(
-    filename=os.path.join(LOG_DIR, "ebel.log"),
+    filename=LOG_DIR.joinpath("ebel.log"),
     mode="a",
     maxBytes=4098 * 10,  # 4MB file max
     backupCount=0,
diff --git a/ebel/manager/orientdb/biodbs/pathway_commons.py b/ebel/manager/orientdb/biodbs/pathway_commons.py
index 34e36a0..88a23fe 100644
--- a/ebel/manager/orientdb/biodbs/pathway_commons.py
+++ b/ebel/manager/orientdb/biodbs/pathway_commons.py
@@ -63,12 +63,12 @@ def insert_data(self) -> Dict[str, int]:
             "INTERACTION_PUBMED_ID",
             "PATHWAY_NAMES",
         ]
-
         df = pd.read_csv(self.file_path, sep="\t", low_memory=True, usecols=usecols)
         # Because 2 tables are in file, we have to identify where second table starts and slice the dataframe
         df = df.iloc[: df[df["PARTICIPANT_A"] == "PARTICIPANT"].index[0]]
 
         df.columns = self._standardize_column_names(df.columns)
+
         df.pathway_names = df.pathway_names.str.split(";")
         df.interaction_data_source = df.interaction_data_source.str.split(";")
         df.interaction_pubmed_id = df.interaction_pubmed_id.str.split(";")
@@ -104,7 +104,7 @@ def create_pmids_table(self, df):
             columns={"id": "pathway_commons_id", "interaction_pubmed_id": "pmid"},
             inplace=True,
         )
-        df_pmids.pmid = pd.to_numeric(df_pmids.pmid, errors="coerce")
+        df_pmids.pmid = pd.to_numeric(df_pmids.pmid, errors="coerce", downcast="integer")
         df_pmids = df_pmids[df_pmids.pmid.notna()]
 
         with self.engine.connect() as conn:
@@ -216,6 +216,9 @@ def update_interactions(self) -> Dict[str, int]:
         inserted = {}
 
         pc_pathway_name_rid_dict = self.get_pathway_name_rid_dict()
+
+        # Update HGNC in case not in DB
+        self.hgnc.update()
         valid_hgnc_symbols = {x[0] for x in self.session.query(hgnc.Hgnc).with_entities(hgnc.Hgnc.symbol).all()}
 
         cols = ["symbol", "rid"]
diff --git a/ebel/manager/orientdb/biodbs/uniprot.py b/ebel/manager/orientdb/biodbs/uniprot.py
index 4e4c131..1bdb0be 100644
--- a/ebel/manager/orientdb/biodbs/uniprot.py
+++ b/ebel/manager/orientdb/biodbs/uniprot.py
@@ -4,6 +4,7 @@
 import os
 import re
 from collections import namedtuple
+from pathlib import Path
 from typing import Dict, List, Tuple, Union
 
 import pandas as pd
@@ -479,10 +480,13 @@ def insert_uniprot(self) -> int:
         logger.info("Drop and create Uniprot table in RDBMS")
 
         logger.info("Insert data linked to Uniprot entry into RDBMS")
-        # avoid to use old gunzipped file
-        if os.path.exists(self.file_path_gunzipped):
-            os.remove(self.file_path_gunzipped)
-        if not os.path.exists(self.file_path_gunzipped):
+
+        gunzipped_file = Path(self.file_path_gunzipped)
+        # Remove previous gunzipped file if present
+        if gunzipped_file.is_file():
+            gunzipped_file.unlink()
+
+        if not gunzipped_file.is_file():  # Gunzip compressed uniprot file
             gunzip(self.file_path, self.file_path_gunzipped)
 
         (
@@ -496,9 +500,9 @@ def insert_uniprot(self) -> int:
         self.__insert_linked_data(keywords, hosts, xrefs, functions, sclocations)
         inserted = self.__insert_uniprot_data(xrefs, functions, sclocations, number_of_entries)
 
-        # save storage space
-        if os.path.exists(self.file_path_gunzipped):
-            os.remove(self.file_path_gunzipped)
+        # save storage space by deleting uncompressed XML file
+        if gunzipped_file.is_file():
+            gunzipped_file.unlink()
 
         # return number_of_entries
         return inserted
diff --git a/ebel/manager/rdbms/models/hgnc.py b/ebel/manager/rdbms/models/hgnc.py
index b356682..6f6f80a 100644
--- a/ebel/manager/rdbms/models/hgnc.py
+++ b/ebel/manager/rdbms/models/hgnc.py
@@ -2,7 +2,7 @@
 import datetime
 from typing import List, Optional
 
-from sqlalchemy import BigInteger, Column, Date, ForeignKey, Integer, String, Text
+from sqlalchemy import BigInteger, Date, ForeignKey, Integer, String, Text
 from sqlalchemy.ext.declarative import declarative_base
 from sqlalchemy.orm import relationship, mapped_column, Mapped
 
@@ -18,7 +18,7 @@ class Hgnc(Base):
 
     id: Mapped[int] = mapped_column(primary_key=True)
     hgnc_id: Mapped[str] = mapped_column(String(20))
-    version: Mapped[int] = mapped_column()
+    version: Mapped[int] = mapped_column(BigInteger)
     bioparadigms_slc: Mapped[Optional[str]] = mapped_column(String(20))
     cd: Mapped[Optional[str]] = mapped_column(String(20))
     cosmic: Mapped[Optional[str]] = mapped_column(String(50))
diff --git a/ebel/manager/rdbms/models/ncbi.py b/ebel/manager/rdbms/models/ncbi.py
index ade3a7b..3c83f6b 100644
--- a/ebel/manager/rdbms/models/ncbi.py
+++ b/ebel/manager/rdbms/models/ncbi.py
@@ -1,5 +1,5 @@
 """NCBI RDBMS model definition."""
-from typing import List
+from typing import List, Optional
 
 from sqlalchemy import Column, ForeignKey, Integer, String, Text
 from sqlalchemy.ext.declarative import declarative_base
@@ -19,10 +19,10 @@ class NcbiGeneInfo(Base):
     tax_id: Mapped[int] = mapped_column(index=True)
     symbol: Mapped[str] = mapped_column(String(100), index=True)
     type_of_gene: Mapped[str] = mapped_column(String(100), index=True)
-    locus_tag: Mapped[str] = mapped_column(String(100))
-    chromosome: Mapped[str] = mapped_column(String(100))
-    map_location: Mapped[str] = mapped_column(String(100))
-    description_id: Mapped[int] = mapped_column(ForeignKey("ncbi_gene_info_description.id"))
+    locus_tag: Mapped[Optional[str]] = mapped_column(String(100))
+    chromosome: Mapped[Optional[str]] = mapped_column(String(100))
+    map_location: Mapped[Optional[str]] = mapped_column(String(100))
+    description_id: Mapped[Optional[int]] = mapped_column(ForeignKey("ncbi_gene_info_description.id"))
     description: Mapped["NcbiGeneInfoDescription"] = relationship(
         "NcbiGeneInfoDescription", foreign_keys=[description_id]
     )
diff --git a/ebel/manager/rdbms/models/uniprot.py b/ebel/manager/rdbms/models/uniprot.py
index e4cfd9a..a63484a 100644
--- a/ebel/manager/rdbms/models/uniprot.py
+++ b/ebel/manager/rdbms/models/uniprot.py
@@ -46,7 +46,7 @@ class Uniprot(Base):
 
     __tablename__ = "uniprot"
 
-    id: Mapped[str] = mapped_column(primary_key=True)
+    id: Mapped[int] = mapped_column(primary_key=True)
 
     accession: Mapped[str] = mapped_column(String(20), unique=True)
     name: Mapped[str] = mapped_column(String(100), nullable=False, unique=True)

From 608c4b641880cce463f3e3a8b852dafc3fc25b1d Mon Sep 17 00:00:00 2001
From: Bruce Schultz <bruce.schultz@scai.fraunhofer.de>
Date: Wed, 20 Sep 2023 15:36:07 +0200
Subject: [PATCH 17/58] fix: ncbi constraint

---
 ebel/manager/rdbms/models/ncbi.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/ebel/manager/rdbms/models/ncbi.py b/ebel/manager/rdbms/models/ncbi.py
index 3c83f6b..a26ac3a 100644
--- a/ebel/manager/rdbms/models/ncbi.py
+++ b/ebel/manager/rdbms/models/ncbi.py
@@ -17,8 +17,8 @@ class NcbiGeneInfo(Base):
     gene_id: Mapped[int] = mapped_column(primary_key=True)
 
     tax_id: Mapped[int] = mapped_column(index=True)
-    symbol: Mapped[str] = mapped_column(String(100), index=True)
-    type_of_gene: Mapped[str] = mapped_column(String(100), index=True)
+    symbol: Mapped[Optional[str]] = mapped_column(String(100), index=True)
+    type_of_gene: Mapped[Optional[str]] = mapped_column(String(100), index=True)
     locus_tag: Mapped[Optional[str]] = mapped_column(String(100))
     chromosome: Mapped[Optional[str]] = mapped_column(String(100))
     map_location: Mapped[Optional[str]] = mapped_column(String(100))

From d4f66702079e301fdfe5cf4dabce43dc65989c63 Mon Sep 17 00:00:00 2001
From: Bruce Schultz <bschultz013@gmail.com>
Date: Thu, 21 Sep 2023 09:01:21 +0200
Subject: [PATCH 18/58] feat: update iuphar to sqla 2

---
 ebel/manager/orientdb/biodbs/iuphar.py |  44 ++++++++--
 ebel/manager/orientdb/urls.py          |   2 +-
 ebel/manager/rdbms/models/iuphar.py    | 117 +++++++++++++------------
 3 files changed, 97 insertions(+), 66 deletions(-)

diff --git a/ebel/manager/orientdb/biodbs/iuphar.py b/ebel/manager/orientdb/biodbs/iuphar.py
index 5f03d50..71e1476 100644
--- a/ebel/manager/orientdb/biodbs/iuphar.py
+++ b/ebel/manager/orientdb/biodbs/iuphar.py
@@ -6,6 +6,7 @@
 import numpy as np
 import pandas as pd
 from pyorientdb import OrientDB
+from sqlalchemy import select
 from tqdm import tqdm
 
 from ebel.manager.orientdb import odb_meta, odb_structure, urls
@@ -132,13 +133,30 @@ def update_interactions(self) -> int:
             "Gating inhibitor": "inhibits_gating__iu",
         }
 
-        sql = """select i.pubmed_id, i.assay_description, i.affinity_units, i.affinity_low, i.affinity_median,
-        i.affinity_high, i.type,
-        i.action,i.target_uniprot, l.name as ligand_name, l.pubchem_sid, i.ligand_gene_symbol, i.ligand_species
-        from iuphar_interaction as i inner join iuphar_ligand as l
-        on (i.ligand_id=l.id) where i.target_uniprot IS NOT NULL and pubchem_sid IS NOT NULL"""
+        i_int = iuphar.IupharInteraction
+        lig = iuphar.IupharLigand
+        sql = (
+            select(
+                i_int.pubmed_id,
+                i_int.assay_description,
+                i_int.affinity_units,
+                i_int.affinity_low,
+                i_int.affinity_median,
+                i_int.affinity_high,
+                i_int.type,
+                i_int.action,
+                i_int.target_uniprot,
+                lig.name.label("ligand_name"),
+                lig.pubchem_sid,
+                i_int.ligand_gene_symbol,
+                i_int.ligand_species,
+            )
+            .join(lig)
+            .where(i_int.target_uniprot.isnot(None), lig.pubchem_sid.isnot(None))
+        )
+        with self.engine.connect() as conn:
+            df_iuphar = pd.read_sql(sql, conn).replace({np.nan: None})
 
-        df_iuphar = pd.read_sql(sql, self.engine).replace({np.nan: None})
         df_iuphar.set_index("target_uniprot", inplace=True)
         df_graph = pd.DataFrame(
             uniprot.get_pure_uniprot_rid_dict_in_bel_context().items(),
@@ -152,7 +170,11 @@ def update_interactions(self) -> int:
             total=df_join.shape[0],
             desc=f"Update {self.biodb_name.upper()} interactions",
         ):
-            if data.ligand_gene_symbol and data.ligand_species and "Human" in data.ligand_species:
+            if (
+                data.ligand_gene_symbol  # truthiness guards None before the substring test
+                and data.ligand_species
+                and "Human" in data.ligand_species
+            ):
                 symbol = data.ligand_gene_symbol.split("|")[0]  # human seems to always the first
                 a_value_dict = {
                     "pure": True,
@@ -161,6 +183,7 @@ def update_interactions(self) -> int:
                     "name": symbol,
                 }
                 a_class = "protein"
+
             else:
                 a_value_dict = {
                     "pure": True,
@@ -170,6 +193,7 @@ def update_interactions(self) -> int:
                     "label": data.ligand_name,
                 }
                 a_class = "abundance"
+
             a_rid = self.get_create_rid(a_class, value_dict=a_value_dict, check_for="bel")
 
             i_value_dict = {
@@ -189,3 +213,9 @@ def update_interactions(self) -> int:
         # Hgnc(self.client).update_bel()
 
         return df_join.shape[0]
+
+
+if __name__ == "__main__":
+    hgncdb = Iuphar()
+    # hgncdb.recreate_tables()
+    hgncdb.update()
diff --git a/ebel/manager/orientdb/urls.py b/ebel/manager/orientdb/urls.py
index b5f3036..a83b81b 100755
--- a/ebel/manager/orientdb/urls.py
+++ b/ebel/manager/orientdb/urls.py
@@ -60,7 +60,7 @@
 SIDER_SE = "http://sideeffects.embl.de/media/download/meddra_all_se.tsv.gz"
 
 # Expression Atlas #
-EXPRESSION_ATLAS_BASE = "ftp://ftp.ebi.ac.uk/pub/databases/microarray/data/atlas/experiments/"
+EXPRESSION_ATLAS_BASE = "https://ftp.ebi.ac.uk/pub/databases/microarray/data/atlas/experiments/"
 EXPRESSION_ATLAS_EXPERIMENTS = EXPRESSION_ATLAS_BASE + "atlas-latest-data.tar.gz"
 
 # DisGeNet #
diff --git a/ebel/manager/rdbms/models/iuphar.py b/ebel/manager/rdbms/models/iuphar.py
index 11d2c4d..d28002f 100644
--- a/ebel/manager/rdbms/models/iuphar.py
+++ b/ebel/manager/rdbms/models/iuphar.py
@@ -1,9 +1,10 @@
 """IUPHAR RDBMS model definition."""
-from typing import List
+from typing import List, Optional
 
-from sqlalchemy import BigInteger, Boolean, Column, ForeignKey, Integer, Numeric, String, Text
+from sqlalchemy import (BigInteger, Boolean, Column, ForeignKey, Integer,
+                        Numeric, String, Text)
 from sqlalchemy.ext.declarative import declarative_base
-from sqlalchemy.orm import relationship, mapped_column, Mapped
+from sqlalchemy.orm import Mapped, mapped_column, relationship
 
 from ebel.manager.rdbms.models import object_as_dict
 
@@ -17,31 +18,31 @@ class IupharLigand(Base):
     id: Mapped[int] = mapped_column(primary_key=True)
 
     name: Mapped[str] = mapped_column(Text)
-    species: Mapped[str] = mapped_column(Text)
+    species: Mapped[Optional[str]] = mapped_column(Text)
     type: Mapped[str] = mapped_column(Text)
-    approved: Mapped[bool] = mapped_column()
-    withdrawn: Mapped[bool] = mapped_column()
-    labelled: Mapped[bool] = mapped_column()
-    radioactive: Mapped[bool] = mapped_column()
-    pubchem_sid: Mapped[int] = mapped_column()
-    pubchem_cid: Mapped[str] = mapped_column(
+    approved: Mapped[Optional[bool]] = mapped_column()
+    withdrawn: Mapped[Optional[bool]] = mapped_column()
+    labelled: Mapped[Optional[bool]] = mapped_column()
+    radioactive: Mapped[Optional[bool]] = mapped_column()
+    pubchem_sid: Mapped[Optional[int]] = mapped_column()
+    pubchem_cid: Mapped[Optional[str]] = mapped_column(  # annotation matches the Text column below
         Text
     )  # TODO: This is a integer, but for import reasons this changed to text
-    uniprot_id: Mapped[str] = mapped_column(Text)
-    ensembl_id: Mapped[str] = mapped_column(Text)
-    ligand_subunit_ids: Mapped[str] = mapped_column(Text)
-    ligand_subunit_name: Mapped[str] = mapped_column(Text)
-    ligand_subunit_uni_prot_ids: Mapped[str] = mapped_column(Text)
-    ligand_subunit_ensembl_ids: Mapped[str] = mapped_column(Text)
-    iupac_name: Mapped[str] = mapped_column(Text)
-    inn: Mapped[str] = mapped_column(Text)
-    synonyms: Mapped[str] = mapped_column(Text)
-    smiles: Mapped[str] = mapped_column(Text)
-    inchi_key: Mapped[str] = mapped_column(Text)
-    inchi: Mapped[str] = mapped_column(Text)
-    gto_immu_pdb: Mapped[bool] = mapped_column()
-    gto_mpdb: Mapped[bool] = mapped_column()
-    antibacterial: Mapped[bool] = mapped_column()
+    uniprot_id: Mapped[Optional[str]] = mapped_column(Text)
+    ensembl_id: Mapped[Optional[str]] = mapped_column(Text)
+    ligand_subunit_ids: Mapped[Optional[str]] = mapped_column(Text)
+    ligand_subunit_name: Mapped[Optional[str]] = mapped_column(Text)
+    ligand_subunit_uni_prot_ids: Mapped[Optional[str]] = mapped_column(Text)
+    ligand_subunit_ensembl_ids: Mapped[Optional[str]] = mapped_column(Text)
+    iupac_name: Mapped[Optional[str]] = mapped_column(Text)
+    inn: Mapped[Optional[str]] = mapped_column(Text)
+    synonyms: Mapped[Optional[str]] = mapped_column(Text)
+    smiles: Mapped[Optional[str]] = mapped_column(Text)
+    inchi_key: Mapped[Optional[str]] = mapped_column(Text)
+    inchi: Mapped[Optional[str]] = mapped_column(Text)
+    gto_immu_pdb: Mapped[Optional[bool]] = mapped_column()
+    gto_mpdb: Mapped[Optional[bool]] = mapped_column()
+    antibacterial: Mapped[Optional[bool]] = mapped_column()
 
     interactions: Mapped[List["IupharInteraction"]] = relationship("IupharInteraction")
 
@@ -56,48 +57,48 @@ class IupharInteraction(Base):
     __tablename__ = "iuphar_interaction"
     id = mapped_column(Integer, primary_key=True)
 
-    target: Mapped[str] = mapped_column(String(255))
-    target_id: Mapped[int] = mapped_column()
-    target_subunit_ids: Mapped[str] = mapped_column(Text)
-    target_gene_symbol: Mapped[str] = mapped_column(String(100))
-    target_uniprot: Mapped[str] = mapped_column(String(100))
-    target_ensembl_gene_id: Mapped[str] = mapped_column(String(200))
-    target_ligand: Mapped[str] = mapped_column(String(100))
-    target_ligand_id: Mapped[int] = mapped_column()
-    target_ligand_subunit_ids: Mapped[str] = mapped_column(Text)
-    target_ligand_gene_symbol: Mapped[str] = mapped_column(String(50))
-    target_ligand_uniprot_id: Mapped[str] = mapped_column(String(200))
-    target_ligand_ensembl_gene_id: Mapped[str] = mapped_column(String(50))
-    target_ligand_pubchem_sid: Mapped[int] = mapped_column()
-    target_species: Mapped[str] = mapped_column(String(100))
+    target: Mapped[Optional[str]] = mapped_column(String(255))
+    target_id: Mapped[Optional[int]] = mapped_column()
+    target_subunit_ids: Mapped[Optional[str]] = mapped_column(Text)
+    target_gene_symbol: Mapped[Optional[str]] = mapped_column(String(100))
+    target_uniprot: Mapped[Optional[str]] = mapped_column(String(100))
+    target_ensembl_gene_id: Mapped[Optional[str]] = mapped_column(String(200))
+    target_ligand: Mapped[Optional[str]] = mapped_column(String(100))
+    target_ligand_id: Mapped[Optional[str]] = mapped_column(Text)  # unsized VARCHAR fails on MySQL
+    target_ligand_subunit_ids: Mapped[Optional[str]] = mapped_column(Text)
+    target_ligand_gene_symbol: Mapped[Optional[str]] = mapped_column(String(50))
+    target_ligand_uniprot_id: Mapped[Optional[str]] = mapped_column(String(200))
+    target_ligand_ensembl_gene_id: Mapped[Optional[str]] = mapped_column(String(50))
+    target_ligand_pubchem_sid: Mapped[Optional[str]] = mapped_column(Text)  # unsized VARCHAR fails on MySQL
+    target_species: Mapped[Optional[str]] = mapped_column(String(100))
     ligand: Mapped[str] = mapped_column(String(255))
     ligand_id: Mapped[int] = mapped_column(ForeignKey("iuphar_ligand.id"), index=True)
-    ligand_subunit_ids: Mapped[str] = mapped_column(Text)
-    ligand_gene_symbol: Mapped[str] = mapped_column(String(50))
-    ligand_species: Mapped[str] = mapped_column(String(50))
-    ligand_pubchem_sid: Mapped[int] = mapped_column()
+    ligand_subunit_ids: Mapped[Optional[str]] = mapped_column(Text)
+    ligand_gene_symbol: Mapped[Optional[str]] = mapped_column(String(50))
+    ligand_species: Mapped[Optional[str]] = mapped_column(String(50))
+    ligand_pubchem_sid: Mapped[Optional[int]] = mapped_column()
     ligand_type: Mapped[str] = mapped_column(Text)
     approved: Mapped[bool] = mapped_column()
     type: Mapped[str] = mapped_column(String(100))
     action: Mapped[str] = mapped_column(String(100))
-    action_comment: Mapped[str] = mapped_column(String(255))
-    selectivity: Mapped[str] = mapped_column(String(50))
+    action_comment: Mapped[Optional[str]] = mapped_column(String(255))
+    selectivity: Mapped[Optional[str]] = mapped_column(String(50))
     endogenous: Mapped[bool] = mapped_column()
     primary_target: Mapped[bool] = mapped_column()
-    concentration_range: Mapped[str] = mapped_column(String(50))
+    concentration_range: Mapped[Optional[str]] = mapped_column(String(50))
     affinity_units: Mapped[str] = mapped_column(String(10))
-    affinity_high: Mapped[float] = mapped_column(Numeric(6, 2))
-    affinity_median: Mapped[float] = mapped_column(Numeric(6, 2))
-    affinity_low: Mapped[float] = mapped_column(Numeric(6, 2))
-    original_affinity_units: Mapped[str] = mapped_column(String(10))
-    original_affinity_low_nm: Mapped[float] = mapped_column(Numeric(12, 3))
-    original_affinity_median_nm: Mapped[float] = mapped_column(Numeric(12, 3))
-    original_affinity_high_nm: Mapped[float] = mapped_column(Numeric(12, 3))
-    original_affinity_relation: Mapped[str] = mapped_column(String(1))
-    assay_description: Mapped[str] = mapped_column(Text)
-    receptor_site: Mapped[str] = mapped_column(String(100))
-    ligand_context: Mapped[str] = mapped_column(String(50))
-    pubmed_id: Mapped[str] = mapped_column(Text)
+    affinity_high: Mapped[Optional[float]] = mapped_column(Numeric(6, 2))
+    affinity_median: Mapped[Optional[float]] = mapped_column(Numeric(6, 2))
+    affinity_low: Mapped[Optional[float]] = mapped_column(Numeric(6, 2))
+    original_affinity_units: Mapped[Optional[str]] = mapped_column(String(10))
+    original_affinity_low_nm: Mapped[Optional[float]] = mapped_column(Numeric(12, 3))
+    original_affinity_median_nm: Mapped[Optional[float]] = mapped_column(Numeric(12, 3))
+    original_affinity_high_nm: Mapped[Optional[float]] = mapped_column(Numeric(12, 3))
+    original_affinity_relation: Mapped[Optional[str]] = mapped_column(String(1))
+    assay_description: Mapped[Optional[str]] = mapped_column(Text)
+    receptor_site: Mapped[Optional[str]] = mapped_column(String(100))
+    ligand_context: Mapped[Optional[str]] = mapped_column(String(50))
+    pubmed_id: Mapped[Optional[str]] = mapped_column(Text)
 
     def as_dict(self):
         """Convert object values to dictionary."""

From c1856ca410078c9f9dc21fd7fa3bc8675fe256e0 Mon Sep 17 00:00:00 2001
From: Bruce Schultz <bschultz013@gmail.com>
Date: Thu, 21 Sep 2023 09:01:40 +0200
Subject: [PATCH 19/58] style: black and isort

---
 ebel/manager/models.py                        |  2 +-
 ebel/manager/orientdb/biodbs/disgenet.py      |  2 +-
 ebel/manager/orientdb/biodbs/hgnc.py          | 26 +++++++------------
 ebel/manager/orientdb/biodbs/stringdb.py      |  2 +-
 ebel/manager/rdbms/models/biogrid.py          |  2 +-
 ebel/manager/rdbms/models/chebi.py            |  2 +-
 .../rdbms/models/clinical_trials_gov.py       |  4 +--
 ebel/manager/rdbms/models/clinvar.py          |  4 +--
 ebel/manager/rdbms/models/disgenet.py         |  2 +-
 ebel/manager/rdbms/models/drugbank.py         |  2 +-
 ebel/manager/rdbms/models/ensembl.py          |  2 +-
 ebel/manager/rdbms/models/expression_atlas.py |  2 +-
 ebel/manager/rdbms/models/gwas_catalog.py     |  2 +-
 ebel/manager/rdbms/models/hgnc.py             |  2 +-
 ebel/manager/rdbms/models/human_ortholog.py   |  2 +-
 ebel/manager/rdbms/models/intact.py           |  2 +-
 ebel/manager/rdbms/models/iuphar.py           |  3 +--
 ebel/manager/rdbms/models/kegg.py             |  2 +-
 ebel/manager/rdbms/models/mirtarbase.py       |  2 +-
 ebel/manager/rdbms/models/ncbi.py             |  2 +-
 ebel/manager/rdbms/models/nsides.py           |  2 +-
 ebel/manager/rdbms/models/pathway_commons.py  |  2 +-
 ebel/manager/rdbms/models/protein_atlas.py    |  2 +-
 ebel/manager/rdbms/models/reactome.py         |  2 +-
 ebel/manager/rdbms/models/stringdb.py         |  2 +-
 ebel/manager/rdbms/models/uniprot.py          |  2 +-
 26 files changed, 37 insertions(+), 44 deletions(-)

diff --git a/ebel/manager/models.py b/ebel/manager/models.py
index 010ef16..32c0307 100755
--- a/ebel/manager/models.py
+++ b/ebel/manager/models.py
@@ -15,7 +15,7 @@
 from lark import Lark, Token, Tree
 from sqlalchemy import Boolean, ForeignKey, Index, Integer, String
 from sqlalchemy.ext.declarative import declarative_base, declared_attr
-from sqlalchemy.orm import relationship, mapped_column
+from sqlalchemy.orm import mapped_column, relationship
 from sqlalchemy.sql.expression import func
 from sqlalchemy_utils import create_database, database_exists
 from tqdm import tqdm
diff --git a/ebel/manager/orientdb/biodbs/disgenet.py b/ebel/manager/orientdb/biodbs/disgenet.py
index bb3259d..c052cc7 100644
--- a/ebel/manager/orientdb/biodbs/disgenet.py
+++ b/ebel/manager/orientdb/biodbs/disgenet.py
@@ -4,7 +4,7 @@
 
 import pandas as pd
 from pyorientdb import OrientDB
-from sqlalchemy import text, select
+from sqlalchemy import select, text
 from tqdm import tqdm
 
 from ebel.manager.orientdb import odb_meta, odb_structure, urls
diff --git a/ebel/manager/orientdb/biodbs/hgnc.py b/ebel/manager/orientdb/biodbs/hgnc.py
index 91d1a65..f16e0b7 100644
--- a/ebel/manager/orientdb/biodbs/hgnc.py
+++ b/ebel/manager/orientdb/biodbs/hgnc.py
@@ -15,25 +15,19 @@
 from ebel.manager.orientdb import odb_meta, odb_structure, urls
 from ebel.manager.orientdb.constants import HGNC
 from ebel.manager.rdbms.models import hgnc
+from ebel.manager.rdbms.models.hgnc import AliasName, AliasSymbol, Ccds, Ena, Enzyme, GeneGroupId, GeneGroupName
+from ebel.manager.rdbms.models.hgnc import Hgnc as HgncDb
 from ebel.manager.rdbms.models.hgnc import (
-    Hgnc as HgncDb,
+    Lsdb,
+    Mgd,
+    Omim,
+    PrevName,
     PrevSymbol,
-    AliasSymbol,
-    AliasName,
-    Ccds,
-    Ena,
-    Enzyme,
-    GeneGroupName,
-    GeneGroupId,
-    UniProt,
-    RnaCentral,
-    Rgd,
-    RefSeq,
     PubMed,
-    PrevName,
-    Omim,
-    Mgd,
-    Lsdb,
+    RefSeq,
+    Rgd,
+    RnaCentral,
+    UniProt,
 )
 from ebel.tools import get_file_path
 
diff --git a/ebel/manager/orientdb/biodbs/stringdb.py b/ebel/manager/orientdb/biodbs/stringdb.py
index b5c6774..19aceb3 100644
--- a/ebel/manager/orientdb/biodbs/stringdb.py
+++ b/ebel/manager/orientdb/biodbs/stringdb.py
@@ -6,7 +6,7 @@
 import numpy as np
 import pandas as pd
 from pyorientdb import OrientDB
-from sqlalchemy import text, select, or_
+from sqlalchemy import or_, select, text
 from tqdm import tqdm
 
 from ebel.manager.orientdb import odb_meta, odb_structure, urls
diff --git a/ebel/manager/rdbms/models/biogrid.py b/ebel/manager/rdbms/models/biogrid.py
index d552d56..b8dea17 100644
--- a/ebel/manager/rdbms/models/biogrid.py
+++ b/ebel/manager/rdbms/models/biogrid.py
@@ -1,7 +1,7 @@
 """BioGRID RDBMS model definition."""
 from sqlalchemy import Float, ForeignKey, Integer, String, Text
 from sqlalchemy.ext.declarative import declarative_base
-from sqlalchemy.orm import relationship, mapped_column, Mapped
+from sqlalchemy.orm import Mapped, mapped_column, relationship
 
 from ebel.manager.rdbms.models import object_as_dict
 
diff --git a/ebel/manager/rdbms/models/chebi.py b/ebel/manager/rdbms/models/chebi.py
index 52d3120..365968e 100644
--- a/ebel/manager/rdbms/models/chebi.py
+++ b/ebel/manager/rdbms/models/chebi.py
@@ -4,7 +4,7 @@
 
 from sqlalchemy import DateTime, ForeignKey, Index, Integer, String, Text
 from sqlalchemy.ext.declarative import declarative_base
-from sqlalchemy.orm import relationship, mapped_column, Mapped
+from sqlalchemy.orm import Mapped, mapped_column, relationship
 
 Base = declarative_base()
 
diff --git a/ebel/manager/rdbms/models/clinical_trials_gov.py b/ebel/manager/rdbms/models/clinical_trials_gov.py
index c38cf31..c56daef 100644
--- a/ebel/manager/rdbms/models/clinical_trials_gov.py
+++ b/ebel/manager/rdbms/models/clinical_trials_gov.py
@@ -2,9 +2,9 @@
 import re
 from typing import List, Optional
 
-from sqlalchemy import ForeignKey, Integer, String, Table, Text, Column
+from sqlalchemy import Column, ForeignKey, Integer, String, Table, Text
 from sqlalchemy.ext.declarative import declarative_base
-from sqlalchemy.orm import relationship, mapped_column, Mapped
+from sqlalchemy.orm import Mapped, mapped_column, relationship
 
 from ebel.manager.rdbms.models import object_as_dict
 
diff --git a/ebel/manager/rdbms/models/clinvar.py b/ebel/manager/rdbms/models/clinvar.py
index 8f84af7..23ffd65 100644
--- a/ebel/manager/rdbms/models/clinvar.py
+++ b/ebel/manager/rdbms/models/clinvar.py
@@ -1,9 +1,9 @@
 """ClinVar RDBMS model definition."""
 from typing import List, Optional
 
-from sqlalchemy import ForeignKey, Index, Integer, String, Table, Text, Column
+from sqlalchemy import Column, ForeignKey, Index, Integer, String, Table, Text
 from sqlalchemy.ext.declarative import declarative_base
-from sqlalchemy.orm import relationship, mapped_column, Mapped
+from sqlalchemy.orm import Mapped, mapped_column, relationship
 
 from ebel.manager.rdbms.models import object_as_dict
 
diff --git a/ebel/manager/rdbms/models/disgenet.py b/ebel/manager/rdbms/models/disgenet.py
index 3ee071c..7959b9a 100644
--- a/ebel/manager/rdbms/models/disgenet.py
+++ b/ebel/manager/rdbms/models/disgenet.py
@@ -3,7 +3,7 @@
 
 from sqlalchemy import BigInteger, Float, ForeignKey, Integer, String
 from sqlalchemy.ext.declarative import declarative_base
-from sqlalchemy.orm import relationship, mapped_column, Mapped
+from sqlalchemy.orm import Mapped, mapped_column, relationship
 
 from ebel.manager.rdbms.models import object_as_dict
 
diff --git a/ebel/manager/rdbms/models/drugbank.py b/ebel/manager/rdbms/models/drugbank.py
index 6cd9b52..3f3800f 100644
--- a/ebel/manager/rdbms/models/drugbank.py
+++ b/ebel/manager/rdbms/models/drugbank.py
@@ -4,7 +4,7 @@
 
 from sqlalchemy import Column, Date, ForeignKey, Integer, String, Text
 from sqlalchemy.ext.declarative import declarative_base
-from sqlalchemy.orm import relationship, mapped_column, Mapped
+from sqlalchemy.orm import Mapped, mapped_column, relationship
 
 Base = declarative_base()
 
diff --git a/ebel/manager/rdbms/models/ensembl.py b/ebel/manager/rdbms/models/ensembl.py
index 6d88a66..5d386ec 100644
--- a/ebel/manager/rdbms/models/ensembl.py
+++ b/ebel/manager/rdbms/models/ensembl.py
@@ -2,7 +2,7 @@
 
 from sqlalchemy import Column, Integer, String
 from sqlalchemy.ext.declarative import declarative_base
-from sqlalchemy.orm import mapped_column, Mapped
+from sqlalchemy.orm import Mapped, mapped_column
 
 from ebel.manager.rdbms.models import object_as_dict
 
diff --git a/ebel/manager/rdbms/models/expression_atlas.py b/ebel/manager/rdbms/models/expression_atlas.py
index cf7afaf..8e38c5e 100644
--- a/ebel/manager/rdbms/models/expression_atlas.py
+++ b/ebel/manager/rdbms/models/expression_atlas.py
@@ -3,7 +3,7 @@
 
 from sqlalchemy import Column, Float, ForeignKey, Integer, String, Text
 from sqlalchemy.ext.declarative import declarative_base
-from sqlalchemy.orm import relationship, mapped_column, Mapped
+from sqlalchemy.orm import Mapped, mapped_column, relationship
 
 from ebel.manager.rdbms.models import object_as_dict
 
diff --git a/ebel/manager/rdbms/models/gwas_catalog.py b/ebel/manager/rdbms/models/gwas_catalog.py
index c846ead..550e128 100644
--- a/ebel/manager/rdbms/models/gwas_catalog.py
+++ b/ebel/manager/rdbms/models/gwas_catalog.py
@@ -3,7 +3,7 @@
 
 from sqlalchemy import Column, Float, ForeignKey, Integer, String, Text
 from sqlalchemy.ext.declarative import declarative_base
-from sqlalchemy.orm import relationship, mapped_column, Mapped
+from sqlalchemy.orm import Mapped, mapped_column, relationship
 
 from ebel.manager.rdbms.models import object_as_dict
 
diff --git a/ebel/manager/rdbms/models/hgnc.py b/ebel/manager/rdbms/models/hgnc.py
index 6f6f80a..e6f5d1e 100644
--- a/ebel/manager/rdbms/models/hgnc.py
+++ b/ebel/manager/rdbms/models/hgnc.py
@@ -4,7 +4,7 @@
 
 from sqlalchemy import BigInteger, Date, ForeignKey, Integer, String, Text
 from sqlalchemy.ext.declarative import declarative_base
-from sqlalchemy.orm import relationship, mapped_column, Mapped
+from sqlalchemy.orm import Mapped, mapped_column, relationship
 
 from ebel.manager.rdbms.models import object_as_dict
 
diff --git a/ebel/manager/rdbms/models/human_ortholog.py b/ebel/manager/rdbms/models/human_ortholog.py
index 565d88c..1b1ab01 100644
--- a/ebel/manager/rdbms/models/human_ortholog.py
+++ b/ebel/manager/rdbms/models/human_ortholog.py
@@ -1,7 +1,7 @@
 """HGNC Human Ortholog RDBMS model definition."""
 from sqlalchemy import Column, Integer, String, Text
 from sqlalchemy.ext.declarative import declarative_base
-from sqlalchemy.orm import mapped_column, Mapped
+from sqlalchemy.orm import Mapped, mapped_column
 
 from ebel.manager.rdbms.models import object_as_dict
 
diff --git a/ebel/manager/rdbms/models/intact.py b/ebel/manager/rdbms/models/intact.py
index fbbb3d5..62c6062 100644
--- a/ebel/manager/rdbms/models/intact.py
+++ b/ebel/manager/rdbms/models/intact.py
@@ -3,7 +3,7 @@
 
 from sqlalchemy import Column, Float, Integer, String, Text
 from sqlalchemy.ext.declarative import declarative_base
-from sqlalchemy.orm import mapped_column, Mapped
+from sqlalchemy.orm import Mapped, mapped_column
 
 from ebel.manager.rdbms.models import object_as_dict
 
diff --git a/ebel/manager/rdbms/models/iuphar.py b/ebel/manager/rdbms/models/iuphar.py
index d28002f..83adc97 100644
--- a/ebel/manager/rdbms/models/iuphar.py
+++ b/ebel/manager/rdbms/models/iuphar.py
@@ -1,8 +1,7 @@
 """IUPHAR RDBMS model definition."""
 from typing import List, Optional
 
-from sqlalchemy import (BigInteger, Boolean, Column, ForeignKey, Integer,
-                        Numeric, String, Text)
+from sqlalchemy import BigInteger, Boolean, Column, ForeignKey, Integer, Numeric, String, Text
 from sqlalchemy.ext.declarative import declarative_base
 from sqlalchemy.orm import Mapped, mapped_column, relationship
 
diff --git a/ebel/manager/rdbms/models/kegg.py b/ebel/manager/rdbms/models/kegg.py
index d26d78d..d0975e0 100644
--- a/ebel/manager/rdbms/models/kegg.py
+++ b/ebel/manager/rdbms/models/kegg.py
@@ -1,7 +1,7 @@
 """KEGG RDBMS model definition."""
 from sqlalchemy import Column, Integer, String
 from sqlalchemy.ext.declarative import declarative_base
-from sqlalchemy.orm import mapped_column, Mapped
+from sqlalchemy.orm import Mapped, mapped_column
 
 from ebel.manager.rdbms.models import object_as_dict
 
diff --git a/ebel/manager/rdbms/models/mirtarbase.py b/ebel/manager/rdbms/models/mirtarbase.py
index 15df19a..f44aaaf 100644
--- a/ebel/manager/rdbms/models/mirtarbase.py
+++ b/ebel/manager/rdbms/models/mirtarbase.py
@@ -3,7 +3,7 @@
 
 from sqlalchemy import Column, Integer, String, Text
 from sqlalchemy.ext.declarative import declarative_base
-from sqlalchemy.orm import mapped_column, Mapped
+from sqlalchemy.orm import Mapped, mapped_column
 
 from ebel.manager.rdbms.models import object_as_dict
 
diff --git a/ebel/manager/rdbms/models/ncbi.py b/ebel/manager/rdbms/models/ncbi.py
index a26ac3a..caa04f7 100644
--- a/ebel/manager/rdbms/models/ncbi.py
+++ b/ebel/manager/rdbms/models/ncbi.py
@@ -3,7 +3,7 @@
 
 from sqlalchemy import Column, ForeignKey, Integer, String, Text
 from sqlalchemy.ext.declarative import declarative_base
-from sqlalchemy.orm import relationship, mapped_column, Mapped
+from sqlalchemy.orm import Mapped, mapped_column, relationship
 
 from . import object_as_dict
 
diff --git a/ebel/manager/rdbms/models/nsides.py b/ebel/manager/rdbms/models/nsides.py
index aceb587..577c09f 100644
--- a/ebel/manager/rdbms/models/nsides.py
+++ b/ebel/manager/rdbms/models/nsides.py
@@ -2,7 +2,7 @@
 
 from sqlalchemy import Column, Float, Index, Integer, String
 from sqlalchemy.ext.declarative import declarative_base
-from sqlalchemy.orm import mapped_column, Mapped
+from sqlalchemy.orm import Mapped, mapped_column
 
 from ebel.manager.rdbms.models import object_as_dict
 
diff --git a/ebel/manager/rdbms/models/pathway_commons.py b/ebel/manager/rdbms/models/pathway_commons.py
index a27b49f..172b159 100644
--- a/ebel/manager/rdbms/models/pathway_commons.py
+++ b/ebel/manager/rdbms/models/pathway_commons.py
@@ -3,7 +3,7 @@
 
 from sqlalchemy import BigInteger, Column, ForeignKey, Integer, String, Table
 from sqlalchemy.ext.declarative import declarative_base
-from sqlalchemy.orm import relationship, mapped_column, Mapped
+from sqlalchemy.orm import Mapped, mapped_column, relationship
 
 from ebel.manager.rdbms.models import object_as_dict
 
diff --git a/ebel/manager/rdbms/models/protein_atlas.py b/ebel/manager/rdbms/models/protein_atlas.py
index 07ef8fc..0857120 100644
--- a/ebel/manager/rdbms/models/protein_atlas.py
+++ b/ebel/manager/rdbms/models/protein_atlas.py
@@ -3,7 +3,7 @@
 
 from sqlalchemy import Column, Integer, Numeric, String, Text
 from sqlalchemy.ext.declarative import declarative_base
-from sqlalchemy.orm import mapped_column, Mapped
+from sqlalchemy.orm import Mapped, mapped_column
 
 Base = declarative_base()
 
diff --git a/ebel/manager/rdbms/models/reactome.py b/ebel/manager/rdbms/models/reactome.py
index 3852882..42f5b68 100644
--- a/ebel/manager/rdbms/models/reactome.py
+++ b/ebel/manager/rdbms/models/reactome.py
@@ -1,7 +1,7 @@
 """Reactome RDBMS model definition."""
 from sqlalchemy import Column, Integer, String
 from sqlalchemy.ext.declarative import declarative_base
-from sqlalchemy.orm import mapped_column, Mapped
+from sqlalchemy.orm import Mapped, mapped_column
 
 from ebel.manager.rdbms.models import object_as_dict
 
diff --git a/ebel/manager/rdbms/models/stringdb.py b/ebel/manager/rdbms/models/stringdb.py
index 47eccac..1d1992f 100644
--- a/ebel/manager/rdbms/models/stringdb.py
+++ b/ebel/manager/rdbms/models/stringdb.py
@@ -3,7 +3,7 @@
 
 from sqlalchemy import Boolean, Column, Integer, SmallInteger, String
 from sqlalchemy.ext.declarative import declarative_base
-from sqlalchemy.orm import mapped_column, Mapped
+from sqlalchemy.orm import Mapped, mapped_column
 
 from ebel.manager.rdbms.models import object_as_dict
 
diff --git a/ebel/manager/rdbms/models/uniprot.py b/ebel/manager/rdbms/models/uniprot.py
index a63484a..50e3170 100644
--- a/ebel/manager/rdbms/models/uniprot.py
+++ b/ebel/manager/rdbms/models/uniprot.py
@@ -4,7 +4,7 @@
 
 from sqlalchemy import Column, ForeignKey, Integer, String, Table, Text
 from sqlalchemy.ext.declarative import declarative_base
-from sqlalchemy.orm import relationship, mapped_column, Mapped
+from sqlalchemy.orm import Mapped, mapped_column, relationship
 
 Base = declarative_base()
 

From 201ef8a19143ece2c10a489923a515384ca8a6e5 Mon Sep 17 00:00:00 2001
From: Bruce Schultz <bschultz013@gmail.com>
Date: Thu, 21 Sep 2023 10:52:07 +0200
Subject: [PATCH 20/58] feat: update intact and iuphar to select stmts

---
 ebel/manager/orientdb/biodbs/intact.py     | 101 +++++++++++++++------
 ebel/manager/orientdb/biodbs/iuphar.py     |   9 --
 ebel/manager/orientdb/biodbs/mirtarbase.py |  51 ++++++++---
 3 files changed, 107 insertions(+), 54 deletions(-)

diff --git a/ebel/manager/orientdb/biodbs/intact.py b/ebel/manager/orientdb/biodbs/intact.py
index 0a4d57d..a450fb8 100644
--- a/ebel/manager/orientdb/biodbs/intact.py
+++ b/ebel/manager/orientdb/biodbs/intact.py
@@ -5,7 +5,7 @@
 
 import pandas as pd
 from pyorientdb import OrientDB
-from sqlalchemy import text
+from sqlalchemy import select, or_
 from tqdm import tqdm
 
 from ebel.manager.orientdb import odb_meta, odb_structure, urls
@@ -77,18 +77,22 @@ def insert_data(self) -> Dict[str, int]:
 
         df = pd.read_csv(zf.open("intact.txt"), sep="\t", usecols=usecols.keys())
         df.rename(columns=usecols, inplace=True)
+
         regex_accession = r"uniprotkb:([OPQ][0-9][A-Z0-9]{3}[0-9]|[A-NR-Z][0-9]([A-Z][A-Z0-9]{2}[0-9]){1,2})"
         df.int_a_uniprot_id = df.int_a_uniprot_id.str.extract(regex_accession)[0]
         df.int_b_uniprot_id = df.int_b_uniprot_id.str.extract(regex_accession)[0]
         df = df[(pd.notnull(df.int_a_uniprot_id) & pd.notnull(df.int_b_uniprot_id))]
+
         regex_detection_method = r"psi-mi:\"MI:0*(?P<detection_method_psimi_id>\d+)\"\((?P<detection_method>[^)]+)\)"
         df = df.join(df.dm.str.extract(regex_detection_method), how="left")
         df.drop(columns=["dm"], inplace=True)
         df.pmid = df.pmid.str.extract(r"pubmed:(\d+)")
+
         regex_interaction_type = r"psi-mi:\"MI:0*(?P<interaction_type_psimi_id>\d+)\"\((?P<interaction_type>[^)]+)\)"
         df = df.join(df.it.str.extract(regex_interaction_type), how="left")
         df.drop(columns=["it"], inplace=True)
         df.confidence_value = df.confidence_value.str.extract(r"intact-miscore:(\d+(\.\d+)?)")[0]
+
         df.index += 1
         df.index.rename("id", inplace=True)
 
@@ -141,9 +145,14 @@ def get_namespace_name_by_uniprot(self, uniprot_accession: str) -> tuple:
             namespace, value
         """
         return_value = ()
-        sql = f"""Select s.symbol, u.taxid from uniprot u inner join uniprot_gene_symbol s
-                  on (u.id=s.uniprot_id) where u.accession='{uniprot_accession}' limit 1"""
-        result = self.session.execute(text(sql)).fetchone()
+
+        sql = (
+            select(uniprot.GeneSymbol.symbol, uniprot.Uniprot.taxid)
+            .join(uniprot.Uniprot)
+            .where(uniprot.Uniprot.accession == uniprot_accession)
+        )
+
+        result = self.session.execute(sql).fetchone()
         taxid_to_namespace = {9606: "HGNC", 10090: "MGI", 10116: "RGD"}
         if result:
             name, taxid = result
@@ -161,38 +170,64 @@ def update_interactions(self) -> int:
 
         uniprot_rid_dict = uniprot.get_pure_uniprot_rid_dict_in_bel_context()
 
-        sql_temp = """SELECT
-            int_a_uniprot_id,
-            int_b_uniprot_id,
-            pmid,
-            interaction_ids,
-            interaction_type,
-            interaction_type_psimi_id,
-            detection_method,
-            detection_method_psimi_id,
-            confidence_value
-        FROM
-            intact
-        WHERE
-            int_a_uniprot_id = '{uniprot_accession}' or int_b_uniprot_id = '{uniprot_accession}'
-        GROUP BY
-            int_a_uniprot_id,
-            int_b_uniprot_id,
-            pmid,
-            interaction_ids,
-            interaction_type,
-            interaction_type_psimi_id,
-            detection_method,
-            detection_method_psimi_id,
-            confidence_value"""
+        # sql_temp = """SELECT
+        #     int_a_uniprot_id,
+        #     int_b_uniprot_id,
+        #     pmid,
+        #     interaction_ids,
+        #     interaction_type,
+        #     interaction_type_psimi_id,
+        #     detection_method,
+        #     detection_method_psimi_id,
+        #     confidence_value
+        # FROM
+        #     intact
+        # WHERE
+        #     int_a_uniprot_id = '{uniprot_accession}' or int_b_uniprot_id = '{uniprot_accession}'
+        # GROUP BY
+        #     int_a_uniprot_id,
+        #     int_b_uniprot_id,
+        #     pmid,
+        #     interaction_ids,
+        #     interaction_type,
+        #     interaction_type_psimi_id,
+        #     detection_method,
+        #     detection_method_psimi_id,
+        #     confidence_value"""
 
         updated = 0
 
         uniprot_accessions = tuple(uniprot_rid_dict.keys())
+        it = intact.Intact
 
         for uniprot_accession in tqdm(uniprot_accessions, desc="Update IntAct interactions"):
-            sql = sql_temp.format(uniprot_accession=uniprot_accession)
-            result = self.session.execute(text(sql))
+            # sql = sql_temp.format(uniprot_accession=uniprot_accession)
+            sql = (
+                select(
+                    it.int_a_uniprot_id,
+                    it.int_b_uniprot_id,
+                    it.pmid,
+                    it.interaction_ids,
+                    it.interaction_type,
+                    it.interaction_type_psimi_id,
+                    it.detection_method,
+                    it.detection_method_psimi_id,
+                    it.confidence_value,
+                )
+                .where(or_(it.int_a_uniprot_id == uniprot_accession, it.int_b_uniprot_id == uniprot_accession))
+                .group_by(
+                    it.int_a_uniprot_id,
+                    it.int_b_uniprot_id,
+                    it.pmid,
+                    it.interaction_ids,
+                    it.interaction_type,
+                    it.interaction_type_psimi_id,
+                    it.detection_method,
+                    it.detection_method_psimi_id,
+                    it.confidence_value,
+                )
+            )
+            result = self.session.execute(sql)
 
             for (
                 up_a,
@@ -229,3 +264,9 @@ def update_interactions(self) -> int:
                     updated += 1
 
         return updated
+
+
+if __name__ == "__main__":
+    hgncdb = IntAct()
+    hgncdb.recreate_tables()
+    hgncdb.update()
diff --git a/ebel/manager/orientdb/biodbs/iuphar.py b/ebel/manager/orientdb/biodbs/iuphar.py
index 71e1476..3691a41 100644
--- a/ebel/manager/orientdb/biodbs/iuphar.py
+++ b/ebel/manager/orientdb/biodbs/iuphar.py
@@ -209,13 +209,4 @@ def update_interactions(self) -> int:
             edge_class = iuphar_edge_type_mapper.get(data.type, "iuphar_interaction")
             self.create_edge(edge_class, from_rid=a_rid, to_rid=data.rid, value_dict=i_value_dict)
 
-        # not sure if this is really needed
-        # Hgnc(self.client).update_bel()
-
         return df_join.shape[0]
-
-
-if __name__ == "__main__":
-    hgncdb = Iuphar()
-    # hgncdb.recreate_tables()
-    hgncdb.update()
diff --git a/ebel/manager/orientdb/biodbs/mirtarbase.py b/ebel/manager/orientdb/biodbs/mirtarbase.py
index 4085b2a..c66e14e 100644
--- a/ebel/manager/orientdb/biodbs/mirtarbase.py
+++ b/ebel/manager/orientdb/biodbs/mirtarbase.py
@@ -3,7 +3,7 @@
 
 import pandas as pd
 from pyorientdb import OrientDB
-from sqlalchemy import text
+from sqlalchemy import text, select
 from tqdm import tqdm
 
 from ebel.manager.orientdb import odb_meta, odb_structure, urls
@@ -37,7 +37,7 @@ def __contains__(self, item) -> bool:
 
     def insert_data(self) -> Dict[str, int]:
         """Insert mirtarbase data into database."""
-        # TODO Fix download error -
+        # TODO: Fix download error -
         #  ssl.SSLError: [SSL: SSLV3_ALERT_HANDSHAKE_FAILURE] sslv3 alert handshake failure (_ssl.c:997)
         df = pd.read_excel(self.file_path)
         df.columns = self._standardize_column_names(df.columns)
@@ -58,20 +58,35 @@ def update_interactions(self) -> int:
         self.clear_edges()
         df_symbol_rid = self.get_pure_symbol_rid_df_in_bel_context(class_name="rna", namespace="HGNC")
 
-        sql = f"""Select
-                mi_rna,
-                target_gene as symbol,
-                support_type,
-                references_pmid as pmid,
-                experiments
-            from
-                {mirtarbase.Mirtarbase.__tablename__}
-            where
-                species_mi_rna='Homo sapiens' and
-                species_target_gene='Homo sapiens' and
-                support_type in ('Functional MTI', 'Non-Functional MTI')"""
+        # sql = f"""Select
+        #         mi_rna,
+        #         target_gene as symbol,
+        #         support_type,
+        #         references_pmid as pmid,
+        #         experiments
+        #     from
+        #         {mirtarbase.Mirtarbase.__tablename__}
+        #     where
+        #         species_mi_rna='Homo sapiens' and
+        #         species_target_gene='Homo sapiens' and
+        #         support_type in ()"""
+
+        mtb = mirtarbase.Mirtarbase
+        sql = (
+            select(
+                mtb.mi_rna,
+                mtb.target_gene.label("symbol"),
+                mtb.support_type,
+                mtb.references_pmid.label("pmid"),
+                mtb.experiments,
+            )
+            .where(mtb.species_mi_rna == "Homo sapiens")
+            .where(mtb.species_target_gene == "Homo sapiens")
+            .where(mtb.support_type.in_(["Functional MTI", "Non-Functional MTI"]))
+        )
+
         cols = ["mi_rna", "symbol", "support_type", "pmid", "experiments"]
-        df_mirtarbase = pd.DataFrame(self.session.execute(text(sql)).fetchall(), columns=cols)
+        df_mirtarbase = pd.DataFrame(self.session.execute(sql).fetchall(), columns=cols)
         df_mirtarbase.experiments = df_mirtarbase.experiments.str.split("//")
         df_join = df_mirtarbase.set_index("symbol").join(df_symbol_rid.set_index("symbol"), how="inner")
 
@@ -95,3 +110,9 @@ def update_interactions(self) -> int:
             updated += 1
 
         return updated
+
+
+if __name__ == "__main__":
+    hgncdb = MirTarBase()
+    # hgncdb.recreate_tables()
+    hgncdb.update()

From ad2b9a1b84cb2a181c9bbf1de2c5c45e4b9b1e06 Mon Sep 17 00:00:00 2001
From: Bruce Schultz <bschultz013@gmail.com>
Date: Thu, 21 Sep 2023 11:01:07 +0200
Subject: [PATCH 21/58] fix: chebi insert replace table instead of append

---
 ebel/manager/orientdb/biodbs/chebi.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/ebel/manager/orientdb/biodbs/chebi.py b/ebel/manager/orientdb/biodbs/chebi.py
index 3ace1cb..767f4c6 100644
--- a/ebel/manager/orientdb/biodbs/chebi.py
+++ b/ebel/manager/orientdb/biodbs/chebi.py
@@ -104,7 +104,7 @@ def insert_data(self) -> Dict[str, int]:
                         .reset_index()
                     )
 
-                df.to_sql(table_name, self.engine, index=False, if_exists="append")
+                df.to_sql(table_name, self.engine, index=False, if_exists="replace")
 
                 inserted[table_name] += df.shape[0]
         self.session.commit()

From 3499b8d78548b6018c53295d839be0632dbeb3cc Mon Sep 17 00:00:00 2001
From: Bruce Schultz <bschultz013@gmail.com>
Date: Thu, 21 Sep 2023 12:29:27 +0200
Subject: [PATCH 22/58] chore: remove test code

---
 ebel/manager/models.py                     | 1 +
 ebel/manager/orientdb/biodbs/intact.py     | 6 ------
 ebel/manager/orientdb/biodbs/mirtarbase.py | 6 ------
 3 files changed, 1 insertion(+), 12 deletions(-)

diff --git a/ebel/manager/models.py b/ebel/manager/models.py
index 32c0307..98fe552 100755
--- a/ebel/manager/models.py
+++ b/ebel/manager/models.py
@@ -42,6 +42,7 @@ def reset_tables(engine: sqlalchemy.engine.Engine, force_new_db: bool) -> None:
 
     if force_new_db:
         Base.metadata.drop_all(bind=engine)
+
     Base.metadata.create_all(bind=engine, checkfirst=True)
 
 
diff --git a/ebel/manager/orientdb/biodbs/intact.py b/ebel/manager/orientdb/biodbs/intact.py
index a450fb8..c3c4959 100644
--- a/ebel/manager/orientdb/biodbs/intact.py
+++ b/ebel/manager/orientdb/biodbs/intact.py
@@ -264,9 +264,3 @@ def update_interactions(self) -> int:
                     updated += 1
 
         return updated
-
-
-if __name__ == "__main__":
-    hgncdb = IntAct()
-    hgncdb.recreate_tables()
-    hgncdb.update()
diff --git a/ebel/manager/orientdb/biodbs/mirtarbase.py b/ebel/manager/orientdb/biodbs/mirtarbase.py
index c66e14e..68ace6d 100644
--- a/ebel/manager/orientdb/biodbs/mirtarbase.py
+++ b/ebel/manager/orientdb/biodbs/mirtarbase.py
@@ -110,9 +110,3 @@ def update_interactions(self) -> int:
             updated += 1
 
         return updated
-
-
-if __name__ == "__main__":
-    hgncdb = MirTarBase()
-    # hgncdb.recreate_tables()
-    hgncdb.update()

From f87ebfcb640e9676116f604d3405d205764274a9 Mon Sep 17 00:00:00 2001
From: Bruce Schultz <bruce.schultz@scai.fraunhofer.de>
Date: Thu, 21 Sep 2023 14:00:22 +0200
Subject: [PATCH 23/58] build: update pandas version in reqs

---
 pyproject.toml   | 4 ++--
 requirements.txt | 4 ++--
 2 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/pyproject.toml b/pyproject.toml
index a0b9f80..f6e5a1b 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -43,7 +43,7 @@ lark-parser = "^0.11.3"
 click = "^8.1.7"
 requests = "^2.31.0"
 tqdm = "^4.66.1"
-pandas = "^1.5.3"
+pandas = "^2.1.1"
 sqlalchemy = "^2.0.20"
 SQLAlchemy-Utils = "^0.37.9"
 xlwt = "^1.3.0"
@@ -117,4 +117,4 @@ source = [
     ]
 
 [tool.coverage.html]
-directory = "coverage_html_report"
\ No newline at end of file
+directory = "coverage_html_report"
diff --git a/requirements.txt b/requirements.txt
index d128e4b..dcc3963 100755
--- a/requirements.txt
+++ b/requirements.txt
@@ -2,7 +2,7 @@ lark-parser==0.11.3
 click>=8.1.7
 requests>=2.31.0
 tqdm>=4.66.1
-pandas>=1.5.3
+pandas>=2.2.1
 sqlalchemy>=2.0.20
 SQLAlchemy-Utils==0.37.9
 xlwt==1.3.0
@@ -18,4 +18,4 @@ connexion[swagger-ui]==2.14.2
 cryptography==3.4.8
 openpyxl==3.1.2
 graphviz==0.20
-pyorientdb==1.0.0
\ No newline at end of file
+pyorientdb==1.0.0

From f778baf93462b363469b78bedeb63c40e6576c52 Mon Sep 17 00:00:00 2001
From: Bruce Schultz <bruce.schultz@scai.fraunhofer.de>
Date: Fri, 22 Sep 2023 09:40:56 +0200
Subject: [PATCH 24/58] fix: pandas version in requirements.txt

---
 requirements.txt | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/requirements.txt b/requirements.txt
index dcc3963..cbde27b 100755
--- a/requirements.txt
+++ b/requirements.txt
@@ -2,7 +2,7 @@ lark-parser==0.11.3
 click>=8.1.7
 requests>=2.31.0
 tqdm>=4.66.1
-pandas>=2.2.1
+pandas>=2.1.1
 sqlalchemy>=2.0.20
 SQLAlchemy-Utils==0.37.9
 xlwt==1.3.0

From 48418e56c46df7231b38923ee996778a10b3bbb7 Mon Sep 17 00:00:00 2001
From: Bruce Schultz <bruce.schultz@scai.fraunhofer.de>
Date: Fri, 22 Sep 2023 10:29:42 +0200
Subject: [PATCH 25/58] fix: gwas catalog nullable props

---
 ebel/manager/orientdb/biodbs/clinvar.py      |  6 +++---
 ebel/manager/orientdb/biodbs/gwas_catalog.py |  4 ++--
 ebel/manager/rdbms/models/gwas_catalog.py    | 12 ++++++------
 3 files changed, 11 insertions(+), 11 deletions(-)

diff --git a/ebel/manager/orientdb/biodbs/clinvar.py b/ebel/manager/orientdb/biodbs/clinvar.py
index 613ce5e..f8b77b6 100644
--- a/ebel/manager/orientdb/biodbs/clinvar.py
+++ b/ebel/manager/orientdb/biodbs/clinvar.py
@@ -63,9 +63,9 @@ def insert_data(self) -> Dict[str, int]:
         self._standardize_dataframe(df)
         df.index += 1
         df.index.rename("id", inplace=True)
-        df.drop(columns=["phenotype_ids", "phenotype_list", "other_ids"]).to_sql(
-            self.biodb_name, self.engine, if_exists="append", chunksize=10000
-        )
+
+        df_base = df.drop(columns=["phenotype_ids", "phenotype_list", "other_ids"])
+        df_base.to_sql(clinvar.Clinvar.__tablename__, con=self.engine, if_exists="append", chunksize=10000)
 
         df_clinvar__phenotype = (
             df["phenotype_list"]
diff --git a/ebel/manager/orientdb/biodbs/gwas_catalog.py b/ebel/manager/orientdb/biodbs/gwas_catalog.py
index 414ee8d..ac44b79 100644
--- a/ebel/manager/orientdb/biodbs/gwas_catalog.py
+++ b/ebel/manager/orientdb/biodbs/gwas_catalog.py
@@ -79,7 +79,7 @@ def insert_data(self) -> Dict[str, int]:
 
         table_name = gwas_catalog.GwasCatalog.__tablename__
 
-        df[columns_main_table].to_sql(table_name, self.engine, if_exists="append")
+        df[columns_main_table].to_sql(table_name, self.engine, if_exists="replace")
 
         df.snp_gene_ids = df.snp_gene_ids.str.strip().str.split(", ")
         df[table_name + "_id"] = df.index
@@ -89,7 +89,7 @@ def insert_data(self) -> Dict[str, int]:
         df_snp_gene_ids.index = range(1, df_snp_gene_ids.shape[0] + 1)
         df_snp_gene_ids.rename(columns={"snp_gene_ids": "ensembl_identifier"}, inplace=True)
         df_snp_gene_ids.index.rename("id", inplace=True)
-        df_snp_gene_ids.to_sql(gwas_catalog.SnpGene.__tablename__, self.engine, if_exists="append")
+        df_snp_gene_ids.to_sql(gwas_catalog.SnpGene.__tablename__, self.engine, if_exists="replace")
 
         self.session.commit()
 
diff --git a/ebel/manager/rdbms/models/gwas_catalog.py b/ebel/manager/rdbms/models/gwas_catalog.py
index 550e128..bcd336e 100644
--- a/ebel/manager/rdbms/models/gwas_catalog.py
+++ b/ebel/manager/rdbms/models/gwas_catalog.py
@@ -34,20 +34,20 @@ class GwasCatalog(Base):
     downstream_gene_id: Mapped[Optional[str]] = mapped_column(String(50))
     upstream_gene_distance: Mapped[Optional[int]] = mapped_column()
     downstream_gene_distance: Mapped[Optional[int]] = mapped_column()
-    strongest_snp_risk_allele: Mapped[str] = mapped_column(Text)
-    snp: Mapped[str] = mapped_column(Text)
+    strongest_snp_risk_allele: Mapped[Optional[int]] = mapped_column(Text)
+    snp: Mapped[Optional[int]] = mapped_column(Text)
     merged: Mapped[Optional[int]] = mapped_column()
     snp_id_current: Mapped[Optional[str]] = mapped_column(Text)
     context: Mapped[Optional[str]] = mapped_column(Text)
     intergenic: Mapped[Optional[int]] = mapped_column()
     risk_allele_frequency: Mapped[Optional[str]] = mapped_column(Text)
-    p_value: Mapped[float] = mapped_column()
-    pvalue_mlog: Mapped[float] = mapped_column()
+    p_value: Mapped[Optional[float]] = mapped_column()
+    pvalue_mlog: Mapped[Optional[float]] = mapped_column()
     p_value_text: Mapped[Optional[str]] = mapped_column(Text)
     or_or_beta: Mapped[Optional[float]] = mapped_column()
     _95_ci_text: Mapped[Optional[str]] = mapped_column(Text)
-    platform_snps_passing_qc: Mapped[str] = mapped_column(Text)
-    cnv: Mapped[str] = mapped_column(Text)
+    platform_snps_passing_qc: Mapped[Optional[str]] = mapped_column(Text)
+    cnv: Mapped[Optional[str]] = mapped_column(Text)
 
     snp_genes: Mapped[List["SnpGene"]] = relationship("SnpGene", back_populates="gwascatalog")
 

From d645b5dcdbb16fcffc86a5a3d4b9a66e60eba7fb Mon Sep 17 00:00:00 2001
From: Bruce Schultz <bruce.schultz@scai.fraunhofer.de>
Date: Fri, 22 Sep 2023 13:33:50 +0200
Subject: [PATCH 26/58] fix: add Ensembl update to ClinVar

---
 ebel/manager/orientdb/biodbs/clinvar.py | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/ebel/manager/orientdb/biodbs/clinvar.py b/ebel/manager/orientdb/biodbs/clinvar.py
index f8b77b6..56cca96 100644
--- a/ebel/manager/orientdb/biodbs/clinvar.py
+++ b/ebel/manager/orientdb/biodbs/clinvar.py
@@ -11,6 +11,7 @@
 from ebel.manager.orientdb import odb_meta, odb_structure, urls
 from ebel.manager.orientdb.constants import CLINVAR
 from ebel.manager.rdbms.models import clinvar
+from ebel.manager.orientdb.biodbs.ensembl import Ensembl
 from ebel.tools import get_disease_trait_keywords_from_config, get_file_path
 
 logger = logging.getLogger(__name__)
@@ -58,7 +59,12 @@ def insert_data(self) -> Dict[str, int]:
         """Insert data."""
         inserted = {}
         logger.info("Insert data for ClinVar")
+
+        # Depends on Ensembl
+        Ensembl().update()
+
         self.recreate_tables()
+
         df = pd.read_csv(self.file_path, sep="\t", low_memory=False)
         self._standardize_dataframe(df)
         df.index += 1

From 191ca9128476322563a336fe216bef4238ffe025 Mon Sep 17 00:00:00 2001
From: Bruce Schultz <bschultz013@gmail.com>
Date: Sat, 23 Sep 2023 09:40:28 +0200
Subject: [PATCH 27/58] fix: add hgnc update to stringdb

---
 ebel/manager/orientdb/biodbs/stringdb.py | 9 ++++++++-
 1 file changed, 8 insertions(+), 1 deletion(-)

diff --git a/ebel/manager/orientdb/biodbs/stringdb.py b/ebel/manager/orientdb/biodbs/stringdb.py
index 19aceb3..a10c904 100644
--- a/ebel/manager/orientdb/biodbs/stringdb.py
+++ b/ebel/manager/orientdb/biodbs/stringdb.py
@@ -6,7 +6,7 @@
 import numpy as np
 import pandas as pd
 from pyorientdb import OrientDB
-from sqlalchemy import or_, select, text
+from sqlalchemy import or_, select, text, and_
 from tqdm import tqdm
 
 from ebel.manager.orientdb import odb_meta, odb_structure, urls
@@ -167,6 +167,7 @@ def get_stringdb_action_hgnc_set(self):
     def update_interactions(self) -> Dict[str, int]:
         """Update the edges with StringDB metadata."""
         hgnc = Hgnc(self.client)
+        hgnc.update()  # If users haven't run Hgnc yet
         updated = dict()
         updated["interactions"] = self.update_stringdb_interactions(hgnc)
         updated["actions"] = self.update_action_interactions(hgnc)
@@ -322,3 +323,9 @@ def update_action_interactions(self, hgnc: Hgnc) -> int:
                         updated += 1
 
         return updated
+
+
+if __name__ == "__main__":
+    a = StringDb()
+    # a.recreate_tables()
+    a.update()

From c39de4faf866d14252c4a75d27fcba1dda9d81da Mon Sep 17 00:00:00 2001
From: Bruce Schultz <bschultz013@gmail.com>
Date: Sat, 23 Sep 2023 09:40:49 +0200
Subject: [PATCH 28/58] chore: remove test code from stringdb

---
 ebel/manager/orientdb/biodbs/stringdb.py | 6 ------
 1 file changed, 6 deletions(-)

diff --git a/ebel/manager/orientdb/biodbs/stringdb.py b/ebel/manager/orientdb/biodbs/stringdb.py
index a10c904..83380a5 100644
--- a/ebel/manager/orientdb/biodbs/stringdb.py
+++ b/ebel/manager/orientdb/biodbs/stringdb.py
@@ -323,9 +323,3 @@ def update_action_interactions(self, hgnc: Hgnc) -> int:
                         updated += 1
 
         return updated
-
-
-if __name__ == "__main__":
-    a = StringDb()
-    # a.recreate_tables()
-    a.update()

From 6c03eb20da7daaf5b8c1425425fbda0464481697 Mon Sep 17 00:00:00 2001
From: Bruce Schultz <bschultz013@gmail.com>
Date: Mon, 25 Sep 2023 10:32:19 +0200
Subject: [PATCH 29/58] build: update sqlalchemy-utils version in deps

---
 pyproject.toml   | 2 +-
 requirements.txt | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/pyproject.toml b/pyproject.toml
index f6e5a1b..187016c 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -45,7 +45,7 @@ requests = "^2.31.0"
 tqdm = "^4.66.1"
 pandas = "^2.1.1"
 sqlalchemy = "^2.0.20"
-SQLAlchemy-Utils = "^0.37.9"
+SQLAlchemy-Utils = "^0.41.1"
 xlwt = "^1.3.0"
 xlrd = "^2.0.1"
 xlsxwriter = "^1.4.5"
diff --git a/requirements.txt b/requirements.txt
index cbde27b..206a5d6 100755
--- a/requirements.txt
+++ b/requirements.txt
@@ -4,7 +4,7 @@ requests>=2.31.0
 tqdm>=4.66.1
 pandas>=2.1.1
 sqlalchemy>=2.0.20
-SQLAlchemy-Utils==0.37.9
+SQLAlchemy-Utils==0.41.1
 xlwt==1.3.0
 xlrd==2.0.1
 xlsxwriter==1.4.5

From 107676f91bdfb9efdd2bccef04333465818ecd13 Mon Sep 17 00:00:00 2001
From: Bruce Schultz <bschultz013@gmail.com>
Date: Mon, 25 Sep 2023 10:35:57 +0200
Subject: [PATCH 30/58] fix: add check/create database step to abstract graph
 init

---
 ebel/manager/orientdb/odb_meta.py | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/ebel/manager/orientdb/odb_meta.py b/ebel/manager/orientdb/odb_meta.py
index 8773bad..88e510f 100644
--- a/ebel/manager/orientdb/odb_meta.py
+++ b/ebel/manager/orientdb/odb_meta.py
@@ -94,13 +94,18 @@ def __init__(
         self.engine = rdb.engine
         self.session = rdb.session
 
-        if not (get_config_value("DATABASE", "sqlalchemy_connection_string") or database_exists(self.engine.url)):
+        conn = get_config_value("DATABASE", "sqlalchemy_connection_string")
+
+        if not (conn or database_exists(self.engine.url)):
             if str(self.engine.url).startswith("mysql"):
                 set_mysql_interactive()
 
             else:
                 create_database(self.engine.url)
 
+        if not database_exists(self.engine.url):
+            create_database(self.engine.url)
+
     def __config_params_check(self, overwrite_config: bool = False):
         """Go through passed/available configuration params."""
         # Set the client

From 3b9e9b616283de26cf40c2d49a175f05274ea189 Mon Sep 17 00:00:00 2001
From: Bruce Schultz <bschultz013@gmail.com>
Date: Mon, 25 Sep 2023 11:07:04 +0200
Subject: [PATCH 31/58] fix: defaults dir path

---
 ebel/constants.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/ebel/constants.py b/ebel/constants.py
index 86be228..bb8a088 100755
--- a/ebel/constants.py
+++ b/ebel/constants.py
@@ -4,7 +4,7 @@
 import os
 from pathlib import Path
 
-THIS_DIR = Path(__file__)
+THIS_DIR = Path(__file__).parent
 PROJECT_NAME = "ebel"
 
 HOME = Path.home()

From 0755a47f4207e90db7b45a2666e4482b14379bb6 Mon Sep 17 00:00:00 2001
From: Bruce Schultz <bschultz013@gmail.com>
Date: Mon, 25 Sep 2023 11:10:28 +0200
Subject: [PATCH 32/58] feat: begin updating biogrid to sql2

---
 ebel/manager/orientdb/biodbs/biogrid.py | 47 ++++++++++++++++++++++---
 1 file changed, 43 insertions(+), 4 deletions(-)

diff --git a/ebel/manager/orientdb/biodbs/biogrid.py b/ebel/manager/orientdb/biodbs/biogrid.py
index 277998f..072342c 100644
--- a/ebel/manager/orientdb/biodbs/biogrid.py
+++ b/ebel/manager/orientdb/biodbs/biogrid.py
@@ -7,7 +7,7 @@
 import numpy as np
 import pandas as pd
 from pyorientdb import OrientDB
-from sqlalchemy import text
+from sqlalchemy import text, select, func, cast, Integer
 from tqdm import tqdm
 
 from ebel import tools
@@ -516,8 +516,9 @@ def update_interactions(self) -> int:
             ib.uniprot as object_uniprot,
             ib.taxonomy_id as object_taxonomy_id,
             es.experimental_system,
-            group_concat( distinct b.biogrid_id) as biogrid_ids,
-            group_concat( distinct if(p.source='PUBMED',CAST(p.source_identifier AS UNSIGNED),NULL)) as pmids,
+            group_concat( distinct b.biogrid_id) as biogrid_ids, group_concat( 
+            distinct if(p.source='PUBMED',CAST(p.source_identifier AS UNSIGNED),NULL)
+            ) as pmids,
             count(distinct p.source_identifier) as num_pubs,
             group_concat( distinct if(p.source='DOI',CAST(p.source_identifier AS UNSIGNED),NULL)) as dois
         from
@@ -540,6 +541,13 @@ def update_interactions(self) -> int:
             ib.taxonomy_id,
             es.experimental_system"""
 
+        b = biogrid.Biogrid
+        ia = biogrid.Interactor
+        ib = biogrid.Interactor
+        m = biogrid.Modification
+        p = biogrid.Publication
+        es = biogrid.ExperimentalSystem
+
         uniprots_in_bel_set = self.get_pure_uniprots_in_bel_context()
         uniprot_modification_pairs = self.get_uniprot_modification_pairs()
 
@@ -568,7 +576,38 @@ def update_interactions(self) -> int:
                     object_uniprot=e["object_uniprot"],
                 )
 
-                for row in self.session.execute(text(sql)).fetchall():
+                sql = (
+                    select(
+                        ia.symbol.label("subject_symbol"),
+                        ia.uniprot.label("subject_uniprot"),
+                        ia.taxonomy_id.label("subject_taxonomy_id"),
+                        m.modification,
+                        ib.symbol.label("object_symbol"),
+                        ib.uniprot.label("object_uniprot"),
+                        ib.taxonomy_id.label("object_taxonomy_id"),
+                        es.experimental_system,
+                        func.group_concat(
+                            b.biogrid_id.distinct().label("biogrid_ids"),
+                            func.group_concat(
+                                func.IF(p.source == "PUBMED", cast(p.source_identifier, Integer), None).distinct()
+                            ).label("pmids"),
+                        ),
+                        p.source_identifier.count().label("num_pubs"),
+                        func.group_concat(func.IF(p.source == "DOI", cast(p.source_identifier, Integer), None)).label(
+                            "dois"
+                        ),
+                    )
+                    .join(ia)
+                    .join(ib)
+                    .join(m)
+                    .join(p)
+                    .join(es)
+                    .where(ia.uniprot == e["subject_uniprot"])
+                    .where(ib.uniprot == e["object_uniprot"])
+                    .where(m.modification != "No Modification")
+                )
+
+                for row in self.session.execute(sql).fetchall():
                     row_dict = row._asdict()
                     be = BioGridEdge(subject_rid=subj_pure_rid, object_rid=obj_pure_rid, **row_dict)
                     edge_value_dict = be.get_edge_value_dict()

From ded1170aeb2beb1797db2d368f58f3cb72bfda11 Mon Sep 17 00:00:00 2001
From: Bruce Schultz <bschultz013@gmail.com>
Date: Mon, 25 Sep 2023 11:13:55 +0200
Subject: [PATCH 33/58] fix: add defaults import to odb_meta to ensure logging
 init

---
 ebel/manager/orientdb/odb_meta.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/ebel/manager/orientdb/odb_meta.py b/ebel/manager/orientdb/odb_meta.py
index 88e510f..6240440 100644
--- a/ebel/manager/orientdb/odb_meta.py
+++ b/ebel/manager/orientdb/odb_meta.py
@@ -29,6 +29,7 @@
 from tqdm import tqdm
 
 import ebel.database
+import ebel.defaults
 from ebel.cache import set_mysql_interactive
 from ebel.config import get_config_as_dict, get_config_value, write_to_config
 from ebel.constants import DEFAULT_ODB, RID

From d447e934e506618a4ef85ad2560ea9cdce5d00a0 Mon Sep 17 00:00:00 2001
From: Bruce Schultz <bschultz013@gmail.com>
Date: Mon, 25 Sep 2023 13:33:32 +0200
Subject: [PATCH 34/58] fix: convert biogrid update sql text to sqla2 stmt

---
 ebel/defaults.py                        |  6 ++
 ebel/manager/orientdb/biodbs/biogrid.py | 83 +++++++------------------
 ebel/manager/rdbms/models/biogrid.py    |  6 +-
 3 files changed, 33 insertions(+), 62 deletions(-)

diff --git a/ebel/defaults.py b/ebel/defaults.py
index 3ff9bb7..8466190 100755
--- a/ebel/defaults.py
+++ b/ebel/defaults.py
@@ -56,8 +56,14 @@
 logHandler.setFormatter(logh_format)
 logHandler.setLevel(logging.DEBUG)
 
+
 # Console Handler
 ch = logging.StreamHandler()
 ch_format = logging.Formatter("%(asctime)s - %(levelname)s - %(message)s")
 ch.setFormatter(ch_format)
 ch.setLevel(logging.WARNING)
+
+logging.basicConfig(
+    handlers=[logHandler, ch],
+    encoding="utf-8",
+)
diff --git a/ebel/manager/orientdb/biodbs/biogrid.py b/ebel/manager/orientdb/biodbs/biogrid.py
index 072342c..b47cc88 100644
--- a/ebel/manager/orientdb/biodbs/biogrid.py
+++ b/ebel/manager/orientdb/biodbs/biogrid.py
@@ -8,6 +8,7 @@
 import pandas as pd
 from pyorientdb import OrientDB
 from sqlalchemy import text, select, func, cast, Integer
+from sqlalchemy.orm import aliased
 from tqdm import tqdm
 
 from ebel import tools
@@ -286,7 +287,7 @@ def insert_data(self) -> Dict[str, int]:
         }
 
         # main table
-        df = pd.read_csv(self.file_path, usecols=use_columns.keys(), sep="\t", low_memory=False)
+        df = pd.read_csv(self.file_path, usecols=list(use_columns.keys()), sep="\t", low_memory=False)
         df.rename(columns=use_columns, inplace=True)
         df.replace("-", np.nan, inplace=True)
 
@@ -505,45 +506,9 @@ def get_create_pure_protein_rid_by_uniprot(self, taxonomy_id, symbol, uniprot):
 
     def update_interactions(self) -> int:
         """Updates all BioGrid interactions."""
-        # TODO: sql_temp as sqlalchemy query
-        sql_temp = """
-        Select
-            ia.symbol as subject_symbol,
-            ia.uniprot as subject_uniprot,
-            ia.taxonomy_id as subject_taxonomy_id,
-            m.modification,
-            ib.symbol as object_symbol,
-            ib.uniprot as object_uniprot,
-            ib.taxonomy_id as object_taxonomy_id,
-            es.experimental_system,
-            group_concat( distinct b.biogrid_id) as biogrid_ids, group_concat( 
-            distinct if(p.source='PUBMED',CAST(p.source_identifier AS UNSIGNED),NULL)
-            ) as pmids,
-            count(distinct p.source_identifier) as num_pubs,
-            group_concat( distinct if(p.source='DOI',CAST(p.source_identifier AS UNSIGNED),NULL)) as dois
-        from
-            biogrid b
-            inner join biogrid_interactor ia on (b.biogrid_a_id=ia.biogrid_id)
-            inner join biogrid_interactor ib on (b.biogrid_b_id=ib.biogrid_id)
-            inner join biogrid_modification m on (m.id=b.modification_id)
-            inner join biogrid_publication p on (b.publication_id=p.id)
-            inner join biogrid_experimental_system es on (b.experimental_system_id=es.id)
-        where
-            (ia.uniprot = '{subject_uniprot}' and ib.uniprot = '{object_uniprot}') and
-            m.modification != 'No Modification'
-        group by
-            ia.symbol,
-            ia.uniprot,
-            ia.taxonomy_id,
-            m.modification,
-            ib.symbol,
-            ib.uniprot,
-            ib.taxonomy_id,
-            es.experimental_system"""
-
         b = biogrid.Biogrid
-        ia = biogrid.Interactor
-        ib = biogrid.Interactor
+        ia = aliased(biogrid.Interactor)
+        ib = aliased(biogrid.Interactor)
         m = biogrid.Modification
         p = biogrid.Publication
         es = biogrid.ExperimentalSystem
@@ -554,6 +519,10 @@ def update_interactions(self) -> int:
         counter = 0
         self.clear_edges()
 
+        if_func = func.iif if self.engine.dialect.name == "sqlite" else func.IF
+
+        logging.info("Update BioGRID")
+
         for e in tqdm(
             uniprot_modification_pairs,
             desc=f"Update {self.biodb_name.upper()} interactions",
@@ -571,10 +540,8 @@ def update_interactions(self) -> int:
                     uniprot=e["object_uniprot"],
                 )
 
-                sql = sql_temp.format(
-                    subject_uniprot=e["subject_uniprot"],
-                    object_uniprot=e["object_uniprot"],
-                )
+                subject_uniprot = e["subject_uniprot"]
+                object_uniprot = e["object_uniprot"]
 
                 sql = (
                     select(
@@ -586,24 +553,22 @@ def update_interactions(self) -> int:
                         ib.uniprot.label("object_uniprot"),
                         ib.taxonomy_id.label("object_taxonomy_id"),
                         es.experimental_system,
+                        func.group_concat(b.biogrid_id.distinct()).label("biogrid_ids"),
+                        func.group_concat(
+                            if_func(p.source == "PUBMED", cast(p.source_identifier, Integer), None).distinct()
+                        ).label("pmids"),
+                        func.count(p.source_identifier).label("num_pubs"),
                         func.group_concat(
-                            b.biogrid_id.distinct().label("biogrid_ids"),
-                            func.group_concat(
-                                func.IF(p.source == "PUBMED", cast(p.source_identifier, Integer), None).distinct()
-                            ).label("pmids"),
-                        ),
-                        p.source_identifier.count().label("num_pubs"),
-                        func.group_concat(func.IF(p.source == "DOI", cast(p.source_identifier, Integer), None)).label(
-                            "dois"
-                        ),
+                            if_func(p.source == "DOI", cast(p.source_identifier, Integer), None).distinct()
+                        ).label("dois"),
                     )
-                    .join(ia)
-                    .join(ib)
-                    .join(m)
-                    .join(p)
-                    .join(es)
-                    .where(ia.uniprot == e["subject_uniprot"])
-                    .where(ib.uniprot == e["object_uniprot"])
+                    .join(ia, b.biogrid_a_id == ia.biogrid_id)
+                    .join(ib, b.biogrid_b_id == ib.biogrid_id)
+                    .join(m, m.id == b.modification_id)
+                    .join(p, b.publication_id == p.id)
+                    .join(es, b.experimental_system_id == es.id)
+                    .where(ia.uniprot == subject_uniprot)
+                    .where(ib.uniprot == object_uniprot)
                     .where(m.modification != "No Modification")
                 )
 
diff --git a/ebel/manager/rdbms/models/biogrid.py b/ebel/manager/rdbms/models/biogrid.py
index b8dea17..f5cec5e 100644
--- a/ebel/manager/rdbms/models/biogrid.py
+++ b/ebel/manager/rdbms/models/biogrid.py
@@ -72,7 +72,7 @@ class Throughput(Base):
     __tablename__ = "biogrid_throughput"
     id: Mapped[int] = mapped_column(primary_key=True)
     throughput: Mapped[str] = mapped_column(String(255))
-    frequency: Mapped[int] = mapped_column()
+    count: Mapped[int] = mapped_column()
 
     def as_dict(self):
         """Convert object values to dictionary."""
@@ -98,7 +98,7 @@ class ExperimentalSystem(Base):
     id: Mapped[int] = mapped_column(primary_key=True)
     experimental_system: Mapped[str] = mapped_column(String(255), nullable=True)
     experimental_system_type: Mapped[str] = mapped_column(String(255), nullable=True)
-    frequency: Mapped[int] = mapped_column()
+    count: Mapped[int] = mapped_column()
 
     def as_dict(self):
         """Convert object values to dictionary."""
@@ -149,7 +149,7 @@ class Modification(Base):
     __tablename__ = "biogrid_modification"
     id: Mapped[int] = mapped_column(primary_key=True)
     modification: Mapped[str] = mapped_column(String(255), nullable=True)
-    frequency: Mapped[int] = mapped_column()
+    count: Mapped[int] = mapped_column()
 
     def as_dict(self):
         """Convert object values to dictionary."""

From ae93f1a192369e9e20d600d1555aa493fdb9a609 Mon Sep 17 00:00:00 2001
From: Bruce Schultz <bschultz013@gmail.com>
Date: Mon, 25 Sep 2023 13:48:59 +0200
Subject: [PATCH 35/58] fix: pathway commons super table join in update

---
 ebel/manager/orientdb/biodbs/pathway_commons.py | 12 ++++++++----
 1 file changed, 8 insertions(+), 4 deletions(-)

diff --git a/ebel/manager/orientdb/biodbs/pathway_commons.py b/ebel/manager/orientdb/biodbs/pathway_commons.py
index 88a23fe..c0ca6e9 100644
--- a/ebel/manager/orientdb/biodbs/pathway_commons.py
+++ b/ebel/manager/orientdb/biodbs/pathway_commons.py
@@ -243,8 +243,7 @@ def update_interactions(self) -> Dict[str, int]:
                 pc.PathwayCommons.interaction_type == edge_type
             )
 
-            with self.engine.connect() as conn:
-                df_ppi_of = pd.read_sql(sql, conn)
+            df_ppi_of = pd.read_sql(sql, self.engine)
 
             df_join = (
                 df_ppi_of.set_index("participant_a")
@@ -252,13 +251,13 @@ def update_interactions(self) -> Dict[str, int]:
                 .rename(columns={"rid": "rid_a_all"})
                 .join(df_bel.set_index("symbol"))
                 .reset_index()
-                .rename(columns={"rid": "rid_a_bel", "index": "a"})
+                .rename(columns={"rid": "rid_a_bel", "participant_a": "a"})
                 .set_index("participant_b")
                 .join(df_all.set_index("symbol"))
                 .rename(columns={"rid": "rid_b_all"})
                 .join(df_bel.set_index("symbol"))
                 .reset_index()
-                .rename(columns={"rid": "rid_b_bel", "index": "b"})
+                .rename(columns={"rid": "rid_b_bel", "participant_b": "b"})
                 .set_index("id")
             )
 
@@ -305,3 +304,8 @@ def get_pathway_pmids_sources(self, pc_id, pc_pathway_name_rid_dict) -> tuple:
         pmids = [x.pmid for x in pc_obj.pmids]
         pathways = [pc_pathway_name_rid_dict[x.name] for x in pc_obj.pathway_names]
         return pathways, pmids, sources
+
+
+if __name__ == "__main__":
+    foo = PathwayCommons()
+    foo.update_interactions()

From ca26a1798fdea459287c8ebf38bb7b2d1e9c97f5 Mon Sep 17 00:00:00 2001
From: Bruce Schultz <bschultz013@gmail.com>
Date: Mon, 25 Sep 2023 13:55:04 +0200
Subject: [PATCH 36/58] fix: remove database init

---
 ebel/manager/orientdb/odb_meta.py | 3 ---
 1 file changed, 3 deletions(-)

diff --git a/ebel/manager/orientdb/odb_meta.py b/ebel/manager/orientdb/odb_meta.py
index 6240440..0501be6 100644
--- a/ebel/manager/orientdb/odb_meta.py
+++ b/ebel/manager/orientdb/odb_meta.py
@@ -104,9 +104,6 @@ def __init__(
             else:
                 create_database(self.engine.url)
 
-        if not database_exists(self.engine.url):
-            create_database(self.engine.url)
-
     def __config_params_check(self, overwrite_config: bool = False):
         """Go through passed/available configuration params."""
         # Set the client

From 0122f00017040468d6a5a280822785133f61dd82 Mon Sep 17 00:00:00 2001
From: Bruce Schultz <bschultz013@gmail.com>
Date: Mon, 25 Sep 2023 14:52:16 +0200
Subject: [PATCH 37/58] fix: pc pmid model upgraded to big integer

---
 ebel/manager/rdbms/models/pathway_commons.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/ebel/manager/rdbms/models/pathway_commons.py b/ebel/manager/rdbms/models/pathway_commons.py
index 172b159..ef5207c 100644
--- a/ebel/manager/rdbms/models/pathway_commons.py
+++ b/ebel/manager/rdbms/models/pathway_commons.py
@@ -93,7 +93,7 @@ class Pmid(Base):
     __tablename__ = "pathway_commons_pmid"
     id: Mapped[int] = mapped_column(primary_key=True)
 
-    pmid: Mapped[int] = mapped_column(index=True)
+    pmid: Mapped[int] = mapped_column(BigInteger, index=True)
 
     pathway_commons_id: Mapped[int] = mapped_column(ForeignKey("pathway_commons.id"), index=True)
     pathway_commons: Mapped[List[PathwayCommons]] = relationship("PathwayCommons", back_populates="pmids")

From 8ddd4927a6659cdede0db05d056eacd61d764786 Mon Sep 17 00:00:00 2001
From: Bruce Schultz <bschultz013@gmail.com>
Date: Mon, 25 Sep 2023 14:58:49 +0200
Subject: [PATCH 38/58] feat: update biogrid to use sqla2 stmts

---
 ebel/manager/orientdb/biodbs/biogrid.py | 81 ++++++++-----------------
 ebel/manager/rdbms/models/biogrid.py    | 51 +++++++++++++++-
 2 files changed, 75 insertions(+), 57 deletions(-)

diff --git a/ebel/manager/orientdb/biodbs/biogrid.py b/ebel/manager/orientdb/biodbs/biogrid.py
index b47cc88..789ed32 100644
--- a/ebel/manager/orientdb/biodbs/biogrid.py
+++ b/ebel/manager/orientdb/biodbs/biogrid.py
@@ -7,7 +7,7 @@
 import numpy as np
 import pandas as pd
 from pyorientdb import OrientDB
-from sqlalchemy import text, select, func, cast, Integer
+from sqlalchemy import select, func, cast, Integer
 from sqlalchemy.orm import aliased
 from tqdm import tqdm
 
@@ -460,29 +460,31 @@ def _create_source_table(self, df: pd.DataFrame) -> pd.DataFrame:
 
     def get_uniprot_modification_pairs(self):
         """Return all UniProt modification pairs."""
-        # TODO: sql as sqlalchemy query
-        sql = """Select
-            ia.symbol as subject_symbol,
-            ia.uniprot as subject_uniprot,
-            ia.taxonomy_id as subject_taxonomy_id,
-            ib.symbol as object_symbol,
-            ib.uniprot as object_uniprot,
-            ib.taxonomy_id as object_taxonomy_id
-        from
-            biogrid b
-            inner join biogrid_interactor ia on (b.biogrid_a_id=ia.biogrid_id)
-            inner join biogrid_interactor ib on (b.biogrid_b_id=ib.biogrid_id)
-            inner join biogrid_modification m on (m.id=b.modification_id)
-        where
-            m.modification != 'No Modification' and ia.uniprot IS NOT NULL and ib.uniprot IS NOT NULL
-        group by
-            subject_symbol,
-            subject_uniprot,
-            subject_taxonomy_id,
-            object_symbol,
-            object_uniprot,
-            object_taxonomy_id"""
-        results = self.session.execute(text(sql)).fetchall()
+        b = biogrid.Biogrid
+        ia = aliased(biogrid.Interactor)
+        ib = aliased(biogrid.Interactor)
+        m = biogrid.Modification
+
+        sql = (
+            (
+                select(
+                    ia.symbol.label("subject_symbol"),
+                    ia.uniprot.label("subject_uniprot"),
+                    ia.taxonomy_id.label("subject_taxonomy_id"),
+                    ib.symbol.label("object_symbol"),
+                    ib.uniprot.label("object_uniprot"),
+                    ib.taxonomy_id.label("object_taxonomy_id"),
+                )
+                .join(ia, b.biogrid_a_id == ia.biogrid_id)
+                .join(ib, b.biogrid_b_id == ib.biogrid_id)
+                .join(m, b.modification_id == m.id)
+            )
+            .where(m.modification != "No Modification")  # same filter as the raw SQL this replaces
+            .where(ia.uniprot.isnot(None))
+            .where(ib.uniprot.isnot(None))
+            .group_by(ia.symbol, ia.uniprot, ia.taxonomy_id, ib.symbol, ib.uniprot, ib.taxonomy_id)
+        )
+        results = self.session.execute(sql).fetchall()
         return [x._asdict() for x in results]
 
     def get_create_pure_protein_rid_by_uniprot(self, taxonomy_id, symbol, uniprot):
@@ -601,34 +603,3 @@ def update_interactions(self) -> int:
                         )
                         counter += 1
         return counter
-
-    def create_view(self):
-        """Create SQL view of BioGRID data."""
-        sql = """create view if not exists biogrid_view as
-            select
-                b.biogrid_id,
-                ia.symbol as symbol_a,
-                ia.uniprot as uniprot_a,
-                ta.taxonomy_id as tax_id_a,
-                ta.organism_name as organism_a,
-                ib.symbol as symbol_b,
-                ib.uniprot as uniprot_b,
-                tb.taxonomy_id as tax_id_b,
-                tb.organism_name as organism_b,
-                es.experimental_system,
-                m.modification,
-                s.source,
-                b.qualification,
-                p.source as publication_source,
-                p.source_identifier as publication_identifier
-            from
-                biogrid b inner join
-                biogrid_interactor ia on (ia.biogrid_id=b.biogrid_a_id) inner join
-                biogrid_interactor ib on (ib.biogrid_id=b.biogrid_b_id) inner join
-                biogrid_taxonomy ta on (ia.taxonomy_id=ta.taxonomy_id) inner join
-                biogrid_taxonomy tb on (ib.taxonomy_id=tb.taxonomy_id) left join
-                biogrid_experimental_system es on (b.experimental_system_id=es.id) left join
-                biogrid_modification m on (m.id=b.modification_id) left join
-                biogrid_source s on (s.id=b.source_id) left join
-                biogrid_publication p on (p.id=b.publication_id)"""
-        self.session.execute(text(sql))
diff --git a/ebel/manager/rdbms/models/biogrid.py b/ebel/manager/rdbms/models/biogrid.py
index f5cec5e..58def36 100644
--- a/ebel/manager/rdbms/models/biogrid.py
+++ b/ebel/manager/rdbms/models/biogrid.py
@@ -1,7 +1,8 @@
 """BioGRID RDBMS model definition."""
-from sqlalchemy import Float, ForeignKey, Integer, String, Text
+from sqlalchemy import Float, ForeignKey, Integer, String, Text, select
 from sqlalchemy.ext.declarative import declarative_base
-from sqlalchemy.orm import Mapped, mapped_column, relationship
+from sqlalchemy.orm import Mapped, mapped_column, relationship, aliased
+from sqlalchemy_utils import create_view
 
 from ebel.manager.rdbms.models import object_as_dict
 
@@ -154,3 +155,49 @@ class Modification(Base):
     def as_dict(self):
         """Convert object values to dictionary."""
         return object_as_dict(self, exclude=["id"])
+
+
+class BiogridView(Base):
+    """SQL view for Biogrid."""
+
+    b = Biogrid
+    ia = aliased(Interactor)
+    ib = aliased(Interactor)
+    m = Modification
+    p = Publication
+    es = ExperimentalSystem
+    ta = aliased(Taxonomy)
+    tb = aliased(Taxonomy)
+    s = Source
+
+    stmt = (
+        select(
+            b.biogrid_id,
+            ia.symbol.label("symbol_a"),
+            ia.uniprot.label("uniprot_a"),
+            ta.taxonomy_id.label("tax_id_a"),
+            ta.organism_name.label("organism_a"),
+            ib.symbol.label("symbol_b"),
+            ib.uniprot.label("uniprot_b"),
+            tb.taxonomy_id.label("tax_id_b"),
+            tb.organism_name.label("organism_b"),
+            es.experimental_system,
+            m.modification,
+            s.source,
+            b.qualification,
+            p.source.label("publication_source"),
+            p.source_identifier.label("publication_identifier"),
+        )
+        .join(ia, b.biogrid_a_id == ia.biogrid_id)
+        .join(ib, b.biogrid_b_id == ib.biogrid_id)
+        .join(ta, ia.taxonomy_id == ta.taxonomy_id)
+        .join(tb, ib.taxonomy_id == tb.taxonomy_id)
+        .join(es, b.experimental_system_id == es.id, isouter=True)
+        .join(m, m.id == b.modification_id, isouter=True)
+        .join(s, b.source_id == s.id, isouter=True)
+        .join(p, b.publication_id == p.id, isouter=True)
+    )
+
+    view = create_view(name="biogrid_view", selectable=stmt, metadata=Base.metadata)
+
+    __table__ = view

From 3b14d3a7c51b1bc2da00cf10d14e0e0bf6847b19 Mon Sep 17 00:00:00 2001
From: Bruce Schultz <bschultz013@gmail.com>
Date: Tue, 26 Sep 2023 09:47:47 +0200
Subject: [PATCH 39/58] feat: update remaining biodbs to sqla2 select stmts

---
 ebel/manager/orientdb/biodbs/chebi.py         |  2 +-
 .../orientdb/biodbs/clinical_trials.py        |  2 +-
 ebel/manager/orientdb/biodbs/clinvar.py       | 64 ++++++++++++-----
 ebel/manager/orientdb/biodbs/disgenet.py      | 64 ++++++++++-------
 .../orientdb/biodbs/expression_atlas.py       |  2 +-
 ebel/manager/orientdb/biodbs/gwas_catalog.py  |  4 +-
 ebel/manager/orientdb/biodbs/hgnc.py          | 17 +++--
 ebel/manager/orientdb/biodbs/intact.py        | 37 ++--------
 ebel/manager/orientdb/biodbs/kegg.py          | 69 +++++++++++++------
 ebel/manager/orientdb/biodbs/mirtarbase.py    | 13 ----
 ebel/manager/orientdb/biodbs/ncbi.py          |  1 +
 ebel/manager/orientdb/biodbs/nsides.py        | 64 ++++++++++-------
 ebel/manager/orientdb/biodbs/uniprot.py       | 27 +++++---
 ebel/tools.py                                 |  3 +-
 14 files changed, 215 insertions(+), 154 deletions(-)

diff --git a/ebel/manager/orientdb/biodbs/chebi.py b/ebel/manager/orientdb/biodbs/chebi.py
index 767f4c6..3ace1cb 100644
--- a/ebel/manager/orientdb/biodbs/chebi.py
+++ b/ebel/manager/orientdb/biodbs/chebi.py
@@ -104,7 +104,7 @@ def insert_data(self) -> Dict[str, int]:
                         .reset_index()
                     )
 
-                df.to_sql(table_name, self.engine, index=False, if_exists="replace")
+                df.to_sql(table_name, self.engine, index=False, if_exists="append")
 
                 inserted[table_name] += df.shape[0]
         self.session.commit()
diff --git a/ebel/manager/orientdb/biodbs/clinical_trials.py b/ebel/manager/orientdb/biodbs/clinical_trials.py
index 008c567..148eba5 100644
--- a/ebel/manager/orientdb/biodbs/clinical_trials.py
+++ b/ebel/manager/orientdb/biodbs/clinical_trials.py
@@ -47,7 +47,7 @@ def __contains__(self, item):
 
     def add_link_to_drugbank(self, data_dict: dict, trial_rid: str):
         """Create LINKSET in drugbank table for associated clinical trials."""
-        # Can't check synonyms untils OrientDB 3.0, need to be able to index on collections
+        # Can't check synonyms until OrientDB 3.0, need to be able to index on collections
         # update_sql = 'UPDATE drugbank ADD clinical_trials = {} WHERE name = "{}" OR "{}" in synonyms'
         # TODO index drugbank.synonyms
 
diff --git a/ebel/manager/orientdb/biodbs/clinvar.py b/ebel/manager/orientdb/biodbs/clinvar.py
index 56cca96..f19b510 100644
--- a/ebel/manager/orientdb/biodbs/clinvar.py
+++ b/ebel/manager/orientdb/biodbs/clinvar.py
@@ -5,7 +5,7 @@
 
 import pandas as pd
 from pyorientdb import OrientDB
-from sqlalchemy import text
+from sqlalchemy import text, select
 from tqdm import tqdm
 
 from ebel.manager.orientdb import odb_meta, odb_structure, urls
@@ -166,25 +166,26 @@ def get_disease_snps_dict(self) -> Dict[str, List[Snp]]:
         """Get a dictionary {'disease':[snp,snp,... ]} by disease names."""
         disease_keywords = get_disease_trait_keywords_from_config()
 
-        sql_temp = """Select
-            '{keyword}',
-            phenotype,
-            rs_db_snp as rs_number,
-            hgnc_id,
-            chromosome,
-            start as position,
-            clinical_significance
-                from clinvar c inner join
-                clinvar__phenotype cp on (c.id=cp.clinvar_id) inner JOIN
-                clinvar_phenotype p on (cp.clinvar_phenotype_id=p.id)
-            where
-                p.phenotype like '%%{keyword}%%'
-                and rs_db_snp != -1"""
+        cv = clinvar.Clinvar
+        cp = clinvar.ClinvarPhenotype
 
         results = dict()
         for kwd in disease_keywords:
-            sql = sql_temp.format(keyword=kwd)
-            rows = self.session.execute(text(sql))
+            sql = (
+                select(
+                    cp.phenotype,
+                    cv.rs_db_snp.label("rs_number"),
+                    cv.hgnc_id,
+                    cv.chromosome,
+                    cv.start.label("position"),
+                    cv.clinical_significance,
+                )
+                .join(cv, cp.clinvars)
+                .where(cv.rs_db_snp != -1)
+                .where(cp.phenotype.like(f"%{kwd}%"))
+            )
+            print(sql)
+            rows = self.session.execute(sql)
             results[kwd] = [Snp(*x) for x in rows.fetchall()]
 
         return results
@@ -205,6 +206,7 @@ def update_interactions(self) -> int:
             for snp in tqdm(rows, desc=f"Add has_X_snp_cv edges to BEL for {disease}"):
                 if snp.hgnc_id in hgnc_id_gene_rid_cache:
                     gene_mapped_rid = hgnc_id_gene_rid_cache[snp.hgnc_id]
+
                 else:
                     gene_mapped_rid = self._get_set_gene_rid(hgnc_id=snp.hgnc_id)
                     hgnc_id_gene_rid_cache[snp.hgnc_id] = gene_mapped_rid
@@ -214,7 +216,7 @@ def update_interactions(self) -> int:
                     value_dict = {
                         "clinical_significance": snp.clinical_significance,
                         "phenotype": snp.phenotype,
-                        "keyword": snp.keyword,
+                        "keyword": disease,
                     }
                     self.create_edge(
                         class_name="has_mapped_snp_cv",
@@ -262,3 +264,29 @@ def _get_set_gene_rid(self, hgnc_id: str):
             gene_rid = self.get_create_rid("gene", data, check_for="bel")
 
         return gene_rid
+
+
+if __name__ == "__main__":
+    c = ClinVar()
+    cv = clinvar.Clinvar
+    cp = clinvar.ClinvarPhenotype
+    kwd = "Depression"
+
+    sql = (
+        select(
+            cp.phenotype,
+            cv.rs_db_snp.label("rs_number"),
+            cv.hgnc_id,
+            cv.chromosome,
+            cv.start.label("position"),
+            cv.clinical_significance,
+        )
+        .join(cp, cv.phenotypes)
+        .where(cv.rs_db_snp != -1)
+        .where(cp.phenotype.like(f"%{kwd}%"))
+    )
+    rows = c.session.execute(sql)
+    amt = 0
+    for x in rows:
+        amt += 1
+    print(amt)
diff --git a/ebel/manager/orientdb/biodbs/disgenet.py b/ebel/manager/orientdb/biodbs/disgenet.py
index c052cc7..a814d25 100644
--- a/ebel/manager/orientdb/biodbs/disgenet.py
+++ b/ebel/manager/orientdb/biodbs/disgenet.py
@@ -183,35 +183,47 @@ def update_snps(self) -> int:
             "downstream": "upstream",
             "upstream": "downstream",
         }
-        # TODO: replace SQL with SQL Alchemy statement
-        sql_temp = """Select
-                snp_id,
-                chromosome,
-                position,
-                disease_name,
-                pmid,
-                score,
-                source
-            FROM
-                disgenet_variant v INNER JOIN
-                disgenet_source s on (v.source_id=s.id) INNER JOIN
-                disgenet_disease d on (v.disease_id=d.disease_id)
-            WHERE
-                disease_name like '%%{}%%' and
-                source!='BEFREE'
-            GROUP BY
-                snp_id,
-                chromosome,
-                position,
-                disease_name,
-                pmid,
-                score,
-                source"""
+        # # TODO: replace SQL with SQL Alchemy statement
+        # sql_temp = """Select
+        #         snp_id,
+        #         chromosome,
+        #         position,
+        #         disease_name,
+        #         pmid,
+        #         score,
+        #         source
+        #     FROM
+        #         disgenet_variant v INNER JOIN
+        #         disgenet_source s on (v.source_id=s.id) INNER JOIN
+        #         disgenet_disease d on (v.disease_id=d.disease_id)
+        #     WHERE
+        #         disease_name like '%%{}%%' and
+        #         source!='BEFREE'
+        #     GROUP BY
+        #         snp_id,
+        #         chromosome,
+        #         position,
+        #         disease_name,
+        #         pmid,
+        #         score,
+        #         source"""
+
+        dv = disgenet.DisgenetVariant
+        ds = disgenet.DisgenetSource
+        dd = disgenet.DisgenetDisease
 
         results = dict()
         for kwd in self.disease_keywords:
-            sql = sql_temp.format(kwd)
-            rows = self.session.execute(text(sql))
+            sql = (
+                select(dv.snp_id, dv.chromosome, dv.position, dd.disease_name, dv.pmid, dv.score, ds.source)
+                .join(ds)
+                .join(dd)
+                .where(dd.disease_name.like(f"%{kwd}%"))
+                .where(ds.source != "BEFREE")
+                .group_by(dv.snp_id, dv.chromosome, dv.position, dd.disease_name, dv.pmid, dv.score, ds.source)
+            )
+
+            rows = self.session.execute(sql)
             results[kwd] = rows
 
         inserted = 0
diff --git a/ebel/manager/orientdb/biodbs/expression_atlas.py b/ebel/manager/orientdb/biodbs/expression_atlas.py
index cdafe88..8fd8fd9 100644
--- a/ebel/manager/orientdb/biodbs/expression_atlas.py
+++ b/ebel/manager/orientdb/biodbs/expression_atlas.py
@@ -341,7 +341,7 @@ def get_idf(self, experiment_name: str) -> Optional[pd.DataFrame]:
                 values = [x.strip() for x in line_splitted[1:] if x.strip()]
                 rows.append((key_name, values))
 
-        df = pd.DataFrame(rows, columns=("key_name", "value")).explode("value")
+        df = pd.DataFrame(rows, columns=["key_name", "value"]).explode("value")
         return df
 
     def get_sdrf_condensed(self, experiment_name: str) -> Optional[pd.DataFrame]:
diff --git a/ebel/manager/orientdb/biodbs/gwas_catalog.py b/ebel/manager/orientdb/biodbs/gwas_catalog.py
index ac44b79..414ee8d 100644
--- a/ebel/manager/orientdb/biodbs/gwas_catalog.py
+++ b/ebel/manager/orientdb/biodbs/gwas_catalog.py
@@ -79,7 +79,7 @@ def insert_data(self) -> Dict[str, int]:
 
         table_name = gwas_catalog.GwasCatalog.__tablename__
 
-        df[columns_main_table].to_sql(table_name, self.engine, if_exists="replace")
+        df[columns_main_table].to_sql(table_name, self.engine, if_exists="append")
 
         df.snp_gene_ids = df.snp_gene_ids.str.strip().str.split(", ")
         df[table_name + "_id"] = df.index
@@ -89,7 +89,7 @@ def insert_data(self) -> Dict[str, int]:
         df_snp_gene_ids.index = range(1, df_snp_gene_ids.shape[0] + 1)
         df_snp_gene_ids.rename(columns={"snp_gene_ids": "ensembl_identifier"}, inplace=True)
         df_snp_gene_ids.index.rename("id", inplace=True)
-        df_snp_gene_ids.to_sql(gwas_catalog.SnpGene.__tablename__, self.engine, if_exists="replace")
+        df_snp_gene_ids.to_sql(gwas_catalog.SnpGene.__tablename__, self.engine, if_exists="append")
 
         self.session.commit()
 
diff --git a/ebel/manager/orientdb/biodbs/hgnc.py b/ebel/manager/orientdb/biodbs/hgnc.py
index f16e0b7..0e505e0 100644
--- a/ebel/manager/orientdb/biodbs/hgnc.py
+++ b/ebel/manager/orientdb/biodbs/hgnc.py
@@ -8,7 +8,7 @@
 
 import numpy as np
 import pandas as pd
-from pyorientdb import OrientDB
+from pyorientdb import OrientDB, OrientRecord
 from sqlalchemy import select
 from tqdm import tqdm
 
@@ -267,26 +267,29 @@ def get_location(location: str) -> dict:
             location_dict = {"unknown_schema": location}
         return location_dict
 
-    def get_bel_symbols_without_hgnc_link(self):
+    def get_bel_symbols_without_hgnc_link(self) -> set:
         """Return set of all gene symbols in database without a link to HGNC."""
         sql_symbols = "Select distinct(name) as symbol from bio_object where namespace='HGNC' and hgnc IS NULL"
         return {x.oRecordData["symbol"] for x in self.execute(sql_symbols)}
 
-    def get_bel_symbols_all(self):
+    def get_bel_symbols_all(self) -> set:
         """Return set of all gene symbols in database."""
         sql_symbols = "Select distinct(name) as symbol from bio_object where namespace='HGNC'"
         return {x.oRecordData["symbol"] for x in self.execute(sql_symbols)}
 
-    def get_correct_symbol(self, symbol: str):
+    def get_correct_symbol(self, symbol: str) -> str:
         """Checks if symbol is valid otherwise checks previsous symbols."""
         symbol_query = select(HgncDb).where(HgncDb.symbol == symbol)
         result_in_symbol = self.session.execute(symbol_query).first()
         if not result_in_symbol:
             result_in_prev_symbol = self.session.query(PrevSymbol).filter(PrevSymbol.prev_symbol == symbol).first()
+
             if result_in_prev_symbol:
                 symbol = result_in_prev_symbol.hgnc.symbol
+
             else:
                 symbol = None
+
         return symbol
 
     def correct_wrong_symbol(self, symbol, bel_symbols_all: set):
@@ -337,7 +340,7 @@ def update_gene(
         location: str,
         hgnc_symbol: str,
         suggested_corrections: str,
-    ) -> int:
+    ) -> OrientRecord:
         """Update genes in OrientDB and returns number of updates."""
         suggest = (
             ", suggested_corrections={{'wrong name': {}}}".format(suggested_corrections)
@@ -358,7 +361,7 @@ def update_gene(
         )
         return self.execute(sql)[0]
 
-    def update_rna(self, hgnc_rid: str, label: str, hgnc_symbol: str, suggested_corrections: str) -> int:
+    def update_rna(self, hgnc_rid: str, label: str, hgnc_symbol: str, suggested_corrections: str) -> OrientRecord:
         """Update RNAs in OrientDB and returns number of updates."""
         suggest = (
             ", suggested_corrections={{'wrong name': {}}}".format(suggested_corrections)
@@ -375,7 +378,7 @@ def update_rna(self, hgnc_rid: str, label: str, hgnc_symbol: str, suggested_corr
         )
         return self.execute(sql)[0]
 
-    def update_protein(self, hgnc_rid: str, label: str, hgnc_symbol: str, suggested_corrections: str) -> int:
+    def update_protein(self, hgnc_rid: str, label: str, hgnc_symbol: str, suggested_corrections: str) -> OrientRecord:
         """Update proteins in OrientDB and returns number of updates."""
         suggest = (
             ", suggested_corrections={{'wrong name': {}}}".format(suggested_corrections)
diff --git a/ebel/manager/orientdb/biodbs/intact.py b/ebel/manager/orientdb/biodbs/intact.py
index c3c4959..d982fc7 100644
--- a/ebel/manager/orientdb/biodbs/intact.py
+++ b/ebel/manager/orientdb/biodbs/intact.py
@@ -154,46 +154,24 @@ def get_namespace_name_by_uniprot(self, uniprot_accession: str) -> tuple:
 
         result = self.session.execute(sql).fetchone()
         taxid_to_namespace = {9606: "HGNC", 10090: "MGI", 10116: "RGD"}
+
         if result:
             name, taxid = result
             namespace = taxid_to_namespace.get(taxid, "UNIPROT")
             return_value = (namespace, name)
+
         else:
             if self.session.query(uniprot.Uniprot).filter(uniprot.Uniprot.accession == uniprot_accession).first():
                 return_value = ("UNIPROT", uniprot_accession)
+
         return return_value
 
     def update_interactions(self) -> int:
         """Update intact interactions to graph."""
-        uniprot = UniProt(self.client)
-        uniprot.update()
-
-        uniprot_rid_dict = uniprot.get_pure_uniprot_rid_dict_in_bel_context()
-
-        # sql_temp = """SELECT
-        #     int_a_uniprot_id,
-        #     int_b_uniprot_id,
-        #     pmid,
-        #     interaction_ids,
-        #     interaction_type,
-        #     interaction_type_psimi_id,
-        #     detection_method,
-        #     detection_method_psimi_id,
-        #     confidence_value
-        # FROM
-        #     intact
-        # WHERE
-        #     int_a_uniprot_id = '{uniprot_accession}' or int_b_uniprot_id = '{uniprot_accession}'
-        # GROUP BY
-        #     int_a_uniprot_id,
-        #     int_b_uniprot_id,
-        #     pmid,
-        #     interaction_ids,
-        #     interaction_type,
-        #     interaction_type_psimi_id,
-        #     detection_method,
-        #     detection_method_psimi_id,
-        #     confidence_value"""
+        up = UniProt(self.client)
+        up.update()
+
+        uniprot_rid_dict = up.get_pure_uniprot_rid_dict_in_bel_context()
 
         updated = 0
 
@@ -201,7 +179,6 @@ def update_interactions(self) -> int:
         it = intact.Intact
 
         for uniprot_accession in tqdm(uniprot_accessions, desc="Update IntAct interactions"):
-            # sql = sql_temp.format(uniprot_accession=uniprot_accession)
             sql = (
                 select(
                     it.int_a_uniprot_id,
diff --git a/ebel/manager/orientdb/biodbs/kegg.py b/ebel/manager/orientdb/biodbs/kegg.py
index 10ad6f9..7799357 100644
--- a/ebel/manager/orientdb/biodbs/kegg.py
+++ b/ebel/manager/orientdb/biodbs/kegg.py
@@ -9,6 +9,7 @@
 import pandas as pd
 import requests
 from pyorientdb import OrientDB
+from sqlalchemy import select, or_
 from tqdm import tqdm
 
 from ebel.config import get_config_value
@@ -281,29 +282,53 @@ def update_interactions(self) -> int:
 
         species_ids = ",".join([f"'{x}'" for x in self.species])
 
-        sql_temp = f"""Select
-                interaction_type,
-                pathway_identifier,
-                pathway_name,
-                gene_symbol_a,
-                gene_symbol_b,
-                kegg_species_id
-            from
-                kegg
-            where
-                (gene_symbol_a='{{symbol}}' or gene_symbol_a='{{symbol}}') and
-                kegg_species_id in ({species_ids}) and
-                interaction_type in ({{interaction_types}})
-            group by
-                interaction_type,
-                pathway_identifier,
-                pathway_name,
-                gene_symbol_a,
-                gene_symbol_b,
-                kegg_species_id"""
-
+        # sql_temp = f"""Select
+        #         interaction_type,
+        #         pathway_identifier,
+        #         pathway_name,
+        #         gene_symbol_a,
+        #         gene_symbol_b,
+        #         kegg_species_id
+        #     from
+        #         kegg
+        #     where
+        #         (gene_symbol_a='{{symbol}}' or gene_symbol_a='{{symbol}}') and
+        #         kegg_species_id in ({species_ids}) and
+        #         interaction_type in ({{interaction_types}})
+        #     group by
+        #         interaction_type,
+        #         pathway_identifier,
+        #         pathway_name,
+        #         gene_symbol_a,
+        #         gene_symbol_b,
+        #         kegg_species_id"""
+
+        kg = kegg.Kegg
         for symbol, rid in tqdm(symbol_rids_dict.items(), desc="Update KEGG posttranslational modifications"):
-            sql = sql_temp.format(symbol=symbol, interaction_types=post_translational_modifications)
+            # sql = sql_temp.format(symbol=symbol, interaction_types=post_translational_modifications)
+
+            sql = (
+                select(
+                    kg.interaction_type,
+                    kg.pathway_identifier,
+                    kg.pathway_name,
+                    kg.gene_symbol_a,
+                    kg.gene_symbol_b,
+                    kg.kegg_species_id,
+                )
+                .where(or_(kg.gene_symbol_a == symbol, kg.gene_symbol_b == symbol))
+                .where(kg.kegg_species_id.in_(species_ids))
+                .where(kg.interaction_type.in_(post_translational_modifications))
+                .group_by(
+                    kg.interaction_type,
+                    kg.pathway_identifier,
+                    kg.pathway_name,
+                    kg.gene_symbol_a,
+                    kg.gene_symbol_b,
+                    kg.kegg_species_id,
+                )
+            )
+
             df = pd.read_sql(sql, self.engine)
             keys = (
                 "interaction_type",
diff --git a/ebel/manager/orientdb/biodbs/mirtarbase.py b/ebel/manager/orientdb/biodbs/mirtarbase.py
index 68ace6d..f3c03bc 100644
--- a/ebel/manager/orientdb/biodbs/mirtarbase.py
+++ b/ebel/manager/orientdb/biodbs/mirtarbase.py
@@ -58,19 +58,6 @@ def update_interactions(self) -> int:
         self.clear_edges()
         df_symbol_rid = self.get_pure_symbol_rid_df_in_bel_context(class_name="rna", namespace="HGNC")
 
-        # sql = f"""Select
-        #         mi_rna,
-        #         target_gene as symbol,
-        #         support_type,
-        #         references_pmid as pmid,
-        #         experiments
-        #     from
-        #         {mirtarbase.Mirtarbase.__tablename__}
-        #     where
-        #         species_mi_rna='Homo sapiens' and
-        #         species_target_gene='Homo sapiens' and
-        #         support_type in ()"""
-
         mtb = mirtarbase.Mirtarbase
         sql = (
             select(
diff --git a/ebel/manager/orientdb/biodbs/ncbi.py b/ebel/manager/orientdb/biodbs/ncbi.py
index c395fc9..a629731 100644
--- a/ebel/manager/orientdb/biodbs/ncbi.py
+++ b/ebel/manager/orientdb/biodbs/ncbi.py
@@ -274,6 +274,7 @@ def _insert_info(self, chunksize: int = 1000000) -> int:
             "type_of_gene",
             "dbXrefs",
         }
+
         for df in tqdm(
             pd.read_csv(
                 file_path,
diff --git a/ebel/manager/orientdb/biodbs/nsides.py b/ebel/manager/orientdb/biodbs/nsides.py
index f9cb0a7..b9a1f43 100644
--- a/ebel/manager/orientdb/biodbs/nsides.py
+++ b/ebel/manager/orientdb/biodbs/nsides.py
@@ -7,13 +7,13 @@
 
 import pandas as pd
 from pyorientdb import OrientDB
-from sqlalchemy import text
+from sqlalchemy import text, select, or_
 from tqdm import tqdm
 
 from ebel.constants import RID
 from ebel.manager.orientdb import odb_meta, odb_structure, urls
 from ebel.manager.orientdb.constants import OFFSIDES, ONSIDES
-from ebel.manager.rdbms.models import nsides
+from ebel.manager.rdbms.models import nsides, drugbank
 from ebel.tools import get_file_path
 
 logger = logging.getLogger(__name__)
@@ -143,25 +143,25 @@ def update_bel(self) -> int:
         self.delete_nodes_with_no_edges("side_effect")
         self.delete_nodes_with_no_edges("drug")
 
-        # TODO: Translate to sqlalchemy query
-        sql_temp = """Select
-            o.condition_meddra_id,
-            o.condition_concept_name,
-            o.prr,
-            o.mean_reporting_frequency
-        from
-            drugbank as d inner join
-            drugbank_external_identifier as dei on (d.id=dei.drugbank_id) inner join
-            nsides as o on (dei.identifier=o.drug_rxnorn_id)
-        where
-            d.drugbank_id='{}' and resource='RxCUI'
-            and (mean_reporting_frequency>=0.01 OR mean_reporting_frequency is NULL)
-        group by
-            o.condition_meddra_id,
-            o.condition_concept_name,
-            o.prr,
-            o.mean_reporting_frequency
-        """
+        # # TODO: Translate to sqlalchemy query
+        # sql_temp = """Select
+        #     o.condition_meddra_id,
+        #     o.condition_concept_name,
+        #     o.prr,
+        #     o.mean_reporting_frequency
+        # from
+        #     drugbank as d inner join
+        #     drugbank_external_identifier as dei on (d.id=dei.drugbank_id) inner join
+        #     nsides as o on (dei.identifier=o.drug_rxnorn_id)
+        # where
+        #     d.drugbank_id='{}' and resource='RxCUI'
+        #     and (mean_reporting_frequency>=0.01 OR mean_reporting_frequency is NULL)
+        # group by
+        #     o.condition_meddra_id,
+        #     o.condition_concept_name,
+        #     o.prr,
+        #     o.mean_reporting_frequency
+        # """
 
         drugbank_ids = self.query_class("drug", columns=["drugbank_id"], drugbank_id="notnull")
         drugbank_id_rids = {d["drugbank_id"]: d[RID] for d in drugbank_ids}
@@ -171,9 +171,27 @@ def update_bel(self) -> int:
 
         updated = 0
 
+        d = drugbank.Drugbank
+        dei = drugbank.ExternalIdentifier
+        o = nsides.Nsides
+
         for drugbank_id, drugbank_rid in tqdm(drugbank_id_rids.items(), desc=f"Update {self.biodb_name.upper()}"):
-            sql = sql_temp.format(drugbank_id)
-            for r in self.session.execute(text(sql)):
+            # sql = sql_temp.format(drugbank_id)
+            sql = (
+                (
+                    select(o.condition_meddra_id, o.condition_concept_name, o.prr, o.mean_reporting_frequency)
+                    .join(dei, dei.identifier == o.drug_rxnorn_id)
+                    .join(d, d.id == dei.drugbank_id)
+                )
+                .where(d.drugbank_id == drugbank_id)
+                .where(dei.resource == "RxCUI")
+                .where(or_(o.mean_reporting_frequency >= 0.01, o.mean_reporting_frequency.is_(None)))
+                .group_by(o.condition_meddra_id, o.condition_concept_name, o.prr, o.mean_reporting_frequency)
+            )
+
+            results = self.session.execute(sql)
+
+            for r in results:
                 (
                     condition_meddra_id,
                     condition_concept_name,
diff --git a/ebel/manager/orientdb/biodbs/uniprot.py b/ebel/manager/orientdb/biodbs/uniprot.py
index 1bdb0be..06dcbcf 100644
--- a/ebel/manager/orientdb/biodbs/uniprot.py
+++ b/ebel/manager/orientdb/biodbs/uniprot.py
@@ -10,7 +10,7 @@
 import pandas as pd
 from lxml.etree import iterparse
 from pyorientdb import OrientDB
-from sqlalchemy import text
+from sqlalchemy import text, select
 from tqdm import tqdm
 
 from ebel.defaults import default_tax_ids
@@ -309,11 +309,17 @@ def _get_accesssion_recname(self, taxid, gene_symbol) -> Union[Tuple[str, str],
         """
         # TODO: This is in general a dangerous method because it selects the first accession number, but there could
         # be more than one
+        # sql = (
+        #     f"Select accession, recommended_name from uniprot as u inner join uniprot_gene_symbol as gs "
+        #     f'on (u.id=gs.uniprot_id) where u.taxid={taxid} and gs.symbol="{gene_symbol}" limit 1'
+        # )
         sql = (
-            f"Select accession, recommended_name from uniprot as u inner join uniprot_gene_symbol as gs "
-            f'on (u.id=gs.uniprot_id) where u.taxid={taxid} and gs.symbol="{gene_symbol}" limit 1'
+            select(up.Uniprot.accession, up.Uniprot.recommended_name)
+            .join(up.GeneSymbol)
+            .where(up.Uniprot.taxid == taxid)
+            .where(up.GeneSymbol.symbol == gene_symbol)
         )
-        results = self.session.execute(text(sql))
+        results = self.session.execute(sql)
         return results.fetchone() if results else None
 
     def _update_proteins(self, namespace, taxid) -> int:
@@ -339,33 +345,36 @@ def _update_proteins(self, namespace, taxid) -> int:
 
     def _get_recname_taxid_by_accession_from_uniprot_api(self, accession) -> Tuple[str, int]:
         """Fetch uniprot entry by accession and adds to the database. Returns recommended name."""
-        sql = f"Select recommended_name,taxid from uniprot where accession='{accession}' limit 1"
-        result = self.session.execute(text(sql)).fetchone()
+        # sql = f"Select recommended_name,taxid from uniprot where accession='{accession}' limit 1"
+        sql = select(up.Uniprot.recommended_name, up.Uniprot.taxid).where(up.Uniprot.accession == accession)
+        result = self.session.execute(sql).fetchone()
         if result:
             return result
 
     def _update_uniprot_proteins(self) -> int:
         """Update all proteins using UNIPROT as namespace. Returns number of updated proteins."""
         updated = 0
-        sql_temp = "Select recommended_name, taxid from uniprot where accession='{}' limit 1"
+
         sql_uniprot = 'SELECT distinct(name) as accession from protein WHERE namespace="UNIPROT"'
         sql_update = (
             'Update protein set uniprot = name, label = "{}", species = {} '
             'where namespace = "UNIPROT" and name = "{}"'
         )
+
         for protein in self.query(sql_uniprot).itertuples(index=False):
-            sql = sql_temp.format(protein.accession)
-            found = self.session.execute(text(sql)).fetchone()
+            found = self._get_recname_taxid_by_accession_from_uniprot_api(protein.accession)
             if found:
                 recommended_name, taxid = found
                 num_updated = self.execute(sql_update.format(recommended_name, taxid, protein.accession))[0]
                 updated += num_updated
+
             else:
                 recname_taxid = self._get_recname_taxid_by_accession_from_uniprot_api(protein.accession)
                 if recname_taxid:
                     recommended_name, taxid = recname_taxid
                     num_updated = self.execute(sql_update.format(recommended_name, taxid, protein.accession))[0]
                     updated += num_updated
+
         return updated
 
     def __read_linked_tables(
diff --git a/ebel/tools.py b/ebel/tools.py
index 62fba6f..309a57d 100644
--- a/ebel/tools.py
+++ b/ebel/tools.py
@@ -5,6 +5,7 @@
 import os.path
 import re
 import shutil
+from os import PathLike
 from types import GeneratorType
 from typing import Iterable, List, Union
 
@@ -93,7 +94,7 @@ def md5(file_path):
     return hash_md5.hexdigest()
 
 
-def get_file_path(url: str, biodb: str):
+def get_file_path(url: str, biodb: str) -> str:
     """Get standard file path by file_name and DATADIR."""
     file_name = os.path.basename(url)
     bio_db_dir = os.path.join(DATA_DIR, biodb)

From 770d9a83efbd50fe39c242e9511c675c0e860a11 Mon Sep 17 00:00:00 2001
From: Bruce Schultz <bschultz013@gmail.com>
Date: Tue, 26 Sep 2023 20:55:41 +0200
Subject: [PATCH 40/58] fix: remaining problems with sqla2 updates

---
 ebel/manager/orientdb/biodbs/biogrid.py  | 63 ++++++++--------
 ebel/manager/orientdb/biodbs/clinvar.py  | 31 ++------
 ebel/manager/orientdb/biodbs/disgenet.py |  5 ++
 ebel/manager/orientdb/biodbs/kegg.py     | 13 ++--
 ebel/manager/orientdb/biodbs/stringdb.py | 12 +++-
 ebel/manager/orientdb/odb_meta.py        | 92 +++++++++++++++---------
 ebel/manager/orientdb/odb_structure.py   |  4 +-
 ebel/manager/rdbms/models/iuphar.py      |  8 +--
 8 files changed, 124 insertions(+), 104 deletions(-)

diff --git a/ebel/manager/orientdb/biodbs/biogrid.py b/ebel/manager/orientdb/biodbs/biogrid.py
index 789ed32..2e97b11 100644
--- a/ebel/manager/orientdb/biodbs/biogrid.py
+++ b/ebel/manager/orientdb/biodbs/biogrid.py
@@ -475,13 +475,13 @@ def get_uniprot_modification_pairs(self):
                     ib.uniprot.label("object_uniprot"),
                     ib.taxonomy_id.label("object_taxonomy_id"),
                 )
+                .select_from(b)
                 .join(ia, b.biogrid_a_id == ia.biogrid_id)
                 .join(ib, b.biogrid_b_id == ib.biogrid_id)
                 .join(m, b.modification_id == m.id)
             )
             .where(m.modification == "No Modification")
             .where(ia.uniprot.isnot(None))
-            .where(ib.uniprot.isnot(None))
             .group_by(ia.symbol, ia.uniprot, ia.taxonomy_id, ib.symbol, ib.uniprot, ib.taxonomy_id)
         )
         results = self.session.execute(sql).fetchall()
@@ -559,7 +559,7 @@ def update_interactions(self) -> int:
                         func.group_concat(
                             if_func(p.source == "PUBMED", cast(p.source_identifier, Integer), None).distinct()
                         ).label("pmids"),
-                        func.count(p.source_identifier).label("num_pubs"),
+                        func.count(p.source_identifier.distinct()).label("num_pubs"),
                         func.group_concat(
                             if_func(p.source == "DOI", cast(p.source_identifier, Integer), None).distinct()
                         ).label("dois"),
@@ -574,32 +574,35 @@ def update_interactions(self) -> int:
                     .where(m.modification != "No Modification")
                 )
 
-                for row in self.session.execute(sql).fetchall():
-                    row_dict = row._asdict()
-                    be = BioGridEdge(subject_rid=subj_pure_rid, object_rid=obj_pure_rid, **row_dict)
-                    edge_value_dict = be.get_edge_value_dict()
-
-                    if be.modConfig.bg_mod_name == "Proteolytic Processing":
-                        self.create_edge(
-                            "decreases_bg",
-                            from_rid=subj_pure_rid,
-                            to_rid=obj_pure_rid,
-                            value_dict=edge_value_dict,
-                        )
-                        counter += 1
-                    else:
-                        obj_pmod_value_dict = be.obj.get_pmod_protein_as_value_dict()
-                        pmod_protein_rid = self.node_exists("protein", obj_pmod_value_dict, check_for="bel")
-                        if not pmod_protein_rid:
-                            pmod_protein_rid = self.get_create_rid("protein", obj_pmod_value_dict, check_for="bel")
-                            self.create_edge("has_modified_protein", obj_pure_rid, pmod_protein_rid)
-                            pmod_rid = self.insert_record("pmod", be.get_pmod_as_value_dict())
-                            self.create_edge("has__pmod", pmod_protein_rid, pmod_rid)
-                        self.create_edge(
-                            be.edge_name,
-                            subj_pure_rid,
-                            pmod_protein_rid,
-                            edge_value_dict,
-                        )
-                        counter += 1
+                results = self.session.execute(sql).fetchall()
+
+                for row in results:
+                    if row[0] is not None:  # No results for uniprot ID combo
+                        row_dict = row._asdict()  # If no modification then no results were returned
+                        be = BioGridEdge(subject_rid=subj_pure_rid, object_rid=obj_pure_rid, **row_dict)
+                        edge_value_dict = be.get_edge_value_dict()
+
+                        if be.modConfig.bg_mod_name == "Proteolytic Processing":
+                            self.create_edge(
+                                "decreases_bg",
+                                from_rid=subj_pure_rid,
+                                to_rid=obj_pure_rid,
+                                value_dict=edge_value_dict,
+                            )
+                            counter += 1
+                        else:
+                            obj_pmod_value_dict = be.obj.get_pmod_protein_as_value_dict()
+                            pmod_protein_rid = self.node_exists("protein", obj_pmod_value_dict, check_for="bel")
+                            if not pmod_protein_rid:
+                                pmod_protein_rid = self.get_create_rid("protein", obj_pmod_value_dict, check_for="bel")
+                                self.create_edge("has_modified_protein", obj_pure_rid, pmod_protein_rid)
+                                pmod_rid = self.insert_record("pmod", be.get_pmod_as_value_dict())
+                                self.create_edge("has__pmod", pmod_protein_rid, pmod_rid)
+                            self.create_edge(
+                                be.edge_name,
+                                subj_pure_rid,
+                                pmod_protein_rid,
+                                edge_value_dict,
+                            )
+                            counter += 1
         return counter
diff --git a/ebel/manager/orientdb/biodbs/clinvar.py b/ebel/manager/orientdb/biodbs/clinvar.py
index f19b510..2024124 100644
--- a/ebel/manager/orientdb/biodbs/clinvar.py
+++ b/ebel/manager/orientdb/biodbs/clinvar.py
@@ -19,7 +19,6 @@
 Snp = namedtuple(
     "Snp",
     (
-        "keyword",
         "phenotype",
         "rs_number",
         "hgnc_id",
@@ -180,13 +179,12 @@ def get_disease_snps_dict(self) -> Dict[str, List[Snp]]:
                     cv.start.label("position"),
                     cv.clinical_significance,
                 )
-                .join(cv, cp.clinvars)
+                .join(cp, cv.phenotypes)
                 .where(cv.rs_db_snp != -1)
                 .where(cp.phenotype.like(f"%{kwd}%"))
             )
-            print(sql)
-            rows = self.session.execute(sql)
-            results[kwd] = [Snp(*x) for x in rows.fetchall()]
+            rows = self.session.execute(sql).fetchall()
+            results[kwd] = [Snp(*x) for x in rows]
 
         return results
 
@@ -268,25 +266,4 @@ def _get_set_gene_rid(self, hgnc_id: str):
 
 if __name__ == "__main__":
     c = ClinVar()
-    cv = clinvar.Clinvar
-    cp = clinvar.ClinvarPhenotype
-    kwd = "Depression"
-
-    sql = (
-        select(
-            cp.phenotype,
-            cv.rs_db_snp.label("rs_number"),
-            cv.hgnc_id,
-            cv.chromosome,
-            cv.start.label("position"),
-            cv.clinical_significance,
-        )
-        .join(cp, cv.phenotypes)
-        .where(cv.rs_db_snp != -1)
-        .where(cp.phenotype.like(f"%{kwd}%"))
-    )
-    rows = c.session.execute(sql)
-    amt = 0
-    for x in rows:
-        amt += 1
-    print(amt)
+    c.update()
diff --git a/ebel/manager/orientdb/biodbs/disgenet.py b/ebel/manager/orientdb/biodbs/disgenet.py
index a814d25..c903013 100644
--- a/ebel/manager/orientdb/biodbs/disgenet.py
+++ b/ebel/manager/orientdb/biodbs/disgenet.py
@@ -265,3 +265,8 @@ def update_snps(self) -> int:
                 inserted += 1
 
         return inserted
+
+
+if __name__ == "__main__":
+    d = DisGeNet()
+    d.update()
diff --git a/ebel/manager/orientdb/biodbs/kegg.py b/ebel/manager/orientdb/biodbs/kegg.py
index 7799357..f75c91f 100644
--- a/ebel/manager/orientdb/biodbs/kegg.py
+++ b/ebel/manager/orientdb/biodbs/kegg.py
@@ -278,9 +278,9 @@ def update_interactions(self) -> int:
             "phosphorylation": ("pho", "increases", BelPmod.PHO),
             "ubiquitination": ("ubi", "increases", BelPmod.UBI),
         }
-        post_translational_modifications = ",".join([f"'{x}'" for x in pmods.keys()])
+        # post_translational_modifications = ",".join([f"'{x}'" for x in pmods.keys()])
 
-        species_ids = ",".join([f"'{x}'" for x in self.species])
+        # species_ids = ",".join([f"'{x}'" for x in self.species])
 
         # sql_temp = f"""Select
         #         interaction_type,
@@ -317,8 +317,8 @@ def update_interactions(self) -> int:
                     kg.kegg_species_id,
                 )
                 .where(or_(kg.gene_symbol_a == symbol, kg.gene_symbol_b == symbol))
-                .where(kg.kegg_species_id.in_(species_ids))
-                .where(kg.interaction_type.in_(post_translational_modifications))
+                .where(kg.kegg_species_id.in_(self.species))
+                .where(kg.interaction_type.in_(list(pmods.keys())))
                 .group_by(
                     kg.interaction_type,
                     kg.pathway_identifier,
@@ -387,3 +387,8 @@ def update_interactions(self) -> int:
         self.hgnc.update_bel()
 
         return inserted
+
+
+if __name__ == "__main__":
+    k = Kegg()
+    k.update()
diff --git a/ebel/manager/orientdb/biodbs/stringdb.py b/ebel/manager/orientdb/biodbs/stringdb.py
index 83380a5..7adccd0 100644
--- a/ebel/manager/orientdb/biodbs/stringdb.py
+++ b/ebel/manager/orientdb/biodbs/stringdb.py
@@ -160,9 +160,15 @@ def insert_action_data(self) -> int:
 
     def get_stringdb_action_hgnc_set(self):
         """Get unique HGNC symbols from stringdb_actions table."""
-        sql = f"""(Select distinct( symbol1 ) from {self.table_action})
-                union (Select distinct( symbol2 ) from {self.table_action})"""
-        return set([x[0] for x in self.session.execute(text(sql)).fetchall()])
+        # sql = f"""(Select distinct( symbol1 ) from {self.table_action})
+        #         union (Select distinct( symbol2 ) from {self.table_action})"""
+
+        stmt1 = select(stringdb.StringDbAction.symbol1).distinct()
+        stmt2 = select(stringdb.StringDbAction.symbol2).distinct()
+        # Execute the compound SELECT itself: an aliased subquery is not executable in SQLAlchemy 2.
+        sql = stmt1.union(stmt2)
+
+        return set([x[0] for x in self.session.execute(sql).fetchall()])
 
     def update_interactions(self) -> Dict[str, int]:
         """Update the edges with StringDB metadata."""
diff --git a/ebel/manager/orientdb/odb_meta.py b/ebel/manager/orientdb/odb_meta.py
index 0501be6..e6f12e4 100644
--- a/ebel/manager/orientdb/odb_meta.py
+++ b/ebel/manager/orientdb/odb_meta.py
@@ -23,7 +23,7 @@
 from pyorientdb import OrientDB, orient
 from pyorientdb.exceptions import PyOrientCommandException, PyOrientIndexException, PyOrientSecurityAccessException
 from pyorientdb.otypes import OrientRecord
-from sqlalchemy import text
+from sqlalchemy import text, select, func
 from sqlalchemy.sql.schema import Table
 from sqlalchemy_utils import create_database, database_exists
 from tqdm import tqdm
@@ -34,6 +34,7 @@
 from ebel.config import get_config_as_dict, get_config_value, write_to_config
 from ebel.constants import DEFAULT_ODB, RID
 from ebel.manager.orientdb import urls as default_urls
+from ebel.manager.rdbms.models.ensembl import Ensembl as ens
 from ebel.manager.orientdb.odb_structure import Edge, Generic, Node, OClass, OIndex, OProperty
 from ebel.tools import BelRdb, chunks, get_file_path, get_standard_name
 
@@ -97,12 +98,13 @@ def __init__(
 
         conn = get_config_value("DATABASE", "sqlalchemy_connection_string")
 
-        if not (conn or database_exists(self.engine.url)):
-            if str(self.engine.url).startswith("mysql"):
+        if not conn:
+            dialect = self.session.bind.dialect.name
+            if dialect == "mysql":
                 set_mysql_interactive()
 
-            else:
-                create_database(self.engine.url)
+        if not database_exists(self.engine.url):
+            create_database(self.engine.url)
 
     def __config_params_check(self, overwrite_config: bool = False):
         """Go through passed/available configuration params."""
@@ -819,8 +821,9 @@ def number_of_generics(self) -> Dict[str, int]:
         if self.tables_base:
             for table_name, table in self.tables_base.metadata.tables.items():
                 if self.table_exists(table_name):
-                    sql = f"Select count(*) from `{table_name}`"
-                    numbers[table_name] = self.session.execute(text(sql)).fetchone()[0]
+                    # Count rows of the table; func.count(table_name) only counts a bound string literal.
+                    sql = select(func.count()).select_from(table)
+                    numbers[table_name] = self.session.execute(sql).fetchone()[0]
                 else:
                     numbers[table_name] = 0
         elif self.generic_classes:
@@ -1320,36 +1323,57 @@ def get_set_gene_rids_by_position(
         gene_rids = defaultdict(list)
         sqls = dict()
 
-        sqls[
-            "mapped"
-        ] = f"""Select symbol
-                            from ensembl
-                            where
-                                start < {position} and
-                                stop > {position} and
-                                chromosome='{chromosome}' group by symbol"""
-
-        sqls[
-            "downstream"
-        ] = f"""Select symbol
-                            from ensembl
-                            where
-                                start > {position} and
-                                chromosome='{chromosome}'
-                            order by start limit 1"""
-
-        sqls[
-            "upstream"
-        ] = f"""Select symbol
-                            from ensembl
-                            where
-                                stop < {position} and
-                                chromosome='{chromosome}'
-                            order by stop desc limit 1"""
+        # sqls[
+        #     "mapped"
+        # ] = f"""Select symbol
+        #                     from ensembl
+        #                     where
+        #                         start < {position} and
+        #                         stop > {position} and
+        #                         chromosome='{chromosome}' group by symbol"""
+        sqls["mapped"] = (
+            select(ens.symbol)
+            .where(ens.start < position)
+            .where(ens.stop > position)
+            .where(ens.chromosome == chromosome)
+            .group_by(ens.symbol)
+        )
+
+        # sqls[
+        #     "downstream"
+        # ] = f"""Select symbol
+        #                     from ensembl
+        #                     where
+        #                         start > {position} and
+        #                         chromosome='{chromosome}'
+        #                     order by start limit 1"""
+        sqls["downstream"] = (
+            select(ens.symbol)
+            .where(ens.start > position)
+            .where(ens.chromosome == chromosome)
+            .limit(1)
+            .order_by(ens.start.asc())
+        )
+
+        # sqls[
+        #     "upstream"
+        # ] = f"""Select symbol
+        #                     from ensembl
+        #                     where
+        #                         stop < {position} and
+        #                         chromosome='{chromosome}'
+        #                     order by stop desc limit 1"""
+        sqls["upstream"] = (
+            select(ens.symbol)
+            .where(ens.stop < position)
+            .where(ens.chromosome == chromosome)
+            .limit(1)
+            .order_by(ens.stop.desc())
+        )
 
         for gene_type, sql in sqls.items():
             if gene_type in gene_types:
-                results = self.session.execute(text(sql))
+                results = self.session.execute(sql)
                 for (symbol,) in results.fetchall():
                     bel = f'g(HGNC:"{symbol}")'
                     data = {
diff --git a/ebel/manager/orientdb/odb_structure.py b/ebel/manager/orientdb/odb_structure.py
index e4e830b..8e09ec0 100755
--- a/ebel/manager/orientdb/odb_structure.py
+++ b/ebel/manager/orientdb/odb_structure.py
@@ -462,8 +462,8 @@ def __init__(self, odb_class: OClass, columns: Tuple[str, ...], index_type: OInd
 
 bel_indices = (
     OIndex(bel, ("bel",), OIndexType.NOTUNIQUE_HASH_INDEX),
-    OIndex(bel, ("involved_genes",), OIndexType.NOTUNIQUE_HASH_INDEX),
-    OIndex(bel, ("involved_other",), OIndexType.NOTUNIQUE_HASH_INDEX),
+    # OIndex(bel, ("involved_genes",), OIndexType.NOTUNIQUE_HASH_INDEX),
+    # OIndex(bel, ("involved_other",), OIndexType.NOTUNIQUE_HASH_INDEX),
     OIndex(bel_relation, ("evidence",), OIndexType.NOTUNIQUE_HASH_INDEX),
     OIndex(protein, ("uniprot",), OIndexType.NOTUNIQUE_HASH_INDEX),
     OIndex(bel_relation, ("annotation",), OIndexType.DICTIONARY),
diff --git a/ebel/manager/rdbms/models/iuphar.py b/ebel/manager/rdbms/models/iuphar.py
index 83adc97..57a8b36 100644
--- a/ebel/manager/rdbms/models/iuphar.py
+++ b/ebel/manager/rdbms/models/iuphar.py
@@ -63,12 +63,12 @@ class IupharInteraction(Base):
     target_uniprot: Mapped[Optional[str]] = mapped_column(String(100))
     target_ensembl_gene_id: Mapped[Optional[str]] = mapped_column(String(200))
     target_ligand: Mapped[Optional[str]] = mapped_column(String(100))
-    target_ligand_id: Mapped[Optional[str]] = mapped_column()
+    target_ligand_id: Mapped[Optional[str]] = mapped_column(String(100))
     target_ligand_subunit_ids: Mapped[Optional[str]] = mapped_column(Text)
     target_ligand_gene_symbol: Mapped[Optional[str]] = mapped_column(String(50))
     target_ligand_uniprot_id: Mapped[Optional[str]] = mapped_column(String(200))
     target_ligand_ensembl_gene_id: Mapped[Optional[str]] = mapped_column(String(50))
-    target_ligand_pubchem_sid: Mapped[Optional[str]] = mapped_column()
+    target_ligand_pubchem_sid: Mapped[Optional[str]] = mapped_column(String(100))
     target_species: Mapped[Optional[str]] = mapped_column(String(100))
     ligand: Mapped[str] = mapped_column(String(255))
     ligand_id: Mapped[int] = mapped_column(ForeignKey("iuphar_ligand.id"), index=True)
@@ -78,8 +78,8 @@ class IupharInteraction(Base):
     ligand_pubchem_sid: Mapped[Optional[int]] = mapped_column()
     ligand_type: Mapped[str] = mapped_column(Text)
     approved: Mapped[bool] = mapped_column()
-    type: Mapped[str] = mapped_column(String(100))
-    action: Mapped[str] = mapped_column(String(100))
+    type: Mapped[Optional[str]] = mapped_column(String(100))
+    action: Mapped[Optional[str]] = mapped_column(String(100))
     action_comment: Mapped[Optional[str]] = mapped_column(String(255))
     selectivity: Mapped[Optional[str]] = mapped_column(String(50))
     endogenous: Mapped[bool] = mapped_column()

From fec21f5c7d7af15bb8adaa030ccc114c74c843f2 Mon Sep 17 00:00:00 2001
From: Bruce Schultz <bschultz013@gmail.com>
Date: Wed, 27 Sep 2023 07:48:20 +0200
Subject: [PATCH 41/58] chore: remove run blocks

---
 ebel/manager/orientdb/biodbs/clinvar.py         | 5 -----
 ebel/manager/orientdb/biodbs/disgenet.py        | 5 -----
 ebel/manager/orientdb/biodbs/kegg.py            | 5 -----
 ebel/manager/orientdb/biodbs/pathway_commons.py | 5 -----
 4 files changed, 20 deletions(-)

diff --git a/ebel/manager/orientdb/biodbs/clinvar.py b/ebel/manager/orientdb/biodbs/clinvar.py
index 2024124..a85c5fc 100644
--- a/ebel/manager/orientdb/biodbs/clinvar.py
+++ b/ebel/manager/orientdb/biodbs/clinvar.py
@@ -262,8 +262,3 @@ def _get_set_gene_rid(self, hgnc_id: str):
             gene_rid = self.get_create_rid("gene", data, check_for="bel")
 
         return gene_rid
-
-
-if __name__ == "__main__":
-    c = ClinVar()
-    c.update()
diff --git a/ebel/manager/orientdb/biodbs/disgenet.py b/ebel/manager/orientdb/biodbs/disgenet.py
index c903013..a814d25 100644
--- a/ebel/manager/orientdb/biodbs/disgenet.py
+++ b/ebel/manager/orientdb/biodbs/disgenet.py
@@ -265,8 +265,3 @@ def update_snps(self) -> int:
                 inserted += 1
 
         return inserted
-
-
-if __name__ == "__main__":
-    d = DisGeNet()
-    d.update()
diff --git a/ebel/manager/orientdb/biodbs/kegg.py b/ebel/manager/orientdb/biodbs/kegg.py
index f75c91f..2ef9e3a 100644
--- a/ebel/manager/orientdb/biodbs/kegg.py
+++ b/ebel/manager/orientdb/biodbs/kegg.py
@@ -387,8 +387,3 @@ def update_interactions(self) -> int:
         self.hgnc.update_bel()
 
         return inserted
-
-
-if __name__ == "__main__":
-    k = Kegg()
-    k.update()
diff --git a/ebel/manager/orientdb/biodbs/pathway_commons.py b/ebel/manager/orientdb/biodbs/pathway_commons.py
index c0ca6e9..1ff0649 100644
--- a/ebel/manager/orientdb/biodbs/pathway_commons.py
+++ b/ebel/manager/orientdb/biodbs/pathway_commons.py
@@ -304,8 +304,3 @@ def get_pathway_pmids_sources(self, pc_id, pc_pathway_name_rid_dict) -> tuple:
         pmids = [x.pmid for x in pc_obj.pmids]
         pathways = [pc_pathway_name_rid_dict[x.name] for x in pc_obj.pathway_names]
         return pathways, pmids, sources
-
-
-if __name__ == "__main__":
-    foo = PathwayCommons()
-    foo.update_interactions()

From 128376ffcf8a53bc4f90c83ea541c1d693bb2e3b Mon Sep 17 00:00:00 2001
From: Bruce Schultz <bschultz013@gmail.com>
Date: Wed, 27 Sep 2023 08:26:47 +0200
Subject: [PATCH 42/58] fix: ncbi nullable props

---
 ebel/manager/orientdb/biodbs/disgenet.py | 4 ++--
 ebel/manager/orientdb/biodbs/ncbi.py     | 5 +++++
 ebel/manager/rdbms/models/ncbi.py        | 8 ++++----
 3 files changed, 11 insertions(+), 6 deletions(-)

diff --git a/ebel/manager/orientdb/biodbs/disgenet.py b/ebel/manager/orientdb/biodbs/disgenet.py
index a814d25..a4843cc 100644
--- a/ebel/manager/orientdb/biodbs/disgenet.py
+++ b/ebel/manager/orientdb/biodbs/disgenet.py
@@ -223,7 +223,7 @@ def update_snps(self) -> int:
                 .group_by(dv.snp_id, dv.chromosome, dv.position, dd.disease_name, dv.pmid, dv.score, ds.source)
             )
 
-            rows = self.session.execute(sql)
+            rows = self.session.execute(sql).fetchall()
             results[kwd] = rows
 
         inserted = 0
@@ -234,7 +234,7 @@ def update_snps(self) -> int:
             for r in tqdm(
                 kwd_disease_results,
                 desc=f"Update DisGeNET variant interactions for {trait}",
-                total=kwd_disease_results.rowcount,
+                total=len(kwd_disease_results),
             ):
                 snp_id, chromosome, position, disease_name, pmid, score, source = r
 
diff --git a/ebel/manager/orientdb/biodbs/ncbi.py b/ebel/manager/orientdb/biodbs/ncbi.py
index a629731..eb50d44 100644
--- a/ebel/manager/orientdb/biodbs/ncbi.py
+++ b/ebel/manager/orientdb/biodbs/ncbi.py
@@ -299,3 +299,8 @@ def _insert_info(self, chunksize: int = 1000000) -> int:
     def update_interactions(self) -> int:
         """Abstract method."""
         pass
+
+
+if __name__ == "__main__":
+    n = Ncbi()
+    n._insert_medgen()
diff --git a/ebel/manager/rdbms/models/ncbi.py b/ebel/manager/rdbms/models/ncbi.py
index caa04f7..fb0231c 100644
--- a/ebel/manager/rdbms/models/ncbi.py
+++ b/ebel/manager/rdbms/models/ncbi.py
@@ -180,10 +180,10 @@ class NcbiGeneEnsembl(Base):
     tax_id: Mapped[int] = mapped_column(index=True)
     gene_id: Mapped[int] = mapped_column(ForeignKey("ncbi_gene_info.gene_id"))
     ensembl_gene_identifier: Mapped[str] = mapped_column(String(100))
-    rna_nucleotide_accession_version: Mapped[str] = mapped_column(String(100))
-    ensembl_rna_identifier: Mapped[str] = mapped_column(String(100))
-    protein_accession_version: Mapped[str] = mapped_column(String(100))
-    ensembl_protein_identifier: Mapped[str] = mapped_column(String(100))
+    rna_nucleotide_accession_version: Mapped[Optional[str]] = mapped_column(String(100))
+    ensembl_rna_identifier: Mapped[Optional[str]] = mapped_column(String(100))
+    protein_accession_version: Mapped[Optional[str]] = mapped_column(String(100))
+    ensembl_protein_identifier: Mapped[Optional[str]] = mapped_column(String(100))
 
     genes: Mapped[NcbiGeneInfo] = relationship("NcbiGeneInfo", back_populates="ensembl_ids")
 

From c5b00ae0d31cc96522e3b0233709a1257ddd3b46 Mon Sep 17 00:00:00 2001
From: Bruce Schultz <bruce.schultz@scai.fraunhofer.de>
Date: Wed, 27 Sep 2023 15:32:14 +0200
Subject: [PATCH 43/58] build:  update logging and copyright year

---
 ebel/__init__.py  |  6 +++---
 ebel/defaults.py  | 28 +++-------------------------
 ebel/logging.conf | 30 ++++++++++++++++++++++++++++++
 3 files changed, 36 insertions(+), 28 deletions(-)
 create mode 100644 ebel/logging.conf

diff --git a/ebel/__init__.py b/ebel/__init__.py
index 5cae1ea..e2ea2fc 100755
--- a/ebel/__init__.py
+++ b/ebel/__init__.py
@@ -1,6 +1,6 @@
 """Root init for eBEL."""
-from . import cache, constants, errors, parser, transformers
-from .manager.orientdb.biodbs.bel import Bel
+from ebel import cache, constants, errors, parser, transformers
+from ebel.manager.orientdb.biodbs.bel import Bel
 
 __version__ = "1.0.37"
 
@@ -12,7 +12,7 @@
 __email__ = "christian.ebeling@scai.fraunhofer.de"
 
 __license__ = "?"
-__copyright__ = """Copyright (c) 2021 Christian Ebeling, Fraunhofer Institute for Algorithms and Scientific
+__copyright__ = """Copyright (c) 2023 Christian Ebeling, Fraunhofer Institute for Algorithms and Scientific
 Computing SCAI, Schloss Birlinghoven, 53754 Sankt Augustin, Germany"""
 
 project_name = __title__
diff --git a/ebel/defaults.py b/ebel/defaults.py
index 8466190..638c1bd 100755
--- a/ebel/defaults.py
+++ b/ebel/defaults.py
@@ -2,11 +2,9 @@
 
 """This file contains default values for configurations and parameters."""
 
-import logging
-import logging.handlers as handlers
-import os
+import logging.config
 
-from .constants import DATA_DIR, LOG_DIR, PROJECT_DIR
+from ebel.constants import DATA_DIR, PROJECT_DIR, THIS_DIR, LOG_DIR
 
 ###############################################################################
 # UNIPROT taxonomy IDs to import
@@ -46,24 +44,4 @@
 
 ###############################################################################
 # Log Handling
-logHandler = handlers.RotatingFileHandler(
-    filename=LOG_DIR.joinpath("ebel.log"),
-    mode="a",
-    maxBytes=4098 * 10,  # 4MB file max
-    backupCount=0,
-)
-logh_format = logging.Formatter("%(asctime)s - %(name)s - %(levelname)s - %(message)s")
-logHandler.setFormatter(logh_format)
-logHandler.setLevel(logging.DEBUG)
-
-
-# Console Handler
-ch = logging.StreamHandler()
-ch_format = logging.Formatter("%(asctime)s - %(levelname)s - %(message)s")
-ch.setFormatter(ch_format)
-ch.setLevel(logging.WARNING)
-
-logging.basicConfig(
-    handlers=[logHandler, ch],
-    encoding="utf-8",
-)
+logging.config.fileConfig(THIS_DIR.joinpath("logging.conf"), defaults={"logfilename": LOG_DIR.joinpath("ebel.log")})
diff --git a/ebel/logging.conf b/ebel/logging.conf
new file mode 100644
index 0000000..aa6b8b8
--- /dev/null
+++ b/ebel/logging.conf
@@ -0,0 +1,30 @@
+[loggers]
+keys=root
+
+[handlers]
+keys=consoleHandler,fileHandler
+
+[formatters]
+keys=full,simple
+
+[logger_root]
+level=DEBUG
+handlers=fileHandler,consoleHandler
+
+[handler_consoleHandler]
+class=StreamHandler
+level=WARNING
+formatter=simple
+args=(sys.stdout,)
+
+[handler_fileHandler]
+class=logging.handlers.RotatingFileHandler
+level=INFO
+formatter=full
+args=("logfilename.log", 'a')
+
+[formatter_full]
+format=%(asctime)s - %(name)s - %(levelname)s - %(message)s
+
+[formatter_simple]
+format=%(asctime)s - %(message)s
\ No newline at end of file

From d9e213fdf638f3cf6f5448eb4938cc305db8eb53 Mon Sep 17 00:00:00 2001
From: Bruce Schultz <bruce.schultz@scai.fraunhofer.de>
Date: Wed, 27 Sep 2023 15:33:03 +0200
Subject: [PATCH 44/58] fix: optimize uniprot rid call

---
 ebel/manager/orientdb/biodbs/biogrid.py |  2 +-
 ebel/manager/orientdb/biodbs/intact.py  | 32 +++++++++++++------------
 ebel/manager/orientdb/odb_meta.py       | 21 +++++++++-------
 3 files changed, 30 insertions(+), 25 deletions(-)

diff --git a/ebel/manager/orientdb/biodbs/biogrid.py b/ebel/manager/orientdb/biodbs/biogrid.py
index 2e97b11..bd33d57 100644
--- a/ebel/manager/orientdb/biodbs/biogrid.py
+++ b/ebel/manager/orientdb/biodbs/biogrid.py
@@ -523,7 +523,7 @@ def update_interactions(self) -> int:
 
         if_func = func.iif if self.engine.dialect.name == "sqlite" else func.IF
 
-        logging.info("Update BioGRID")
+        logger.info("Update BioGRID")
 
         for e in tqdm(
             uniprot_modification_pairs,
diff --git a/ebel/manager/orientdb/biodbs/intact.py b/ebel/manager/orientdb/biodbs/intact.py
index d982fc7..a57b5e4 100644
--- a/ebel/manager/orientdb/biodbs/intact.py
+++ b/ebel/manager/orientdb/biodbs/intact.py
@@ -1,5 +1,6 @@
 """IntAct module."""
 import logging
+import time
 import zipfile
 from typing import Dict
 
@@ -35,6 +36,11 @@ def __init__(self, client: OrientDB = None, condition_keyword="Alzheimer"):
             biodb_name=self.biodb_name,
         )
 
+        up = UniProt()
+        up.update()
+
+        self.uniprot_rid_dict = self.get_pure_uniprot_rid_dict_in_bel_context()
+
     def __len__(self):
         return self.number_of_generics
 
@@ -102,22 +108,20 @@ def insert_data(self) -> Dict[str, int]:
 
         return {self.biodb_name: df.shape[0]}
 
-    def get_create_rid_by_uniprot(self, uniprot_accession: str, uniprot_rid_dict: dict) -> str:
+    def get_create_rid_by_uniprot(self, uniprot_accession: str) -> str:
         """Create or get rID entry for a given UniProt ID.
 
         Parameters
         ----------
         uniprot_accession: str
             UniProt accession number.
-        uniprot_rid_dict: dict
-            Entry parameters matching those of the desired rID entry.
 
         Returns
         -------
         str
             UniProt accession ID.
         """
-        if uniprot_accession not in uniprot_rid_dict:
+        if uniprot_accession not in self.uniprot_rid_dict:
             nn = self.get_namespace_name_by_uniprot(uniprot_accession)
             if nn:
                 namespace, name = nn
@@ -128,8 +132,9 @@ def get_create_rid_by_uniprot(self, uniprot_accession: str, uniprot_rid_dict: di
                     "bel": f'p({namespace}:"{name}")',
                     "uniprot": uniprot_accession,
                 }
-                uniprot_rid_dict[uniprot_accession] = self.get_create_rid("protein", value_dict, check_for="bel")
-        return uniprot_rid_dict.get(uniprot_accession)
+                self.uniprot_rid_dict[uniprot_accession] = self.get_create_rid("protein", value_dict, check_for="bel")
+
+        return self.uniprot_rid_dict.get(uniprot_accession)
 
     def get_namespace_name_by_uniprot(self, uniprot_accession: str) -> tuple:
         """Get the namespace of a given UniProt ID.
@@ -168,14 +173,11 @@ def get_namespace_name_by_uniprot(self, uniprot_accession: str) -> tuple:
 
     def update_interactions(self) -> int:
         """Update intact interactions to graph."""
-        up = UniProt(self.client)
-        up.update()
-
-        uniprot_rid_dict = up.get_pure_uniprot_rid_dict_in_bel_context()
+        logger.info("Update IntAct interactions")
 
         updated = 0
 
-        uniprot_accessions = tuple(uniprot_rid_dict.keys())
+        uniprot_accessions = tuple(self.uniprot_rid_dict.keys())
         it = intact.Intact
 
         for uniprot_accession in tqdm(uniprot_accessions, desc="Update IntAct interactions"):
@@ -204,7 +206,7 @@ def update_interactions(self) -> int:
                     it.confidence_value,
                 )
             )
-            result = self.session.execute(sql)
+            results = self.session.execute(sql).fetchall()
 
             for (
                 up_a,
@@ -216,9 +218,9 @@ def update_interactions(self) -> int:
                 d_method,
                 d_method_id,
                 c_value,
-            ) in result.fetchall():
-                from_rid = self.get_create_rid_by_uniprot(up_a, uniprot_rid_dict)
-                to_rid = self.get_create_rid_by_uniprot(up_b, uniprot_rid_dict)
+            ) in results:
+                from_rid = self.get_create_rid_by_uniprot(up_a)
+                to_rid = self.get_create_rid_by_uniprot(up_b)
 
                 if from_rid and to_rid:
                     value_dict = {
diff --git a/ebel/manager/orientdb/odb_meta.py b/ebel/manager/orientdb/odb_meta.py
index e6f12e4..1b2d180 100644
--- a/ebel/manager/orientdb/odb_meta.py
+++ b/ebel/manager/orientdb/odb_meta.py
@@ -21,7 +21,12 @@
 import sqlalchemy as sqla
 import xmltodict
 from pyorientdb import OrientDB, orient
-from pyorientdb.exceptions import PyOrientCommandException, PyOrientIndexException, PyOrientSecurityAccessException
+from pyorientdb.exceptions import (
+    PyOrientCommandException,
+    PyOrientIndexException,
+    PyOrientSecurityAccessException,
+    PyOrientBadMethodCallException,
+)
 from pyorientdb.otypes import OrientRecord
 from sqlalchemy import text, select, func
 from sqlalchemy.sql.schema import Table
@@ -160,13 +165,11 @@ def execute(self, command_str: str) -> List[OrientRecord]:
         try:
             return self.client.command(command_str)
 
-        # TODO: following exceptions seems not to cover connection error
-        # except (PyOrientCommandException, PyOrientSecurityAccessException):
-        except:
+        except (PyOrientCommandException, PyOrientSecurityAccessException, PyOrientBadMethodCallException) as e:
+            logger.error(e)
             # Try to reconnect
             self.client.close()
             self.client = self.get_client()
-            # self.client.db_open(self.odb_name, self.odb_user, self.odb_password)
             # print(command_str)
             return self.client.command(command_str)
 
@@ -848,7 +851,7 @@ def __get_sql_where_part(params, where_list: Tuple[str] = ()):
                 where_list.append("`{}` IS NULL".format(column))
         where = ""
         if where_list:
-            where = " WHERE " + " AND ".join(where_list)
+            where = "WHERE " + " AND ".join(where_list)
         return where
 
     def get_number_of_class(self, class_name, distinct_column_name: str = None, **params):
@@ -1463,6 +1466,7 @@ def delete_nodes_with_no_edges(self, class_name=None) -> int:
             )
             logger.warning(wtext)
             return 0
+
         else:
             class_name = class_name if class_name is not None else "V"
             return self.execute(f"Delete VERTEX {class_name} where both().size() = 0")[0]
@@ -1524,9 +1528,8 @@ def get_pure_uniprot_rid_dict_in_bel_context(self) -> Dict[str, str]:
         # only include proteins which are also part of a BEL statement to avoid explosion of graph
 
         sql = """Select uniprot, @rid.asString() as rid from protein where pure=true and uniprot in (
-        Select unionall(uniprot_list).asSet() as all_uniprots from (select unionall(in.uniprot, out.uniprot).asSet() as
-        uniprot_list from bel_relation where document IS NOT NULL
-        and (in.uniprot IS NOT NULL or out.uniprot IS NOT NULL)))"""
+        select set(unionall(in.uniprot, out.uniprot)) as all_uniprots from bel_relation where document IS NOT NULL)"""
+        # sql = "select uniprot, @rid.asString() as rid from protein where pure = true and uniprot is not null"
 
         return {r["uniprot"]: r["rid"] for r in self.query_get_dict(sql)}
 

From c51943f887e5717d332938226ec95dfdbcb89be8 Mon Sep 17 00:00:00 2001
From: Bruce Schultz <bruce.schultz@scai.fraunhofer.de>
Date: Wed, 27 Sep 2023 15:59:34 +0200
Subject: [PATCH 45/58] perf:  disable Uniprot update step in intact

---
 ebel/manager/orientdb/biodbs/intact.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/ebel/manager/orientdb/biodbs/intact.py b/ebel/manager/orientdb/biodbs/intact.py
index a57b5e4..3a56bce 100644
--- a/ebel/manager/orientdb/biodbs/intact.py
+++ b/ebel/manager/orientdb/biodbs/intact.py
@@ -36,8 +36,8 @@ def __init__(self, client: OrientDB = None, condition_keyword="Alzheimer"):
             biodb_name=self.biodb_name,
         )
 
-        up = UniProt()
-        up.update()
+        # up = UniProt()
+        # up.update()
 
         self.uniprot_rid_dict = self.get_pure_uniprot_rid_dict_in_bel_context()
 
@@ -81,7 +81,7 @@ def insert_data(self) -> Dict[str, int]:
             "Interaction detection method(s)": "dm",
         }
 
-        df = pd.read_csv(zf.open("intact.txt"), sep="\t", usecols=usecols.keys())
+        df = pd.read_csv(zf.open("intact.txt"), sep="\t", usecols=list(usecols.keys()))
         df.rename(columns=usecols, inplace=True)
 
         regex_accession = r"uniprotkb:([OPQ][0-9][A-Z0-9]{3}[0-9]|[A-NR-Z][0-9]([A-Z][A-Z0-9]{2}[0-9]){1,2})"

From cf10492a89202d9f662fdcd5b99fe108decbcb94 Mon Sep 17 00:00:00 2001
From: Bruce Schultz <bruce.schultz@scai.fraunhofer.de>
Date: Wed, 27 Sep 2023 15:59:59 +0200
Subject: [PATCH 46/58] fix: set query_class method with empty cols to avoid *
 in cmd

---
 ebel/manager/orientdb/odb_meta.py | 25 +++++++++++++++++--------
 1 file changed, 17 insertions(+), 8 deletions(-)

diff --git a/ebel/manager/orientdb/odb_meta.py b/ebel/manager/orientdb/odb_meta.py
index 1b2d180..c5daa1f 100644
--- a/ebel/manager/orientdb/odb_meta.py
+++ b/ebel/manager/orientdb/odb_meta.py
@@ -171,7 +171,7 @@ def execute(self, command_str: str) -> List[OrientRecord]:
             self.client.close()
             self.client = self.get_client()
             # print(command_str)
-            return self.client.command(command_str)
+            return self.execute(command_str)
 
     def set_configuration_parameters(self):
         """Set configuration for OrientDB database client instance using configuration file or passed params."""
@@ -359,7 +359,7 @@ def query_class(
         class_name: str,
         limit: int = 0,
         skip: int = 0,
-        columns: Iterable[str] = None,
+        columns: Iterable[str] = [],
         with_rid=True,
         with_class=False,
         print_sql: bool = False,
@@ -406,7 +406,7 @@ def query_class(
         if distinct and len(cols) == 1:
             sql_cols = "distinct({})".format(sql_cols)
 
-        sql_temp = "SELECT {sql_cols} FROM `{class_name}` {where} {group_by} {sql_limit} {sql_skip}"
+        sql_temp = "SELECT {sql_cols} FROM {class_name} {where} {group_by} {sql_limit} {sql_skip}"
 
         sql = sql_temp.format(
             sql_cols=sql_cols,
@@ -842,16 +842,21 @@ def __get_sql_where_part(params, where_list: Tuple[str] = ()):
         for column, value in params.items():
             if isinstance(value, (str, list, dict)):
                 if value == "notnull":
-                    where_list.append("`{}` IS NOT NULL".format(column))
+                    where_list.append("{} IS NOT NULL".format(column))
+
                 else:
-                    where_list.append("`{}` = {}".format(column, json.dumps(value)))
+                    where_list.append("{} = {}".format(column, json.dumps(value)))
+
             elif isinstance(value, (int, float)):
-                where_list.append("`{}` = {}".format(column, value))
+                where_list.append("{} = {}".format(column, value))
+
             elif value is None:
-                where_list.append("`{}` IS NULL".format(column))
+                where_list.append("{} IS NULL".format(column))
+
         where = ""
         if where_list:
             where = "WHERE " + " AND ".join(where_list)
+
         return where
 
     def get_number_of_class(self, class_name, distinct_column_name: str = None, **params):
@@ -947,7 +952,9 @@ def node_exists(
         if check_for:
             check_for = [check_for] if isinstance(check_for, str) else check_for
             check_for_dict = {k: v for k, v in check_for_dict.items() if k in check_for}
-        result = self.query_class(class_name=class_name, limit=1, print_sql=print_sql, **check_for_dict)
+        result = self.query_class(
+            class_name=class_name, columns=[], limit=1, with_rid=True, print_sql=print_sql, **check_for_dict
+        )
         if result:
             return result[0][RID]
 
@@ -987,8 +994,10 @@ def get_create_rid(self, class_name: str, value_dict: dict, check_for=None, prin
             check_for=check_for,
             print_sql=print_sql,
         )
+
         if not rid:
             rid = self.insert_record(class_name=class_name, value_dict=value_dict, print_sql=print_sql)
+
         return rid
 
     def update_correlative_edges(self) -> List[str]:

From d6201929021f01657e20fd8d9e6c2328844faf73 Mon Sep 17 00:00:00 2001
From: Bruce Schultz <bruce.schultz@scai.fraunhofer.de>
Date: Wed, 27 Sep 2023 16:25:21 +0200
Subject: [PATCH 47/58] revert: logging setup

---
 ebel/defaults.py  | 23 ++++++++++++++++++++---
 ebel/logging.conf | 30 ------------------------------
 2 files changed, 20 insertions(+), 33 deletions(-)
 delete mode 100644 ebel/logging.conf

diff --git a/ebel/defaults.py b/ebel/defaults.py
index 638c1bd..205a3be 100755
--- a/ebel/defaults.py
+++ b/ebel/defaults.py
@@ -2,9 +2,10 @@
 
 """This file contains default values for configurations and parameters."""
 
-import logging.config
+import logging
+import logging.handlers as handlers
 
-from ebel.constants import DATA_DIR, PROJECT_DIR, THIS_DIR, LOG_DIR
+from ebel.constants import DATA_DIR, PROJECT_DIR, LOG_DIR
 
 ###############################################################################
 # UNIPROT taxonomy IDs to import
@@ -44,4 +45,20 @@
 
 ###############################################################################
 # Log Handling
-logging.config.fileConfig(THIS_DIR.joinpath("logging.conf"), defaults={"logfilename": LOG_DIR.joinpath("ebel.log")})
+logHandler = handlers.RotatingFileHandler(
+    filename=LOG_DIR.joinpath("ebel.log"),
+    mode="a",
+    maxBytes=4098 * 10,  # ~40 KB (40,980 bytes) max per log file
+    backupCount=3,
+)
+logh_format = logging.Formatter("%(asctime)s - %(name)s - %(levelname)s - %(message)s")
+logHandler.setFormatter(logh_format)
+logHandler.setLevel(logging.DEBUG)
+
+# Console Handler
+streamHandler = logging.StreamHandler()
+stream_format = logging.Formatter("%(asctime)s - %(levelname)s - %(message)s")
+streamHandler.setFormatter(stream_format)
+streamHandler.setLevel(logging.WARNING)
+
+logging.basicConfig(level=logging.INFO, handlers=[logHandler, streamHandler])
diff --git a/ebel/logging.conf b/ebel/logging.conf
deleted file mode 100644
index aa6b8b8..0000000
--- a/ebel/logging.conf
+++ /dev/null
@@ -1,30 +0,0 @@
-[loggers]
-keys=root
-
-[handlers]
-keys=consoleHandler,fileHandler
-
-[formatters]
-keys=full,simple
-
-[logger_root]
-level=DEBUG
-handlers=fileHandler,consoleHandler
-
-[handler_consoleHandler]
-class=StreamHandler
-level=WARNING
-formatter=simple
-args=(sys.stdout,)
-
-[handler_fileHandler]
-class=logging.handlers.RotatingFileHandler
-level=INFO
-formatter=full
-args=("logfilename.log", 'a')
-
-[formatter_full]
-format=%(asctime)s - %(name)s - %(levelname)s - %(message)s
-
-[formatter_simple]
-format=%(asctime)s - %(message)s
\ No newline at end of file

From d00277b6beec93eb23ad10fe4b185c4522cc2108 Mon Sep 17 00:00:00 2001
From: Bruce Schultz <bruce.schultz@scai.fraunhofer.de>
Date: Wed, 27 Sep 2023 16:35:51 +0200
Subject: [PATCH 48/58] revert: query_class default columns to None

---
 ebel/manager/orientdb/odb_meta.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/ebel/manager/orientdb/odb_meta.py b/ebel/manager/orientdb/odb_meta.py
index c5daa1f..8a31df2 100644
--- a/ebel/manager/orientdb/odb_meta.py
+++ b/ebel/manager/orientdb/odb_meta.py
@@ -359,7 +359,7 @@ def query_class(
         class_name: str,
         limit: int = 0,
         skip: int = 0,
-        columns: Iterable[str] = [],
+        columns: Iterable[str] = None,
         with_rid=True,
         with_class=False,
         print_sql: bool = False,

From 77de7c4b53aba0926e0fd345e79a94d7913099a1 Mon Sep 17 00:00:00 2001
From: Bruce Schultz <bschultz013@gmail.com>
Date: Thu, 28 Sep 2023 09:11:37 +0200
Subject: [PATCH 49/58] chore: remove previous sql text after check

---
 ebel/manager/orientdb/biodbs/disgenet.py | 24 ------------------------
 ebel/manager/orientdb/biodbs/kegg.py     | 24 ------------------------
 ebel/manager/orientdb/biodbs/nsides.py   | 21 ---------------------
 ebel/manager/orientdb/biodbs/uniprot.py  |  5 -----
 4 files changed, 74 deletions(-)

diff --git a/ebel/manager/orientdb/biodbs/disgenet.py b/ebel/manager/orientdb/biodbs/disgenet.py
index a4843cc..4d86467 100644
--- a/ebel/manager/orientdb/biodbs/disgenet.py
+++ b/ebel/manager/orientdb/biodbs/disgenet.py
@@ -183,30 +183,6 @@ def update_snps(self) -> int:
             "downstream": "upstream",
             "upstream": "downstream",
         }
-        # # TODO: replace SQL with SQL Alchemy statement
-        # sql_temp = """Select
-        #         snp_id,
-        #         chromosome,
-        #         position,
-        #         disease_name,
-        #         pmid,
-        #         score,
-        #         source
-        #     FROM
-        #         disgenet_variant v INNER JOIN
-        #         disgenet_source s on (v.source_id=s.id) INNER JOIN
-        #         disgenet_disease d on (v.disease_id=d.disease_id)
-        #     WHERE
-        #         disease_name like '%%{}%%' and
-        #         source!='BEFREE'
-        #     GROUP BY
-        #         snp_id,
-        #         chromosome,
-        #         position,
-        #         disease_name,
-        #         pmid,
-        #         score,
-        #         source"""
 
         dv = disgenet.DisgenetVariant
         ds = disgenet.DisgenetSource
diff --git a/ebel/manager/orientdb/biodbs/kegg.py b/ebel/manager/orientdb/biodbs/kegg.py
index 2ef9e3a..bcfa712 100644
--- a/ebel/manager/orientdb/biodbs/kegg.py
+++ b/ebel/manager/orientdb/biodbs/kegg.py
@@ -278,30 +278,6 @@ def update_interactions(self) -> int:
             "phosphorylation": ("pho", "increases", BelPmod.PHO),
             "ubiquitination": ("ubi", "increases", BelPmod.UBI),
         }
-        # post_translational_modifications = ",".join([f"'{x}'" for x in pmods.keys()])
-
-        # species_ids = ",".join([f"'{x}'" for x in self.species])
-
-        # sql_temp = f"""Select
-        #         interaction_type,
-        #         pathway_identifier,
-        #         pathway_name,
-        #         gene_symbol_a,
-        #         gene_symbol_b,
-        #         kegg_species_id
-        #     from
-        #         kegg
-        #     where
-        #         (gene_symbol_a='{{symbol}}' or gene_symbol_a='{{symbol}}') and
-        #         kegg_species_id in ({species_ids}) and
-        #         interaction_type in ({{interaction_types}})
-        #     group by
-        #         interaction_type,
-        #         pathway_identifier,
-        #         pathway_name,
-        #         gene_symbol_a,
-        #         gene_symbol_b,
-        #         kegg_species_id"""
 
         kg = kegg.Kegg
         for symbol, rid in tqdm(symbol_rids_dict.items(), desc="Update KEGG posttranslational modifications"):
diff --git a/ebel/manager/orientdb/biodbs/nsides.py b/ebel/manager/orientdb/biodbs/nsides.py
index b9a1f43..0d4d441 100644
--- a/ebel/manager/orientdb/biodbs/nsides.py
+++ b/ebel/manager/orientdb/biodbs/nsides.py
@@ -143,26 +143,6 @@ def update_bel(self) -> int:
         self.delete_nodes_with_no_edges("side_effect")
         self.delete_nodes_with_no_edges("drug")
 
-        # # TODO: Translate to sqlalchemy query
-        # sql_temp = """Select
-        #     o.condition_meddra_id,
-        #     o.condition_concept_name,
-        #     o.prr,
-        #     o.mean_reporting_frequency
-        # from
-        #     drugbank as d inner join
-        #     drugbank_external_identifier as dei on (d.id=dei.drugbank_id) inner join
-        #     nsides as o on (dei.identifier=o.drug_rxnorn_id)
-        # where
-        #     d.drugbank_id='{}' and resource='RxCUI'
-        #     and (mean_reporting_frequency>=0.01 OR mean_reporting_frequency is NULL)
-        # group by
-        #     o.condition_meddra_id,
-        #     o.condition_concept_name,
-        #     o.prr,
-        #     o.mean_reporting_frequency
-        # """
-
         drugbank_ids = self.query_class("drug", columns=["drugbank_id"], drugbank_id="notnull")
         drugbank_id_rids = {d["drugbank_id"]: d[RID] for d in drugbank_ids}
 
@@ -176,7 +156,6 @@ def update_bel(self) -> int:
         o = nsides.Nsides
 
         for drugbank_id, drugbank_rid in tqdm(drugbank_id_rids.items(), desc=f"Update {self.biodb_name.upper()}"):
-            # sql = sql_temp.format(drugbank_id)
             sql = (
                 (
                     select(o.condition_meddra_id, o.condition_concept_name, o.prr, o.mean_reporting_frequency)
diff --git a/ebel/manager/orientdb/biodbs/uniprot.py b/ebel/manager/orientdb/biodbs/uniprot.py
index 06dcbcf..95ae065 100644
--- a/ebel/manager/orientdb/biodbs/uniprot.py
+++ b/ebel/manager/orientdb/biodbs/uniprot.py
@@ -308,11 +308,7 @@ def _get_accesssion_recname(self, taxid, gene_symbol) -> Union[Tuple[str, str],
         If this has no result it tries uniprot by gene symbol and NCBI taxonomy ID.
         """
         # TODO: This is in general a dangerous method because it selects the first accession number, but there could
         # be more than one
-        # sql = (
-        #     f"Select accession, recommended_name from uniprot as u inner join uniprot_gene_symbol as gs "
-        #     f'on (u.id=gs.uniprot_id) where u.taxid={taxid} and gs.symbol="{gene_symbol}" limit 1'
-        # )
         sql = (
             select(up.Uniprot.accession, up.Uniprot.recommended_name)
             .join(up.GeneSymbol)

From 889035e3f1c209a50e08fe9d4cf8c716662fe4ee Mon Sep 17 00:00:00 2001
From: Bruce Schultz <bschultz013@gmail.com>
Date: Thu, 28 Sep 2023 09:16:03 +0200
Subject: [PATCH 50/58] perf: improve intact update

---
 ebel/manager/orientdb/biodbs/intact.py | 56 +++++++++++++-------------
 1 file changed, 28 insertions(+), 28 deletions(-)

diff --git a/ebel/manager/orientdb/biodbs/intact.py b/ebel/manager/orientdb/biodbs/intact.py
index 3a56bce..40e1484 100644
--- a/ebel/manager/orientdb/biodbs/intact.py
+++ b/ebel/manager/orientdb/biodbs/intact.py
@@ -180,34 +180,34 @@ def update_interactions(self) -> int:
         uniprot_accessions = tuple(self.uniprot_rid_dict.keys())
         it = intact.Intact
 
-        for uniprot_accession in tqdm(uniprot_accessions, desc="Update IntAct interactions"):
-            sql = (
-                select(
-                    it.int_a_uniprot_id,
-                    it.int_b_uniprot_id,
-                    it.pmid,
-                    it.interaction_ids,
-                    it.interaction_type,
-                    it.interaction_type_psimi_id,
-                    it.detection_method,
-                    it.detection_method_psimi_id,
-                    it.confidence_value,
-                )
-                .where(or_(it.int_a_uniprot_id == uniprot_accession, it.int_b_uniprot_id == uniprot_accession))
-                .group_by(
-                    it.int_a_uniprot_id,
-                    it.int_b_uniprot_id,
-                    it.pmid,
-                    it.interaction_ids,
-                    it.interaction_type,
-                    it.interaction_type_psimi_id,
-                    it.detection_method,
-                    it.detection_method_psimi_id,
-                    it.confidence_value,
-                )
-            )
-            results = self.session.execute(sql).fetchall()
+        sql = select(
+            it.int_a_uniprot_id,
+            it.int_b_uniprot_id,
+            it.pmid,
+            it.interaction_ids,
+            it.interaction_type,
+            it.interaction_type_psimi_id,
+            it.detection_method,
+            it.detection_method_psimi_id,
+            it.confidence_value,
+        ).group_by(
+            it.int_a_uniprot_id,
+            it.int_b_uniprot_id,
+            it.pmid,
+            it.interaction_ids,
+            it.interaction_type,
+            it.interaction_type_psimi_id,
+            it.detection_method,
+            it.detection_method_psimi_id,
+            it.confidence_value,
+        )
 
+        intact_df = pd.read_sql(sql, self.engine)
+
+        for uniprot_accession in tqdm(uniprot_accessions, desc="Update IntAct interactions"):
+            filtered_df = intact_df[
+                (intact_df.int_a_uniprot_id == uniprot_accession) | (intact_df.int_b_uniprot_id == uniprot_accession)
+            ]
             for (
                 up_a,
                 up_b,
@@ -218,7 +218,7 @@ def update_interactions(self) -> int:
                 d_method,
                 d_method_id,
                 c_value,
-            ) in results:
+            ) in filtered_df.iterrows(index=False):
                 from_rid = self.get_create_rid_by_uniprot(up_a)
                 to_rid = self.get_create_rid_by_uniprot(up_b)
 

From b610d93d178418bd5c0cbf6a0d97e92af19fe81d Mon Sep 17 00:00:00 2001
From: Bruce Schultz <bschultz013@gmail.com>
Date: Thu, 28 Sep 2023 09:17:46 +0200
Subject: [PATCH 51/58] fix: change intact update to use itertuples

---
 ebel/manager/orientdb/biodbs/intact.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/ebel/manager/orientdb/biodbs/intact.py b/ebel/manager/orientdb/biodbs/intact.py
index 40e1484..0625efc 100644
--- a/ebel/manager/orientdb/biodbs/intact.py
+++ b/ebel/manager/orientdb/biodbs/intact.py
@@ -218,7 +218,7 @@ def update_interactions(self) -> int:
                 d_method,
                 d_method_id,
                 c_value,
-            ) in filtered_df.iterrows(index=False):
+            ) in filtered_df.itertuples(index=False):
                 from_rid = self.get_create_rid_by_uniprot(up_a)
                 to_rid = self.get_create_rid_by_uniprot(up_b)
 

From 83addcc8e4d8944e462b1f86e866d170c56247bd Mon Sep 17 00:00:00 2001
From: Bruce Schultz <bschultz013@gmail.com>
Date: Thu, 28 Sep 2023 09:20:42 +0200
Subject: [PATCH 52/58] fix: replace nans with None in intact update

---
 ebel/manager/orientdb/biodbs/intact.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/ebel/manager/orientdb/biodbs/intact.py b/ebel/manager/orientdb/biodbs/intact.py
index 0625efc..25f44ba 100644
--- a/ebel/manager/orientdb/biodbs/intact.py
+++ b/ebel/manager/orientdb/biodbs/intact.py
@@ -4,6 +4,7 @@
 import zipfile
 from typing import Dict
 
+import numpy as np
 import pandas as pd
 from pyorientdb import OrientDB
 from sqlalchemy import select, or_
@@ -202,7 +203,7 @@ def update_interactions(self) -> int:
             it.confidence_value,
         )
 
-        intact_df = pd.read_sql(sql, self.engine)
+        intact_df = pd.read_sql(sql, self.engine).replace({np.nan: None})
 
         for uniprot_accession in tqdm(uniprot_accessions, desc="Update IntAct interactions"):
             filtered_df = intact_df[

From 697da197367200ddd9093f4bbf5dfc5833a02281 Mon Sep 17 00:00:00 2001
From: Bruce Schultz <bschultz013@gmail.com>
Date: Thu, 28 Sep 2023 09:34:43 +0200
Subject: [PATCH 53/58] =?UTF-8?q?Bump=20version:=201.0.37=20=E2=86=92=201.?=
 =?UTF-8?q?1.0?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 .bumpversion.cfg | 2 +-
 ebel/__init__.py | 2 +-
 mkdocs.yml       | 2 +-
 pyproject.toml   | 2 +-
 4 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/.bumpversion.cfg b/.bumpversion.cfg
index 82983d2..d7f2e2f 100644
--- a/.bumpversion.cfg
+++ b/.bumpversion.cfg
@@ -1,5 +1,5 @@
 [bumpversion]
-current_version = 1.0.37
+current_version = 1.1.0
 commit = True
 tag = False
 parse = (?P<major>\d+)\.(?P<minor>\d+)\.(?P<patch>\d+)(?:-(?P<release>[0-9A-Za-z-]+(?:\.[0-9A-Za-z-]+)*))?(?:\+(?P<build>[0-9A-Za-z-]+(?:\.[0-9A-Za-z-]+)*))?
diff --git a/ebel/__init__.py b/ebel/__init__.py
index e2ea2fc..97d20d5 100755
--- a/ebel/__init__.py
+++ b/ebel/__init__.py
@@ -2,7 +2,7 @@
 from ebel import cache, constants, errors, parser, transformers
 from ebel.manager.orientdb.biodbs.bel import Bel
 
-__version__ = "1.0.37"
+__version__ = "1.1.0"
 
 __title__ = "e(BE:L)"
 __description__ = "Validation and extension of biomedical knowledge graphs"
diff --git a/mkdocs.yml b/mkdocs.yml
index 8be399d..bd87227 100644
--- a/mkdocs.yml
+++ b/mkdocs.yml
@@ -8,7 +8,7 @@ repo_url: https://github.com/e-bel/ebel
 theme: readthedocs
 
 extra:
-    version: 1.0.37
+    version: 1.1.0
 
 nav:
 #    - Home: index.md
diff --git a/pyproject.toml b/pyproject.toml
index 187016c..56014ba 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -4,7 +4,7 @@ build-backend = "poetry.core.masonry.api"
 
 [tool.poetry]
 name = "ebel"
-version = "1.0.37"
+version = "1.1.0"
 description = "e(BE:L) - validation and extension of BEL networks."
 authors = [
     "Bruce Schultz <bruce.schultz@scai.fraunhofer.de>",

From 71fb54bb59fef1ec845b94eb17a6b50daa0c3397 Mon Sep 17 00:00:00 2001
From: Bruce Schultz <bschultz013@gmail.com>
Date: Thu, 28 Sep 2023 10:40:00 +0200
Subject: [PATCH 54/58] perf: improve intact update by caching bel string

---
 ebel/manager/orientdb/biodbs/intact.py | 25 +++++++++++++++++--------
 ebel/manager/orientdb/odb_meta.py      |  6 ++++++
 2 files changed, 23 insertions(+), 8 deletions(-)

diff --git a/ebel/manager/orientdb/biodbs/intact.py b/ebel/manager/orientdb/biodbs/intact.py
index 25f44ba..36caab3 100644
--- a/ebel/manager/orientdb/biodbs/intact.py
+++ b/ebel/manager/orientdb/biodbs/intact.py
@@ -41,6 +41,7 @@ def __init__(self, client: OrientDB = None, condition_keyword="Alzheimer"):
         # up.update()
 
         self.uniprot_rid_dict = self.get_pure_uniprot_rid_dict_in_bel_context()
+        self.bel_rid_dict = self.get_pure_bel_rid_dict()
 
     def __len__(self):
         return self.number_of_generics
@@ -126,14 +127,22 @@ def get_create_rid_by_uniprot(self, uniprot_accession: str) -> str:
             nn = self.get_namespace_name_by_uniprot(uniprot_accession)
             if nn:
                 namespace, name = nn
-                value_dict = {
-                    "name": name,
-                    "namespace": namespace,
-                    "pure": True,
-                    "bel": f'p({namespace}:"{name}")',
-                    "uniprot": uniprot_accession,
-                }
-                self.uniprot_rid_dict[uniprot_accession] = self.get_create_rid("protein", value_dict, check_for="bel")
+                bel = f'p({namespace}:"{name}")'
+
+                if bel in self.bel_rid_dict:
+                    self.uniprot_rid_dict[uniprot_accession] = self.bel_rid_dict[bel]
+
+                else:
+                    value_dict = {
+                        "name": name,
+                        "namespace": namespace,
+                        "pure": True,
+                        "bel": bel,
+                        "uniprot": uniprot_accession,
+                    }
+                    new_rid = self.insert_record("protein", value_dict=value_dict)
+                    self.bel_rid_dict[bel] = new_rid
+                    self.uniprot_rid_dict[uniprot_accession] = new_rid
 
         return self.uniprot_rid_dict.get(uniprot_accession)
 
diff --git a/ebel/manager/orientdb/odb_meta.py b/ebel/manager/orientdb/odb_meta.py
index 8a31df2..cd2bfbd 100644
--- a/ebel/manager/orientdb/odb_meta.py
+++ b/ebel/manager/orientdb/odb_meta.py
@@ -1542,6 +1542,12 @@ def get_pure_uniprot_rid_dict_in_bel_context(self) -> Dict[str, str]:
 
         return {r["uniprot"]: r["rid"] for r in self.query_get_dict(sql)}
 
+    def get_pure_bel_rid_dict(self) -> Dict[str, str]:
+        """Return a dictionary mapping the BEL string of each pure protein to its rid."""
+        sql = "SELECT bel, @rid.asString() as rid from protein where pure=true"
+        results = self.query_get_dict(sql)
+        return {r["bel"]: r["rid"] for r in results}
+
     def get_pure_uniprot_rids_dict(self):
         """Return dictionary with UniProt IDs as keys and node rIDs as values."""
         sql = "Select uniprot, @rid.asString() as rid from protein where uniprot IS NOT NULL and pure=true"

From fe8991a3ac037077ad8de61d32d8ad529d421036 Mon Sep 17 00:00:00 2001
From: Bruce Schultz <bschultz013@gmail.com>
Date: Thu, 28 Sep 2023 10:53:28 +0200
Subject: [PATCH 55/58] perf: improve intact update by caching uniprot nn

---
 ebel/manager/orientdb/biodbs/intact.py | 27 +++++++++-----------------
 ebel/manager/orientdb/odb_meta.py      | 18 +++++++++++++++++
 2 files changed, 27 insertions(+), 18 deletions(-)

diff --git a/ebel/manager/orientdb/biodbs/intact.py b/ebel/manager/orientdb/biodbs/intact.py
index 36caab3..abd89db 100644
--- a/ebel/manager/orientdb/biodbs/intact.py
+++ b/ebel/manager/orientdb/biodbs/intact.py
@@ -42,6 +42,7 @@ def __init__(self, client: OrientDB = None, condition_keyword="Alzheimer"):
 
         self.uniprot_rid_dict = self.get_pure_uniprot_rid_dict_in_bel_context()
         self.bel_rid_dict = self.get_pure_bel_rid_dict()
+        self.acc_nn = self.get_uniprot_accession_namespaces()
 
     def __len__(self):
         return self.number_of_generics
@@ -159,27 +160,17 @@ def get_namespace_name_by_uniprot(self, uniprot_accession: str) -> tuple:
         tuple
             namespace, value
         """
-        return_value = ()
-
-        sql = (
-            select(uniprot.GeneSymbol.symbol, uniprot.Uniprot.taxid)
-            .join(uniprot.Uniprot)
-            .where(uniprot.Uniprot.accession == uniprot_accession)
-        )
-
-        result = self.session.execute(sql).fetchone()
-        taxid_to_namespace = {9606: "HGNC", 10090: "MGI", 10116: "RGD"}
-
-        if result:
-            name, taxid = result
-            namespace = taxid_to_namespace.get(taxid, "UNIPROT")
-            return_value = (namespace, name)
+        if uniprot_accession in self.acc_nn:
+            return self.acc_nn[uniprot_accession]
 
         else:
-            if self.session.query(uniprot.Uniprot).filter(uniprot.Uniprot.accession == uniprot_accession).first():
-                return_value = ("UNIPROT", uniprot_accession)
+            up_r = self.session.query(uniprot.Uniprot).filter(uniprot.Uniprot.accession == uniprot_accession).first()
+
+            if up_r:
+                return "UNIPROT", uniprot_accession
 
-        return return_value
+            else:
+                return ()
 
     def update_interactions(self) -> int:
         """Update intact interactions to graph."""
diff --git a/ebel/manager/orientdb/odb_meta.py b/ebel/manager/orientdb/odb_meta.py
index cd2bfbd..f18734e 100644
--- a/ebel/manager/orientdb/odb_meta.py
+++ b/ebel/manager/orientdb/odb_meta.py
@@ -39,6 +39,7 @@
 from ebel.config import get_config_as_dict, get_config_value, write_to_config
 from ebel.constants import DEFAULT_ODB, RID
 from ebel.manager.orientdb import urls as default_urls
+from ebel.manager.rdbms.models import uniprot
 from ebel.manager.rdbms.models.ensembl import Ensembl as ens
 from ebel.manager.orientdb.odb_structure import Edge, Generic, Node, OClass, OIndex, OProperty
 from ebel.tools import BelRdb, chunks, get_file_path, get_standard_name
@@ -1553,3 +1554,20 @@ def get_pure_uniprot_rids_dict(self):
         sql = "Select uniprot, @rid.asString() as rid from protein where uniprot IS NOT NULL and pure=true"
         results = self.query_get_dict(sql)
         return {r["uniprot"]: r["rid"] for r in results}
+
+    def get_uniprot_accession_namespaces(self) -> Dict[str, Tuple[str, str]]:
+        """Return a dictionary of uniprot accession keys and namespace and values."""
+        sql = (
+            select(uniprot.Uniprot.accession, uniprot.GeneSymbol.symbol, uniprot.Uniprot.taxid)
+            .join(uniprot.Uniprot)
+        )
+        results = self.session.execute(sql).fetchall()
+
+        acc_dict = dict()
+        taxid_to_namespace = {9606: "HGNC", 10090: "MGI", 10116: "RGD"}
+        for r in results:
+            accession, name, taxid = r
+            namespace = taxid_to_namespace.get(taxid, "UNIPROT")
+            acc_dict[accession] = (namespace, name)
+
+        return acc_dict

From a2cdd5c8138b488e2e2d12e00b4c83b7d66437d7 Mon Sep 17 00:00:00 2001
From: Bruce Schultz <bschultz013@gmail.com>
Date: Thu, 28 Sep 2023 11:03:35 +0200
Subject: [PATCH 56/58] perf: apply caching techniques to stringdb

---
 ebel/manager/orientdb/biodbs/intact.py   |  7 +++-
 ebel/manager/orientdb/biodbs/stringdb.py | 50 ++++++++++++++----------
 ebel/manager/orientdb/odb_meta.py        |  5 +--
 3 files changed, 35 insertions(+), 27 deletions(-)

diff --git a/ebel/manager/orientdb/biodbs/intact.py b/ebel/manager/orientdb/biodbs/intact.py
index abd89db..f9625c4 100644
--- a/ebel/manager/orientdb/biodbs/intact.py
+++ b/ebel/manager/orientdb/biodbs/intact.py
@@ -167,10 +167,13 @@ def get_namespace_name_by_uniprot(self, uniprot_accession: str) -> tuple:
             up_r = self.session.query(uniprot.Uniprot).filter(uniprot.Uniprot.accession == uniprot_accession).first()
 
             if up_r:
-                return "UNIPROT", uniprot_accession
+                return_value = "UNIPROT", uniprot_accession
 
             else:
-                return ()
+                return_value = ()
+
+            self.acc_nn[uniprot_accession] = return_value
+            return return_value
 
     def update_interactions(self) -> int:
         """Update intact interactions to graph."""
diff --git a/ebel/manager/orientdb/biodbs/stringdb.py b/ebel/manager/orientdb/biodbs/stringdb.py
index 7adccd0..3ac3935 100644
--- a/ebel/manager/orientdb/biodbs/stringdb.py
+++ b/ebel/manager/orientdb/biodbs/stringdb.py
@@ -40,6 +40,9 @@ def __init__(self, client: OrientDB = None):
             biodb_name=self.biodb_name,
         )
 
+        self.symbol_rid_dict = self.get_pure_symbol_rids_dict_in_bel_context(namespace="HGNC")
+        self.bel_rid_dict = self.get_pure_bel_rid_dict()
+
     def __len__(self) -> dict:
         """Get number of 'biogrid_interaction' graph edges."""
         pass
@@ -202,10 +205,9 @@ def update_stringdb_interactions(self, hgnc: Hgnc) -> int:
             "combined_score",
         )
 
-        bel_hgnc_rid_dict = self.get_pure_symbol_rids_dict_in_bel_context(namespace="HGNC")
-        bel_hgncs = set(bel_hgnc_rid_dict.keys())
+        symbols = set(self.symbol_rid_dict.keys())
         strdb_hgncs = self.get_stringdb_symbols()
-        shared_hgncs = bel_hgncs & strdb_hgncs
+        shared_hgncs = symbols & strdb_hgncs
 
         updated = 0
         already_inserted = set()
@@ -222,8 +224,8 @@ def update_stringdb_interactions(self, hgnc: Hgnc) -> int:
                 if sorted_combi not in already_inserted:
                     value_dict = {k: v for k, v in row.__dict__.items() if k in columns}
 
-                    from_rid = self.get_create_rid_by_symbol(row.symbol1, bel_hgnc_rid_dict, hgnc)
-                    to_rid = self.get_create_rid_by_symbol(row.symbol2, bel_hgnc_rid_dict, hgnc)
+                    from_rid = self.get_create_rid_by_symbol(row.symbol1, hgnc)
+                    to_rid = self.get_create_rid_by_symbol(row.symbol2, hgnc)
 
                     if from_rid and to_rid:
                         self.create_edge(
@@ -237,15 +239,13 @@ def update_stringdb_interactions(self, hgnc: Hgnc) -> int:
 
         return updated
 
-    def get_create_rid_by_symbol(self, symbol: str, symbol_rid_dict: dict, hgnc: Hgnc) -> str:
+    def get_create_rid_by_symbol(self, symbol: str, hgnc: Hgnc) -> str:
         """Create or get rID entry for a given gene symbol.
 
         Parameters
         ----------
         symbol: str
             Gene symbol.
-        symbol_rid_dict: dict
-            Entry parameters matching those of the desired rID entry.
         hgnc: Hgnc
             Hgnc model definition.
 
@@ -254,17 +254,26 @@ def get_create_rid_by_symbol(self, symbol: str, symbol_rid_dict: dict, hgnc: Hgn
         str
             rID.
         """
-        if symbol not in symbol_rid_dict:
+        if symbol not in self.symbol_rid_dict:
             symbol = hgnc.get_correct_symbol(symbol)
             if symbol:
-                value_dict = {
-                    "name": symbol,
-                    "namespace": "HGNC",
-                    "pure": True,
-                    "bel": f'p(HGNC:"{symbol}")',
-                }
-                symbol_rid_dict[symbol] = self.get_create_rid("protein", value_dict, check_for="bel")
-        return symbol_rid_dict.get(symbol)
+                bel = f'p(HGNC:"{symbol}")'
+
+                if bel in self.bel_rid_dict:
+                    self.symbol_rid_dict[symbol] = self.bel_rid_dict[bel]
+
+                else:
+                    value_dict = {
+                        "name": symbol,
+                        "namespace": "HGNC",
+                        "pure": True,
+                        "bel": bel,
+                    }
+                    new_rid = self.insert_record("protein", value_dict)
+                    self.symbol_rid_dict[symbol] = new_rid
+                    self.bel_rid_dict[bel] = new_rid
+
+        return self.symbol_rid_dict.get(symbol)
 
     def update_action_interactions(self, hgnc: Hgnc) -> int:
         """Iterate through BEL proteins and add stringdb_action edges to existing proteins in KG.
@@ -292,8 +301,7 @@ def update_action_interactions(self, hgnc: Hgnc) -> int:
 
         modes = ("activation", "inhibition", "ptmod", "expression")
 
-        symbols_rid_dict = self.get_pure_symbol_rids_dict_in_bel_context(namespace="HGNC")
-        symbols = tuple(symbols_rid_dict.keys())
+        symbols = tuple(self.symbol_rid_dict.keys())
 
         already_inserted = set()
 
@@ -313,8 +321,8 @@ def update_action_interactions(self, hgnc: Hgnc) -> int:
                 sorted_combi = tuple(sorted([action.symbol1, action.symbol2]))
 
                 if sorted_combi not in already_inserted:
-                    from_rid = self.get_create_rid_by_symbol(action.symbol1, symbols_rid_dict, hgnc)
-                    to_rid = self.get_create_rid_by_symbol(action.symbol2, symbols_rid_dict, hgnc)
+                    from_rid = self.get_create_rid_by_symbol(action.symbol1, hgnc)
+                    to_rid = self.get_create_rid_by_symbol(action.symbol2, hgnc)
 
                     if from_rid and to_rid:
                         class_name = translator[(action.mode, action.action)]
diff --git a/ebel/manager/orientdb/odb_meta.py b/ebel/manager/orientdb/odb_meta.py
index f18734e..7fd687b 100644
--- a/ebel/manager/orientdb/odb_meta.py
+++ b/ebel/manager/orientdb/odb_meta.py
@@ -1557,10 +1557,7 @@ def get_pure_uniprot_rids_dict(self):
 
     def get_uniprot_accession_namespaces(self) -> Dict[str, Tuple[str, str]]:
         """Return a dictionary of uniprot accession keys and namespace and values."""
-        sql = (
-            select(uniprot.Uniprot.accession, uniprot.GeneSymbol.symbol, uniprot.Uniprot.taxid)
-            .join(uniprot.Uniprot)
-        )
+        sql = select(uniprot.Uniprot.accession, uniprot.GeneSymbol.symbol, uniprot.Uniprot.taxid).join(uniprot.Uniprot)
         results = self.session.execute(sql).fetchall()
 
         acc_dict = dict()

From 36337f41186e786d4054672d2aaaa6fe235debca Mon Sep 17 00:00:00 2001
From: Bruce Schultz <bschultz013@gmail.com>
Date: Thu, 28 Sep 2023 12:51:58 +0200
Subject: [PATCH 57/58] style: black and isort

---
 ebel/defaults.py                           |  2 +-
 ebel/manager/orientdb/biodbs/biogrid.py    |  2 +-
 ebel/manager/orientdb/biodbs/clinvar.py    |  4 ++--
 ebel/manager/orientdb/biodbs/intact.py     |  2 +-
 ebel/manager/orientdb/biodbs/kegg.py       |  2 +-
 ebel/manager/orientdb/biodbs/mirtarbase.py |  2 +-
 ebel/manager/orientdb/biodbs/nsides.py     |  4 ++--
 ebel/manager/orientdb/biodbs/stringdb.py   |  2 +-
 ebel/manager/orientdb/biodbs/uniprot.py    |  2 +-
 ebel/manager/orientdb/odb_meta.py          | 14 ++++++++++----
 ebel/manager/rdbms/models/biogrid.py       |  2 +-
 11 files changed, 22 insertions(+), 16 deletions(-)

diff --git a/ebel/defaults.py b/ebel/defaults.py
index 205a3be..aa8260e 100755
--- a/ebel/defaults.py
+++ b/ebel/defaults.py
@@ -5,7 +5,7 @@
 import logging
 import logging.handlers as handlers
 
-from ebel.constants import DATA_DIR, PROJECT_DIR, LOG_DIR
+from ebel.constants import DATA_DIR, LOG_DIR, PROJECT_DIR
 
 ###############################################################################
 # UNIPROT taxonomy IDs to import
diff --git a/ebel/manager/orientdb/biodbs/biogrid.py b/ebel/manager/orientdb/biodbs/biogrid.py
index bd33d57..850016c 100644
--- a/ebel/manager/orientdb/biodbs/biogrid.py
+++ b/ebel/manager/orientdb/biodbs/biogrid.py
@@ -7,7 +7,7 @@
 import numpy as np
 import pandas as pd
 from pyorientdb import OrientDB
-from sqlalchemy import select, func, cast, Integer
+from sqlalchemy import Integer, cast, func, select
 from sqlalchemy.orm import aliased
 from tqdm import tqdm
 
diff --git a/ebel/manager/orientdb/biodbs/clinvar.py b/ebel/manager/orientdb/biodbs/clinvar.py
index a85c5fc..ca9dc7c 100644
--- a/ebel/manager/orientdb/biodbs/clinvar.py
+++ b/ebel/manager/orientdb/biodbs/clinvar.py
@@ -5,13 +5,13 @@
 
 import pandas as pd
 from pyorientdb import OrientDB
-from sqlalchemy import text, select
+from sqlalchemy import select, text
 from tqdm import tqdm
 
 from ebel.manager.orientdb import odb_meta, odb_structure, urls
+from ebel.manager.orientdb.biodbs.ensembl import Ensembl
 from ebel.manager.orientdb.constants import CLINVAR
 from ebel.manager.rdbms.models import clinvar
-from ebel.manager.orientdb.biodbs.ensembl import Ensembl
 from ebel.tools import get_disease_trait_keywords_from_config, get_file_path
 
 logger = logging.getLogger(__name__)
diff --git a/ebel/manager/orientdb/biodbs/intact.py b/ebel/manager/orientdb/biodbs/intact.py
index f9625c4..e9fde67 100644
--- a/ebel/manager/orientdb/biodbs/intact.py
+++ b/ebel/manager/orientdb/biodbs/intact.py
@@ -7,7 +7,7 @@
 import numpy as np
 import pandas as pd
 from pyorientdb import OrientDB
-from sqlalchemy import select, or_
+from sqlalchemy import or_, select
 from tqdm import tqdm
 
 from ebel.manager.orientdb import odb_meta, odb_structure, urls
diff --git a/ebel/manager/orientdb/biodbs/kegg.py b/ebel/manager/orientdb/biodbs/kegg.py
index bcfa712..585b16d 100644
--- a/ebel/manager/orientdb/biodbs/kegg.py
+++ b/ebel/manager/orientdb/biodbs/kegg.py
@@ -9,7 +9,7 @@
 import pandas as pd
 import requests
 from pyorientdb import OrientDB
-from sqlalchemy import select, or_
+from sqlalchemy import or_, select
 from tqdm import tqdm
 
 from ebel.config import get_config_value
diff --git a/ebel/manager/orientdb/biodbs/mirtarbase.py b/ebel/manager/orientdb/biodbs/mirtarbase.py
index f3c03bc..2703586 100644
--- a/ebel/manager/orientdb/biodbs/mirtarbase.py
+++ b/ebel/manager/orientdb/biodbs/mirtarbase.py
@@ -3,7 +3,7 @@
 
 import pandas as pd
 from pyorientdb import OrientDB
-from sqlalchemy import text, select
+from sqlalchemy import select, text
 from tqdm import tqdm
 
 from ebel.manager.orientdb import odb_meta, odb_structure, urls
diff --git a/ebel/manager/orientdb/biodbs/nsides.py b/ebel/manager/orientdb/biodbs/nsides.py
index 0d4d441..ef16c77 100644
--- a/ebel/manager/orientdb/biodbs/nsides.py
+++ b/ebel/manager/orientdb/biodbs/nsides.py
@@ -7,13 +7,13 @@
 
 import pandas as pd
 from pyorientdb import OrientDB
-from sqlalchemy import text, select, or_
+from sqlalchemy import or_, select, text
 from tqdm import tqdm
 
 from ebel.constants import RID
 from ebel.manager.orientdb import odb_meta, odb_structure, urls
 from ebel.manager.orientdb.constants import OFFSIDES, ONSIDES
-from ebel.manager.rdbms.models import nsides, drugbank
+from ebel.manager.rdbms.models import drugbank, nsides
 from ebel.tools import get_file_path
 
 logger = logging.getLogger(__name__)
diff --git a/ebel/manager/orientdb/biodbs/stringdb.py b/ebel/manager/orientdb/biodbs/stringdb.py
index 3ac3935..68219b6 100644
--- a/ebel/manager/orientdb/biodbs/stringdb.py
+++ b/ebel/manager/orientdb/biodbs/stringdb.py
@@ -6,7 +6,7 @@
 import numpy as np
 import pandas as pd
 from pyorientdb import OrientDB
-from sqlalchemy import or_, select, text, and_
+from sqlalchemy import and_, or_, select, text
 from tqdm import tqdm
 
 from ebel.manager.orientdb import odb_meta, odb_structure, urls
diff --git a/ebel/manager/orientdb/biodbs/uniprot.py b/ebel/manager/orientdb/biodbs/uniprot.py
index 95ae065..9fbc0c1 100644
--- a/ebel/manager/orientdb/biodbs/uniprot.py
+++ b/ebel/manager/orientdb/biodbs/uniprot.py
@@ -10,7 +10,7 @@
 import pandas as pd
 from lxml.etree import iterparse
 from pyorientdb import OrientDB
-from sqlalchemy import text, select
+from sqlalchemy import select, text
 from tqdm import tqdm
 
 from ebel.defaults import default_tax_ids
diff --git a/ebel/manager/orientdb/odb_meta.py b/ebel/manager/orientdb/odb_meta.py
index 7fd687b..96d3f3d 100644
--- a/ebel/manager/orientdb/odb_meta.py
+++ b/ebel/manager/orientdb/odb_meta.py
@@ -22,13 +22,14 @@
 import xmltodict
 from pyorientdb import OrientDB, orient
 from pyorientdb.exceptions import (
+    PyOrientBadMethodCallException,
     PyOrientCommandException,
     PyOrientIndexException,
     PyOrientSecurityAccessException,
-    PyOrientBadMethodCallException,
+    PyOrientSecurityException,
 )
 from pyorientdb.otypes import OrientRecord
-from sqlalchemy import text, select, func
+from sqlalchemy import func, select, text
 from sqlalchemy.sql.schema import Table
 from sqlalchemy_utils import create_database, database_exists
 from tqdm import tqdm
@@ -39,9 +40,9 @@
 from ebel.config import get_config_as_dict, get_config_value, write_to_config
 from ebel.constants import DEFAULT_ODB, RID
 from ebel.manager.orientdb import urls as default_urls
+from ebel.manager.orientdb.odb_structure import Edge, Generic, Node, OClass, OIndex, OProperty
 from ebel.manager.rdbms.models import uniprot
 from ebel.manager.rdbms.models.ensembl import Ensembl as ens
-from ebel.manager.orientdb.odb_structure import Edge, Generic, Node, OClass, OIndex, OProperty
 from ebel.tools import BelRdb, chunks, get_file_path, get_standard_name
 
 type_map_inverse = {v: k for k, v in orient.type_map.items()}
@@ -166,7 +167,12 @@ def execute(self, command_str: str) -> List[OrientRecord]:
         try:
             return self.client.command(command_str)
 
-        except (PyOrientCommandException, PyOrientSecurityAccessException, PyOrientBadMethodCallException) as e:
+        except (
+            PyOrientCommandException,
+            PyOrientSecurityAccessException,
+            PyOrientBadMethodCallException,
+            PyOrientSecurityException,
+        ) as e:
             logger.error(e)
             # Try to reconnect
             self.client.close()
diff --git a/ebel/manager/rdbms/models/biogrid.py b/ebel/manager/rdbms/models/biogrid.py
index 58def36..dd0ab2a 100644
--- a/ebel/manager/rdbms/models/biogrid.py
+++ b/ebel/manager/rdbms/models/biogrid.py
@@ -1,7 +1,7 @@
 """BioGRID RDBMS model definition."""
 from sqlalchemy import Float, ForeignKey, Integer, String, Text, select
 from sqlalchemy.ext.declarative import declarative_base
-from sqlalchemy.orm import Mapped, mapped_column, relationship, aliased
+from sqlalchemy.orm import Mapped, aliased, mapped_column, relationship
 from sqlalchemy_utils import create_view
 
 from ebel.manager.rdbms.models import object_as_dict

From 7ef4fa5a8739b00869fca45850bbe0a1701d5ed0 Mon Sep 17 00:00:00 2001
From: Bruce Schultz <bruce.schultz@scai.fraunhofer.de>
Date: Fri, 29 Sep 2023 09:29:07 +0200
Subject: [PATCH 58/58] fix: specify columns in pc dict method

---
 ebel/manager/orientdb/biodbs/pathway_commons.py | 12 ++++++++++--
 ebel/manager/orientdb/odb_meta.py               |  2 +-
 2 files changed, 11 insertions(+), 3 deletions(-)

diff --git a/ebel/manager/orientdb/biodbs/pathway_commons.py b/ebel/manager/orientdb/biodbs/pathway_commons.py
index 1ff0649..9a44aa2 100644
--- a/ebel/manager/orientdb/biodbs/pathway_commons.py
+++ b/ebel/manager/orientdb/biodbs/pathway_commons.py
@@ -221,10 +221,12 @@ def update_interactions(self) -> Dict[str, int]:
         self.hgnc.update()
         valid_hgnc_symbols = {x[0] for x in self.session.query(hgnc.Hgnc).with_entities(hgnc.Hgnc.symbol).all()}
 
+        pure_symbol_rids_dict = self.get_pure_symbol_rids_dict()
+        symbol_rids_bel_context_dict = self.get_pure_symbol_rids_dict_in_bel_context()
+
         cols = ["symbol", "rid"]
-        pure_symbol_rids_dict = self.hgnc.get_pure_symbol_rids_dict()
         df_all = pd.DataFrame(pure_symbol_rids_dict.items(), columns=cols)
-        df_bel = pd.DataFrame(self.hgnc.get_pure_symbol_rids_dict_in_bel_context().items(), columns=cols)
+        df_bel = pd.DataFrame(symbol_rids_bel_context_dict.items(), columns=cols)
 
         # skip here if there is no pure symbols with or without BEL context
         if any([df_all.empty, df_bel.empty]):
@@ -304,3 +306,9 @@ def get_pathway_pmids_sources(self, pc_id, pc_pathway_name_rid_dict) -> tuple:
         pmids = [x.pmid for x in pc_obj.pmids]
         pathways = [pc_pathway_name_rid_dict[x.name] for x in pc_obj.pathway_names]
         return pathways, pmids, sources
+
+
+if __name__ == "__main__":
+    # Manual smoke check: build the symbol -> rID mapping and report its size.
+    symbol_rids = PathwayCommons().get_pure_symbol_rids_dict()
+    print(f"{len(symbol_rids)} pure symbol rIDs found")
diff --git a/ebel/manager/orientdb/odb_meta.py b/ebel/manager/orientdb/odb_meta.py
index 96d3f3d..15fa146 100644
--- a/ebel/manager/orientdb/odb_meta.py
+++ b/ebel/manager/orientdb/odb_meta.py
@@ -1525,7 +1525,7 @@ def get_pure_symbol_rid_df_in_bel_context(self, class_name="protein", namespace=
 
     def get_pure_symbol_rids_dict(self, class_name="protein", namespace="HGNC") -> Dict[str, str]:
         """Return dictionary with protein name as keys and node rIDs as values."""
-        results = self.query_class(class_name, pure=True, namespace=namespace)
+        results = self.query_class(class_name, pure=True, namespace=namespace, columns=["name"], with_rid=True)
         return {r["name"]: r["rid"] for r in results}
 
     def get_pure_rid_by_uniprot(self, uniprot: str):