Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Run ruff's linter and fix any relevant errors raised #452

Draft
wants to merge 3 commits into
base: main
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
23 changes: 23 additions & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -190,9 +190,32 @@ show-fixes = true
[tool.ruff.format]
docstring-code-format = true

[tool.ruff.lint]
select = [
"ALL",
]
ignore = [
"COM812", # https://docs.astral.sh/ruff/rules/missing-trailing-comma/
"D107", # https://docs.astral.sh/ruff/rules/undocumented-public-init/
"D203", # https://docs.astral.sh/ruff/rules/one-blank-line-before-class/
"D211", # https://docs.astral.sh/ruff/rules/blank-line-before-class/
"D213", # https://docs.astral.sh/ruff/rules/multi-line-summary-second-line/
"G004", # https://docs.astral.sh/ruff/rules/logging-f-string/
"I001", # https://docs.astral.sh/ruff/rules/unsorted-imports/
"ICN001", # https://docs.astral.sh/ruff/rules/unconventional-import-alias/
"ISC001", # https://docs.astral.sh/ruff/rules/single-line-implicit-string-concatenation/
"UP035", # https://docs.astral.sh/ruff/rules/deprecated-import/
]

[tool.ruff.lint.per-file-ignores]
# Ignore `F403` (unable to detect undefined names) in all `__init__.py` files
"__init__.py" = ["F403"]
# Ignore the following checks in all pytest files
"src/python/tests/**.py" = [
"INP001", # https://docs.astral.sh/ruff/rules/implicit-namespace-package/
"PLR0913", # https://docs.astral.sh/ruff/rules/too-many-arguments/
"S101", # https://docs.astral.sh/ruff/rules/assert/
]

[tool.mypy]
mypy_path = "src/python"
Expand Down
36 changes: 24 additions & 12 deletions src/python/ensembl/io/genomio/annotation/update_description.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@

import logging
from pathlib import Path
from typing import Any, Dict, List, Optional, Tuple
from typing import Any

from sqlalchemy.orm import Session
from sqlalchemy import and_, select
Expand All @@ -39,18 +39,18 @@
"transcript": "transcript",
}

FeatStruct = Tuple[str, str, str]
FeatStruct = tuple[str, str, str]


def get_core_data(session: Session, table: str, match_xrefs: bool = False) -> Dict[str, FeatStruct]:
def get_core_data(session: Session, table: str, match_xrefs: bool = False) -> dict[str, FeatStruct]:
"""Returns the table descriptions from a core database.

Args:
session: Session open on a core database.
table: "gene" or "transcript" table from the core database.
match_xrefs: If the IDs do not match, try to match an Xref ID instead.
"""

"""
if table == "gene":
stmt = (
select(Gene.gene_id, Gene.stable_id, Gene.description, Xref.dbprimary_acc)
Expand Down Expand Up @@ -103,6 +103,7 @@ def load_descriptions(
report: Print the mapping of changes to perform in the standard output.
do_update: Actually update the core database.
match_xrefs: If the IDs do not match, try to match an Xref ID instead.

"""
func = get_json(func_file)
logging.info(f"{len(func)} annotations from {func_file}")
Expand All @@ -125,7 +126,13 @@ def load_descriptions(
}
# Compare, only keep the descriptions that have changed
features_to_update = _get_features_to_update(
table, feat_func, feat_data, stats, report=report, do_update=do_update, match_xrefs=match_xrefs
table,
feat_func,
feat_data,
stats,
report=report,
do_update=do_update,
match_xrefs=match_xrefs,
)

# Show stats for this feature type
Expand All @@ -141,8 +148,10 @@ def load_descriptions(


def _get_cur_feat(
feat_data: Dict[str, FeatStruct], new_feat: Dict[str, Any], match_xrefs: bool = False
) -> Optional[FeatStruct]:
feat_data: dict[str, FeatStruct],
new_feat: dict[str, Any],
match_xrefs: bool = False,
) -> FeatStruct | None:
"""Match a feature ID, synonyms or xrefs to a core stable ID and return the matching core feature.

Returns None if no match.
Expand All @@ -169,14 +178,14 @@ def _get_cur_feat(

def _get_features_to_update(
table: str,
feat_func: List[Dict[str, Any]],
feat_data: Dict[str, FeatStruct],
stats: Dict[str, int],
feat_func: list[dict[str, Any]],
feat_data: dict[str, FeatStruct],
stats: dict[str, int],
*,
report: bool = False,
do_update: bool = False,
match_xrefs: bool = True,
) -> List[Dict[str, Any]]:
) -> list[dict[str, Any]]:
"""Checks a list of features and returns those whose description we want to update.

Args:
Expand All @@ -190,6 +199,7 @@ def _get_features_to_update(

Returns:
The list of features with their operation changed to update or insert.

"""
to_update = []
for new_feat in feat_func:
Expand Down Expand Up @@ -247,7 +257,9 @@ def main() -> None:
parser.add_argument("--report", action="store_true", help="Show what change would be made")
parser.add_argument("--update", action="store_true", help="Make the changes to the database")
parser.add_argument(
"--match_xrefs", action="store_true", help="Use xref IDs to match features if IDs do not work"
"--match_xrefs",
action="store_true",
help="Use xref IDs to match features if IDs do not work",
)
parser.add_log_arguments(add_log_file=True)
args = parser.parse_args()
Expand Down
42 changes: 25 additions & 17 deletions src/python/ensembl/io/genomio/assembly/download.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,6 @@
from pathlib import Path
import re
import time
from typing import Dict, Optional

from ensembl.utils.argparse import ArgumentParser
from ensembl.utils.logging import init_logging_with_args
Expand Down Expand Up @@ -70,8 +69,8 @@ def establish_ftp(ftp_conn: FTP, ftp_url: str, accession: str) -> FTP:

Raises:
UnsupportedFormatError: If `accession` does not follow INSDC's accession format.
"""

"""
match = re.match(r"^(GC[AF])_([0-9]{3})([0-9]{3})([0-9]{3})(\.[0-9]+)?$", accession)
if not match:
raise UnsupportedFormatError(f"Could not recognize GCA accession format: {accession}")
Expand All @@ -89,15 +88,15 @@ def establish_ftp(ftp_conn: FTP, ftp_url: str, accession: str) -> FTP:
return ftp_conn


def md5_files(dl_dir: Path, md5_path: Optional[Path] = None, md5_filename: str = "md5checksums.txt") -> bool:
"""
Check all files checksums with the sums listed in a checksum file, if available.
def md5_files(dl_dir: Path, md5_path: Path | None = None, md5_filename: str = "md5checksums.txt") -> bool:
"""Check all files checksums with the sums listed in a checksum file, if available.
Return False if there is no checksum file, or a file is missing, or has a wrong checksum.

Args:
dl_dir: Path location to containing downloaded FTP files.
md5_path: Full path to an MD5 checksum file.
md5_filename: Name of a checksum file in the `dl_dir` (used if no `md5_path` is given).

"""
# Get or set md5 file to user or default setting
if md5_path is None:
Expand Down Expand Up @@ -127,14 +126,14 @@ def md5_files(dl_dir: Path, md5_path: Optional[Path] = None, md5_filename: str =
return True


def get_checksums(checksum_path: Path) -> Dict[str, str]:
"""
Get a dict of checksums from a file, with file names as keys and sums as values
def get_checksums(checksum_path: Path) -> dict[str, str]:
"""Get a dict of checksums from a file, with file names as keys and sums as values

Args:
checksum_path: Path location to MD5 checksum file.

"""
sums: Dict[str, str] = {}
sums: dict[str, str] = {}
if not checksum_path.is_file():
return sums
with checksum_path.open(mode="r") as fh:
Expand All @@ -147,16 +146,15 @@ def get_checksums(checksum_path: Path) -> Dict[str, str]:


def download_files(ftp_connection: FTP, accession: str, dl_dir: Path, max_redo: int) -> None:
"""
Given an INSDC accession, download all available files from the ftp to the download dir
"""Given an INSDC accession, download all available files from the ftp to the download dir

Args:
ftp_connection: An open FTP connection object
accession: Genome assembly accession.
dl_dir: Path to downloaded FTP files.
max_redo: Maximum FTP connection retry attempts.
"""

"""
# Get the list of assemblies for this accession
for ftp_dir, _ in ftp_connection.mlsd():
if re.search(accession, ftp_dir):
Expand All @@ -176,12 +174,16 @@ def download_files(ftp_connection: FTP, accession: str, dl_dir: Path, max_redo:
_download_file(ftp_connection, ftp_file, md5_sums, dl_dir, max_redo)
else:
logging.warning(
f"Could not find accession '{accession}' from ftp {ftp_dir} in open FTP connection"
f"Could not find accession '{accession}' from ftp {ftp_dir} in open FTP connection",
)


def _download_file(
ftp_connection: FTP, ftp_file: str, md5_sums: Dict[str, str], dl_dir: Path, max_redo: int = 0
ftp_connection: FTP,
ftp_file: str,
md5_sums: dict[str, str],
dl_dir: Path,
max_redo: int = 0,
) -> None:
"""Downloads individual files from FTP server.

Expand All @@ -191,6 +193,7 @@ def _download_file(
md5_sums: Dictionary of key value pairs filename - md5_checksums.
dl_dir: Path to downloaded FTP files.
max_redo: Maximum number of connection retry attempts.

"""
has_md5 = True
expected_sum = ""
Expand Down Expand Up @@ -238,7 +241,7 @@ def _download_file(
raise FileDownloadError(f"Could not download file {ftp_file} after {redo} tries")


def get_files_selection(dl_dir: Path) -> Dict[str, str]:
def get_files_selection(dl_dir: Path) -> dict[str, str]:
"""Returns a dictionary with the relevant downloaded files classified.

Args:
Expand All @@ -249,6 +252,7 @@ def get_files_selection(dl_dir: Path) -> Dict[str, str]:

Raises:
FileDownloadError: If `dl_dir` tree does not include a file named `*_assembly_report.txt`.

"""
files = {}
root_name = get_root_name(dl_dir)
Expand All @@ -267,6 +271,7 @@ def get_root_name(dl_dir: Path) -> str:

Args:
dl_dir: Path location of downloaded FTP files.

"""
root_name = ""
for dl_file in dl_dir.iterdir():
Expand Down Expand Up @@ -294,6 +299,7 @@ def retrieve_assembly_data(

Raises:
FileDownloadError: If no files are downloaded or if any does not match its MD5 checksum.

"""
download_dir = Path(download_dir)

Expand All @@ -304,7 +310,7 @@ def retrieve_assembly_data(
if not md5_files(download_dir, None):
logging.info(" Download the files")

for increment in range(0, max_increment + 1):
for increment in range(max_increment + 1):
if increment > 0:
logging.info(f" Increment accession version once from {accession}")
version = int(accession[-1])
Expand All @@ -331,7 +337,9 @@ def main() -> None:
parser = ArgumentParser(description="Download an assembly data files from INSDC or RefSeq.")
parser.add_argument("--accession", required=True, help="Genome assembly accession")
parser.add_argument_dst_path(
"--download_dir", default=Path.cwd(), help="Folder where the data will be downloaded"
"--download_dir",
default=Path.cwd(),
help="Folder where the data will be downloaded",
)
parser.add_log_arguments()
args = parser.parse_args()
Expand Down
Loading