Skip to content

Commit

Permalink
Merge pull request #36 from multimeric/id-mapping-typing
Browse files Browse the repository at this point in the history
ID mapping typing
  • Loading branch information
multimeric authored Aug 16, 2024
2 parents 2ba4238 + cc9e523 commit 4734fef
Show file tree
Hide file tree
Showing 9 changed files with 1,261 additions and 799 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/build.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ jobs:
poetry-version: '1.5.1'

- name: Install dependencies
run: poetry install --with lint --with tests
run: poetry install --all-extras

- uses: pre-commit/[email protected]

Expand Down
5 changes: 5 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,11 @@

### 1.4.0

#### Added

* The `taxon_id` argument to `IdMapper.submit` [[#36]](https://github.com/multimeric/Unipressed/pull/36)
* Detailed type annotations for `IdMapper.submit` that only allow valid pairs of `source`/`dest` databases

#### Changed

* Auto-generated type definitions for the datasets have been regenerated [[#37](https://github.com/multimeric/Unipressed/pull/37)]. This pulls upstream changes from Uniprot. For a full list of changes [view this commit diff](https://github.com/multimeric/Unipressed/pull/31/commits/7e620c46175b6ec03e073fc78444a43e96821c31).
Expand Down
1 change: 1 addition & 0 deletions codegen/dataset/generate_fields.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@
UniprotSearchField,
)


# If the functions return anything, print it
app = typer.Typer(result_callback=lambda x: print(x))

Expand Down
157 changes: 143 additions & 14 deletions codegen/id_mapping/generate.py
Original file line number Diff line number Diff line change
@@ -1,44 +1,173 @@
import ast
import sys
from collections import defaultdict
from dataclasses import dataclass, field
from typing import List, Optional

import black
import requests
import typer

from codegen.util import make_literal

# Typer application; the result callback prints whatever value it is given
app = typer.Typer(result_callback=lambda x: print(x))


def make_function(
    source_type: ast.expr, dest_type: ast.expr, taxon_id: bool, overload: bool = True
) -> ast.FunctionDef:
    """
    Makes a `submit()` function definition, as used by the ID mapper.

    The generated function is a `@classmethod` stub whose body is a bare `...`.

    Params:
        source_type: Annotation expression for the `source` argument
        dest_type: Annotation expression for the `dest` argument
        taxon_id: If true, append a `taxon_id: Optional[int] = None` parameter
        overload: If true, also decorate the function with `@overload`

    Returns:
        A `FunctionDef` node without location info; call
        `ast.fix_missing_locations` on it (or a parent) before unparsing.
    """
    args: List[ast.arg] = [
        ast.arg(arg="cls"),
        # source: <source_type>
        ast.arg(arg="source", annotation=source_type),
        # dest: <dest_type>
        ast.arg(arg="dest", annotation=dest_type),
        # ids: Iterable[str]
        ast.arg(
            arg="ids",
            annotation=ast.Subscript(ast.Name("Iterable"), ast.Name("str")),
        ),
    ]
    # `ast.arguments.defaults` is aligned with the *trailing* positional
    # arguments, so no placeholder entries are needed for the required ones.
    defaults: List[ast.expr] = []

    if taxon_id:
        # taxon_id: Optional[int] = None
        args.append(
            ast.arg(
                arg="taxon_id",
                annotation=ast.Subscript(ast.Name("Optional"), ast.Name("int")),
            )
        )
        defaults.append(ast.Constant(None))

    decorator_list: List[ast.expr] = [ast.Name("classmethod")]
    if overload:
        decorator_list.append(ast.Name("overload"))

    return ast.FunctionDef(
        name="submit",
        args=ast.arguments(
            posonlyargs=[],
            args=args,
            kwonlyargs=[],
            kw_defaults=[],
            defaults=defaults,
        ),
        body=[ast.Expr(ast.Constant(value=...))],
        decorator_list=decorator_list,
    )


@dataclass
class Rule:
    """
    A single Uniprot API "rule": a set of allowed conversions from one group
    of databases to another. In Unipressed each rule is rendered as one
    `submit()` method overload.
    """

    #: Rule ID
    id: int = 0
    #: Databases that this rule allows converting to
    tos: list[ast.Constant] = field(default_factory=list)
    #: Databases that this rule allows converting from
    froms: list[ast.Constant] = field(default_factory=list)
    #: Whether the taxon ID may be specified under this rule
    taxon_id: bool = False

    def to_function(self) -> ast.FunctionDef:
        """
        Renders this rule as an overloaded `submit()` definition whose
        `source` and `dest` annotations are `Literal[...]` of the rule's
        database names.
        """

        def literal_of(options):  # type: ignore
            # Literal[<option>, <option>, ...]
            return ast.Subscript(
                value=ast.Name("Literal"),
                slice=ast.Tuple(elts=options),
            )

        source_annotation = literal_of(self.froms)
        dest_annotation = literal_of(self.tos)
        return make_function(
            source_type=source_annotation,
            dest_type=dest_annotation,
            taxon_id=self.taxon_id,
            overload=True,
        )


# ast.unparse was added in Python 3.9. Use an explicit check rather than
# `assert`, which is stripped when Python runs with -O.
if sys.version_info < (3, 9):
    raise RuntimeError("This script requires Python 3.9 or newer (ast.unparse)")


@app.command()
def main():
froms: list[ast.Constant] = []
tos: list[ast.Constant] = []
def main() -> None:
rules: defaultdict[int, Rule] = defaultdict(Rule)

for group in requests.get(
# Build up a list of rules
type_info = requests.get(
"https://rest.uniprot.org/configure/idmapping/fields"
).json()["groups"]:
for item in group["items"]:
).json()
for group_info in type_info["groups"]:
for item in group_info["items"]:
if item["from"]:
froms.append(ast.Constant(item["name"]))
if item["to"]:
tos.append(ast.Constant(item["name"]))
rules[item["ruleId"]].froms.append(ast.Constant(item["name"]))
for rule_info in type_info["rules"]:
rule = rules[rule_info["ruleId"]]
for to in rule_info["tos"]:
rule.tos.append(ast.Constant(to))
rule.taxon_id = rule_info["taxonId"]
rule.id = rule_info["ruleId"]

# Create a class that has one method overload per rule
module = ast.Module(
body=[
ast.ImportFrom(
module="typing_extensions",
names=[
ast.alias("Literal"),
ast.alias("TypeAlias"),
ast.alias("overload"),
],
level=0,
),
make_literal(ast.Name("From"), froms),
make_literal(ast.Name("To"), tos),
ast.ImportFrom(
module="typing",
names=[
ast.alias("Iterable"),
ast.alias("Optional"),
],
level=0,
),
ast.ClassDef(
name="SubmitDummyClass",
body=[
*[rule.to_function() for rule in rules.values()],
make_function(
source_type=ast.Name("str"),
dest_type=ast.Name("str"),
taxon_id=True,
overload=False,
),
],
decorator_list=[],
bases=[],
keywords=[],
),
],
type_ignores=[],
)

# Produce the formatted output
print(
black.format_file_contents(
ast.unparse(ast.fix_missing_locations(module)),
Expand Down
Loading

0 comments on commit 4734fef

Please sign in to comment.