Skip to content

Commit

Permalink
test keylookup
Browse files Browse the repository at this point in the history
  • Loading branch information
jal347 committed Oct 3, 2024
1 parent b650da9 commit f86fdc5
Show file tree
Hide file tree
Showing 2 changed files with 62 additions and 2 deletions.
28 changes: 28 additions & 0 deletions src/hub/dataload/sources/chembl/parser.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
import json


def parse_data(data):
    """Yield one document per ChEMBL target that has at least one accession.

    Args:
        data: Decoded target JSON. Must contain a ``"targets"`` list whose
            items carry ``"target_chembl_id"`` and, normally,
            ``"target_components"`` (a list of dicts).

    Yields:
        dict: ``{"chembl_target": <id>, "xrefs": {"accession": [...]}}``.
        Targets without any usable accession are skipped.

    Raises:
        KeyError: If ``"targets"`` or a target's ``"target_chembl_id"`` is
            missing.
    """
    for item in data["targets"]:
        # A missing or null component list is treated as "no components"
        # rather than aborting the whole parse.
        components = item.get("target_components") or []
        # Keep only real accession values: a null or empty accession carries
        # no cross-reference and would otherwise make the list look non-empty.
        accessions = [
            component["accession"]
            for component in components
            if component.get("accession")
        ]
        if accessions:
            yield {
                "chembl_target": item["target_chembl_id"],
                "xrefs": {
                    "accession": accessions,
                },
            }


def load_data(target_filepaths):
    """Yield parsed target documents from every given ChEMBL JSON file.

    Args:
        target_filepaths: Iterable of paths to target JSON files.

    Yields:
        dict: The documents produced by ``parse_data`` for each file, in the
        order the files are supplied. Nothing is yielded for an empty
        iterable.
    """
    for filepath in target_filepaths:
        # Decode explicitly as UTF-8 so the result does not depend on the
        # platform's default locale encoding.
        with open(filepath, encoding="utf-8") as handle:
            json_data = json.load(handle)
        # Chain every file's documents; returning here would silently drop
        # all files but one.
        yield from parse_data(json_data)
36 changes: 34 additions & 2 deletions src/hub/dataload/sources/chembl/upload.py
Original file line number Diff line number Diff line change
@@ -1,15 +1,47 @@
import glob
import os

import biothings.hub.dataload.storage as storage
import biothings.hub.dataload.uploader as uploader

from hub.datatransform.keylookup import MyGeneKeyLookup

from .parser import load_data


class ChemblUploader(uploader.BaseSourceUploader):
    """Uploader for the ``chembl`` source.

    Reads the target JSON files found in the data folder, parses them with
    the module-level ``load_data`` parser and passes the documents through
    ``MyGeneKeyLookup`` before storage.
    """

    name = "chembl"

    storage_class = storage.RootKeyMergerStorage
    # Glob pattern, relative to the data folder, of the files to parse.
    TARGET_FILENAME_PATTERN = "target.*.json"

    # NOTE(review): the lookup path is rooted at "chembl", while the documents
    # yielded by sources/chembl/parser.py carry "chembl_target" and "xrefs" at
    # the top level -- confirm the path matches the document shape.
    keylookup = MyGeneKeyLookup(
        [
            ("uniprot", "chembl.xrefs.accession"),
        ],
        skip_on_failure=True,
    )

    def load_data(self, data_folder):
        """Return the key-converted documents for all target files.

        Args:
            data_folder: Directory searched for ``TARGET_FILENAME_PATTERN``.

        Returns:
            The iterable produced by the key lookup wrapped around the
            parser's ``load_data``.
        """
        target_filepaths = glob.iglob(
            os.path.join(data_folder, self.TARGET_FILENAME_PATTERN)
        )
        # The key lookup is applied decorator-style: it wraps the document
        # producing callable and the wrapped callable receives the arguments.
        # ``skip_on_failure`` is already configured on the instance above.
        return self.keylookup(load_data)(target_filepaths)

    @classmethod
    def get_mapping(klass):
        """Return the index mapping for the fields this uploader stores."""
        mapping = {
            "chembl_target": {
                "type": "keyword",
                "normalizer": "keyword_lowercase_normalizer",
            },
            "xrefs": {
                "properties": {
                    "accession": {
                        "type": "keyword",
                        "normalizer": "keyword_lowercase_normalizer",
                    },
                }
            },
        }
        return mapping

0 comments on commit f86fdc5

Please sign in to comment.