Skip to content

Commit

Permalink
final cleanup code
Browse files Browse the repository at this point in the history
  • Loading branch information
jal347 committed Oct 3, 2024
1 parent e6aea8c commit 18e558c
Show file tree
Hide file tree
Showing 2 changed files with 1 addition and 36 deletions.
8 changes: 0 additions & 8 deletions src/hub/dataload/sources/chembl/parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,14 +27,6 @@ def parse_data(data):
},
}
yield output
# if uniprot_accessions:
# output = {
# "chembl": {
# "chembl_target": item["target_chembl_id"],
# "uniprot_accession": uniprot_accessions,
# },
# }
# yield output


def load_data(target_filepaths):
Expand Down
29 changes: 1 addition & 28 deletions src/hub/dataload/sources/chembl/upload.py
Original file line number Diff line number Diff line change
@@ -1,45 +1,18 @@
import copy
import glob
import os

import biothings.hub.dataload.storage as storage
import biothings.hub.dataload.uploader as uploader
from biothings.utils.dataload import merge_root_keys

from hub.datatransform.keylookup import MyGeneKeyLookup

from .parser import load_data


class ChemblMergerStorage(storage.RootKeyMergerStorage):
    """
    Storage that handles duplicate-document errors by merging.

    Like MergerStorage, a duplicate is resolved by appending the incoming
    key's content to the existing document; keys in the existing document
    are converted to lists as needed.

    Note:
    - root keys must have the same type in every document
    - inner structures are not merged together; the merge happens
      at root key level only
    """

    @classmethod
    def merge_func(cls, doc1, doc2, **kwargs):
        """
        Merge ``doc1`` and ``doc2`` at root-key level and return the result.

        The ``"_id"`` and ``"xrefs"`` keys are passed to ``merge_root_keys``
        as exclusions, and the returned document carries ``doc2``'s ``_id``.
        Extra keyword arguments are accepted but not used here.
        """
        # The caller has already popped "_id" from doc1, so doc2 is the
        # only place left to read it from.
        preserved_id = doc2["_id"]

        # Per the original author's note: excluding "_id" removes it from the
        # doc2 object handed to the merge, hence the value saved above.
        # A shallow copy of doc2 is merged instead of doc2 itself because the
        # merged document ends up stored in a bulk op object; modifying doc2
        # in place could affect that bulk op and produce an empty $set
        # error from mongo.
        doc2_snapshot = copy.copy(doc2)
        merged = merge_root_keys(doc1, doc2_snapshot, exclude=["_id", "xrefs"])

        merged["_id"] = preserved_id
        return merged


class ChemblUploader(uploader.BaseSourceUploader):
name = "chembl"

storage_class = ChemblMergerStorage
storage_class = storage.RootKeyMergerStorage
TARGET_FILENAME_PATTERN = "target.*.json"

keylookup = MyGeneKeyLookup(
Expand Down

0 comments on commit 18e558c

Please sign in to comment.