Skip to content

Commit

Permalink
pharos temporary hotfix (#151)
Browse files Browse the repository at this point in the history
  • Loading branch information
jal347 authored Jul 19, 2024
1 parent cbf4414 commit 532b0d0
Show file tree
Hide file tree
Showing 2 changed files with 41 additions and 8 deletions.
29 changes: 23 additions & 6 deletions src/hub/dataload/sources/pharos/parser.py
Original file line number Diff line number Diff line change
@@ -1,16 +1,33 @@
from biothings.utils.dataload import open_anyfile, unlist
from collections import defaultdict

from biothings.utils.dataload import open_anyfile, unlist


def parse_tdl(input_file):
    """Read the Pharos TDL CSV and map each gene ID to its ``tdl`` value.

    Every non-blank row must have exactly four comma-separated fields; only
    the third (the ID) and the fourth (the TDL value) are used. Rows whose ID
    is empty, ``"0"``, or the header literal ``"NCBI_id"`` are ignored.

    Args:
        input_file: Path handed to ``open_anyfile``.

    Returns:
        dict mapping the ID string to its TDL string. When an ID appears on
        several rows, the last row wins.

    Raises:
        ValueError: If a non-blank row does not split into exactly four
            fields.
    """
    tdl_by_id = {}
    with open_anyfile(input_file) as handle:
        for raw_line in handle:
            row = raw_line.strip()
            if not row:
                # A blank (e.g. trailing) line would otherwise blow up in the
                # four-way unpack below.
                continue
            _, _, gene_id, tdl = row.split(",")
            if gene_id and gene_id not in ("NCBI_id", "0"):
                tdl_by_id[gene_id] = tdl
    return tdl_by_id


def load_data(input_file, tdl_file):
    """Yield one document per gene ID found in the Pharos target mapping.

    ``input_file`` is a two-column CSV (Pharos target ID, gene ID). Target
    IDs are grouped per gene; rows whose gene ID is ``"0"`` or the header
    literal ``"entrez_gene_id"`` are ignored, as are blank lines.
    ``tdl_file`` is parsed with ``parse_tdl`` and, when it has an entry for
    the gene, that value is attached as ``pharos.tdl``.

    Args:
        input_file: Path to the target-mapping CSV, opened via
            ``open_anyfile``.
        tdl_file: Path to the TDL CSV, passed to ``parse_tdl``.

    Yields:
        ``{"_id": <gene id>, "pharos": {"target_id": ..., "tdl": ...}}``
        passed through ``unlist``. ``tdl`` is present only when known.

    Raises:
        ValueError: If a non-blank row does not have exactly two fields or
            the target ID is not an integer.
    """
    entrez_tdls = parse_tdl(tdl_file)

    targets_by_gene = defaultdict(list)
    with open_anyfile(input_file) as in_f:
        for line in in_f:
            row = line.strip()
            if not row:
                continue
            pharos_id, gene_id = row.split(",")
            if gene_id not in ("entrez_gene_id", "0"):
                targets_by_gene[gene_id].append(int(pharos_id))

    # The input is fully consumed above, so the file is already closed while
    # the consumer iterates over the documents.
    for gene_id, target_ids in targets_by_gene.items():
        # Always build a fresh document: genes without a TDL must still be
        # emitted, and must never reuse the previous gene's document.
        pharos = {"target_id": target_ids}
        tdl = entrez_tdls.get(gene_id)
        if tdl:
            # Nested under "pharos" to agree with the uploader mapping, which
            # declares "tdl" as a property of "pharos".
            pharos["tdl"] = tdl
        yield unlist({"_id": gene_id, "pharos": pharos})
20 changes: 18 additions & 2 deletions src/hub/dataload/sources/pharos/upload.py
Original file line number Diff line number Diff line change
@@ -1,14 +1,30 @@
import os

import biothings.hub.dataload.uploader as uploader

class PharosUploader(uploader.DummySourceUploader):
from .parser import load_data


class PharosUploader(uploader.BaseSourceUploader):
name = "pharos"

def load_data(self, data_folder):
    """Yield the Pharos documents built from the CSV files in ``data_folder``.

    Resolves ``pharos_target_mapping.csv`` and ``pharos_tdl.csv`` inside
    ``data_folder`` and streams whatever the parser's ``load_data`` produces
    for that pair of files.
    """
    mapping_csv = os.path.join(data_folder, "pharos_target_mapping.csv")
    tdl_csv = os.path.join(data_folder, "pharos_tdl.csv")
    yield from load_data(mapping_csv, tdl_csv)

@classmethod
def get_mapping(self):
mapping = {
"pharos": {
"properties": {
"target_id": {"type": "integer"}
"target_id": {"type": "integer"},
"tdl": {
"type": "keyword",
"normalizer": "keyword_lowercase_normalizer",
},
}
}
}
Expand Down

0 comments on commit 532b0d0

Please sign in to comment.