-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathprocessing_metadata.py
72 lines (42 loc) · 2.07 KB
/
processing_metadata.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
# Module-level counter initialised to zero.
# NOTE(review): nothing in the visible code reads or updates this name --
# confirm whether another module relies on it before removing it.
count = 0
def _strip_dots_from_keys(mapping):
    """Return a new dict whose keys are those of *mapping* with every '.' removed.

    When two keys become identical after the dots are removed, the value of
    the key seen last wins, so the result can be shorter than *mapping*.
    """
    return {key.replace('.', ''): value for key, value in mapping.items()}


def metadata_processing(metadata_becas):
    """Normalise the 'entities' and 'ids' sections of a metadata record.

    Only the first key of *metadata_becas* (called ``gse`` below) is
    processed; any other top-level keys are copied through untouched. The
    input itself is never modified -- all work happens on a deep copy.

    Transformations applied to ``metadata_becas[gse]``:

    * ``'entities'``: each item is a ``'|'``-separated string. The first
      field becomes a key and the second field, split on ``';'``, becomes
      its value. Fields after the second are ignored.
    * ``'ids'``: the mapping ``identifier -> {'name': ..., 'refs': ...}`` is
      re-keyed by name, giving
      ``name -> {'reference': refs, 'identity': identifier}``. Entries whose
      value cannot be subscripted (e.g. ``None``) are reported on stdout
      and skipped.
    * In both sections every ``'.'`` is removed from the resulting keys.

    Args:
        metadata_becas: mapping with the layout described above.

    Returns:
        The processed deep copy. An empty input is returned as an (empty)
        copy without further processing.

    Raises:
        KeyError: if the record lacks 'entities' or 'ids', or an id entry
            lacks 'refs' or 'name'.
        IndexError: if an entity string contains no ``'|'`` separator.
    """
    import copy

    metadata_becas_processed = copy.deepcopy(metadata_becas)
    if not metadata_becas_processed:
        # Nothing to process; avoids failing on the first-key lookup below.
        return metadata_becas_processed

    gse = next(iter(metadata_becas_processed))
    record = metadata_becas_processed[gse]

    # --- entities: "name|src1;src2|..." -> {name: [src1, src2]} -----------
    ent_processed = {}
    for ent in record['entities']:
        fields = ent.split('|')
        ent_processed[fields[0]] = fields[1].split(';')

    ent_further_processed = _strip_dots_from_keys(ent_processed)
    if len(ent_processed) != len(ent_further_processed):
        # Removing dots made two distinct entity names collide.
        print('something fishy with this {}'.format(gse))
    record['entities'] = ent_further_processed

    # --- ids: {identifier: {name, refs}} -> {name: {reference, identity}} --
    ids_processed = {}
    for ident, entry in record['ids'].items():
        try:
            # The right-hand side is evaluated first, so 'refs' is looked up
            # before 'name'.
            ids_processed[entry['name']] = {
                'reference': entry['refs'],
                'identity': ident,
            }
        except TypeError:
            # Typically the entry is None; skip it and keep going.
            print('NoneType object is not subscriptable')

    record['ids'] = _strip_dots_from_keys(ids_processed)

    print('metadata for {} processed'.format(gse))
    return metadata_becas_processed