Skip to content

Commit f1b5468

Browse files
authored
[MRG] fix identifier munging for local databases (#145)
* fix identifier split
* fix identifier split x 2
* fix identifier foo in notebooks
1 parent af9a8cd commit f1b5468

File tree

3 files changed

+22
-4
lines changed

3 files changed

+22
-4
lines changed

genome_grist/copy_local_genomes.py

+5-2
Original file line number | Diff line number | Diff line change
@@ -41,8 +41,11 @@ def main():
4141
record_name = record.name
4242
break
4343

44-
record_name = record_name.split(' ', 1)
45-
ident, remainder = record_name
44+
ident, *remainder = record_name.split(' ', 1)
45+
if remainder: # is list, needs to be string
46+
remainder = remainder[0]
47+
else:
48+
remainder = ident
4649

4750
print(f"read identifer '{ident}' and name '{remainder}'")
4851

genome_grist/notebooks/report-gather.ipynb

+8-1
Original file line number | Diff line number | Diff line change
@@ -82,7 +82,14 @@
8282
"\n",
8383
"# connect gather_df to all_df and left_df using 'genome_id'\n",
8484
"def fix_name(x):\n",
85-
" return \"_\".join(x.split('_')[:2]).split('.')[0]\n",
85+
" # pick off first space-delimited name as identifier\n",
86+
" x = x.split(' ')[0]\n",
87+
" \n",
88+
" # eliminate stuff after the period, too.\n",
89+
" x = x.split('.')[0]\n",
90+
" \n",
91+
" return x\n",
92+
" #return \"_\".join(x.split('_')[:2]).split('.')[0]\n",
8693
"\n",
8794
"gather_df['genome_id'] = gather_df['name'].apply(fix_name)\n",
8895
"names_df['genome_id'] = names_df['ident'].apply(fix_name)"

genome_grist/notebooks/report-mapping.ipynb

+9-1
Original file line number | Diff line number | Diff line change
@@ -78,7 +78,15 @@
7878
"\n",
7979
"# connect gather_df to all_df and left_df using 'genome_id'\n",
8080
"def fix_name(x):\n",
81-
" return \"_\".join(x.split('_')[:2]).split('.')[0]\n",
81+
" # pick off first space-delimited name as identifier\n",
82+
" x = x.split(' ')[0]\n",
83+
" \n",
84+
" # eliminate stuff after the period, too.\n",
85+
" x = x.split('.')[0]\n",
86+
" \n",
87+
" return x\n",
88+
" #return \"_\".join(x.split('_')[:2]).split('.')[0]\n",
89+
"\n",
8290
"\n",
8391
"gather_df['genome_id'] = gather_df['name'].apply(fix_name)\n",
8492
"names_df['genome_id'] = names_df['ident'].apply(fix_name)"

0 commit comments

Comments
 (0)