forked from udieckmann/Kielipankki-utilities
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathold-gamarr
executable file
·447 lines (387 loc) · 15.5 KB
/
old-gamarr
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
#! /usr/bin/env python3
# Original inspiration for the name "game" was:
# - Mitä on erä?
# - Riistaa kuten te.
# (Seen in a Tenavat cartoon that has not been found again.)
#
# While "gamarray" contracted from "game array", there is also a
# further pun: "gamarray" makes a search engine suggest "gamma
# ray".
from argparse import ArgumentParser, REMAINDER
from itertools import chain
from shlex import quote
from subprocess import Popen, PIPE
from tempfile import mkstemp
import os, grp, sys
# Command-line interface.  The command name is the first positional
# argument; everything after it is collected verbatim in 'argument'
# (REMAINDER), so the options of this script itself have to be given
# before the command name.
parser = ArgumentParser(description = '''
Send a single command with a set of arguments to the batch system in
Puhti, with all manner of defaults, as an array job to be run in the
current working directory on each argument, without waiting. The set
of separate arguments starts at '//' on the command line.
By default, module kieli is loaded quietly.
''')

parser.add_argument('command',
                    help = '''
                    the name of an executable command
                    ''')

parser.add_argument('argument', metavar = '...', nargs = REMAINDER,
                    help = '''
                    the options and initial arguments to the command,
                    followed by '//' that is followed by a sequence of
                    arguments, each of which is passed to the command
                    separately
                    ''')

# The log files receive the standard output and standard error of
# each task; their names are built with the patterns %A and %a in
# the job script, so the help text names those patterns.
parser.add_argument('--log', default = 'gamelog', metavar = 'dir',
                    help = '''
                    directory where the standard output and standard
                    error of the batch job are written in files named
                    %%A-%%a-<job>.{out,err} where %%A is the array job
                    number and %%a the task number (log directory is
                    created as needed) (default: ./gamelog)
                    ''')

parser.add_argument('--job', default = 'game', metavar = 'name',
                    help = '''a short name [game] for the job''')

# output group - at most one of --out and --accept
group = parser.add_mutually_exclusive_group()
group.add_argument('--out', '-o', metavar = 'file',
                   help = '''
                   ((TODO needs redesigned for array job!))
                   standard output from command on zero exit status;
                   on non-zero status leave standard output in a
                   sibling temporary file named file.<random>
                   ''')
group.add_argument('--accept', '-a', metavar = 'file',
                   help = '''
                   ((TODO see --out))
                   standard output from command regardless of
                   exit status
                   ''')

# time group - either specify hours or specify minutes (default one
# hour is probably good for the purpose)
group = parser.add_mutually_exclusive_group()
group.add_argument('--minutes', '-M', metavar = 'num',
                   help = 'minutes [60] to reserve')
group.add_argument('--hours', '-H', metavar = 'num',
                   help = 'hours [1] to reserve')

# memory group - man pages refer to megabytes and gigabytes but
# example at CSC says 4GB = 4096MB so they must mean 2^20 and 2^30
# rather than 10^6 and 10^9 https://en.wikipedia.org/wiki/Gibibyte
# https://research.csc.fi/taito-constructing-a-batch-job-file#3.1.2
# (defaults are tentative - less might do for the purpose?)
group = parser.add_mutually_exclusive_group()
group.add_argument('--MiB', metavar = 'num',
                   help = 'mebibytes [8192] to reserve')
group.add_argument('--GiB', metavar = 'num',
                   help = 'gibibytes [8] to reserve')

parser.add_argument('--cat', action = 'store_true',
                    help = '''
                    write the job description to standard output
                    instead of sending it to the batch queue
                    (for information only when using temp file)
                    ''')

# The offered core counts depend on the node size of the system: the
# presence of /appl/soft/ling is taken to mean Puhti, else Taito.
if os.path.exists('/appl/soft/ling'):
    # Puhti nodes have 40 cores, trying to allocate all cores in the
    # same node so that the communication between them is fast -
    # TODO to allow larger multi-node numbers for heavier but
    # core-savvy jobs like ffmpeg
    system, nodesize = 'Puhti', 40
    corechoices = [ '1', '2', '4', '5', '10', '20', '40' ]
else:
    # Taito nodes have 24 cores, trying to allocate all cores in the
    # same node so that the communication between them is fast
    system, nodesize = 'Taito', 24
    corechoices = [ '1', '2', '4', '8', '12', '24' ]

parser.add_argument('--cores', '-C',
                    choices = corechoices,
                    default = '4', # not sure!
                    help = '''
                    how many {} cores to use,
                    all in one node [tentative default is 4]
                    (nodes have {} cores)
                    '''.format(system, nodesize))

# default partition is set just before parsing the arguments;
# Taito defaults to "serial" as always,
# Puhti will default to "small" (1 node, up to 40 cores).
group = parser.add_mutually_exclusive_group()
group.add_argument('--test', dest = 'partition',
                   action = 'store_const', const = 'test',
                   help = '''
                   run in "test" partition
                   ''')
group.add_argument('--serial', dest = 'partition',
                   action = 'store_const', const = 'serial',
                   help = '''
                   run in "serial" partition (Taito default)
                   ''')
group.add_argument('--small', dest = 'partition',
                   action = 'store_const', const = 'small',
                   help = '''
                   run in "small" partition (Puhti default)
                   ''')
group.add_argument('--large', dest = 'partition',
                   action = 'store_const', const = 'large',
                   help = '''
                   run in "large" partition (Puhti)
                   ''')

# default billing group set below just before parsing
parser.add_argument('--bill', '-B', metavar = 'group',
                    help = '''
                    bill the project associated with the group
                    [defaults to clarin if user is in clarin]
                    ''')

parser.add_argument('--kieli',
                    choices = [
                        'yes',
                        'out',
                        'err',
                        'null',
                        'no'
                    ],
                    default = 'null',
                    help = '''
                    how to load modules kieli and biojava
                    (default is null, load quietly;
                    yes leaves its stdout and stderr as they are,
                    out redirects stderr to stdout,
                    err stdout to stderr,
                    null both to /dev/null, and
                    no means do not load)
                    ''')
# add options
# Names of the groups of the current user, for the default and the
# validation of the billing group.  grp.getgrgid raises KeyError for
# a group id that has no entry in the group database; such a group
# has no name that could be given to --bill, so it is skipped instead
# of letting the whole script crash.
groupnames = []
for gid in os.getgroups():
    try:
        groupnames.append(grp.getgrgid(gid).gr_name)
    except KeyError:
        continue

parser.set_defaults(partition = ('small' # guess this is Puhti
                                 if os.path.exists('/appl/soft/ling') else
                                 # guess this is Taito
                                 'serial'),
                    bill = ('clarin' if 'clarin' in groupnames else None))

args = parser.parse_args()

# program name for use in messages
args.prog = parser.prog

if not os.path.exists('/appl/soft/ling'):
    # Taito does not seem to accept any value for --account
    # but works as usual when --account is not specified.
    print('{}: info: ignoring billing group "{}" in Taito'
          .format(args.prog, args.bill),
          file = sys.stderr)
elif args.bill in groupnames:
    print('{}: info: billing "{}" project'.format(args.prog, args.bill),
          file = sys.stderr)
elif args.bill:
    # a billing group was named but the user is not in it
    print('{}: error: billing group "{}" not in:'
          .format(args.prog, args.bill),
          *groupnames,
          file = sys.stderr)
    sys.exit(1)
else:
    # no --bill and the user is not in the default group
    print('{}: error: no billing group'.format(args.prog),
          file = sys.stderr)
    sys.exit(1)
def simplepath(path):
    '''Return path in normalized form: relative to the current
    directory when that relative form does not begin with two dots,
    and absolute otherwise.

    '''
    here = os.path.relpath(path)
    if here.startswith('..'):
        # outside the current directory (or a name that itself starts
        # with dots): fall back to the absolute form
        return os.path.abspath(path)
    return here
def ensuredir(path):
    '''Create the directory path (in its simplepath form), with any
    missing parents, unless it already exists. Whether anyone can
    write in the directory is not checked.

    '''
    target = simplepath(path)
    try:
        os.makedirs(target, exist_ok = True)
    except OSError:
        # in Python 3.4.0 (and earlier), a mode mismatch may lead here
        # even if the directory is all right; only a directory that
        # really is not there is an error
        if not os.path.isdir(target):
            raise
def ensuretempfile(outdir, outfile):
    '''Create an empty file in outdir with a name that consists of
    outfile, a dot, and a random part, and return its pathname. The
    file is only created to claim the name, so it is closed at once.

    '''
    descriptor, claimed = mkstemp(dir = outdir, prefix = outfile + '.')
    os.close(descriptor)
    return claimed
def setup(args):
    '''Return suitable pathnames for the log dir, outfile (or None),
    and tempfile (or None).

    The log directory is created if needed. With --out, a tempfile is
    created in advance next to the outfile, to claim the name. With
    --accept, there is an outfile but no tempfile. With --cat, no
    filesystem entries are created: the log directory is not made
    and the tempfile name is only a placeholder.

    Exits with status 1, after a message in standard error, if the
    argument of --out or --accept does not name a file.

    '''
    if not args.cat:
        ensuredir(args.log)

    requested = args.out or args.accept
    if not requested:
        # neither --out nor --accept was given
        return simplepath(args.log), None, None

    outfile = simplepath(requested)
    head, tail = os.path.split(outfile)

    # simplepath normalizes a trailing separator away, so the final
    # component of the normalized path is almost never empty; the
    # argument as given must be checked, too, to catch "dir/"
    if not tail or not os.path.basename(requested):
        print('{}: error: not a filename: {}'
              .format(args.prog, requested),
              file = sys.stderr)
        sys.exit(1)

    if not args.out:
        # --accept: command writes directly in outfile
        tempfile = None
    elif args.cat:
        # no good way out: nothing may be created, so there is no
        # real name to show
        tempfile = os.path.join(head, tail + '.[random]')
    else:
        tempfile = ensuretempfile(head, tail)

    return simplepath(args.log), outfile, tempfile
# Pathnames for the log directory and the output arrangements.
logdir, outfile, tempfile = setup(args)

# The module load command in the job script is preceded by the
# redirections that --kieli asks for; with 'no' it is still there
# but commented out.
prefixes = { 'yes' : '',
             'out' : '2>&1 ',
             'err' : '1>&2 ',
             'null' : '1> /dev/null 2>&1 ',
             'no' : '# ' }

# watch out! module biojava is loaded together with kieli when in
# Puhti, because otherwise Puhti batch jobs do not have java!
modules = ( 'kieli biojava'
            if os.path.exists('/appl/soft/ling') else
            'kieli' )

kieli = prefixes[args.kieli] + 'module load ' + modules
# Depending on the output options, command is one of:
#   cmd arg ... args[TASK_ID]
#   cmd arg ... args[TASK_ID] > outfile
#   cmd arg ... args[TASK_ID] > tempfile
# with appropriate quotation for the shell,
# where arg ... are the arguments before //.

# There must be exactly one // to tell the common arguments from
# the arguments that make up the array.
if args.argument.count('//') != 1:
    print(args.prog + ': must have one //', file = sys.stderr)
    exit(1)

argpos = args.argument.index('//')
headargs = args.argument[:argpos]
tailargs = args.argument[argpos + 1:]

if not tailargs:
    print(args.prog + ': empty array not allowed',
          file = sys.stderr)
    exit(1)

if len(tailargs) > 1000:
    # https://research.csc.fi/taito-array-jobs
    print(args.prog + ': too many arguments:',
          len(tailargs), ' (max is 1000)',
          file = sys.stderr)
    exit(1)

# the quoted words that the actual command and its logged form share
commandwords = [quote(args.command)] + [quote(arg) for arg in headargs]

# standard output of the command is redirected to the tempfile if
# there is one, else to the outfile if there is one, else not at all
if outfile is None:
    redirect = []
elif tempfile is None:
    redirect = ['>', quote(outfile)]
else:
    redirect = ['>', quote(tempfile)]

command = ' '.join(commandwords
                   + [ '"${args[$SLURM_ARRAY_TASK_ID]}"' ]
                   + redirect)

logcommand = ' '.join(commandwords + ['<nth arg>'])
# What the job does after the command: nothing when there is no
# tempfile (no outfile at all, or outfile accepted as is); else on
# success status move the output from tempfile to outfile, and on
# error status leave the output in tempfile.
finish = ( ''
           if tempfile is None else
           'test $status -eq 0 &&\nmv {temp} {out}'
           .format(temp = quote(tempfile), out = quote(outfile)) )
def _nonnegative(option, value):
    '''Return the value given to option (a string) as an int. If the
    value is not a non-negative integer, write a message in standard
    error and exit with status 1 instead of letting a traceback (or
    a nonsensical reservation) through.

    '''
    try:
        number = int(value)
    except ValueError:
        number = -1
    if number < 0:
        print('{}: error: {} must be a non-negative integer: {}'
              .format(args.prog, option, value),
              file = sys.stderr)
        sys.exit(1)
    return number

# default an hour - and this could be done wholly in argparse? can an
# exclusive group have a common default?
if args.minutes:
    hours, minutes = divmod(_nonnegative('--minutes', args.minutes), 60)
elif args.hours:
    hours, minutes = _nonnegative('--hours', args.hours), 0
else:
    hours, minutes = 1, 0

time = '{:d}:{:02d}:00'.format(hours, minutes)

# setting memory default here; the argument is checked to be a
# natural number but not whether it is something outrageous - TODO
# the whole processing could be moved to argparse machinery
if args.MiB:
    memory = '{}M'.format(_nonnegative('--MiB', args.MiB))
elif args.GiB:
    memory = '{}G'.format(_nonnegative('--GiB', args.GiB))
else:
    memory = '8G'
def fill(args):
    '''Generate the pieces (separators and shell-quoted arguments,
    alternating) that lay out the separate arguments in an array,
    starting a new line whenever the counted line width would exceed
    75.

    '''
    width = 0
    for word in map(quote, args):
        if width == 0:
            # the very first argument opens the first line
            separator = ' '
            width = 4 + len(word)
        elif width + len(word) > 75:
            # the argument does not fit, it opens a new line
            separator = '\n '
            width = 4 + len(word)
        else:
            separator = ' '
            width += 1 + len(word)
        yield separator
        yield word
# The values to fill in the job script. The log file names contain
# the patterns %A and %a, with the job name at the end.
fields = dict(job = args.job,
              bill = args.bill,
              partition = args.partition,
              nodes = '1',
              cores = args.cores,
              time = time,
              memory = memory,
              out = quote(os.path.join(args.log,
                                       '%A-%a-' + args.job + '.out')),
              err = quote(os.path.join(args.log,
                                       '%A-%a-' + args.job + '.err')),
              last = len(tailargs),
              workdir = quote(os.getcwd()),
              logcommand = quote(logcommand),
              kieli = kieli,
              whetherkieli = args.kieli,
              outfile = quote(outfile or '(stdout)'),
              command = command,
              args = ''.join(fill(tailargs)),
              finish = finish)

# The job script. The array args has a dummy ':' as its first
# element, so that the task ids from 1 to the number of separate
# arguments are the indexes of the actual arguments. The job echoes
# its settings, loads the modules, runs the command on its own
# argument, runs the finishing step, and reports status and time.
script = '''\
#! /bin/bash
#SBATCH --job-name={job}
#SBATCH --account={bill}
#SBATCH --partition={partition}
#SBATCH --nodes={nodes}
#SBATCH --ntasks={cores}
#SBATCH --time={time}
#SBATCH --mem={memory}
#SBATCH --out={out}
#SBATCH --error={err}
#SBATCH --chdir={workdir}
#SBATCH --array=1-{last}
args=(:
{args}
)
echo command: {logcommand}
echo nth arg: "${{args[$SLURM_ARRAY_TASK_ID]}}"
echo outfile: {outfile}
echo workdir: {workdir}
echo partition: {partition}
echo nodes: {nodes}
echo cores: {cores}
echo time: {time}
echo memory: {memory}
echo load kieli: {whetherkieli}
echo billing group: {bill}
echo
date "+%F %T START"
{kieli}
{command}
status=$?
{finish}
T=$SECONDS
printf -v time %d:%02d:%02d $((T/3600)) $((T%3600/60)) $((T%60))
date "+%F %T FINISH IN $time WITH STATUS $status"
'''.format(**fields)
if not os.path.exists('/appl/soft/ling'):
    # Taito does not seem to accept any value for --account,
    # but works as usual when --account is not specified,
    # so the line that specifies the account is left out.
    kept = [ line
             for line in script.split('\n')
             if not line.startswith('#SBATCH --account=') ]
    script = '\n'.join(kept)
# Feed the job script to sbatch, or with --cat to cat, which merely
# shows the script. Leaving the with-statement closes the pipe and
# waits for the process to finish.
with Popen(['cat' if args.cat else 'sbatch'], stdin = PIPE) as process:
    process.stdin.write(script.encode('UTF-8'))

# Pass a failure status on, so that the caller can tell when the job
# was not accepted in the queue.
if process.returncode:
    sys.exit(process.returncode)