-
Notifications
You must be signed in to change notification settings - Fork 2
/
Copy pathmkmissing
executable file
·114 lines (98 loc) · 4.4 KB
/
mkmissing
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
find pattern (probably subject id) in files of one step missing in the next
by matching a regular expression against expanded wildcard globs
Example:
mkmissing -1 'bids/sub-*' -2 'proc/sub-*' # show subj in bids but not proc
# either side can have more than one glob to use
mkmissing -1 'bids/sub-*' -2 'proc_a/sub-*' -2 'proc_b/sub-*'
Options:
search and replace to normalize: eg. / -> _
-s '(\\d{5})/(\\d{8})' -r '\\1_\\2' # specific
-s '/' -r '_' # general
verbose with -v to see contents of inset and outset
as well as their intersection in addition to -1 not in -2
-p sets the pattern extracted from each file to compare between sets
for BIDS vs fmriprep: -p 'sub-\\d+_ses-\\d+_task-[^_]*'
for simple ld8: -p '\\d{5}_\\d{8}'
"""
import re
from glob import glob
def patt_in_wildcard(patt, wildcards, searchreplace=None):
    """Collect the unique pattern matches found in globbed path names.

    patt          -- compiled regular expression; its first match in each
                     expanded path (``group(0)``) is what gets collected
    wildcards     -- one glob string, or a list of glob strings
    searchreplace -- optional ``(search, replacement)`` pair handed to
                     ``re.sub`` to normalize every extracted match

    Returns a set of the (optionally normalized) matches; paths in which
    ``patt`` does not match contribute nothing.
    """
    # a lone wildcard is treated as a one-element list
    globs = wildcards if isinstance(wildcards, list) else [wildcards]

    found = set()
    for wildcard in globs:
        for path in glob(wildcard):
            hit = patt.search(path)
            if hit is None:
                continue
            text = hit.group(0)
            if searchreplace is not None:
                text = re.sub(searchreplace[0], searchreplace[1], text)
            # the set keeps one copy of each distinct match
            found.add(text)
    return found
def __main__():
    """Command-line entry point: list matches found by -1 globs but not -2 globs.

    Parses the command line, extracts the ``-p`` pattern from every path
    matched by the ``-1`` and ``-2`` wildcards (optionally normalizing each
    match with the ``-s``/``-r`` substitution) and reports the matches that
    are in the first set but not in the second, one per line, sorted.

    The report is printed to stdout unless ``-o`` names a file; that file is
    only written when something is missing or ``-e`` is given.  Passing only
    one of ``-s``/``-r`` is a usage error: the message goes to stderr and the
    process exits with status 2.
    """
    from argparse import RawTextHelpFormatter, ArgumentParser
    parser = ArgumentParser(description=__doc__,
                            formatter_class=RawTextHelpFormatter)
    parser.add_argument('-1', '--inglob', dest='in_glob', required=True,
                        action='append',
                        help="input file wildcard")
    parser.add_argument('-2', '--outglob', dest='out_glob', required=True,
                        action='append',
                        help="step 2 file wildcard")
    # raw string: the backslashes belong to the regex, not to Python
    parser.add_argument('-p', '--pattern', dest='patt',
                        help="pattern to match in file names",
                        default=r"\d{5}[-_/]\d{8}")
    parser.add_argument('-s', '--search', dest='search',
                        help="regexp search. pair with -r/--replace",
                        default=None)
    parser.add_argument('-r', '--replace', dest='replace',
                        help="string replacement. pair with -s/--search",
                        default=None)
    parser.add_argument('-v', '--verbose', action="store_true")
    parser.add_argument('-o', '--saveto', dest='saveto',
                        help="file to write if any differences\n" +
                             "if not specified, output is printed to terminal",
                        default=None)
    parser.add_argument('-e', '--save_empty', action='store_true',
                        help="save file even if there is no difference\n" +
                             "default is false for `make`'s timestamp compare")
    args = parser.parse_args()

    p = re.compile(args.patt)

    # search and replace only make sense as a pair; exactly one of them is a
    # usage error, reported with a non-zero exit status so callers notice
    if (args.search is None) != (args.replace is None):
        parser.error("need both -s and -r!")

    search_replace = None
    if args.search is not None:
        search_replace = (re.compile(args.search), args.replace)

    # within 'in' set but missing from 'out' set
    # order matters: set([1,2,3]) - set([1,2,4]) = {3}
    inset = patt_in_wildcard(p, args.in_glob, search_replace)
    outset = patt_in_wildcard(p, args.out_glob, search_replace)
    missing = inset - outset

    # when debugging a pattern, it's useful to see the matches
    if args.verbose:
        print(f"ARGS: p={p}; search_replace={search_replace}")
        print(f"INSET: {len(inset)} in {args.in_glob}")
        print(inset)
        print(f"OUTSET: {len(outset)} in {args.out_glob}")
        print(outset)
        print(f"MISSING: {len(missing)}")
        print(missing)
        # matches present on both sides, shown as a sanity check
        both = inset.intersection(outset)
        print(f"BOTH: {len(both)}")
        print(both)

    # sets have no stable order; sort so repeated runs give identical output
    outstr = "\n".join(sorted(missing))

    # print when no file was requested; otherwise only write an empty
    # result to the file if save_empty is set
    if args.saveto is None:
        print(outstr)
    elif args.save_empty or missing:
        with open(args.saveto, 'w') as f:
            f.write(outstr + "\n")
# Run the command-line interface only when this file is executed as a
# script, not when it is imported as a module.
if __name__ == "__main__":
    __main__()