-
Notifications
You must be signed in to change notification settings - Fork 2
/
3dmaskave_grp
executable file
·382 lines (334 loc) · 13.1 KB
/
3dmaskave_grp
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
#!/usr/bin/env bash
#
# 0.for a given mask
# 1.iterate over subject image volumes, extracting roi average values
# 2.saves to ld8+roi+mask single row comma seprated value
# "${maskname},${subj},${roi},${val}"
#
# 20221108WF - extracted from various 07* scripts
#
ID_PATT='\d{5}_\d{8}'
IN_PATT='s:.*/|.nii$|.nii.gz$|.HEAD$::g'
NJOBS=1 DATATABLE="" DATACOL=""
# remove common neuroimaging file extensions
# and also strip out any usafe characters (eg. <1> range selectors)
niibasename(){ perl -pe 's:.*/|\.nii.gz|\.nii|\.HEAD::g;s/\W+/_/g;s/_$//;' <<< "$*"; }
function niibase_test { #@test
run niibasename 'path/to/ab.nii.gz<1>'
echo $output >&2
[[ $output == ab_1 ]]
}
usage(){ cat <<HEREDOC
SYNOPSIS:
run 3dmaskave over many files and saves an output csv:
roi,subj,input,beta
USAGE:
$(basename $0) -csv dlpfc.csv -mask "mask.nii.gz" -- paths/to/1*_2*/inputs.nii.gz
3dmaskave_grp -njobs 3 -v -datatable /tmp/dt.tsv -label_col 2,3 -m "ACC_1=mask.nii.gz"
3dmaskave_grp -csv MPFC-ACC_1.csv -m "ACC=mask.nii.gz" -m "PFC=mask_many.nii.gz<3>" -- paths/to/1*_2*/inputs.nii.gz
-m|-mask NAME=MASK input mask optionally with afni selector
MASK can also be a perl regular expression like 's:search:replace:'
-m tian_nacc.core='s:[^/]*.nii.gz$:warps/tian_nacc_rs.nii.gz<1>:'
-csv FILE save csv as FILE (defaults to NAME.csv)
-datatable FILE use datatable as input. req Subj is first and InputFile last col
-stdinfiles read files from stdin
-label_col IDX req. for datatable. what col index to use for labels
csv for multiple: -label_col "3,4"
-redo rerun, overwritting old
defaults to only writting new lines if not already in csv
-pattern PAT use PAT to extract id (default: '$ID_PATT')
-inputext PAT perl 's///' to extract input name (default: '$IN_PATT')
-njobs CORES run with NCORES (>1 requires gnu parallel)
-roistats 1 enable 3dROIstats instead of maskave. also try: -roistats -nzmean,-nzvoxels,-nzsigma
NOTE!! header will be order you provide BUT 3dROIstats does not respect that order!!!
-v|-verbose print when not writting b/c already exists in csv
-h|-usage this text
-- FILE1 FILE2 ... input files listed after '--', probably use a glob
NOTE:
you may want 3dROIstats instead. It can do more and quicker! but
you'll have to rerun for any new addition (mask value or inputfile)
you can only use one mask file. but that mask file can have n values (for n rois)
the results are wider and will need more post processsing
# for inputs and a mask like
IN=(/Volumes/Hera/Projects/7TBrainMech/subjs/101*/conn_mrsi_rest/mxcovsph/08-MPFC_deconreml-r.nii.gz)
mask='/Volumes/Hera/Projects/Maria/7Trest_mrsi/mvm/ACC_p2-16_cl40.nii.gz<1>'
# compare this tool
3dmaskave_grp -csv MPFC-ACC_1.csv -mask "ACC_1=\$mask" -- "\${IN[@]}"
# vs
3dROIstats -mask "\$mask" "\${IN[@]}"
# and the perl extraction version
3dROIstats -mask "\$mask" "\${IN[@]}" |
perl -slane '/\\d{5}_\\d{8}/ || next; print "\$mask,$&,",\$F[0]=~s:.*/|.nii.gz$::gr,",\$F[2]"' -- -mask=ACC_1
HEREDOC
}
parse_args(){
MASK=()
REDO=0
USEROISTATS=""
CFILES=()
while [ $# -gt 0 ]; do
case "$1" in
--) shift; CFILES=("${CFILES[@]}" "$@"); break;;
-stdinfiles) mapfile -t CFILES; shift;;
-m|-mask) MASK+=("$2"); shift 2;;
-csv) OUTNAME="$2"; shift 2;; # def to basename of mask.csv
-pattern) ID_PATT="$2"; shift 2;;
-inputext) IN_PATT="$2"; shift 2;;
-redo) REDO=1; shift;;
-h|-usage) usage; exit 0;;
-v|-verbose) VERBOSE=1; shift;;
-datatable) DATATABLE="$2"; shift 2;;
-label_col) DATACOL="$2"; shift 2;;
-roistats) USEROISTATS="$2"; shift 2;;
-njobs) NJOBS="$2"; shift 2;;
*) warn "UNKNOWN ARG: '$1'"; usage; exit 1;;
esac
done
[ -z "${MASK[*]}" ] && warn "ERROR: need -mask 'mask.nii.gz' to be specified" && return 1
[ -z "$DATATABLE" -a -z "${CFILES[*]}" ] && warn "ERROR: must specify subject/to-mask input files! use '-datatable file.txt' or ' -- file1.nii.gz file2.nii.gz ...' or 'find | ... -stdinfiles' " && return 2
# only name out same as mask if only one mask
[ -z "${OUTNAME:-}" -a ${#MASK[@]} -eq 1 ] && OUTNAME="$(niibasename "${MASK[0]}").csv"
[ -z "${OUTNAME:-}" ] && warn "ERROR: need -csv OUTNAME" && return 3
# USEROISTATS to array. default to nonzero mean and nzvoxel count
[ -n "$USEROISTATS" ] && USEROISTATS=(${USEROISTATS//,/ })
[[ ${USEROISTATS[*]} == 1 ]] && USEROISTATS=(-nomeanout -nzmean -nzvoxels)
return 0
}
# check if we input something like s:patt:replace:
name_is_regex(){
# start like s/ or s:
[[ "$*" =~ ^s([/:]) ]] &&
# ends with the same search delim but optionally 'g' or 'i'
[[ "$*" =~ ${BASH_REMATCH[1]}[gi]*$ ]]
}
# analgous to 'test -r $mask' but works for '$file<roinum>' AFNI syntax
# abstracted from check_mask so we can use if mask is a regexp
check_roi() {
mask="${1:?input mask<roi> to confirm count is nonzero}"
maskval=$(3dmaskave -q -mask "$mask" "$mask" 2>/dev/null|| echo 0)
[[ $maskval == 0 ]] && warn "ERROR:BAD MASK: '$mask'" && return 1
return 0
}
# wrap check_roi with 3dcalc and regexp checks
check_mask() {
mask="$1"
[[ $mask =~ ^3dcalc ]] && warn "WARNING: inline 3dcalc will be run for each input file!" && return 0
name_is_regex "$mask" &&
warn "WARNING: using regexp mask '$mask'. cannot preemptivly check if mask exists" &&
return 0
check_roi "$mask"
return 0
}
name_in_file() { perl -pe "$IN_PATT" <<< "$*"; }
id_in_file() { grep -m1 -oP "$ID_PATT" <<< "$*" | sed 1q; }
name_patt_test() { #@test
run name_in_file /Volumes/Hera/Projects/7TBrainMech/subjs/10129_20180917/conn_mrsi_rest/mxcovsph/08-MPFC_deconreml-r.nii.gz
[[ $output == 08-MPFC_deconreml-r ]]
}
id_patt_test() { #@test
run id_in_file /Volumes/Hera/Projects/7TBrainMech/subjs/10129_20180917/conn_mrsi_rest/mxcovsph/08-MPFC_deconreml-r.nii.gz
[[ $output == 10129_20180917 ]]
}
maskave_NA() {
# maskave or NA
local mask_file="$1";shift
local cfile="$1";shift
cmd="3dmaskave -quiet -mask '$mask_file' $cfile"
[ -n "${VERBOSE:-}" ] && warn "$cmd"
eval "$cmd" || echo NA
}
# hyperfine "3dROIstats -nzmean -nzvoxels -mask atlas/13MP-MNI_GMgt0.5-mask.nii.gz'<1>' hurst_nii/10129_20180917/py_dencrct_brnaswdkm_hurst_rs.nii.gz | perl -slane 'print qq/@F[2..\$#F]/ if $.==2'"
# Time (mean ± σ): 37.6 ms ± 6.8 ms [User: 25.7 ms, System: 9.2 ms]
#
# hyperfine "3dmaskave -mask atlas/13MP-MNI_GMgt0.5-mask.nii.gz'<1>' hurst_nii/10129_20180917/py_dencrct_brnaswdkm_hurst_rs.nii.gz"
# Time (mean ± σ): 33.5 ms ± 6.2 ms [User: 23.4 ms, System: 5.9 ms]
roistats_header_opts(){
! [[ $* =~ nomeanout ]] && x="mean,$*" || x="$*"
#remove nomeanout, replace all other switches with their value, and remove the trailing comma
perl -pe 's/-nomeanout//;s/-([^-]+)/$1,/g;s/\s*//g;s/,$//' <<< "$x"
}
repna_roistats_opts(){ roistats_header_opts "$@"| perl -pe 's/[^,]+/NA/g'; }
roistats_extract(){ perl -slane 'print join ",", @F;'; }
roistats_NA() {
# 3droistast. default to usising nomeanout, nzmean, and nzvoxels
local opts NA mask_file cfile
mask_file="$1";shift
cfile="$1";shift
if [ $# -gt 0 ]; then
opts=("$@")
NA=$(repna_roistats_opts "$@")
else
opts=(-nomeanout -nzmean -nzvoxels)
NA="NA,NA"
fi
cmd="3dROIstats ${opts[*]} -quiet -mask '$mask_file' $cfile | roistats_extract"
[ -n "${VERBOSE:-}" ] && warn "$cmd"
eval "$cmd" || echo "$NA"
}
already_exists() {
if [[ $REDO -eq 0 ]] && grep -q "$*" "$OUTNAME"; then
[ -n "${VERBOSE:-}" ] && warn "# '$*' already in $OUTNAME"
return 0
else
return 1
fi
}
maskave_filename_info() {
local input_name="$(name_in_file "$3")"
local id="$(id_in_file "$3")"
[ -z "$input_name" ] && warn "# no subj id in '$3' matching '$IN_PATT'" && return 0
[ -z "$id" ] && warn "# no subj id in '$3' matching '$ID_PATT'" && return 0
maskave_csv "$@" "$input_name" "$id"
}
maskave_csv() {
mask_name="$1"; shift
mask_file="$1"; shift
cfile="$1"; shift
input_name="$1"; shift
id="$1"; shift
already_exists "$mask_name,$id,$input_name," && return 0
if name_is_regex "$mask_file"; then
local regexp="$mask_file"
mask_file=$(perl -plne "$regexp" <<< "$cfile")
! check_roi "$mask_file" &&
echo "ERROR: mask regexp applied but '$mask_file' doesnt exist or roi is empty. see: perl -pne '$regexp' <<< '$cfile'" >&2 &&
return 1
fi
val="$(maskave_NA "$mask_file" "$cfile")"
echo "${mask_name},${id},${input_name},${val}"
}
split_name(){
local name mask name_mask="$1"
name=${name_mask/=*/}
mask=${name_mask#*?=}
[[ $name == $mask ]] &&
name="$(niibasename "$mask")"
echo "$name" "$mask"
}
split_name_test() { #@test
read -r name mask <<< $(split_name "ACC=my/mask.nii.gz<1>")
[[ $name == ACC ]]
[[ $mask == "my/mask.nii.gz<1>" ]]
read -r name mask <<< $(split_name "ACC=my/mask_gm=1.nii.gz<1>")
[[ $name == ACC ]]
[[ $mask == "my/mask_gm=1.nii.gz<1>" ]]
read -r name mask <<< $(split_name "my/mask_gm1.nii.gz<1>")
[[ $name == mask_gm1_1 ]]
[[ $mask == "my/mask_gm1.nii.gz<1>" ]]
}
cnt_uniq_match(){
cnt=$(printf '%s\n' "$@"| sort -u |wc -l)
[ "$cnt" -eq $# ]
}
cnt_test() { #@test
cnt_uniq_match a b c d
! cnt_uniq_match b a d b
}
check_all_masks(){
# double pass. first make sure everything checksout
# before running potentially a whole lot of 3dmaskavs
all_names=()
all_masks=()
local name mask name_mask
for name_mask in "$@"; do
read -r name mask <<< "$(split_name "$name_mask")"
#warn "# checking '$name_mask': '$name' = '$mask'"
all_names+=("$name")
all_masks+=("$mask")
check_mask "$mask" # mask has non-zero voxels
done
# did we uniquely name all?
if ! cnt_uniq_match "${all_names[@]}"; then
warn "#ERROR: repeated names in name=mask: ${all_names[*]} "
return 1
fi
}
maskave_grp_files(){
local name="$1"; shift
local mask="$1"; shift
local input_files=("$@");
warn "# extracting '$name' ($mask) mean from ${#input_files[@]} files"
if [ "$NJOBS" -gt 1 ]; then
parallel --jobs "$NJOBS" -q \
maskave_filename_info "$name" "$mask" ::: "${input_files[@]}" >> "$OUTNAME"
else
for cfile in "${input_files[@]}"; do
maskave_filename_info "$name" "$mask" "$cfile" >> "$OUTNAME"
done
fi
}
maskave_grp_datatable(){
local name mask table cols
read -r name mask table cols <<< "$@"
mapfile -t ids < <(cut -f 1 "$table"|sed 1d)
mapfile -t labels < <(cut -f "$cols" "$table"| sed '1d;s/\s\+/_/g')
lastcol=$(awk '{print NF; exit}' "$table")
mapfile -t files < <(cut -f "$lastcol" "$table"|sed 1d)
# NB. this might hit max number of input args for large datatable files
parallel --jobs "$NJOBS" -q \
maskave_csv "$name" "$mask" \
:::+ "${files[@]}" \
:::+ "${labels[@]}" \
:::+ "${ids[@]}" \
>> "$OUTNAME"
}
check_datatable(){
local file="$1"; shift
local data_col="$1"; shift
! test -r "$file" && warn "ERROR: cannot read $file!" && return 5
if ! sed 1q "$file" |grep -q '^Subj.*InputFile$' ; then
warn "ERROR: '$file' must start with Subj and end with InputFile"
return 5
fi
if grep -qm1 ' ' "$file"; then
warn "ERROR: '$file' includes spaces! must be tab delimited for this script"
return 5
fi
if [ -z "$data_col" ]; then
warn "ERROR: must specify -label_col when using -datafile"
return 6
fi
fcols=$(awk '{print NF; exit}' "$file")
maxcol=$(tr ',' '\n' <<< "$data_col"|sort -nr|sed 1q)
if [ $fcols -lt $maxcol ]; then
warn "ERROR: -label_col '$data_col' (max=$maxcol) when file has only $fcols columns"
return 6
fi
return 0
}
main() {
[ $# -eq 0 ] && usage && exit
parse_args "$@" || exit $?
warn "# checking all input masks"
check_all_masks "${MASK[@]}"
if [ -n "$DATATABLE" ]; then
check_datatable "$DATATABLE" "$DATACOL" || exit $?
fi
# don't need to check file for maskave if file is empty.
# run will be the same as REDO anyway
extrahdr="beta"
if [ -n "${USEROISTATS[*]}" ]; then
extrahdr="$(roistats_header_opts "${USEROISTATS[@]}")"
maskave_NA() { roistats_NA "$@" "${USEROISTATS[@]}"; }
fi
if [ ! -e "$OUTNAME" ] || [ "$(wc -l < "$OUTNAME" 2>/dev/null||echo 0)" -le 1 ]; then
echo "roi,subj,input,$extrahdr" > "$OUTNAME"
REDO=1
fi
# export functions and pattern vars for gnu parallel
export -f check_all_masks cnt_test maskave_csv \
name_in_file id_in_file maskave_NA \
already_exists maskave_filename_info \
roistats_NA roistats_header_opts repna_roistats_opts \
roistats_extract
export IN_PATT ID_PATT OUTNAME REDO VERBOSE
for name_mask in "${MASK[@]}"; do
read -r name mask <<< "$(split_name "$name_mask")"
if [ -z "$DATATABLE" ]; then
maskave_grp_files "$name" "$mask" "${CFILES[@]}"
else
maskave_grp_datatable "$name" "$mask" "$DATATABLE" "$DATACOL"
fi
done
}
eval "$(iffmain "main")"