-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathioDataMethods.py
1476 lines (1271 loc) · 66.3 KB
/
ioDataMethods.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
# *********************************************************************************
# Author: Christian Jamtheim Gustafsson, PhD, Medical Physicist Expert
# Skåne University Hospital, Lund, Sweden and Lund University, Lund, Sweden
# Description: Class for loading data from Nifti files
# *********************************************************************************
# Modules needed for this class
import numpy as np
import os
import SimpleITK as sitk
import matplotlib.pyplot as plt
import json
import logging
import sys
import pickle
from numpy.core.fromnumeric import size
from scipy import ndimage
from skimage import morphology
from skimage.measure import label
from skimage.transform import resize
from commonConfig import commonConfigClass
# Module-level configuration object. The methods below read their settings
# from it through conf.data, conf.organ and conf.base.
conf = commonConfigClass()
class ioDataMethodsClass:
"""
Class describing functions needed for reading Nifti data
"""
def __init__ (self):
    """
    Init function.
    No instance attributes are initialised here.
    """
    pass
def createDatasetTextFile(self, folderPath, fileName, organList):
    """
    Write a JSON formatted text file which defines the dataset for the training.
    An already existing file is left untouched.
    Args:
        folderPath (str): Path to the folder where the file will be saved
        fileName (str): Name of the file
        organList (list): Organ names, each one is looked up in conf.organ.organNames
    Return:
        None
    """
    assert isinstance(folderPath, str), "Input must be string"
    assert isinstance(fileName, str), "Input must be string"
    assert isinstance(organList, list), "Input must be list"
    # Create file path
    filePath = os.path.join(folderPath, fileName)
    # If the data set file already exists, do nothing
    if os.path.isfile(filePath):
        return
    # Make sure folder exist. An empty folder part means the current working
    # directory, and os.makedirs raises when it is called with an empty path.
    parentFolder = os.path.dirname(filePath)
    if parentFolder:
        os.makedirs(parentFolder, exist_ok=True)
    # Get number of organs
    totnrOrgans = len(organList)
    # If there is only one organ, target class and organ class must be zero
    singleOrgan = totnrOrgans == 1
    # Write file, the with statement closes it on exit
    with open(filePath, 'w') as f:
        f.write("{ \n")
        f.write("\t \"task\": \"{}\", \n".format(conf.data.TaskNumber))
        f.write("\t \"name\": \"{}\", \n".format(conf.data.TaskNumber))
        f.write("\t \"dim\": 3, \n")
        if singleOrgan:
            f.write("\t \"target_class\": 0, \n")
        else:
            f.write("\t \"target_class\": null, \n")
        f.write("\t \"test_labels\": false, \n")
        f.write("\t \"labels\": { \n")
        # Write the organs and their class labels in the text file
        for i_organ, currOrgan in enumerate(organList):
            # Get the index position of the organ in the defined organ list
            # (raises ValueError for an organ that is not configured)
            organIndex = conf.organ.organNames.index(currOrgan)
            organClass = 0 if singleOrgan else conf.organ.organClasses[organIndex]
            # Make sure the last line is not ending with a comma
            separator = "" if i_organ == totnrOrgans - 1 else ","
            f.write("\t \t \"{}\": \"{}\"{} \n".format(organClass, currOrgan, separator))
        f.write("\t }, \n")
        f.write("\t \"modalities\": { \n")
        f.write("\t \t \"0\": \"CT\" \n " )
        f.write("\t } \n")
        f.write("} \n")
    print("Created dataset description file: {}".format(filePath))
def createLabelInstanceTextFile(self, filePath, fileName, organsList, instanceValuesList):
    """
    Write a JSON formatted text file which defines the class label for each
    instance segmentation label that is available in the mask.
    Args:
        filePath (str): Path to the folder where the file will be saved
        fileName (str): Name of the file
        organsList (list): Organ names, each one is looked up in conf.organ.organNames
        instanceValuesList (list): Instance value for each organ, same order and length as organsList
    Return:
        None
    """
    assert isinstance(filePath, str), "Input must be string"
    assert isinstance(fileName, str), "Input must be string"
    assert isinstance(organsList, list), "Input must be list"
    assert isinstance(instanceValuesList, list), "Input must be list"
    assert len(organsList) == len(instanceValuesList), "List must be the same size"
    # Create file path
    filePath = os.path.join(filePath, fileName)
    # Make sure folder exist. An empty folder part means the current working
    # directory, and os.makedirs raises when it is called with an empty path.
    parentFolder = os.path.dirname(filePath)
    if parentFolder:
        os.makedirs(parentFolder, exist_ok=True)
    # Get number of organs
    totNrOrgans = len(organsList)
    # Write file, the with statement closes it on exit
    with open(filePath, 'w') as f:
        f.write("{ \n")
        f.write("\t \"instances\": { \n")
        # Write the instance value and class label for each organ in the list
        for i_organ, (currOrgan, instanceValue) in enumerate(zip(organsList, instanceValuesList)):
            # Get the index position of the organ in the defined organ list
            # (raises ValueError for an organ that is not configured)
            organIndex = conf.organ.organNames.index(currOrgan)
            # If there is only one organ the class must be set to zero
            organClass = 0 if totNrOrgans == 1 else conf.organ.organClasses[organIndex]
            # The last entry must not end with a comma
            separator = "" if i_organ == totNrOrgans - 1 else ","
            f.write("\t \t \"{}\": {}{} \n".format(instanceValue, organClass, separator))
        f.write("\t } \n")
        f.write("} \n")
def removeSmallSignalRegions(self, mask, threshold, patFolder, organ):
    """
    Remove small signal regions from the mask.
    When the mask holds more than one connected region, regions smaller than
    the threshold are removed and a binary uint8 mask is returned. A mask
    with at most one region is returned unchanged.
    Args:
        mask (array): Original mask
        threshold (int): Size of region to remove
        patFolder (str): Path to patient folder, used for the console message
        organ (str): Name of organ, used for the console message
    Return:
        Mask where small regions have been removed
    """
    assert len(mask.shape) == 3, "dim should be 3"
    # Label the mask and count the labels
    labelMask, nrLabels = label(mask, return_num=True)
    if nrLabels > 1:
        # Remove small objects, the labelling from above is reused
        cleanedLabels = morphology.remove_small_objects(labelMask, threshold)
        # remove_small_objects returns the label image (values 1..N).
        # Collapse it back to 0/1 so the result is a binary mask again.
        mask = (cleanedLabels > 0).astype('uint8')
        print("Removed small signal regions from mask in {} {}".format(patFolder, organ))
    # Return the mask
    return mask
def limitNrPatients(self, AllPatList, nrPatients):
    """
    Limit the number of patients to process.
    Args:
        AllPatList (list): List of all patients
        nrPatients (int or str): Number of patients to keep, or 'all' to keep every patient
    Return:
        List of patients to limit processing to.
        None if nrPatients is neither an int nor 'all'.
    """
    useCount = isinstance(nrPatients, int)
    # Anything which is neither a count nor 'all' gives no list back
    if not useCount and not (nrPatients == 'all'):
        return None
    assert isinstance(AllPatList, list), "Input must be list"
    # A count keeps the first entries, 'all' hands back the list itself
    return AllPatList[:nrPatients] if useCount else AllPatList
def saveNiftiFile(self, np_imageData, sitk_imageData, outPutFilePath):
    """
    Saves 3D Nifti file
    Args:
        np_imageData (array): 3D numpy array with the slices in the last dimension
        sitk_imageData (sitk image): 3D sitk image which the image information is copied from
        outPutFilePath (str): Path to the file to be saved
    Return:
        None
    """
    # Assert numpy array
    assert isinstance(np_imageData, np.ndarray), "Input must be numpy array"
    # Input assertion to make sure 3D image
    assert len(np_imageData.shape) == 3, "dim should be 3"
    # Reorder back so slices in the 3D stack will be in the first dimension
    # This is the numpy format from SITK when exported as numpy array
    np_imageData = np.transpose(np_imageData, (2,0,1))
    # Converting back to SimpleITK
    # (assumes we didn't move the image in space as we copy the information from the original)
    outImage = sitk.GetImageFromArray(np_imageData)
    outImage.CopyInformation(sitk_imageData)
    # Make sure folder exist before saving. A bare file name has an empty
    # folder part, and os.makedirs raises when it is called with an empty path.
    outputFolder = os.path.dirname(outPutFilePath)
    if outputFolder:
        os.makedirs(outputFolder, exist_ok=True)
    # Write the image
    sitk.WriteImage(outImage, outPutFilePath)
def zScoreNorm(self, imageVolume, ignoreAir=False):
    """
    Z-score normalize the image volume.
    Mean and standard deviation are taken from the voxels above a HU
    threshold and then applied to the whole volume.
    Args:
        imageVolume (array): 3D numpy array
        ignoreAir (bool): If True only voxels above -1000 HU (air) enter the
            statistics, otherwise voxels above -1001 HU are used
    Return:
        imageVolumeNormalized (array): 3D numpy array
    """
    assert len(imageVolume.shape) == 3, "dim should be 3"
    # Air = -1000 HU
    threshold = -1000 if ignoreAir == True else -1001
    # Voxels which contribute to the statistics
    statVoxels = imageVolume[imageVolume > threshold]
    # Z-score normalization of the complete volume
    return (imageVolume - np.mean(statVoxels)) / np.std(statVoxels)
def cropImageFromMask(self, image, mask):
    """
    Crop the image to the bounding box of the non-zero part of the mask
    Args:
        image (array): 3D image
        mask (array): 3D mask
    Return:
        croppedImage (array): Cropped image
    """
    assert len(image.shape) == 3, "dim should be 3"
    assert len(mask.shape) == 3, "dim should be 3"
    # Index positions of all non-zero mask elements, one row per element
    nonZeroIdx = np.argwhere(mask)
    # Corners of the bounding box, slices are exclusive at the top
    lowerCorner = nonZeroIdx.min(axis=0)
    upperCorner = nonZeroIdx.max(axis=0) + 1
    # One slice object per axis
    boxWindow = tuple(slice(start, stop) for start, stop in zip(lowerCorner, upperCorner))
    # Return the contents of the box
    return image[boxWindow]
def readNiftiFile(self, filePath):
    """
    Read a 3D Nifti file with SimpleITK.
    Args:
        filePath (str): Path to the file to be read
    Return:
        np_imageData (array): Image data with the slices in the last dimension
        sitk_imageData (sitk image): The SITK object which was read
        pixelSpacing (tuple): Spacing reported by the SITK object
    """
    assert isinstance(filePath, str), "Input must be string"
    # Read the .nii image containing the volume using SimpleITK
    sitk_imageData = sitk.ReadImage(filePath)
    # Numpy view of the data, slices are in the first dimension here
    sliceFirstData = sitk.GetArrayFromImage(sitk_imageData)
    # Spacing as stored in the SITK object
    pixelSpacing = sitk_imageData.GetSpacing()
    # Only 3D images are supported
    assert len(sliceFirstData.shape) == 3, "dim should be 3"
    # Move the slice axis to the end and return everything
    return np.moveaxis(sliceFirstData, 0, -1), sitk_imageData, pixelSpacing
def calcImageVolume(self, array, pixelSpacing):
    """
    Calculate the volume of a binary structure mask.
    Args:
        array (array): 3D numpy array, is cast to int8 before it is checked
        pixelSpacing (tuple): Pixel spacing in the image
    Return:
        volume (float): Number of mask voxels times the three spacing values
    """
    # Work on an int8 copy of the input
    binaryMask = array.astype(np.int8)
    # Check type and dimensions
    assert isinstance(binaryMask, np.ndarray), "Input must be numpy array"
    assert binaryMask.ndim == 3, "Mask must be 3D"
    assert isinstance(pixelSpacing, tuple), "Pixel spacing must be tuple"
    # Check that the mask spans exactly 0..1
    assert binaryMask.max() == 1, "Mask must be binary"
    assert binaryMask.min() == 0, "Mask must be binary"
    # Voxel count scaled by the spacing along each axis
    spacingRow, spacingCol, spacingSlice = pixelSpacing[0], pixelSpacing[1], pixelSpacing[2]
    return np.sum(binaryMask) * spacingRow * spacingCol * spacingSlice
def assertBinaryMask(self, mask):
    """
    Assert that the input is a binary mask, i.e. a 3D numpy array where
    every element is either 0 or 1.
    The check is done per element. Checking only sum, max and min lets masks
    with a negative sum or with fractional values between 0 and 1 pass, and
    rejects a completely filled mask.
    Args:
        mask (array): 3D numpy array
    Return:
        None
    """
    assert isinstance(mask, np.ndarray), "Input must be numpy array"
    assert len(mask.shape) == 3, "Mask must be 3D"
    # Every voxel has to be exactly 0 or 1
    assert np.all((mask == 0) | (mask == 1)), "Mask must be binary"
def getBoundingBoxFilled(self, mask, value, margin):
    """
    Get a filled bounding box around the non-zero part of the mask.
    The box covers the lowest to the highest signal index on each axis,
    both included, extended by the margin on both sides.
    Args:
        mask (array): 3D numpy array
        value (int): Value to fill the bounding box with
        margin (tuple): Margin to add to the bounding box, in voxels (x, y, z)
    Return:
        boundingBoxmask (array): Array of the same shape as the mask, zero outside the box
    """
    # Index positions of the signal along each axis
    xIdx, yIdx, zIdx = np.where(mask)
    # Load margin
    xMargin, yMargin, zMargin = margin
    # Lower and upper limit per axis. Slices are exclusive at the top, so
    # +1 is needed to keep the last signal voxel (and the full margin) in the box.
    boundingBox = [xIdx.min()-xMargin, xIdx.max()+xMargin+1,
                   yIdx.min()-yMargin, yIdx.max()+yMargin+1,
                   zIdx.min()-zMargin, zIdx.max()+zMargin+1]
    # If negative values are created in the bounding box after adding margins, set them to value 0
    boundingBox = [0 if limit < 0 else limit for limit in boundingBox]
    # Create a mask of the bounding box, limits beyond the array end are cut by the slicing
    boundingBoxmask = np.zeros(mask.shape)
    boundingBoxmask[boundingBox[0]:boundingBox[1], boundingBox[2]:boundingBox[3], boundingBox[4]:boundingBox[5]] = value
    return boundingBoxmask
def getCenterSlice(self, mask):
    """
    Return the center slice of a mask calculated from where signal is present
    Args:
        mask (array): 3D binary numpy array
    Return:
        centerZ (int): Index in the third dimension midway between the first
            and the last slice with signal
    """
    # Check mask
    self.assertBinaryMask(mask)
    # Third-dimension index of every voxel with signal
    zWithSignal = np.nonzero(mask == True)[2]
    # First and last slice with signal
    firstZ = zWithSignal.min()
    lastZ = zWithSignal.max()
    # Midpoint, rounded with np.round
    return int(np.round(firstZ + (lastZ - firstZ)/2))
def logAndPrint(self, message):
    """
    For enhanced logging to both file and console.
    Returns the root logger with a file handler and a stdout handler
    attached. The handlers are only attached when they are missing, so
    repeated calls do not duplicate every log line or open the log file again.
    This function does not work in multi processing
    Args:
        message (str): Currently unused inside this function
    Return:
        a_logger (logging.Logger): The configured root logger
    """
    # Define logger object
    a_logger = logging.getLogger()
    # Set logging level
    a_logger.setLevel(logging.DEBUG)
    # Log file path. FileHandler stores the absolute path in baseFilename,
    # so the absolute path is used for the comparison below.
    logFilePath = os.path.abspath(os.path.join(conf.data.dataOutputPath, conf.data.logFileName))
    hasFileHandler = any(
        isinstance(handler, logging.FileHandler) and handler.baseFilename == logFilePath
        for handler in a_logger.handlers
    )
    if not hasFileHandler:
        a_logger.addHandler(logging.FileHandler(logFilePath))
    # FileHandler derives from StreamHandler, hence the exact type comparison
    hasStdoutHandler = any(
        type(handler) is logging.StreamHandler and handler.stream is sys.stdout
        for handler in a_logger.handlers
    )
    if not hasStdoutHandler:
        a_logger.addHandler(logging.StreamHandler(sys.stdout))
    # return logging object
    return a_logger
def removeItemsFromList(self, originalList, itemsToRemove):
    """
    Remove the items from the input list which start with any of the strings
    in itemsToRemove. The comparison ignores upper and lower case.
    Args:
        originalList (list): List to be filtered, it is not modified
        itemsToRemove (list): List of prefixes of items to be removed
    Return:
        editedList (list): New list with items removed
        existFlag (int): 1 if anything was removed from originalList, otherwise 0
    """
    # Lower case prefixes, str.startswith accepts a tuple of alternatives
    prefixes = tuple(prefix.lower() for prefix in itemsToRemove)
    # Keep every item which does not start with one of the prefixes
    editedList = [item for item in originalList if not item.lower().startswith(prefixes)]
    # Something was removed if the list became shorter
    existFlag = int(len(editedList) != len(originalList))
    # Return data
    return editedList, existFlag
def getLargestFile(self, folderOfInterest):
    """
    Get the largest mask file in a directory.
    Only files with the prefix 'mask_' are considered.
    Args:
        folderOfInterest (str): Path to the folder containing the masks
    Return:
        largestFile (str): Name of the largest file
        maxFileSize (int): Size of that file as reported by os.path.getsize
    """
    # Candidate files are the ones with prefix 'mask_'
    maskFiles = [entry for entry in os.listdir(folderOfInterest) if entry.startswith('mask_')]
    # Size on disk for each candidate
    maskSizes = [os.path.getsize(os.path.join(folderOfInterest, entry)) for entry in maskFiles]
    # Position of the largest size in the list
    largestIndex = np.argmax(maskSizes)
    # Return file name and its size
    return maskFiles[largestIndex], np.amax(maskSizes)
def checkImageResolution(self, imageArray, pixelSpacing, desiredResolution, tolerance, folder, organ):
    """
    Check that the image matrix is 512 x 512 and that the pixel spacing is
    within tolerance of the desired resolution. A console message is printed
    when the check fails, nothing is raised.
    Args:
        imageArray (array): 3D numpy array
        pixelSpacing (tuple): Pixel spacing tuple
        desiredResolution (tuple): Desired pixel spacing tuple
        tolerance (float): Largest accepted relative deviation of the pixel spacing
        folder (str): Folder name used in the message
        organ (str): Organ name used in the message
    Return:
        None
    """
    # Plain comparisons are used instead of assert statements, which are
    # removed when Python runs with -O and would disable the check.
    try:
        matrixOk = imageArray.shape[0] == 512 and imageArray.shape[1] == 512
        # Relative deviation for row, column and slice spacing
        spacingOk = all(
            abs(1 - pixelSpacing[axis] / desiredResolution[axis]) < tolerance
            for axis in range(3)
        )
        resolutionOk = bool(matrixOk and spacingOk)
    except Exception:
        # Inputs which cannot be evaluated are reported as incorrect, this
        # method reports problems and never raises. KeyboardInterrupt and
        # SystemExit are deliberately not caught.
        resolutionOk = False
    if not resolutionOk:
        print("Image resolution is not correct for " + folder + " " + organ)
def fuseRTstructures(self, folder, ignoreStructures):
    """
    Read in all Nifti structure files in the folder and fuse the binary masks.
    The BODY structure is not added to the sum, it is multiplied onto the
    fused mask to cut away everything outside the body (table top).
    The fused values are truncated to conf.data.structureFuseTruncValue and
    returned in uint8 format.
    Structures starting with a name in ignoreStructures are excluded.
    Args:
        folder (str): Path to the folder containing the Nifti files
        ignoreStructures (list): List of structures to be ignored
    Return:
        fusedMask (array): Fused array from all masks
    """
    prefix = conf.data.filePrefix
    suffix = conf.data.fileSuffix
    # Collect the structure names from the file names in the folder
    structNames = []
    for entry in os.listdir(folder):
        # Only nii.gz files are of interest and the CT image file is skipped
        if '.nii.gz' not in entry or conf.data.CTImageFileName in entry:
            continue
        if suffix not in entry:
            continue
        # Remove file ending, then keep what follows the mask prefix
        stem = entry.replace(suffix, '')
        if prefix in stem:
            structNames.append(stem.split(prefix)[1])
    # Lower letter version of the names
    structNamesLower = [name.lower() for name in structNames]
    # Remove structures to ignore
    structsToFuse, _ = self.removeItemsFromList(structNames, ignoreStructures)
    # Make sure the BODY structure is included, can be defined as multiple names.
    # NOTE(review): this test is case sensitive while the matching further
    # down uses lower case - confirm that this is intended.
    assert any(name in conf.data.bodyStructureName for name in structNames), "BODY structure is missing"
    # Remove BODY from the list as we do not want value 1 everywhere. Uses lower case.
    structsToFuse, _ = self.removeItemsFromList(structsToFuse, conf.data.bodyStructureName)
    # Determine the configured BODY structure name(s) present in the folder
    bodyNamesFound = [name for name in conf.data.bodyStructureName if name.lower() in structNamesLower]
    # Sum all structures in the final list.
    # This is not parallelized because calculation is parallelized over patients instead.
    # NOTE(review): if no structure remains, the empty list is what gets
    # multiplied with the body mask below - verify this case against callers.
    fusedMask = []
    for structName in structsToFuse:
        structPath = os.path.join(folder, prefix + structName + suffix)
        # Read Nifti data from file as uint8
        structMask = np.uint8(self.readNiftiFile(structPath)[0])
        # The first structure defines the shape of the fused uint8 array
        if not len(fusedMask):
            fusedMask = np.zeros(structMask.shape, dtype=np.uint8)
        fusedMask = fusedMask + structMask
    # Read the first BODY structure found (possibly several can be found).
    # NOTE(review): the path uses the configured spelling of the name, not
    # the spelling of the file on disk - confirm on case sensitive file systems.
    bodyPath = os.path.join(folder, prefix + str(bodyNamesFound[0]) + suffix)
    bodyMask = self.readNiftiFile(bodyPath)[0]
    # Cut away table top data by multiplying with body mask, keep uint8
    fusedMask = np.uint8(fusedMask * bodyMask)
    # Truncate data
    truncValue = conf.data.structureFuseTruncValue
    fusedMask[fusedMask > truncValue] = truncValue
    # Make sure image data only contains binary values
    self.assertBinaryMask(fusedMask)
    # Return fused array
    return fusedMask
def saveNpArray(self, np_array, folderPath, fileName):
    """
    Save a numpy array to a compressed file.
    The array is stored under the name 'fusedStructures', which is the name
    to call when loading the data.
    Args:
        np_array (array): Numpy array to be saved
        folderPath (str): Path to the folder where the file should be saved
        fileName (str): Name of the file to be saved
    Return:
        None
    """
    targetPath = os.path.join(folderPath, fileName)
    # The folder of the target must exist before saving
    os.makedirs(os.path.dirname(targetPath), exist_ok=True)
    # Save compressed data
    np.savez_compressed(targetPath, fusedStructures=np_array)
def writeClassificationQAimages(self, GLNumber, patient, label, imageData, maskData):
    """
    Write a QA PNG file showing the center slice of the image data with the
    edge of the mask added on top.
    Args:
        GLNumber (str): Sub folder name below the task folder
        patient (str): Patient name, used as start of the file name
        label (int): Label value written into the file name
        imageData (array): Image data
        maskData (array): Mask data
    Return:
        Write out a PNG file
    """
    # Output folder for this task and GLNumber
    qaFolder = os.path.join(conf.data.dataOutputQAPath, conf.data.TaskNumber, GLNumber)
    os.makedirs(qaFolder, exist_ok=True)
    # Center slice of the mask signal
    centerSlice = self.getCenterSlice(maskData)
    # Edge of the mask
    edgeMask = self.getMaskEdge(maskData)
    # Image slice with the mask edge added
    overlaySlice = imageData[:,:,centerSlice] + edgeMask[:,:,centerSlice]
    # Save fusion to PNG file
    pngName = patient + '_box_label' + str(int(label)) + '.png'
    plt.imsave(os.path.join(qaFolder, pngName), overlaySlice)
def writeClassificationQAimagesPerStructure(self, subject, fileName, CTData, AddMapData, basePath, TaskNumber):
    """
    Write QA PNG file of image data for each structure with a mask overlay.
    The file is written to basePath/TaskNumber/conf.base.dataOutputStructureDir.
    Args:
        subject (str): Subject ID
        fileName (str): File name of the structure, with prefix and suffix
        CTData (array): Cropped CT image data
        AddMapData (array): Cropped AddMap data
        basePath (str): Base folder of the output
        TaskNumber (str): Task folder name below basePath
    Return:
        Write out a PNG file
    """
    assert CTData.shape == AddMapData.shape, "CT and AddMap data have different shapes"
    # Make sure the save directory exists. The folder is built from the
    # TaskNumber argument, the same folder the PNG is saved in.
    outputFolder = os.path.join(basePath, TaskNumber, conf.base.dataOutputStructureDir)
    os.makedirs(outputFolder, exist_ok=True)
    # Get the slices where the AddMap has value 1 (structure of interest)
    sliceWithSignal = np.argwhere(AddMapData == 1)
    # Get minimum and maximum value in third dimension
    minZ = sliceWithSignal[:,2].min()
    maxZ = sliceWithSignal[:,2].max()
    # Calculate center slice
    centerSlice = int(np.round(minZ + (maxZ-minZ)/2))
    # Get structure name from file name: remove the suffix, then the mask_ prefix
    structureName = fileName.replace(conf.data.fileSuffix,'').split(conf.data.filePrefix)[1]
    # Save fusion data to PNG file, the AddMap is weighted with 2 in the overlay
    pngPath = os.path.join(outputFolder, str(subject) + '_' + str(structureName) + '.png')
    plt.imsave(pngPath, CTData[:,:,centerSlice] + 2*AddMapData[:,:,centerSlice])
def resizeImageData(self, imageData, newShape, imageType):
    """
    Resize image data to new shape.
    The interpolation order follows skimage.transform.resize:
    0:Nearest-neighbor, 1:Bi-linear, 2:Bi-quadratic, 3:Bi-cubic, 4:Bi-quartic, 5:Bi-quintic.
    Images ('img') use order 3 with anti-aliasing, binary segmentations
    ('seg') use order 0 without anti-aliasing.
    Args:
        imageData (array): Image data
        newShape (tuple): New shape of the image
        imageType (str): 'img' for image data or 'seg' for a binary segmentation
    Return:
        Resized image data
    Raises:
        ValueError: If imageType is neither 'img' nor 'seg'
    """
    # Assert numpy array
    assert isinstance(imageData, np.ndarray), "Image data is not a numpy array"
    # Assert tuple
    assert isinstance(newShape, tuple), "New shape is not a tuple"
    if imageType == 'img':
        order = 3
        anti_aliasing_value = True
    elif imageType == 'seg':
        order = 0
        anti_aliasing_value = False
        self.assertBinaryMask(imageData)
    else:
        # Fail with a clear message instead of an UnboundLocalError further down
        raise ValueError("imageType must be 'img' or 'seg', got: " + repr(imageType))
    # Resize image data to new size, clipped to and kept in the input value range
    imageData = resize(imageData.astype(float), newShape, order=order, mode="constant", cval=0, clip=True, preserve_range=True, anti_aliasing=anti_aliasing_value)
    # Assert still binary if segmentation
    if imageType == 'seg':
        self.assertBinaryMask(imageData)
    # Return resized image data
    return imageData
def padAroundImageCenter(self, imageArray, paddedSize):
    """
    Pad a 3D matrix with zeros to the desired shape, keeping the image centered.
    When the number of voxels to add on an axis is odd, the extra voxel is
    placed at the end of that axis.
    Args:
        imageArray (array): Image array to be padded
        paddedSize (tuple): Size of matrix after zero padding
    Return:
        paddedImageArray (array): Padded image array
    """
    # Assert tuple and np array
    assert isinstance(paddedSize, tuple), "Padded size is not a tuple"
    assert isinstance(imageArray, np.ndarray), "Image array is not a numpy array"
    axisNames = ("row", "column", "slice")
    # Assert image size is not larger than padded size
    for axis, axisName in enumerate(axisNames):
        assert imageArray.shape[axis] <= paddedSize[axis], "Image size is larger than requested padded size in " + axisName + ": " + str(imageArray.shape[axis]) + " vs " + str(paddedSize[axis])
    # Shape before padding
    origShape = imageArray.shape
    # Half of the missing size per axis, floor division rounds it down
    padBefore = np.round((np.array(paddedSize) - np.array(origShape))//2)
    # The end of each axis receives what is left, takes care of uneven sizes
    padWidths = tuple(
        (padBefore[axis], paddedSize[axis] - origShape[axis] - padBefore[axis])
        for axis in range(3)
    )
    # Pad the image array with zeros
    paddedImageArray = np.pad(imageArray, padWidths, 'constant', constant_values=0)
    # Assert correct padded size, very important
    for axis, axisName in enumerate(axisNames):
        assert paddedImageArray.shape[axis] == paddedSize[axis], "Padded image size is incorrect in " + axisName
    # Return the padded image array
    return paddedImageArray
def getMaskEdge(self, mask):
    """
    Get the in-plane edge of the binary mask.
    This is used to display the mask without overlaying the whole mask
    Args:
        mask (array): Binary mask array
    Return:
        edge (array): uint8 array of the same shape as the mask, 1 where the
            gradient along the first or second dimension is non-zero
    """
    # Assert binary mask
    self.assertBinaryMask(mask)
    # Gradient of the mask in double precision along all three dimensions
    gradRow, gradCol, _ = np.gradient(np.asarray(mask[:, :, :], dtype=np.double))
    # Squared gradient magnitude, the third dimension is ignored
    gradSquared = gradCol * gradCol + gradRow * gradRow
    # Every voxel with a gradient becomes 1, the rest 0
    edge = (gradSquared != 0).astype(np.uint8)
    # Make sure they are the same size
    assert edge.shape == mask.shape, "Edge and mask are not the same size"
    # Return the edge of the mask
    return edge
def readDataToDataloader(self, dataFilePath):
    """
    Read data from a Numpy save file and return the data as a float 32 numpy
    array with the channels in the first dimension.
    Args:
        dataFilePath (str): Path to the data file
    Return:
        data (array): 4D data array, channels first
    """
    assert isinstance(dataFilePath, str), "Data file path is not a string"
    assert os.path.isfile(dataFilePath), "Data file does not exist"
    # Load data from file. The archive is opened in a with statement so the
    # file handle is closed again, it otherwise stays open for every sample read.
    with np.load(dataFilePath) as archive:
        # 'fusedStructures' is the predetermined name of the numpy array
        img = archive['fusedStructures'].astype(np.float32)
    # Move channels first in array
    img = np.moveaxis(img, -1, 0)
    # Assert 4D Numpy array
    assert img.ndim == 4, "Image data is not 4D"
    # Return data
    return img
def writeClassificationTrainingData(self, GLNumber, patient, label, croppedCT, croppedStructures):
    """
    Write out Numpy training data to file as a 4D array.
    The CT is the first and the fused structures the second channel in the
    last dimension.
    Args:
        GLNumber (str): Sub folder name below the task folder
        patient (str): Patient name, used as start of the file name
        label (int): Label value written into the file name
        croppedCT (array): Cropped part of the CT
        croppedStructures (array): Cropped part of the fused structures, must be binary
    Return:
        None, the 4D array is written to a Numpy file
    """
    # Assert binary mask
    self.assertBinaryMask(croppedStructures)
    # Stack the 3D arrays into a 4D array with the channels last
    stackedData = np.stack((croppedCT, croppedStructures), axis=-1)
    # Assert 4D array
    assert stackedData.ndim == 4, "Array must be 4D"
    # Make sure save folder exists
    saveFolder = os.path.join(conf.data.dataOutputClassificationTrainingDataPath, conf.data.TaskNumber, GLNumber)
    os.makedirs(saveFolder, exist_ok=True)
    # Save fusion to Numpy file
    self.saveNpArray(stackedData, saveFolder, patient + '_4D_label' + str(int(label)))
def writeClassificationTrainingDataPerStructure(self, subject, fileName, CTData, AddMapData, basePath, TaskNumber):
    """
    Write out Numpy training data for each structure to file as a 4D array
    Args:
        subject (string): Subject ID
        fileName (string): File name of the structure
        CTData (array): Cropped part of the CT
        AddMapData (array): Cropped part of the fused structures
        basePath (string): Base output path
        TaskNumber (string): Task folder name placed below basePath
    Return:
        None, the 4D array with cropped CT and cropped AddMap in different
        channels is handed to self.saveNpArray
    """
    assert CTData.shape == AddMapData.shape, "CT and AddMap data are not the same size"
    # Fuse CT and AddMap to a 4D matrix, AddMap as second channel
    np_CT_AddMap_4D = np.stack((CTData, AddMapData), axis=-1)
    # Assert number of slices > 0
    assert np_CT_AddMap_4D.shape[2] > 0, 'Number of slices in CT and AddMap is 0'
    # Assert 4D array
    assert len(np_CT_AddMap_4D.shape) == 4, "Array must be 4D"
    # Get label from file name
    tmp_label = fileName.replace(conf.data.fileSuffix, '')
    # Remove the file prefix, the label is what follows it
    label = tmp_label.split(conf.data.filePrefix)[1]
    # Build the save folder once from the TaskNumber argument, so that the
    # folder created below is exactly the folder the array is saved into
    # (previously the folder was created from conf.data.TaskNumber instead).
    saveFolder = os.path.join(basePath, TaskNumber, conf.base.dataOutputStructureDir)
    # Make sure save folder exists
    os.makedirs(saveFolder, exist_ok=True)
    # Save fusion to Numpy file
    self.saveNpArray(np_CT_AddMap_4D, saveFolder, subject + '_' + str(label))
def editFusedStructuresToy(self, fusedStructures):
    """
    Create an edited copy of the fused structures for the Toy dataset.
    Everything from the first row up to (not including) the middle row along
    the first axis is set to 0. The input array itself is left unchanged.
    Args:
        fusedStructures (array): Fused structures (binary mask)
    Return:
        fusedStructuresEdited (array): Copy of the fused structures with the first half of the rows removed
    """
    # The input must pass the binary mask check
    self.assertBinaryMask(fusedStructures)
    # Index of the middle row along the first axis
    halfRows = fusedStructures.shape[0] // 2
    # Work on a copy so the caller's mask is not modified
    editedMask = fusedStructures.copy()
    # Remove the part of the mask in front of the middle row
    editedMask[:halfRows, :, :] = 0
    return editedMask
def getBodyStructure(self, subject, folder, structFiles):
    """
    Get the body structure file name from a list of structure file names.
    A file is a candidate when its lower-cased name contains any of the
    lower-cased keywords in conf.base.bodyStructureName. If several different
    files qualify, the first one whose lower-cased name contains
    conf.data.filePrefix + 'body' is preferred, otherwise the first candidate.
    Args:
        subject (string): Subject ID, only used in messages
        folder (string): Folder name (only type checked, kept for the old
            largest-file solution)
        structFiles (list): Structure file names to search
    Return:
        bodyStructFileUse (string): File name of the selected body structure
    Raises:
        AssertionError: If folder is not a string or structFiles is not a list
        Exception: If no file name contains a body structure keyword
    """
    # Assertions
    assert isinstance(folder, str), "Folder must be a string"
    assert isinstance(structFiles, list), "structFiles must be a list"
    # Get lower version of the body structure keywords
    bodyStructureNamesTemplateLower = [each_string.lower() for each_string in conf.base.bodyStructureName]
    # Collect every file containing a body keyword. Each file is added only
    # once, so a single file matching several keywords is not mistaken for
    # multiple body structures. The keyword-major order is kept.
    bodyStructNameCandidates = []
    for templateName in bodyStructureNamesTemplateLower:
        for structure in structFiles:
            if templateName in structure.lower() and structure not in bodyStructNameCandidates:
                bodyStructNameCandidates.append(structure)
    # Define what body structure file to use if multiple body structures are found
    if not bodyStructNameCandidates:
        raise Exception("No BODY structure names was found for the subject " + subject)
    if len(bodyStructNameCandidates) == 1:
        # Use the only one existing
        bodyStructFileUse = bodyStructNameCandidates[0]
    else:
        print('Warning: Multiple BODY structure files found for subject ' + subject)
        # Select the ones with <filePrefix>body in the name
        preferredTag = conf.data.filePrefix + 'body'
        preferredFiles = [file for file in bodyStructNameCandidates if preferredTag in file.lower()]
        if preferredFiles:
            # Use the one with 'body' in the name
            bodyStructFileUse = preferredFiles[0]
            print('Using the one containing the name body: ' + bodyStructFileUse)
        else:
            # No files with 'body' found, use the first one
            bodyStructFileUse = bodyStructNameCandidates[0]
            print('Using the first body file one found: ' + bodyStructFileUse)
    # Old solution: Get the largest mask file. This should logically correspond to the body structure mask.
    # This did not work as other files interfered with larger size. However saving the line for future use.
    # bodyStructFileUse, largestFileSize = self.getLargestFile(folder)
    # Double check that any of the body template keywords are in the determined file name (lower case)
    boolBodyCheck = any(bodyName in bodyStructFileUse.lower() for bodyName in bodyStructureNamesTemplateLower)
    # Make sure some body structure keywords exist within this file name
    assert boolBodyCheck, "Body structure name not found in detected body file name"
    # Return data
    return bodyStructFileUse
def getNumberOfUsedSlices(self, structure):
    """
    Get the slice extent of the structure along the third axis.
    The extent is counted from the first to the last slice containing
    non-zero values, both included. Slices without signal lying between
    those two are counted as well.
    Args:
        structure (array): 3D structure to check
    Return:
        numberOfSlices (int): Number of slices from first to last slice with signal
    Raises:
        ValueError: If the structure contains no non-zero values
    """
    # Slice (third axis) index of every non-zero voxel
    _, _, z = np.where(structure)
    # np.min / np.max fail with an unclear reduction error on empty input,
    # so report the empty structure explicitly (same exception type)
    if z.size == 0:
        raise ValueError('Structure contains no non-zero voxels')
    firstSlice = np.min(z)
    lastSlice = np.max(z)
    # +1 because both the first and the last slice are included
    numberOfSlices = lastSlice - firstSlice + 1
    return numberOfSlices
def truncVolSliceToStruct(self, np_struct, np_CT, AddMap):
    """
    Be aware: This pre-processing might not be as good as extending the CT (truncVolSliceToDesiredSize)
    Truncation of volumes in slice direction with respect to the extent of the structure.
    The new volumes span from the first to the last slice (third axis) containing structure.
    This is performed to limit the CT and AddMap information to the same extent as the structure.
    Args:
        np_struct (array): Structure volume
        np_CT (array): CT volume
        AddMap (array): AddMap volume
    Return:
        np_struct_ztrunk (array): Truncated structure volume
        np_CT_ztrunk (array): Truncated CT volume
        AddMap_ztrunk (array): Truncated AddMap volume
    Raises:
        ValueError: If the structure has no non-zero voxels, or if the sum of
            any truncated volume is 0
        AssertionError: If the truncated volumes differ in shape
    """
    # Get voxel coordinates of the structure, only the slice axis is needed
    _, _, z = np.where(np_struct)
    # np.min / np.max fail with an unclear reduction error on empty input,
    # so report the empty structure explicitly (same exception type)
    if z.size == 0:
        raise ValueError('Structure contains no non-zero voxels, cannot truncate volumes')
    firstSlice = np.min(z)
    lastSlice = np.max(z) + 1  # +1 because the end index is exclusive in the selection
    # Limit the slices in the data volumes with respect to number of slices in the structure
    np_CT_ztrunk = np_CT[:, :, firstSlice:lastSlice]
    np_struct_ztrunk = np_struct[:, :, firstSlice:lastSlice]
    AddMap_ztrunk = AddMap[:, :, firstSlice:lastSlice]
    # Make sure they are not empty after truncation (a sum of 0 is treated as empty)
    if np_CT_ztrunk.sum() == 0 or np_struct_ztrunk.sum() == 0 or AddMap_ztrunk.sum() == 0:
        raise ValueError('Data was found to be empty empty after truncation')
    # Assert shapes
    assert np_struct_ztrunk.shape == np_CT_ztrunk.shape, 'Shape of cropped CT do not match'
    assert AddMap_ztrunk.shape == np_CT_ztrunk.shape, 'Shape of the AddMap do not match'
    # Return truncated volumes
    return np_struct_ztrunk, np_CT_ztrunk, AddMap_ztrunk
def truncVolSliceToDesiredSize(self, np_struct, np_CT, AddMap, desiredImageMatrixSize):
"""
Truncation of volume in slice direction with respect to the center slice of the structure.
The new volume has the desired number of slices where the slices not containing structure
is symmetrically spaced around the slices containing structure information.
With other words: the strucutre is in the center of the new truncated volume.
This is performed to be able to contain the CT information for more slices than the structure content.
Args:
np_struct (array): Structure volume
np_CT (array): CT volume
AddMap (array): AddMap volume
desiredImageMatrixSize (tuple): Desired number of slices in the new volume
Return:
np_struct_trunc (array): Truncated structure volume
np_CT_trunc(array): Truncated CT volume
AddMap_trunc (array): Truncated AddMap volume
"""
# We are counting distances between slices, not amount of slices, important to remember below