-
Notifications
You must be signed in to change notification settings - Fork 15
/
merge_dataset.py
92 lines (77 loc) · 3.42 KB
/
merge_dataset.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
import argparse
import os
import glob
import shutil
parser = argparse.ArgumentParser()
requiredArguments = parser.add_argument_group("required arguments")
requiredArguments.add_argument("-s", "--sourcedir", type=str, required=True, help="Directory containing the source annotations/images")
requiredArguments.add_argument("-t", "--targetdir", type=str, required=True, help="Directory containing the target annotations/images")
requiredArguments.add_argument("-sc", "--sourceclasses", type=str, required=True, help="File containing the classes/labels for the source annotations")
requiredArguments.add_argument("-tc", "--targetclasses", type=str, required=True, help="File containing the classes/labels for the target annotations")
args = parser.parse_args()
annSourceDir = args.sourcedir
annTargetDir = args.targetdir
sourceClasses = []
with open(args.sourceclasses) as cf:
classes = cf.read().splitlines()
sourceClasses = [x for x in classes if x.strip()]
targetClasses = []
with open(args.targetclasses) as cf:
classes = cf.read().splitlines()
targetClasses = [x for x in classes if x.strip()]
#keep old idx and append unknown classes
srcClsDiff = [x for x in sourceClasses if x not in targetClasses]
mergedClasses = []
mergedClasses.extend(targetClasses)
mergedClasses.extend(srcClsDiff)
targetAnns = glob.iglob(annTargetDir + '/*.txt')
targetAnns = [os.path.basename(x) for x in targetAnns]
srcImgs = dict()
imgPattern = ["*.jpg", "*.jpeg", "*.png", "*.JPG", "*.JPEG", "*.PNG"]
for imp in imgPattern:
pattern = annSourceDir+"/"+imp
for img in glob.iglob(pattern):
imgId = os.path.basename(img)
imgId = os.path.splitext(imgId)[0]
srcImgs[imgId] = os.path.basename(img)
#take care of line ending stuff
def appendToFile(path, content):
newContent = []
if(os.path.isfile(path)):
with open(path) as f:
oldLines = f.read().splitlines()
oldLines = [x for x in oldLines if x.strip()]
newContent.extend(oldLines)
newContent.extend(content)
with open(path, 'w') as f:
if newContent:
f.write("\n".join(newContent))
def writeAnnotation(srcAnn, targetAnn):
with open(srcAnn) as saf:
srcLines = saf.read().splitlines()
srcLines = [x for x in srcLines if x.strip()]
#rewrite old idx to new idx
for idx, srcLine in enumerate(srcLines):
splitLine = srcLine.split()
oldIdx = int(splitLine[0])
srcClass = sourceClasses[oldIdx]
newIdx = mergedClasses.index(srcClass)
splitLine[0] = str(newIdx)
srcLines[idx] = " ".join(splitLine)
appendToFile(targetAnn, srcLines);
for srcAnn in glob.iglob(annSourceDir+'/*.txt'):
print("processing %s" % (srcAnn))
srcAnnFileName = os.path.basename(srcAnn)
srcAnnId = os.path.splitext(srcAnnFileName)[0]
targetAnn = os.path.join(annTargetDir, srcAnnFileName)
if(srcAnnFileName in targetAnns):
writeAnnotation(srcAnn, targetAnn)
#unknown annotation but with corresponding image - merge it
elif(srcAnnId in srcImgs):
srcImg = os.path.join(annSourceDir, srcImgs[srcAnnId])
targetImg = os.path.join(annTargetDir, srcImgs[srcAnnId])
shutil.copyfile(srcImg, targetImg)
writeAnnotation(srcAnn, targetAnn)
else:
print("No matching src image or target annotation found for %s - skipping." % (srcAnnFileName))
appendToFile(args.targetclasses,srcClsDiff)