-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathmyVOCDataSet.py
145 lines (138 loc) · 6.1 KB
/
myVOCDataSet.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
from drawBox import drawBBoxes
import random
import torch
from torchvision import datasets
import torchvision.transforms.functional as tfFunc
from PIL import Image, ImageEnhance, ImageFilter
from PIL import Image,ImageEnhance,ImageFilter
# Dataset mount point on the cloud training platform.
# NOTE(review): not referenced anywhere in this file — presumably consumed by
# another module; confirm before removing.
CloudInputDir='/input0'
# Mapping from VOC-2007 class name to its 0-based class index; used both to
# build the target tensor's class channels and by the dataset factories below.
VOC_CLASSES = { # always index 0
'aeroplane':0, 'bicycle':1, 'bird':2, 'boat':3,
'bottle':4, 'bus':5, 'car':6, 'cat':7, 'chair':8,
'cow':9, 'diningtable':10, 'dog':11, 'horse':12,
'motorbike':13, 'person':14, 'pottedplant':15,
'sheep':16, 'sofa':17, 'train':18, 'tvmonitor':19}
class myVOCtransform(object):
    """Joint (image, annotation) transform for ``torchvision`` VOC detection
    samples, producing a ``(image tensor, S x S x (B*5 + C) target)`` pair in
    YOLO-v1 style.

    Each grid cell holds B copies of (delX, delY, h, w, confidence) followed
    by C one-hot class channels; only the first object whose centre falls in a
    cell is encoded there.

    NOTE(review): the VOC x/y coordinates are deliberately swapped when boxes
    are read (``xmin`` is taken from ``bndbox['ymin']`` etc.), so boxes travel
    through this class as (row_min, col_min, row_max, col_max). The flip /
    resize / encode code below is written against that convention — do not
    "correct" the swap in isolation.
    """
    # Candidate factors shared by the brightness / contrast / sharpness /
    # saturation jitters; 1 means "leave the image unchanged".
    changeFactor = [0.6, 1, 1.2, 1.3, 1.5, 1.8]

    def __init__(self, classlist, S=7, B=2, C=20, inputSize=224, train=False):
        """
        Args:
            classlist: mapping from class name to 0-based class index
                (e.g. ``VOC_CLASSES``).
            S: number of grid cells per side.
            B: number of predicted boxes per cell.
            C: number of classes.
            inputSize: side length (pixels) the image is resized to.
            train: when True, apply random photometric and flip augmentation.
        """
        super().__init__()
        self.S = S
        self.B = B
        self.C = C
        self.inputSize = inputSize
        self.classlist = classlist
        self.train = train

    def __call__(self, img, anno):
        """Convert one (PIL image, VOC annotation dict) sample to tensors.

        Returns:
            (image tensor from ``to_tensor``, FloatTensor of shape
            ``[S, S, B*5 + C]``).
        """
        random.seed()
        objectList = anno['annotation']['object']
        boxList = torch.FloatTensor(size=[len(objectList), 5])
        # VOC pixel indices start at 1, hence the -1 everywhere.  Note the
        # deliberate x/y swap (see class docstring).
        for i, obj in enumerate(objectList):
            xmin = int(obj['bndbox']['ymin']) - 1
            ymin = int(obj['bndbox']['xmin']) - 1
            xmax = int(obj['bndbox']['ymax']) - 1
            ymax = int(obj['bndbox']['xmax']) - 1
            cls = obj['name']
            # Column 4 stores the class's channel index inside the target
            # tensor, offset past the B*5 box channels.
            # FIX: use the injected classlist instead of the module-level
            # VOC_CLASSES global (same mapping in practice, but this is what
            # the constructor parameter is for).
            boxList[i, :] = torch.tensor(
                [xmin, ymin, xmax, ymax, self.classlist[cls] + self.B * 5])
        target = torch.zeros(size=[self.S, self.S, self.B * 5 + self.C])
        if self.train:
            img = self.randomBrightness(img)
            img = self.randomSharporBlur(img)
            img = self.randomContrast(img)
            img = self.randomSatuation(img)
            img, boxList = self.randomFlip(img, boxList)
            # random crop removed from the pipeline: it degraded results.
        # FIX: PIL's Image.size is a (width, height) 2-tuple, so the original
        # comparison against a 3-tuple was always True and the image was
        # unconditionally resized.  Compare against the real size so an
        # already-correct image skips the (no-op) resize.
        if img.size != (self.inputSize, self.inputSize):
            img, boxList = self.reSize(img, boxList)
        gridSize = self.inputSize // self.S
        for box in boxList:
            # Box centre and normalized height/width (swapped-axis layout:
            # indices 0/2 are rows, 1/3 are columns).
            y = (box[1] + box[3]) / 2
            x = (box[0] + box[2]) / 2
            h = (box[2] - box[0]) / self.inputSize
            w = (box[3] - box[1]) / self.inputSize
            gridX = int(x // gridSize)
            gridY = int(y // gridSize)
            # FIX: bound check against self.S instead of a hard-coded 7, so
            # non-default grid sizes are diagnosed too (behaviour identical
            # at the default S=7).
            if gridX >= self.S or gridY >= self.S:
                print(gridX, gridY)
            # Centre offset within its cell, normalized to [0, 1).
            delX = float(x % gridSize) / float(gridSize)
            delY = float(y % gridSize) / float(gridSize)
            # Encode only if this cell is still empty (confidence flag == 0).
            if target[gridX, gridY, 4] != 1:
                for b in range(self.B):
                    target[gridX, gridY, b * 5] = delX
                    target[gridX, gridY, b * 5 + 1] = delY
                    target[gridX, gridY, b * 5 + 2] = h
                    target[gridX, gridY, b * 5 + 3] = w
                    target[gridX, gridY, b * 5 + 4] = 1
                target[gridX, gridY, int(box[4])] = 1
        return tfFunc.to_tensor(img), target

    def randomFlip(self, img: Image.Image, boxes):
        """Horizontally flip the image (p=0.5) and mirror the boxes' column
        coordinates (indices 1 and 3 under the swapped-axis layout)."""
        if random.random() < .5:
            W, _ = img.size
            img = img.transpose(Image.FLIP_LEFT_RIGHT)
            boxes[:, [1, 3]] = W - boxes[:, [3, 1]] - 1
        return img, boxes

    def randomCrop(self, img: Image.Image, boxes: torch.Tensor):
        """Randomly crop up to 20% off each side; keep only the boxes whose
        intersection with the crop covers at least 60% of their area.

        Currently unused: disabled at the call site because it hurt accuracy.
        """
        # FIX: the original guard was `random.random() >= 0`, which is always
        # True (debug leftover); crop with probability 0.5 instead.
        if random.random() < .5:
            W, H = img.size
            N = boxes.size(0)
            left = random.randint(0, int(W * .2))
            top = random.randint(0, int(H * .2))
            right = random.randint(int(W * .8), W)
            bot = random.randint(int(H * .8), H)
            cropBox = torch.tensor([left, top, right, bot]).unsqueeze(0)
            # Intersection rectangle of every box with the crop window.
            lt = torch.max(  # top-left corners
                boxes[:, :2].unsqueeze(1).expand(N, 1, 2),
                cropBox[:, :2].unsqueeze(0).expand(N, 1, 2),
            )
            rb = torch.min(  # bottom-right corners
                boxes[:, 2:4].unsqueeze(1).expand(N, 1, 2),
                cropBox[:, 2:].unsqueeze(0).expand(N, 1, 2),
            )
            wh = rb - lt
            wh[wh < 0] = 0  # non-overlapping boxes get zero intersection
            inter = (wh[:, :, 0] * wh[:, :, 1]).squeeze()
            boxesArea = (boxes[:, 2] - boxes[:, 0]) * (boxes[:, 3] - boxes[:, 1])
            mask = ((inter / boxesArea) >= .6).unsqueeze(-1).expand_as(boxes)
            boxes = boxes[mask].view(-1, 5)
            # Shift surviving boxes into crop coordinates and clamp to bounds.
            boxes[:, :2] = boxes[:, :2] - cropBox[:, :2].expand_as(boxes[:, :2])
            boxes[:, 2:4] = boxes[:, 2:4] - cropBox[:, :2].expand_as(boxes[:, 2:4])
            boxes[boxes[:, 0] < 0, 0] = 0
            boxes[boxes[:, 1] < 0, 1] = 0
            boxes[boxes[:, 2] > (bot - top), 2] = bot - top - 1
            boxes[boxes[:, 3] > (right - left), 3] = right - left - 1
            img = img.crop((left, top, right, bot))
        return img, boxes

    def randomBrightness(self, img: Image.Image):
        """Scale brightness by a random factor from ``changeFactor``."""
        b = random.choice(self.changeFactor)
        return ImageEnhance.Brightness(img).enhance(b)

    def randomContrast(self, img: Image.Image):
        """Scale contrast by a random factor from ``changeFactor``."""
        c = random.choice(self.changeFactor)
        return ImageEnhance.Contrast(img).enhance(c)

    def randomSharporBlur(self, img: Image.Image):
        """Either blur (p=0.5) or sharpen by a random ``changeFactor``."""
        if random.random() < .5:
            img = img.filter(ImageFilter.GaussianBlur(1.5))
        else:
            s = random.choice(self.changeFactor)
            img = ImageEnhance.Sharpness(img).enhance(s)
        return img

    def randomSatuation(self, img: Image.Image):
        """Scale colour saturation by a random factor from ``changeFactor``."""
        s = random.choice(self.changeFactor)
        return ImageEnhance.Color(img).enhance(s)

    def reSize(self, img: Image.Image, boxes):
        """Resize the image to inputSize x inputSize and scale the boxes.

        Rows (indices 0/2) scale by inputSize/H and columns (1/3) by
        inputSize/W, matching the swapped-axis box layout.
        """
        imgW, imgH = img.size
        img = img.resize((self.inputSize, self.inputSize))
        resizeFactor = torch.tensor(
            [self.inputSize / imgH, self.inputSize / imgW,
             self.inputSize / imgH, self.inputSize / imgW]
        ).unsqueeze(0).expand_as(boxes[:, :4])
        boxes[:, :4] = boxes[:, :4] * resizeFactor
        return img, boxes
def loadVOCTrainDataSet(root='../root', year='2007', d=False):
    """Return the VOC *train* split wrapped with the augmenting transform.

    Args:
        root: dataset root directory.
        year: VOC challenge year.
        d: download the dataset if it is missing.
    """
    transform = myVOCtransform(VOC_CLASSES, train=True)
    return datasets.VOCDetection(
        root=root,
        year=year,
        image_set='train',
        download=d,
        transforms=transform,
    )
def loadVOCValDataSet(root='../root', year='2007', d=False):
    """Return the VOC *val* split wrapped with the non-augmenting transform.

    Args:
        root: dataset root directory.
        year: VOC challenge year.
        d: download the dataset if it is missing.
    """
    transform = myVOCtransform(VOC_CLASSES)
    return datasets.VOCDetection(
        root=root,
        year=year,
        image_set='val',
        download=d,
        transforms=transform,
    )