More changes to support C3D

facebookarchive · chuckcho · Jan 6, 2016 · Jan 19, 2016 · Jan 20, 2016 · Jan 20, 2016
commit f350701e87b52f7a4014556fb669f282568bf617
diff --git a/python/caffe/io.py b/python/caffe/io.py
@@ -28,14 +28,43 @@ def resize_image(im, new_dims, interp_order=1):
     Resize an image array with interpolation.
 
     Take
-    im: (H x W x K) ndarray
+    im: (H x W x K) or (H x W x K x L) ndarray
     new_dims: (height, width) tuple of new dimensions.
     interp_order: interpolation order, default is linear.
 
     Give
     im: resized ndarray with shape (new_dims[0], new_dims[1], K)
     """
-    return skimage.transform.resize(im, new_dims, order=interp_order)
+
+    im_min, im_max = im.min(), im.max()
+    if im_max > im_min:
+        # skimage is fast but only understands {1,3} channel images
+        # in [0, 1].
+        im_std = (im - im_min) / (im_max - im_min)
+    else:
+        # the image is a constant -- avoid divide by 0
+        # TODO(chuck): handle 4-dim im (this branch returns a 3-dim array)
+        ret = np.empty((new_dims[0], new_dims[1], im.shape[-1]),
+                       dtype=np.float32)
+        ret.fill(im_min)
+        return ret
+
+    if im.ndim == 3:
+        resized = skimage.transform.resize(im_std, new_dims, order=interp_order)
+        resized = resized * (im_max - im_min) + im_min
+    elif im.ndim == 4:
+        resized = np.empty(new_dims + im.shape[-2:])
+        for l in range(im.shape[3]):
+            resized[:,:,:,l] = skimage.transform.resize(
+                    im_std[:,:,:,l],
+                    new_dims,
+                    order=interp_order
+                    )
+            resized[:,:,:,l] = resized[:,:,:,l] * (im_max - im_min) + im_min
+    else:
+        raise ValueError('Incorrect input array shape.')
+
+    return resized
 
 
 def oversample(images, crop_dims):

diff --git a/python/caffe/pycaffe.py b/python/caffe/pycaffe.py
@@ -176,35 +176,32 @@ def _Net_forward_backward_all(self, blobs=None, diffs=None, **kwargs):
     return all_outs, all_diffs
 
 
-def _Net_set_mean(self, input_, mean_f, mode='elementwise'):
+def _Net_set_mean(self, input_, mean_f):
     """
     Set the mean to subtract for data centering.
 
     Take
     input_: which input to assign this mean.
     mean_f: path to mean .npy with ndarray (input dimensional or broadcastable)
     mode: elementwise = use the whole mean (and check dimensions)
-          channel = channel constant (e.g. mean pixel instead of mean image)
     """
     if not hasattr(self, 'mean'):
         self.mean = {}
     if input_ not in self.inputs:
         raise Exception('Input not in {}'.format(self.inputs))
     in_shape = self.blobs[input_].data.shape
     mean = np.load(mean_f)
-    if mode == 'elementwise':
-        if mean.shape != in_shape[1:]:
-            # Resize mean (which requires H x W x K input in range [0,1]).
-            m_min, m_max = mean.min(), mean.max()
-            normal_mean = (mean - m_min) / (m_max - m_min)
-            mean = caffe.io.resize_image(normal_mean.transpose((1,2,0)),
-                    in_shape[2:]).transpose((2,0,1)) * (m_max - m_min) + m_min
-        self.mean[input_] = mean
-    elif mode == 'channel':
-        self.mean[input_] = mean.mean(1).mean(1).reshape((in_shape[1], 1, 1))
-    else:
-        raise Exception('Mode not in {}'.format(['elementwise', 'channel']))
-
+    if mean.ndim == 5:
+        mean = np.squeeze(mean, 0)
+    if mean.shape != in_shape[1:]:
+        # Resize mean (which requires H x W x K x L input in range [0,1]).
+        m_min, m_max = mean.min(), mean.max()
+        normal_mean = (mean - m_min) / (m_max - m_min)
+        ''' [info] normal_mean.shape=(16, 3, 128, 171),in_shape=(1, 3, 16, 112, 112) '''
+        mean = caffe.io.resize_image(
+                normal_mean.transpose((2,3,0,1)),
+                in_shape[3:]).transpose((2,3,0,1)) * (m_max - m_min) + m_min
+    self.mean[input_] = mean
 
 
 def _Net_set_input_scale(self, input_, scale):
@@ -247,27 +244,27 @@ def _Net_preprocess(self, input_name, input_):
     - scale feature
     - reorder channels (for instance color to BGR)
     - subtract mean
-    - transpose dimensions to K x H x W
+    - transpose dimensions to K x L x H x W (L: c3d_depth)
 
     Take
     input_name: name of input blob to preprocess for
-    input_: (H' x W' x K) ndarray
+    input_: (H' x W' x K x L) ndarray
 
     Give
-    caffe_inputs: (K x H x W) ndarray
+    caffe_inputs: (K x L x H x W) ndarray
     """
     caffe_in = input_.astype(np.float32)
     input_scale = self.input_scale.get(input_name)
     channel_order = self.channel_swap.get(input_name)
     mean = self.mean.get(input_name)
-    in_size = self.blobs[input_name].data.shape[2:]
+    in_size = self.blobs[input_name].data.shape[3:]
     if caffe_in.shape[:2] != in_size:
         caffe_in = caffe.io.resize_image(caffe_in, in_size)
     if input_scale:
         caffe_in *= input_scale
     if channel_order:
-        caffe_in = caffe_in[:, :, channel_order]
-    caffe_in = caffe_in.transpose((2, 0, 1))
+        caffe_in = caffe_in[:, :, channel_order, :]
+    caffe_in = caffe_in.transpose((2, 3, 0, 1))
     if mean is not None:
         caffe_in -= mean
     return caffe_in
@@ -283,11 +280,11 @@ def _Net_deprocess(self, input_name, input_):
     mean = self.mean.get(input_name)
     if mean is not None:
         decaf_in += mean
-    decaf_in = decaf_in.transpose((1,2,0))
+    decaf_in = decaf_in.transpose((2,3,0,1))
     if channel_order:
         channel_order_inverse = [channel_order.index(i)
                                  for i in range(decaf_in.shape[2])]
-        decaf_in = decaf_in[:, :, channel_order_inverse]
+        decaf_in = decaf_in[:, :, channel_order_inverse, :]
     if input_scale:
         decaf_in /= input_scale
     return decaf_in