ktl014
7/27/2017 - 10:29 PM

Steps for input preprocessing — comparing Caffe's built-in input preprocessing (Transformer) with Pedro's manual input preprocessing:

# Image preparation during training stage:
# load -> resize to 256x256 -> scale to uint8 [0,255] -> BGR -> channels-first
img = caffe.io.load_image(img_fns[i])                    # read image (RGB, floats in [0,1])
img = caffe.io.resize_image(img, np.array([256, 256]))   # resize to 256x256
img = (255 * img).astype(np.uint8)                       # rescale [0,1] -> [0,255]
img = np.transpose(img[:, :, ::-1], (2, 0, 1))           # RGB->BGR, then [X,Y,C]->[C,X,Y]
if DEBUG:
    print(img.max(), img.min(), img.mean(), img.shape)
    exit(0)

# Image preparation during deploy stage
def prep_image(img, crop_size=227, mean_bgr=(104., 117., 123.)):
    """Center-crop and mean-subtract a channels-first image for deployment.

    Args:
        img: array in [C, X, Y] layout, BGR channel order, spatial dims >= crop_size.
             (For the default 256x256 input this reproduces the original [14:241] crop.)
        crop_size: side length of the square center crop (default 227).
        mean_bgr: per-channel BGR mean to subtract (same as in trainval.prototxt).

    Returns:
        float array of shape [C, crop_size, crop_size] with the mean removed.
    """
    _, h, w = img.shape
    y0 = (h - crop_size) // 2                              # top-left corner of the
    x0 = (w - crop_size) // 2                              # centered crop window
    img = img.astype(float)[:, y0:y0 + crop_size, x0:x0 + crop_size]
    img -= np.array(mean_bgr, dtype=float).reshape((len(mean_bgr), 1, 1))
    return img
1. Read in the image
2. Resize the image to 256x256
3. Rescale the raw values from [0,1] to [0,255]
4. Swap channels from RGB -> BGR
5. Move image channels to the outermost dimension: [X,Y,C] -> [C,X,Y]
    #=============================================================================#
    #                                                                             #
    # Set up input preprocessing                                                  #
    #                                                                             #
    #=============================================================================#
    # Load the mean ImageNet image (shipped with Caffe) for subtraction
    mu = np.load(caffe_root + '/python/caffe/imagenet/ilsvrc_2012_mean.npy')
    mu = mu.mean(1).mean(1)  # average over pixels to get per-channel (BGR) means
    # Use the print() function for Python 2/3 consistency (the file already uses
    # print(...) above); wrap zip in list() so the pairs are shown under Python 3.
    print('mean-subtracted values:', list(zip("BGR", mu)))

    '''
    Default CaffeNet is configured to take images in BGR format
    - They start in range [0,255] and then have the mean ImageNet pixel value subtracted from them
    - Channel dimension is expected as the first (outermost) dimension
    '''

    # Create transformer for input data, keyed by the net's input blob shape
    transformer = caffe.io.Transformer({'data': net.blobs['data'].data.shape})

    transformer.set_transpose('data', (2, 0, 1))      # move image channels to outermost dimension
    transformer.set_mean('data', mu)                  # subtract the dataset-mean value in each channel
    transformer.set_raw_scale('data', 255)            # rescale from [0,1] --> [0,255]
    transformer.set_channel_swap('data', (2, 1, 0))   # swap channels from RGB to BGR

    # mean-subtracted values: [('B', 104.0069879317889), ('G', 116.66876761696767), ('R', 122.6789143406786)]