Input processing
mu 定义了 整个数据集的均值,是一个三通道的均值向量
两行有用的代码:
mu.mean(1).mean(1) 相当于 np.mean(np.mean(mu, axis = 1), axis =1)
zip('BGR', mu) 输出对应 mean-subtracted values: [('B', 104.0069879317889), ('G', 116.66876761696767), ('R', 122.6789143406786)]
caffe.io.Transformer 是 ~/caffe/python/caffe/ 文件夹下 io.py 文件定义的一个类 Transformer
Transformer类
transformer = caffe.io.Transformer({'data': net.blobs['data'].data.shape})
对于 __init__(self, inputs):把传入的 inputs(一个 Python 字典,键为输入层名称,值为该层 blob 的 shape)保存下来;其他需要 set 的参数(transpose、channel_swap、raw_scale、mean、input_scale)也都各自初始化为空字典
确定了键为 'data',就保证了其他需要 set 的参数也都是针对 data 层设置的
所以对于 __check_input(self, in_),in_(一般是字符串)必须是初始化时字典中的键(这里是 'data'),以此来做检查;否则会抛出异常
# Load the example cat image that lives under caffe_root.
image = caffe.io.load_image(caffe_root + 'examples/images/cat.jpg')
# Run the preprocessing configured on `transformer` for the 'data' input.
# NOTE(review): `transformer` is only created further down in these notes;
# that setup has to run before this line.
transformed_image = transformer.preprocess('data', image)
# Show the original image, not the transformed one.
# NOTE(review): `plt` is presumably matplotlib.pyplot - confirm the import.
plt.imshow(image)
import numpy as np
import skimage.io
from scipy.ndimage import zoom
from skimage.transform import resize
def preprocess(self, in_, data):
    """
    Format input for Caffe:
    - convert to single
    - resize to input dimensions (preserving number of channels)
    - transpose dimensions to K x H x W
    - reorder channels (for instance color to BGR)
    - scale raw input (e.g. from [0, 1] to [0, 255] for ImageNet models)
    - subtract mean
    - scale feature

    Each optional step runs only if a value was configured for `in_`.
    The caller's `data` array is never modified.

    Parameters
    ----------
    in_ : name of input blob to preprocess for
    data : (H' x W' x K) ndarray

    Returns
    -------
    caffe_in : (K x H x W) ndarray for input to a Net
    """
    self.__check_input(in_)
    # Always work on a private float32 copy. With copy=False an input that
    # is already float32 comes back as the very same array, and the in-place
    # scaling / mean subtraction below would then silently overwrite the
    # caller's image whenever no resize or channel swap happened to copy it.
    caffe_in = data.astype(np.float32)
    transpose = self.transpose.get(in_)
    channel_swap = self.channel_swap.get(in_)
    raw_scale = self.raw_scale.get(in_)
    mean = self.mean.get(in_)
    input_scale = self.input_scale.get(in_)
    # Spatial size (H, W) expected by the blob. Normalised to a tuple so the
    # comparison with ndarray.shape also works when the shape was supplied
    # as a list (a tuple never compares equal to a list).
    in_dims = tuple(self.inputs[in_][2:])
    if caffe_in.shape[:2] != in_dims:
        # NOTE(review): resize_image is not defined in this excerpt; it is
        # expected to be provided by the surrounding module.
        caffe_in = resize_image(caffe_in, in_dims)
    if transpose is not None:
        caffe_in = caffe_in.transpose(transpose)
    if channel_swap is not None:
        # Channels are the first axis at this point (after the transpose).
        caffe_in = caffe_in[channel_swap, :, :]
    if raw_scale is not None:
        caffe_in *= raw_scale
    if mean is not None:
        caffe_in -= mean
    if input_scale is not None:
        caffe_in *= input_scale
    return caffe_in
def deprocess(self, in_, data):
    """
    Invert Caffe formatting; see preprocess().

    The configured steps are undone in the reverse of the order in which
    preprocess() applies them: input scale, mean, raw scale, channel swap,
    transpose. Resizing is not undone.

    Parameters
    ----------
    in_ : name of the input blob whose settings should be inverted
    data : ndarray in the layout produced by preprocess()

    Returns
    -------
    decaf_in : a new ndarray; `data` itself is left untouched
    """
    self.__check_input(in_)
    # Copy first so the in-place arithmetic below cannot touch the caller's
    # array. squeeze() then drops every length-1 axis (e.g. a batch axis of
    # size 1); note that it would equally drop a length-1 channel axis.
    decaf_in = data.copy().squeeze()
    transpose = self.transpose.get(in_)
    channel_swap = self.channel_swap.get(in_)
    raw_scale = self.raw_scale.get(in_)
    mean = self.mean.get(in_)
    input_scale = self.input_scale.get(in_)
    if input_scale is not None:
        decaf_in /= input_scale
    if mean is not None:
        decaf_in += mean
    if raw_scale is not None:
        decaf_in /= raw_scale
    if channel_swap is not None:
        # argsort of a permutation yields its inverse permutation.
        decaf_in = decaf_in[np.argsort(channel_swap), :, :]
    if transpose is not None:
        decaf_in = decaf_in.transpose(np.argsort(transpose))
    return decaf_in
## Pre-processing
class Transformer:
    """
    Transform input for feeding into a Net.

    Note: this is mostly for illustrative purposes and it is likely better
    to define your own input preprocessing routine for your needs.

    Parameters
    ----------
    inputs : dict mapping each input blob name to that blob's shape, e.g.
        {'data': net.blobs['data'].data.shape}. The setters read index 1
        of the shape as the channel count and indices 1: as the
        per-example shape.
    """

    def __init__(self, inputs):
        self.inputs = inputs
        # Per-input settings, keyed by input blob name. An absent key means
        # the corresponding step is not configured for that input.
        self.transpose = {}
        self.channel_swap = {}
        self.raw_scale = {}
        self.mean = {}
        self.input_scale = {}

    def __check_input(self, in_):
        """Raise an Exception unless `in_` is a key of `self.inputs`."""
        if in_ not in self.inputs:
            raise Exception('{} is not one of the net inputs: {}'.format(
                in_, self.inputs))

    def preprocess(self, in_, data):
        """
        Format input for Caffe:
        - convert to single
        - resize to input dimensions (preserving number of channels)
        - transpose dimensions to K x H x W
        - reorder channels (for instance color to BGR)
        - scale raw input (e.g. from [0, 1] to [0, 255] for ImageNet models)
        - subtract mean
        - scale feature

        NOTE: in this listing the method is only a stub (it does nothing
        and returns None); the implementation is listed separately as the
        standalone preprocess() earlier in this file.

        Parameters
        ----------
        in_ : name of input blob to preprocess for
        data : (H' x W' x K) ndarray

        Returns
        -------
        caffe_in : (K x H x W) ndarray for input to a Net
        """
        pass

    def deprocess(self, in_, data):
        """
        Invert Caffe formatting; see preprocess().

        NOTE: in this listing the method is only a stub (it does nothing
        and returns None); the implementation is listed separately as the
        standalone deprocess() earlier in this file.
        """
        pass

    def set_transpose(self, in_, order):
        """
        Set the order in which the dimensions of an input are transposed,
        e.g. (2, 0, 1) to move the channels of an H x W x K image to the
        outermost dimension (K x H x W).

        Parameters
        ----------
        in_ : which input to assign this dimension order
        order : the order to transpose the dimensions

        Raises
        ------
        Exception : if `in_` is not a known input, or if `order` does not
            have one entry per non-batch dimension of the input
        """
        self.__check_input(in_)
        # The stored shape includes the batch axis; the transpose does not.
        if len(order) != len(self.inputs[in_]) - 1:
            raise Exception('Transpose order needs to have the same number of '
                            'dimensions as the input.')
        self.transpose[in_] = order

    def set_channel_swap(self, in_, order):
        """
        Set the input channel order for e.g. RGB to BGR conversion
        as needed for the reference ImageNet model.
        N.B. this assumes the channels are the first dimension AFTER transpose.

        Parameters
        ----------
        in_ : which input to assign this channel order
        order : the order to take the channels.
            (2,1,0) maps RGB to BGR for example.

        Raises
        ------
        Exception : if `in_` is not a known input, or if `order` does not
            have one entry per input channel
        """
        self.__check_input(in_)
        if len(order) != self.inputs[in_][1]:
            raise Exception('Channel swap needs to have the same number of '
                            'dimensions as the input channels.')
        self.channel_swap[in_] = order

    def set_raw_scale(self, in_, scale):
        """
        Set the scale of raw features s.t. the input blob = input * scale.
        While Python represents images in [0, 1], certain Caffe models
        like CaffeNet and AlexNet represent images in [0, 255] so the raw_scale
        of these models must be 255.

        Parameters
        ----------
        in_ : which input to assign this scale factor
        scale : scale coefficient
        """
        self.__check_input(in_)
        self.raw_scale[in_] = scale

    def set_mean(self, in_, mean):
        """
        Set the mean to subtract for centering the data.

        A 1-D mean holds one value per channel and is reshaped to
        (K, 1, 1) so it broadcasts over height and width. Any other mean
        must match the per-example input shape (K, H, W); a 2-D mean is
        checked as if it were (1, H, W).

        Parameters
        ----------
        in_ : which input to assign this mean.
        mean : mean ndarray (input dimensional or broadcastable)

        Raises
        ------
        ValueError : if the mean's shape is incompatible with the input
        """
        self.__check_input(in_)
        ms = mean.shape
        if mean.ndim == 1:
            # broadcast channels
            if ms[0] != self.inputs[in_][1]:
                raise ValueError('Mean channels incompatible with input.')
            mean = mean[:, np.newaxis, np.newaxis]
        else:
            # elementwise mean
            if len(ms) == 2:
                ms = (1,) + ms
            if len(ms) != 3:
                raise ValueError('Mean shape invalid')
            # tuple(): ndarray.shape is a tuple and never compares equal to
            # a list, so a list-typed input shape would always be rejected.
            if ms != tuple(self.inputs[in_][1:]):
                raise ValueError('Mean shape incompatible with input shape.')
        self.mean[in_] = mean

    def set_input_scale(self, in_, scale):
        """
        Set the scale of preprocessed inputs s.t. the blob = blob * scale.
        N.B. input_scale is done AFTER mean subtraction and other preprocessing
        while raw_scale is done BEFORE.

        Parameters
        ----------
        in_ : which input to assign this scale factor
        scale : scale coefficient
        """
        self.__check_input(in_)
        self.input_scale[in_] = scale
# load the mean ImageNet image (as distributed with Caffe) for subtraction
mu = np.load(caffe_root + 'python/caffe/imagenet/ilsvrc_2012_mean.npy')
mu = mu.mean(1).mean(1)  # average over pixels to obtain the mean (BGR) pixel values
# Single-argument print() plus an explicit list() gives the same output on
# Python 2 and Python 3; the bare print statement is a SyntaxError on
# Python 3, where zip() also returns an iterator rather than a list.
print('mean-subtracted values: {}'.format(list(zip('BGR', mu))))

# create transformer for the input called 'data'
transformer = caffe.io.Transformer({'data': net.blobs['data'].data.shape})

transformer.set_transpose('data', (2, 0, 1))     # move image channels to outermost dimension
transformer.set_mean('data', mu)                 # subtract the dataset-mean value in each channel
transformer.set_raw_scale('data', 255)           # rescale from [0, 1] to [0, 255]
transformer.set_channel_swap('data', (2, 1, 0))  # swap channels from RGB to BGR