HDF5.py ----- defines the HDF5 file write operation. HDF5-layer.py ---- defines the HDF5 data layer and an easy network.
from caffe import layers as L
from caffe import params as P
def logreg(hdf5, batch_size):
# logistic regression: data, matrix multiplication, and 2-class softmax loss
n = caffe.NetSpec()
n.data, n.label = L.HDF5Data(batch_size=batch_size, source=hdf5, ntop=2)
n.ip1 = L.InnerProduct(n.data, num_output=2, weight_filler=dict(type='xavier'))
n.accuracy = L.Accuracy(n.ip1, n.label)
n.loss = L.SoftmaxWithLoss(n.ip1, n.label)
return n.to_proto()
train_net_path = 'examples/hdf5_classification/logreg_auto_train.prototxt'
with open(train_net_path, 'w') as f:
f.write(str(logreg('examples/hdf5_classification/data/train.txt', 10)))
test_net_path = 'examples/hdf5_classification/logreg_auto_test.prototxt'
with open(test_net_path, 'w') as f:
f.write(str(logreg('examples/hdf5_classification/data/test.txt', 10)))
# Write out the data to HDF5 files in a temp directory.
# This file is assumed to be caffe_root/examples/hdf5_classification.ipynb
dirname = os.path.abspath('./examples/hdf5_classification/data')
if not os.path.exists(dirname):
os.makedirs(dirname)
train_filename = os.path.join(dirname, 'train.h5')
test_filename = os.path.join(dirname, 'test.h5')
# HDF5DataLayer source should be a file containing a list of HDF5 filenames.
# To show this off, we'll list the same data file twice.
with h5py.File(train_filename, 'w') as f:
f['data'] = X
f['label'] = y.astype(np.float32)
with open(os.path.join(dirname, 'train.txt'), 'w') as f:
f.write(train_filename + '\n')
f.write(train_filename + '\n')
# HDF5 is pretty efficient, but can be further compressed.
comp_kwargs = {'compression': 'gzip', 'compression_opts': 1}
with h5py.File(test_filename, 'w') as f:
f.create_dataset('data', data=Xt, **comp_kwargs)
f.create_dataset('label', data=yt.astype(np.float32), **comp_kwargs)
with open(os.path.join(dirname, 'test.txt'), 'w') as f:
f.write(test_filename + '\n')