from caffe.proto import caffe_pb2
def solver(train_net_path, test_net_path=None, base_lr=0.001):
s = caffe_pb2.SolverParameter()
# Specify locations of the train and (maybe) test networks.
s.train_net = train_net_path
if test_net_path is not None:
s.test_net.append(test_net_path)
s.test_interval = 1000 # Test after every 1000 training iterations.
s.test_iter.append(200) # Test on 100 batches each time we test.
# The number of iterations over which to average the gradient.
# Effectively boosts the training batch size by the given factor, without
# affecting memory utilization.
s.iter_size = 1
s.max_iter = 5000 # # of times to update the net (training iterations)
# Solve using the stochastic gradient descent (SGD) algorithm.
# Other choices include 'Adam' and 'RMSProp'.
s.type = 'SGD'
# Set the initial learning rate for SGD.
s.base_lr = base_lr
# Set `lr_policy` to define how the learning rate changes during training.
# Here, we 'step' the learning rate by multiplying it by a factor `gamma`
# every `stepsize` iterations.
s.lr_policy = 'step'
s.gamma = 0.6
s.stepsize = 1000
# Set other SGD hyperparameters. Setting a non-zero `momentum` takes a
# weighted average of the current gradient and previous gradients to make
# learning more stable. L2 weight decay regularizes learning, to help prevent
# the model from overfitting.
s.momentum = 0.9
s.weight_decay = 5e-4
# Display the current training loss and accuracy every 1000 iterations.
s.display = 20
s.average_loss = 20
# Snapshots are files used to store networks we've trained. Here, we'll
# snapshot every 10K iterations -- ten times during training.
s.snapshot = 1000
s.snapshot_prefix = './model/Saliency_GCN'
# Train on the GPU. Using the CPU to train large networks is very slow.
s.solver_mode = caffe_pb2.SolverParameter.GPU
# Write the solver to a temporary file and return its filename.
with open('./solver.prototxt', 'w') as f:
f.write(str(s))
return f.name
train_net_path = './train.prototxt'
dev_net_path = './val.prototxt'
solver_name = solver(train_net_path, dev_net_path, 1e-9)
solver = caffe.SGDSolver(solver_name)
from caffe.proto import caffe_pb2
def solver(train_net_path, test_net_path=None, base_lr=0.001):
s = caffe_pb2.SolverParameter()
# Specify locations of the train and (maybe) test networks.
s.train_net = train_net_path
if test_net_path is not None:
s.test_net.append(test_net_path)
s.test_interval = 1000 # Test after every 1000 training iterations.
s.test_iter.append(100) # Test on 100 batches each time we test.
# The number of iterations over which to average the gradient.
# Effectively boosts the training batch size by the given factor, without
# affecting memory utilization.
s.iter_size = 1
s.max_iter = 100000 # # of times to update the net (training iterations)
# Solve using the stochastic gradient descent (SGD) algorithm.
# Other choices include 'Adam' and 'RMSProp'.
s.type = 'SGD'
# Set the initial learning rate for SGD.
s.base_lr = base_lr
# Set `lr_policy` to define how the learning rate changes during training.
# Here, we 'step' the learning rate by multiplying it by a factor `gamma`
# every `stepsize` iterations.
s.lr_policy = 'step'
s.gamma = 0.1
s.stepsize = 20000
# Set other SGD hyperparameters. Setting a non-zero `momentum` takes a
# weighted average of the current gradient and previous gradients to make
# learning more stable. L2 weight decay regularizes learning, to help prevent
# the model from overfitting.
s.momentum = 0.9
s.weight_decay = 5e-4
# Display the current training loss and accuracy every 1000 iterations.
s.display = 1000
# Snapshots are files used to store networks we've trained. Here, we'll
# snapshot every 10K iterations -- ten times during training.
s.snapshot = 10000
s.snapshot_prefix = caffe_root + 'models/finetune_flickr_style/finetune_flickr_style'
# Train on the GPU. Using the CPU to train large networks is very slow.
s.solver_mode = caffe_pb2.SolverParameter.GPU
# Write the solver to a temporary file and return its filename.
with tempfile.NamedTemporaryFile(delete=False) as f:
f.write(str(s))
return f.name
### define solver
from caffe.proto import caffe_pb2
s = caffe_pb2.SolverParameter()
# Set a seed for reproducible experiments:
# this controls for randomization in training.
s.random_seed = 0xCAFFE
# Specify locations of the train and (maybe) test networks.
s.train_net = train_net_path
s.test_net.append(test_net_path)
s.test_interval = 500 # Test after every 500 training iterations.
s.test_iter.append(100) # Test on 100 batches each time we test.
s.max_iter = 10000 # no. of times to update the net (training iterations)
# EDIT HERE to try different solvers
# solver types include "SGD", "Adam", and "Nesterov" among others.
s.type = "SGD"
# Set the initial learning rate for SGD.
s.base_lr = 0.01 # EDIT HERE to try different learning rates
# Set momentum to accelerate learning by
# taking weighted average of current and previous updates.
s.momentum = 0.9
# Set weight decay to regularize and prevent overfitting
s.weight_decay = 5e-4
# Set `lr_policy` to define how the learning rate changes during training.
# This is the same policy as our default LeNet.
s.lr_policy = 'inv'
s.gamma = 0.0001
s.power = 0.75
# EDIT HERE to try the fixed rate (and compare with adaptive solvers)
# `fixed` is the simplest policy that keeps the learning rate constant.
# s.lr_policy = 'fixed'
# Display the current training loss and accuracy every 1000 iterations.
s.display = 1000
# Snapshots are files used to store networks we've trained.
# We'll snapshot every 5K iterations -- twice during training.
s.snapshot = 5000
s.snapshot_prefix = 'mnist/custom_net'
# Train on the GPU
s.solver_mode = caffe_pb2.SolverParameter.GPU
# Write the solver to a temporary file and return its filename.
with open(solver_config_path, 'w') as f:
f.write(str(s))
### load the solver and create train and test nets
solver = None # ignore this workaround for lmdb data (can't instantiate two solvers on the same data)
solver = caffe.get_solver(solver_config_path)