An interface for the units of computation which can be composed into a Net.
Layers must implement a Forward function, in which they take their input (bottom) Blobs (if any) and compute their output Blobs (if any). They may also implement a Backward function, in which they compute the error gradients with respect to their input Blobs, given the error gradients with their output Blobs.
In terms of how we usually picture a convolutional neural network: a network (net) contains many layers (layer), and what lives inside each layer is essentially data (the data flowing forward and the error flowing backward). Since that data is already implemented in Caffe by the Blob class, it is natural to expect a layer to contain a number of Blob objects, i.e. a vector of Blobs.
1 What data does a layer hold?
 protected:
  /** The protobuf that stores the layer parameters */
  LayerParameter layer_param_;
  /** The vector that stores the learnable parameters as a set of blobs. */
  vector<shared_ptr<Blob<Dtype> > > blobs_;
  /** Vector indicating whether to compute the diff of each param blob. */
  vector<bool> param_propagate_down_;
  /** The vector that indicates whether each top blob has a non-zero weight in
   *  the objective function. */
  vector<Dtype> loss_;
In the source we can indeed see such a vector: the member blobs_, which stores a set of blobs.
As its comment explains, param_propagate_down_ indicates, for each parameter blob of this layer, whether its diff needs to be computed. Why is this needed at all? It helps to re-emphasize the hierarchy here: net > layer > blob. A single layer can involve several blobs, for example several bottoms and several tops, so a per-blob flag is required.
And what is loss_ for? For the moment that is not obvious either (the comment only tells us it marks whether each top blob has a non-zero weight in the objective function).
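To make blobs_ more concrete, here is a small sketch (my own illustration, not taken from the post; the layer and the names num_output / num_input are invented) of how a parameterized layer typically fills it during setup: blobs_[0] holds the weights and blobs_[1] the bias, each one an ordinary Blob.

// Sketch of a hypothetical parameterized layer's setup code; not actual
// Caffe source, sizes are illustrative.
this->blobs_.resize(2);
// blobs_[0]: the weight matrix, stored as a 1 x 1 x num_output x num_input blob
this->blobs_[0].reset(new Blob<Dtype>(1, 1, num_output, num_input));
// blobs_[1]: the bias vector
this->blobs_[1].reset(new Blob<Dtype>(1, 1, 1, num_output));
// The learnable parameters can later be read back through the blobs()
// accessor, e.g. layer->blobs()[0]->cpu_data().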
As for LayerParameter, it is defined in caffe.proto:
message LayerParameter {
  repeated string bottom = 2; // the name of the bottom blobs
  repeated string top = 3; // the name of the top blobs
  optional string name = 4; // the layer name

  // Rules controlling whether and when a layer is included in the network,
  // based on the current NetState. You may specify a non-zero number of rules
  // to include OR exclude, but not both. If no include or exclude rules are
  // specified, the layer is always included. If the current NetState meets
  // ANY (i.e., one or more) of the specified rules, the layer is
  // included/excluded.
  repeated NetStateRule include = 32;
  repeated NetStateRule exclude = 33;

  // NOTE
  // Add new LayerTypes to the enum below in lexicographical order (other than
  // starting with NONE), starting with the next available ID in the comment
  // line above the enum. Update the next available ID when you add a new
  // LayerType.
  //
  // LayerType next available ID: 38 (last added: CONTRASTIVE_LOSS)
  enum LayerType {
    // "NONE" layer type is 0th enum element so that we don't cause confusion
    // by defaulting to an existent LayerType (instead, should usually error if
    // the type is unspecified).
    NONE = 0;
    ABSVAL = 35;
    ACCURACY = 1;
    ARGMAX = 30;
    BNLL = 2;
    CONCAT = 3;
    CONTRASTIVE_LOSS = 37;
    CONVOLUTION = 4;
    DATA = 5;
    DROPOUT = 6;
    DUMMY_DATA = 32;
    EUCLIDEAN_LOSS = 7;
    ELTWISE = 25;
    FLATTEN = 8;
    HDF5_DATA = 9;
    HDF5_OUTPUT = 10;
    HINGE_LOSS = 28;
    IM2COL = 11;
    IMAGE_DATA = 12;
    INFOGAIN_LOSS = 13;
    INNER_PRODUCT = 14;
    LRN = 15;
    MEMORY_DATA = 29;
    MULTINOMIAL_LOGISTIC_LOSS = 16;
    MVN = 34;
    POOLING = 17;
    POWER = 26;
    RELU = 18;
    SIGMOID = 19;
    SIGMOID_CROSS_ENTROPY_LOSS = 27;
    SILENCE = 36;
    SOFTMAX = 20;
    SOFTMAX_LOSS = 21;
    SPLIT = 22;
    SLICE = 33;
    TANH = 23;
    WINDOW_DATA = 24;
    THRESHOLD = 31;
  }
  optional LayerType type = 5; // the layer type from the enum above

  // The blobs containing the numeric parameters of the layer
  repeated BlobProto blobs = 6;
  // The names of the parameter blobs -- useful for sharing parameters among
  // layers (but never required).
  repeated string param = 1001;
  // Whether to require shared weights to have the same shape, or just the same
  // count -- defaults to STRICT if unspecified.
  repeated DimCheckMode blob_share_mode = 1002;
  enum DimCheckMode {
    // Neil: Disabled for windows
    // // STRICT (default) requires that num, channels, height, width each match.
    // STRICT = 0;
    // PERMISSIVE requires only the count (num*channels*height*width) to match.
    PERMISSIVE = 1;
  }

  // The ratio that is multiplied on the global learning rate. If you want to
  // set the learning ratio for one blob, you need to set it for all blobs.
  repeated float blobs_lr = 7;
  // The weight decay that is multiplied on the global weight decay.
  repeated float weight_decay = 8;

  // The amount of weight to assign each top blob in the objective.
  // Each layer assigns a default value, usually of either 0 or 1,
  // to each top blob.
  repeated float loss_weight = 35;

  optional AccuracyParameter accuracy_param = 27;
  optional ArgMaxParameter argmax_param = 23;
  optional ConcatParameter concat_param = 9;
  optional ContrastiveLossParameter contrastive_loss_param = 40;
  optional ConvolutionParameter convolution_param = 10;
  optional DataParameter data_param = 11;
  optional DropoutParameter dropout_param = 12;
  optional DummyDataParameter dummy_data_param = 26;
  optional EltwiseParameter eltwise_param = 24;
  optional HDF5DataParameter hdf5_data_param = 13;
  optional HDF5OutputParameter hdf5_output_param = 14;
  optional HingeLossParameter hinge_loss_param = 29;
  optional ImageDataParameter image_data_param = 15;
  optional InfogainLossParameter infogain_loss_param = 16;
  optional InnerProductParameter inner_product_param = 17;
  optional LRNParameter lrn_param = 18;
  optional MemoryDataParameter memory_data_param = 22;
  optional MVNParameter mvn_param = 34;
  optional PoolingParameter pooling_param = 19;
  optional PowerParameter power_param = 21;
  optional ReLUParameter relu_param = 30;
  optional SigmoidParameter sigmoid_param = 38;
  optional SoftmaxParameter softmax_param = 39;
  optional SliceParameter slice_param = 31;
  optional TanHParameter tanh_param = 37;
  optional ThresholdParameter threshold_param = 25;
  optional WindowDataParameter window_data_param = 20;

  // Parameters for data pre-processing.
  optional TransformationParameter transform_param = 36;

  // Note: certain layers may have more than one computational engine
  // for their implementation. These layers include an Engine type and
  // engine parameter for selecting the implementation.
  // The default for the engine is set by the ENGINE switch at compile-time.

  // DEPRECATED: The layer parameters specified as a V0LayerParameter.
  // This should never be used by any code except to upgrade to the new
  // LayerParameter specification.
  optional V0LayerParameter layer = 1;
}
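As a quick illustration (my own example, not from the post), this is roughly how a layer written against this version of the message looks in a prototxt, using the enum-based type and the per-parameter-blob blobs_lr / weight_decay fields:

layers {
  name: "conv1"
  type: CONVOLUTION
  bottom: "data"
  top: "conv1"
  # one learning-rate multiplier per parameter blob: weights, then bias
  blobs_lr: 1
  blobs_lr: 2
  # one weight-decay multiplier per parameter blob
  weight_decay: 1
  weight_decay: 0
  convolution_param {
    num_output: 20
    kernel_size: 5
    stride: 1
  }
}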
Reading further through the hpp you run into all kinds of virtual functions; let's start with the protected methods.
2 The forward and backward functions:
  /** @brief Using the CPU device, compute the layer output. */
  virtual void Forward_cpu(const vector<Blob<Dtype>*>& bottom,
      vector<Blob<Dtype>*>* top) = 0;
  /**
   * @brief Using the GPU device, compute the layer output.
   *        Fall back to Forward_cpu() if unavailable.
   */
  virtual void Forward_gpu(const vector<Blob<Dtype>*>& bottom,
      vector<Blob<Dtype>*>* top) {
    // LOG(WARNING) << "Using CPU code as backup.";
    return Forward_cpu(bottom, top);
  }

  /**
   * @brief Using the CPU device, compute the gradients for any parameters and
   *        for the bottom blobs if propagate_down is true.
   */
  virtual void Backward_cpu(const vector<Blob<Dtype>*>& top,
      const vector<bool>& propagate_down,
      vector<Blob<Dtype>*>* bottom) = 0;
  /**
   * @brief Using the GPU device, compute the gradients for any parameters and
   *        for the bottom blobs if propagate_down is true.
   *        Fall back to Backward_cpu() if unavailable.
   */
  virtual void Backward_gpu(const vector<Blob<Dtype>*>& top,
      const vector<bool>& propagate_down,
      vector<Blob<Dtype>*>* bottom) {
    // LOG(WARNING) << "Using CPU code as backup.";
    Backward_cpu(top, propagate_down, bottom);
  }
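Before moving on, here is a minimal sketch (an invented layer, not part of Caffe) of what a concrete subclass has to provide against the signatures above: it only implements the CPU versions, and the GPU versions then fall back to them automatically. Setup code and the blob-count functions are omitted to keep the sketch short.

#include "caffe/layer.hpp"
#include "caffe/util/math_functions.hpp"

namespace caffe {

// A made-up layer that simply multiplies its input by 2, written against the
// old-style API shown above, where the output-side blobs are passed as a
// pointer to a vector. This is only a sketch, not actual Caffe source.
template <typename Dtype>
class ScaleByTwoLayer : public Layer<Dtype> {
 public:
  explicit ScaleByTwoLayer(const LayerParameter& param) : Layer<Dtype>(param) {}

 protected:
  virtual void Forward_cpu(const vector<Blob<Dtype>*>& bottom,
      vector<Blob<Dtype>*>* top) {
    // top = 2 * bottom
    caffe_cpu_scale((*top)[0]->count(), Dtype(2),
        bottom[0]->cpu_data(), (*top)[0]->mutable_cpu_data());
  }
  virtual void Backward_cpu(const vector<Blob<Dtype>*>& top,
      const vector<bool>& propagate_down,
      vector<Blob<Dtype>*>* bottom) {
    if (propagate_down[0]) {
      // d(loss)/d(bottom) = 2 * d(loss)/d(top)
      caffe_cpu_scale((*bottom)[0]->count(), Dtype(2),
          top[0]->cpu_diff(), (*bottom)[0]->mutable_cpu_diff());
    }
  }
  // No Forward_gpu / Backward_gpu here: the base-class versions above fall
  // back to the CPU implementations.
};

}  // namespace caffe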
3 Checking that the number of blobs is correct:
  /**
   * Called by the parent Layer's SetUp to check that the number of bottom
   * and top Blobs provided as input match the expected numbers specified by
   * the {ExactNum,Min,Max}{Bottom,Top}Blobs() functions.
   */
  virtual void CheckBlobCounts(const vector<Blob<Dtype>*>& bottom,
                               const vector<Blob<Dtype>*>& top) {
    if (ExactNumBottomBlobs() >= 0) {
      CHECK_EQ(ExactNumBottomBlobs(), bottom.size())
          << type_name() << " Layer takes " << ExactNumBottomBlobs()
          << " bottom blob(s) as input.";
    }
    if (MinBottomBlobs() >= 0) {
      CHECK_LE(MinBottomBlobs(), bottom.size())
          << type_name() << " Layer takes at least " << MinBottomBlobs()
          << " bottom blob(s) as input.";
    }
    if (MaxBottomBlobs() >= 0) {
      CHECK_GE(MaxBottomBlobs(), bottom.size())
          << type_name() << " Layer takes at most " << MaxBottomBlobs()
          << " bottom blob(s) as input.";
    }
    if (ExactNumTopBlobs() >= 0) {
      CHECK_EQ(ExactNumTopBlobs(), top.size())
          << type_name() << " Layer produces " << ExactNumTopBlobs()
          << " top blob(s) as output.";
    }
    if (MinTopBlobs() >= 0) {
      CHECK_LE(MinTopBlobs(), top.size())
          << type_name() << " Layer produces at least " << MinTopBlobs()
          << " top blob(s) as output.";
    }
    if (MaxTopBlobs() >= 0) {
      CHECK_GE(MaxTopBlobs(), top.size())
          << type_name() << " Layer produces at most " << MaxTopBlobs()
          << " top blob(s) as output.";
    }
    if (EqualNumBottomTopBlobs()) {
      CHECK_EQ(bottom.size(), top.size())
          << type_name() << " Layer produces one top blob as output for each "
          << "bottom blob input.";
    }
  }
This involves a few functions that look a bit odd, such as ExactNumBottomBlobs(), but they do not get in the way of reading: CheckBlobCounts() simply checks that the numbers of input bottom blobs and output top blobs fall within a specified range. "Specified range" naturally raises the question of where that range is specified. Keep the question in mind while reading on; it becomes clear at some point.
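In fact the answer can already be sketched here (my own illustration, with an invented layer name): the range comes from virtual functions such as ExactNumBottomBlobs(), which return -1 (meaning "no constraint") in the Layer base class and are overridden by each concrete layer; CheckBlobCounts() then reads the expected counts from those overrides.

// Sketch: a hypothetical element-wise sum layer that wants at least two
// bottom blobs and exactly one top blob. Overriding these functions is all
// CheckBlobCounts() needs in order to enforce the constraint.
// (Forward_cpu / Backward_cpu are omitted, so the class stays abstract here.)
template <typename Dtype>
class MySumLayer : public Layer<Dtype> {
 public:
  explicit MySumLayer(const LayerParameter& param) : Layer<Dtype>(param) {}
  virtual inline int MinBottomBlobs() const { return 2; }
  virtual inline int ExactNumTopBlobs() const { return 1; }
};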
4 The last protected function, SetLossWeights():
  /**
   * Called by SetUp to initialize the weights associated with any top blobs in
   * the loss function. Store non-zero loss weights in the diff blob.
   */
  inline void SetLossWeights(vector<Blob<Dtype>*>* top) {
    const int num_loss_weights = layer_param_.loss_weight_size();
    if (num_loss_weights) {
      CHECK_EQ(top->size(), num_loss_weights) << "loss_weight must be "
          "unspecified or specified once per top blob.";
      for (int top_id = 0; top_id < top->size(); ++top_id) {
        const Dtype loss_weight = layer_param_.loss_weight(top_id);
        if (loss_weight == Dtype(0)) { continue; }
        this->set_loss(top_id, loss_weight);
        const int count = (*top)[top_id]->count();
        Dtype* loss_multiplier = (*top)[top_id]->mutable_cpu_diff();
        caffe_set(count, loss_weight, loss_multiplier);
      }
    }
  }
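SetLossWeights() is easier to read once you connect it back to the loss_weight field of LayerParameter shown earlier: one weight per top blob, and any non-zero weight is written into that top blob's diff via caffe_set. A hypothetical prototxt fragment (my own example) that would exercise this code:

layers {
  name: "loss"
  type: EUCLIDEAN_LOSS
  bottom: "pred"
  bottom: "label"
  top: "loss"
  # exactly one loss_weight per top blob; 0 would drop this top from the
  # objective, while a non-zero value is stored into its diff by
  # SetLossWeights().
  loss_weight: 1
}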
Original post: http://blog.csdn.net/thy_2014/article/details/51943159