The sigmoid_cross_entropy_loss_layer class
Header: ./include/caffe/layers/sigmoid_cross_entropy_loss_layer.hpp
CPU implementation: ./src/caffe/layers/sigmoid_cross_entropy_loss_layer.cpp
GPU implementation: ./src/caffe/layers/sigmoid_cross_entropy_loss_layer.cu
CPU and GPU implementations of the basic operations this layer needs:

bottom_diff = sigmoid_output_data - target

is implemented as follows (these are the same calls that appear in the backward code shown below):

```cpp
// CPU
caffe_sub(count, sigmoid_output_data, target, bottom_diff);
// GPU
caffe_copy(count, sigmoid_output_data, bottom_diff);
caffe_gpu_axpy(count, Dtype(-1), target, bottom_diff);
```

Then every element of bottom_diff is multiplied by loss_weight, operating on count elements in total. Its implementation:

```cpp
// CPU
caffe_scal(count, loss_weight, bottom_diff);
// GPU
caffe_gpu_scal(count, loss_weight, bottom_diff);
```
The functions above use the CBLAS and cuBLAS libraries, respectively. sigmoid_output_data is the output of the forward pass. These two steps are in fact the backward pass: the result ends up in bottom_diff, which is part of a Blob and is propagated onward along the data path.
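Why bottom_diff can be just sigmoid_output_data - target follows from differentiating the sigmoid cross-entropy loss. A short derivation (standard calculus, not taken from the Caffe source), writing x for one input element, t for its target, and σ(x) for the sigmoid:

```latex
\ell(x,t) = -\bigl[t\log\sigma(x) + (1-t)\log(1-\sigma(x))\bigr],
\qquad \sigma(x) = \frac{1}{1+e^{-x}}
\\[6pt]
\frac{\partial \ell}{\partial x}
  = -\bigl[t\,(1-\sigma(x)) - (1-t)\,\sigma(x)\bigr]
  = \sigma(x) - t
```

So per element the gradient is exactly sigmoid_output_data[i] - target[i]; the additional loss_weight factor comes from scaling by the top gradient divided by the normalizer, as seen in the backward code below.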
Backward pass
CPU
```cpp
template <typename Dtype>
void SigmoidCrossEntropyLossLayer<Dtype>::Backward_cpu(
    const vector<Blob<Dtype>*>& top,
    const vector<bool>& propagate_down,
    const vector<Blob<Dtype>*>& bottom) {
  if (propagate_down[1]) {
    LOG(FATAL) << this->type()
               << " Layer cannot backpropagate to label inputs.";
  }
  if (propagate_down[0]) {
    // First, compute the diff
    const int count = bottom[0]->count();
    const Dtype* sigmoid_output_data = sigmoid_output_->cpu_data();
    const Dtype* target = bottom[1]->cpu_data();
    Dtype* bottom_diff = bottom[0]->mutable_cpu_diff();
    caffe_sub(count, sigmoid_output_data, target, bottom_diff);
    // Zero out gradient of ignored targets.
    if (has_ignore_label_) {
      for (int i = 0; i < count; ++i) {
        const int target_value = static_cast<int>(target[i]);
        if (target_value == ignore_label_) {
          bottom_diff[i] = 0;
        }
      }
    }
    // Scale down gradient
    Dtype loss_weight = top[0]->cpu_diff()[0] / normalizer_;
    caffe_scal(count, loss_weight, bottom_diff);
  }
}
```

Because this runs on the CPU and has nothing to do with the GPU, the code above uses no gpu_data or gpu_diff.
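For reference, caffe_sub and caffe_scal live in Caffe's math_functions and are thin wrappers around BLAS/MKL-style routines (the GPU counterparts wrap cuBLAS). Conceptually they do nothing more than the loops below; this is a minimal sketch of the equivalent element-wise operations, not Caffe's actual implementation in src/caffe/util/math_functions.cpp:

```cpp
// Minimal stand-ins for what caffe_sub / caffe_scal compute element-wise
// (sketch only; the real versions dispatch to CBLAS/MKL or cuBLAS).
template <typename Dtype>
void sub_sketch(int count, const Dtype* a, const Dtype* b, Dtype* y) {
  for (int i = 0; i < count; ++i) {
    y[i] = a[i] - b[i];  // bottom_diff = sigmoid_output_data - target
  }
}

template <typename Dtype>
void scal_sketch(int count, Dtype alpha, Dtype* y) {
  for (int i = 0; i < count; ++i) {
    y[i] *= alpha;  // scale every element of bottom_diff by loss_weight
  }
}
```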
The core of it: fetch the data from the Blobs, then run the operation:

```cpp
// Fetch the Blob data
const int count = bottom[0]->count();
const Dtype* sigmoid_output_data = sigmoid_output_->cpu_data();
const Dtype* target = bottom[1]->cpu_data();
Dtype* bottom_diff = bottom[0]->mutable_cpu_diff();
// The subtraction described above
caffe_sub(count, sigmoid_output_data, target, bottom_diff);
```

GPU
Similar to the CPU version:
```cpp
template <typename Dtype>
void SigmoidCrossEntropyLossLayer<Dtype>::Backward_gpu(
    const vector<Blob<Dtype>*>& top,
    const vector<bool>& propagate_down,
    const vector<Blob<Dtype>*>& bottom) {
  if (propagate_down[1]) {
    LOG(FATAL) << this->type()
               << " Layer cannot backpropagate to label inputs.";
  }
  if (propagate_down[0]) {
    // First, compute the diff
    const int count = bottom[0]->count();
    const Dtype* sigmoid_output_data = sigmoid_output_->gpu_data();
    const Dtype* target = bottom[1]->gpu_data();
    Dtype* bottom_diff = bottom[0]->mutable_gpu_diff();
    caffe_copy(count, sigmoid_output_data, bottom_diff);
    caffe_gpu_axpy(count, Dtype(-1), target, bottom_diff);
    // Zero out gradient of ignored targets.
    if (has_ignore_label_) {
      // NOLINT_NEXT_LINE(whitespace/operators)
      SigmoidCrossEntropyLossIgnoreDiffGPU<Dtype><<<CAFFE_GET_BLOCKS(count),
          CAFFE_CUDA_NUM_THREADS>>>(count, ignore_label_, target, bottom_diff);
    }
    // Scale down gradient
    Dtype loss_weight = top[0]->cpu_diff()[0] / normalizer_;
    caffe_gpu_scal(count, loss_weight, bottom_diff);
  }
}
```

Add the kernel function, whose job is to zero out the positions whose gradient should not be computed, with the same meaning as the CPU loop:
```cpp
template <typename Dtype>
__global__ void SigmoidCrossEntropyLossIgnoreDiffGPU(
    const int count,
    const int ignore_label,
    const Dtype* target,
    Dtype* diff) {
  CUDA_KERNEL_LOOP(i, count) {
    const int target_value = static_cast<int>(target[i]);
    if (target_value == ignore_label) {
      diff[i] = 0;
    }
  }
}
```
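CUDA_KERNEL_LOOP and the <<<CAFFE_GET_BLOCKS(count), CAFFE_CUDA_NUM_THREADS>>> launch configuration are helpers from Caffe's include/caffe/util/device_alternate.hpp. Roughly (paraphrased, and the thread count depends on the CUDA architecture) they amount to a grid-stride loop plus enough blocks to cover count elements:

```cpp
// Roughly what the Caffe helpers amount to (paraphrased, not verbatim):
// a grid-stride loop over all n elements, and enough blocks of
// CAFFE_CUDA_NUM_THREADS threads to cover them.
#define CUDA_KERNEL_LOOP(i, n) \
  for (int i = blockIdx.x * blockDim.x + threadIdx.x; \
       i < (n); \
       i += blockDim.x * gridDim.x)

const int CAFFE_CUDA_NUM_THREADS = 512;  // 1024 on newer CUDA architectures

inline int CAFFE_GET_BLOCKS(const int N) {
  return (N + CAFFE_CUDA_NUM_THREADS - 1) / CAFFE_CUDA_NUM_THREADS;
}
```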
Forward pass
Member attributes in the header:

```cpp
/// A pointer to an internal SigmoidLayer object that maps predictions to probabilities
shared_ptr<SigmoidLayer<Dtype> > sigmoid_layer_;
```

The forward pass first runs sigmoid_layer_->Forward(_, _). Its arguments, sigmoid_bottom_vec_ and sigmoid_top_vec_, are two member variables of this class whose contents change as the operations execute; here it is the former that gets set, and the wiring between these vectors and the internal sigmoid layer is done in the member function LayerSetUp() in the source (sketched just below).
sigmoid_layer_ is itself a member variable, declared as shared_ptr<SigmoidLayer<Dtype> > sigmoid_layer_;. The CPU and GPU implementations of the forward pass are shown below.
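For context, a paraphrased sketch of what LayerSetUp() does with these two vectors (not a verbatim copy of the source; check sigmoid_cross_entropy_loss_layer.cpp for the exact code):

```cpp
// Paraphrased sketch of SigmoidCrossEntropyLossLayer<Dtype>::LayerSetUp():
// the internal SigmoidLayer reads bottom[0] and writes its output into
// sigmoid_output_, via the two helper vectors used later in Forward/Backward.
sigmoid_bottom_vec_.clear();
sigmoid_bottom_vec_.push_back(bottom[0]);           // sigmoid input = this layer's input
sigmoid_top_vec_.clear();
sigmoid_top_vec_.push_back(sigmoid_output_.get());  // sigmoid output blob
sigmoid_layer_->SetUp(sigmoid_bottom_vec_, sigmoid_top_vec_);
```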
CPU
```cpp
template <typename Dtype>
void SigmoidCrossEntropyLossLayer<Dtype>::Forward_cpu(
    const vector<Blob<Dtype>*>& bottom,
    const vector<Blob<Dtype>*>& top) {
  // The forward pass computes the sigmoid outputs.
  // 1. Run the sigmoid forward pass to get its output, and fetch the data
  sigmoid_bottom_vec_[0] = bottom[0];
  sigmoid_layer_->Forward(sigmoid_bottom_vec_, sigmoid_top_vec_);
  // Compute the loss (negative log likelihood)
  // Stable version of loss computation from input data
  const Dtype* input_data = bottom[0]->cpu_data();
  const Dtype* target = bottom[1]->cpu_data();
  // 2. Accumulate the negative log likelihood
  int valid_count = 0;
  Dtype loss = 0;
  for (int i = 0; i < bottom[0]->count(); ++i) {
    const int target_value = static_cast<int>(target[i]);
    if (has_ignore_label_ && target_value == ignore_label_) {
      continue;
    }
    loss -= input_data[i] * (target[i] - (input_data[i] >= 0)) -
        log(1 + exp(input_data[i] - 2 * input_data[i] * (input_data[i] >= 0)));
    ++valid_count;
  }
  normalizer_ = get_normalizer(normalization_, valid_count);
  top[0]->mutable_cpu_data()[0] = loss / normalizer_;
}
```
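The expression inside the loop is a numerically stable rewrite of the per-element cross-entropy. Writing x = input_data[i] and t = target[i], and noting that x - 2x·[x ≥ 0] = -|x| and x·[x ≥ 0] = max(x, 0), the accumulated term is (derivation mine, not from the source comments):

```latex
\ell_i = -\bigl[t\log\sigma(x) + (1-t)\log(1-\sigma(x))\bigr]
       = \max(x,0) \;-\; x\,t \;+\; \log\bigl(1 + e^{-|x|}\bigr)
```

Evaluating log(1 + e^{-|x|}) instead of log(1 + e^{-x}) keeps exp() from overflowing for large negative x, which is why the code folds the sign test (input_data[i] >= 0) into both terms.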
Look again at sigmoid_layer_->Forward(_, _);. The SigmoidLayer class does not define a Forward() method itself, so it must be inherited from a parent class. Following the source, the inheritance chain is SigmoidLayer -> NeuronLayer -> Layer, so the Forward() called here is the one defined in the Layer class; see Layer.hpp for details.
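What Layer<Dtype>::Forward() adds on top of Forward_cpu/Forward_gpu is mode dispatch and loss accumulation. A heavily simplified sketch (paraphrased, not the verbatim code in layer.hpp):

```cpp
// Simplified sketch of Layer<Dtype>::Forward() (paraphrased from layer.hpp):
// reshape the tops, dispatch to the CPU or GPU implementation according to
// Caffe::mode(), and scale the returned loss by the layer's loss weights.
template <typename Dtype>
inline Dtype Layer<Dtype>::Forward(const vector<Blob<Dtype>*>& bottom,
    const vector<Blob<Dtype>*>& top) {
  Dtype loss = 0;
  Reshape(bottom, top);
  switch (Caffe::mode()) {
  case Caffe::CPU:
    Forward_cpu(bottom, top);
    // accumulate loss from top blobs with non-zero loss weights (omitted)
    break;
  case Caffe::GPU:
    Forward_gpu(bottom, top);
    // same accumulation on the GPU side (omitted)
    break;
  }
  return loss;
}
```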
GPU
Similar to the CPU version, with the CPU for loop replaced by a kernel function:
```cpp
template <typename Dtype>
__global__ void SigmoidCrossEntropyLossForwardGPU(const int nthreads,
    const Dtype* input_data, const Dtype* target, Dtype* loss,
    const bool has_ignore_label_, const int ignore_label_,
    Dtype* counts) {
  CUDA_KERNEL_LOOP(i, nthreads) {
    const int target_value = static_cast<int>(target[i]);
    if (has_ignore_label_ && target_value == ignore_label_) {
      loss[i] = 0;
      counts[i] = 0;
    } else {
      loss[i] = input_data[i] * (target[i] - (input_data[i] >= 0)) -
          log(1 + exp(input_data[i] - 2 * input_data[i] *
          (input_data[i] >= 0)));
      counts[i] = 1;
    }
  }
}
```

The GPU forward pass itself:
```cpp
template <typename Dtype>
void SigmoidCrossEntropyLossLayer<Dtype>::Forward_gpu(
    const vector<Blob<Dtype>*>& bottom,
    const vector<Blob<Dtype>*>& top) { ... }
```
The function body is omitted here, but one detail in the source is worth pointing out:
```cpp
// Since this memory is not used for anything, we use it here to avoid having
// to allocate new GPU memory to accumulate intermediate results.
Dtype* loss_data = bottom[0]->mutable_gpu_diff();
Dtype* count_data = bottom[1]->mutable_gpu_diff();
...
...
// Clear scratch memory to prevent interfering with backward (see #6202).
caffe_gpu_set(bottom[0]->count(), Dtype(0), bottom[0]->mutable_gpu_diff());
caffe_gpu_set(bottom[1]->count(), Dtype(0), bottom[1]->mutable_gpu_diff());
```

This has no counterpart in the CPU version. The kernel needs arrays to write its per-element results into, but no memory has been allocated for them, so to avoid allocating new GPU memory for intermediate results the layer borrows the currently unused parts of the Blobs (their diff buffers) as temporary scratch space; the only catch is that this space has to be cleared again before the function returns.
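The omitted body then reduces these per-element buffers to scalars before clearing them; roughly (paraphrased from the source, so the exact enum and variable names may differ slightly) it looks like:

```cpp
// Paraphrased sketch of the reduction step in Forward_gpu: sum the per-element
// losses (and, when ignore_label is in use, the per-element valid counts) that
// the kernel wrote into the borrowed diff buffers.
Dtype loss;
caffe_gpu_asum(count, loss_data, &loss);           // sum of per-element losses
Dtype valid_count = count;
if (has_ignore_label_ &&
    normalization_ == LossParameter_NormalizationMode_VALID) {
  caffe_gpu_asum(count, count_data, &valid_count); // number of non-ignored targets
}
normalizer_ = get_normalizer(normalization_, valid_count);
top[0]->mutable_cpu_data()[0] = loss / normalizer_;
```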
Other
Besides the methods discussed above, this class has a few more; see the source files for details.