
Faster RCNN Implemented in Pure C++

June 10, 2017

Implementing the Faster RCNN proposal layer and its wrapper in C++

Please refer to the project I published on GitHub. Below is a detailed, annotated walkthrough of how the proposal layer is implemented.
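Before the layer will compile, Caffe has to know about its parameters: LayerSetUp below reads feat_stride, anchor_base_size, anchor_scale, anchor_ratio and max_rois from a ProposalParameter message, so caffe.proto must declare one. A minimal sketch, assuming the numeric field IDs below are free in your Caffe tree (they are placeholders, not official values):

// In src/caffe/proto/caffe.proto. All numeric IDs are placeholders;
// pick values that do not collide with those already in your copy.
message ProposalParameter {
  optional uint32 feat_stride = 1 [default = 16];
  optional uint32 anchor_base_size = 2 [default = 16];
  optional uint32 anchor_scale = 3 [default = 3];  // 3 expands to {8,16,32}, anything else to {32}
  optional uint32 anchor_ratio = 4 [default = 3];  // 3 expands to {0.5,1,2}, anything else to {1}
  optional uint32 max_rois = 5 [default = 300];
}

// And inside message LayerParameter:
//   optional ProposalParameter proposal_param = 9999;  // placeholder ID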

proposal_layer.hpp

// --------------------------------------------------------
// Proposal Layer C++ Implementation
// Copyright (c) 2017 Lenovo
// Written by Zou Jinyi
// --------------------------------------------------------

#ifndef CAFFE_PROPOSAL_LAYER_HPP_
#define CAFFE_PROPOSAL_LAYER_HPP_

#include <vector>

#include "caffe/blob.hpp"
#include "caffe/layer.hpp"
#include "caffe/proto/caffe.pb.h"
// Local min/max macros; beware of clashes with <windows.h> or std::min/max.
#define max(a, b) (((a) > (b)) ? (a) : (b))
#define min(a, b) (((a) < (b)) ? (a) : (b))
namespace caffe {

/**
 * @brief Provides ROIs by assigning tops directly.
 *
 * This layer produces ROIs from the anchors; Backward and Reshape
 * are both no-ops.
 */
template <typename Dtype>
class ProposalLayer : public Layer<Dtype> {
 public:
  explicit ProposalLayer(const LayerParameter& param)
      : Layer<Dtype>(param) {}
  virtual void LayerSetUp(const vector<Blob<Dtype>*>& bottom,
      const vector<Blob<Dtype>*>& top);
  virtual void Reshape(const vector<Blob<Dtype>*>& bottom,
	  const vector<Blob<Dtype>*>& top) {}

  virtual inline const char* type() const { return "Proposal"; }
  // Exact blob counts make Min/Max overrides redundant, so only these remain.
  virtual inline int ExactNumBottomBlobs() const { return 3; }
  virtual inline int ExactNumTopBlobs() const { return 1; }

 protected:
 
  virtual void Forward_cpu(const vector<Blob<Dtype>*>& bottom,
      const vector<Blob<Dtype>*>& top);
  /// @brief Forward_gpu is not implemented; Caffe falls back to Forward_cpu.
  // virtual void Forward_gpu(const vector<Blob<Dtype>*>& bottom,
  //     const vector<Blob<Dtype>*>& top) {
  //   NOT_IMPLEMENTED;
  // }
  virtual void Backward_cpu(const vector<Blob<Dtype>*>& top,
      const vector<bool>& propagate_down, const vector<Blob<Dtype>*>& bottom) {
    NOT_IMPLEMENTED;
  }
  virtual void Backward_gpu(const vector<Blob<Dtype>*>& top,
      const vector<bool>& propagate_down, const vector<Blob<Dtype>*>& bottom) {
    NOT_IMPLEMENTED;
  }
  // Generate the base anchors. Their count follows the prototxt settings,
  // and the procedure mirrors the Python implementation exactly.
  virtual void Generate_anchors();

  virtual void _whctrs(vector <float> anchor, vector<float> &ctrs);

  virtual void _ratio_enum(vector <float> anchor, vector <float> &anchors_ratio);

  virtual void _mkanchors(vector<float> ctrs, vector<float> &anchors);

  virtual void _scale_enum(vector<float> anchors_ratio, vector<float> &anchor_boxes);
  // Recover boxes on the anchors from the predicted dx, dy, dw, dh.
  virtual void bbox_transform_inv(int img_width, int img_height, vector<vector<float> > bbox, vector<vector<float> > select_anchor, vector<vector<float> > &pred);
  // Non-maximum suppression.
  virtual void apply_nms(vector<vector<float> > &pred_boxes, vector<float> &confidence);

  int feat_stride_;             // total stride of the feature map w.r.t. the input image
  int anchor_base_size_;        // base anchor size in input pixels
  vector<float> anchor_scale_;  // anchor scales
  vector<float> anchor_ratio_;  // anchor aspect ratios

  int max_rois_;
  vector<float> anchor_boxes_;
  
};

}  // namespace caffe

#endif  // CAFFE_PROPOSAL_LAYER_HPP_
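The header declares bbox_transform_inv, which implements the standard Faster RCNN inverse box regression. Given an anchor with center $(x_a, y_a)$, size $(w_a, h_a)$ and predicted deltas $(d_x, d_y, d_w, d_h)$, the predicted box is

$$x = x_a + w_a\,d_x,\qquad y = y_a + h_a\,d_y,\qquad w = w_a\,e^{d_w},\qquad h = h_a\,e^{d_h},$$

after which the corners $x \pm 0.5\,w$, $y \pm 0.5\,h$ are clipped to the image rectangle $[0, W-1] \times [0, H-1]$, exactly as the .cpp below does.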

proposal_layer.cpp

// --------------------------------------------------------
// Proposal Layer C++ Implementation
// Copyright (c) 2017 Lenovo
// Written by Zou Jinyi
// --------------------------------------------------------
#include <cmath>   // round, sqrt, exp
#include <vector>

#include "caffe/layers/proposal_layer.hpp"

namespace caffe {

template <typename Dtype>
void ProposalLayer<Dtype>::LayerSetUp(const vector<Blob<Dtype>*>& bottom,
      const vector<Blob<Dtype>*>& top) {
  const ProposalParameter& param = this->layer_param_.proposal_param();
  
  feat_stride_ = param.feat_stride();
  anchor_base_size_ = param.anchor_base_size();
  // These sizes reproduce the official forward pass; change them to suit
  // your own needs.
  if (param.anchor_scale() == 3)
  {
	  anchor_scale_.push_back(8.0);
	  anchor_scale_.push_back(16.0);
	  anchor_scale_.push_back(32.0);
  }
  else
  {
	  anchor_scale_.push_back(32.0);
  }
  if (param.anchor_ratio() == 3)
  {
	  anchor_ratio_.push_back(0.5);
	  anchor_ratio_.push_back(1.0);
	  anchor_ratio_.push_back(2.0);
  }
  else
  {
	  anchor_ratio_.push_back(1.0);
  }
  max_rois_ = param.max_rois();
  Generate_anchors();
}

template <typename Dtype>	  
void ProposalLayer<Dtype>::Forward_cpu(const vector<Blob<Dtype>*>& bottom,
      const vector<Blob<Dtype>*>& top) {
	const Dtype* score = bottom[0]->cpu_data();
	const Dtype* bbox_deltas = bottom[1]->cpu_data();
	const Dtype* im_info = bottom[2]->cpu_data();
	int height = bottom[0]->height();
	int width = bottom[0]->width();
	float thresh = 0.3;
	vector<vector<float> > select_anchor;
	vector<float> confidence;
	vector<vector<float> > bbox;
	int anchor_num = anchor_scale_.size()*anchor_ratio_.size();
	// Collect every box whose confidence exceeds thresh, along with all the
	// parameters needed to reconstruct it. This differs slightly from the
	// official version, which only keeps the top-300 boxes by score.
	for (int k = 0; k < anchor_num; k++)
	{
		float w = anchor_boxes_[4 * k + 2] - anchor_boxes_[4 * k] + 1;
		float h = anchor_boxes_[4 * k + 3] - anchor_boxes_[4 * k + 1] + 1;
		float x_ctr = anchor_boxes_[4 * k] + 0.5 * (w - 1);
		float y_ctr = anchor_boxes_[4 * k + 1] + 0.5 * (h - 1);

		for (int i = 0; i < height; i++)
		{
			for (int j = 0; j < width; j++)
			{
			    // NCHW layout: offset = ((n*C + c)*H + h)*W + w. Here n = 0 and
			    // c = anchor_num + k picks the foreground score of anchor k
			    // (channels 0 .. anchor_num-1 hold the background scores).
				if (score[anchor_num*height*width + (k * height + i) * width + j] >= thresh)
				{
					vector<float> tmp_anchor;
					vector<float> tmp_bbox;

					tmp_anchor.push_back(j * feat_stride_+ x_ctr);
					tmp_anchor.push_back(i * feat_stride_+ y_ctr);
					tmp_anchor.push_back(w);
					tmp_anchor.push_back(h);
					select_anchor.push_back(tmp_anchor);
					confidence.push_back(score[anchor_num*height*width + (k * height + i) * width + j]);
					tmp_bbox.push_back(bbox_deltas[(4 * k * height + i) * width + j]);
					tmp_bbox.push_back(bbox_deltas[((4 * k +1) * height + i) * width + j]);
					tmp_bbox.push_back(bbox_deltas[((4 * k + 2) * height + i) * width + j]);
					tmp_bbox.push_back(bbox_deltas[((4 * k + 3) * height + i) * width + j]);
					bbox.push_back(tmp_bbox);
				}
			}
		}
	}
	vector<vector<float> > pred_boxes;
	// Recover the predicted boxes from the anchors and deltas.
	bbox_transform_inv(im_info[1], im_info[0], bbox, select_anchor, pred_boxes);
	// Non-maximum suppression.
    apply_nms(pred_boxes, confidence);
    // Keep at most max_rois_ boxes. The official version sorts by confidence
    // and takes the top 300; here the thresh cut-off usually keeps the count
    // under the limit already, so no sort is performed.
	int num = (int)pred_boxes.size() > max_rois_ ? max_rois_ : (int)pred_boxes.size();

	vector<int> proposal_shape;
	proposal_shape.push_back(num);
	proposal_shape.push_back(5);
	top[0]->Reshape(proposal_shape);
	Dtype* top_data = top[0]->mutable_cpu_data();
	for (int i = 0; i < num; i++)
	{
		top_data[5 * i] = 0;  // ROI format: (batch_index, x1, y1, x2, y2)
		top_data[5 * i + 1] = pred_boxes[i][0];
		top_data[5 * i + 2] = pred_boxes[i][1];
		top_data[5 * i + 3] = pred_boxes[i][2];
		top_data[5 * i + 4] = pred_boxes[i][3];
	}
}


// Generate the base anchors (mirrors generate_anchors.py in py-faster-rcnn).
template <typename Dtype>
void ProposalLayer<Dtype>::Generate_anchors() {
	vector<float> base_anchor;
	base_anchor.push_back(0);
	base_anchor.push_back(0);
	base_anchor.push_back(anchor_base_size_ - 1);
	base_anchor.push_back(anchor_base_size_ - 1);
	vector<float> anchors_ratio;
	_ratio_enum(base_anchor, anchors_ratio);
	_scale_enum(anchors_ratio, anchor_boxes_);
}
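// Sanity check (a reference added here, not part of the original code):
// assuming anchor_base_size_ = 16, scales {8, 16, 32} and ratios
// {0.5, 1, 2}, Generate_anchors() should reproduce the nine base anchors
// of py-faster-rcnn's generate_anchors.py, as (x1, y1, x2, y2):
//   ratio 0.5: (-84,-40,99,55)  (-176,-88,191,103)   (-360,-184,375,199)
//   ratio 1:   (-56,-56,71,71)  (-120,-120,135,135)  (-248,-248,263,263)
//   ratio 2:   (-36,-80,51,95)  (-80,-168,95,183)    (-168,-344,183,359)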

template <typename Dtype>
void ProposalLayer<Dtype>::_whctrs(vector <float> anchor, vector<float> &ctrs) {
	float w = anchor[2] - anchor[0] + 1;
	float h = anchor[3] - anchor[1] + 1;
	float x_ctr = anchor[0] + 0.5 * (w - 1);
	float y_ctr = anchor[1] + 0.5 * (h - 1);
	ctrs.push_back(w);
	ctrs.push_back(h);
	ctrs.push_back(x_ctr);
	ctrs.push_back(y_ctr);
}

template <typename Dtype>
void ProposalLayer<Dtype>::_ratio_enum(vector<float> anchor, vector<float> &anchors_ratio) {
	vector<float> ctrs;
	_whctrs(anchor, ctrs);
	float size = ctrs[0] * ctrs[1];
	int ratio_num = anchor_ratio_.size();
	for (int i = 0; i < ratio_num; i++)
	{
		float ratio = size / anchor_ratio_[i];
		int ws = int(round(sqrt(ratio)));
		int hs = int(round(ws * anchor_ratio_[i]));
		vector<float> ctrs_in;
		ctrs_in.push_back(ws);
		ctrs_in.push_back(hs);
		ctrs_in.push_back(ctrs[2]);
		ctrs_in.push_back(ctrs[3]);
		_mkanchors(ctrs_in, anchors_ratio);
	}
}

template <typename Dtype>
void ProposalLayer<Dtype>::_scale_enum(vector<float> anchors_ratio, vector<float> &anchor_boxes) {
	int anchors_ratio_num = anchors_ratio.size() / 4;
	for (int i = 0; i < anchors_ratio_num; i++)
	{
		vector<float> anchor;
		anchor.push_back(anchors_ratio[i * 4]);
		anchor.push_back(anchors_ratio[i * 4 + 1]);
		anchor.push_back(anchors_ratio[i * 4 + 2]);
		anchor.push_back(anchors_ratio[i * 4 + 3]);
		vector<float> ctrs;
		_whctrs(anchor, ctrs);
		int scale_num = anchor_scale_.size();
		for (int j = 0; j < scale_num; j++)
		{
			float ws = ctrs[0] * anchor_scale_[j];
			float hs = ctrs[1] * anchor_scale_[j];
			vector<float> ctrs_in;
			ctrs_in.push_back(ws);
			ctrs_in.push_back(hs);
			ctrs_in.push_back(ctrs[2]);
			ctrs_in.push_back(ctrs[3]);
			_mkanchors(ctrs_in, anchor_boxes_);
		}
	}
}

template <typename Dtype>
void ProposalLayer<Dtype>::_mkanchors(vector<float> ctrs, vector<float> &anchors) {
	anchors.push_back(ctrs[2] - 0.5*(ctrs[0] - 1));
	anchors.push_back(ctrs[3] - 0.5*(ctrs[1] - 1));
	anchors.push_back(ctrs[2] + 0.5*(ctrs[0] - 1));
	anchors.push_back(ctrs[3] + 0.5*(ctrs[1] - 1));
}

template <typename Dtype>
void ProposalLayer<Dtype>::bbox_transform_inv(int img_width, int img_height, vector<vector<float> > bbox, vector<vector<float> > select_anchor, vector<vector<float> > &pred)
{
	int num = bbox.size();
	for (int i = 0; i< num; i++)
	{
			float dx = bbox[i][0];
			float dy = bbox[i][1];
			float dw = bbox[i][2];
			float dh = bbox[i][3];
			float pred_ctr_x = select_anchor[i][0] + select_anchor[i][2]*dx;
			float pred_ctr_y = select_anchor[i][1] + select_anchor[i][3] *dy;
			float pred_w = select_anchor[i][2] * exp(dw);
			float pred_h = select_anchor[i][3] * exp(dh);
			vector<float> tmp_pred;
			tmp_pred.push_back(max(min(pred_ctr_x - 0.5* pred_w, img_width - 1), 0));
			tmp_pred.push_back(max(min(pred_ctr_y - 0.5* pred_h, img_height - 1), 0));
			tmp_pred.push_back(max(min(pred_ctr_x + 0.5* pred_w, img_width - 1), 0));
			tmp_pred.push_back(max(min(pred_ctr_y + 0.5* pred_h, img_height - 1), 0));
			pred.push_back(tmp_pred);
	}
}

template <typename Dtype>
void ProposalLayer<Dtype>::apply_nms(vector<vector<float> > &pred_boxes, vector<float> &confidence)
{
	// Greedy pairwise NMS: whenever two boxes overlap with IoU > 0.7, the
	// lower-confidence one is erased in place.
	// (i + 1 < size avoids the unsigned underflow of size() - 1 when the
	// list is empty.)
	for (int i = 0; i + 1 < (int)pred_boxes.size(); i++)
	{
		float s1 = (pred_boxes[i][2] - pred_boxes[i][0] + 1) *(pred_boxes[i][3] - pred_boxes[i][1] + 1);
		for (int j = i + 1; j < pred_boxes.size(); j++)
		{
			float s2 = (pred_boxes[j][2] - pred_boxes[j][0] + 1) *(pred_boxes[j][3] - pred_boxes[j][1] + 1);

			float x1 = max(pred_boxes[i][0], pred_boxes[j][0]);
			float y1 = max(pred_boxes[i][1], pred_boxes[j][1]);
			float x2 = min(pred_boxes[i][2], pred_boxes[j][2]);
			float y2 = min(pred_boxes[i][3], pred_boxes[j][3]);

			// The +1 keeps the intersection consistent with the +1 pixel
			// convention used for the areas s1 and s2 above.
			float width = x2 - x1 + 1;
			float height = y2 - y1 + 1;
			if (width > 0 && height > 0)
			{
				float IOU = width * height / (s1 + s2 - width * height);
				if (IOU > 0.7)
				{
					if (confidence[i] >= confidence[j])
					{
						pred_boxes.erase(pred_boxes.begin() + j);
						confidence.erase(confidence.begin() + j);
						j--;
					}
					else
					{
						pred_boxes.erase(pred_boxes.begin() + i);
						confidence.erase(confidence.begin() + i);
						i--;
						break;
					}
				}
			}
		}
	}
}
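// Note (an observation added here, not in the original code): py-faster-rcnn's
// nms sorts boxes by score once and suppresses in a single pass, while the
// pairwise scheme above erases as it scans in index order; with chained
// overlaps the two can keep slightly different box sets, so results may not
// match the official implementation exactly.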


INSTANTIATE_CLASS(ProposalLayer);
REGISTER_LAYER_CLASS(Proposal);

}  // namespace caffe
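With the layer compiled and registered, it can replace the Python proposal layer in a test prototxt. A hedged sketch, assuming the usual py-faster-rcnn blob names (rename them to match your own net) and the ProposalParameter fields sketched earlier:

layer {
  name: "proposal"
  type: "Proposal"
  bottom: "rpn_cls_prob_reshape"  # (1, 2*A, H, W) objectness scores
  bottom: "rpn_bbox_pred"         # (1, 4*A, H, W) box regression deltas
  bottom: "im_info"               # (1, 3): image height, width, scale
  top: "rois"                     # (N, 5): batch_index, x1, y1, x2, y2
  proposal_param {
    feat_stride: 16
    anchor_base_size: 16
    anchor_scale: 3   # expands to {8, 16, 32}
    anchor_ratio: 3   # expands to {0.5, 1, 2}
    max_rois: 300
  }
}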

Writing this took real effort, so tips are appreciated!!! For academic discussion or pointers, add me on WeChat: manutdzou, and note your purpose.
