已生成.bin文件
YOLOv5 后处理代码解决方案
根据你的需求,社区已有成熟的 YOLOv5 后处理实现方案。以下是关键资源和实现步骤:
核心资源
- 完整教程:Yolov5上板教程 - 基于 yolov3_mobilenetv2_example 改造,包含完整的 postprocess 代码
- 问题讨论:原生yolov5_postprocess后处理问题 - 包含详细的调试日志和代码实现
关键实现步骤
1. 添加后处理头文件
// model_inference/postprocess/yolov5_postprocess.h
#ifndef YOLOV5_POSTPROCESS_H_
#define YOLOV5_POSTPROCESS_H_
#include <vector>
#include <string>
#include <utility>
#include <memory>
#include "model_inference/inference_task.h"
#include "model_inference/postprocess/postprocess.h"
#include "xstream/xstream_world.h"
#include "xstream/vision_type.h"
namespace inference {
struct Detection {
xstream::BBox bbox;
Detection() {}
explicit Detection(xstream::BBox bbox) : bbox(bbox) {}
friend bool operator>(const Detection &lhs, const Detection &rhs) {
return (lhs.bbox.score_ > rhs.bbox.score_);
}
};
/// Post-processing stage for YOLOv5 detection models.
///
/// Init() loads thresholds and image sizes from a JSON string; Execute()
/// decodes the output tensors of one inference task, applies NMS and emits
/// the resulting boxes.
class YoloV5PostProcess : public PostProcess {
 public:
  /// Reads the configuration from @p json_str.
  virtual int Init(const std::string &json_str);

  /// Converts the tensors held by @p tasks into detection results that are
  /// appended to @p frame_result.
  virtual int Execute(
      const std::vector<std::shared_ptr<InferenceEngineTask>> &tasks,
      std::vector<xstream::BaseDataPtr> *frame_result);

 private:
  // Detections whose confidence is below this value are discarded.
  float score_threshold_ = 0.3f;
  // Threshold forwarded to NMS() as iou_threshold.
  float nms_threshold_ = 0.45f;
  // Value forwarded to NMS() as top_k.
  int nms_top_k_ = 500;

  // Image sizes supplied through Init(). They are zero-initialised so the
  // object never exposes indeterminate values if it is used before Init().
  int basic_pyramid_image_height_ = 0;
  int basic_pyramid_image_width_ = 0;
  int src_image_height_ = 0;
  int src_image_width_ = 0;

  // Non-maximum suppression over `input`, writing survivors to `result`.
  // NOTE(review): the definition is not part of this snippet; the exact
  // meaning of `suppress` must be confirmed against the implementation.
  void NMS(std::vector<Detection> &input, float iou_threshold,
           int top_k, std::vector<Detection> &result, bool suppress);
};
}
#endif
2. 实现后处理逻辑
// model_inference/postprocess/yolov5_postprocess.cc
#include "model_inference/postprocess/yolov5_postprocess.h"
#include "model_inference/inference_engine.h"
#include "json/json.h"
#include <algorithm>
namespace inference {
// YOLOv5 configuration (adjust to match the actual model).
// NOTE(review): Yolo5Config is not declared anywhere in this snippet —
// presumably it comes from a project header; confirm before building.
Yolo5Config default_yolo5_config = {
{8, 16, 32}, // strides
// Anchor (first, second) pairs, one row per output layer; Execute reads
// them as anchors_table[i].
{{{1.25, 1.625}, {2.0, 3.75}, {4.125, 2.875}},
{{1.875, 3.8125}, {3.875, 2.8125}, {3.6875, 7.4375}},
{{3.625, 2.8125}, {4.875, 6.1875}, {11.65625, 10.1875}}},
80, // class_num
// The "..." below is a placeholder, not valid C++: replace it with the
// remaining class names before compiling.
{"person", "bicycle", "car", ...} // class_names
};
int YoloV5PostProcess::Init(const std::string &json_str) {
Json::Reader Reader;
Json::Value config;
Reader.parse(json_str, config);
score_threshold_ = config["score_threshold"].isNumeric() ?
config["score_threshold"].asFloat() : score_threshold_;
nms_threshold_ = config["nms_threshold"].isNumeric() ?
config["nms_threshold"].asFloat() : nms_threshold_;
// 配置金字塔图像尺寸和原图尺寸
basic_pyramid_image_height_ = config["basic_pyramid_image_height"].asInt();
basic_pyramid_image_width_ = config["basic_pyramid_image_width"].asInt();
src_image_height_ = config["src_image_height"].asInt();
src_image_width_ = config["src_image_width"].asInt();
return 0;
}
int YoloV5PostProcess::Execute(
const std::vector<std::shared_ptr<InferenceEngineTask>> &tasks,
std::vector<xstream::BaseDataPtr> *frame_result) {
HOBOT_CHECK(tasks.size() == 1);
auto task = tasks[0];
auto xstream_det_result = std::make_shared<xstream::BaseDataVector>();
frame_result->push_back(xstream_det_result);
std::vector<Detection> dets;
int out_layer = task->float_tensors_.size();
// 遍历输出层(YOLOv5 通常有3个检测头)
for (int i = 0; i < out_layer; ++i) {
float *data = task->float_tensors_[i].value.data();
int num_classes = default_yolo5_config.class_num;
int stride = default_yolo5_config.strides[i];
int num_pred = num_classes + 4 + 1;
std::vector<std::pair<double, double>> &anchors =
default_yolo5_config.anchors_table[i];
// 获取输出tensor尺寸
int height, width;
switch (task->float_tensors_[i].layout) {
case LAYOUT_NHWC:
height = task->float_tensors_[i].dim[1];
width = task->float_tensors_[i].dim[2];
break;
case LAYOUT_NCHW:
height = task->float_tensors_[i].dim[2];
width = task->float_tensors_[i].dim[3];
break;
}
// 解码检测框
for (int h = 0; h < height; h++) {
for (int w = 0; w < width; w++) {
for (size_t k = 0; k < anchors.size(); k++) {
float *cur_data = data + k * num_pred;
float objness = cur_data[4];
// 计算置信度
float confidence = sigmoid(objness) *
sigmoid(cur_data[5 + class_id]);
if (confidence < score_threshold_) continue;
// 解码坐标(YOLOv5 格式)
float center_x = (sigmoid(cur_data[0]) * 2 - 0.5 + w) * stride;
float center_y = (sigmoid(cur_data[1]) * 2 - 0.5 + h) * stride;
float box_scale_x = pow(sigmoid(cur_data[2]) * 2, 2) * anchors[k].first;
float box_scale_y = pow(sigmoid(cur_data[3]) * 2, 2) * anchors[k].second;
// 转换为 (x1, y1, x2, y2)
float xmin = (center_x - box_scale_x / 2.0);
float ymin = (center_y - box_scale_y / 2.0);
float xmax = (center_x + box_scale_x / 2.0);
float ymax = (center_y + box_scale_y / 2.0);
// 坐标映射到原图
// ...(参考教程中的坐标转换逻辑)
xstream::BBox bbox(xmin_org, ymin_org, xmax_org, ymax_org,
confidence, class_id, class_name);
dets.push_back(Detection(bbox));
}
}
}
}
// NMS 后处理
std::vector<Detection> det_result;
NMS(dets, nms_threshold_, nms_top_k_, det_result, false);
// 转换结果为 BaseData
for (auto &det : det_result) {
auto xstream_box = std::make_shared<xstream::BBox>(det.bbox);
xstream_det_result->datas_.push_back(xstream_box);
}
return 0;
}
}
3. 注册后处理模块
// model_inference/src/postprocess.cc
#include "model_inference/postprocess/yolov5_postprocess.h"
namespace inference {
// Factory: returns the post-process implementation registered under
// `class_name`, or a null pointer when the name is not recognised.
std::shared_ptr<PostProcess> PostProcess::GetInstance(std::string class_name) {
  // ... other models
  std::shared_ptr<PostProcess> instance;  // stays null for unknown names
  if (class_name == "yolov5_postprocess") {
    instance = std::make_shared<YoloV5PostProcess>();
  }
  return instance;
}
}
RDK X5 注意事项
版本差异提示:
- RDK X5 的 BPU 架构与 X3 不同,需使用对应的 toolchain 版本
- 模型输入尺寸建议:672x672 或 960x960(根据模型训练配置)
- 输出 tensor 布局可能是 NHWC 或 NCHW,需根据实际模型调整
快速验证
生成 .bin 文件后,建议按以下步骤验证:
- 先用官方示例验证模型推理是否正常
- 检查输出 tensor 的 dimensions 和 layout
- 根据实际输出调整 anchors 和 strides 配置
需要更详细的代码可以参考上述教程帖子,或提供你的具体报错信息以便进一步排查。
可以参考 RDK Model Zoo 中的示例代码:D-Robotics/rdk_model_zoo 仓库 rdk_x5 分支下的 samples/vision/ultralytics_yolo
也可以直接使用板端自带的推理代码:YOLO | RDK DOC