求yolov5后处理代码

已生成.bin文件

YOLOv5 后处理代码解决方案

根据你的需求,社区已有成熟的 YOLOv5 后处理实现方案。以下是关键资源和实现步骤:

核心资源

  1. 完整教程:《Yolov5上板教程》 - 基于 yolov3_mobilenetv2_example 改造,包含完整的 postprocess 代码
  2. 问题讨论:《原生yolov5_postprocess后处理问题》 - 包含详细的调试日志和代码实现

关键实现步骤

1. 添加后处理头文件

// model_inference/postprocess/yolov5_postprocess.h
#ifndef YOLOV5_POSTPROCESS_H_
#define YOLOV5_POSTPROCESS_H_

#include <vector>
#include <string>
#include <utility>
#include <memory>
#include "model_inference/inference_task.h"
#include "model_inference/postprocess/postprocess.h"
#include "xstream/xstream_world.h"
#include "xstream/vision_type.h"

namespace inference {
struct Detection {
    xstream::BBox bbox;
    Detection() {}
    explicit Detection(xstream::BBox bbox) : bbox(bbox) {}
    friend bool operator>(const Detection &lhs, const Detection &rhs) {
        return (lhs.bbox.score_ > rhs.bbox.score_);
    }
};

// Post-processor that turns raw YOLOv5 output tensors into bounding boxes.
//
// NOTE(review): Init/Execute are presumably overrides of PostProcess
// virtuals; add `override` once the base-class signatures are confirmed.
class YoloV5PostProcess : public PostProcess {
public:
    // Reads thresholds and image sizes from a JSON configuration string.
    // Returns 0 on success.
    virtual int Init(const std::string &json_str);

    // Decodes the tensors of the single task in `tasks` and appends one
    // result container to `frame_result`. Returns 0 on success.
    virtual int Execute(
        const std::vector<std::shared_ptr<InferenceEngineTask>> &tasks,
        std::vector<xstream::BaseDataPtr> *frame_result);

private:
    // Candidates whose confidence is below this value are discarded.
    float score_threshold_ = 0.3f;
    // Threshold handed to NMS as its `iou_threshold` argument.
    float nms_threshold_ = 0.45f;
    // Value handed to NMS as its `top_k` argument.
    int nms_top_k_ = 500;
    // Image sizes are value-initialized so that using the object before
    // Init() never reads indeterminate values; Init() overwrites them.
    int basic_pyramid_image_height_ = 0;
    int basic_pyramid_image_width_ = 0;
    int src_image_height_ = 0;
    int src_image_width_ = 0;

    // Non-maximum suppression over `input`, writing survivors to `result`.
    // NOTE(review): the definition is not part of this listing; the exact
    // meaning of `suppress` must be confirmed against the implementation.
    void NMS(std::vector<Detection> &input, float iou_threshold,
             int top_k, std::vector<Detection> &result, bool suppress);
};
}
#endif

2. 实现后处理逻辑

// model_inference/postprocess/yolov5_postprocess.cc
#include "model_inference/postprocess/yolov5_postprocess.h"

#include <algorithm>
#include <cmath>

#include "json/json.h"
#include "model_inference/inference_engine.h"

namespace inference {
// YOLOv5 configuration (adjust to match the actual model).
//
// Field order: strides, per-head anchor table, class count, class names.
// NOTE(review): Yolo5Config is not declared in this listing; it must be
// declared before this definition with members in the order used here.
Yolo5Config default_yolo5_config = {
    {8, 16, 32},  // strides
    // Anchors, three per detection head.
    // NOTE(review): these values look like the standard YOLOv5 COCO anchors
    // divided by the head's stride (e.g. 10/8 = 1.25) - confirm for your model.
    {{{1.25, 1.625}, {2.0, 3.75}, {4.125, 2.875}},
     {{1.875, 3.8125}, {3.875, 2.8125}, {3.6875, 7.4375}},
     {{3.625, 2.8125}, {4.875, 6.1875}, {11.65625, 10.1875}}},
    80,  // class_num
    // class_names: the 80 COCO categories, indexed by class id.
    {"person", "bicycle", "car", "motorcycle", "airplane",
     "bus", "train", "truck", "boat", "traffic light",
     "fire hydrant", "stop sign", "parking meter", "bench", "bird",
     "cat", "dog", "horse", "sheep", "cow",
     "elephant", "bear", "zebra", "giraffe", "backpack",
     "umbrella", "handbag", "tie", "suitcase", "frisbee",
     "skis", "snowboard", "sports ball", "kite", "baseball bat",
     "baseball glove", "skateboard", "surfboard", "tennis racket", "bottle",
     "wine glass", "cup", "fork", "knife", "spoon",
     "bowl", "banana", "apple", "sandwich", "orange",
     "broccoli", "carrot", "hot dog", "pizza", "donut",
     "cake", "chair", "couch", "potted plant", "bed",
     "dining table", "toilet", "tv", "laptop", "mouse",
     "remote", "keyboard", "cell phone", "microwave", "oven",
     "toaster", "sink", "refrigerator", "book", "clock",
     "vase", "scissors", "teddy bear", "hair drier", "toothbrush"}
};

int YoloV5PostProcess::Init(const std::string &json_str) {
    Json::Reader Reader;
    Json::Value config;
    Reader.parse(json_str, config);
    score_threshold_ = config["score_threshold"].isNumeric() ? 
                       config["score_threshold"].asFloat() : score_threshold_;
    nms_threshold_ = config["nms_threshold"].isNumeric() ? 
                     config["nms_threshold"].asFloat() : nms_threshold_;
    // 配置金字塔图像尺寸和原图尺寸
    basic_pyramid_image_height_ = config["basic_pyramid_image_height"].asInt();
    basic_pyramid_image_width_ = config["basic_pyramid_image_width"].asInt();
    src_image_height_ = config["src_image_height"].asInt();
    src_image_width_ = config["src_image_width"].asInt();
    return 0;
}

int YoloV5PostProcess::Execute(
    const std::vector<std::shared_ptr<InferenceEngineTask>> &tasks,
    std::vector<xstream::BaseDataPtr> *frame_result) {
    
    HOBOT_CHECK(tasks.size() == 1);
    auto task = tasks[0];
    auto xstream_det_result = std::make_shared<xstream::BaseDataVector>();
    frame_result->push_back(xstream_det_result);
    
    std::vector<Detection> dets;
    int out_layer = task->float_tensors_.size();
    
    // 遍历输出层(YOLOv5 通常有3个检测头)
    for (int i = 0; i < out_layer; ++i) {
        float *data = task->float_tensors_[i].value.data();
        int num_classes = default_yolo5_config.class_num;
        int stride = default_yolo5_config.strides[i];
        int num_pred = num_classes + 4 + 1;
        std::vector<std::pair<double, double>> &anchors = 
            default_yolo5_config.anchors_table[i];
        
        // 获取输出tensor尺寸
        int height, width;
        switch (task->float_tensors_[i].layout) {
            case LAYOUT_NHWC:
                height = task->float_tensors_[i].dim[1];
                width = task->float_tensors_[i].dim[2];
                break;
            case LAYOUT_NCHW:
                height = task->float_tensors_[i].dim[2];
                width = task->float_tensors_[i].dim[3];
                break;
        }
        
        // 解码检测框
        for (int h = 0; h < height; h++) {
            for (int w = 0; w < width; w++) {
                for (size_t k = 0; k < anchors.size(); k++) {
                    float *cur_data = data + k * num_pred;
                    float objness = cur_data[4];
                    
                    // 计算置信度
                    float confidence = sigmoid(objness) * 
                                      sigmoid(cur_data[5 + class_id]);
                    if (confidence < score_threshold_) continue;
                    
                    // 解码坐标(YOLOv5 格式)
                    float center_x = (sigmoid(cur_data[0]) * 2 - 0.5 + w) * stride;
                    float center_y = (sigmoid(cur_data[1]) * 2 - 0.5 + h) * stride;
                    float box_scale_x = pow(sigmoid(cur_data[2]) * 2, 2) * anchors[k].first;
                    float box_scale_y = pow(sigmoid(cur_data[3]) * 2, 2) * anchors[k].second;
                    
                    // 转换为 (x1, y1, x2, y2)
                    float xmin = (center_x - box_scale_x / 2.0);
                    float ymin = (center_y - box_scale_y / 2.0);
                    float xmax = (center_x + box_scale_x / 2.0);
                    float ymax = (center_y + box_scale_y / 2.0);
                    
                    // 坐标映射到原图
                    // ...(参考教程中的坐标转换逻辑)
                    
                    xstream::BBox bbox(xmin_org, ymin_org, xmax_org, ymax_org,
                                      confidence, class_id, class_name);
                    dets.push_back(Detection(bbox));
                }
            }
        }
    }
    
    // NMS 后处理
    std::vector<Detection> det_result;
    NMS(dets, nms_threshold_, nms_top_k_, det_result, false);
    
    // 转换结果为 BaseData
    for (auto &det : det_result) {
        auto xstream_box = std::make_shared<xstream::BBox>(det.bbox);
        xstream_det_result->datas_.push_back(xstream_box);
    }
    return 0;
}
}

3. 注册后处理模块

// model_inference/src/postprocess.cc
#include "model_inference/postprocess/yolov5_postprocess.h"

namespace inference {
// Factory: creates the post-processor registered under `class_name`.
// Returns nullptr when the name matches no registered post-processor.
std::shared_ptr<PostProcess> PostProcess::GetInstance(std::string class_name) {
    std::shared_ptr<PostProcess> instance;  // stays null for unknown names
    // ... other models
    if (class_name == "yolov5_postprocess") {
        instance = std::make_shared<YoloV5PostProcess>();
    }
    return instance;
}
}

RDK X5 注意事项

⚠️ 版本差异提示

  • RDK X5 的 BPU 架构与 X3 不同,需使用对应的 toolchain 版本
  • 模型输入尺寸建议:672x672 或 960x960(根据模型训练配置)
  • 输出 tensor 布局可能是 NHWC 或 NCHW,需根据实际模型调整

快速验证

已生成.bin 文件后,建议:

  1. 先用官方示例验证模型推理是否正常
  2. 检查输出 tensor 的 dimensions 和 layout
  3. 根据实际输出调整 anchors 和 strides 配置

需要更详细的代码可以参考上述教程帖子,或提供你的具体报错信息以便进一步排查。

可以参考ModelZoo中的代码:rdk_model_zoo/samples/vision/ultralytics_yolo at rdk_x5 · D-Robotics/rdk_model_zoo

也可以直接使用板端自带的推理代码:YOLO | RDK DOC