Running YOLOv8_Detect_YUV420SP.py on the RDK X5 keeps failing.

My images show people standing, squatting/sitting, and falling down.
The actual goal is fall detection, and I don't know how to modify this code for it.

Here is the code I am running:
import cv2
import numpy as np
from scipy.special import softmax

from scipy.special import expit as sigmoid

from hobot_dnn import pyeasy_dnn as dnn # BSP Python API
from time import time
import argparse
import logging

# logging configuration

logging.basicConfig(
    level=logging.DEBUG,
    format='[%(name)s] [%(asctime)s.%(msecs)03d] [%(levelname)s] %(message)s',
    datefmt='%H:%M:%S')
logger = logging.getLogger("RDK_YOLO")

def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('--model-path', type=str, default='./model_modified.bin',
                        help="Path to BPU Quantized *.bin Model")
    parser.add_argument('--test-img', type=str, default='./1.jpg', help='Path to Load Test Image.')
    parser.add_argument('--img-save-path', type=str, default='out_putimg.jpg', help='Path to save output image.')
    parser.add_argument('--classes-num', type=int, default=3, help='Classes Num to Detect.')  # changed to 3 classes
    parser.add_argument('--nms-thres', type=float, default=0.7, help='IoU threshold.')
    parser.add_argument('--score-thres', type=float, default=0.25, help='confidence threshold.')
    parser.add_argument('--reg', type=int, default=4, help='DFL reg layer.')  # changed to 4 values
    opt = parser.parse_args()
    logger.info(opt)

    # Instantiate the model
    model = YOLOv8_Detect(opt)
    # Load the image
    img = cv2.imread(opt.test_img)
    if img is None:
        logger.error(f"Failed to load image: {opt.test_img}")
        return

    # Prepare input data
    input_tensor = model.preprocess_yuv420sp(img)
    # Inference
    outputs = model.c2numpy(model.forward(input_tensor))
    # Post-processing
    results = model.postProcess(outputs)
    # Render
    logger.info("\033[1;32m" + "Draw Results: " + "\033[0m")
    for class_id, score, x1, y1, x2, y2 in results:
        print("(%d, %d, %d, %d) -> %s: %.2f" % (x1, y1, x2, y2, coco_names[class_id], score))
        draw_detection(img, (x1, y1, x2, y2), score, class_id)
    # Save the result
    cv2.imwrite(opt.img_save_path, img)
    logger.info(f"Results saved to: {opt.img_save_path}")

class YOLOv8_Detect():
    def __init__(self, opt):
        # Load the BPU .bin model and print its parameters
        try:
            begin_time = time()
            self.quantize_model = dnn.load(opt.model_path)
            logger.debug("\033[1;31m" + "Load D-Robotics Quantize model time = %.2f ms"%(1000*(time() - begin_time)) + "\033[0m")
        except Exception as e:
            logger.error("Failed to load model file: %s"%(opt.model_path))
            logger.error(e)
            exit(1)

        logger.info("\033[1;32m" + "-> input tensors" + "\033[0m")
        for i, quantize_input in enumerate(self.quantize_model[0].inputs):
            logger.info(f"input[{i}], name={quantize_input.name}, type={quantize_input.properties.dtype}, shape={quantize_input.properties.shape}")

        logger.info("\033[1;32m" + "-> output tensors" + "\033[0m")
        for i, quantize_output in enumerate(self.quantize_model[0].outputs):
            logger.info(f"output[{i}], name={quantize_output.name}, type={quantize_output.properties.dtype}, shape={quantize_output.properties.shape}")
            # Print quantization info
            if hasattr(quantize_output.properties, 'quantiType'):
                logger.info(f"  quantiType: {quantize_output.properties.quantiType}")
            if hasattr(quantize_output.properties, 'scale_data'):
                logger.info(f"  scale_data: {quantize_output.properties.scale_data[:4]}...")  # only print the first few values

        # Changed how the quantization parameters are obtained
        try:
            # Check whether the output layer has a scale_data attribute
            if hasattr(self.quantize_model[0].outputs[1].properties, 'scale_data') and \
                self.quantize_model[0].outputs[1].properties.scale_data is not None:
                self.s_bboxes_scale = self.quantize_model[0].outputs[1].properties.scale_data[np.newaxis, :]
            else:
                logger.warning("Output 1 has no scale_data attribute or it is empty; using default value 1.0")
                self.s_bboxes_scale = np.array([1.0], dtype=np.float32)[np.newaxis, :]

            if hasattr(self.quantize_model[0].outputs[3].properties, 'scale_data') and \
                self.quantize_model[0].outputs[3].properties.scale_data is not None:
                self.m_bboxes_scale = self.quantize_model[0].outputs[3].properties.scale_data[np.newaxis, :]
            else:
                logger.warning("Output 3 has no scale_data attribute or it is empty; using default value 1.0")
                self.m_bboxes_scale = np.array([1.0], dtype=np.float32)[np.newaxis, :]

            if hasattr(self.quantize_model[0].outputs[5].properties, 'scale_data') and \
                self.quantize_model[0].outputs[5].properties.scale_data is not None:
                self.l_bboxes_scale = self.quantize_model[0].outputs[5].properties.scale_data[np.newaxis, :]
            else:
                logger.warning("Output 5 has no scale_data attribute or it is empty; using default value 1.0")
                self.l_bboxes_scale = np.array([1.0], dtype=np.float32)[np.newaxis, :]

        except Exception as e:
            logger.error(f"Failed to get quantization parameters: {str(e)}")
            # Fall back to default value 1.0
            self.s_bboxes_scale = np.array([1.0], dtype=np.float32)[np.newaxis, :]
            self.m_bboxes_scale = np.array([1.0], dtype=np.float32)[np.newaxis, :]
            self.l_bboxes_scale = np.array([1.0], dtype=np.float32)[np.newaxis, :]

        logger.info(f"{self.s_bboxes_scale.shape=}, {self.m_bboxes_scale.shape=}, {self.l_bboxes_scale.shape=}")

        # DFL expectation coefficients
        self.weights_static = np.array([i for i in range(opt.reg)]).astype(np.float32)[np.newaxis, np.newaxis, :]
        logger.info(f"{self.weights_static.shape = }")

        # anchors
        self.s_anchor = np.stack([np.tile(np.linspace(0.5, 79.5, 80), reps=80),
                        np.repeat(np.arange(0.5, 80.5, 1), 80)], axis=0).transpose(1, 0)
        self.m_anchor = np.stack([np.tile(np.linspace(0.5, 39.5, 40), reps=40),
                        np.repeat(np.arange(0.5, 40.5, 1), 40)], axis=0).transpose(1, 0)
        self.l_anchor = np.stack([np.tile(np.linspace(0.5, 19.5, 20), reps=20),
                        np.repeat(np.arange(0.5, 20.5, 1), 20)], axis=0).transpose(1, 0)
        logger.info(f"{self.s_anchor.shape = }, {self.m_anchor.shape = }, {self.l_anchor.shape = }")

        # Input image size and thresholds
        self.input_image_size = 640
        self.SCORE_THRESHOLD = opt.score_thres
        self.NMS_THRESHOLD = opt.nms_thres
        self.CONF_THRES_RAW = -np.log(1/self.SCORE_THRESHOLD - 1)
        logger.info("SCORE_THRESHOLD  = %.2f, NMS_THRESHOLD = %.2f"%(self.SCORE_THRESHOLD, self.NMS_THRESHOLD))
        logger.info("CONF_THRES_RAW = %.2f"%self.CONF_THRES_RAW)

        self.input_H, self.input_W = self.quantize_model[0].inputs[0].properties.shape[2:4]
        logger.info(f"{self.input_H = }, {self.input_W = }")

        self.REG = opt.reg
        logger.info(f"{self.REG = }")

        self.CLASSES_NUM = opt.classes_num
        logger.info(f"{self.CLASSES_NUM = }")
    def preprocess_yuv420sp(self, img):
        RESIZE_TYPE = 0
        LETTERBOX_TYPE = 1
        PREPROCESS_TYPE = LETTERBOX_TYPE
        logger.info(f"PREPROCESS_TYPE = {PREPROCESS_TYPE}")

        begin_time = time()
        self.img_h, self.img_w = img.shape[0:2]
        if PREPROCESS_TYPE == RESIZE_TYPE:
            # Preprocess via resize and prepare the nv12 input data
            begin_time = time()
            input_tensor = cv2.resize(img, (self.input_W, self.input_H), interpolation=cv2.INTER_NEAREST)  # let resize allocate the new buffer, saving one copy
            input_tensor = self.bgr2nv12(input_tensor)
            self.y_scale = 1.0 * self.input_H / self.img_h
            self.x_scale = 1.0 * self.input_W / self.img_w
            self.y_shift = 0
            self.x_shift = 0
            logger.info("\033[1;31m" + f"pre process(resize) time = {1000*(time() - begin_time):.2f} ms" + "\033[0m")
        elif PREPROCESS_TYPE == LETTERBOX_TYPE:
            # Preprocess via letterbox and prepare the nv12 input data
            begin_time = time()
            self.x_scale = min(1.0 * self.input_H / self.img_h, 1.0 * self.input_W / self.img_w)
            self.y_scale = self.x_scale

            if self.x_scale <= 0 or self.y_scale <= 0:
                raise ValueError("Invalid scale factor.")

            new_w = int(self.img_w * self.x_scale)
            self.x_shift = (self.input_W - new_w) // 2
            x_other = self.input_W - new_w - self.x_shift

            new_h = int(self.img_h * self.y_scale)
            self.y_shift = (self.input_H - new_h) // 2
            y_other = self.input_H - new_h - self.y_shift

            input_tensor = cv2.resize(img, (new_w, new_h))
            input_tensor = cv2.copyMakeBorder(input_tensor, self.y_shift, y_other, self.x_shift, x_other, cv2.BORDER_CONSTANT, value=[127, 127, 127])
            input_tensor = self.bgr2nv12(input_tensor)
            logger.info("\033[1;31m" + f"pre process(letter box) time = {1000*(time() - begin_time):.2f} ms" + "\033[0m")
        else:
            logger.error(f"illegal PREPROCESS_TYPE = {PREPROCESS_TYPE}")
            exit(-1)

        logger.debug("\033[1;31m" + f"pre process time = {1000*(time() - begin_time):.2f} ms" + "\033[0m")
        logger.info(f"y_scale = {self.y_scale:.2f}, x_scale = {self.x_scale:.2f}")
        logger.info(f"y_shift = {self.y_shift:.2f}, x_shift = {self.x_shift:.2f}")
        return input_tensor

    def bgr2nv12(self, bgr_img):
        begin_time = time()
        height, width = bgr_img.shape[0], bgr_img.shape[1]
        area = height * width
        yuv420p = cv2.cvtColor(bgr_img, cv2.COLOR_BGR2YUV_I420).reshape((area * 3 // 2,))
        y = yuv420p[:area]
        uv_planar = yuv420p[area:].reshape((2, area // 4))
        uv_packed = uv_planar.transpose((1, 0)).reshape((area // 2,))
        nv12 = np.zeros_like(yuv420p)
        nv12[:height * width] = y
        nv12[height * width:] = uv_packed
        logger.debug("\033[1;31m" + f"bgr8 to nv12 time = {1000*(time() - begin_time):.2f} ms" + "\033[0m")
        return nv12

    def forward(self, input_tensor):
        begin_time = time()
        quantize_outputs = self.quantize_model[0].forward(input_tensor)
        logger.debug("\033[1;31m" + f"forward time = {1000*(time() - begin_time):.2f} ms" + "\033[0m")
        return quantize_outputs

    def c2numpy(self, outputs):
        begin_time = time()
        outputs = [dnnTensor.buffer for dnnTensor in outputs]
        logger.debug("\033[1;31m" + f"c to numpy time = {1000*(time() - begin_time):.2f} ms" + "\033[0m")
        return outputs

    def postProcess(self, outputs):
        begin_time = time()
        # Reshape outputs
        s_clses = outputs[0].reshape(80*80, 64)
        s_bboxes = outputs[1].reshape(80*80, 4)
        m_clses = outputs[2].reshape(40*40, 64)
        m_bboxes = outputs[3].reshape(40*40, 4)
        l_clses = outputs[4].reshape(20*20, 64)
        l_bboxes = outputs[5].reshape(20*20, 4)

        # classify: vectorized threshold filtering with numpy
        s_max_scores = np.max(s_clses, axis=1)
        s_valid_indices = np.flatnonzero(s_max_scores >= self.CONF_THRES_RAW)
        s_ids = np.argmax(s_clses[s_valid_indices, :], axis=1)
        s_scores = s_max_scores[s_valid_indices]

        m_max_scores = np.max(m_clses, axis=1)
        m_valid_indices = np.flatnonzero(m_max_scores >= self.CONF_THRES_RAW)
        m_ids = np.argmax(m_clses[m_valid_indices, :], axis=1)
        m_scores = m_max_scores[m_valid_indices]

        l_max_scores = np.max(l_clses, axis=1)
        l_valid_indices = np.flatnonzero(l_max_scores >= self.CONF_THRES_RAW)
        l_ids = np.argmax(l_clses[l_valid_indices, :], axis=1)
        l_scores = l_max_scores[l_valid_indices]

        # 3 classify branches: Sigmoid
        s_scores = 1 / (1 + np.exp(-s_scores))
        m_scores = 1 / (1 + np.exp(-m_scores))
        l_scores = 1 / (1 + np.exp(-l_scores))

        # 3 bounding box branches: use the float data directly, no dequantization
        s_bboxes_float32 = s_bboxes[s_valid_indices, :].astype(np.float32)
        m_bboxes_float32 = m_bboxes[m_valid_indices, :].astype(np.float32)
        l_bboxes_float32 = l_bboxes[l_valid_indices, :].astype(np.float32)

        # 3 bounding box branches: decode boxes (ltrb2xyxy)
        s_ltrb = s_bboxes_float32
        s_anchor_indices = self.s_anchor[s_valid_indices, :]
        s_x1y1 = s_anchor_indices - s_ltrb[:, 0:2]
        s_x2y2 = s_anchor_indices + s_ltrb[:, 2:4]
        s_dbboxes = np.hstack([s_x1y1, s_x2y2])*8

        m_ltrb = m_bboxes_float32
        m_anchor_indices = self.m_anchor[m_valid_indices, :]
        m_x1y1 = m_anchor_indices - m_ltrb[:, 0:2]
        m_x2y2 = m_anchor_indices + m_ltrb[:, 2:4]
        m_dbboxes = np.hstack([m_x1y1, m_x2y2])*16

        l_ltrb = l_bboxes_float32
        l_anchor_indices = self.l_anchor[l_valid_indices, :]
        l_x1y1 = l_anchor_indices - l_ltrb[:, 0:2]
        l_x2y2 = l_anchor_indices + l_ltrb[:, 2:4]
        l_dbboxes = np.hstack([l_x1y1, l_x2y2])*32

        # Concatenate the filtered results from the small/medium/large feature maps
        dbboxes = np.concatenate((s_dbboxes, m_dbboxes, l_dbboxes), axis=0)
        scores = np.concatenate((s_scores, m_scores, l_scores), axis=0)
        ids = np.concatenate((s_ids, m_ids, l_ids), axis=0)

        # xyxy 2 xyhw
        hw = (dbboxes[:, 2:4] - dbboxes[:, 0:2])
        xyhw2 = np.hstack([dbboxes[:, 0:2], hw])

        # per-class NMS
        results = []
        for i in range(self.CLASSES_NUM):
            id_indices = ids == i
            if np.sum(id_indices) == 0:
                continue

            indices = cv2.dnn.NMSBoxes(xyhw2[id_indices, :], scores[id_indices], self.SCORE_THRESHOLD, self.NMS_THRESHOLD)
            if len(indices) == 0:
                continue
            for indic in indices:
                if isinstance(indic, (list, np.ndarray)):
                    indic = indic[0]  # handle the array format returned by OpenCV
                x1, y1, x2, y2 = dbboxes[id_indices, :][indic]
                x1 = int((x1 - self.x_shift) / self.x_scale)
                y1 = int((y1 - self.y_shift) / self.y_scale)
                x2 = int((x2 - self.x_shift) / self.x_scale)
                y2 = int((y2 - self.y_shift) / self.y_scale)

                # Clip to image bounds
                x1 = max(0, min(x1, self.img_w))
                y1 = max(0, min(y1, self.img_h))
                x2 = max(0, min(x2, self.img_w))
                y2 = max(0, min(y2, self.img_h))

                results.append((i, scores[id_indices][indic], x1, y1, x2, y2))

        logger.debug("\033[1;31m" + f"Post Process time = {1000*(time() - begin_time):.2f} ms" + "\033[0m")

        return results

# Change these to your own class names
coco_names = [
    "stand",
    "sit",
    "down"
]

rdk_colors = [
    (56, 56, 255), (151, 157, 255), (31, 112, 255), (29, 178, 255), (49, 210, 207), (10, 249, 72), (23, 204, 146), (134, 219, 61),
    (52, 147, 26), (187, 212, 0), (168, 153, 44), (255, 194, 0), (147, 69, 52), (255, 115, 100), (236, 24, 0), (255, 56, 132),
    (133, 0, 82), (255, 56, 203), (200, 149, 255), (199, 55, 255)]

def draw_detection(img, bbox, score, class_id) -> None:
    """
    Draws a detection bounding box and label on the image.

    Parameters:
        img (np.array): The input image.
        bbox (tuple[int, int, int, int]): A tuple containing the bounding box coordinates (x1, y1, x2, y2).
        score (float): The detection score of the object.
        class_id (int): The class ID of the detected object.
    """
    x1, y1, x2, y2 = bbox
    color = rdk_colors[class_id % 20]
    cv2.rectangle(img, (x1, y1), (x2, y2), color, 2)
    label = f"{coco_names[class_id]}: {score:.2f}"
    (label_width, label_height), _ = cv2.getTextSize(label, cv2.FONT_HERSHEY_SIMPLEX, 0.5, 1)
    label_x, label_y = x1, y1 - 10 if y1 - 10 > label_height else y1 + 10
    cv2.rectangle(
        img, (label_x, label_y - label_height), (label_x + label_width, label_y + label_height), color, cv2.FILLED
    )
    cv2.putText(img, label, (label_x, label_y), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 0, 0), 1, cv2.LINE_AA)

if __name__ == "__main__":
    main()

This is the code after my changes. When I run it, the output contains a huge number of boxes and every confidence score is 1.


Could someone please help me out? I have no idea how to fix the code.

Hi, just run it strictly following the ModelZoo workflow;

the only change you need in the on-board inference code is the label list (see the sketch below).
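
For example, a minimal sketch of that label change for the three classes in this thread (everything else in the ModelZoo script stays untouched; the --classes-num argument shown above must match the list length):

# Replace the default COCO label list with your own classes;
# the order must match the class indices used during training.
coco_names = [
    "stand",  # class 0
    "sit",    # class 1
    "down",   # class 2
]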

D-Robotics/rdk_model_zoo
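
For reference, the stock YOLOv8 sample in that repo decodes each bbox branch with a 16-bin DFL expectation after dequantization; that is presumably what the unused scipy softmax import and the "DFL expectation coefficients" in your code were for, and it is lost once --reg is changed to 4 and the raw buffers are reshaped to (N, 4). A minimal sketch of that decode for one scale, assuming an int32 output flattened to (N, 64) and the per-channel scale array read in __init__:

import numpy as np
from scipy.special import softmax

REG = 16  # DFL bins per box side in the stock YOLOv8 head
weights = np.arange(REG, dtype=np.float32)[np.newaxis, np.newaxis, :]

def decode_bboxes(raw_int32, scale, anchors, stride):
    # Dequantize: int32 buffer times per-channel scale -> float32, (N, 64)
    ltrb_dist = raw_int32.astype(np.float32) * scale
    # DFL expectation: softmax over the 16 bins, then a weighted sum -> ltrb distances, (N, 4)
    ltrb = np.sum(softmax(ltrb_dist.reshape(-1, 4, REG), axis=2) * weights, axis=2)
    # Distances around the anchor centers -> xyxy, scaled by the feature-map stride
    x1y1 = anchors - ltrb[:, 0:2]
    x2y2 = anchors + ltrb[:, 2:4]
    return np.hstack([x1y1, x2y2]) * stride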

You can also run one of the on-board inference sample models first to verify whether the problem was introduced during model quantization.
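
One quick way to run that check (a sketch that uses only the pyeasy_dnn properties your own logging code already reads): dump each output's dtype and scale_data. An int32 output with a non-empty scale_data is quantized, and its buffer must be multiplied by that scale before any Sigmoid/DFL math; feeding raw int32 values straight into Sigmoid saturates every score to 1.0, which matches the symptom you describe.

from hobot_dnn import pyeasy_dnn as dnn

models = dnn.load("./model_modified.bin")
for i, out in enumerate(models[0].outputs):
    p = out.properties
    print(f"output[{i}]: dtype={p.dtype}, shape={p.shape}")
    # A non-empty scale_data on an int32 output means this branch is quantized:
    # dequantize by multiplying the float32-cast buffer by scale_data
    # before any Sigmoid / DFL computation.
    if hasattr(p, 'scale_data') and p.scale_data is not None and len(p.scale_data) > 0:
        print(f"  scale_data[:4] = {p.scale_data[:4]}")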