3.2.5

人工智能学习人工智能考试

📅 2025-11-21 16:52 🔄 2025-11-21 17:04 👤 admin

import os

import time

import cv2

import numpy as np

import vision.utils.box_utils_numpy as box_utils

import onnxruntime as ort

# 定义预测函数，对模型输出的边界框和置信度进行后处理

def predict(width, height, confidences, boxes, prob_threshold, iou_threshold=0.3, top_k=-1):
    """Post-process raw detector output into pixel-space boxes, labels and scores.

    For every class index from 1 upwards, candidates whose score is strictly
    greater than ``prob_threshold`` are kept, filtered with
    ``box_utils.hard_nms`` and finally scaled by ``width`` / ``height``.

    Args:
        width: Factor applied to box columns 0 and 2 (the x coordinates).
        height: Factor applied to box columns 1 and 3 (the y coordinates).
        confidences: Batched scores; only ``confidences[0]`` is used and it is
            indexed as ``[candidate, class]``.
        boxes: Batched boxes; only ``boxes[0]`` is used and it is indexed as
            ``[candidate, coordinate]``.
            NOTE(review): presumably normalised to [0, 1] corner coordinates,
            since they are multiplied by the image size - confirm against
            the model export.
        prob_threshold: Minimum score (exclusive) for a candidate to be kept.
        iou_threshold: Forwarded unchanged to ``box_utils.hard_nms``.
        top_k: Forwarded unchanged to ``box_utils.hard_nms``.

    Returns:
        A tuple ``(boxes, labels, probs)``:
        ``boxes`` is an ``int32`` array holding the first four columns of the
        kept rows after scaling, ``labels`` holds the class index of each kept
        row and ``probs`` holds the fifth column (the score). When nothing
        passes the threshold, three empty arrays (``np.array([])``) are
        returned.
    """
    # Only the first item of the batch is processed.
    boxes = boxes[0]
    confidences = confidences[0]

    picked_box_probs = []
    picked_labels = []

    # Class index 0 is skipped (presumably the background class - confirm
    # against the label file).
    for class_index in range(1, confidences.shape[1]):
        probs = confidences[:, class_index]
        mask = probs > prob_threshold
        probs = probs[mask]
        if probs.shape[0] == 0:
            # No candidate of this class passed the threshold.
            continue

        subset_boxes = boxes[mask, :]
        # Append the score as a fifth column so NMS sees box and score together.
        box_probs = np.concatenate([subset_boxes, probs.reshape(-1, 1)], axis=1)
        box_probs = box_utils.hard_nms(
            box_probs,
            iou_threshold=iou_threshold,
            top_k=top_k,
        )
        picked_box_probs.append(box_probs)
        picked_labels.extend([class_index] * box_probs.shape[0])

    if not picked_box_probs:
        return np.array([]), np.array([]), np.array([])

    picked_box_probs = np.concatenate(picked_box_probs)
    # Scale x columns by the width and y columns by the height.
    picked_box_probs[:, 0] *= width
    picked_box_probs[:, 1] *= height
    picked_box_probs[:, 2] *= width
    picked_box_probs[:, 3] *= height
    return picked_box_probs[:, :4].astype(np.int32), np.array(picked_labels), picked_box_probs[:, 4]

# Read the label file line by line and strip surrounding whitespace to get
# the list of class names. The context manager closes the file handle as
# soon as the list has been built.
with open('voc-model-labels.txt') as labels_file:
    class_names = [name.strip() for name in labels_file]

# Create the ONNX Runtime inference session used to run the model.
ort_session = ort.InferenceSession('version-RFB-320.onnx')

# Name of the model's first input; used as the feed key at inference time.
input_name = ort_session.get_inputs()[0].name

# Directory where the annotated result images are saved.
result_path = "./detect_imgs_results_onnx"

# Confidence threshold used to keep only high-confidence detections.
threshold = 0.7

# Directory containing the images to run detection on.
path = "imgs"

# Running total of detected boxes across all images. Named ``total_boxes``
# rather than ``sum`` so the builtin ``sum`` is not shadowed.
total_boxes = 0

# Create the result directory if it does not exist yet.
os.makedirs(result_path, exist_ok=True)

# Names of all entries (files and folders) in the input directory.
listdir = os.listdir(path)

# Mean subtracted from every pixel during normalisation; it does not depend
# on the image, so it is built once outside the loop.
image_mean = np.array([127, 127, 127])

for file_path in listdir:
    # Full path of the current input file.
    img_path = os.path.join(path, file_path)

    # Read the image with OpenCV. imread returns None instead of raising when
    # the entry is a directory or not a decodable image, so skip those.
    orig_image = cv2.imread(img_path)
    if orig_image is None:
        print("skip unreadable image:{}".format(img_path))
        continue

    # Convert BGR (OpenCV's channel order) to RGB.
    image = cv2.cvtColor(orig_image, cv2.COLOR_BGR2RGB)
    # Resize to 320x240 (width x height).
    image = cv2.resize(image, (320, 240))
    # Normalise: subtract the mean and divide by 128.
    image = (image - image_mean) / 128
    # Reorder (height, width, channels) -> (channels, height, width).
    image = np.transpose(image, [2, 0, 1])
    # Add a leading batch axis -> (1, channels, height, width).
    image = np.expand_dims(image, axis=0)
    image = image.astype(np.float32)

    # Time the inference call with a monotonic, high-resolution clock.
    start = time.perf_counter()
    confidences, boxes = ort_session.run(None, {input_name: image})
    print("cost time:{}".format(time.perf_counter() - start))

    # Post-process the raw output into boxes scaled to the original image
    # size. The labels and scores are returned as well but are not rendered.
    boxes, _labels, _probs = predict(
        orig_image.shape[1], orig_image.shape[0], confidences, boxes, threshold
    )

    # Draw every detected box on the original image.
    for i in range(boxes.shape[0]):
        # Plain Python ints: some OpenCV builds reject NumPy scalar coordinates.
        x1, y1, x2, y2 = (int(value) for value in boxes[i, :])
        cv2.rectangle(orig_image, (x1, y1), (x2, y2), (255, 255, 0), 4)

    # Save the annotated image once, after all of its boxes have been drawn.
    cv2.imwrite(os.path.join(result_path, file_path), orig_image)

    # Add this image's detection count to the running total.
    total_boxes += boxes.shape[0]

# Print the total number of boxes detected across all images.
print("sum:{}".format(total_boxes))