YOLOv8 get predicted bounding box-CodePudding

I want to integrate OpenCV with YOLOv8 from ultralytics, so I want to obtain the bounding box coordinates from the model prediction. How do I do this?

from ultralytics import YOLO
import cv2

model = YOLO('yolov8n.pt')
cap = cv2.VideoCapture(0)
cap.set(3, 640)
cap.set(4, 480)

while True:
    _, frame = cap.read()
    
    img = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)

    results = model.predict(img)

    for r in results:
        for c in r.boxes.cls:
            print(model.names[int(c)])

    cv2.imshow('YOLO V8 Detection', frame)
    if cv2.waitKey(1) & 0xFF == ord(' '):
        break

cap.release()
cv2.destroyAllWindows()

I want to display the YOLO annotated image in OpenCV. I know I can use the stream parameter in model.predict(source='0', show=True). But I want to continuously monitor the predicted class names for my program, at the same time displaying the image output.

CodePudding user response：

You can get all the information using the next code:

for result in results:
    # detection
    result.boxes.xyxy   # box with xyxy format, (N, 4)
    result.boxes.xywh   # box with xywh format, (N, 4)
    result.boxes.xyxyn  # box with xyxy format but normalized, (N, 4)
    result.boxes.xywhn  # box with xywh format but normalized, (N, 4)
    result.boxes.conf   # confidence score, (N, 1)
    result.boxes.cls    # cls, (N, 1)

    # segmentation
    result.masks.masks     # masks, (N, H, W)
    result.masks.segments  # bounding coordinates of masks, List[segment] * N

    # classification
    result.probs     # cls prob, (num_class, )

you can read furthermore in the documentation.

CodePudding user response：

This will draw the bboxes the original RGB frame, not the BGR img fed to the model, with their respective label names:


from ultralytics import YOLO
import cv2
from ultralytics.yolo.utils.plotting import Annotator

model = YOLO('yolov8n.pt')
cap = cv2.VideoCapture(0)
cap.set(3, 640)
cap.set(4, 480)

while True:
    _, frame = cap.read()
    
    img = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)

    results = model.predict(img)

    for r in results:
        
        annotator = Annotator(frame)
        
        boxes = r.boxes
        for box in boxes:
            
            b = box.xyxy[0]  # get box coordinates in (top, left, bottom, right) format
            c = box.cls
            annotator.box_label(b, model.names[int(c)])
          
    frame = annotator.result()  
    cv2.imshow('YOLO V8 Detection', frame)     
    if cv2.waitKey(1) & 0xFF == ord(' '):
        break

cap.release()
cv2.destroyAllWindows()