# video_objects/object_detection/yolo5showobjects.py

#!/usr/bin/env python3

import cv2
import torch
import yt_dlp
import subprocess
import time
from flask import Flask, Response
import numpy as np
import warnings


# Suppress every FutureWarning process-wide; the intent is to hide noisy torch warnings.
warnings.simplefilter("ignore", category=FutureWarning)

# Flask application that serves the annotated video stream over HTTP.
app = Flask(__name__)

# Runtime configuration.
YOUTUBE_URL = "https://www.youtube.com/watch?v=i3w7qZVSAsY"  # Example stream URL to analyse
CONFIDENCE_THRESHOLD = 0.25  # Detections at or below this confidence are not drawn
MODEL = "yolov5s"  # Weights name passed as `path` to torch.hub.load (yolov5s, yolov5m, etc.)

# Load the YOLOv5 model once at import time; generate_frames() uses this shared instance.
# NOTE(review): force_reload=True asks torch.hub to refresh its cached copy of the
# repository on every start, which presumably needs network access — confirm this
# is intended rather than a leftover from debugging.
print("Loading YOLOv5 model...")
model = torch.hub.load("ultralytics/yolov5", "custom", path=MODEL, force_reload=True)
print("YOLOv5 loaded successfully!")

def get_stream_url():
    """Fetch a fresh 720p YouTube stream URL using yt-dlp.

    Returns:
        The direct media URL resolved for ``YOUTUBE_URL``, or ``None`` when
        yt-dlp cannot provide one (extraction failed, no matching 720p
        video format, or the result carries no ``url`` field).
    """
    ydl_opts = {'quiet': True, 'format': 'bestvideo[height=720]'}
    try:
        with yt_dlp.YoutubeDL(ydl_opts) as ydl:
            info_dict = ydl.extract_info(YOUTUBE_URL, download=False)
    except yt_dlp.utils.DownloadError as err:
        # The caller treats a falsy result as "no stream available", so report
        # extraction failures that way instead of letting the exception escape
        # into the streaming response.
        print(f"❌ yt-dlp could not resolve the stream: {err}")
        return None

    # Guard against an empty result before looking up the URL.
    if not info_dict:
        return None
    return info_dict.get("url")

def _draw_detections(frame, detections):
    """Draw a labelled red box on *frame* for each detection above the threshold."""
    confident = detections[detections["confidence"] > CONFIDENCE_THRESHOLD]
    for _, row in confident.iterrows():
        x1, y1, x2, y2 = int(row["xmin"]), int(row["ymin"]), int(row["xmax"]), int(row["ymax"])
        label = f"{row['name']} ({row['confidence']:.2f})"

        cv2.rectangle(frame, (x1, y1), (x2, y2), (0, 0, 255), 2)  # red bounding box
        cv2.putText(frame, label, (x1, y1 - 10), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0), 2)


def generate_frames():
    """Capture video frames, apply object detection, and stream as MJPEG.

    Resolves the stream URL, decodes it with an FFmpeg subprocess into raw
    1280x720 BGR frames, runs the YOLOv5 model on each frame, draws the
    detections and yields every annotated frame as one JPEG part of a
    ``multipart/x-mixed-replace`` response.

    Yields:
        bytes: a ``--frame`` multipart chunk containing one JPEG image.

    The FFmpeg subprocess is always stopped when the generator finishes or is
    closed (for example when the HTTP client disconnects).
    """
    width, height = 1280, 720
    frame_size = width * height * 3  # bytes per bgr24 frame

    stream_url = get_stream_url()
    if not stream_url:
        print("❌ Failed to fetch stream URL!")
        return

    print("🎥 Starting FFmpeg stream...")
    ffmpeg_process = subprocess.Popen([
        "ffmpeg", "-re", "-i", stream_url, "-r", "10",  # frame rate to 10 FPS
        "-fflags", "nobuffer", "-flags", "low_delay",  # buffering delay
        # Force the output size so every frame is exactly frame_size bytes,
        # even if the source is 720 pixels high but not 1280 wide.
        "-s", f"{width}x{height}",
        "-f", "rawvideo", "-pix_fmt", "bgr24", "pipe:1"
    ], stdout=subprocess.PIPE, stderr=subprocess.DEVNULL, bufsize=10**8)

    try:
        while True:
            raw_frame = ffmpeg_process.stdout.read(frame_size)
            # A short read means the stream ended (possibly mid-frame); a
            # partial frame cannot be reshaped, so stop here.
            if len(raw_frame) < frame_size:
                print("❌ No frame received!")
                break

            # np.frombuffer over bytes is read-only; copy so OpenCV can draw on it.
            frame = np.frombuffer(raw_frame, np.uint8).reshape((height, width, 3)).copy()

            # Run YOLO object detection. The hub model expects RGB channel
            # order for NumPy input, while FFmpeg delivers BGR, so flip the
            # channel axis for inference only.
            results = model(frame[..., ::-1])
            detections = results.pandas().xyxy[0]  # Detections as a pandas DataFrame

            if detections.empty:
                print("No objects detected in this frame.")
            else:
                print(f"✅ Detected {len(detections)} objects!")
                print(detections[["name", "confidence"]])  # Detected object names and confidence

            _draw_detections(frame, detections)

            # Encode and yield the frame as JPEG; skip frames that fail to encode.
            ok, buffer = cv2.imencode('.jpg', frame)
            if not ok:
                continue
            yield (b'--frame\r\n'
                   b'Content-Type: image/jpeg\r\n\r\n' + buffer.tobytes() + b'\r\n')
    finally:
        # Runs on normal exit and when the generator is closed: release the
        # pipe, stop FFmpeg, and reap the process so it does not linger.
        ffmpeg_process.stdout.close()
        ffmpeg_process.terminate()
        try:
            ffmpeg_process.wait(timeout=5)
        except subprocess.TimeoutExpired:
            ffmpeg_process.kill()
            ffmpeg_process.wait()

@app.route('/video')
def video_feed():
    """Serve the annotated frames as a multipart MJPEG HTTP response."""
    frame_stream = generate_frames()
    return Response(frame_stream, mimetype='multipart/x-mixed-replace; boundary=frame')

if __name__ == '__main__':
    print("Running at http://localhost:5000/video")
    # Debug mode stays off: the server listens on every interface, and the
    # Werkzeug interactive debugger lets anyone who can reach it execute
    # arbitrary Python code. The debug reloader would also import this module
    # a second time and therefore load the YOLOv5 model twice.
    app.run(host='0.0.0.0', port=5000, debug=False, threaded=True)