# ================================================
# MediaPipe Object Detection - CORRECTED & WORKING
# Draws GREEN BOXES + LABEL + % CONFIDENCE on every object
# Works perfectly on webcam, images, and videos
# Modified to allow tracking a specific object by name, output coordinates and movement
# Further modified to control pan-tilt servos to move camera based on tracked object's movement
# ================================================
import cv2
import mediapipe as mp
from mediapipe.tasks import python
from mediapipe.tasks.python import vision
from mediapipe.framework.formats import landmark_pb2
import numpy as np
from gpiozero import AngularServo
from time import sleep
# ================================================
# CONFIGURATION - CHANGE MODEL HERE
# ================================================
MODEL_PATH = "ssd_mobilenet_v2.tflite"  # Path to the .tflite detector model; change to your custom model
# Example model download (run once if needed). NOTE: this URL delivers a file
# named efficientdet_lite0.tflite, which differs from MODEL_PATH above -- either
# rename the download or point MODEL_PATH at it.
# https://storage.googleapis.com/mediapipe-models/object_detector/efficientdet_lite0/float16/1/efficientdet_lite0.tflite
# Short aliases for the MediaPipe Tasks classes used below.
BaseOptions = python.BaseOptions
ObjectDetector = vision.ObjectDetector
ObjectDetectorOptions = vision.ObjectDetectorOptions
VisionRunningMode = vision.RunningMode
# Create detector options for VIDEO mode (webcam frames with timestamps).
options = ObjectDetectorOptions(
    base_options=BaseOptions(
        model_asset_path=MODEL_PATH,
        # The delegate must be a BaseOptions.Delegate enum member; the plain
        # string "GPU" used previously is not one. CPU is selected explicitly
        # here; switch to BaseOptions.Delegate.GPU only on a platform where
        # the MediaPipe GPU delegate is available.
        delegate=BaseOptions.Delegate.CPU,
    ),
    running_mode=VisionRunningMode.VIDEO,
    max_results=10,  # Upper bound on detections returned per frame
    score_threshold=0.4,  # Detections scoring below this are dropped
)
# ================================================
# SERVO CONFIGURATION - ADJUST PINS AS NEEDED
# Assuming Raspberry Pi with gpiozero
# Pan servo for horizontal movement, Tilt for vertical
# ================================================
PAN_PIN = 17  # Change to your pan servo GPIO pin
TILT_PIN = 18  # Change to your tilt servo GPIO pin
# Both servos cover 0-180 degrees and start at the 90-degree centre position
# (adjust initial_angle as needed for your setup). The start angle is passed
# to the constructor because AngularServo's default initial_angle is 0, which
# with min_angle=0 would first drive each servo to its end stop before a later
# assignment could re-centre it.
pan_servo = AngularServo(
    PAN_PIN,
    initial_angle=90,
    min_angle=0,
    max_angle=180,
    min_pulse_width=0.0005,
    max_pulse_width=0.0025,
)
tilt_servo = AngularServo(
    TILT_PIN,
    initial_angle=90,
    min_angle=0,
    max_angle=180,
    min_pulse_width=0.0005,
    max_pulse_width=0.0025,
)
# ================================================
# DRAWING FUNCTION - MODIFIED TO HIGHLIGHT TRACKED OBJECT IN RED
# ================================================
def draw_detection(image, detection_result, tracked_object=None):
    """Return a copy of *image* with a box and label drawn for each detection.

    Args:
        image: Image array to annotate; it is copied, never modified in place.
            Colours below are given in OpenCV's BGR channel order.
        detection_result: Object exposing a ``detections`` iterable. Each
            detection provides ``bounding_box`` (``origin_x``, ``origin_y``,
            ``width``, ``height``) and ``categories`` whose first entry carries
            ``category_name`` and ``score``.
        tracked_object: Optional category name. Detections whose first
            category matches it (case-insensitively) are drawn in red; all
            others are drawn in green.

    Returns:
        The annotated copy of ``image``.
    """
    annotated_image = image.copy()
    # Normalise the tracked name once instead of once per detection.
    target = tracked_object.lower() if tracked_object else None
    for detection in detection_result.detections:
        # A detection without any category has nothing to label or match;
        # skip it instead of failing on categories[0].
        if not detection.categories:
            continue
        category = detection.categories[0]
        # category_name can be missing (None); treat it as an empty name so
        # .lower() cannot fail and it never matches a tracked name.
        name = category.category_name or ""
        score = category.score or 0.0
        # Get bounding box corners as integer pixel coordinates
        bbox = detection.bounding_box
        start_point = (int(bbox.origin_x), int(bbox.origin_y))
        end_point = (int(bbox.origin_x + bbox.width), int(bbox.origin_y + bbox.height))
        # Determine color: red for tracked, green for others
        is_tracked = target is not None and name.lower() == target
        color = (0, 0, 255) if is_tracked else (0, 255, 0)
        # Draw rectangle
        cv2.rectangle(annotated_image, start_point, end_point, color, 3)
        # Label text: "<name>: <score as integer percent>%"
        display_text = f"{name or 'unknown'}: {int(score * 100)}%"
        # Background for text; place the label above the box unless the box
        # is too close to the top edge, in which case place it inside.
        text_size = cv2.getTextSize(display_text, cv2.FONT_HERSHEY_SIMPLEX, 0.7, 2)[0]
        text_x = start_point[0]
        text_y = start_point[1] - 10 if start_point[1] > 30 else start_point[1] + 30
        cv2.rectangle(annotated_image,
                      (text_x, text_y - text_size[1] - 10),
                      (text_x + text_size[0], text_y + 5),
                      color, -1)
        # White text
        cv2.putText(annotated_image, display_text,
                    (text_x, text_y),
                    cv2.FONT_HERSHEY_SIMPLEX, 0.7, (255, 255, 255), 2)
    return annotated_image
# ================================================
# LIVE WEBCAM DETECTION - MODIFIED FOR TRACKING AND SERVO CONTROL
# ================================================
def _position_to_angle(position, extent, servo):
    """Map a pixel coordinate in [0, extent] onto the servo's angle range.

    The fraction is clamped to [0, 1] so a box centre lying outside the frame
    can never yield an angle outside the range the servo accepts.
    """
    fraction = min(1.0, max(0.0, position / extent))
    return servo.min_angle + fraction * (servo.max_angle - servo.min_angle)


print("Starting MediaPipe Object Detector - Press 'q' to quit")
tracked_object = input("Enter the object name to track (e.g., 'person'): ").strip().lower()
prev_x, prev_y = None, None
# Requested capture size. The camera driver is free to ignore the request, so
# the servo mapping below uses the size of each frame actually delivered.
FRAME_WIDTH = 640
FRAME_HEIGHT = 480
# detect_for_video needs strictly increasing timestamps; remember the last one.
last_timestamp_ms = -1
cap = None
try:
    with ObjectDetector.create_from_options(options) as detector:
        cap = cv2.VideoCapture(0)
        cap.set(cv2.CAP_PROP_FRAME_WIDTH, FRAME_WIDTH)
        cap.set(cv2.CAP_PROP_FRAME_HEIGHT, FRAME_HEIGHT)
        while cap.isOpened():
            ret, frame = cap.read()
            if not ret:
                break
            # Flip for mirror view
            frame = cv2.flip(frame, 1)
            frame_height, frame_width = frame.shape[:2]
            rgb_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
            # Create MediaPipe Image
            mp_image = mp.Image(image_format=mp.ImageFormat.SRGB, data=rgb_frame)
            # Run detection; bump the timestamp if integer truncation would
            # otherwise repeat the previous frame's value.
            timestamp_ms = int(cv2.getTickCount() * 1000 / cv2.getTickFrequency())
            timestamp_ms = max(timestamp_ms, last_timestamp_ms + 1)
            last_timestamp_ms = timestamp_ms
            result = detector.detect_for_video(mp_image, timestamp_ms)
            # Draw results (all objects detected, tracked highlighted)
            annotated_frame = draw_detection(frame, result, tracked_object=tracked_object)
            # Detections whose top category matches the tracked name. A missing
            # (None) category name never matches.
            tracked_detections = [
                d for d in result.detections
                if d.categories
                and d.categories[0].category_name is not None
                and d.categories[0].category_name.lower() == tracked_object
            ]
            count = len(result.detections)
            if tracked_detections:
                # Pick the highest-scoring match when there are several
                primary = max(tracked_detections, key=lambda d: d.categories[0].score)
                bbox = primary.bounding_box
                # Center coordinates (in pixels)
                x = bbox.origin_x + bbox.width / 2
                y = bbox.origin_y + bbox.height / 2
                print(f"Tracked '{tracked_object}': x={x:.2f}, y={y:.2f}")
                # Calculate movement if previous position exists
                if prev_x is not None and prev_y is not None:
                    dx = x - prev_x
                    dy = y - prev_y
                    print(f"Movement: dx={dx:.2f}, dy={dy:.2f} (pixels per frame)")
                # Update previous
                prev_x = x
                prev_y = y
                # Map the box centre linearly onto each servo's angle range:
                # x across the frame width -> pan, y down the frame height -> tilt
                # (y=0 is the top of the frame).
                # NOTE(review): this is an absolute position-to-angle mapping of
                # the mirrored frame. If the camera rides on the pan-tilt mount,
                # an incremental correction (current angle + gain * offset from
                # frame centre) is presumably what is wanted, and the direction
                # may need inverting for your mount -- confirm on hardware.
                pan_angle = _position_to_angle(x, frame_width, pan_servo)
                tilt_angle = _position_to_angle(y, frame_height, tilt_servo)
                pan_servo.angle = pan_angle
                tilt_servo.angle = tilt_angle
                print(f"Pan angle: {pan_angle:.2f}, Tilt angle: {tilt_angle:.2f}")
                overlay_text = f"Objects: {count}"
                overlay_color = (0, 255, 255)
            else:
                prev_x, prev_y = None, None  # Reset if not detected
                # Return the servos to center while the target is not visible
                pan_servo.angle = 90
                tilt_servo.angle = 90
                # Distinguish "nothing at all" from "objects present, but not
                # the tracked one" so the overlay is not misleading.
                if count:
                    overlay_text = f"Objects: {count} (target not found)"
                else:
                    overlay_text = "No objects detected"
                overlay_color = (0, 0, 255)
            cv2.putText(annotated_frame, overlay_text, (10, 40),
                        cv2.FONT_HERSHEY_SIMPLEX, 1, overlay_color, 3)
            # Show
            cv2.imshow("MediaPipe Object Detection - Live", annotated_frame)
            if cv2.waitKey(1) & 0xFF == ord('q'):
                break
finally:
    # Always release the camera, windows and servos, including on Ctrl+C or
    # an error raised from inside the loop.
    if cap is not None:
        cap.release()
    cv2.destroyAllWindows()
    pan_servo.close()
    tilt_servo.close()