real_time_gesture.py
import cv2
import mediapipe as mp
import numpy as np
import joblib
# Load your trained model and label encoder
model = joblib.load("gesture_model.pkl")
label_encoder = joblib.load("gesture_labels.pkl")
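# The pickled files are assumed to be a scikit-learn classifier trained on the
# 63-value feature vectors (21 landmarks x 3 coordinates) produced below, and
# the matching LabelEncoder used during training.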
# Initialize MediaPipe Hands
mp_hands = mp.solutions.hands
mp_drawing = mp.solutions.drawing_utils
hands = mp_hands.Hands(
    max_num_hands=1,
    min_detection_confidence=0.7,
    min_tracking_confidence=0.7,
)
# Start video capture from the default webcam (index 0)
cap = cv2.VideoCapture(0)
def extract_hand_landmarks(hand_landmarks):
    """
    Extract (x, y, z) coordinates from hand landmarks into a flat list.
    """
    landmarks = []
    for lm in hand_landmarks.landmark:
        landmarks.extend([lm.x, lm.y, lm.z])
    return landmarks
while True:
    ret, frame = cap.read()
    if not ret:
        break

    # Flip frame horizontally for natural (mirror) view
    frame = cv2.flip(frame, 1)

    # Convert BGR to RGB
    rgb_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)

    # Process frame with MediaPipe Hands
    results = hands.process(rgb_frame)

    if results.multi_hand_landmarks:
        for hand_landmarks in results.multi_hand_landmarks:
            # Draw hand landmarks on the frame
            mp_drawing.draw_landmarks(
                frame, hand_landmarks, mp_hands.HAND_CONNECTIONS)

            # Extract landmark features
            features = extract_hand_landmarks(hand_landmarks)

            # Convert to numpy array and reshape for model input
            features_np = np.array(features).reshape(1, -1)

            # Predict gesture
            pred = model.predict(features_np)
            gesture = label_encoder.inverse_transform(pred)[0]

            # Display predicted gesture on frame
            cv2.putText(frame, f'Gesture: {gesture}', (10, 40),
                        cv2.FONT_HERSHEY_SIMPLEX, 1.5, (0, 255, 0), 3)

    # Show the frame
    cv2.imshow("Real-time Gesture Recognition", frame)

    # Exit on pressing 'q'
    if cv2.waitKey(1) & 0xFF == ord('q'):
        break
# Clean up
cap.release()
cv2.destroyAllWindows()
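# To run (assuming gesture_model.pkl and gesture_labels.pkl sit next to this
# script): python real_time_gesture.py
# Press 'q' in the preview window to quit.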