In [2]:
import cv2
import mediapipe as mp
import pickle
import numpy as np

In [None]:
# Load pre-trained models from pick-le files
left_model_filename = r'Models\left.p'
right_model_filename = r'Models\right.p'
pose_model_filename = r'Models\pose.p'

with open(left_model_filename, 'rb') as f:
    left_model_data = pickle.load(f)
    left_model = left_model_data['model']  # Adjust if the key for the model is different

with open(right_model_filename, 'rb') as f:
    right_model_data = pickle.load(f)
    right_model = right_model_data['model']  # Adjust if the key for the model is different

with open(pose_model_filename, 'rb') as f:
    pose_model_data = pickle.load(f)
    pose_model = pose_model_data['model']  # Adjust if the key for the model is different

https://scikit-learn.org/stable/model_persistence.html#security-maintainability-limitations


In [12]:
#Initialize hand extraction functions
# Initialize Mediapipe hands detector
mp_hands = mp.solutions.hands
mp_pose = mp.solutions.pose

hands = mp_hands.Hands(max_num_hands=2, min_detection_confidence=0.7, min_tracking_confidence=0.7)
pose = mp_pose.Pose(min_detection_confidence=0.7, min_tracking_confidence=0.7)


# Function to calculate the angle between two vectors
def calculate_angle1(vec1, vec2):
    dot_product = np.dot(vec1, vec2)
    norm_vec1 = np.linalg.norm(vec1)
    norm_vec2 = np.linalg.norm(vec2)
    cosine_angle = dot_product / (norm_vec1 * norm_vec2)
    # angle = np.arccos(np.clip(cosine_angle, -1.0, 1.0))
    return cosine_angle  # Convert the angle from radians to degrees

def get_coordinates_safe(landmark, index):
    try:
        return np.array([landmark[index].x, landmark[index].y, landmark[index].z])
    except IndexError:
        return np.array([-1, -1, -1])  # Return default values (e.g., [-1, -1, -1]) if landmark is not found

axes = {
    "x": np.array([1, 0, 0]),
    "-x": np.array([-1, 0, 0]),
    "y": np.array([0, 1, 0]),
    "-y": np.array([0, -1, 0]),
    "z": np.array([0, 0, 1]),
    "-z": np.array([0, 0, -1]),
}  

def angle_between_vectors(v1, v2):
    dot_product = np.dot(v1, v2)
    magnitude_v1 = np.linalg.norm(v1)
    magnitude_v2 = np.linalg.norm(v2)
    cos_theta = dot_product / (magnitude_v1 * magnitude_v2)
    cos_theta = np.clip(cos_theta, -1.0, 1.0)
    theta = np.arccos(cos_theta)
    return np.degrees(theta)

# Function to classify palm orientation
def get_palm_orientation(normal):
    angles = {axis: angle_between_vectors(normal, direction) for axis, direction in axes.items()}
    # Find the axis with the smallest angle
    best_match_axis = min(angles, key=angles.get)
    return best_match_axis


# Function to extract hand features (angles between vectors and axes)
def extract_features(hand_landmarks, pose_landmarks=None):
    hand_pairs = [
        (1, 3),  # Thumb
        (6, 8),  # Index finger
        (10, 12),  # Middle finger
        (14, 16),  # Ring finger
        (18, 20),  # Pinky finger
        (0, 9)  # Palm direction
    ]
    
    features = []
    for pair in hand_pairs:
        landmark1 = hand_landmarks[pair[0]]
        landmark2 = hand_landmarks[pair[1]]
        
        vector = np.array([landmark2.x - landmark1.x, landmark2.y - landmark1.y, landmark2.z - landmark1.z])
        x_axis = np.array([1, 0, 0])
        y_axis = np.array([0, 1, 0])
        z_axis = np.array([0, 0, 1])
        
        angle_x = calculate_angle1(vector, x_axis)
        angle_y = calculate_angle1(vector, y_axis)
        angle_z = calculate_angle1(vector, z_axis)
        
        features.extend([angle_x, angle_y, angle_z])
    
    # Safe access to landmarks for 0, 5, and 17
    vector_0_to_5 = get_coordinates_safe(hand_landmarks, 5) - get_coordinates_safe(hand_landmarks, 0)
    vector_0_to_17 = get_coordinates_safe(hand_landmarks, 17) - get_coordinates_safe(hand_landmarks, 0)
    
    normal_vector = np.cross(vector_0_to_5, vector_0_to_17)
    
    normal_angle_x = calculate_angle1(normal_vector, x_axis)
    normal_angle_y = calculate_angle1(normal_vector, y_axis)
    normal_angle_z = calculate_angle1(normal_vector, z_axis)
    
    features.extend([normal_angle_x, normal_angle_y, normal_angle_z])
    
    # If pose landmarks are available, calculate the distance between nose and wrist
    
    nose_landmark = get_coordinates_safe(pose_landmarks, 0)  # Nose is at index 0 in pose landmarks
    wrist_landmark = get_coordinates_safe(hand_landmarks, 0)  # Wrist is at index 0 in hand landmarks
        
    # Calculate the distance in the x and y axes
    distance_x = abs(nose_landmark[0] - wrist_landmark[0])
    distance_y = abs(nose_landmark[1] - wrist_landmark[1])
        
    # Append the x and y distances as new features
    features.extend([distance_x, distance_y])
    
    return features

# Initialize video capture (webcam feed)
cap = cv2.VideoCapture(0)

In [5]:
#Initialize pose extraction functions
def calculate_normal_safe(p1, p2, p3):
    # Check if any of the points is [-1, -1, -1] (default value for missing landmarks)
    if np.array_equal(p1, [-1, -1, -1]) or np.array_equal(p2, [-1, -1, -1]) or np.array_equal(p3, [-1, -1, -1]):
        return np.array([-1, -1, -1])  # Return [-1, -1, -1] if any point is missing
    else:
        return calculate_normal(p1, p2, p3)  # Otherwise, calculate the normal as usual
        
# Function to calculate angle between three points
def calculate_angle2(p1, p2, p3):
    # Create vectors from points p1, p2, p3
    v1 = p1 - p2
    v2 = p3 - p2
    
    # Calculate the cosine of the angle using dot product
    cos_theta = np.dot(v1, v2) / (np.linalg.norm(v1) * np.linalg.norm(v2))
    angle = cos_theta
    
    return cos_theta

# Function to calculate the normal of the plane formed by three points
def calculate_normal(p1, p2, p3):
    # Vectors on the plane
    v1 = p2 - p1
    v2 = p3 - p1
    
    # Cross product gives the normal vector
    normal = np.cross(v1, v2)
    
    # Normalize the normal vector
    normal = normal / np.linalg.norm(normal)
    
    return normal

# Function to calculate the angle between the normal and each of the axes
def calculate_normal_angles(normal):
    # Calculate angles with x, y, z axes
    cos_values = []
    for axis in np.eye(3):  # x, y, z unit vectors
        cos_value = np.dot(normal, axis)
        cos_values.append(cos_value)
    return cos_values

# Function to calculate the x and y distance between two points
def calculate_xy_distance(p1, p2):
    x_distance = abs(p1[0] - p2[0])  # x-coordinate distance
    y_distance = abs(p1[1] - p2[1])  # y-coordinate distance
    return x_distance, y_distance


# Function to extract the pose features
def extract_pose_features(image, landmarks):
    # Define the landmark indices for the required sets of points (using Pose landmark indices)
    points_sets = {
        "angle_11_12_14": (get_coordinates_safe(landmarks, 11), get_coordinates_safe(landmarks, 12), get_coordinates_safe(landmarks, 14)),  # Left shoulder, right shoulder, right elbow
        "angle_12_14_16": (get_coordinates_safe(landmarks, 12), get_coordinates_safe(landmarks, 11), get_coordinates_safe(landmarks, 13)),  # Right shoulder, right elbow, right wrist
        "angle_11_13_15": (get_coordinates_safe(landmarks, 11), get_coordinates_safe(landmarks, 13), get_coordinates_safe(landmarks, 15)),  # Left shoulder, left elbow, left wrist
        "angle_13_15_17": (get_coordinates_safe(landmarks, 12), get_coordinates_safe(landmarks, 14), get_coordinates_safe(landmarks, 16)),  # Left elbow, left wrist, left hand
        "normal_1": (get_coordinates_safe(landmarks, 15), get_coordinates_safe(landmarks, 17), get_coordinates_safe(landmarks, 19)),  # Plane formed by left shoulder, left hip, left knee
        "normal_2": (get_coordinates_safe(landmarks, 16), get_coordinates_safe(landmarks, 18), get_coordinates_safe(landmarks, 20))   # Plane formed by right shoulder, right hip, right knee
    }

    # Calculate the angles between the specific sets of points
    angles = []
    for key, (p1, p2, p3) in points_sets.items():
        if key.startswith("angle"):
            angle = calculate_angle2(p1, p2, p3)
            angles.append(angle)
    
    # Calculate normals and angles with axes
    for key, (p1, p2, p3) in points_sets.items():
        if key.startswith("normal"):
            normal = calculate_normal_safe(p1, p2, p3)  # Safe normal calculation
            if np.array_equal(normal, [-1, -1, -1]):
                # If normal is [-1, -1, -1], it indicates missing points, so append [-1, -1, -1] for each axis angle
                angles.extend([-1, -1, -1])
            else:
                normal_angles = calculate_normal_angles(normal)
                angles.extend(normal_angles)  # Append angles with x, y, z axes

    # Add the distance between points 15 (left wrist) and 16 (right wrist)
    p15 = get_coordinates_safe(landmarks, 15)  # Left wrist
    p16 = get_coordinates_safe(landmarks, 16)  # Right wrist
    x_distance, y_distance = calculate_xy_distance(p15, p16)
    angles.extend([x_distance, y_distance])  # Append x and y distance to the feature list
    
    return angles

def calulating_percentage(avg , all_classes):
    individual_threshold = {
    'clean':0.3, 'happy':0.32, 'high': 0.55, 'loud': 0.90, 'quiet':0.9,
     'sad':0.6, 'deep':0.5, 'soft':0.5, 'weak':0.6, 'flat': 0.27,
   'expensive':0.27,  'poot':0.35,  'slow':0.5,  'thick':0.7
    }
    threshold_pecentage = []
    for i,j in zip(avg,all_classes):
        value=individual_threshold[j.lower()]
        threshold_pecentage.append(i*100/value)
    return threshold_pecentage



In [13]:
# Enssemble 
while True:
    ret, frame = cap.read()
    if not ret:
        break

    # Convert the frame to RGB for MediaPipe processing
    image_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
    results = hands.process(image_rgb)
    results_pose = pose.process(image_rgb)

    # Initialize predictions and probabilities
    left_prediction, right_prediction, pose_prediction = None, None, None
    left_probs, right_probs, pose_probs = None, None, None

    # Process hand landmarks (for both left and right hands)
    if results.multi_hand_landmarks:
        for hand_landmarks, handedness in zip(results.multi_hand_landmarks, results.multi_handedness):
            label = handedness.classification[0].label
            features = extract_features(hand_landmarks.landmark, results_pose.pose_landmarks.landmark if results_pose.pose_landmarks else [])

            if label == 'Left':
                left_prediction = left_model.predict([features])[0]
                left_probs = left_model.predict_proba([features])[0]
            elif label == 'Right':
                right_prediction = right_model.predict([features])[0]
                right_probs = right_model.predict_proba([features])[0]

            # Draw hand landmarks
            mp.solutions.drawing_utils.draw_landmarks(frame, hand_landmarks, mp_hands.HAND_CONNECTIONS)

    # Process pose landmarks
    if results_pose.pose_landmarks:
        mp.solutions.drawing_utils.draw_landmarks(frame, results_pose.pose_landmarks, mp_pose.POSE_CONNECTIONS)
        pose_landmarks = results_pose.pose_landmarks.landmark
        pose_features = extract_pose_features(frame, pose_landmarks)  # Pass both frame and pose_landmarks
        pose_prediction = pose_model.predict([pose_features])[0]
        pose_probs = pose_model.predict_proba([pose_features])[0]

    # Initialize the combined prediction logic
    if left_prediction is not None and right_prediction is not None and pose_prediction is not None:
        # All three detected, combine their probabilities
        all_classes = sorted(set(left_model.classes_).union(set(right_model.classes_)).union(set(pose_model.classes_)))

        # Align probabilities with all possible classes
        left_prob_dict = {cls: prob for cls, prob in zip(left_model.classes_, left_probs)}
        right_prob_dict = {cls: prob for cls, prob in zip(right_model.classes_, right_probs)}
        pose_prob_dict = {cls: prob for cls, prob in zip(pose_model.classes_, pose_probs)}

        left_probs_aligned = np.array([left_prob_dict.get(cls, 0) for cls in all_classes])*100
        right_probs_aligned = np.array([right_prob_dict.get(cls, 0) for cls in all_classes])*100
        pose_probs_aligned = np.array([pose_prob_dict.get(cls, 0) for cls in all_classes])*100

        # Compute average probabilities
        avg = (left_probs_aligned  + right_probs_aligned   + pose_probs_aligned) / 300
        avg_probs = calulating_percentage(avg, all_classes)
        # Find the class with the highest average probability
        max_prob_index = np.argmax(avg_probs)
        max_prob_class = all_classes[max_prob_index]

        # Display the final prediction
        cv2.putText(frame, f"Final Prediction: {max_prob_class}, Prob: {avg_probs[max_prob_index]:.2f}",
                    (10, 30), cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 0, 0), 2)

        # Display the individual class probabilities
        y_offset = 60  # Starting y position for probabilities display
        for i, prob in enumerate(avg_probs):
            class_name = all_classes[i]
            cv2.putText(frame, f"{class_name}: {prob:.2f}", (10, y_offset), cv2.FONT_HERSHEY_SIMPLEX, 0.8, (0, 0, 255), 2)
            y_offset += 30  # Move down for the next class

    else:
        # Handle combinations when one or two parts are detected
        prob_dicts = []
        predictions = []
        models = []
        
        # Add available predictions and probabilities
        if left_prediction is not None:
            prob_dicts.append(left_probs)
            predictions.append(left_prediction)
            models.append(left_model)
        if right_prediction is not None:
            prob_dicts.append(right_probs)
            predictions.append(right_prediction)
            models.append(right_model)
        if pose_prediction is not None:
            prob_dicts.append(pose_probs)
            predictions.append(pose_prediction)
            models.append(pose_model)

        # If only one part detected, use that directly
        if len(prob_dicts) == 1:
            prob_dict = prob_dicts[0]
            prediction = predictions[0]
            model = models[0]
            max_prob_index = np.argmax(prob_dict)
            max_prob_class = model.classes_[max_prob_index]
            cv2.putText(frame, f"Final Prediction: {max_prob_class}, Prob: {prob_dict[max_prob_index]:.2f}",
                        (10, 30), cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 0, 0), 2)

        # If two parts are detected, average the probabilities
        elif len(prob_dicts) == 2:
            all_classes = sorted(set(models[0].classes_).union(set(models[1].classes_)))

            # Align probabilities with all possible classes
            prob_dict_1 = {cls: prob for cls, prob in zip(models[0].classes_, prob_dicts[0])}
            prob_dict_2 = {cls: prob for cls, prob in zip(models[1].classes_, prob_dicts[1])}

            probs_aligned_1 = np.array([prob_dict_1.get(cls, 0) for cls in all_classes])
            probs_aligned_2 = np.array([prob_dict_2.get(cls, 0) for cls in all_classes])

            # Compute average probabilities
            avg = (probs_aligned_1 + probs_aligned_2) / 2
            avg_probs = calulating_percentage(avg, all_classes)
            
            # Find the class with the highest average probability
            max_prob_index = np.argmax(avg_probs)
            max_prob_class = all_classes[max_prob_index]

            # Display the final prediction
            cv2.putText(frame, f"Final Prediction: {max_prob_class}, Prob: {avg_probs[max_prob_index]:.2f}",
                        (10, 30), cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 0, 0), 2)

            # Optionally, display individual probabilities for combined cases
            y_offset = 60  # Starting y position for probabilities display
            for i, prob in enumerate(avg_probs):
                class_name = all_classes[i]
                cv2.putText(frame, f"{class_name}: {prob:.2f}", (10, y_offset), cv2.FONT_HERSHEY_SIMPLEX, 0.8, (0, 0, 255), 2)
                y_offset += 30  # Move down for the next class

    # Show the result
    cv2.imshow("Hand and Pose Tracking", frame)

    if cv2.waitKey(1) & 0xFF == ord('q'):
        break

# Release the capture object
cap.release()


In [6]:
#Only Left hand and pose
while True:
    ret, frame = cap.read()
    if not ret:
        break

    # Convert the frame to RGB for MediaPipe processing
    image_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
    results = hands.process(image_rgb)
    results_pose = pose.process(image_rgb)

    # Initialize predictions and probabilities for left hand and pose
    left_prediction, pose_prediction = None, None
    left_probs, pose_probs = None, None

    # Process left hand landmarks
    if results.multi_hand_landmarks:
        for hand_landmarks, handedness in zip(results.multi_hand_landmarks, results.multi_handedness):
            label = handedness.classification[0].label
            if label == 'Left':
                features = extract_features(hand_landmarks.landmark, results_pose.pose_landmarks.landmark if results_pose.pose_landmarks else [])

                left_prediction = left_model.predict([features])[0]
                left_probs = left_model.predict_proba([features])[0]

                # Draw left hand landmarks
                mp.solutions.drawing_utils.draw_landmarks(frame, hand_landmarks, mp_hands.HAND_CONNECTIONS)

    # Process pose landmarks
    if results_pose.pose_landmarks:
        mp.solutions.drawing_utils.draw_landmarks(frame, results_pose.pose_landmarks, mp_pose.POSE_CONNECTIONS)
        pose_landmarks = results_pose.pose_landmarks.landmark
        pose_features = extract_pose_features(frame, pose_landmarks)  # Pass both frame and pose_landmarks
        pose_prediction = pose_model.predict([pose_features])[0]
        pose_probs = pose_model.predict_proba([pose_features])[0]

    # Initialize the combined prediction logic for left hand and pose
    if left_prediction is not None and pose_prediction is not None:
        # Both left hand and pose detected, combine their probabilities
        all_classes = sorted(set(left_model.classes_).union(set(pose_model.classes_)))

        # Align probabilities with all possible classes
        left_prob_dict = {cls: prob for cls, prob in zip(left_model.classes_, left_probs)}
        pose_prob_dict = {cls: prob for cls, prob in zip(pose_model.classes_, pose_probs)}

        left_probs_aligned = np.array([left_prob_dict.get(cls, 0) for cls in all_classes])*100
        pose_probs_aligned = np.array([pose_prob_dict.get(cls, 0) for cls in all_classes])*100

        # Compute average probabilities
        avg_probs = (left_probs_aligned + pose_probs_aligned*1.5) / 250

        # Find the class with the highest average probability
        max_prob_index = np.argmax(avg_probs)
        max_prob_class = all_classes[max_prob_index]

        # Display the final prediction
        cv2.putText(frame, f"Final Prediction: {max_prob_class}, Prob: {avg_probs[max_prob_index]:.2f}",
                    (10, 30), cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 0, 0), 2)

        # Display the individual class probabilities
        y_offset = 60  # Starting y position for probabilities display
        for i, prob in enumerate(avg_probs):
            class_name = all_classes[i]
            cv2.putText(frame, f"{class_name}: {prob:.2f}", (10, y_offset), cv2.FONT_HERSHEY_SIMPLEX, 0.8, (255, 0, 0), 2)
            y_offset += 30  # Move down for the next class

    else:
        # Handle combinations when only left hand or pose is detected
        prob_dicts = []
        predictions = []
        models = []
        
        # Add available predictions and probabilities
        if left_prediction is not None:
            prob_dicts.append(left_probs)
            predictions.append(left_prediction)
            models.append(left_model)
        if pose_prediction is not None:
            prob_dicts.append(pose_probs)
            predictions.append(pose_prediction)
            models.append(pose_model)

        # If only one part detected, use that directly
        if len(prob_dicts) == 1:
            prob_dict = prob_dicts[0]
            prediction = predictions[0]
            model = models[0]
            max_prob_index = np.argmax(prob_dict)
            max_prob_class = model.classes_[max_prob_index]
            cv2.putText(frame, f"Final Prediction: {max_prob_class}, Prob: {prob_dict[max_prob_index]:.2f}",
                        (10, 30), cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 0, 0), 2)

        # If both parts (left hand and pose) are detected, average the probabilities
        elif len(prob_dicts) == 2:
            all_classes = sorted(set(models[0].classes_).union(set(models[1].classes_)))

            # Align probabilities with all possible classes
            prob_dict_1 = {cls: prob for cls, prob in zip(models[0].classes_, prob_dicts[0])}
            prob_dict_2 = {cls: prob for cls, prob in zip(models[1].classes_, prob_dicts[1])}

            probs_aligned_1 = np.array([prob_dict_1.get(cls, 0) for cls in all_classes])
            probs_aligned_2 = np.array([prob_dict_2.get(cls, 0) for cls in all_classes])

            # Compute average probabilities
            avg_probs = (probs_aligned_1 + probs_aligned_2) / 2

            # Find the class with the highest average probability
            max_prob_index = np.argmax(avg_probs)
            max_prob_class = all_classes[max_prob_index]

            # Display the final prediction
            cv2.putText(frame, f"Final Prediction: {max_prob_class}, Prob: {avg_probs[max_prob_index]:.2f}",
                        (10, 30), cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 0, 0), 2)

            # Optionally, display individual probabilities for combined cases
            y_offset = 60  # Starting y position for probabilities display
            for i, prob in enumerate(avg_probs):
                class_name = all_classes[i]
                cv2.putText(frame, f"{class_name}: {prob:.2f}", (10, y_offset), cv2.FONT_HERSHEY_SIMPLEX, 0.8, (255, 0, 0), 2)
                y_offset += 30  # Move down for the next class

    # Show the result
    cv2.imshow("Left Hand and Pose Tracking", frame)

    if cv2.waitKey(1) & 0xFF == ord('q'):
        break

# Release the capture object
cap.release()


KeyboardInterrupt: 

In [None]:
#10 frames
def calulating_percentage(avg, all_classes):
    individual_threshold = {
        'clean': 0.3, 'happy': 0.32, 'high': 0.55, 'loud': 0.80, 'quiet': 0.9,
        'sad': 0.6, 'deep': 0.5, 'soft': 0.5, 'weak': 0.6, 'flat': 0.30,
        'expensive': 0.27, 'poot': 0.35, 'slow': 0.6, 'thick': 0.7
    }
    
    threshold_percentage = []
    for i, j in zip(avg, all_classes):
        value = individual_threshold[j.lower()]
        threshold_percentage.append(i * 100 / value)
    return threshold_percentage

cap = cv2.VideoCapture(0)

# Create a dictionary to accumulate the probabilities for 10 frames
frame_count = 0
accumulated_probs = None
final_prediction_text = ""

while True:
    ret, frame = cap.read()
    if not ret:
        break

    # Convert the frame to RGB for MediaPipe processing
    image_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
    results = hands.process(image_rgb)
    results_pose = pose.process(image_rgb)

    # Initialize predictions and probabilities
    left_prediction, right_prediction, pose_prediction = None, None, None
    left_probs, right_probs, pose_probs = None, None, None
    left_normal_direction = -1
    # Process hand landmarks (for both left and right hands)
    if results.multi_hand_landmarks:
        for hand_landmarks, handedness in zip(results.multi_hand_landmarks, results.multi_handedness):
            label = handedness.classification[0].label
            features = extract_features(hand_landmarks.landmark, results_pose.pose_landmarks.landmark if results_pose.pose_landmarks else [])

            if label == 'Left':
                left_prediction = left_model.predict([features])[0]
                left_probs = left_model.predict_proba([features])[0]
            elif label == 'Right':
                right_prediction = right_model.predict([features])[0]
                right_probs = right_model.predict_proba([features])[0]

            # Draw hand landmarks
            mp.solutions.drawing_utils.draw_landmarks(frame, hand_landmarks, mp_hands.HAND_CONNECTIONS)

    # Process pose landmarks
    if results_pose.pose_landmarks:
        mp.solutions.drawing_utils.draw_landmarks(frame, results_pose.pose_landmarks, mp_pose.POSE_CONNECTIONS)
        pose_landmarks = results_pose.pose_landmarks.landmark
        pose_features = extract_pose_features(frame, pose_landmarks)  # Pass both frame and pose_landmarks
        pose_prediction = pose_model.predict([pose_features])[0]
        pose_probs = pose_model.predict_proba([pose_features])[0]

    # Initialize the combined prediction logic
    if left_prediction is not None and right_prediction is not None and pose_prediction is not None:
        # All three detected, combine their probabilities
        all_classes = sorted(set(left_model.classes_).union(set(right_model.classes_)).union(set(pose_model.classes_)))

        # Align probabilities with all possible classes
        left_prob_dict = {cls: prob for cls, prob in zip(left_model.classes_, left_probs)}
        right_prob_dict = {cls: prob for cls, prob in zip(right_model.classes_, right_probs)}
        pose_prob_dict = {cls: prob for cls, prob in zip(pose_model.classes_, pose_probs)}

        left_probs_aligned = np.array([left_prob_dict.get(cls, 0) for cls in all_classes]) * 100
        right_probs_aligned = np.array([right_prob_dict.get(cls, 0) for cls in all_classes]) * 100
        pose_probs_aligned = np.array([pose_prob_dict.get(cls, 0) for cls in all_classes]) * 100

        # Compute average probabilities
        avg = (left_probs_aligned + right_probs_aligned + pose_probs_aligned) / 300
        avg_probs = calulating_percentage(avg, all_classes)

        # If accumulated_probs is None, initialize it
        if accumulated_probs is None:
            accumulated_probs = np.zeros_like(avg_probs)

        # Accumulate the probabilities over 10 frames
        accumulated_probs += avg_probs
        frame_count += 1

        if frame_count == 10:
            # After 10 frames, find the class with the highest accumulated probability
            max_prob_index = np.argmax(accumulated_probs)
            max_prob_class = all_classes[max_prob_index]
            
            # Store the final prediction text
            final_prediction_text = f"Final Prediction: {max_prob_class}, Prob: {accumulated_probs[max_prob_index]:.2f}"

            # Reset accumulated_probs for the next cycle
            accumulated_probs = None
            frame_count = 0

    # Display the final prediction permanently on the screen
    if final_prediction_text:
        cv2.putText(frame, final_prediction_text,
                    (10, 30), cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 0, 0), 2)

    # Optionally, display individual probabilities for combined cases
    if accumulated_probs is not None:
        y_offset = 60  # Starting y position for probabilities display
        for i, prob in enumerate(accumulated_probs):
            class_name = all_classes[i]
            cv2.putText(frame, f"{class_name}: {prob:.2f}", (10, y_offset), cv2.FONT_HERSHEY_SIMPLEX, 0.8, (0, 0, 255), 2)
            y_offset += 30  # Move down for the next class

    # Show the result
    cv2.imshow("Hand and Pose Tracking", frame)

    if cv2.waitKey(1) & 0xFF == ord('q'):
        break

# Release the capture object
cap.release()


In [56]:
import os
import cv2
import numpy as np
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report
from tqdm import tqdm

# Initialize variables to store the ground truth and predicted labels
y_true = []
y_pred = []

# Define the folder containing the dataset
data_folder = 'D:/Test-3/Test_data2'

# Get all the class names from the dataset
all_classes = sorted(os.listdir(data_folder))
num_classes = len(all_classes)

# Initialize progress bar for dataset processing
total_images = sum([len(os.listdir(os.path.join(data_folder, label))) for label in all_classes])
progress_bar = tqdm(total=total_images, desc="Processing Images", unit="image")

# Initialize accuracy tracking for each class
class_correct = {cls: 0 for cls in all_classes}
class_total = {cls: 0 for cls in all_classes}

# Start processing each image
for label in all_classes:
    class_folder = os.path.join(data_folder, label)
    
    if os.path.isdir(class_folder):
        # Iterate through images in each class folder
        for image_name in os.listdir(class_folder):
            image_path = os.path.join(class_folder, image_name)
            
            # Read the image
            image = cv2.imread(image_path)
            
            # Convert image to RGB
            image_rgb = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
            results = hands.process(image_rgb)
            
            # Process pose landmarks
            image_pose_rgb = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
            pose_results = pose.process(image_pose_rgb)
            
            # Initialize predictions and probabilities
            left_prediction, right_prediction, pose_prediction = None, None, None
            left_probs, right_probs, pose_probs = None, None, None

            # Process hand landmarks (for both left and right hands)
            if results.multi_hand_landmarks:
                for hand_landmarks, handedness in zip(results.multi_hand_landmarks, results.multi_handedness):
                    label = handedness.classification[0].label
                    features = extract_features(hand_landmarks.landmark, pose_results.pose_landmarks.landmark if pose_results.pose_landmarks else [])

                    if label == 'Left':
                        left_prediction = left_model.predict([features])[0]
                        left_probs = left_model.predict_proba([features])[0]
                    elif label == 'Right':
                        right_prediction = right_model.predict([features])[0]
                        right_probs = right_model.predict_proba([features])[0]

            # Process pose landmarks
            if pose_results.pose_landmarks:
                pose_landmarks = pose_results.pose_landmarks.landmark
                pose_features = extract_pose_features(image, pose_landmarks)
                pose_prediction = pose_model.predict([pose_features])[0]
                pose_probs = pose_model.predict_proba([pose_features])[0]


            if pose_results.pose_landmarks and not results.multi_hand_landmarks:
                # Only Pose landmarks are available
                y_true.append(label)  # True label is the folder name (the class)
                y_pred.append(pose_prediction)  # Predict based on Pose only

            elif results.multi_hand_landmarks and not pose_results.pose_landmarks:
                # Only Hand landmarks are available (Left or Right)
                if left_prediction is not None:
                    y_true.append(label)  # True label is the folder name (the class)
                    y_pred.append(left_prediction)  # Predict based on Left Hand only
                elif right_prediction is not None:
                    y_true.append(label)  # True label is the folder name (the class)
                    y_pred.append(right_prediction)  # Predict based on Right Hand only

            elif results.multi_hand_landmarks and pose_results.pose_landmarks:
                # Both Hand and Pose landmarks are available
                if left_prediction is not None and right_prediction is not None:
                    # Both Left and Right Hand and Pose are available
                    all_classes_sorted = sorted(set(left_model.classes_).union(set(right_model.classes_)).union(set(pose_model.classes_)))

                    # Align probabilities with all possible classes
                    left_prob_dict = {cls: prob for cls, prob in zip(left_model.classes_, left_probs)}
                    right_prob_dict = {cls: prob for cls, prob in zip(right_model.classes_, right_probs)}
                    pose_prob_dict = {cls: prob for cls, prob in zip(pose_model.classes_, pose_probs)}

                    left_probs_aligned = np.array([left_prob_dict.get(cls, 0) for cls in all_classes_sorted])
                    right_probs_aligned = np.array([right_prob_dict.get(cls, 0) for cls in all_classes_sorted])
                    pose_probs_aligned = np.array([pose_prob_dict.get(cls, 0) for cls in all_classes_sorted])

                    # Compute average probabilities
                    avg_probs = (left_probs_aligned * 1.2 + right_probs_aligned / 1.2 + pose_probs_aligned) / 3

                    # Find the class with the highest average probability
                    max_prob_index = np.argmax(avg_probs)
                    max_prob_class = all_classes_sorted[max_prob_index]

                    y_true.append(label)  # True label is the folder name (the class)
                    y_pred.append(max_prob_class)  # Predicted label

                elif left_prediction is not None and pose_prediction is not None:
                    # Only Left Hand and Pose available
                    all_classes_sorted = sorted(set(left_model.classes_).union(set(pose_model.classes_)))

                    left_prob_dict = {cls: prob for cls, prob in zip(left_model.classes_, left_probs)}
                    pose_prob_dict = {cls: prob for cls, prob in zip(pose_model.classes_, pose_probs)}

                    left_probs_aligned = np.array([left_prob_dict.get(cls, 0) for cls in all_classes_sorted])
                    pose_probs_aligned = np.array([pose_prob_dict.get(cls, 0) for cls in all_classes_sorted])

                    avg_probs = (left_probs_aligned + pose_probs_aligned) / 2
                    max_prob_index = np.argmax(avg_probs)
                    max_prob_class = all_classes_sorted[max_prob_index]

                    y_true.append(label)
                    y_pred.append(max_prob_class)

                elif right_prediction is not None and pose_prediction is not None:
                    # Only Right Hand and Pose available
                    all_classes_sorted = sorted(set(right_model.classes_).union(set(pose_model.classes_)))

                    right_prob_dict = {cls: prob for cls, prob in zip(right_model.classes_, right_probs)}
                    pose_prob_dict = {cls: prob for cls, prob in zip(pose_model.classes_, pose_probs)}

                    right_probs_aligned = np.array([right_prob_dict.get(cls, 0) for cls in all_classes_sorted])
                    pose_probs_aligned = np.array([pose_prob_dict.get(cls, 0) for cls in all_classes_sorted])

                    avg_probs = (right_probs_aligned + pose_probs_aligned) / 2
                    max_prob_index = np.argmax(avg_probs)
                    max_prob_class = all_classes_sorted[max_prob_index]

                    y_true.append(label)
                    y_pred.append(max_prob_class)

            # Update the progress bar
            progress_bar.update(1)


Processing Images: 100%|██████████| 1828/1828 [03:50<00:00,  7.82image/s]

In [57]:
accuracy = accuracy_score(y_true, y_pred)
print(f"\nOverall Accuracy: {accuracy:.2f}")

# Confusion Matrix
conf_matrix = confusion_matrix(y_true, y_pred, labels=all_classes)
print("\nConfusion Matrix:")
print(conf_matrix)

# Classification Report
class_report = classification_report(y_true, y_pred, target_names=all_classes, labels=all_classes)
print("\nClassification Report:")
print(class_report)

# Finish progress bar
progress_bar.close()

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
Processing Images: 100%|██████████| 1828/1828 [04:05<00:00,  7.44image/s]


Overall Accuracy: 0.00

Confusion Matrix:
[[0 0 0 0 0 0 0]
 [0 0 0 0 0 0 0]
 [0 0 0 0 0 0 0]
 [0 0 0 0 0 0 0]
 [0 0 0 0 1 0 0]
 [0 0 0 0 0 1 0]
 [0 0 0 0 0 0 0]]

Classification Report:
              precision    recall  f1-score   support

        Poot       0.00      0.00      0.00         1
       Quiet       0.00      0.00      0.00         0
         Sad       0.00      0.00      0.00         0
        Slow       0.00      0.00      0.00         0
        Soft       0.00      1.00      0.01         1
       Thick       0.00      1.00      0.01         1
        Weak       0.00      0.00      0.00         0

   micro avg       0.00      0.67      0.00         3
   macro avg       0.00      0.29      0.00         3
weighted avg       0.00      0.67      0.00         3






In [19]:
# Enssemble 
cap = cv2.VideoCapture(0)
while True:
    ret, frame = cap.read()
    if not ret:
        break

    # Convert the frame to RGB for MediaPipe processing
    image_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
    results = hands.process(image_rgb)
    results_pose = pose.process(image_rgb)

    # Initialize predictions and probabilities
    left_prediction, right_prediction, pose_prediction = None, None, None
    left_probs, right_probs, pose_probs = None, None, None

    # Process hand landmarks (for both left and right hands)
    if results.multi_hand_landmarks:
        for hand_landmarks, handedness in zip(results.multi_hand_landmarks, results.multi_handedness):
            label = handedness.classification[0].label
            features = extract_features(hand_landmarks.landmark, results_pose.pose_landmarks.landmark if results_pose.pose_landmarks else [])

            if label == 'Left':
                left_prediction = left_model.predict([features])[0]
                left_probs = left_model.predict_proba([features])[0]
            elif label == 'Right':
                right_prediction = right_model.predict([features])[0]
                right_probs = right_model.predict_proba([features])[0]

            # Draw hand landmarks
            mp.solutions.drawing_utils.draw_landmarks(frame, hand_landmarks, mp_hands.HAND_CONNECTIONS)

    # Process pose landmarks
    if results_pose.pose_landmarks:
        mp.solutions.drawing_utils.draw_landmarks(frame, results_pose.pose_landmarks, mp_pose.POSE_CONNECTIONS)
        pose_landmarks = results_pose.pose_landmarks.landmark
        pose_features = extract_pose_features(frame, pose_landmarks)  # Pass both frame and pose_landmarks
        pose_prediction = pose_model.predict([pose_features])[0]
        pose_probs = pose_model.predict_proba([pose_features])[0]

    # Initialize the combined prediction logic
    if left_prediction is not None and right_prediction is not None and pose_prediction is not None:
        # All three detected, combine their probabilities
        all_classes = sorted(set(left_model.classes_).union(set(right_model.classes_)).union(set(pose_model.classes_)))

        # Align probabilities with all possible classes
        left_prob_dict = {cls: prob for cls, prob in zip(left_model.classes_, left_probs)}
        right_prob_dict = {cls: prob for cls, prob in zip(right_model.classes_, right_probs)}
        pose_prob_dict = {cls: prob for cls, prob in zip(pose_model.classes_, pose_probs)}

        left_probs_aligned = np.array([left_prob_dict.get(cls, 0) for cls in all_classes])*100
        right_probs_aligned = np.array([right_prob_dict.get(cls, 0) for cls in all_classes])*100
        pose_probs_aligned = np.array([pose_prob_dict.get(cls, 0) for cls in all_classes])*100

        # Compute average probabilities
        avg = (left_probs_aligned  + right_probs_aligned   + pose_probs_aligned) / 300
        avg_probs = calulating_percentage(avg, all_classes)
        # Find the class with the highest average probability
        max_prob_index = np.argmax(avg_probs)
        max_prob_class = all_classes[max_prob_index]

        # Display the final prediction
        cv2.putText(frame, f"Final Prediction: {max_prob_class}, Prob: {avg_probs[max_prob_index]:.2f}",
                    (10, 30), cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 0, 0), 2)

        # Display the individual class probabilities
        y_offset = 60  # Starting y position for probabilities display
        for i, prob in enumerate(avg_probs):
            class_name = all_classes[i]
            cv2.putText(frame, f"{class_name}: {prob:.2f}", (10, y_offset), cv2.FONT_HERSHEY_SIMPLEX, 0.8, (0, 0, 255), 2)
            y_offset += 30  # Move down for the next class

    else:
        # Handle combinations when one or two parts are detected
        prob_dicts = []
        predictions = []
        models = []
        
        # Add available predictions and probabilities
        if left_prediction is not None:
            prob_dicts.append(left_probs)
            predictions.append(left_prediction)
            models.append(left_model)
        if right_prediction is not None:
            prob_dicts.append(right_probs)
            predictions.append(right_prediction)
            models.append(right_model)
        if pose_prediction is not None:
            prob_dicts.append(pose_probs)
            predictions.append(pose_prediction)
            models.append(pose_model)

        # If only one part detected, use that directly
        if len(prob_dicts) == 1:
            prob_dict = prob_dicts[0]
            prediction = predictions[0]
            model = models[0]
            max_prob_index = np.argmax(prob_dict)
            max_prob_class = model.classes_[max_prob_index]
            cv2.putText(frame, f"Final Prediction: {max_prob_class}, Prob: {prob_dict[max_prob_index]:.2f}",
                        (10, 30), cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 0, 0), 2)

        # If two parts are detected, average the probabilities
        elif len(prob_dicts) == 2:
            all_classes = sorted(set(models[0].classes_).union(set(models[1].classes_)))

            # Align probabilities with all possible classes
            prob_dict_1 = {cls: prob for cls, prob in zip(models[0].classes_, prob_dicts[0])}
            prob_dict_2 = {cls: prob for cls, prob in zip(models[1].classes_, prob_dicts[1])}

            probs_aligned_1 = np.array([prob_dict_1.get(cls, 0) for cls in all_classes])
            probs_aligned_2 = np.array([prob_dict_2.get(cls, 0) for cls in all_classes])

            # Compute average probabilities
            avg = (probs_aligned_1 + probs_aligned_2) / 2
            avg_probs = calulating_percentage(avg, all_classes)
            
            # Find the class with the highest average probability
            max_prob_index = np.argmax(avg_probs)
            max_prob_class = all_classes[max_prob_index]

            # Display the final prediction
            cv2.putText(frame, f"Final Prediction: {max_prob_class}, Prob: {avg_probs[max_prob_index]:.2f}",
                        (10, 30), cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 0, 0), 2)

            # Optionally, display individual probabilities for combined cases
            y_offset = 60  # Starting y position for probabilities display
            for i, prob in enumerate(avg_probs):
                class_name = all_classes[i]
                cv2.putText(frame, f"{class_name}: {prob:.2f}", (10, y_offset), cv2.FONT_HERSHEY_SIMPLEX, 0.8, (0, 0, 255), 2)
                y_offset += 30  # Move down for the next class

    # Show the result
    cv2.imshow("Hand and Pose Tracking", frame)

    if cv2.waitKey(1) & 0xFF == ord('q'):
        break

# Release the capture object
cap.release()
