# Loading

In [None]:
!pip install mediapipe

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting mediapipe
  Downloading mediapipe-0.10.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (33.9 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m33.9/33.9 MB[0m [31m59.4 MB/s[0m eta [36m0:00:00[0m
Collecting sounddevice>=0.4.4 (from mediapipe)
  Downloading sounddevice-0.4.6-py3-none-any.whl (31 kB)
Installing collected packages: sounddevice, mediapipe
Successfully installed mediapipe-0.10.0 sounddevice-0.4.6


In [None]:
import cv2
from google.colab.patches import cv2_imshow
import math
import numpy as np
import pandas as pd

class Video:
  """Wrapper around ``cv2.VideoCapture`` that addresses frames by index or time.

  Time stamps are converted to frame indices with the frame rate that OpenCV
  reports for the file (``CAP_PROP_FPS``).
  """

  def __init__(self, path):
    """Open the video file at ``path`` and cache its frame rate.

    Args:
      path: Location of the video file, passed unchanged to ``cv2.VideoCapture``.
    """
    self.path = path
    self.video = cv2.VideoCapture(self.path)
    # Index most recently passed to set_frame(); reading a frame does not advance it.
    self.cur_frame = 0
    # Frames per second as reported by OpenCV for this capture.
    self.fps = self.video.get(cv2.CAP_PROP_FPS)

  def get_frame_id(self, minutes, seconds):
    """Return the frame index for the time stamp ``minutes``:``seconds``.

    Args:
      minutes: Whole or fractional minutes from the start of the video.
      seconds: Whole or fractional seconds added to ``minutes``.

    Returns:
      ``fps * (minutes * 60 + seconds)`` truncated to an ``int``.
    """
    frame_id = int(self.fps*(minutes*60+seconds))
    return frame_id

  def set_frame(self, frame_id):
    """Seek the capture so that the next read returns frame ``frame_id``."""
    self.video.set(cv2.CAP_PROP_POS_FRAMES, frame_id)
    self.cur_frame = frame_id

  def get_frame(self, frame_id):
    """Seek to ``frame_id`` and return that frame.

    Returns:
      The decoded image, or ``None`` when OpenCV could not read a frame
      (the success flag of ``read()`` is not reported separately).
    """
    self.set_frame(frame_id)
    success, frame = self.video.read()
    return frame

  def get_cur_frame(self):
    """Read and return the frame at the capture's current position.

    Returns:
      The decoded image, or ``None`` when OpenCV could not read a frame.
    """
    success, frame = self.video.read()
    return frame

  def get_set_frame(self, minutes, seconds):
    """Seek to the time stamp ``minutes``:``seconds`` and return that frame.

    The time stamp is converted with get_frame_id() and the seek goes through
    set_frame(); the previous implementation called a non-existent
    ``set_frame_time`` method and therefore always raised ``AttributeError``.
    """
    self.set_frame(self.get_frame_id(minutes, seconds))
    return self.get_cur_frame()

  def get_timeframe(self, start_minutes, start_seconds, end_minutes, end_seconds, frame_rate):
    """Return every ``frame_rate``-th frame between two time stamps.

    Args:
      start_minutes, start_seconds: Time stamp of the first frame to return.
      end_minutes, end_seconds: Time stamp at which sampling stops (exclusive).
      frame_rate: Step between sampled frames, counted in frames.

    Returns:
      A list of frames as returned by get_frame(), in playback order.
    """
    first_frame_id = self.get_frame_id(start_minutes, start_seconds)
    last_frame_id = self.get_frame_id(end_minutes, end_seconds)
    n_frames = (last_frame_id - first_frame_id) // frame_rate
    cur_frame_id = first_frame_id
    frame_list = []
    for i in range(0, n_frames):
      frame = self.get_frame(cur_frame_id)
      frame_list.append(frame)
      cur_frame_id += frame_rate
    return frame_list

  def release(self):
    """Release the underlying ``cv2.VideoCapture`` handle."""
    self.video.release()

In [None]:
# Edge length, in pixels, of the square box that previews are scaled to fit.
DESIRED_HEIGHT = 480
DESIRED_WIDTH = 480


def resize_and_show(image):
  """Scale ``image`` so its longer side matches the preview box and display it.

  The shorter side is scaled by the same factor (rounded down), so the aspect
  ratio is kept. The result is shown with Colab's ``cv2_imshow``.

  Args:
    image: Array whose first two dimensions are (height, width).
  """
  rows, cols = image.shape[:2]
  if rows < cols:
    # Wider than tall: pin the width, derive the height.
    target_size = (DESIRED_WIDTH, math.floor(rows/(cols/DESIRED_WIDTH)))
  else:
    # Taller than wide (or square): pin the height, derive the width.
    target_size = (math.floor(cols/(rows/DESIRED_HEIGHT)), DESIRED_HEIGHT)
  # cv2.resize takes the target size as (width, height).
  cv2_imshow(cv2.resize(image, target_size))

# Set Frame List

In [None]:
# For every song, convert the "minute:second" start/end time stamps of the
# score table into frame indices, using the frame rate of one recording of that
# song, and save the extended table for the extraction cells below.
files = ["americano_part3", "diggin_part2", "jagger_part4", "pink_part3"] 
songs = ['americano', 'diggin', 'jagger', 'pink']


def _timestamp_to_frame(video, stamp):
  """Convert a "minute:second" string into a frame index of ``video``."""
  minute, second = stamp.split(':')
  return video.get_frame_id(int(minute), float(second))


for f, s in zip(files, songs):
  vid = Video("/content/drive/MyDrive/Thesis/Experiment Dances/{}.mov".format(f))
  # An unopened capture reports no usable frame rate, which would silently turn
  # every time stamp into frame 0.
  if not vid.video.isOpened():
    raise IOError("Could not open video for '{}'".format(f))

  time_df = pd.read_csv ('/content/drive/MyDrive/Thesis/Experiment Scores/{}.csv'.format(s))

  start_frames = [_timestamp_to_frame(vid, stamp) for stamp in time_df['start_time']]
  end_frames = [_timestamp_to_frame(vid, stamp) for stamp in time_df['end_time']]

  time_df['start_frame'] = start_frames
  time_df['end_frame'] = end_frames

  # Saved under the song name: the extraction cells read '{song}_frames.csv'.
  time_df.to_csv('/content/drive/MyDrive/Thesis/Experiment Dances/{}_frames.csv'.format(s))

  vid.video.release()

# Extract SK Data

Extract the skeletal data from all the participants for every song:

In [None]:
import mediapipe as mp

mp_pose = mp.solutions.pose
mp_drawing = mp.solutions.drawing_utils 
mp_drawing_styles = mp.solutions.drawing_styles

# Step, in frames, between two sampled frames of a segment.
frame_rate = 2
# Frame ids that could not be read from the video or stored in the table.
failed_frames = []

# Column names of the output table, one per MediaPipe pose landmark index (0..32).
landmarks = ["nose", "left eye (inner)", "left eye", "left eye (outer)", "right eye (inner)", "right eye", "right eye (outer)", "left ear", "right ear",
             "mouth (left)", "mouth (right)", "left shoulder", "right shoulder", "left elbow", "right elbow", "left wrist", "right wrist", "left pinky",
             "right pinky", "left index", "right index", "left thumb", "right thumb", "left hip", "right hip", "left knee", "right knee", "left ankle",
             "right ankle", "left heel", "right heel", "left foot index", "right foot index"]

songs = ["americano", "diggin", "jagger", "pink"] 
participants = ["part1", "part2", "part3", "part4"]


for song in songs: # loop over all songs

  time_df = pd.read_csv ('/content/drive/MyDrive/Thesis/Experiment Dances/{}_frames.csv'.format(song))

  # Every frame_rate-th frame id of every segment listed for this song.
  frame_list = []

  for index, row in time_df.iterrows():
    # iterrows() may hand back floats when all columns are numeric; range() needs ints.
    start_frame = int(row['start_frame'])
    end_frame = int(row['end_frame'])
    frame_list.extend(range(start_frame, end_frame, frame_rate))

  for participant in participants: # loop over all participants

    # One row per sampled frame id, one (x, y) tuple per landmark column.
    co_df = pd.DataFrame(columns = landmarks)
    
    vid = Video("/content/drive/MyDrive/Thesis/Experiment Dances/{}_{}.mov".format(song, participant))
    print('Processing {} - {}...'.format(song, participant))

    # Run MediaPipe Pose on every sampled frame.
    with mp_pose.Pose(
        static_image_mode=False, min_detection_confidence=0.5, 
        model_complexity=2, enable_segmentation=True) as pose:

      for frame_id in frame_list:
        
        frame = vid.get_frame(frame_id)

        if frame is None:
          # Unreadable frame: note it and fall through to the placeholder row so
          # the table keeps one row per sampled frame id.
          failed_frames.append(frame_id)
          results = None
        else:
          # Convert the BGR image to RGB and process it with MediaPipe Pose:
          results = pose.process(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB))

        if results is None or not results.pose_landmarks:
          # No pose available: (0, 0) placeholder for every landmark.
          c = [(0, 0) for j in range(len(landmarks))]
        else:
          detected = results.pose_landmarks.landmark
          c = [(detected[j].x, detected[j].y) for j in range(len(landmarks))]
        
        try:
          co_df.loc[frame_id] = c
        except ValueError:
          failed_frames.append(frame_id)

    vid.video.release()

    co_df.to_csv("/content/drive/MyDrive/Thesis/Experiment Dances/{}_{}_sk.csv".format(song, participant))
    print('Completed {} - {}!'.format(song, participant))

Extract the skeletal data from the baselines for every song:

In [None]:
import mediapipe as mp

mp_pose = mp.solutions.pose
mp_drawing = mp.solutions.drawing_utils 
mp_drawing_styles = mp.solutions.drawing_styles


# Step, in frames, between two sampled frames of a segment.
frame_rate = 2
# Frame ids that could not be read from the video or stored in the table.
failed_frames = []

# Column names of the output table, one per MediaPipe pose landmark index (0..32).
landmarks = ["nose", "left eye (inner)", "left eye", "left eye (outer)", "right eye (inner)", "right eye", "right eye (outer)", "left ear", "right ear",
             "mouth (left)", "mouth (right)", "left shoulder", "right shoulder", "left elbow", "right elbow", "left wrist", "right wrist", "left pinky",
             "right pinky", "left index", "right index", "left thumb", "right thumb", "left hip", "right hip", "left knee", "right knee", "left ankle",
             "right ankle", "left heel", "right heel", "left foot index", "right foot index"]

songs = ["americano", "diggin", "jagger", "pink"] 


def _show_pose_preview(frame, results):
  """Display ``frame`` faded to white outside the segmentation mask, with landmarks drawn."""
  annotated_image = frame.copy()
  white_img = np.zeros_like(annotated_image, dtype=np.uint8)
  white_img[:, :] = (255,255,255)
  # Blend weight per pixel: 0.2 where the mask is 0, 1.0 where the mask is 1.
  segm_2class = 0.2 + 0.8 * results.segmentation_mask
  segm_2class = np.repeat(segm_2class[..., np.newaxis], 3, axis=2)
  annotated_image = annotated_image * segm_2class + white_img * (1 - segm_2class)
  mp_drawing.draw_landmarks(
      annotated_image,
      results.pose_landmarks,
      mp_pose.POSE_CONNECTIONS,
      landmark_drawing_spec=mp_drawing_styles.get_default_pose_landmarks_style())
  resize_and_show(annotated_image)


for song in songs: # loop over all songs

  time_df = pd.read_csv ('/content/drive/MyDrive/Thesis/Experiment Dances/{}_frames.csv'.format(song))

  # Every frame_rate-th frame id of every segment listed for this song.
  frame_list = []

  for index, row in time_df.iterrows():
    # iterrows() may hand back floats when all columns are numeric; range() needs ints.
    start_frame = int(row['start_frame'])
    end_frame = int(row['end_frame'])
    frame_list.extend(range(start_frame, end_frame, frame_rate))

  # One row per sampled frame id, one (x, y) tuple per landmark column.
  co_df = pd.DataFrame(columns = landmarks)
    
  vid = Video("/content/drive/MyDrive/Thesis/Experiment Dances/{}_baseline.mp4".format(song))
  print('Processing {} - baseline...'.format(song))

  # Only the first frame with a detected pose is drawn, as a visual sanity check.
  preview_shown = False

  # Run MediaPipe Pose on every sampled frame.
  with mp_pose.Pose(
      static_image_mode=False, min_detection_confidence=0.9, 
      model_complexity=0, enable_segmentation=True) as pose:

    for frame_id in frame_list:

      frame = vid.get_frame(frame_id)

      if frame is None:
        # Unreadable frame: note it and fall through to the placeholder row so
        # the table keeps one row per sampled frame id.
        failed_frames.append(frame_id)
        results = None
      else:
        # Convert the BGR image to RGB and process it with MediaPipe Pose.
        results = pose.process(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB))

      if results is None or not results.pose_landmarks:
        # No pose available: (0, 0) placeholder for every landmark.
        c = [(0, 0) for j in range(len(landmarks))]
      else:
        detected = results.pose_landmarks.landmark
        c = [(detected[j].x, detected[j].y) for j in range(len(landmarks))]
        if not preview_shown and results.segmentation_mask is not None:
          _show_pose_preview(frame, results)
          preview_shown = True

      try:
        co_df.loc[frame_id] = c
      except ValueError:
        failed_frames.append(frame_id)

  vid.video.release()

  # Same naming scheme as the participant tables, with 'baseline' as the participant.
  co_df.to_csv("/content/drive/MyDrive/Thesis/Experiment Dances/{}_baseline_sk.csv".format(song))
  print('Completed {} - baseline!'.format(song))

Check the shapes of the dataframes:

In [None]:
# Print the shape of every skeletal-data table so missing or short tables stand out.
songs = ["americano", "diggin", "jagger", "pink"] 

# Guarded so that re-running this cell does not add 'baseline' a second time.
if 'baseline' not in participants:
  participants.append('baseline')

for song in songs:
  print(song, ':')
  for part in participants:
    try:
      df = pd.read_csv ('/content/drive/MyDrive/Thesis/Experiment Dances/{}_{}_sk.csv'.format(song, part))
    except (OSError, pd.errors.EmptyDataError, pd.errors.ParserError) as err:
      # Report the table as unavailable instead of skipping it silently.
      print('- {}: unavailable ({})'.format(part, type(err).__name__))
      continue
    print('- {}: {}'.format(part, df.shape))
  print('\n')