<a href="https://colab.research.google.com/github/vchiang001/sexual_diversity_dissertation/blob/main/Dijkstra_lab_African_cichlids_behavioural_analysis.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Using an existing trained DeepLabCut model to analyse videos: https://github.com/DeepLabCut/DeepLabCut/blob/main/examples/COLAB/COLAB_maDLC_TrainNetwork_VideoAnalysis.ipynb


In [None]:
###ATTENTION: Variables to set###
# These three values are passed to the deeplabcut.analyze_videos and
# deeplabcut.create_video_with_all_detections calls further down (as config, videos and videotype).
path_config_file = '/content/drive/My Drive/config.yaml' #path to the project's config.yaml on the mounted Drive; the project folder is shared at https://drive.google.com/drive/folders/1ZPSlzhH2hSAqg3dB1TKuadsiGfDhfLMC?usp=sharing
video_directory = '' #location of the video files you want to run pose estimation on (empty by default - must be filled in)
video_type = '.MOV' #video file extension


In [None]:
#(this will take a few minutes to install all the dependences!)
!apt update && apt install cuda-11-8
!pip install "deeplabcut[tf]"
%reload_ext numpy
%reload_ext scipy
%reload_ext matplotlib
%reload_ext mpl_toolkits
!pip install --upgrade scikit-image
!pip3 install pickle5

In [None]:
#all imports
import deeplabcut
import pickle5 as pickle

In [None]:
#Now, let's link to your Google Drive. Run this cell and follow the authorization instructions:
# The Drive is mounted at /content/drive; the '/content/drive/...' paths set in the
# "Variables to set" cells of this notebook all live below this mount point.
from google.colab import drive
drive.mount('/content/drive')

In [None]:
#analyse the videos without animal assembly & tracking
# Uses path_config_file, video_directory and video_type from the first "Variables to set" cell.
# save_as_csv=True requests CSV output; auto_track=False together with n_tracks=None is what
# skips the assembly/tracking stage here.
# NOTE(review): identity_only=True is passed although tracking is disabled - confirm it has any effect.
deeplabcut.analyze_videos(config=path_config_file,
                          videos=video_directory,
                          videotype=video_type,
                          shuffle=1,
                          save_as_csv=True,
                          auto_track=False,
                          n_tracks=None,
                          identity_only=True,
                          )

In [None]:
#visualise the pose estimation results
# Called with the same config, videos, videotype and shuffle as the analyze_videos call above,
# and with every body part displayed (displayedbodyparts='all').
deeplabcut.create_video_with_all_detections(config=path_config_file,
                                            videos=video_directory,
                                            videotype=video_type,
                                            shuffle=1,
                                            displayedbodyparts='all',
                                            )

#Transform pose estimation output for downstream analysis of two-cichlids separated setup by a process called "unpickling"

In [None]:
###ATTENTION: run on laptop command prompt/terminal to find region of interest(ROI) for your video
#colab doesn't support GUI, to define ROI in a randomly extracted frame of your video (should match the pickle file you set)

#All imports
import cv2
import numpy as np
import matplotlib.pyplot as plt
from matplotlib.patches import Polygon

#ROI Polygon function
class ROIPolygon:
    """Interactive helper for outlining a polygonal region of interest (ROI) on an image.

    Relies on ``plt.ginput``, so it needs a matplotlib backend that can receive mouse clicks.
    """

    def __init__(self, image):
        """Create a figure titled 'Select ROI' that shows ``image``.

        Args:
            image: array to display with ``Axes.imshow``.
        """
        self.image = image
        self.fig, self.ax = plt.subplots()
        self.ax.set_title('Select ROI')
        self.ax.imshow(self.image, cmap='gray')
        self.ROI_coords = []  # filled by select_roi() with the clicked (x, y) points

    def select_roi(self):
        """Collect clicked points, print them, and draw the polygon they define.

        The clicked points are stored in ``self.ROI_coords``. They are also printed
        (rounded to whole pixels) so they can be copied into the coord1..coord4 variables.
        Nothing is drawn when fewer than three points were clicked.
        """
        print("Select points to define the ROI polygon. Press 'Enter' to finish.")
        # n=-1 / timeout=-1: no limit on the number of points and no time limit.
        self.ROI_coords = plt.ginput(n=-1, timeout=-1)
        if len(self.ROI_coords) < 3:
            # A polygon needs at least three vertices; an empty list would crash the patch constructor.
            print("Fewer than 3 points were selected, so no ROI polygon was drawn.")
            return
        print("ROI coordinates (x, y):", [(round(x), round(y)) for x, y in self.ROI_coords])
        polygon = Polygon(self.ROI_coords, closed=True, fill=None, edgecolor='r')
        self.ax.add_patch(polygon)
        plt.show()

###ATTENTION: Variables to set###
video_path = r"C:\Users\vscch\OneDrive\Desktop\python\2m_separated\2m_separated5.mov" #File path of your video

#Load the video & randomly extract a frame to find your ROI
cap = cv2.VideoCapture(video_path)
try:
    # Fail with a clear message instead of an obscure error further down.
    if not cap.isOpened():
        raise IOError(f"Could not open video: {video_path}")
    total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
    if total_frames <= 0:
        raise ValueError(f"Video reports no frames: {video_path}")
    random_frame_index = np.random.randint(0, total_frames)
    cap.set(cv2.CAP_PROP_POS_FRAMES, random_frame_index)
    ret, frame = cap.read()
finally:
    # Always release the capture handle, even when one of the checks above fails.
    cap.release()
if not ret:
    raise RuntimeError(f"Could not read frame {random_frame_index} from {video_path}")
# OpenCV decodes frames as BGR; matplotlib's imshow expects RGB.
frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)

# Create an instance of ROIPolygon with the randomly extracted frame
ROI = ROIPolygon(frame_rgb)
ROI.select_roi()

In [None]:
###ATTENTION: Variables to set###
# Define the bounding box coordinates of which cichlid you want to analyse(x,y)
# The four corners are later combined, in this order, into the polygon that is used to keep
# only the detections lying inside it.
coord1 = (61, 162)  # Top-left corner
coord2 = (921, 306)  # Top-right corner
coord3 = (940, 766)  # Bottom-right corner
coord4 = (61, 886)  # Bottom-left corner
#Record the coordinates you used in a sheet next to the relevant file names, so you can refer back to them later and use them for verification

#pose estimation output from DeepLabCut for one video (with file name _full.pickle)
file_path = '/content/drive/MyDrive/BEHAVIOMICS/SpecificAim2/202404_unpickle/6-J1-1115_butts1DLC_resnet50_Astatotilapia_burtoniFeb17shuffle1_100000_full.pickle'
directory_path = '/content/drive/MyDrive/BEHAVIOMICS/SpecificAim2/202404_unpickle' #folder where the unpickled csv file is saved

In [None]:
#Can be used to verify the location of your polygon is correct - use on your laptop
# Needs a desktop session because cv2.imshow opens a window. `video_path`, `cv2` and `np`
# come from the ROI-selection cell above.

coord1 = (61, 162)  # Top-left corner
coord2 = (921, 306)  # Top-right corner
coord3 = (940, 766)  # Bottom-right corner
coord4 = (61, 886)  # Bottom-left corner

# Load the video & randomly extract a frame to check the ROI against
cap = cv2.VideoCapture(video_path)
try:
    if not cap.isOpened():
        raise IOError(f"Could not open video: {video_path}")
    total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
    if total_frames <= 0:
        raise ValueError(f"Video reports no frames: {video_path}")
    random_frame_index = np.random.randint(0, total_frames)
    cap.set(cv2.CAP_PROP_POS_FRAMES, random_frame_index)
    ret, frame = cap.read()
finally:
    # Always release the capture handle, even when one of the checks above fails.
    cap.release()
if not ret:
    raise RuntimeError(f"Could not read frame {random_frame_index} from {video_path}")

# Vertices in the (N, 1, 2) int32 layout that cv2.polylines takes. They are built straight from
# the corner tuples: when the cells are run top to bottom, `Polygon` is still matplotlib's patch
# class at this point, and that class has no `.exterior` attribute.
pts = np.array([coord1, coord2, coord3, coord4], np.int32).reshape((-1, 1, 2))

# cv2.imshow interprets images as BGR, so draw on and display the frame as decoded (no RGB
# conversion); (0, 0, 255) is red in BGR order. `True` closes the outline.
cv2.polylines(frame, [pts], True, (0, 0, 255), 2)

# Display the frame with the polygon drawn; any key press closes the window
cv2.imshow('Frame with Bounding Box', frame)
cv2.waitKey(0)
cv2.destroyAllWindows()

In [None]:
#All imports
import pandas as pd
import numpy as np
from scipy.signal import savgol_filter
import matplotlib.pyplot as plt
from matplotlib.widgets import RectangleSelector
from shapely.geometry import Polygon, Point
import os

In [None]:
# Load the DeepLabCut "_full.pickle" output and prepare the flat column names of the CSV.
# NOTE: pd.read_pickle can execute code embedded in the file - only open pickles you produced yourself.
print('   Unpickling', file_path)
df = pd.read_pickle(file_path)
print('   Read pickle file with', len(df), 'frames containing detections')

# Region used to keep only the detections of the cichlid being analysed (slanted bounding box).
bounding_box_polygon = Polygon([coord1, coord2, coord3, coord4])

# Body parts, in the order in which the per-frame arrays of the pickle are paired with them below.
body_parts = [
    'eyes_left',
    'eyes_right',
    'mouthend_left',
    'mouthend_right',
    'caudalfin_top_base',
    'caudalfin_bottom_base',
    'caudalfin_top_end',
    'caudalfin_bottom_end',
    'dorsal_opercular',
    'dorsal_opercular_mid',
    'dorsal_mid',
    'dorsal_midbase',
    'ventral_opercular',
    'ventral_opercular_mid',
    'ventral_mid',
    'ventral_midbase',
    'pectoralfin_base_left',
    'pectoralfin_base_right',
]

# 'frame' followed by <part>_x, <part>_y, <part>_likelihood for every body part.
columns = ['frame']
for part in body_parts:
    for suffix in ('x', 'y', 'likelihood'):
        column_name = part + '_' + suffix
        columns.append(column_name)
        print(suffix + ':', column_name)

columns

In [None]:
#Unpickle the files & save the output as a csv with original name + unpickle
# For every 'frame…' entry of the pickle, keep per body part the detection with the highest
# confidence among those lying inside bounding_box_polygon, then write one CSV row per frame.

output = []
empty_cell_val = 'NA'  # written to the CSV where a body part has no detection inside the polygon

counter = 0
for k, v in df.items():
    counter += 1
    if not k.startswith('frame'):
        # skip entries of the pickle that are not per-frame detections
        continue
    row = k[5:]  # the part of the key after the 'frame' prefix

    data_row = dict.fromkeys(columns)
    data_row['frame'] = row

    # Pair each body part with its candidate coordinates and confidences. zip() stops at the
    # shortest input, so the number of body parts no longer has to be hard-coded.
    for part, coord_arr, conf_arr in zip(body_parts, v['coordinates'][0], v['confidence']):
        for xy_coords, conf in zip(coord_arr, conf_arr):
            if not bounding_box_polygon.contains(Point(xy_coords[0], xy_coords[1])):
                continue
            best_so_far = data_row[part + '_likelihood']
            if best_so_far is not None:
                # A detection is already stored: only replace it with a strictly more confident one.
                if not conf[0] > best_so_far:
                    continue
                print(row, "replacing point with higher confidence")
            data_row[part + '_x'] = xy_coords[0]
            data_row[part + '_y'] = xy_coords[1]
            data_row[part + '_likelihood'] = conf[0]
    output.append(data_row)

    if counter > 10000000:
        print('   Hit the maximum of 10 MILLION frames..')
        break

output_file = pd.DataFrame.from_dict(output)
output_file = output_file.set_index('frame')
output_file = output_file.fillna(empty_cell_val)
# NOTE: 'frame' holds strings, so this is a text sort; it matches numeric order only while the
# frame numbers in the keys are zero-padded to the same width.
output_file = output_file.sort_values(by='frame')

# Strip only the final extension and the directory, so file names containing extra dots keep
# their full stem.
output_file_name = os.path.splitext(os.path.basename(file_path))[0] + '_UNPICKLED.csv'

output_path = os.path.join(directory_path, output_file_name)
print('   Writing output to:', output_path)

output_file.to_csv(output_path)
print('   Done unpickling file!')

In [None]:
###ATTENTION: Double check each output has pose data that corresponds to your video results, before moving on.

#Transform unpickled file with compatible headers for csv & h5 file types

Before starting this section


1.   Collect all the unpickled files in one folder so that you can process them together: the code in this section is applied automatically to every csv file in a folder
2.   Since unpickle file names are now super long (due to how DeepLabCut spits out file outputs), I would recommend renaming them to a shorter version, so it's easier to know which files you are working with



In [None]:
###ATTENTION: Variables to set###
scorer = 'vscc' #name written into the top ('scorer') level of the formatted column header
input_folder = '/path/to/input_folder/' #folder where you saved all the unpickled files (placeholder - must be changed)
output_folder = '/content/drive/MyDrive/BEHAVIOMICS/SpecificAim2/202404_unpickle' #folder that receives the formatted csv and h5 files


In [None]:
#All imports
import pandas as pd
import os

In [None]:
#Defines the body parts of our cichlids; the functions below use them to build correctly formatted files
# The order matters: correct_header assigns the generated (scorer, bodypart, coord) header to the
# CSV columns by position, so this list has to follow the column order of the unpickled files.
body_parts = [
    'eyes_left',
    'eyes_right',
    'mouthend_left',
    'mouthend_right',
    'caudalfin_top_base',
    'caudalfin_bottom_base',
    'caudalfin_top_end',
    'caudalfin_bottom_end',
    'dorsal_opercular',
    'dorsal_opercular_mid',
    'dorsal_mid',
    'dorsal_midbase',
    'ventral_opercular',
    'ventral_opercular_mid',
    'ventral_mid',
    'ventral_midbase',
    'pectoralfin_base_left',
    'pectoralfin_base_right',
]

def correct_header(df, scorer, body_parts, output_folder):
    """Rewrite an unpickled CSV with a three-level header and save it as CSV and H5.

    Args:
        df: path of the unpickled CSV file (despite the name, this is a path, not a DataFrame).
        scorer: value used for the 'scorer' level of the new header.
        body_parts: body part names, in the same order as the columns of the CSV.
        output_folder: folder that receives ``<name>_formatted.csv`` and ``<name>_formatted.h5``.

    Returns:
        Path of the formatted CSV file.

    Raises:
        ValueError: if the CSV does not have exactly three columns (x, y, likelihood) per body part.
    """
    formatted_stem = os.path.splitext(os.path.basename(df))[0] + '_formatted'
    output_file_path = os.path.join(output_folder, formatted_stem + '.csv')
    # Built from the stem instead of str.replace(".csv", ".h5") on the whole path, which would
    # also rewrite a ".csv" occurring in a folder name.
    h5_file_path = os.path.join(output_folder, formatted_stem + '.h5')

    columns = pd.MultiIndex.from_product([[scorer], body_parts, ['x', 'y', 'likelihood']], names=['scorer', 'bodyparts', 'coords'])
    data = pd.read_csv(df, index_col=0)
    if data.shape[1] != len(columns):
        raise ValueError(
            f"{df} has {data.shape[1]} data columns, expected {len(columns)} "
            f"(x, y, likelihood for each of {len(body_parts)} body parts)"
        )
    # Drop the original first (index) column and number the rows from 0.
    data.index.name = None
    data = data.reset_index(drop=True)
    data.columns = columns  # positional: relies on body_parts matching the CSV column order
    data.to_csv(output_file_path)
    guarantee_multiindex_rows(data)
    data.to_hdf(h5_file_path, key="df_with_missing", mode="w")
    return output_file_path

def guarantee_multiindex_rows(df):
    """Convert a path-like string row index into a MultiIndex, in place.

    When the row index is not already a MultiIndex and its labels are strings, every label is
    split on "/" (or on a backslash when the first label contains no "/") and the pieces become
    the index levels. Afterwards the second index level is cast to ``str``. Frames whose index
    cannot be treated this way (for example an integer index, or an empty frame) are left unchanged.

    Args:
        df: DataFrame whose index is modified in place.
    """
    # An empty frame has no first label to inspect; indexing it would raise IndexError.
    if not isinstance(df.index, pd.MultiIndex) and len(df.index) > 0:
        path = df.index[0]
        try:
            sep = "/" if "/" in path else "\\"
            splits = tuple(df.index.str.split(sep))
            df.index = pd.MultiIndex.from_tuples(splits)
        except TypeError:
            # Non-string labels (e.g. integers) do not support the "in" test above: keep the index.
            pass
    try:
        df.index = df.index.set_levels(df.index.levels[1].astype(str), level=1)
    except AttributeError:
        # A plain (non-Multi) index has no levels to adjust.
        pass


In [None]:
# Iterate over all CSV files in the input folder & save CSV & H5 with the correct format to another folder
# (correct_header writes both the formatted CSV and the H5 file.)
for file_name in sorted(os.listdir(input_folder)):
    if not file_name.endswith('.csv'):
        continue
    if file_name.endswith('_formatted.csv'):
        # Output of an earlier run (possible when input and output folder are the same):
        # it already carries the three-row header and must not be formatted a second time.
        continue
    # A dedicated name is used so the pickle path stored in `file_path` is not overwritten.
    csv_path = os.path.join(input_folder, file_name)
    formatted_file_path = correct_header(csv_path, scorer, body_parts, output_folder)

In [None]:
###ATTENTION: to make subsequent steps easier, create two folders, 'h5' and 'csv', and save the files of each type into the matching folder.
###Also double check that the formatted files have the same content as your unpickled files before moving on.

#Calculating velocity, acceleration, speed using DLC2kinematics https://github.com/AdaptiveMotorControlLab/DLC2Kinematics

In [None]:
#install dlc2kinematics: when it asks if the session should be restarted, you should do so
# After a restart, re-run the "Variables to set" and import cells of this section before continuing,
# because a restarted session starts without any variables defined.
!pip install dlc2kinematics

In [None]:
###ATTENTION: Variables to set###
# NOTE: these reuse the names input_folder / output_folder from the formatting section above,
# so running this cell replaces the values set there.
input_folder = '/path/to/input_folder/' #folder where you saved all the H5 files to analyse (placeholder - must be changed)
output_folder = '/content/drive/MyDrive/BEHAVIOMICS/SpecificAim2/202404_unpickle' #folder that receives the _vel, _acc and _speed csv files

In [None]:
#All imports
import dlc2kinematics
import os

In [None]:
def velocity_acceleration_speed(file_path, output_folder):
    """Compute velocity, acceleration and speed for one H5 file and save each as a CSV.

    The results are written to ``output_folder`` as ``<name>_vel.csv``, ``<name>_acc.csv``
    and ``<name>_speed.csv``, where ``<name>`` is the H5 file name without its extension.

    Args:
        file_path: path of the H5 file to load with ``dlc2kinematics.load_data``.
        output_folder: folder that receives the three CSV files.
    """
    # Load data from the H5 file
    df, bodyparts, scorer = dlc2kinematics.load_data(file_path)
    base_name = os.path.splitext(os.path.basename(file_path))[0]

    # Same order as before: velocity, then acceleration, then speed - each one is computed
    # for all body parts and saved before the next one starts.
    for quantity, suffix in (('velocity', '_vel.csv'), ('acceleration', '_acc.csv'), ('speed', '_speed.csv')):
        compute = getattr(dlc2kinematics, 'compute_' + quantity)
        result = compute(df, bodyparts=['all'])
        result.to_csv(os.path.join(output_folder, base_name + suffix), index=True)

In [None]:
# Run velocity_acceleration_speed on every H5 file of the input folder; results go to the output folder
for file_name in os.listdir(input_folder):
    if not file_name.endswith('.h5'):
        continue  # anything that is not an H5 file is ignored
    file_path = os.path.join(input_folder, file_name)
    velocity_acceleration_speed(file_path, output_folder)

#Plotting xy coordinates occupied https://github.com/DeepLabCut/DLCutils/blob/master/Demo_loadandanalyzeDLCdata.ipynb

In [None]:
# Importing the toolbox (takes several seconds)
import os
import pandas as pd
import matplotlib.pyplot as plt

In [None]:
###ATTENTION: Variables to set###
folder_path = '/content/drive/MyDrive/BEHAVIOMICS/SpecificAim2/20240502_two_cichlid_analyses/formatted' #folder that is searched for the .h5 files to plot
output_folder = '/content/drive/MyDrive/BEHAVIOMICS/SpecificAim2/20240502_two_cichlid_analyses/xy_coord' #folder that receives the *_xyplot.svg files (reuses the name output_folder from earlier sections)

In [None]:
def plot_xy_from_h5(file_path, output_folder):
    """Scatter-plot the x/y positions of every body part in one H5 file and save it as SVG.

    Only points with a likelihood above 0.1 are drawn. Each body part gets its own colour from
    the 'jet' colormap, and the colorbar is labelled with the body part names. The figure is
    saved to ``output_folder`` as ``<name>_xyplot.svg`` and then closed without being shown.

    Args:
        file_path: path of the H5 file, read with ``pd.read_hdf``; its columns are expected to
            be a (scorer, bodypart, coord) MultiIndex.
        output_folder: folder that receives the SVG file.
    """
    # Load data from H5 file into a DataFrame
    df = pd.read_hdf(file_path)

    bodyparts = df.columns.get_level_values(1).unique()
    scorer = df.columns.get_level_values(0)[0]  # first scorer name in the header

    fig, ax = plt.subplots(figsize=(10, 6))
    try:
        # plt.get_cmap replaces plt.cm.get_cmap, which newer Matplotlib releases no longer provide.
        colors = plt.get_cmap('jet', len(bodyparts))

        # Plot the sufficiently confident points of each body part
        for bpindex, bp in enumerate(bodyparts):
            part = df[scorer][bp]
            confident = part['likelihood'].values > 0.1
            ax.plot(part['x'].values[confident], part['y'].values[confident], '.', color=colors(bpindex), alpha=0.2)

        # Flip the y axis so that y increases downwards.
        ax.invert_yaxis()

        # Colorbar with one tick per body part
        sm = plt.cm.ScalarMappable(cmap=plt.get_cmap('jet'), norm=plt.Normalize(vmin=0, vmax=len(bodyparts) - 1))
        sm.set_array([])  # public equivalent of assigning the private sm._A
        cbar = fig.colorbar(sm, ax=ax, ticks=range(len(bodyparts)))
        cbar.set_ticklabels(bodyparts)

        # Save the plot as <file name>_xyplot.svg in the output folder
        file_name = os.path.splitext(os.path.basename(file_path))[0]
        output_path = os.path.join(output_folder, file_name + '_xyplot.svg')
        fig.savefig(output_path, format='svg')
    finally:
        # Close the figure even if plotting or saving failed, so figures do not pile up
        # when this function is called in a loop.
        plt.close(fig)

In [None]:
# Create an xy plot for every H5 file found in folder_path
for file_name in os.listdir(folder_path):
    if not file_name.endswith('.h5'):
        continue  # anything that is not an H5 file is ignored
    file_path = os.path.join(folder_path, file_name)
    plot_xy_from_h5(file_path, output_folder)

#Calculating total distances https://github.com/farhanaugustine/DeepLabCut-Analysis-Jupyter-Scripts/blob/main/DLC_ROI_Distance_Velocity_Entries.ipynb

In [None]:
#All imports
import os
import pandas as pd
import numpy as np

In [None]:
###ATTENTION: Variables to set###
input_directory = '/content/drive/MyDrive/BEHAVIOMICS/SpecificAim2/20240502_two_cichlid_analyses/formatted' #folder that is searched for the formatted .csv files
output_directory = '/content/drive/MyDrive/BEHAVIOMICS/SpecificAim2/20240502_two_cichlid_analyses/distance' #folder that receives the *_distance.csv files
confidence = 0.4 #likelihood threshold: x/y of a body part are set to NaN in frames where its likelihood is below this value
max_distance_per_frame = 100  #distance in pixels a body part may move between two consecutive frames before the move is treated as an erroneous jump
frame_rate = 30  #Frame rate of your videos (NOTE: not referenced by the distance code in this section)

In [None]:
# Define the function to detect jumps
def detect_jumps(x_coords, y_coords, max_distance):
    """Return the indices at which a point moved further than ``max_distance`` in one step.

    Args:
        x_coords: sequence of x positions, one per frame.
        y_coords: sequence of y positions, one per frame (same length as ``x_coords``).
        max_distance: largest straight-line distance allowed between consecutive frames.

    Returns:
        List of indices ``i`` (starting at 1) for which the distance between frame ``i - 1``
        and frame ``i`` is greater than ``max_distance``. Steps involving NaN never count
        as jumps, because a comparison with NaN is false.
    """
    def step_length(i):
        # straight-line distance travelled between frame i - 1 and frame i
        dx = x_coords[i] - x_coords[i - 1]
        dy = y_coords[i] - y_coords[i - 1]
        return np.sqrt(dx**2 + dy**2)

    return [i for i in range(1, len(x_coords)) if step_length(i) > max_distance]

# Define the function to calculate filtered distance
def calculate_filtered_distance(body_part_data, part_name, max_distance_per_frame):
    """Sum the frame-to-frame distance travelled by one body part, leaving out jumps.

    Args:
        body_part_data: dict mapping a body part name to a dict with 'x' and 'y' coordinate
            sequences (one value per frame).
        part_name: key of the body part to process.
        max_distance_per_frame: threshold passed to ``detect_jumps``.

    Returns:
        Tuple ``(total_distance, jumps)``: the summed distance of the accepted steps and the
        list of jump indices reported by ``detect_jumps``.
    """
    x_coords = body_part_data[part_name]['x']
    y_coords = body_part_data[part_name]['y']
    jumps = detect_jumps(x_coords, y_coords, max_distance_per_frame)
    # Set for constant-time membership tests; testing against the list made the loop quadratic.
    jump_indices = set(jumps)

    total_distance = 0
    i = 0
    while i < len(x_coords) - 1:
        if i in jump_indices or i + 1 in jump_indices:
            # Step touches a jump: skip it and move on by two indices.
            # NOTE(review): moving on by two means the step that follows is never examined,
            # even when neither of its ends is a jump - confirm this is intended.
            i += 2
            continue
        x_diff = x_coords[i + 1] - x_coords[i]
        y_diff = y_coords[i + 1] - y_coords[i]
        # Steps with a missing (NaN) coordinate contribute nothing.
        if not (np.isnan(x_diff) or np.isnan(y_diff)):
            total_distance += np.sqrt(x_diff**2 + y_diff**2)
        i += 1
    return total_distance, jumps

# Define the list of body parts to analyze
# The distance loop below looks up every name of this list in the data read from each CSV,
# so each name has to be present in the 'bodyparts' level of the CSV header.
body_parts = [
    'eyes_left',
    'eyes_right',
    'mouthend_left',
    'mouthend_right',
    'caudalfin_top_base',
    'caudalfin_bottom_base',
    'caudalfin_top_end',
    'caudalfin_bottom_end',
    'dorsal_opercular',
    'dorsal_opercular_mid',
    'dorsal_mid',
    'dorsal_midbase',
    'ventral_opercular',
    'ventral_opercular_mid',
    'ventral_mid',
    'ventral_midbase',
    'pectoralfin_base_left',
    'pectoralfin_base_right',
]

In [None]:
# Iterate over each CSV file in the input directory and write one <name>_distance.csv per file,
# holding the filtered distance travelled by every body part.
# Loop-local names (csv_path, coords_df, file_scorer) are used so that the notebook-level
# variables `unpickle`, `df` and `scorer` set by other sections are not overwritten.
for filename in os.listdir(input_directory):
    if not filename.endswith(".csv"):
        continue
    csv_path = os.path.join(input_directory, filename)

    # Read the CSV file; the first three rows form the (scorer, bodyparts, coords) header
    coords_df = pd.read_csv(csv_path, index_col=0, header=list(range(3)))

    # Blank out x/y wherever the likelihood of that body part is below the confidence threshold
    for col_name in coords_df.columns:
        if "likelihood" in col_name[2]:
            x_col = (col_name[0], col_name[1], 'x')
            y_col = (col_name[0], col_name[1], 'y')
            coords_df.loc[coords_df[col_name] < confidence, [x_col, y_col]] = float('nan')

    unique_body_parts = coords_df.columns.get_level_values(1).unique().tolist()
    file_scorer = coords_df.columns.get_level_values(0)[0]

    body_part_data = {
        part: {
            "x": coords_df.loc[:, (file_scorer, part, 'x')].to_numpy(),
            "y": coords_df.loc[:, (file_scorer, part, 'y')].to_numpy(),
            "likelihood": coords_df.loc[:, (file_scorer, part, 'likelihood')].to_numpy(),
        }
        for part in unique_body_parts
    }

    distances = {}
    jumps_detected = {}
    for part in body_parts:
        distances[part], jumps_detected[part] = calculate_filtered_distance(body_part_data, part, max_distance_per_frame)

    # One row per body part
    df_output = pd.DataFrame(list(distances.items()), columns=['Body Part', 'Distance'])

    # Strip only the final extension, so file names that contain extra dots keep their full
    # stem and cannot collide with each other.
    output_filename = os.path.join(output_directory, os.path.splitext(filename)[0] + "_distance.csv")
    df_output.to_csv(output_filename, index=False)

#Calculating the number of butts

Factors included:
*   Confidence of the body part
*   In region of interest defined
*   Whether the cichlid is moving towards the divider
*   Whether the velocity is above a defined level





In [None]:
###ATTENTION: run on laptop command prompt/terminal to find region of interest(ROI) for your video
#colab doesn't support GUI, to define ROI in a randomly extracted frame of your video (should match the pickle file you set)
import cv2
import numpy as np
import matplotlib.pyplot as plt
from matplotlib.patches import Polygon

class ROIPolygon:
    """Interactive helper for outlining a polygonal region of interest (ROI) on an image.

    Relies on ``plt.ginput``, so it needs a matplotlib backend that can receive mouse clicks.
    """

    def __init__(self, image):
        """Create a figure titled 'Select ROI' that shows ``image``.

        Args:
            image: array to display with ``Axes.imshow``.
        """
        self.image = image
        self.fig, self.ax = plt.subplots()
        self.ax.set_title('Select ROI')
        self.ax.imshow(self.image, cmap='gray')
        self.ROI_coords = []  # filled by select_roi() with the clicked (x, y) points

    def select_roi(self):
        """Collect clicked points, print them, and draw the polygon they define.

        The clicked points are stored in ``self.ROI_coords``. They are also printed
        (rounded to whole pixels) so they can be copied into the coord1..coord4 variables.
        Nothing is drawn when fewer than three points were clicked.
        """
        print("Select points to define the ROI polygon. Press 'Enter' to finish.")
        # n=-1 / timeout=-1: no limit on the number of points and no time limit.
        self.ROI_coords = plt.ginput(n=-1, timeout=-1)
        if len(self.ROI_coords) < 3:
            # A polygon needs at least three vertices; an empty list would crash the patch constructor.
            print("Fewer than 3 points were selected, so no ROI polygon was drawn.")
            return
        print("ROI coordinates (x, y):", [(round(x), round(y)) for x, y in self.ROI_coords])
        polygon = Polygon(self.ROI_coords, closed=True, fill=None, edgecolor='r')
        self.ax.add_patch(polygon)
        plt.show()

video_path = r"C:\Users\2m_separated5.MOV" #Location of your video

# Load the video & randomly extract a frame on which to outline the ROI
cap = cv2.VideoCapture(video_path)
try:
    # Fail with a clear message instead of an obscure error further down.
    if not cap.isOpened():
        raise IOError(f"Could not open video: {video_path}")
    total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
    if total_frames <= 0:
        raise ValueError(f"Video reports no frames: {video_path}")
    random_frame_index = np.random.randint(0, total_frames)
    cap.set(cv2.CAP_PROP_POS_FRAMES, random_frame_index)
    ret, frame = cap.read()
finally:
    # Always release the capture handle, even when one of the checks above fails.
    cap.release()
if not ret:
    raise RuntimeError(f"Could not read frame {random_frame_index} from {video_path}")
# OpenCV decodes frames as BGR; matplotlib's imshow expects RGB.
frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
ROI = ROIPolygon(frame_rgb)
ROI.select_roi()

In [None]:
###ATTENTION: Variables to set###
velocity = '/content/drive/MyDrive/2m_separated4_left_formatted_vel.csv' #csv file holding the velocities (the *_vel.csv of the same recording)
unpickle = '/content/drive/MyDrive/2m_separated4_left_formatted.csv' #csv file holding the positions, with the correct three-row header (the *_formatted.csv)
confidence = 0.4 #number between 0 - 1; x/y values are set to NaN where the likelihood is below it
directory_path = '/content/drive/MyDrive/butts'#where to save the files

# Define the bounding box coordinates ROI (x,y)
# A position only counts towards a butt while it lies inside the polygon built from these corners.
coord1 = (809, 375)  # Top-left corner
coord2 = (1103, 387)  # Top-right corner
coord3 = (1114, 1030)  # Bottom-right corner
coord4 = (793, 1022)  # Bottom-left corner
#Record the coordinates you used in a sheet next to the relevant file names, so you can refer back to them later and use them for verification

# NOTE(review): the units of the velocity values depend on how the velocity file was computed - confirm "per second".
vel_thresh = 1 #above this velocity is considered as butt (usually 0.0 - 30.0 pixels per second)
vel_wndw = 40 #number of most recent in-ROI x positions kept for the direction check, 0-50 (based on 30 frames per second)
side = 'left' #which side of the tank the cichlid is on, either 'left' or 'right'

In [None]:
#All imports
import pandas as pd
from shapely.geometry import Polygon
from shapely.geometry import Point
import os

In [None]:
#Cleans up the input data using the confidence level set for the body parts
def _mask_low_confidence(frame, threshold):
    """Set x/y to NaN in every row where the body part's likelihood is below ``threshold``.

    ``frame`` is modified in place and returned; the name of each processed body part is printed.
    """
    for col_name in frame.columns:
        if "likelihood" in col_name[2]:
            print(col_name[1])
            x_col = (col_name[0], col_name[1], 'x')
            y_col = (col_name[0], col_name[1], 'y')
            frame.loc[frame[col_name] < threshold, [x_col, y_col]] = float('nan')
    return frame

bounding_box_polygon = Polygon([coord1, coord2, coord3, coord4]) #Polygon defined

# Velocities: the first three rows of the file form the column header
df_vel = _mask_low_confidence(pd.read_csv(velocity, index_col = 0, header=list(range(3))), confidence)

print(df_vel)

# Positions: same layout, same treatment
unpickle_df = _mask_low_confidence(pd.read_csv(unpickle, index_col = 0, header=list(range(3))), confidence)

print(unpickle_df)

In [None]:
#Runs through how butts are determined: direction, velocity, location
#Saves csv of butts for each bodypart across frames
# A frame is marked with 1 for a body part when all of the following hold:
#   * location:  the position lies inside bounding_box_polygon;
#   * direction: at least one of the last `vel_wndw` in-ROI x positions of the SAME body part
#                is smaller (side == 'left') or larger (side == 'right') than the current x;
#   * velocity:  both the x and the y velocity of that frame exceed vel_thresh.
# NOTE(review): the velocity test uses the signed values, so movement towards smaller x or y can
# never pass it - confirm whether the magnitude was intended, in particular for side == 'right'.
if side not in ("left", "right"):
    # Any other value would silently produce an output without a single butt.
    raise ValueError(f"side must be 'left' or 'right', got {side!r}")

butts_per_bodypart_df = pd.DataFrame(index=unpickle_df.index, columns=unpickle_df.columns.levels[1])
# Scorer name as found in the velocity file, instead of a hard-coded value.
vel_scorer = df_vel.columns.get_level_values(0)[0]

for column in unpickle_df.columns:
    if column[2] != 'x':
        continue  # handle each body part once, through its x column
    bodypart = column[1]
    y_column = (column[0], bodypart, 'y')
    x_vel_column = (vel_scorer, bodypart, 'x')
    y_vel_column = (vel_scorer, bodypart, 'y')

    # Start a fresh history for every body part, so positions of the previously processed
    # body part cannot influence the direction check of this one.
    prev_x_list = []
    for index, row in unpickle_df.iterrows():
        x = row[column]
        if not bounding_box_polygon.contains(Point(x, row[y_column])):
            continue
        if prev_x_list:
            if side == "left":
                approaching = any(prev_x < x for prev_x in prev_x_list)
            else:
                approaching = any(prev_x > x for prev_x in prev_x_list)
            if approaching:
                x_vel = df_vel.at[index, x_vel_column]
                y_vel = df_vel.at[index, y_vel_column]
                if x_vel > vel_thresh and y_vel > vel_thresh:
                    # .at writes into the frame itself; chained indexing (df[col][idx] = ...)
                    # may write to a temporary copy instead.
                    butts_per_bodypart_df.at[index, bodypart] = 1
                    print(bodypart, index, 'butts')
        # Remember this in-ROI position, keeping only the most recent vel_wndw of them.
        prev_x_list.append(x)
        prev_x_list = prev_x_list[-vel_wndw:]

butts_per_bodypart_df.to_csv(os.path.join(directory_path, f'{os.path.splitext(os.path.basename(unpickle))[0]}_butts_bp.csv'))

In [None]:
#Convert the per-bodypart file to one column: a frame counts as a butt if any body part detected one
#Saves csv of the combined butts across frames
#Saves csv of frames converted to seconds (30fps): if any frame of a block of 30 frames has a butt, that second is assigned as butts
frames_per_second = 30  # size of the frame blocks that are collapsed into one second

# 1 where at least one body part was marked with 1 in that frame, otherwise 0
# (computed for all frames at once instead of row by row).
butts_df = butts_per_bodypart_df.eq(1).any(axis=1).astype(int).to_frame('butts')

# Integer division of the frame index groups the frames into blocks of one second.
butts_30fps = butts_df.groupby(butts_df.index // frames_per_second).any().astype(int)
total_butts = butts_30fps['butts'].sum()
print(butts_30fps)
print("total number of butts:", (total_butts))

# Both outputs are named after the positions file.
output_stem = os.path.splitext(os.path.basename(unpickle))[0]
butts_df.to_csv(os.path.join(directory_path, f'{output_stem}_butts.csv'))
butts_30fps.to_csv(os.path.join(directory_path, f'{output_stem}_butts_30fps.csv'))