In [1]:
import os, cv2, copy
import numpy as np
import pandas as pd
import shutil
from sklearn.model_selection import train_test_split
from pytorchvideo.data.encoded_video import EncodedVideo
from FOS_dataset import FOS_set, transform



In [2]:
# Read processed dataset csv file
# The loop further down reads the 'path' and 'labels' columns of this frame.
# NOTE(review): absolute, machine-specific path -- the notebook cannot run on another
# machine without editing this line.
df_10s = pd.read_csv(r'C:\Users\36394\PycharmProjects\care_autism_children\processed_data.csv')

In [3]:
# Check how many videos can't be opened and build df_vision from the readable ones.
# df_vision columns: 'subject_name' (path relative to 'Data_processed'),
# 'video_path' (path as stored in the csv) and 'labels' (list of label strings).
# NOTE(review): 'subject_name' is derived from the full file path, so it includes the
# file name and is effectively unique per clip rather than per subject -- confirm that
# this is what the later "split by subject" is supposed to group on.
bad_videos = []
good_videos = []
vision_records = []
for _, row in df_10s.iterrows():
    video_path = row['path']
    # Part of the path after the 'Data_processed' marker; fall back to the whole path
    # when the marker is absent so that bookkeeping can never raise on its own.
    path_parts = video_path.split('Data_processed')
    relative_path = path_parts[1] if len(path_parts) > 1 else video_path
    try:
        # Opening the file is the actual check; the decoded object is not kept.
        EncodedVideo.from_path(video_path)
        # 'labels' is stored as the text of a Python list, e.g. "['C+', 'EA']".
        labels = row['labels'][1:-1].replace("'", "").split(', ')
    except Exception:
        # `Exception` (not a bare except) so Ctrl-C / kernel interrupt still stops the loop.
        # print("Bad video: {}, We can't open it!".format(relative_path))
        bad_videos.append(relative_path)
        continue
    vision_records.append(
        {'subject_name': relative_path, 'video_path': video_path, 'labels': labels}
    )
    good_videos.append(video_path)

# Build the frame once instead of growing it one row at a time.
df_vision = pd.DataFrame(vision_records, columns=['subject_name', 'video_path', 'labels'])
print("Number of good videos: {}".format(len(good_videos)))
print("Number of bad videos: {}".format(len(bad_videos)))

Number of good videos: 8003
Number of bad videos: 37


In [4]:
# Add one 0/1 indicator column per vision label (1 when the clip's label list
# contains that label) and print how many clips carry each label.
vision_classes = ['C+', 'C-', 'PN', 'EA']
for vision_class in vision_classes:
    df_vision[vision_class] = df_vision['labels'].map(
        lambda clip_labels: int(vision_class in clip_labels)
    )
    positives = (df_vision[vision_class] == 1).sum()
    print("{}: {}".format(vision_class, positives))

C+: 2224
C-: 15
PN: 72
EA: 3585


In [5]:
# Display the frame with its per-label indicator columns (bare expression -> notebook rich repr).
df_vision

Unnamed: 0,subject_name,video_path,labels,C+,C-,PN,EA
0,\20th\BAM\Hospital_Playtime_New Toys_0.mp4,C:/Users/36394/Study/GWU/PHD in Biomedical Eng...,"[C+, SI+, VI+, S+, O]",1,0,0,0
1,\20th\BAM\Hospital_Playtime_New Toys_1.mp4,C:/Users/36394/Study/GWU/PHD in Biomedical Eng...,"[C+, VI+, Q+, S+, O]",1,0,0,0
2,\20th\BAM\Hospital_Playtime_New Toys_2.mp4,C:/Users/36394/Study/GWU/PHD in Biomedical Eng...,"[C+, VI+, S+, O]",1,0,0,0
3,\20th\BAM\Hospital_Playtime_New Toys_3.mp4,C:/Users/36394/Study/GWU/PHD in Biomedical Eng...,"[C+, VI+, S+, O]",1,0,0,0
4,\20th\BAM\Hospital_Playtime_New Toys_4.mp4,C:/Users/36394/Study/GWU/PHD in Biomedical Eng...,"[C+, VI+, S+, O]",1,0,0,0
...,...,...,...,...,...,...,...
7998,\19th\HHS\Home_Playtime_Casual Interaction_25.mp4,C:/Users/36394/Study/GWU/PHD in Biomedical Eng...,"[S+, EA]",0,0,0,1
7999,\19th\HHS\Home_Playtime_Casual Interaction_26.mp4,C:/Users/36394/Study/GWU/PHD in Biomedical Eng...,"[SI+, MI, S+, EA]",0,0,0,1
8000,\19th\HHS\Home_Playtime_Casual Interaction_27.mp4,C:/Users/36394/Study/GWU/PHD in Biomedical Eng...,"[C+, SI+, VI+, S+, EA]",1,0,0,1
8001,\19th\HHS\Home_Playtime_Casual Interaction_28.mp4,C:/Users/36394/Study/GWU/PHD in Biomedical Eng...,"[S+, EA]",0,0,0,1


In [7]:
# One frame per vision label (rows whose indicator column equals 1), each ordered
# by subject name. A clip that carries several labels shows up in several frames.
df_C_plus = df_vision.loc[df_vision['C+'] == 1].sort_values(by=['subject_name'])
df_C_minus = df_vision.loc[df_vision['C-'] == 1].sort_values(by=['subject_name'])
df_PN = df_vision.loc[df_vision['PN'] == 1].sort_values(by=['subject_name'])
df_EA = df_vision.loc[df_vision['EA'] == 1].sort_values(by=['subject_name'])

In [None]:
# Split the dataset into train and test by the subject name
def split_train_test_by_subject(df, test_size=0.2):
    """Split ``df`` so that every ``subject_name`` lands entirely in train or in test.

    Subjects are taken in order of first appearance in ``df``: the first
    ``floor(n_subjects * (1 - test_size))`` go to train, the remainder to test.
    No shuffling is performed.

    Args:
        df: DataFrame with a ``subject_name`` column.
        test_size: Fraction of subjects (not of rows) assigned to test, in [0, 1].

    Returns:
        ``(df_train, df_test)``. Rows are grouped subject by subject and the index is
        reset to 0..n-1. An empty side is an empty frame with the columns of ``df``.

    Raises:
        ValueError: if ``test_size`` is outside [0, 1].
    """
    if test_size > 1 or test_size < 0:
        raise ValueError("test_size must be between 0 and 1")

    subject_names = df.subject_name.unique()
    num_subjects = len(subject_names)
    # Round away binary floating-point noise before truncating: 10 * (1 - 0.9) is
    # 0.9999999999999998, which a plain int() would turn into 0 train subjects.
    num_train_subjects = int(round(num_subjects * (1 - test_size), 9))

    names_train_subjects = subject_names[:num_train_subjects]
    names_test_subjects = subject_names[num_train_subjects:]

    def _rows_for(names):
        # Gather each subject's rows and concatenate once (not once per subject).
        if len(names) == 0:
            return df.iloc[0:0].reset_index(drop=True)
        parts = [df[df['subject_name'] == name] for name in names]
        return pd.concat(parts).reset_index(drop=True)

    df_train = _rows_for(names_train_subjects)
    print("Number of train samples: {}".format(len(df_train)))
    df_test = _rows_for(names_test_subjects)
    print("Number of test samples: {}".format(len(df_test)))

    return df_train, df_test

In [73]:
# Handle the data imbalance problem
# NOTE(review): every class frame is split into train/test on its own, but a clip with
# several labels is present in more than one class frame, so the same clip can end up
# in both df_train and df_test -- confirm whether that overlap is acceptable.
SEED = 42  # fixed seed so the random under-sampling below is reproducible
rng = np.random.default_rng(SEED)

# Randomly drop some samples from the EA class
remove_n = 2200
drop_indices = rng.choice(df_EA.index, remove_n, replace=False)
df_EA_modified = df_EA.drop(drop_indices)
print('For EA class, we have {} samples'.format(len(df_EA_modified)))
df_EA_train, df_EA_test = split_train_test_by_subject(df_EA_modified, test_size=0.2)

# Randomly drop some samples from the C+ class
remove_n = 1500
drop_indices = rng.choice(df_C_plus.index, remove_n, replace=False)
df_C_plus_modified = df_C_plus.drop(drop_indices)
print('For C+ class, we have {} samples'.format(len(df_C_plus_modified)))
df_C_plus_train, df_C_plus_test = split_train_test_by_subject(df_C_plus_modified, test_size=0.2)


# C- class have few samples, drop this class


# Oversample the PN class by repeating it 20 times
df_PN_modified = pd.concat([df_PN]*20, ignore_index=True)
print('For PN class, we have {} samples'.format(len(df_PN_modified)))
df_PN_train, df_PN_test = split_train_test_by_subject(df_PN_modified, test_size=0.2)

# Per-label totals over the re-balanced data, then the combined train / test frames.
vision_classes = ['C+', 'PN', 'EA']
df_vision_modified = pd.concat([df_C_plus_modified, df_PN_modified, df_EA_modified], ignore_index=True)
for vision_class in vision_classes:
    print("{}: {}".format(vision_class, np.sum(df_vision_modified[vision_class] == 1)))
df_train = pd.concat([df_C_plus_train, df_PN_train, df_EA_train], ignore_index=True)
df_test = pd.concat([df_C_plus_test, df_PN_test, df_EA_test], ignore_index=True)
# print(df_vision_modified.label.value_counts())

For EA class, we have 1385 samples
Number of train samples: 1108
Number of test samples: 277
For C+ class, we have 724 samples
Number of train samples: 579
Number of test samples: 145
For PN class, we have 1440 samples
Number of train samples: 1140
Number of test samples: 300
C+: 1922
PN: 1452
EA: 1693


In [74]:
# Report, for each split, how many rows are positive for every vision class.
for heading, split_frame in (('This is the train set', df_train), ('This is valid set', df_test)):
    print(heading)
    for vision_class in vision_classes:
        positives = np.sum(split_frame[vision_class] == 1)
        print("{}: {}".format(vision_class, positives))

This is the train set
C+: 1470
PN: 1148
EA: 1364
This is valid set
C+: 452
PN: 304
EA: 329


In [75]:
# Write both splits to CSV in the working directory (row index omitted).
for split_frame, csv_name in ((df_train, 'train.csv'), (df_test, 'val.csv')):
    split_frame.to_csv(csv_name, index=False)

# Old code just for reference

In [2]:
# Define the dataset folder
# dir_10s is the root that is walked below; it is expected to contain one sub-folder
# per entry of names_vision.
# NOTE(review): absolute, machine-specific path -- edit before running elsewhere.
dir_10s = r'C:\Users\36394\Study\GWU\PHD in Biomedical Engineer\Research\FOS\Autism_dataset\Data_processed'
# Label names; each one is used as a sub-folder name under dir_10s and as the row label.
names_vision = ['C+', 'C-', 'PN', 'EA']

In [3]:
# Walk every label folder under dir_10s, try to open each file, and index the readable
# ones in df_vision (subject folder, full path, label). Unreadable files are reported
# and collected in bad_videos as paths relative to 'Data_processed'.
vision_records = []
bad_videos = []
for name in names_vision:
    for path, _dir_list, file_list in os.walk(os.path.join(dir_10s, name)):
        for file_name in file_list:
            video_path = os.path.join(path, file_name)
            try:
                # Opening the file is the check; the decoded object is not kept.
                EncodedVideo.from_path(video_path)
            except Exception:
                # `Exception` (not a bare except) so a kernel interrupt still stops the walk.
                path_bad_video = video_path.split('Data_processed')[1]
                print("Bad video: {}, We can't open it!".format(path_bad_video))
                bad_videos.append(path_bad_video)
                continue
            # Folder of the clip relative to 'Data_processed' (starts with the label folder).
            subject_name = path.split('Data_processed')[1]
            vision_records.append(
                {'subject_name': subject_name, 'video_path': video_path, 'label': name}
            )

# Build the frame once instead of appending one row at a time.
df_vision = pd.DataFrame(vision_records, columns=['subject_name', 'video_path', 'label'])


Bad video: \C+\12th\LDK\2_0.mp4, We can't open it!
Bad video: \C+\5th\LJ\45_6.mp4, We can't open it!
Bad video: \C+\6th\KHD\37_22.MP4, We can't open it!
Bad video: \C+\6th\KHD\37_23.mp4, We can't open it!
Bad video: \C+\6th\KHD\37_24.mp4, We can't open it!
Bad video: \C+\6th\KHD\37_25.mp4, We can't open it!
Bad video: \C+\6th\KHD\37_26.mp4, We can't open it!
Bad video: \C+\6th\KHD\37_27.mp4, We can't open it!
Bad video: \C+\6th\KHD\37_28.mp4, We can't open it!
Bad video: \C+\6th\KHD\37_29.mp4, We can't open it!
Bad video: \C+\6th\KHD\37_30.mp4, We can't open it!
Bad video: \C+\6th\KHD\37_31.mp4, We can't open it!
Bad video: \C+\6th\KHD\37_32.mp4, We can't open it!
Bad video: \C+\6th\KHD\37_33.mp4, We can't open it!
Bad video: \C+\6th\KHD\37_34.mp4, We can't open it!
Bad video: \C+\6th\KHD\37_35.mp4, We can't open it!
Bad video: \C+\6th\KHD\37_36.mp4, We can't open it!
Bad video: \C+\6th\KHD\37_37.mp4, We can't open it!
Bad video: \C+\6th\KHD\41_52.mp4, We can't open it!
Bad video: \C+\

In [4]:
# (number of unreadable files, number of indexed clips)
len(bad_videos), len(df_vision)

(95, 3132)

In [5]:
# First path component of every unreadable file (the label folder it was found in).
bad_video_labels = [bad_path.split('\\')[1] for bad_path in bad_videos]
print(bad_video_labels)

['C+', 'C+', 'C+', 'C+', 'C+', 'C+', 'C+', 'C+', 'C+', 'C+', 'C+', 'C+', 'C+', 'C+', 'C+', 'C+', 'C+', 'C+', 'C+', 'C+', 'C+', 'C+', 'C+', 'C+', 'C+', 'C+', 'C+', 'C+', 'C+', 'C+', 'C+', 'C+', 'C+', 'C+', 'C+', 'C-', 'PN', 'PN', 'PN', 'PN', 'PN', 'PN', 'PN', 'PN', 'PN', 'PN', 'PN', 'PN', 'PN', 'EA', 'EA', 'EA', 'EA', 'EA', 'EA', 'EA', 'EA', 'EA', 'EA', 'EA', 'EA', 'EA', 'EA', 'EA', 'EA', 'EA', 'EA', 'EA', 'EA', 'EA', 'EA', 'EA', 'EA', 'EA', 'EA', 'EA', 'EA', 'EA', 'EA', 'EA', 'EA', 'EA', 'EA', 'EA', 'EA', 'EA', 'EA', 'EA', 'EA', 'EA', 'EA', 'EA', 'EA', 'EA', 'EA']


In [None]:
# Show how many clips each label has.
print(df_vision.label.value_counts())

# One frame per label, each ordered by subject name.
df_C_plus = df_vision.loc[df_vision['label'] == 'C+'].sort_values(by=['subject_name'])
df_C_minus = df_vision.loc[df_vision['label'] == 'C-'].sort_values(by=['subject_name'])
df_PN = df_vision.loc[df_vision['label'] == 'PN'].sort_values(by=['subject_name'])
df_EA = df_vision.loc[df_vision['label'] == 'EA'].sort_values(by=['subject_name'])

In [None]:
from sklearn.model_selection import KFold
# Split the dataset into train and test by the subject name, using the first fold of a
# shuffled 5-fold partition of the subjects
def split_train_test_by_subject(df, test_size=0.2):
    """Split ``df`` into disjoint train / test frames along ``subject_name``.

    The unique subject names are partitioned with
    ``KFold(n_splits=5, shuffle=True, random_state=42)`` and only the first fold is
    used: its held-out subjects form the test frame, all other subjects the train frame.

    Earlier revisions appended all five folds to both outputs, so every subject ended
    up in train *and* test, and relied on ``DataFrame.append``, which pandas 2.0 removed.

    Args:
        df: DataFrame with a ``subject_name`` column.
        test_size: Validated to lie in [0, 1] only; the held-out share is fixed at
            one fold out of five.

    Returns:
        ``(df_train, df_test)``, row subsets of ``df`` in their original order and
        with their original index.

    Raises:
        ValueError: if ``test_size`` is outside [0, 1]; KFold also raises it when
            there are fewer than five subjects.
    """
    if test_size > 1 or test_size < 0:
        raise ValueError("test_size must be between 0 and 1")

    # Get the unique subject name
    subject_names = df.subject_name.unique()

    # Partition the subjects into 5 folds and keep the first train/test assignment only.
    kf = KFold(n_splits=5, shuffle=True, random_state=42)
    train_index, test_index = next(kf.split(subject_names))
    train_subject_names = subject_names[train_index]
    test_subject_names = subject_names[test_index]

    # Select the rows of each side by subject membership.
    df_train = df[df['subject_name'].isin(train_subject_names)]
    df_test = df[df['subject_name'].isin(test_subject_names)]

    return df_train, df_test

In [None]:
# Split the dataset into train and test by the subject name (first subjects -> train,
# remaining subjects -> test; no shuffling, no cross validation)
def split_train_test_by_subject(df, test_size=0.2):
    """Split ``df`` so that every ``subject_name`` lands entirely in train or in test.

    Subjects are taken in order of first appearance in ``df``: the first
    ``floor(n_subjects * (1 - test_size))`` go to train, the remainder to test.

    Args:
        df: DataFrame with a ``subject_name`` column.
        test_size: Fraction of subjects (not of rows) assigned to test, in [0, 1].

    Returns:
        ``(df_train, df_test)``. Rows are grouped subject by subject and keep their
        original index. An empty side is an empty frame with the columns
        ``subject_name``, ``video_path`` and ``label``.

    Raises:
        ValueError: if ``test_size`` is outside [0, 1].
    """
    if test_size > 1 or test_size < 0:
        raise ValueError("test_size must be between 0 and 1")

    subject_names = df.subject_name.unique()
    num_subjects = len(subject_names)
    # Round away binary floating-point noise before truncating: 10 * (1 - 0.9) is
    # 0.9999999999999998, which a plain int() would turn into 0 train subjects.
    num_train_subjects = int(round(num_subjects * (1 - test_size), 9))

    names_train_subjects = subject_names[:num_train_subjects]
    names_test_subjects = subject_names[num_train_subjects:]

    def _rows_for(names):
        # Gather each subject's rows and concatenate once (not once per subject).
        if len(names) == 0:
            return pd.DataFrame(columns=['subject_name', 'video_path', 'label'])
        return pd.concat([df[df['subject_name'] == name] for name in names])

    df_train = _rows_for(names_train_subjects)
    print("Number of train samples: {}".format(len(df_train)))

    df_test = _rows_for(names_test_subjects)
    print("Number of test samples: {}".format(len(df_test)))

    return df_train, df_test

In [None]:
# Handle the data imbalance problem
# NOTE(review): the split is by subject count, not by row count, and the minority
# classes are duplicated before splitting, so the per-class train/test row ratio can
# be far from 80/20 -- check the printed sample counts.
SEED = 42  # fixed seed so the random under-sampling below is reproducible
rng = np.random.default_rng(SEED)

# Randomly drop some samples from the EA class
remove_n = 1461
drop_indices = rng.choice(df_EA.index, remove_n, replace=False)
df_EA_modified = df_EA.drop(drop_indices)
print('For EA class, we have {} samples'.format(len(df_EA_modified)))
df_EA_train, df_EA_test = split_train_test_by_subject(df_EA_modified, test_size=0.2)

# Keep the C+ class as it is
df_C_plus_modified = df_C_plus
print('For C+ class, we have {} samples'.format(len(df_C_plus_modified)))
df_C_plus_train, df_C_plus_test = split_train_test_by_subject(df_C_plus_modified, test_size=0.2)


# Oversample the C- class by repeating it 63 times
df_C_minus_modified = pd.concat([df_C_minus]*63, ignore_index=True)
print('For C- class, we have {} samples'.format(len(df_C_minus_modified)))
df_C_minus_train, df_C_minus_test = split_train_test_by_subject(df_C_minus_modified, test_size=0.2)


# Oversample the PN class by repeating it 30 times
df_PN_modified = pd.concat([df_PN]*30, ignore_index=True)
print('For PN class, we have {} samples'.format(len(df_PN_modified)))
df_PN_train, df_PN_test = split_train_test_by_subject(df_PN_modified, test_size=0.2)


# Combine the per-class frames and show the re-balanced label distribution.
df_vision_modified = pd.concat([df_C_plus_modified, df_C_minus_modified, df_PN_modified, df_EA_modified], ignore_index=True)
df_train = pd.concat([df_C_plus_train, df_C_minus_train, df_PN_train, df_EA_train], ignore_index=True)
df_test = pd.concat([df_C_plus_test, df_C_minus_test, df_PN_test, df_EA_test], ignore_index=True)
print(df_vision_modified.label.value_counts())

In [None]:
# Write both splits to CSV in the working directory (row index omitted).
for split_frame, csv_name in ((df_train, 'train.csv'), (df_test, 'val.csv')):
    split_frame.to_csv(csv_name, index=False)

In [None]:
# (number of train rows, number of test rows)
len(df_train), len(df_test)

In [None]:
# Show the label distribution of the train split, then of the test split.
for split_frame in (df_train, df_test):
    print(split_frame.label.value_counts())

In [1]:
from sklearn.model_selection import KFold
# Split the dataset into train and test by the subject name, using the first fold of a
# shuffled 5-fold partition of the subjects
def split_train_test_by_subject(df, test_size=0.2):
    """Split ``df`` into disjoint train / test frames along ``subject_name``.

    The unique subject names are partitioned with
    ``KFold(n_splits=5, shuffle=True, random_state=42)`` and only the first fold is
    used: its held-out subjects form the test frame, all other subjects the train frame.

    Earlier revisions appended all five folds to both outputs, so every subject ended
    up in train *and* test, and relied on ``DataFrame.append``, which pandas 2.0 removed.

    Args:
        df: DataFrame with a ``subject_name`` column.
        test_size: Validated to lie in [0, 1] only; the held-out share is fixed at
            one fold out of five.

    Returns:
        ``(df_train, df_test)``, row subsets of ``df`` in their original order and
        with their original index.

    Raises:
        ValueError: if ``test_size`` is outside [0, 1]; KFold also raises it when
            there are fewer than five subjects.
    """
    if test_size > 1 or test_size < 0:
        raise ValueError("test_size must be between 0 and 1")

    # Get the unique subject name
    subject_names = df.subject_name.unique()

    # Partition the subjects into 5 folds and keep the first train/test assignment only.
    kf = KFold(n_splits=5, shuffle=True, random_state=42)
    train_index, test_index = next(kf.split(subject_names))
    train_subject_names = subject_names[train_index]
    test_subject_names = subject_names[test_index]

    # Select the rows of each side by subject membership.
    df_train = df[df['subject_name'].isin(train_subject_names)]
    df_test = df[df['subject_name'].isin(test_subject_names)]

    return df_train, df_test

In [8]:
# Split the dataset into train and test by the subject name (first subjects -> train,
# remaining subjects -> test; no shuffling, no cross validation)
def split_train_test_by_subject(df, test_size=0.2):
    """Split ``df`` so that every ``subject_name`` lands entirely in train or in test.

    Subjects are taken in order of first appearance in ``df``: the first
    ``floor(n_subjects * (1 - test_size))`` go to train, the remainder to test.

    Args:
        df: DataFrame with a ``subject_name`` column.
        test_size: Fraction of subjects (not of rows) assigned to test, in [0, 1].

    Returns:
        ``(df_train, df_test)``. Rows are grouped subject by subject and keep their
        original index. An empty side is an empty frame with the columns
        ``subject_name``, ``video_path`` and ``label``.

    Raises:
        ValueError: if ``test_size`` is outside [0, 1].
    """
    if test_size > 1 or test_size < 0:
        raise ValueError("test_size must be between 0 and 1")

    subject_names = df.subject_name.unique()
    num_subjects = len(subject_names)
    # Round away binary floating-point noise before truncating: 10 * (1 - 0.9) is
    # 0.9999999999999998, which a plain int() would turn into 0 train subjects.
    num_train_subjects = int(round(num_subjects * (1 - test_size), 9))

    names_train_subjects = subject_names[:num_train_subjects]
    names_test_subjects = subject_names[num_train_subjects:]

    def _rows_for(names):
        # Gather each subject's rows and concatenate once (not once per subject).
        if len(names) == 0:
            return pd.DataFrame(columns=['subject_name', 'video_path', 'label'])
        return pd.concat([df[df['subject_name'] == name] for name in names])

    df_train = _rows_for(names_train_subjects)
    print("Number of train samples: {}".format(len(df_train)))

    df_test = _rows_for(names_test_subjects)
    print("Number of test samples: {}".format(len(df_test)))

    return df_train, df_test

In [9]:
# Handle the data imbalance problem
# NOTE(review): the split is by subject count, not by row count, and the minority
# classes are duplicated before splitting, so the per-class train/test row ratio can
# be far from 80/20 -- check the printed sample counts.
SEED = 42  # fixed seed so the random under-sampling below is reproducible
rng = np.random.default_rng(SEED)

# Randomly drop some samples from the EA class
remove_n = 1461
drop_indices = rng.choice(df_EA.index, remove_n, replace=False)
df_EA_modified = df_EA.drop(drop_indices)
print('For EA class, we have {} samples'.format(len(df_EA_modified)))
df_EA_train, df_EA_test = split_train_test_by_subject(df_EA_modified, test_size=0.2)

# Keep the C+ class as it is
df_C_plus_modified = df_C_plus
print('For C+ class, we have {} samples'.format(len(df_C_plus_modified)))
df_C_plus_train, df_C_plus_test = split_train_test_by_subject(df_C_plus_modified, test_size=0.2)


# Oversample the C- class by repeating it 63 times
df_C_minus_modified = pd.concat([df_C_minus]*63, ignore_index=True)
print('For C- class, we have {} samples'.format(len(df_C_minus_modified)))
df_C_minus_train, df_C_minus_test = split_train_test_by_subject(df_C_minus_modified, test_size=0.2)


# Oversample the PN class by repeating it 30 times
df_PN_modified = pd.concat([df_PN]*30, ignore_index=True)
print('For PN class, we have {} samples'.format(len(df_PN_modified)))
df_PN_train, df_PN_test = split_train_test_by_subject(df_PN_modified, test_size=0.2)


# Combine the per-class frames and show the re-balanced label distribution.
df_vision_modified = pd.concat([df_C_plus_modified, df_C_minus_modified, df_PN_modified, df_EA_modified], ignore_index=True)
df_train = pd.concat([df_C_plus_train, df_C_minus_train, df_PN_train, df_EA_train], ignore_index=True)
df_test = pd.concat([df_C_plus_test, df_C_minus_test, df_PN_test, df_EA_test], ignore_index=True)
print(df_vision_modified.label.value_counts())

For EA class, we have 820 samples
Number of train samples: 729
Number of test samples: 91
For C+ class, we have 811 samples
Number of train samples: 591
Number of test samples: 220
For C- class, we have 819 samples
Number of train samples: 189
Number of test samples: 630
For PN class, we have 810 samples
Number of train samples: 510
Number of test samples: 300
EA    820
C-    819
C+    811
PN    810
Name: label, dtype: int64


In [13]:
# Write both splits to CSV in the working directory (row index omitted).
for split_frame, csv_name in ((df_train, 'train.csv'), (df_test, 'val.csv')):
    split_frame.to_csv(csv_name, index=False)

In [10]:
# (number of train rows, number of test rows)
len(df_train), len(df_test)

(2019, 1241)

In [11]:
# Show the label distribution of the train split, then of the test split.
for split_frame in (df_train, df_test):
    print(split_frame.label.value_counts())

EA    729
C+    591
PN    510
C-    189
Name: label, dtype: int64
C-    630
PN    300
C+    220
EA     91
Name: label, dtype: int64


In [None]:
# Stratified 80/20 row-level split on the 'label' column (fixed random_state).
# NOTE(review): df_vision_modified contains repeated copies of the over-sampled
# classes, so a row-level split can put copies of one clip on both sides -- confirm.
df_train, df_val = train_test_split(
    df_vision_modified,
    test_size=0.2,
    stratify=df_vision_modified['label'],
    random_state=42,
)
for split_frame in (df_train, df_val):
    print(split_frame.label.value_counts())

# Write both splits to CSV (row index omitted).
for split_frame, csv_name in ((df_train, 'train.csv'), (df_val, 'val.csv')):
    split_frame.to_csv(csv_name, index=False)

# Old way to split the dataset

In [None]:
# Create the train/val folder layout and collect the duration (seconds) of every
# video that OpenCV can open under each label folder.
# NOTE(review): dir_vision is not defined in any visible cell -- it must exist in the
# kernel before this cell runs.
dir_train = os.path.join(dir_vision, 'train')
dir_val = os.path.join(dir_vision, 'val')
# makedirs(exist_ok=True) also creates missing parents and has no check-then-create gap.
os.makedirs(dir_train, exist_ok=True)
os.makedirs(dir_val, exist_ok=True)

durations = []
for name in names_vision:
    dir_target_train = os.path.join(dir_train, name)
    dir_target_val = os.path.join(dir_val, name)
    os.makedirs(dir_target_train, exist_ok=True)
    os.makedirs(dir_target_val, exist_ok=True)

    # Every file below the label folder, at any depth.
    paths_video = [
        os.path.join(path, file_name)
        for path, _dir_list, file_list in os.walk(os.path.join(dir_10s, name))
        for file_name in file_list
    ]
    for path_video in paths_video:
        cap = cv2.VideoCapture(path_video)
        try:
            if not cap.isOpened():
                continue
            rate = cap.get(cv2.CAP_PROP_FPS)                 # property id 5
            frame_number = cap.get(cv2.CAP_PROP_FRAME_COUNT)  # property id 7
            # A reported frame rate of 0 would make the division raise; skip such files.
            if rate > 0:
                durations.append(frame_number / rate)
        finally:
            # Release the decoder handle for every file, opened or not.
            cap.release()


In [None]:
from sklearn.model_selection import train_test_split

# Copy every video into <dir_vision>/train/<label>/ or <dir_vision>/val/<label>/ using a
# per-label random 80/20 split of the files. Copies are renamed '<label>_<n>.mp4'.
# NOTE(review): dir_vision is not defined in any visible cell -- it must exist in the
# kernel before this cell runs. The split is per file, not per subject.
SPLIT_SEED = 42  # fixed so re-running the cell reproduces the same assignment

dir_train = os.path.join(dir_vision, 'train')
dir_val = os.path.join(dir_vision, 'val')
# makedirs(exist_ok=True) also creates missing parents and has no check-then-create gap.
os.makedirs(dir_train, exist_ok=True)
os.makedirs(dir_val, exist_ok=True)

for name in names_vision:
    dir_target_train = os.path.join(dir_train, name)
    dir_target_val = os.path.join(dir_val, name)
    os.makedirs(dir_target_train, exist_ok=True)
    os.makedirs(dir_target_val, exist_ok=True)

    # Every file below the label folder, at any depth.
    paths_video = [
        os.path.join(path, file_name)
        for path, _dir_list, file_list in os.walk(os.path.join(dir_10s, name))
        for file_name in file_list
    ]
    paths_train, paths_val = train_test_split(paths_video, test_size=0.2, random_state=SPLIT_SEED)
    for num, path_train in enumerate(paths_train):
        shutil.copyfile(path_train, os.path.join(dir_target_train, '{}_{}.mp4'.format(name, num)))
    for num, path_val in enumerate(paths_val):
        shutil.copyfile(path_val, os.path.join(dir_target_val, '{}_{}.mp4'.format(name, num)))

In [3]:
from sklearn.model_selection import train_test_split

# Copy every video into <dir_vision>/train/<label>/ or <dir_vision>/val/<label>/ using a
# per-label random 80/20 split of the files. Copies are renamed '<label>_<n>.mp4'.
# NOTE(review): dir_vision is not defined in any visible cell -- it must exist in the
# kernel before this cell runs. The split is per file, not per subject.
SPLIT_SEED = 42  # fixed so re-running the cell reproduces the same assignment

dir_train = os.path.join(dir_vision, 'train')
dir_val = os.path.join(dir_vision, 'val')
# makedirs(exist_ok=True) also creates missing parents and has no check-then-create gap.
os.makedirs(dir_train, exist_ok=True)
os.makedirs(dir_val, exist_ok=True)

for name in names_vision:
    dir_target_train = os.path.join(dir_train, name)
    dir_target_val = os.path.join(dir_val, name)
    os.makedirs(dir_target_train, exist_ok=True)
    os.makedirs(dir_target_val, exist_ok=True)

    # Every file below the label folder, at any depth.
    paths_video = [
        os.path.join(path, file_name)
        for path, _dir_list, file_list in os.walk(os.path.join(dir_10s, name))
        for file_name in file_list
    ]
    paths_train, paths_val = train_test_split(paths_video, test_size=0.2, random_state=SPLIT_SEED)
    for num, path_train in enumerate(paths_train):
        shutil.copyfile(path_train, os.path.join(dir_target_train, '{}_{}.mp4'.format(name, num)))
    for num, path_val in enumerate(paths_val):
        shutil.copyfile(path_val, os.path.join(dir_target_val, '{}_{}.mp4'.format(name, num)))