In [1]:
import pandas as pd
import os

In [2]:
# Root folder of the dataset; count_labels looks for <dataset_root>/<video>/<video>.csv.
dataset_root = "dataset"

# Held-out test split (10 video IDs).
test_videos = [
    "ACCFP", "CCAH", "CCSAD", "CCUIM", "EIB",
    "EWCC", "GGCC", "SCCC", "TICC", "WICC",
]

# Validation split (10 video IDs).
val_videos = [
    "CCGFS", "CCIAP", "CICC", "EFCC", "FIJI",
    "HCCAB", "HRDCC", "HUSNS", "MACC", "SAPFS",
]

# Training split (80 video IDs, eight per line).
train_videos = [
    "ACCC", "AIAQ", "AIDT", "AMCC", "BDCC", "BECCC", "BWFF", "CBAQC",
    "CCBN", "CCBNN", "CCCBL", "CCCP", "CCCS", "CCD", "CCFS", "CCFWW",
    "CCH", "CCHES", "CCIAA", "CCIAH", "CCICD", "CCIS", "CCISL", "CCMA",
    "CCSC", "CCTA", "CCTP", "CCWC", "CCWQ", "CESS", "COP", "CPCC",
    "CTCM", "DACC", "DFCC", "DPIC", "DTECC", "ECCDS", "FCC", "FLW",
    "FTACC", "HCCAE", "HCCAW", "HCCIG", "HCI", "HDWC", "HHVBD", "HSHWA",
    "HSPW", "IMRF", "INCAS", "MICC", "NASA", "OCCC", "PCOCC", "PWCCA",
    "RAGG", "RASCC", "RCCCS", "RCCS", "RHTCC", "RPDCC", "SDDA", "SLCCA",
    "SSTCC", "TCBCC", "TECCC", "TIOCC", "TIYH", "TTFCC", "TUCC", "UKCC",
    "VFVCC", "VPCC", "WCCA", "WFHSW", "WICCE", "WISE", "WTCC", "YPTL",
]

In [3]:
def count_labels(videos, dataset_root, labels=(0, 1, 2)):
    """Count annotation rows per label across the CSV files of several videos.

    For each video ID the file ``<dataset_root>/<video>/<video>.csv`` is read.
    The first row of every file is skipped unconditionally and the columns are
    named ``label`` and ``text``.
    NOTE(review): this assumes each CSV has a header row followed by
    two-column records -- confirm against the dataset.

    Parameters
    ----------
    videos : iterable of str
        Video IDs; each one names both the sub-folder and the CSV file.
    dataset_root : str
        Folder that contains one sub-folder per video.
    labels : iterable, optional
        Label values to tally. Defaults to ``(0, 1, 2)``.

    Returns
    -------
    tuple[int, dict]
        ``(total_labels, label_counts)``. ``total_labels`` is the number of
        data rows read across all files, including rows whose label is not in
        ``labels``. ``label_counts`` maps each requested label to its number
        of occurrences, as plain Python ``int`` values.

    Notes
    -----
    A video whose CSV file does not exist is reported on stdout and skipped;
    it contributes nothing to either result.
    """
    total_labels = 0
    label_counts = {label: 0 for label in labels}

    for video in videos:
        csv_path = os.path.join(dataset_root, video, f"{video}.csv")
        if not os.path.exists(csv_path):
            # Best-effort: report the gap and keep tallying the other videos.
            print(f"CSV file for {video} not found.")
            continue

        data = pd.read_csv(csv_path, header=None, skiprows=1, names=['label', 'text'])
        label_column = data['label']
        for label in label_counts:
            # Series.sum() yields a numpy integer; cast so the returned dict
            # holds built-in ints (clean repr, JSON-serializable).
            label_counts[label] += int((label_column == label).sum())
        total_labels += len(data)

    return total_labels, label_counts

# Tally each split in the fixed order test -> validation -> train and unpack
# every (total, per-label counts) pair into its own pair of names.
(
    (test_total_labels, test_label_counts),
    (val_total_labels, val_label_counts),
    (train_total_labels, train_label_counts),
) = (
    count_labels(split_videos, dataset_root)
    for split_videos in (test_videos, val_videos, train_videos)
)

In [4]:
# Print the same three-line report (heading, row total, per-label counts)
# for each split, followed by a blank line.
for split_heading, split_total, split_counts in (
    ("Test Videos:", test_total_labels, test_label_counts),
    ("Validation Videos:", val_total_labels, val_label_counts),
    ("Training Videos:", train_total_labels, train_label_counts),
):
    print(split_heading)
    print(f"Total Labels: {split_total}")
    print(f"Label 0: {split_counts[0]}, Label 1: {split_counts[1]}, Label 2: {split_counts[2]}\n")

Test Videos:
Total Labels: 420
Label 0: 73, Label 1: 194, Label 2: 153

Validation Videos:
Total Labels: 417
Label 0: 83, Label 1: 204, Label 2: 130

Training Videos:
Total Labels: 3372
Label 0: 1036, Label 1: 1449, Label 2: 887



In [7]:
# Grand totals over the three splits: one line per label, then the overall row count.
for label in (0, 1, 2):
    label_total = sum(
        split_counts[label]
        for split_counts in (test_label_counts, val_label_counts, train_label_counts)
    )
    print(f"Total {label}: {label_total}")
print(f"Total: {sum((test_total_labels, val_total_labels, train_total_labels))}")

Total 0: 1192
Total 1: 1847
Total 2: 1170
Total: 4209
