# DataLoader

In [None]:
import torch
import torch.nn as nn
import pandas as pd
from torch.utils.data import DataLoader, TensorDataset
from torch.utils.data import random_split

import numpy as np

import os
import chardet

folder_path = '../dataset'
files = os.listdir(folder_path)

# Bucket every directory entry under the first class tag found in its name.
# Tags are tried in the dict's insertion order ('cls', then 'ctc', then
# 'fre'); entries containing none of the tags are left out.
class_files = {'cls': [], 'ctc': [], 'fre': []}
for file in files:
    for tag, bucket in class_files.items():
        if tag in file:
            bucket.append(file)
            break
        

def extract_joint_from_filename(filename):
    """Return the joint identifier embedded in a file name.

    The name is split on '-'. When the second piece starts with 'joint',
    the third piece, truncated at its first '.', is returned. For example
    a name shaped like 'a-joint-3.csv' yields '3'.

    Args:
        filename: File name to inspect.

    Returns:
        The joint identifier as a string, or None when the name does not
        follow the pattern described above.
    """
    parts = filename.split('-')
    # A name with fewer than three '-'-separated pieces cannot carry a joint
    # identifier; report None rather than failing with IndexError.
    if len(parts) < 3 or not parts[1].startswith('joint'):
        return None
    return parts[2].split('.')[0]

def load_and_combine_files(file_list, folder_path):
    """Read each CSV in ``file_list`` and stack the rows into one DataFrame.

    Every column read from a file is renamed to 'Data', and a
    'joint_number' column is added holding whatever
    ``extract_joint_from_filename`` returns for that file name.

    Args:
        file_list: File names located inside ``folder_path``.
        folder_path: Directory that contains the files.

    Returns:
        A DataFrame with the rows of all files in list order and a fresh
        0..n-1 index; an empty DataFrame when ``file_list`` is empty.
    """
    frames = []
    for file in file_list:
        df = pd.read_csv(os.path.join(folder_path, file))
        # Use one shared column name so rows from different files align
        # on the same column when concatenated.
        df.columns = ['Data'] * len(df.columns)
        df['joint_number'] = extract_joint_from_filename(file)
        frames.append(df)

    # pd.concat raises on an empty list, so keep the original
    # "no files -> empty frame" result explicit.
    if not frames:
        return pd.DataFrame()

    # Concatenate once: growing a frame inside the loop re-copies all
    # accumulated rows on every iteration.
    return pd.concat(frames, ignore_index=True)

# Load one combined frame per class.
cls_data = load_and_combine_files(class_files['cls'], folder_path)
ctc_data = load_and_combine_files(class_files['ctc'], folder_path)
fre_data = load_and_combine_files(class_files['fre'], folder_path)

# Integer class ids: cls -> 0, ctc -> 1, fre -> 2.
for class_id, frame in enumerate((cls_data, ctc_data, fre_data)):
    frame['label'] = class_id

# NOTE(review): the first row of every class frame is dropped before
# merging; the reason is not visible here — confirm it is intentional.
class_frames = [frame.iloc[1:] for frame in (cls_data, ctc_data, fre_data)]
combined_data = pd.concat(class_frames, axis=0, ignore_index=True)

def prepare_dataset(df):
    """Turn a combined DataFrame into signal, label and joint tensors.

    The 'label' and 'joint_number' columns of ``df`` are cast to int in
    place, so the caller's frame is modified. All remaining columns are
    treated as signal values.

    Args:
        df: DataFrame containing 'label' and 'joint_number' columns plus
            the signal column(s).

    Returns:
        A tuple ``(signals, labels, joints)``: ``signals`` is a float32
        tensor built from the remaining columns, while ``labels`` and
        ``joints`` are int32 tensors.
    """
    meta_columns = ['label', 'joint_number']
    for column in meta_columns:
        df[column] = df[column].astype(int)

    # Everything that is not a class label or joint position is signal data.
    feature_matrix = df.drop(columns=meta_columns).values.astype(float)

    signal_tensor = torch.tensor(feature_matrix, dtype=torch.float32)
    # NOTE(review): labels are int32; some torch losses expect int64
    # targets — confirm against the training code.
    label_tensor = torch.tensor(df['label'].values, dtype=torch.int32)
    joint_tensor = torch.tensor(df['joint_number'].values, dtype=torch.int32)
    return signal_tensor, label_tensor, joint_tensor

# Wrap the prepared tensors in a single dataset.
signals, labels, joints = prepare_dataset(combined_data)
dataset = TensorDataset(signals, labels, joints)

# Random train/test split; adjust train_fraction to change the ratio.
train_fraction = 0.7
total_size = len(dataset)
train_size = int(total_size * train_fraction)
test_size = total_size - train_size
split_lengths = [train_size, test_size]
train_dataset, test_dataset = random_split(dataset, split_lengths)

# Only the training loader shuffles; the test loader keeps a fixed order.
batch_size = 64
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)