## Code for "My" Dataset

In [1]:
import torch
import torch.nn as nn
import pandas as pd
from torch.utils.data import DataLoader, TensorDataset
from torch.utils.data import random_split

import numpy as np

import os
import chardet

### Data Loader

In [2]:
file_path = '/home/rtlink/robros/dataset/robros_dataset/free_joint_1.csv' 
df = pd.read_csv(file_path)
df.columns = ['Data' for _ in df.columns]

df_head = df.head()
df_head

Unnamed: 0,Data,Data.1,Data.2,Data.3,Data.4,Data.5,Data.6,Data.7,Data.8,Data.9,...,Data.10,Data.11,Data.12,Data.13,Data.14,Data.15,Data.16,Data.17,Data.18,Data.19
0,-0.009232,-0.009501,-0.009774,-0.010053,-0.010337,-0.010627,-0.010924,-0.011228,-0.011538,-0.011855,...,0.641356,0.642515,0.643674,0.644834,,,,,,
1,-0.009253,-0.009522,-0.009795,-0.010073,-0.010357,-0.010648,-0.010945,-0.011248,-0.011558,-0.011875,...,0.643477,0.644642,,,,,,,,
2,-0.009279,-0.009547,-0.00982,-0.010098,-0.010382,-0.010672,-0.010969,-0.011272,-0.011581,-0.011898,...,,,,,,,,,,
3,-0.009575,-0.009847,-0.010124,-0.010407,-0.010694,-0.010988,-0.011289,-0.011596,-0.01191,-0.01223,...,,,,,,,,,,
4,-0.009601,-0.009873,-0.01015,-0.010432,-0.01072,-0.011013,-0.011314,-0.01162,-0.011934,-0.012254,...,,,,,,,,,,


In [3]:
def standardize_row_lengths(df, target_length=1000):
    standardized_data = []
 
    for _, row in df.iterrows():
        # NaN 값 제거 (선형 보간을 통해 채움)
        row = row.interpolate().fillna(method='bfill').fillna(method='ffill')
        current_length = len(row)
        if current_length < target_length:
            # 길이가 짧은 경우 interpolate
            x = np.linspace(0, current_length - 1, num=current_length)
            xp = np.linspace(0, current_length - 1, num=target_length)
            interpolated_row = np.interp(xp, x, row)
            standardized_data.append(interpolated_row)
        elif current_length > target_length:
            # 길이가 긴 경우 샘플링
            indices = np.linspace(0, current_length - 1, num=target_length, dtype=int)
            sampled_row = row.iloc[indices].values
            standardized_data.append(sampled_row)
        else:
            # 이미 길이가 1000인 경우
            standardized_data.append(row.values[:target_length])
 
    # 모든 행이 동일한 길이를 가지도록 DataFrame 생성
    standardized_df = pd.DataFrame(standardized_data, columns=range(target_length))
    return standardized_df

def find_nan_locations_df(df):
    # NaN 값이 있는 위치 찾기
    nan_locations = []
    for row_index, row in df.iterrows():
        for col_index, value in row.items():
            if pd.isna(value):
                nan_locations.append((row_index, col_index))
 
    return len(nan_locations)

In [4]:
standardized_df = standardize_row_lengths(df)

standardized_df.head()

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,990,991,992,993,994,995,996,997,998,999
0,-0.009232,-0.009501,-0.009774,-0.010337,-0.010627,-0.010924,-0.011538,-0.011855,-0.012509,-0.012846,...,0.636727,0.637883,0.640198,0.641356,0.643674,0.644834,0.644834,0.644834,0.644834,0.644834
1,-0.009253,-0.009522,-0.009795,-0.010357,-0.010648,-0.010945,-0.011558,-0.011875,-0.012528,-0.012865,...,0.638821,0.639984,0.642312,0.643477,0.644642,0.644642,0.644642,0.644642,0.644642,0.644642
2,-0.009279,-0.009547,-0.00982,-0.010382,-0.010672,-0.010969,-0.011581,-0.011898,-0.01255,-0.012887,...,0.640642,0.641812,0.642983,0.642983,0.642983,0.642983,0.642983,0.642983,0.642983,0.642983
3,-0.009575,-0.009847,-0.010124,-0.010694,-0.010988,-0.011289,-0.01191,-0.01223,-0.012891,-0.013232,...,0.643176,0.643176,0.643176,0.643176,0.643176,0.643176,0.643176,0.643176,0.643176,0.643176
4,-0.009601,-0.009873,-0.01015,-0.01072,-0.011013,-0.011314,-0.011934,-0.012254,-0.012914,-0.013254,...,0.642258,0.642258,0.642258,0.642258,0.642258,0.642258,0.642258,0.642258,0.642258,0.642258


In [5]:
# NaN 위치 찾기
nan_locations = find_nan_locations_df(standardized_df)
 
# 결과 출력
print("NaN 값이 있는 위치:", nan_locations)

NaN 값이 있는 위치: 0


In [11]:
folder_path = '../dataset/robros_dataset'
files = os.listdir(folder_path)

class_files = {'cls': [], 'fre': []}
for file in files:
    if 'cls' in file:
        class_files['cls'].append(file)
    elif 'fre' in file:
        class_files['fre'].append(file)

In [13]:
def extract_joint_from_filename(filename):
    parts = filename.split('_')
    if parts[1].startswith('joint'):
        joint_number = parts[2].split('.')[0]
        return joint_number
    return None

In [20]:
def load_and_combine_files(file_list, folder_path):
    combined_df = pd.DataFrame()
    for file in file_list:
        file_path = os.path.join(folder_path, file)
        df = pd.read_csv(file_path)
        df = standardize_row_lengths(df)
        df.columns = ['Data' for _ in df.columns]
        
        joint_number = extract_joint_from_filename(file)
        df['joint_number'] = joint_number
        
        combined_df = pd.concat([combined_df, df], ignore_index=True)
    return combined_df

In [21]:
fre_data = load_and_combine_files(class_files['fre'], folder_path)

In [16]:
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

  return torch._C._cuda_getDeviceCount() > 0


In [17]:
fre_data['label'] = 1

In [24]:
combined_data = pd.concat([fre_data], ignore_index=False, axis=1)
print(fre_data.shape)

(693, 1001)


In [26]:
fre_data.head()

Unnamed: 0,Data,Data.1,Data.2,Data.3,Data.4,Data.5,Data.6,Data.7,Data.8,Data.9,...,Data.10,Data.11,Data.12,Data.13,Data.14,Data.15,Data.16,Data.17,Data.18,joint_number
0,2.1e-05,2.1e-05,2.1e-05,2.1e-05,2.1e-05,2.1e-05,2.1e-05,2.1e-05,2.1e-05,2.2e-05,...,3e-06,3e-06,3e-06,3e-06,3e-06,3e-06,3e-06,3e-06,3e-06,3
1,2.1e-05,2.1e-05,2.1e-05,2.1e-05,2.1e-05,2.1e-05,2.1e-05,2.1e-05,2.2e-05,2.2e-05,...,3e-06,3e-06,3e-06,3e-06,3e-06,3e-06,3e-06,3e-06,3e-06,3
2,2.1e-05,2.1e-05,2.1e-05,2.1e-05,2.1e-05,2.1e-05,2.1e-05,2.2e-05,2.2e-05,2.2e-05,...,2e-06,2e-06,2e-06,2e-06,2e-06,2e-06,2e-06,2e-06,2e-06,3
3,2.1e-05,2.1e-05,2.1e-05,2.1e-05,2.1e-05,2.1e-05,2.2e-05,2.2e-05,2.2e-05,2.2e-05,...,2e-06,2e-06,2e-06,2e-06,2e-06,2e-06,2e-06,2e-06,2e-06,3
4,2.1e-05,2.1e-05,2.1e-05,2.1e-05,2.1e-05,2.1e-05,2.2e-05,2.2e-05,2.2e-05,2.2e-05,...,2e-06,2e-06,2e-06,2e-06,2e-06,2e-06,2e-06,2e-06,2e-06,3
