### Final Diff

In [1]:
import os
import pandas as pd
import numpy as np
from tqdm import tqdm
 
def process_and_save_differential(input_dir, output_dir, chunk_size=50000):
    """Write the row-wise first difference of every CSV in ``input_dir``.

    Each ``*.csv`` file is read without a header in chunks of ``chunk_size``
    rows. Within every row the difference between consecutive columns is
    computed; the first column is differenced against itself, so the first
    output column is always 0 and the output keeps the input's shape.
    Results are written under the same file name in ``output_dir`` with no
    header and no index column.

    Args:
        input_dir: Directory containing the source CSV files.
        output_dir: Directory that receives the differentiated files
            (created if missing).
        chunk_size: Number of rows read per chunk.
    """
    os.makedirs(output_dir, exist_ok=True)
    file_list = os.listdir(input_dir)
    for filename in tqdm(file_list, desc="Processing files"):
        if not filename.endswith(".csv"):
            continue
        file_path = os.path.join(input_dir, filename)
        output_file_path = os.path.join(output_dir, filename)

        chunk_container = pd.read_csv(file_path, chunksize=chunk_size, header=None)

        # Collect per-chunk results and concatenate once at the end; growing a
        # DataFrame with concat inside the loop re-copies all earlier rows on
        # every iteration.
        differential_frames = []
        for chunk in tqdm(chunk_container, desc=f"Processing {filename}", leave=False):
            data = chunk.to_numpy()
            # prepend=first column makes the leading difference 0 and keeps
            # the column count unchanged.
            differential_values = np.diff(data, axis=1, prepend=data[:, :1])
            differential_frames.append(pd.DataFrame(differential_values))

        if differential_frames:
            result_df = pd.concat(differential_frames, ignore_index=True)
        else:
            result_df = pd.DataFrame()
        result_df.to_csv(output_file_path, index=False, header=False)
 
 
# Source joint-position CSVs and the directory that receives their differences.
input_dir_example = "/home/rtlink/robros/dataset/0216_free/input_data/joint_position"
# Spelled "joint_velocity" (not "joint_veloctiy") so the output lands in the
# same directory name the other cells of this notebook use.
output_dir_example = "/home/rtlink/robros/dataset/0216_free/input_data/joint_velocity"

process_and_save_differential(input_dir_example, output_dir_example)

Pyarrow will become a required dependency of pandas in the next major release of pandas (pandas 3.0),
(to allow more performant data types, such as the Arrow string type, and better interoperability with other libraries)
but was not found to be installed on your system.
If this would cause problems for you,
please provide us feedback at https://github.com/pandas-dev/pandas/issues/54466
        
  import pandas as pd
Processing files:   0%|          | 0/7 [00:00<?, ?it/s]

: 

In [4]:
import os
import csv
import numpy as np

def differentiate_and_save(input_dir, output_dir):
    """Write the first difference of every row of every CSV in ``input_dir``.

    Each ``*.csv`` file is read row by row; every row is converted to floats
    and replaced by the differences between consecutive values, so an output
    row has one element fewer than its input row. The result is written under
    the same file name in ``output_dir``, one output row per input row.

    Args:
        input_dir: Directory containing the source CSV files.
        output_dir: Directory that receives the differentiated files
            (created if missing).

    Raises:
        ValueError: If a field cannot be converted to float.
    """
    os.makedirs(output_dir, exist_ok=True)
    for filename in os.listdir(input_dir):
        if not filename.endswith(".csv"):
            continue
        file_path = os.path.join(input_dir, filename)
        output_file_path = os.path.join(output_dir, filename)
        # Open the output once per file. Re-opening it in 'w' mode for every
        # row would truncate it each time and keep only the last row.
        with open(file_path, mode='r', newline='') as file, \
                open(output_file_path, mode='w', newline='') as output_file:
            reader = csv.reader(file)
            writer = csv.writer(output_file)
            for row in reader:
                # Convert the string fields to a float numpy array.
                data = np.array(row, dtype=float)
                # Differentiate and store this row.
                writer.writerow(np.diff(data))

# Joint-position CSVs to read and the directory for their row-wise differences.
input_dir_example = "/home/rtlink/robros/dataset/0216_free/input_data/joint_position"
output_dir_example = "/home/rtlink/robros/dataset/0216_free/input_data/joint_velocity"

differentiate_and_save(
    input_dir=input_dir_example,
    output_dir=output_dir_example,
)


### Normalize

In [26]:
import os 
import csv
import numpy as np

def normalize(input_dir, output_dir):
    """Min-max normalise every CSV in ``input_dir`` using one file-wide range.

    All rows of a file are parsed as floats and stacked into one array. The
    minimum and maximum over the whole array define the scaling; 1e-9 is
    added to the range so a constant file does not divide by zero. The scaled
    values are flattened and written as a single CSV row under the same file
    name in ``output_dir``.

    Args:
        input_dir: Directory containing the source CSV files.
        output_dir: Directory that receives the normalised files
            (created if missing).
    """
    os.makedirs(output_dir, exist_ok=True)
    csv_names = [name for name in os.listdir(input_dir) if name.endswith(".csv")]
    for name in csv_names:
        source_path = os.path.join(input_dir, name)
        target_path = os.path.join(output_dir, name)

        # Parse each row's string fields into a float array.
        with open(source_path, mode='r') as source:
            parsed_rows = [np.array(fields, dtype=float) for fields in csv.reader(source)]

        values = np.array(parsed_rows)

        # Scale with the extrema of the entire file, not per row or column.
        lowest = np.min(values)
        highest = np.max(values)
        scaled = (values - lowest) / (highest - lowest + 1e-9)

        # The whole file is emitted as one flattened row.
        with open(target_path, mode='w', newline='') as target:
            csv.writer(target).writerow(scaled.flatten())


# Raw target data in, file-wide min-max normalised copies out.
input_dir = "/home/rtlink/robros/dataset/0215/0215_free/target_data"
output_dir = "/home/rtlink/robros/dataset/0215_norm/0215_free/target_data"

normalize(input_dir=input_dir, output_dir=output_dir)

In [13]:
import os
import pandas as pd
import numpy as np
 
def min_max_normalize_and_save(input_dir, output_dir, chunk_size=10000):
    """Column-wise min-max normalise every CSV in ``input_dir``.

    Each ``*.csv`` file is read without a header in chunks of ``chunk_size``
    rows, in two passes: the first pass finds every column's minimum and
    maximum over the whole file, the second scales each chunk with those
    file-wide values and appends it to the output. The scaling is therefore
    independent of ``chunk_size``. Columns whose range is 0 are divided by 1,
    so they map to 0.

    Args:
        input_dir: Directory containing the source CSV files.
        output_dir: Directory that receives the normalised files
            (created if missing). Files are written without header or index.
        chunk_size: Number of rows read per chunk.
    """
    os.makedirs(output_dir, exist_ok=True)
    for filename in os.listdir(input_dir):
        if not filename.endswith(".csv"):
            continue
        file_path = os.path.join(input_dir, filename)
        output_file_path = os.path.join(output_dir, filename)

        # Pass 1: accumulate per-column extrema across all chunks. Computing
        # them per chunk would scale identical values differently depending
        # on which chunk they fall in.
        col_min = None
        col_max = None
        for chunk in pd.read_csv(file_path, chunksize=chunk_size, header=None):
            data = chunk.to_numpy(dtype=float)
            chunk_min = data.min(axis=0)
            chunk_max = data.max(axis=0)
            if col_min is None:
                col_min, col_max = chunk_min, chunk_max
            else:
                col_min = np.minimum(col_min, chunk_min)
                col_max = np.maximum(col_max, chunk_max)

        if col_min is None:
            # No rows were read; nothing to normalise for this file.
            continue

        # Avoid division by zero for constant columns.
        range_val = col_max - col_min
        range_val[range_val == 0] = 1

        # Pass 2: normalise chunk by chunk and stream to disk so the whole
        # file never has to be held (or repeatedly re-copied) in memory.
        first_chunk = True
        for chunk in pd.read_csv(file_path, chunksize=chunk_size, header=None):
            normalized_data = (chunk.to_numpy(dtype=float) - col_min) / range_val
            pd.DataFrame(normalized_data).to_csv(
                output_file_path,
                mode="w" if first_chunk else "a",  # overwrite once, then append
                index=False,
                header=False,
            )
            first_chunk = False
 
# Joint-velocity CSVs in, column-wise min-max normalised copies out.
input_dir = "/home/rtlink/robros/dataset/0215/0215_free/input_data/joint_velocity"
output_dir = "/home/rtlink/robros/dataset/0215_norm/0215_free/input_data/joint_velocity"

min_max_normalize_and_save(input_dir=input_dir, output_dir=output_dir)

AttributeError: 'int' object has no attribute 'to_numpy'

In [1]:
import os
import pandas as pd
import numpy as np
 
base_dir = "/home/rtlink/robros/dataset/0215_dataset/input_data"

input_dir = os.path.join(base_dir, "joint_position")
velocity_dir = os.path.join(base_dir, "joint_velocity")
acceleration_dir = os.path.join(base_dir, "joint_acceleration")

os.makedirs(velocity_dir, exist_ok=True)
os.makedirs(acceleration_dir, exist_ok=True)

for filename in os.listdir(input_dir):
    if filename.endswith(".csv"):
        file_path = os.path.join(input_dir, filename)

        # header=None: without it pandas consumes the first data row as
        # column labels, so that row is never differentiated (and a
        # single-row file becomes an empty frame).
        df = pd.read_csv(file_path, header=None)

        # Differentiate along the horizontal axis (time) to get velocity.
        # fillna(0) replaces the NaN that diff leaves in the first column
        # (and any other NaN) with 0.
        velocity = df.diff(axis=1).fillna(0)
        velocity_file_path = os.path.join(velocity_dir, filename)
        # header=False keeps the output header-less like the input.
        velocity.to_csv(velocity_file_path, index=False, header=False)

        # Differentiate the velocity again to get acceleration.
        acceleration = velocity.diff(axis=1).fillna(0)
        acceleration_file_path = os.path.join(acceleration_dir, filename)
        acceleration.to_csv(acceleration_file_path, index=False, header=False)


Pyarrow will become a required dependency of pandas in the next major release of pandas (pandas 3.0),
(to allow more performant data types, such as the Arrow string type, and better interoperability with other libraries)
but was not found to be installed on your system.
If this would cause problems for you,
please provide us feedback at https://github.com/pandas-dev/pandas/issues/54466
        
  import pandas as pd


KeyboardInterrupt: 

In [3]:
import os
import pandas as pd
import numpy as np

# Root of the cleaned len50 collision dataset; every directory below hangs off it.
base_dir = "/home/rtlink/robros/dataset/collision/len50/cleaned/"

input_dir = os.path.join(base_dir, "input_data/joint_position")
padded_input_dir = os.path.join(base_dir, "input_data/padded_joint_position")
velocity_dir = os.path.join(base_dir, "input_data/joint_velocity")
acceleration_dir = os.path.join(base_dir, "input_data/joint_acceleration")

# Make sure all three output directories exist before anything is written.
for _output_dir in (padded_input_dir, velocity_dir, acceleration_dir):
    os.makedirs(_output_dir, exist_ok=True)

def read_csv_with_padding(file_path):
    """Read a ragged, header-less CSV into a rectangular float DataFrame.

    Rows shorter than the widest row are right-padded with 0.0. Lines that
    are empty or contain only whitespace are skipped.

    Args:
        file_path: Path of the comma-separated file to read.

    Returns:
        A float DataFrame with one row per non-blank line; an empty DataFrame
        when the file has no non-blank lines.

    Raises:
        ValueError: If a field cannot be converted to float.
    """
    with open(file_path, 'r') as file:
        # A blank line would otherwise turn into a row holding one empty
        # string, which cannot be converted to float.
        rows = [line.strip().split(',') for line in file if line.strip()]

    if not rows:
        # max() below has nothing to work with for an empty file.
        return pd.DataFrame(dtype=float)

    max_cols = max(len(row) for row in rows)

    padded_data = [
        [float(value) for value in row] + [0.0] * (max_cols - len(row))
        for row in rows
    ]

    return pd.DataFrame(padded_data, dtype=float)

for filename in os.listdir(input_dir):
    if filename.endswith(".csv"):
        file_path = os.path.join(input_dir, filename)

        # Read the file and pad short rows to a rectangular frame.
        df = read_csv_with_padding(file_path)

        # Save the padded data to its own directory. header=False keeps the
        # file header-less like the input; otherwise pandas writes the column
        # labels 0,1,2,... as a first row that header-less readers would
        # parse as data.
        padded_file_path = os.path.join(padded_input_dir, filename)
        df.to_csv(padded_file_path, index=False, header=False)

        # Velocity: difference between consecutive columns; fillna(0)
        # replaces the NaN diff leaves in the first column.
        # NOTE(review): rows that were zero-padded get a jump to 0 at the
        # padding boundary in this difference - confirm that is intended.
        velocity = df.diff(axis=1).fillna(0)
        velocity_file_path = os.path.join(velocity_dir, filename)
        velocity.to_csv(velocity_file_path, index=False, header=False)

        # Acceleration: difference of the velocity.
        acceleration = velocity.diff(axis=1).fillna(0)
        acceleration_file_path = os.path.join(acceleration_dir, filename)
        acceleration.to_csv(acceleration_file_path, index=False, header=False)


In [None]:
import os
import numpy as np
import pandas as pd

base_dir = "/home/rtlink/robros/dataset/0215_dataset/input_data"

input_dir = os.path.join(base_dir, "joint_position")
velocity_dir = os.path.join(base_dir, "joint_velocity")
acceleration_dir = os.path.join(base_dir, "joint_acceleration")

os.makedirs(velocity_dir, exist_ok=True)
os.makedirs(acceleration_dir, exist_ok=True)

for filename in os.listdir(input_dir):
    if filename.endswith(".csv"):
        file_path = os.path.join(input_dir, filename)
        # header=None: without it pandas would consume the first data row as
        # column labels instead of data.
        df = pd.read_csv(file_path, header=None)



In [19]:
# CSV file whose first-row field count is reported by this cell.
file_path = '/home/rtlink/robros/dataset/0215_dataset/input_data/joint_velocity/fre_joint_1.csv'

import csv

def read_csv_and_print_length(file_path):
    """Return the number of fields in the first row of a CSV file.

    Despite its name the function prints nothing; the caller is expected to
    report the value. Only the first row is read.

    Args:
        file_path: Path of the CSV file to inspect.

    Returns:
        The field count of the first row, or None when the file has no rows.
    """
    with open(file_path, mode='r') as handle:
        first_row = next(csv.reader(handle), None)
    if first_row is None:
        return None
    return len(first_row)

# Report how many comma-separated fields the first row of the file holds.
length = read_csv_and_print_length(file_path=file_path)
print("length is :", length)


length is : 421617
