In [None]:
import pandas as pd
import numpy as np
import os
from datetime import datetime, timedelta
import getpass

# Name of the OS account running the kernel; it is interpolated into the
# per-user SynologyDrive paths used throughout this notebook.
username = getpass.getuser()

# Preview a single raw CSV export (rendered as the cell's output).
pd.read_csv(
    f'C:/Users/{username}/SynologyDrive/AI/Short_circuit_exp_2_data/raw_data/Cr0.5/Cr0.5_1_006_DC.csv'
)

In [None]:
# Directory that holds the raw CSV exports to convert.
dataset_directory=f'C:/Users/{username}/SynologyDrive/AI/Short_circuit_exp_2_data/raw_data/Cr0.5'

# os.listdir() returns names in arbitrary order, so sort for a reproducible
# processing order. Match the real '.csv' extension (case-insensitively)
# instead of any name that merely ends with the letters 'csv'.
file_list=sorted(
    os.path.join(dataset_directory, name)
    for name in os.listdir(dataset_directory)
    if name.lower().endswith('.csv')
)
print(len(file_list))

# Directory that process_file() writes its '<cell_name>.pkl' outputs into.
new_directory_path=f'C:/Users/{username}/SynologyDrive/AI/Short_circuit_exp_2_data/pre_ver_processed_data'

In [None]:
import cudf
import dask_cudf
import dask_cudf
import cudf
import pandas as pd
from datetime import timedelta
import os
import time

def change_sec(time):
    """Convert a ``D:HH:MM:SS[.fff]`` elapsed-time string into seconds.

    Args:
        time: Text with four colon-separated fields (days, hours, minutes,
            seconds). The seconds field may carry a decimal fraction.

    Returns:
        float: The total elapsed time in seconds.

    Raises:
        ValueError: If the text does not contain exactly four colon-separated
            fields, or if any field is not numeric.
    """
    days, hours, minutes, seconds_field = time.split(':')

    # partition() tolerates a missing fraction ('07' -> ('07', '', '')),
    # whereas unpacking the result of split('.') would raise.
    seconds, _, fraction = seconds_field.partition('.')

    # Read the fraction as decimal digits, not as a millisecond count:
    # right-pad to microsecond resolution so '.5' means 0.5 s while '.123'
    # still means 123 ms. Digits beyond microseconds are truncated.
    microseconds = int(fraction.ljust(6, '0')[:6]) if fraction else 0

    time_delta = timedelta(
        days=int(days),
        hours=int(hours),
        minutes=int(minutes),
        seconds=int(seconds),
        microseconds=microseconds,
    )
    return time_delta.total_seconds()

def process_file(file):
    """Convert one raw CSV export into a pickled per-cell record.

    The file is loaded through dask_cudf, split by ``Cycle_No.``, and for each
    cycle the ``|Q|(Ah)`` column is separated into a charge and a discharge
    capacity trace. The resulting dict is written with ``pd.to_pickle`` to
    ``<new_directory_path>/<cell_name>.pkl``.

    Relies on the module-level ``new_directory_path`` and ``change_sec``.

    Args:
        file: Path to the CSV. The columns read are ``Cycle_No.``,
            ``Step_No.``, ``Current(A)``, ``Voltage(V)``, ``|Q|(Ah)`` and
            ``Test_Time(s)``.

    Returns:
        None. The record is written to disk and progress is printed.

    Raises:
        IndexError: If the file contains no cycle numbers at all.
    """
    # Materialize the whole file as a single cuDF DataFrame.
    df_res = dask_cudf.read_csv(file).compute()

    # Strip the '_DC.csv' suffix, then the last four characters of what is
    # left, e.g. 'Cr0.5_1_006_DC.csv' -> 'Cr0.5_1'.
    cell_name = os.path.basename(file).replace('_DC.csv', '')[:-4]

    print(str(cell_name))

    # Cycle numbers actually present in the file, ascending. Iterating these
    # (rather than 1..N) keeps files whose first cycle is not 1 from producing
    # empty cycles and losing their last ones; it also makes cycle_number[0]
    # the smallest cycle, which 'already_spent_cycles' below depends on.
    # Null cycle numbers never match a filter, so they are dropped here.
    cycle_number = sorted(
        cyc for cyc in df_res["Cycle_No."].unique().to_arrow().to_pylist()
        if cyc is not None
    )
    cycle_data = []

    for cyc in cycle_number:
        cur_df = df_res[df_res['Cycle_No.'] == cyc]   # rows of this cycle only
        # NOTE(review): these two stay cuDF Series while every other per-cycle
        # field is a plain Python list, so the pickle presumably needs cuDF to
        # load -- confirm whether consumers expect lists here.
        current_in_A = cur_df["Current(A)"]
        voltage_in_V = cur_df['Voltage(V)']
        num_steps = cur_df['Step_No.'].unique().to_arrow().to_pylist()

        # A step counts as discharge when its median current is negative; the
        # step number right after it is included as well.
        discharge_step = []
        for step in num_steps:
            step_df = cur_df[cur_df['Step_No.'] == step]
            if step_df['Current(A)'].median() < 0:
                discharge_step.append(step)
                discharge_step.append(step + 1)

        # Row labels belonging to the discharge steps. dict.fromkeys() drops
        # repeated step numbers (consecutive discharge steps add the same
        # number twice) while keeping their order.
        discharge_rows = []
        for step_num in dict.fromkeys(discharge_step):
            discharge_rows.extend(
                cur_df[cur_df['Step_No.'] == step_num].index.to_arrow().to_pylist()
            )

        # Set membership keeps the complement linear instead of quadratic.
        discharge_row_set = set(discharge_rows)
        other_rows = [
            label for label in cur_df.index.to_arrow().to_pylist()
            if label not in discharge_row_set
        ]

        # Charge trace: capacity zeroed on discharge rows.
        # Discharge trace: capacity zeroed on every other row.
        chr_df = cur_df.copy()
        dis_df = cur_df.copy()
        chr_df.loc[discharge_rows, '|Q|(Ah)'] = 0
        dis_df.loc[other_rows, '|Q|(Ah)'] = 0

        discharge_capacity_in_Ah = dis_df['|Q|(Ah)'].to_arrow().to_pylist()
        charge_capacity_in_Ah = chr_df['|Q|(Ah)'].to_arrow().to_pylist()
        assert len(discharge_capacity_in_Ah) == len(charge_capacity_in_Ah), "capacity length of dis & chr is not equal"

        timedata = cur_df['Test_Time(s)'].to_arrow().to_pylist()
        time_in_s = [change_sec(stamp) for stamp in timedata]

        cycle_data.append({
            'cycle_number': cyc,
            'step': num_steps,
            'time_in_s': time_in_s,
            'current_in_A': current_in_A,
            'voltage_in_V': voltage_in_V,
            'discharge_capacity_in_Ah': discharge_capacity_in_Ah,
            'charge_capacity_in_Ah': charge_capacity_in_Ah,
            'temperature_in_C': None,
            'internal_resistance_in_ohm': None
        })

    dataframes = {
        'cell_id': cell_name,
        'cycle_data': cycle_data,
        'form_factor': 'coin_cell',
        'anode_material': 'Li-metal',
        'cathode_material': 'NCM523',
        'electrolyte_material': None,
        'nominal_capacity_in_Ah': 0.0034,
        'depth_of_charge': 1.0,
        'depth_of_discharge': 1.0,
        # Number of cycles that precede the first one present in this file.
        'already_spent_cycles': int(cycle_number[0] - 1),
        'max_voltage_limit_in_V': 4.3,
        'min_voltage_limit_in_V': 3.0,
        'max_current_limit_in_A': None,
        'min_current_limit_in_A': None,
        'reference': None,
        'description': None,
        'charge_protocol': None,
        'discharge_protocol': None,
        'relaxation_protocol': None,
        # Zero-based position of the last recorded cycle in cycle_data.
        'short_circuit_cycle': len(cycle_data)-1
    }

    # pd.to_pickle does not create missing parent directories.
    os.makedirs(new_directory_path, exist_ok=True)

    new_file_name = cell_name + '.pkl'
    new_file_path = os.path.join(new_directory_path, new_file_name)
    pd.to_pickle(dataframes, new_file_path)
    print(f'Success to save \'{new_file_path}\'')

# Convert every discovered raw CSV, one file at a time.
for raw_csv_path in file_list:
    process_file(raw_csv_path)