# Combined CSV

In [3]:
import os
import pandas as pd

# Folder containing the per-person CSV files that the combine cell reads.
folder_path = r"C:\V89\Snore_Apnea_Analyze\EDF_RML\data_csv\use_test\each_person_csv"
# NOTE(review): this value is never read. The combine cell reassigns
# `output_path` to a file inside `folder_path` before its first use, so nothing
# is written to this directory — confirm which location is intended.
output_path = r"C:\V89\Snore_Apnea_Analyze\EDF_RML\data_csv\use_train\test"

In [4]:
# Combine every per-person CSV in `folder_path` into one CSV file.
#
# The merged file is written into `folder_path` itself, so it must be excluded
# from the inputs: otherwise re-running this cell would read the previous
# output back in and duplicate every row.
combined_name = "combined_test.csv"
output_path = os.path.join(folder_path, combined_name)

# List the input CSV files (case-insensitive extension match). Sorted because
# os.listdir returns entries in arbitrary order, and the row order of the
# merged file should be reproducible between runs.
csv_files = sorted(
    f for f in os.listdir(folder_path)
    if f.lower().endswith(".csv") and f.lower() != combined_name.lower()
)

# pd.concat raises an opaque "No objects to concatenate" ValueError on an
# empty list; fail early with a message that names the folder instead.
if not csv_files:
    raise ValueError(f"No input CSV files found in {folder_path}")

# Read each file and stack them into one DataFrame with a fresh 0..n-1 index.
df_list = [pd.read_csv(os.path.join(folder_path, file)) for file in csv_files]
combined_df = pd.concat(df_list, ignore_index=True)

# Save to a single CSV (without the index column).
combined_df.to_csv(output_path, index=False)

print(f"✅ Combined {len(csv_files)} CSV files into {output_path}")
print(f"Total rows: {len(combined_df)}")


✅ Combined 3 CSV files into C:\V89\Snore_Apnea_Analyze\EDF_RML\data_csv\use_test\each_person_csv\combined_test.csv
Total rows: 3898


# Check the combined data

In [5]:
# Load the combined test events back from disk and display them.
# NOTE(review): this reads from ...\use_test\test\, while the combine cell
# writes combined_test.csv into ...\use_test\each_person_csv\ — confirm the
# file here is the same one (e.g. copied over by hand).
combined_test_csv = r"C:\V89\Snore_Apnea_Analyze\EDF_RML\data_csv\use_test\test\combined_test.csv"
df = pd.read_csv(combined_test_csv)
df

Unnamed: 0,patient_id,event_id,family,type,start_sec,duration_sec,end_sec,segment_index,segment_local_start_sec,recording_start_iso
0,1010,0,User,Gain,0.0,0.0,0.0,0,0.0,2019-05-03T23:06:21
1,1010,1,User,ChannelFail,0.0,30.0,30.0,0,0.0,2019-05-03T23:06:21
2,1010,2,User,ChannelFail,0.0,6.0,6.0,0,0.0,2019-05-03T23:06:21
3,1010,3,Nasal,Snore,40.0,6.0,46.0,0,40.0,2019-05-03T23:06:21
4,1010,4,Cardiac,LongRR,56.0,2.0,58.0,0,56.0,2019-05-03T23:06:21
...,...,...,...,...,...,...,...,...,...,...
3893,1016,810,Cardiac,PttDrop,21554.0,9.0,21563.0,5,3554.0,2019-05-08T23:04:32
3894,1016,811,Cardiac,PttDrop,21648.0,12.0,21660.0,6,48.0,2019-05-08T23:04:32
3895,1016,812,User,ChannelFail,21660.0,90.0,21750.0,6,60.0,2019-05-08T23:04:32
3896,1016,813,Cardiac,PttDrop,21768.0,12.0,21780.0,6,168.0,2019-05-08T23:04:32


In [6]:
# Number of rows per event type in the test data, most frequent first.
df["type"].value_counts()

type
Arousal                 704
LongRR                  521
Bradycardia             472
RelativeDesaturation    451
ObstructiveApnea        447
Snore                   415
PttDrop                 401
ChannelFail             189
Hypopnea                155
Gain                     70
Tachycardia              48
MixedApnea               21
CentralApnea              4
Name: count, dtype: int64

In [8]:
# Load the combined training events.
combined_train_csv = r'C:\V89\Snore_Apnea_Analyze\EDF_RML\data_csv\use_train\train\combined_train.csv'
df2 = pd.read_csv(combined_train_csv)

In [9]:
# Number of rows per event type in the training data, most frequent first.
df2["type"].value_counts()

type
Arousal                 1491
Bradycardia              965
ObstructiveApnea         645
LongRR                   622
PttDrop                  609
ChannelFail              442
RelativeDesaturation     435
LegMovement              420
Hypopnea                 386
Snore                    279
Gain                     209
Tachycardia              188
MixedApnea                71
CentralApnea              38
Name: count, dtype: int64

In [10]:
# Event types to keep; rows of any other type are dropped.
# NOTE(review): "ObstructiveApnea" is not in this list — confirm that
# excluding it is intended.
keep_types = ["Hypopnea", "Snore", "MixedApnea", "CentralApnea"]

# Filter the training data: keep matching rows and renumber the index from 0.
is_kept = df2["type"].isin(keep_types)
df_filtered = df2.loc[is_kept].reset_index(drop=True)

# Save the filtered rows to their own CSV file (without the index column).
output_path = r"C:\V89\Snore_Apnea_Analyze\EDF_RML\data_csv\use_train\train\use_train.csv"
df_filtered.to_csv(output_path, index=False)

# Report where the file went and what is left after filtering.
print(f"✅ Filtered CSV saved to {output_path}")
print(f"Remaining rows: {len(df_filtered)}")
print(df_filtered['type'].value_counts())

✅ Filtered CSV saved to C:\V89\Snore_Apnea_Analyze\EDF_RML\data_csv\use_train\train\use_train.csv
Remaining rows: 774
type
Hypopnea        386
Snore           279
MixedApnea       71
CentralApnea     38
Name: count, dtype: int64


In [11]:
# Event types to keep; rows of any other type are dropped.
# NOTE(review): "ObstructiveApnea" is not in this list — confirm that
# excluding it is intended.
keep_types = ["Hypopnea", "Snore", "MixedApnea", "CentralApnea"]

# Filter the test data: keep matching rows and renumber the index from 0.
is_kept = df["type"].isin(keep_types)
df_filtered = df.loc[is_kept].reset_index(drop=True)

# Save the filtered rows to their own CSV file (without the index column).
# NOTE(review): the test file is written under ...\use_train\train\ — confirm
# this is the intended location for test data.
output_path = r"C:\V89\Snore_Apnea_Analyze\EDF_RML\data_csv\use_train\train\use_test.csv"
df_filtered.to_csv(output_path, index=False)

# Report where the file went and what is left after filtering.
print(f"✅ Filtered CSV saved to {output_path}")
print(f"Remaining rows: {len(df_filtered)}")
print(df_filtered['type'].value_counts())

✅ Filtered CSV saved to C:\V89\Snore_Apnea_Analyze\EDF_RML\data_csv\use_train\train\use_test.csv
Remaining rows: 595
type
Snore           415
Hypopnea        155
MixedApnea       21
CentralApnea      4
Name: count, dtype: int64
