In [5]:
import numpy as np
import pandas as pd
from scipy.signal import savgol_filter

# ==== 파일 경로 ====
input_path = "/content/drive/MyDrive/urp/merged_filtered.csv"
output_path_A = "/content/drive/MyDrive/urp/merged_filtered_sg.csv"


# ==== SG smoothing 함수 ====
def sg_smoothing(y, window_length=7, polyorder=3, axis=-1):
    """Smooth a signal with a Savitzky-Golay filter, adapting to short inputs.

    Parameters
    ----------
    y : array_like
        Signal to smooth. For multi-dimensional input the filter is applied
        along ``axis``.
    window_length : int, optional
        Requested filter window. It is capped at the signal length along
        ``axis`` and decremented by one if it ends up even.
    polyorder : int, optional
        Order of the fitted polynomial. It is lowered to
        ``window_length - 1`` only when this function had to shrink the
        window; an invalid combination supplied by the caller still raises
        inside ``savgol_filter``.
    axis : int, optional
        Axis along which to filter. Defaults to the last axis, which is the
        axis ``savgol_filter`` uses by default.

    Returns
    -------
    numpy.ndarray
        The smoothed signal, same shape as ``y``. Signals with fewer than
        three samples along ``axis`` are returned as an unmodified float copy.
    """
    y = np.asarray(y)
    n_points = y.shape[axis]

    # With fewer than three samples the largest usable odd window is 1, which
    # cannot change the data; return a copy instead of failing in the filter.
    if n_points < 3:
        return y.astype(float)

    # The window may not exceed the signal length and is kept odd.
    effective_window = min(window_length, n_points)
    if effective_window % 2 == 0:
        effective_window -= 1

    # savgol_filter requires polyorder < window_length. If the window was
    # shrunk here, shrink the polynomial order with it so that short signals
    # are still smoothed rather than rejected.
    if 0 < effective_window < window_length and polyorder >= effective_window:
        polyorder = effective_window - 1

    return savgol_filter(
        y,
        window_length=effective_window,
        polyorder=polyorder,
        axis=axis,
    )


# ==== SG만 적용 ====
def preprocess_spectra_A(df, label_col='label', window_length=7, polyorder=3):
    """Apply Savitzky-Golay smoothing to every row of a spectra table.

    Parameters
    ----------
    df : pandas.DataFrame
        Table containing ``label_col`` plus the columns to smooth. Every
        column other than ``label_col`` must be convertible to float.
    label_col : str, optional
        Name of the column that is split off and returned untouched.
    window_length : int, optional
        Window length forwarded to ``sg_smoothing`` for each row.
    polyorder : int, optional
        Polynomial order forwarded to ``sg_smoothing`` for each row.

    Returns
    -------
    X_proc : numpy.ndarray
        Float array with the same shape as the non-label part of ``df``,
        each row smoothed independently.
    y : numpy.ndarray
        Values of ``label_col`` in their original row order.
    """
    y = df[label_col].values
    X = df.drop(columns=[label_col]).values.astype(float)
    print(f"[A] 원본 스펙트럼 크기: {X.shape}")

    # Every row is overwritten below, so the buffer needs no initialisation.
    X_proc = np.empty_like(X)
    for row_idx, spectrum in enumerate(X):
        X_proc[row_idx] = sg_smoothing(
            spectrum,
            window_length=window_length,
            polyorder=polyorder,
        )

    return X_proc, y


# ==== 실행: A 전처리 + CSV 저장 ====
# Load the input table from input_path.
df = pd.read_csv(input_path)

# Split off the 'label' column and smooth the remaining columns row by row.
X_A, y_A = preprocess_spectra_A(df, label_col='label')

# Reuse the original non-label column names as the header of the output.
spec_cols = df.columns.drop('label')

# Rebuild a table from the smoothed values and put 'label' in the first column.
df_A = pd.DataFrame(data=X_A, columns=spec_cols)
df_A.insert(loc=0, column='label', value=y_A)

# Write the result without the DataFrame row index.
df_A.to_csv(output_path_A, index=False)
print(f"[A] 전처리된 CSV 저장 완료: {output_path_A}")



[A] 원본 스펙트럼 크기: (1709, 3551)
[A] 전처리된 CSV 저장 완료: /content/drive/MyDrive/urp/merged_filtered_sg.csv
