### 데이터 전처리

넙치 중


In [12]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import mean_squared_error, r2_score

def load_and_preprocess_data(file_path):
    """Load the price dataset from a CSV file and return scaled train/test splits.

    The CSV is expected to contain the columns '날짜', '환율', '기준금리', 'WTI',
    '마라도_실제수온', '마라도_예상수온', '마라도_수온편차' and the target
    column '넙치_중'.

    Args:
        file_path: Path (or any object accepted by ``pandas.read_csv``) of the
            UTF-8 encoded CSV file to load.

    Returns:
        A tuple ``(X_train_scaled, X_test_scaled, y_train, y_test)`` where the
        feature matrices are standardized with a ``StandardScaler`` fitted on
        the training split only, and the targets are the unscaled '넙치_중'
        values for the corresponding rows.

    Raises:
        KeyError: If a required column is missing from the CSV.
    """
    # Read from the path supplied by the caller.
    df = pd.read_csv(file_path, encoding='utf-8')

    # Parse the date column into datetime values.
    df['날짜'] = pd.to_datetime(df['날짜'])

    # The exchange-rate column may contain thousands separators ("1,234.5"),
    # in which case pandas reads it as text. Strip the commas and convert to
    # float; skip the string handling when the column is already numeric,
    # because the .str accessor is only available on text columns.
    if not pd.api.types.is_numeric_dtype(df['환율']):
        df['환율'] = df['환율'].str.replace(',', '', regex=False).astype(float)

    # Fill gaps in numeric columns by carrying the previous row's value forward.
    # Values missing at the very start of a column have nothing to carry and
    # remain NaN.
    numeric_columns = df.select_dtypes(include=[np.number]).columns
    df[numeric_columns] = df[numeric_columns].ffill()

    # Separate features and target (the target is the '넙치_중' column).
    features = ['환율', '기준금리', 'WTI', '마라도_실제수온', '마라도_예상수온', '마라도_수온편차']
    target = '넙치_중'

    X = df[features]
    y = df[target]

    # Hold out 20% of the rows for testing; the fixed seed keeps the split
    # reproducible.
    # NOTE(review): train_test_split shuffles rows by default; for date-ordered
    # data confirm that a random (non-chronological) split is intended.
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.2, random_state=42
    )

    # Fit the scaler on the training split only so that test-set statistics do
    # not leak into the transformation, then apply it to both splits.
    scaler = StandardScaler()
    X_train_scaled = scaler.fit_transform(X_train)
    X_test_scaled = scaler.transform(X_test)

    return X_train_scaled, X_test_scaled, y_train, y_test

def evaluate_model(y_true, y_pred, model_name):
    """Print and return regression metrics for a set of predictions.

    Args:
        y_true: Ground-truth target values.
        y_pred: Predicted target values, aligned with ``y_true``.
        model_name: Label printed in the report header.

    Returns:
        A tuple ``(mse, rmse, r2)``: the mean squared error, its square root,
        and the R^2 score of ``y_pred`` against ``y_true``.
    """
    squared_error = mean_squared_error(y_true, y_pred)
    root_error = np.sqrt(squared_error)
    determination = r2_score(y_true, y_pred)

    # Build the report first, then emit it line by line.
    report_lines = (
        f"\n{model_name} 모델 평가 결과:",
        f"MSE: {squared_error:.2f}",
        f"RMSE: {root_error:.2f}",
        f"R2 Score: {determination:.4f}",
    )
    for line in report_lines:
        print(line)

    return squared_error, root_error, determination

ModuleNotFoundError: No module named 'pandas'