In [2]:
import numpy as np
import pandas as pd
import tensorflow as tf
from sklearn.model_selection import KFold, train_test_split
from tensorflow.keras.layers import Dense
from tensorflow.keras.models import Sequential

# Load the merged dataset from disk; the CSV is decoded with the CP949 codec.
# NOTE(review): this is an absolute path on one machine — consider a configurable data directory.
merged_data_path = 'C:/Users/82106/Desktop/데이터 분석 프로젝트 2/날씨 빅데이터 콘테스트/데이터/merged_data.csv'
merged_data = pd.read_csv(filepath_or_buffer=merged_data_path, encoding='cp949')

In [3]:
# Dependent variable: the '강수량(mm)' (precipitation, mm) column.
y = merged_data['강수량(mm)']
# Independent variables: every remaining column except '날짜' (date).
X = merged_data.drop(columns=['강수량(mm)', '날짜'])

In [5]:
from sklearn.preprocessing import StandardScaler

# Data preparation (split, then standardize).
# The split happens BEFORE fitting the scaler so that the mean/variance used for
# standardization come from the training rows only; fitting on the full dataset
# would leak information about the held-out rows into the training features.
# Row assignment is unchanged: same test_size and random_state as before.
X_train_raw, X_test_raw, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42)

scaler = StandardScaler()
X_train = scaler.fit_transform(X_train_raw)  # fit on training rows only
X_test = scaler.transform(X_test_raw)        # reuse the training statistics

# Full feature matrix scaled with the same training statistics; the
# cross-validation loop later in the notebook indexes into this array.
X_scaled = scaler.transform(X)

### 레이어 구성

In [17]:
# Hyperparameter settings
# -- mini-batch training
batch_size = 32
epochs = 100
# -- learning-rate decay
initial_learning_rate = 1e-2
decay_steps = 10_000
decay_rate = 0.96

In [11]:
# Model definition (Functional API)
def build_model(input_shape):
    """Build an uncompiled fully connected network with a single output unit.

    Args:
        input_shape: Number of input features; the model accepts inputs of
            shape ``(input_shape,)``.

    Returns:
        A ``tf.keras.Model`` wired as
        Dense(128, relu) -> Dropout(0.3) -> Dense(128, relu)
        -> BatchNormalization -> Dense(64, relu) -> Dense(1).
        The final layer has no activation. The caller is responsible for
        compiling the model.
    """
    layers = tf.keras.layers
    features = tf.keras.Input(shape=(input_shape,))

    hidden = layers.Dense(128, activation='relu')(features)
    hidden = layers.Dropout(0.3)(hidden)
    hidden = layers.Dense(128, activation='relu')(hidden)
    hidden = layers.BatchNormalization()(hidden)
    hidden = layers.Dense(64, activation='relu')(hidden)

    prediction = layers.Dense(1)(hidden)
    return tf.keras.Model(inputs=features, outputs=prediction)

In [7]:
# Create the model.
# `inputs` and `outputs` exist only as local variables inside build_model(), so
# referencing them at cell level raises NameError on a fresh kernel. Build the
# network through build_model() instead, sized to the number of feature columns.
model = build_model(X_scaled.shape[1])

In [12]:
# Learning-rate schedule.
# Uses initial_learning_rate / decay_steps / decay_rate from the hyperparameter
# cell rather than repeating the literals, so tuning that cell actually changes
# the schedule. With staircase=True the rate is multiplied by decay_rate in
# discrete jumps once every decay_steps optimizer steps.
lr_schedule = tf.keras.optimizers.schedules.ExponentialDecay(
    initial_learning_rate=initial_learning_rate,
    decay_steps=decay_steps,
    decay_rate=decay_rate,
    staircase=True)

In [15]:
# Cross-validation setup.
num_folds = 5
# shuffle=True without a seed yields different folds on every run; pin the
# shuffle with the same seed used for the train/test split so per-fold scores
# are reproducible.
kfold = KFold(n_splits=num_folds, shuffle=True, random_state=42)
fold_no = 1

In [18]:
# Train and evaluate one freshly built model per fold.

# KFold yields row POSITIONS. Indexing a pandas Series with them is a label
# lookup that is only correct for a default 0..n-1 index, so use a NumPy view.
y_values = y.to_numpy()
fold_scores = []  # validation MSE of each fold, in fold order

# enumerate() owns the fold counter, so re-running this cell restarts at fold 1
# instead of continuing from whatever value a previous run left behind.
for fold_no, (train, test) in enumerate(kfold.split(X_scaled, y_values), start=1):
    # New model per fold so no weights carry over between folds.
    model = build_model(X_scaled.shape[1])

    # New optimizer per fold: the step counter driving lr_schedule restarts at 0.
    optimizer = tf.keras.optimizers.Adam(learning_rate=lr_schedule)
    model.compile(optimizer=optimizer,
                  loss='mean_squared_error',
                  metrics=['mean_squared_error'])

    # Fit on the training rows of this fold, monitoring the held-out rows.
    print(f'Training for fold {fold_no}...')
    history = model.fit(X_scaled[train], y_values[train],
                        batch_size=batch_size,
                        epochs=epochs,
                        validation_data=(X_scaled[test], y_values[test]))

    # scores[0] is the loss, scores[1] the mean_squared_error metric.
    scores = model.evaluate(X_scaled[test], y_values[test], verbose=0)
    print(f'Score for fold {fold_no}: Mean Squared Error of {scores[1]}')
    fold_scores.append(scores[1])

# Summarize across folds; the per-fold numbers alone are hard to compare.
print(f'Mean MSE over {len(fold_scores)} folds: '
      f'{np.mean(fold_scores)} (std {np.std(fold_scores)})')

Training for fold 1...
Epoch 1/100
[1m451/451[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2ms/step - loss: 106.2959 - mean_squared_error: 106.2967 - val_loss: 57.6988 - val_mean_squared_error: 57.8699
Epoch 2/100
[1m451/451[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - loss: 77.8037 - mean_squared_error: 77.8044 - val_loss: 52.9046 - val_mean_squared_error: 52.8927
Epoch 3/100
[1m451/451[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - loss: 69.7599 - mean_squared_error: 69.7606 - val_loss: 53.8089 - val_mean_squared_error: 53.7005
Epoch 4/100
[1m451/451[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 1ms/step - loss: 67.3982 - mean_squared_error: 67.3988 - val_loss: 54.6966 - val_mean_squared_error: 54.6097
Epoch 5/100
[1m451/451[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 1ms/step - loss: 62.8759 - mean_squared_error: 62.8765 - val_loss: 63.1171 - val_mean_squared_error: 63.3031
Epoch 6/100
[1m451/451[0m [32m━━━━━━━