In [11]:
# basic tft, ts=4, mp1


import pandas as pd
import numpy as np
from sklearn.preprocessing import MinMaxScaler
import tensorflow as tf
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Dense, Dropout, LayerNormalization, Flatten
from sklearn.metrics import mean_squared_error
from tensorflow.keras.callbacks import EarlyStopping
from itertools import product

# 데이터 로드
file_path = 'C:\\Users\\co279\\mp1.csv'
data = pd.read_csv(file_path)

# 결측치 처리 (예: 0으로 대체)
data = data.fillna(0)

# 2020, 2021, 2022, 2023년에 모두 존재하는 player_id 추출
data_2020 = data[data['year'] == 2020]
data_2021 = data[data['year'] == 2021]
data_2022 = data[data['year'] == 2022]
data_2023 = data[data['year'] == 2023]

player_ids_2020 = set(data_2020['player_id'].unique())
player_ids_2021 = set(data_2021['player_id'].unique())
player_ids_2022 = set(data_2022['player_id'].unique())
player_ids_2023 = set(data_2023['player_id'].unique())

common_player_ids = player_ids_2020 & player_ids_2021 & player_ids_2022 & player_ids_2023

# Extract data for common player_ids
common_data = data[data['player_id'].isin(common_player_ids)]

# 2020~2023년 데이터를 학습에 사용
final = common_data[common_data['year'].isin([2020, 2021, 2022, 2023])]
final = final.sort_values(by=['player_id', 'year'])

# 필요한 컬럼 선택 (year와 이름 관련 컬럼 제외)
features = [col for col in final.columns if col not in ['player_id', 'year', 'p_era', 'last_name, first_name']]
target = 'p_era'

# 독립변수와 종속변수 분리
X = final[['player_id'] + features].values  # player_id 포함
y = final[target].values

# 데이터 스케일링
scaler_X = MinMaxScaler()
scaler_y = MinMaxScaler()

X_scaled = np.hstack((X[:, [0]], scaler_X.fit_transform(X[:, 1:])))  # player_id는 그대로 유지
y_scaled = scaler_y.fit_transform(y.reshape(-1, 1))

# 시계열 데이터 형태로 변환
def create_sequences_grouped(X, y, seq_length):
    X_seq, y_seq = [], []
    unique_players = np.unique(X[:, 0])  # player_id 추출
    for player_id in unique_players:
        player_data = X[X[:, 0] == player_id]  # player_id별 데이터 추출
        player_targets = y[X[:, 0] == player_id]
        for i in range(len(player_data)):
            seq_x = player_data[max(0, i - seq_length + 1):i + 1, 1:]  # player_id 제외
            seq_x = seq_x[-seq_length:]  # 마지막 seq_length 데이터만 추출
            X_seq.append(seq_x)
            y_seq.append(player_targets[i])
    return np.array(X_seq), np.array(y_seq)

seq_length = 4  # 시퀀스 길이 설정
X_seq, y_seq = create_sequences_grouped(X_scaled, y_scaled, seq_length)

# 학습 데이터 설정
X_train, y_train = X_seq, y_seq

# 하이퍼파라미터 설정
state_sizes = [10, 20]
dropout_rates = [0.1, 0.2]
minibatch_sizes = [32]
learning_rates = [0.0001, 0.001]
max_gradient_norms = [0.01, 1]

# 하이퍼파라미터 조합 생성
hyperparameter_combinations = list(product(state_sizes, dropout_rates, minibatch_sizes, learning_rates, max_gradient_norms))

best_rmse = float('inf')
best_params = None
best_y_pred = None

# BasicTFTModel 정의
class BasicTFTModel(Model):
    def __init__(self, seq_length, feature_dim, ff_dim, state_size, dropout_rate):
        super(BasicTFTModel, self).__init__()
        self.layer_norm1 = LayerNormalization()
        self.ffn1 = Dense(ff_dim, activation="relu")
        self.layer_norm2 = LayerNormalization()
        self.ffn2 = Dense(feature_dim)
        self.flatten = Flatten()
        self.dense1 = Dense(state_size, activation="relu")
        self.dropout1 = Dropout(dropout_rate)
        self.dense2 = Dense(state_size // 4, activation="relu")
        self.dense3 = Dense(1, activation="linear")

    def call(self, inputs):
        x = self.layer_norm1(inputs)
        x = self.ffn1(x)
        x = self.layer_norm2(x)
        x = self.ffn2(x)
        flat_output = self.flatten(x)
        dense_output1 = self.dense1(flat_output)
        drop_output1 = self.dropout1(dense_output1)
        dense_output2 = self.dense2(drop_output1)
        return self.dense3(dense_output2)

# 하이퍼파라미터 튜닝
for state_size, dropout_rate, minibatch_size, learning_rate, max_gradient_norm in hyperparameter_combinations:
    tf.keras.backend.clear_session()
    
    # 모델 생성
    model = BasicTFTModel(seq_length=seq_length, feature_dim=X_train.shape[2], ff_dim=32, state_size=state_size, dropout_rate=dropout_rate)
    
    # 컴파일
    optimizer = tf.keras.optimizers.Adam(learning_rate=learning_rate, clipnorm=max_gradient_norm)
    model.compile(
        loss="mse",
        optimizer=optimizer
    )

    # 조기 종료 콜백
    early_stopping = EarlyStopping(monitor='val_loss', patience=10, restore_best_weights=True)
    
    # 모델 학습
    model.fit(X_train, y_train, epochs=50, batch_size=minibatch_size, validation_split=0.2, verbose=0, callbacks=[early_stopping])
    
    # 2024년 데이터를 예측
    def create_sequences_for_prediction(X, player_ids, seq_length):
        X_seq = []
        for player_id in player_ids:
            player_data = X[X[:, 0] == player_id][:, 1:]
            seq_x = player_data[-seq_length:]  # 마지막 seq_length 데이터만 추출
            X_seq.append(seq_x)
        return np.array(X_seq)

    player_ids = common_data['player_id'].unique()
    data_last_years = common_data[common_data['year'].isin([2020, 2021, 2022, 2023])][['player_id'] + features]
    X_last_years_scaled = np.hstack((data_last_years[['player_id']].values, scaler_X.transform(data_last_years[features].values)))

    X_2024_seq = create_sequences_for_prediction(X_last_years_scaled, player_ids, seq_length)
    y_pred_scaled = model.predict(X_2024_seq)
    y_pred = scaler_y.inverse_transform(y_pred_scaled)

    rmse = np.sqrt(mean_squared_error(y_train[:len(y_pred)], y_pred))
    if rmse < best_rmse:
        best_rmse = rmse
        best_y_pred = y_pred
        best_params = (state_size, dropout_rate, minibatch_size, learning_rate, max_gradient_norm)

# 최적의 예측 결과 출력
df_results = pd.DataFrame({
    'Player_ID': player_ids,
    'Predicted_ERA_2024': best_y_pred.flatten()
})
print(f"Best RMSE: {best_rmse}")
print(f"Best Params: {best_params}")
print(df_results)


ValueError: setting an array element with a sequence. The requested array has an inhomogeneous shape after 1 dimensions. The detected shape was (296,) + inhomogeneous part.

In [None]:
# full-tft ts=4

import pandas as pd
import numpy as np
from sklearn.preprocessing import MinMaxScaler
import tensorflow as tf
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, Dense, Dropout, LayerNormalization, Flatten, MultiHeadAttention
from sklearn.metrics import mean_squared_error, mean_absolute_error, mean_absolute_percentage_error
from tensorflow.keras.callbacks import EarlyStopping
from itertools import product
import gc

# 데이터셋 불러오기
file_path = 'C:\\Users\\co279\\mp1.csv'
data = pd.read_csv(file_path)

# 필요 없는 컬럼 제거
data = data.drop(columns=['last_name, first_name'])

# 결측치 처리 (예: 0으로 대체)
data = data.fillna(0)

# 2019, 2020, 2021, 2022, 2023년에 모두 존재하는 player_id 추출
data_2019 = data[data['year'] == 2019]
data_2020 = data[data['year'] == 2020]
data_2021 = data[data['year'] == 2021]
data_2022 = data[data['year'] == 2022]
data_2023 = data[data['year'] == 2023]

player_ids_2019 = set(data_2019['player_id'].unique())
player_ids_2020 = set(data_2020['player_id'].unique())
player_ids_2021 = set(data_2021['player_id'].unique())
player_ids_2022 = set(data_2022['player_id'].unique())
player_ids_2023 = set(data_2023['player_id'].unique())

common_player_ids = player_ids_2019 & player_ids_2020 & player_ids_2021 & player_ids_2022 & player_ids_2023

# 공통 player_id에 해당하는 데이터 추출
common_data = data[data['player_id'].isin(common_player_ids)]

# 2019~2023년 데이터만 추출
final = common_data[common_data['year'].isin([2019, 2020, 2021, 2022])]
final = final.sort_values(by=['player_id', 'year'])

# 필요한 컬럼 선택 (year 제외)
features = [col for col in final.columns if col not in ['player_id', 'year', 'p_era']]
target = 'p_era'

# 독립변수와 종속변수 분리
X = final[features].values
y = final[target].values

# 데이터 스케일링
scaler_X = MinMaxScaler()
scaler_y = MinMaxScaler()

X_scaled = scaler_X.fit_transform(X)
y_scaled = scaler_y.fit_transform(y.reshape(-1, 1))

# 시계열 데이터 형태로 변환
def create_sequences(X, y, seq_length):
    X_seq, y_seq = [], []
    for i in range(len(X)):
        seq_x = X[max(0, i - seq_length + 1):i + 1]
        seq_x = np.pad(seq_x, ((seq_length - len(seq_x), 0), (0, 0)), 'constant')
        seq_y = y[i]
        X_seq.append(seq_x)
        y_seq.append(seq_y)
    return np.array(X_seq), np.array(y_seq)

seq_length = 4  # 시퀀스 길이 설정
X_seq, y_seq = create_sequences(X_scaled, y_scaled, seq_length)

# 학습 데이터와 전체 데이터를 동일하게 설정
X_train, y_train = X_seq, y_seq

# 하이퍼파라미터 설정
state_sizes = [10, 20, 40]
dropout_rates = [0.1, 0.2]
minibatch_sizes = [32, 64]
learning_rates = [0.0001, 0.001]
max_gradient_norms = [0.01, 1]
num_heads = [1, 2]

# 하이퍼파라미터 조합 생성
hyperparameter_combinations = list(product(state_sizes, dropout_rates, minibatch_sizes, learning_rates, max_gradient_norms, num_heads))

best_rmse = float('inf')
best_params = None
best_model = None

# TFT 모델 정의
class GatedResidualNetwork(tf.keras.layers.Layer):
    def __init__(self, input_dim, state_size, dropout_rate):
        super(GatedResidualNetwork, self).__init__()
        self.dense1 = Dense(state_size, activation="relu")
        self.dense2 = Dense(input_dim)
        self.gate = Dense(input_dim, activation="sigmoid")
        self.layer_norm = LayerNormalization()
        self.dropout = Dropout(dropout_rate)

    def call(self, inputs):
        x = self.dense1(inputs)
        x = self.dropout(x)
        x = self.dense2(x)
        gate_output = self.gate(inputs)
        gated_output = x * gate_output + inputs
        return self.layer_norm(gated_output)

class TFTModel(Model):
    def __init__(self, seq_length, feature_dim, num_heads, ff_dim, state_size, dropout_rate):
        super(TFTModel, self).__init__()
        self.multi_head_attention = MultiHeadAttention(num_heads=num_heads, key_dim=feature_dim)
        self.layer_norm1 = LayerNormalization()
        self.grn1 = GatedResidualNetwork(feature_dim, state_size, dropout_rate)
        self.grn2 = GatedResidualNetwork(feature_dim, state_size, dropout_rate)
        self.flatten = Flatten()
        self.dense1 = Dense(state_size, activation="relu")
        self.dropout1 = Dropout(dropout_rate)
        self.dense2 = Dense(state_size // 4, activation="relu")
        self.dense3 = Dense(1, activation="linear")

    def call(self, inputs):
        attn_output = self.multi_head_attention(inputs, inputs)
        out1 = self.layer_norm1(inputs + attn_output)
        out1 = self.grn1(out1)
        out2 = self.grn2(out1)
        flat_output = self.flatten(out2)
        dense_output1 = self.dense1(flat_output)
        drop_output1 = self.dropout1(dense_output1)
        dense_output2 = self.dense2(drop_output1)
        return self.dense3(dense_output2)

# 하이퍼파라미터 튜닝 및 학습
for state_size, dropout_rate, minibatch_size, learning_rate, max_gradient_norm, num_heads in hyperparameter_combinations:
    tf.keras.backend.clear_session()
    model = TFTModel(seq_length=seq_length, feature_dim=X_train.shape[2], num_heads=num_heads, ff_dim=32, state_size=state_size, dropout_rate=dropout_rate)
    optimizer = tf.keras.optimizers.Adam(learning_rate=learning_rate, clipnorm=max_gradient_norm)
    model.compile(loss="mse", optimizer=optimizer, metrics=["mse"])
    early_stopping = EarlyStopping(monitor='val_loss', patience=10, restore_best_weights=True)
    model.fit(X_train, y_train, epochs=100, batch_size=minibatch_size, validation_split=0.2, verbose=0, callbacks=[early_stopping])
    y_pred_scaled = model.predict(X_train)
    rmse = np.sqrt(mean_squared_error(y_train, scaler_y.inverse_transform(y_pred_scaled)))
    if rmse < best_rmse:
        best_rmse = rmse
        best_model = model
        best_params = (state_size, dropout_rate, minibatch_size, learning_rate, max_gradient_norm, num_heads)

    del model, history, y_pred_scaled  # 불필요한 변수 삭제
    tf.keras.backend.clear_session()
    gc.collect()
    
print(f"Best RMSE: {best_rmse}, Best Params: {best_params}")

# 2024년 데이터 예측
data_last_years = common_data[common_data['year'].isin([2020, 2021, 2022, 2023])]
X_last_years = data_last_years[features].values
X_last_years_scaled = scaler_X.transform(X_last_years)

X_2024_seq, _ = create_sequences(X_last_years_scaled, np.zeros(len(X_last_years_scaled)), seq_length)
y_pred_scaled = best_model.predict(X_2024_seq)
y_pred = scaler_y.inverse_transform(y_pred_scaled)

# player_id와 예측된 2024년 ERA 값 출력
player_ids = data_last_years['player_id'].unique()
df_results = pd.DataFrame({'player_id': player_ids, 'predicted_era_2024': y_pred.flatten()})
print(df_results)




KeyboardInterrupt: 

In [4]:
import pandas as pd

# 데이터 로드
file_path = 'C:\\Users\\co279\\mp1.csv'
data = pd.read_csv(file_path)

# 각 연도의 데이터 추출
data_2020 = data[data['year'] == 2020]
data_2021 = data[data['year'] == 2021]
data_2022 = data[data['year'] == 2022]
data_2023 = data[data['year'] == 2023]

# 각 연도의 player_id 집합 생성
player_ids_2020 = set(data_2020['player_id'].unique())
player_ids_2021 = set(data_2021['player_id'].unique())
player_ids_2022 = set(data_2022['player_id'].unique())
player_ids_2023 = set(data_2023['player_id'].unique())

# 모든 연도에 존재하는 player_id 계산
common_player_ids = player_ids_2020 & player_ids_2021 & player_ids_2022 & player_ids_2023

print(f"2020~2023년 모두 존재하는 선수 수: {len(common_player_ids)}")
print("공통 player_id:", sorted(common_player_ids))


2020~2023년 모두 존재하는 선수 수: 74
공통 player_id: [425794, 425844, 448179, 450203, 453286, 456501, 458681, 471911, 477132, 502043, 502171, 506433, 527048, 542881, 543037, 543135, 543243, 543294, 543475, 548389, 554430, 571578, 571760, 572971, 579328, 592332, 592351, 592826, 592836, 592866, 594835, 596133, 605135, 605200, 605288, 605347, 605397, 605400, 605483, 605540, 607536, 607625, 608331, 608337, 608379, 608723, 621107, 621244, 622491, 628711, 640455, 641154, 641302, 641745, 641771, 642232, 645261, 650644, 656302, 656427, 656756, 656849, 657277, 663776, 663903, 664062, 664199, 664285, 664299, 666200, 668678, 669203, 669456, 670950]


In [8]:
print(f"Length of player_ids: {len(player_ids)}")
print(f"Length of Predicted_ERA_2024: {len(best_y_pred)}")

Length of player_ids: 74
Length of Predicted_ERA_2024: 296


In [1]:
player_ids_2020 = set(data[data['year'] == 2020]['player_id'].unique())
player_ids_2021 = set(data[data['year'] == 2021]['player_id'].unique())
player_ids_2022 = set(data[data['year'] == 2022]['player_id'].unique())
player_ids_2023 = set(data[data['year'] == 2023]['player_id'].unique())

# 모든 연도에 존재하는 player_id 추출 (교집합)
common_player_ids = player_ids_2020 & player_ids_2021 & player_ids_2022 & player_ids_2023

# 결과 출력
print(f"2020~2023년 모두 존재하는 선수 수: {len(common_player_ids)}")
print("공통 player_id:", common_player_ids)

NameError: name 'data' is not defined

In [None]:
# mha tft, ts=4, mp1

import pandas as pd
import numpy as np
from sklearn.preprocessing import MinMaxScaler
import tensorflow as tf
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Dense, Dropout, LayerNormalization, Flatten, MultiHeadAttention
from sklearn.metrics import mean_squared_error, mean_absolute_error, mean_absolute_percentage_error
from tensorflow.keras.callbacks import EarlyStopping
from itertools import product

# 데이터셋 불러오기
file_path = 'C:\\Users\\co279\\mp1.csv'
data = pd.read_csv(file_path)

# 결측치 처리 (예: 0으로 대체)
data = data.fillna(0)

# 2020, 2021, 2022, 2023년에 모두 존재하는 player_id 추출
data_2020 = data[data['year'] == 2020]
data_2021 = data[data['year'] == 2021]
data_2022 = data[data['year'] == 2022]
data_2023 = data[data['year'] == 2023]

player_ids_2020 = set(data_2020['player_id'].unique())
player_ids_2021 = set(data_2021['player_id'].unique())
player_ids_2022 = set(data_2022['player_id'].unique())
player_ids_2023 = set(data_2023['player_id'].unique())

common_player_ids = player_ids_2020 & player_ids_2021 & player_ids_2022 & player_ids_2023

# Extract data for common player_ids
common_data = data[data['player_id'].isin(common_player_ids)]

# 2020~2023년 데이터를 학습에 사용
final = common_data[common_data['year'].isin([2020, 2021, 2022, 2023])]
final = final.sort_values(by=['player_id', 'year'])

# 필요한 컬럼 선택 (year와 이름 관련 컬럼 제외)
features = [col for col in final.columns if col not in ['player_id', 'year', 'p_era', 'last_name, first_name']]
target = 'p_era'

# 독립변수와 종속변수 분리
X = final[features].values
y = final[target].values

# 데이터 스케일링
scaler_X = MinMaxScaler()
scaler_y = MinMaxScaler()

X_scaled = scaler_X.fit_transform(X)
y_scaled = scaler_y.fit_transform(y.reshape(-1, 1))

# 시계열 데이터 형태로 변환
def create_sequences(X, y, seq_length):
    X_seq, y_seq = [], []
    for i in range(len(X)):
        seq_x = X[max(0, i - seq_length + 1):i + 1]
        seq_x = np.pad(seq_x, ((seq_length - len(seq_x), 0), (0, 0)), 'constant')
        seq_y = y[i]
        X_seq.append(seq_x)
        y_seq.append(seq_y)
    return np.array(X_seq), np.array(y_seq)

seq_length = 4  # 시퀀스 길이 설정
X_seq, y_seq = create_sequences(X_scaled, y_scaled, seq_length)

# 학습 데이터와 전체 데이터를 동일하게 설정
X_train, y_train = X_seq, y_seq

# 하이퍼파라미터 설정
state_sizes = [10, 20, 40, 80, 160, 240, 320]
dropout_rates = [0.1, 0.2, 0.3, 0.4, 0.5, 0.7, 0.9]
minibatch_sizes = [32, 64, 128]
learning_rates = [0.0001, 0.001, 0.01]
max_gradient_norms = [0.01, 1, 100.0]
num_heads = [1, 2, 3, 4]

# 하이퍼파라미터 조합 생성
hyperparameter_combinations = list(product(state_sizes, dropout_rates, minibatch_sizes, learning_rates, max_gradient_norms, num_heads))

best_rmse = float('inf')
best_mae = float('inf')
best_mape = float('inf')
best_params = None
best_y_pred = None

# TFT 모델 정의
class TFTModel(Model):
    def __init__(self, seq_length, feature_dim, num_heads, ff_dim, state_size, dropout_rate):
        super(TFTModel, self).__init__()
        self.multi_head_attention1 = MultiHeadAttention(num_heads=num_heads, key_dim=feature_dim)
        self.layer_norm1 = LayerNormalization()
        self.multi_head_attention2 = MultiHeadAttention(num_heads=num_heads, key_dim=feature_dim)
        self.layer_norm2 = LayerNormalization()
        self.ffn = tf.keras.Sequential([
            Dense(ff_dim, activation="relu"),
            Dense(feature_dim)
        ])
        self.flatten = Flatten()
        self.dense1 = Dense(state_size, activation="relu")
        self.dropout1 = Dropout(dropout_rate)
        self.dense2 = Dense(state_size // 4, activation="relu")
        self.dense3 = Dense(1, activation="linear")

    def call(self, inputs):
        attn_output1 = self.multi_head_attention1(inputs, inputs)
        out1 = self.layer_norm1(inputs + attn_output1)
        attn_output2 = self.multi_head_attention2(out1, out1)
        out2 = self.layer_norm2(out1 + attn_output2)
        ffn_output = self.ffn(out2)
        flat_output = self.flatten(ffn_output)
        dense_output1 = self.dense1(flat_output)
        drop_output1 = self.dropout1(dense_output1)
        dense_output2 = self.dense2(drop_output1)
        return self.dense3(dense_output2)

# 하이퍼파라미터 튜닝
for state_size, dropout_rate, minibatch_size, learning_rate, max_gradient_norm, num_heads in hyperparameter_combinations:
    tf.keras.backend.clear_session()
    
    # 모델 인스턴스 생성
    model = TFTModel(seq_length=seq_length, feature_dim=X_train.shape[2], num_heads=num_heads, ff_dim=32, state_size=state_size, dropout_rate=dropout_rate)
    
    # 컴파일
    optimizer = tf.keras.optimizers.Adam(learning_rate=learning_rate, clipnorm=max_gradient_norm)
    model.compile(
        loss="mse",
        optimizer=optimizer,
        metrics=["accuracy"]
    )

    # 조기 종료 콜백
    early_stopping = EarlyStopping(monitor='val_loss', patience=10, restore_best_weights=True)
    
    # 모델 학습
    history = model.fit(X_train, y_train, epochs=50, batch_size=minibatch_size, validation_split=0.2, verbose=0, callbacks=[early_stopping])
    
    # 2020~2023년 데이터를 사용하여 2024년 예측
    data_last_years = common_data[common_data['year'].isin([2020, 2021, 2022, 2023])][['player_id', 'last_name, first_name'] + features]  # 선수 ID와 이름 포함, features에서 이름 제거됨
    X_last_years = data_last_years[features].values
    X_last_years_scaled = scaler_X.transform(X_last_years)

    # 시계열 데이터 형태로 변환 (2020~2023년 데이터를 기반으로 2024년 예측)
    def create_sequences_for_prediction(X, seq_length):
        X_seq = []
        for i in range(len(X)):
            seq_x = X[max(0, i - seq_length + 1):i + 1]
            seq_x = np.pad(seq_x, ((seq_length - len(seq_x), 0), (0, 0)), 'constant')
            X_seq.append(seq_x)
        return np.array(X_seq)

    X_2024_seq = create_sequences_for_prediction(X_last_years_scaled, seq_length)

    # 2024년 데이터 예측
    y_pred_scaled = model.predict(X_2024_seq)

    # 스케일 복원
    y_pred = scaler_y.inverse_transform(y_pred_scaled)

    # RMSE 계산 (테스트 데이터가 없어 실제값과 비교하지 않습니다)

    # 최적의 하이퍼파라미터 찾기
    rmse = np.sqrt(mean_squared_error(y_train[:len(y_pred)], y_pred))
    if rmse < best_rmse:
        best_rmse = rmse
        best_y_pred = y_pred
        best_params = (state_size, dropout_rate, minibatch_size, learning_rate, max_gradient_norm, num_heads)

# 최적 하이퍼파라미터에 대한 선수별 예측 결과 출력
df_results = pd.DataFrame({
    'Player_ID': data_last_years['player_id'],
    'Player_Name': data_last_years['last_name, first_name'],  # 선수 이름
    'Predicted_ERA_2024': best_y_pred.flatten()
})

# 최적 모델 결과 출력
print(f"Best RMSE: {best_rmse}, Best Params: {best_params}")
print(df_results)

In [None]:
df_results.to_csv('pred2024_mha_ts4.csv', index=False)