In [4]:
import numpy as np
import pandas as pd
from sklearn.preprocessing import MinMaxScaler

# Load the raw game statistics.
# Raw string: the Windows path contains backslashes ('\P', '\F', ...) that a
# plain literal would treat as (invalid) escape sequences.
data = pd.read_csv(r'F:\Python\FlyAI_Project\Data\RawData\GameStats\LCK_2023_SPRING.csv')

# Drop players that appear 10 times or fewer.
player_counts = data['Player'].value_counts()
players_to_keep = player_counts[player_counts > 10].index
filtered_data = data[data['Player'].isin(players_to_keep)]

# Split the data by Role.
# Each per-role frame is an explicit copy, so the in-place percentage
# conversion below writes to an independent frame instead of a slice of
# `filtered_data` (the cause of the SettingWithCopyWarning).
roles = filtered_data['Role'].unique()
data_by_role = {role: filtered_data[filtered_data['Role'] == role].copy() for role in roles}

# Features used as model inputs.
features = ['GOLD%', 'VS%', 'DMG%', 'KP%', 'XPD@15']

# One MinMaxScaler instance, re-fit on each role's rows inside the loop, so
# after the loop it holds the min/max of the last role only.
# NOTE(review): scaling is fit on all rows of a role before the train/test
# split done in a later cell, so held-out rows influence the min/max --
# confirm this is acceptable for the evaluation.
scaler = MinMaxScaler()

scaled_data_by_role = {}
for role, df in data_by_role.items():
    # Convert percentage strings such as '21.5%' to fractions (0.215).
    # Any non-numeric column is converted, not only dtype == 'object', so
    # pandas string dtypes are handled as well.
    for feature in features:
        if not pd.api.types.is_numeric_dtype(df[feature]):
            df[feature] = df[feature].str.rstrip('%').astype('float') / 100.0

    # Min-max scale the feature columns; all other columns are kept unchanged.
    scaled_df = df.copy()
    scaled_df[features] = scaler.fit_transform(df[features])
    scaled_data_by_role[role] = scaled_df

# Show the first two scaled feature rows of every role.
scaled_data_by_role_sample = {role: df[features].head(2) for role, df in scaled_data_by_role.items()}
scaled_data_by_role_sample


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df[feature] = df[feature].str.rstrip('%').astype('float') / 100.0


{'TOP':       GOLD%       VS%      DMG%    KP%    XPD@15
 0  0.424779  0.470968  0.493188  0.563  0.910324
 5  0.433628  0.425806  0.504087  0.500  0.089676,
 'JUNGLE':       GOLD%       VS%      DMG%    KP%    XPD@15
 1  0.482759  0.466049  0.469136  0.688  0.941869
 6  0.505747  0.438272  0.481481  1.000  0.058131,
 'MID':       GOLD%       VS%      DMG%    KP%    XPD@15
 2  0.500000  0.342105  0.160976  0.438  0.323726
 7  0.614583  0.219298  0.353659  1.000  0.676274,
 'ADC':       GOLD%       VS%    DMG%   KP%    XPD@15
 3  0.573643  0.510040  0.6100  0.75  0.721921
 8  0.379845  0.325301  0.4025  0.50  0.263362,
 'SUPPORT':       GOLD%       VS%      DMG%    KP%   XPD@15
 4  0.194030  0.234848  0.146104  0.688  0.62861
 9  0.343284  0.462121  0.136364  0.500  0.37139}

In [5]:
from sklearn.model_selection import train_test_split

# Train/test split: hold out 20% of each role's rows, using a fixed seed.
role_splits = {
    role_name: train_test_split(role_frame, test_size=0.2, random_state=42)
    for role_name, role_frame in scaled_data_by_role.items()
}
# train_test_split returns [train, test] for the single frame passed in.
train_data_by_role = {role_name: parts[0] for role_name, parts in role_splits.items()}
test_data_by_role = {role_name: parts[1] for role_name, parts in role_splits.items()}

In [7]:
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score

# Random forest classifier with a fixed seed; the same estimator object is
# re-fit for every role in the loop below.
clf = RandomForestClassifier(random_state=42)

accuracies = {}
for role in list(train_data_by_role):
    # Discard rows with a missing value in any feature column and store the
    # cleaned frames back into the per-role dictionaries.
    train_part = train_data_by_role[role].dropna(subset=features)
    test_part = test_data_by_role[role].dropna(subset=features)
    train_data_by_role[role], test_data_by_role[role] = train_part, test_part

    # Inputs are the feature columns; the target is the 'Player' column.
    X_train, y_train = train_part[features], train_part['Player']
    X_test, y_test = test_part[features], test_part['Player']

    # Train on this role's training rows, then predict its held-out rows.
    clf.fit(X_train, y_train)
    y_pred = clf.predict(X_test)

    # Accuracy of the predictions for this role.
    accuracy = accuracy_score(y_test, y_pred)
    accuracies[role] = accuracy

print(accuracies)

{'TOP': 0.19318181818181818, 'JUNGLE': 0.11363636363636363, 'MID': 0.1724137931034483, 'ADC': 0.10465116279069768, 'SUPPORT': 0.14772727272727273}
