# 1. DATABASE에서 데이터 불러오기

In [6]:
import sqlite3
import pandas as pd
  
    
# Load every row of the `pubg_stat` table from the local SQLite file into a DataFrame.
con = sqlite3.connect("data.db")
try:
    cur = con.cursor()
    query = cur.execute("SELECT * From pubg_stat")

    # Column names are taken from the cursor metadata so the frame mirrors the table.
    cols = [column[0] for column in query.description]

    df = pd.DataFrame.from_records(data=query.fetchall(), columns=cols)
finally:
    # Release the connection even when the query or the frame construction raises;
    # previously close() was only reached on the success path.
    con.close()

# The `id` column is removed; the remaining columns are kept unchanged.
df = df.drop(columns=['id'])

df

Unnamed: 0,kills,damageDealt,totalDistance,winPlace
0,0,5.186188,750.73900,22
1,0,196.747100,6731.80860,9
2,0,0.000000,180.19120,25
3,1,83.184010,9439.58030,13
4,0,0.000000,1674.16920,18
...,...,...,...,...
34046,0,52.170000,538.11450,16
34047,0,24.600000,217.13995,9
34048,0,169.999980,3513.84570,12
34049,0,0.000000,1456.48965,14


# 2. 전처리

In [7]:
import numpy as np
# Binarize `winPlace`: values <= 10 become 1, everything else becomes 0.
# NOTE(review): the column is overwritten in place, so re-running this cell on the
# already-binarized values turns every row into 1 (0 and 1 are both <= 10).
# Re-run the loading cell first if this cell has to be executed again.
df['winPlace'] = df['winPlace'].le(10).astype(int)

df

Unnamed: 0,kills,damageDealt,totalDistance,winPlace
0,0,5.186188,750.73900,0
1,0,196.747100,6731.80860,1
2,0,0.000000,180.19120,0
3,1,83.184010,9439.58030,0
4,0,0.000000,1674.16920,0
...,...,...,...,...
34046,0,52.170000,538.11450,0
34047,0,24.600000,217.13995,1
34048,0,169.999980,3513.84570,0
34049,0,0.000000,1456.48965,0


# 3. train/test 데이터 분리

In [10]:
from sklearn.model_selection import train_test_split

# Hold out 20% of the rows as the test partition; random_state pins the shuffle
# so the same split is produced on every run.
split_parts = train_test_split(df, random_state=1, test_size=0.2)
train, test = split_parts

# Print the (rows, columns) shape of each partition as a quick sanity check.
print(*(part.shape for part in (train, test)))

(27240, 4) (6811, 4)


In [13]:
# Names of the input columns and of the target column.
features = ['kills', 'damageDealt', 'totalDistance']
label = 'winPlace'

# Slice each partition into its inputs (X) and its target (y).
X_train, y_train = train[features], train[label]
X_test, y_test = test[features], test[label]

# 4. 모델 학습

In [16]:
from category_encoders import OrdinalEncoder
from sklearn.impute import SimpleImputer
from sklearn.ensemble import RandomForestClassifier
from sklearn.pipeline import make_pipeline
from sklearn.metrics import f1_score

# Chain the encoder, the imputer and the random-forest classifier into one estimator,
# so the same preprocessing is applied at fit time and at predict time.
pipe = make_pipeline(
    OrdinalEncoder(),
    SimpleImputer(),
    RandomForestClassifier(oob_score=True, n_jobs=-1, random_state=10),
)

# Pipeline.fit returns the pipeline itself, so `model` and `pipe` name the same object.
pipe.fit(X_train, y_train)
model = pipe

# Score the held-out partition with F1.
pred_test = model.predict(X_test)
print('f1_score:', f1_score(y_test, pred_test))

f1_score: 0.5575282453308739


# 5. pickle 파일 변환

In [18]:
import pickle

# Serialize the fitted pipeline to 'pubg.pkl'. The `with` block closes the file
# handle deterministically, so the bytes are flushed to disk as soon as the dump
# finishes instead of whenever the anonymous file object gets garbage-collected.
with open('pubg.pkl', 'wb') as pkl_file:
    pickle.dump(model, pkl_file)