# 환자 데이터를 활용한 응급 환자 분류 예측

## Gradient Boosting

### 라이브러리 호출

In [11]:
import pandas as pd
import numpy as np
import matplotlib as mpl
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.metrics import accuracy_score, classification_report
from sklearn.metrics import classification_report
from sklearn.model_selection import cross_validate

### 데이터 불러오기

In [5]:
# Dataset directory (machine-specific). The commented line below is an
# alternative location kept for switching between environments.
path = 'D:/ML/Example/Dataset/'
# path = 'C:/Users/kcpak/Desktop/Projects/MDA04/ML/Dataset/'

# Load the synthetic medical triage CSV into a DataFrame.
data = pd.read_csv(filepath_or_buffer=path + 'synthetic_medical_triage.csv')

In [6]:
# Preview the first row to check the column layout of the raw data.
data.head(n=1)

Unnamed: 0,age,heart_rate,systolic_blood_pressure,oxygen_saturation,body_temperature,pain_level,chronic_disease_count,previous_er_visits,arrival_mode,triage_level
0,17.9,95.4,147.1,97.4,36.48,1,0,0,walk_in,0


### 데이터 전처리

In [8]:
# Preprocessing: re-code the two categorical columns and give every column a
# Korean display name. Work happens on a copy so the raw `data` frame is left
# untouched. Values absent from a lookup table become NaN (Series.map
# semantics).

# arrival_mode (means of arrival) -> integer code
#   walk_in: 0, wheelchair: 1, ambulance: 2
mode_map = {"walk_in": 0, "wheelchair": 1, "ambulance": 2}

# triage_level (severity tier) -> Korean label
#   0: low, 1: moderate, 2: high, 3: most urgent
mode_map2 = {0: '낮음', 1: '보통', 2: "높음", 3: '최긴급'}
# Disabled alternative from the original notebook: an ordered pd.Categorical
# over level_order = ['Low', 'Medium', 'High', 'Critical'].

# `assign` replaces the existing columns in place, so column order is kept.
data2 = data.copy().assign(
    arrival_mode=lambda frame: frame["arrival_mode"].map(mode_map),
    triage_level=lambda frame: frame["triage_level"].map(mode_map2),
)

# Original (English) column name -> Korean column name.
columns_dict = {
    'age': '나이',                                  # age
    'heart_rate': '심박수',                          # heart rate
    'systolic_blood_pressure': '수축기 혈압',         # systolic blood pressure
    'oxygen_saturation': '산소 포화도',               # oxygen saturation
    'body_temperature': '체온',                      # body temperature
    'pain_level': '통증 강도',                        # pain intensity
    'chronic_disease_count': '기저질환 수',           # number of chronic diseases
    'previous_er_visits': '과거 응급실 방문 횟수',     # previous ER visits
    'arrival_mode': '내원 수단',                      # means of arrival
    'triage_level': '응급 단계',                      # triage (emergency) level
}

data2 = data2.rename(columns=columns_dict)
data2.head()

Unnamed: 0,나이,심박수,수축기 혈압,산소 포화도,체온,통증 강도,기저질환 수,과거 응급실 방문 횟수,내원 수단,응급 단계
0,17.9,95.4,147.1,97.4,36.48,1,0,0,0,낮음
1,79.2,147.9,158.6,96.0,39.35,10,4,2,2,최긴급
2,51.1,87.1,128.2,98.5,37.74,5,2,2,0,보통
3,56.8,84.7,147.2,92.5,37.55,4,4,4,0,보통
4,39.2,58.0,107.8,99.0,36.26,2,1,1,0,낮음


### 데이터 내 feature / class 추출

In [10]:
# Positional split of the preprocessed frame:
#   X - every column except the last one (the features)
#   y - the last column (the class label)
X, y = data2.iloc[:, :-1], data2.iloc[:, -1]

### 훈련용/테스트용 데이터 생성

In [13]:
# Hold out 20% of the rows as a test split; the fixed seed makes the split
# reproducible across runs.
train_X, test_X, train_y, test_y = train_test_split(
    X, y, test_size=0.2, random_state=1
)

### Gradient Boosting 모델 교차 검증 및 변수 중요도 확인

In [15]:
# Baseline: gradient boosting with the library's default hyperparameters,
# cross-validated on the training split only (the held-out test split is not
# touched here).
gb = GradientBoostingClassifier(random_state=99)
scores = cross_validate(
    gb,
    train_X,
    train_y,
    return_train_score=True,  # also keep the score on the fitting folds
    n_jobs=-1,                # run the folds in parallel on all cores
)

# Mean score over folds: fitting folds first, then validation folds
# ('test_score' is the cross-validation fold score, not the test split).
print(scores['train_score'].mean())
print(scores['test_score'].mean())

0.956857638888889
0.9415972222222221


In [16]:
# Second configuration: 500 boosting stages with learning rate 0.2,
# cross-validated the same way on the training split. Comparing the gap
# between the two printed means against the previous run indicates how much
# this setting overfits.
gb = GradientBoostingClassifier(
    n_estimators=500,
    learning_rate=0.2,
    random_state=42,
)
scores = cross_validate(
    gb,
    train_X,
    train_y,
    return_train_score=True,  # also keep the score on the fitting folds
    n_jobs=-1,                # run the folds in parallel on all cores
)

# Mean score over folds: fitting folds first, then validation folds.
print(scores['train_score'].mean())
print(scores['test_score'].mean())

0.9944791666666667
0.9335416666666667


In [None]:
# cross_validate works on clones of the estimator, so `gb` itself is still
# unfitted here. Fit it on the full training split (fit returns the estimator
# itself) and print the per-feature importances, ordered like the columns of
# train_X.
print(gb.fit(train_X, train_y).feature_importances_)

[0.0293632  0.03118606 0.0155258  0.02986521 0.05448572 0.81751747
 0.00660821 0.0115986  0.00384973]


In [20]:
# Label each importance with its feature name and rank from most to least
# important.
imp = (
    pd.Series(data=gb.feature_importances_, index=X.columns)
    .sort_values(ascending=False)
)

imp

통증 강도           0.817517
체온              0.054486
심박수             0.031186
산소 포화도          0.029865
나이              0.029363
수축기 혈압          0.015526
과거 응급실 방문 횟수    0.011599
기저질환 수          0.006608
내원 수단           0.003850
dtype: float64