In [17]:
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense

In [18]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score, classification_report

In [19]:
# 데이터셋 로드
data = pd.read_csv("datasets/diabetes.csv")

# 데이터 분리
X = data.drop('Outcome', axis=1)
y = data['Outcome']

# 데이터셋 나누기
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# 스케일링
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

**1. 당뇨병 데이터를 가지고 머신러닝 5가지 분류를 수행.
(SVM, LR, RF, DT, KNN)**

In [21]:
from sklearn.svm import SVC
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.neighbors import KNeighborsClassifier

# 모델 정의
models = {
    'SVM': SVC(),
    'Logistic Regression': LogisticRegression(),
    'Random Forest': RandomForestClassifier(),
    'Decision Tree': DecisionTreeClassifier(),
    'K-Nearest Neighbors': KNeighborsClassifier()
}

# 모델 훈련 및 평가
for model_name, model in models.items():
    model.fit(X_train, y_train)
    y_pred = model.predict(X_test)
    print(f"{model_name} Accuracy: {accuracy_score(y_test, y_pred):.2f}")
    print(classification_report(y_test, y_pred))

SVM Accuracy: 0.73
              precision    recall  f1-score   support

           0       0.77      0.83      0.80        99
           1       0.65      0.56      0.60        55

    accuracy                           0.73       154
   macro avg       0.71      0.70      0.70       154
weighted avg       0.73      0.73      0.73       154

Logistic Regression Accuracy: 0.75
              precision    recall  f1-score   support

           0       0.81      0.80      0.81        99
           1       0.65      0.67      0.66        55

    accuracy                           0.75       154
   macro avg       0.73      0.74      0.73       154
weighted avg       0.76      0.75      0.75       154

Random Forest Accuracy: 0.74
              precision    recall  f1-score   support

           0       0.80      0.80      0.80        99
           1       0.64      0.64      0.64        55

    accuracy                           0.74       154
   macro avg       0.72      0.72      0.72  

**2. 동일한 데이터로 딥러닝 분류 수행하라. (dense layer 만 사용)**

In [22]:
# 딥러닝 모델 정의
model = Sequential([
    Dense(64, input_shape=(X_train.shape[1],), activation='relu'),
    Dense(32, activation='relu'),
    Dense(1, activation='sigmoid')
])

# 모델 컴파일
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


In [23]:
# 모델 훈련
history = model.fit(X_train, y_train, epochs=100, batch_size=32, validation_data=(X_test, y_test), verbose=2)

# 모델 평가
loss, accuracy = model.evaluate(X_test, y_test)
print(f"Dense Neural Network Accuracy: {accuracy:.2f}")

Epoch 1/100
20/20 - 1s - 48ms/step - accuracy: 0.5717 - loss: 0.6771 - val_accuracy: 0.7403 - val_loss: 0.5956
Epoch 2/100
20/20 - 0s - 6ms/step - accuracy: 0.7215 - loss: 0.5530 - val_accuracy: 0.7273 - val_loss: 0.5462
Epoch 3/100
20/20 - 0s - 7ms/step - accuracy: 0.7492 - loss: 0.5000 - val_accuracy: 0.7208 - val_loss: 0.5208
Epoch 4/100
20/20 - 0s - 5ms/step - accuracy: 0.7720 - loss: 0.4694 - val_accuracy: 0.7597 - val_loss: 0.5140
Epoch 5/100
20/20 - 0s - 5ms/step - accuracy: 0.7769 - loss: 0.4551 - val_accuracy: 0.7403 - val_loss: 0.5113
Epoch 6/100
20/20 - 0s - 5ms/step - accuracy: 0.7850 - loss: 0.4427 - val_accuracy: 0.7532 - val_loss: 0.5142
Epoch 7/100
20/20 - 0s - 5ms/step - accuracy: 0.7834 - loss: 0.4380 - val_accuracy: 0.7662 - val_loss: 0.5112
Epoch 8/100
20/20 - 0s - 6ms/step - accuracy: 0.7818 - loss: 0.4330 - val_accuracy: 0.7597 - val_loss: 0.5181
Epoch 9/100
20/20 - 0s - 5ms/step - accuracy: 0.7834 - loss: 0.4280 - val_accuracy: 0.7468 - val_loss: 0.5201
Epoch 10/

**3. 해당 데이터에서 Outcome을 삭제하고 BMI를 예측하는 회귀를 수행하라.**

In [24]:
# 데이터 준비
X_reg = data.drop(['Outcome', 'BMI'], axis=1)
y_reg = data['BMI']

# 데이터셋 나누기
X_train_reg, X_test_reg, y_train_reg, y_test_reg = train_test_split(X_reg, y_reg, test_size=0.2, random_state=42)

# 스케일링
X_train_reg = scaler.fit_transform(X_train_reg)
X_test_reg = scaler.transform(X_test_reg)

In [25]:
from sklearn.linear_model import LinearRegression
from sklearn.ensemble import RandomForestRegressor
from sklearn.tree import DecisionTreeRegressor
from sklearn.neighbors import KNeighborsRegressor

# 모델 정의
regressors = {
    'Linear Regression': LinearRegression(),
    'Random Forest': RandomForestRegressor(),
    'Decision Tree': DecisionTreeRegressor(),
    'K-Nearest Neighbors': KNeighborsRegressor()
}

# 모델 훈련 및 평가
for reg_name, regressor in regressors.items():
    regressor.fit(X_train_reg, y_train_reg)
    y_pred_reg = regressor.predict(X_test_reg)
    print(f"{reg_name} MSE: {np.mean((y_pred_reg - y_test_reg)**2):.2f}")

Linear Regression MSE: 52.24
Random Forest MSE: 48.31
Decision Tree MSE: 112.64
K-Nearest Neighbors MSE: 52.00


**3번과 동일하지만 dense layer만 사용한 신경만으로 회귀를 수행하라.**

In [26]:
# 딥러닝 모델 정의
reg_model = Sequential([
    Dense(64, input_shape=(X_train_reg.shape[1],), activation='relu'),
    Dense(32, activation='relu'),
    Dense(1)
])

# 모델 컴파일
reg_model.compile(optimizer='adam', loss='mean_squared_error', metrics=['mse'])

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


In [27]:
# 모델 훈련
history_reg = reg_model.fit(X_train_reg, y_train_reg, epochs=100, batch_size=32, validation_data=(X_test_reg, y_test_reg), verbose=2)

# 모델 평가
mse = reg_model.evaluate(X_test_reg, y_test_reg)[1]
print(f"Dense Neural Network Regression MSE: {mse:.2f}")

Epoch 1/100
20/20 - 1s - 44ms/step - loss: 1075.5907 - mse: 1075.5907 - val_loss: 1059.5692 - val_mse: 1059.5692
Epoch 2/100
20/20 - 0s - 5ms/step - loss: 1016.9605 - mse: 1016.9605 - val_loss: 993.3488 - val_mse: 993.3488
Epoch 3/100
20/20 - 0s - 5ms/step - loss: 940.3936 - mse: 940.3936 - val_loss: 893.8750 - val_mse: 893.8750
Epoch 4/100
20/20 - 0s - 4ms/step - loss: 824.4592 - mse: 824.4592 - val_loss: 753.6199 - val_mse: 753.6199
Epoch 5/100
20/20 - 0s - 5ms/step - loss: 671.3572 - mse: 671.3572 - val_loss: 575.2970 - val_mse: 575.2970
Epoch 6/100
20/20 - 0s - 4ms/step - loss: 486.3302 - mse: 486.3302 - val_loss: 381.8347 - val_mse: 381.8347
Epoch 7/100
20/20 - 0s - 5ms/step - loss: 305.2419 - mse: 305.2419 - val_loss: 218.1450 - val_mse: 218.1450
Epoch 8/100
20/20 - 0s - 5ms/step - loss: 173.6826 - mse: 173.6826 - val_loss: 125.0603 - val_mse: 125.0603
Epoch 9/100
20/20 - 0s - 4ms/step - loss: 109.9465 - mse: 109.9465 - val_loss: 95.6810 - val_mse: 95.6810
Epoch 10/100
20/20 - 0s