# 環境準備

## 安裝 HappyML 函式庫

In [2]:
# Fetch the HappyML library from GitHub once; skip the clone when a local
# "HappyML" directory is already present.
import os

happyml_present = os.path.isdir("HappyML")
if not happyml_present:
  os.system("git clone https://github.com/cnchi/HappyML.git")

## 下載資料集

In [3]:
# Download the dataset with wget unless a local copy already exists
Dataset_File = "Mushrooms.csv"

dataset_missing = not os.path.isfile(Dataset_File)
if dataset_missing:
  download_cmd = "wget https://raw.githubusercontent.com/cnchi/datasets/master/" + Dataset_File
  os.system(download_cmd)

# 資料集前處理

## 載入資料集

In [4]:
import HappyML.preprocessor as pp

# Load the data through HappyML's preprocessor. Reuse Dataset_File (set in the
# download cell) instead of repeating the literal name, so the file that gets
# downloaded and the file that gets loaded cannot drift apart.
dataset = pp.dataset(file=Dataset_File)
# Bare expression: the notebook renders the loaded data as the cell output
dataset

Unnamed: 0,class,cap-shape,cap-surface,cap-color,bruises,odor,gill-attachment,gill-spacing,gill-size,gill-color,...,stalk-surface-below-ring,stalk-color-above-ring,stalk-color-below-ring,veil-type,veil-color,ring-number,ring-type,spore-print-color,population,habitat
0,p,x,s,n,t,p,f,c,n,k,...,s,w,w,p,w,o,p,k,s,u
1,e,x,s,y,t,a,f,c,b,k,...,s,w,w,p,w,o,p,n,n,g
2,e,b,s,w,t,l,f,c,b,n,...,s,w,w,p,w,o,p,n,n,m
3,p,x,y,w,t,p,f,c,n,n,...,s,w,w,p,w,o,p,k,s,u
4,e,x,s,g,f,n,f,w,b,k,...,s,w,w,p,w,o,e,n,a,g
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
8119,e,k,s,n,f,n,a,c,b,y,...,s,o,o,p,o,o,p,b,c,l
8120,e,x,s,n,f,n,a,c,b,y,...,s,o,o,p,n,o,p,b,v,l
8121,e,f,s,n,f,n,a,c,b,n,...,s,o,o,p,o,o,p,b,c,l
8122,p,k,y,n,f,y,f,c,n,b,...,k,w,w,p,w,o,e,w,v,l


## 切分自變數與應變數

In [6]:
# Split the dataset into X and Y: column positions 1-22 are passed as
# x_columns and column position 0 as y_columns.
X, Y = pp.decomposition(
    dataset,
    x_columns=list(range(1, 23)),
    y_columns=[0],
)


## 類別資料數位化

In [10]:
# Encode the categorical data: one-hot encode column positions 0-21 of X,
# then label-encode Y.
# NOTE(review): remove_trap=True presumably drops one dummy column per
# feature to avoid the dummy-variable trap - confirm against HappyML.
X = pp.onehot_encoder(
    X,
    columns=list(range(22)),
    remove_trap=True,
)
Y = pp.label_encoder(Y)


In [11]:
# Display X after one-hot encoding to inspect the generated columns
X

Unnamed: 0,cap-shape_c,cap-shape_f,cap-shape_k,cap-shape_s,cap-shape_x,cap-surface_g,cap-surface_s,cap-surface_y,cap-color_c,cap-color_e,...,population_n,population_s,population_v,population_y,habitat_g,habitat_l,habitat_m,habitat_p,habitat_u,habitat_w
0,0,0,0,0,1,0,1,0,0,0,...,0,1,0,0,0,0,0,0,1,0
1,0,0,0,0,1,0,1,0,0,0,...,1,0,0,0,1,0,0,0,0,0
2,0,0,0,0,0,0,1,0,0,0,...,1,0,0,0,0,0,1,0,0,0
3,0,0,0,0,1,0,0,1,0,0,...,0,1,0,0,0,0,0,0,1,0
4,0,0,0,0,1,0,1,0,0,0,...,0,0,0,0,1,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
8119,0,0,1,0,0,0,1,0,0,0,...,0,0,0,0,0,1,0,0,0,0
8120,0,0,0,0,1,0,1,0,0,0,...,0,0,1,0,0,1,0,0,0,0
8121,0,1,0,0,0,0,1,0,0,0,...,0,0,0,0,0,1,0,0,0,0
8122,0,0,1,0,0,0,0,1,0,0,...,0,0,1,0,0,1,0,0,0,0


In [12]:
# Display Y after label encoding to inspect the numeric class values
Y

Unnamed: 0,class
0,1
1,0
2,0
3,1
4,0
...,...
8119,0
8120,0
8121,0
8122,1


## 切分訓練集、測試集

In [None]:
# Partition X and Y into training and testing subsets using HappyML's
# split_train_test with its default settings.
X_train, X_test, Y_train, Y_test = pp.split_train_test(
    x_ary=X,
    y_ary=Y,
)


## 特徵縮放

In [None]:
# Scale both splits; only the training split is passed as fit_ary, so the
# test split is not used for fitting.
X_train, X_test = pp.feature_scaling(
    fit_ary=X_train,
    transform_arys=(X_train, X_test),
)


# 模型建置

In [None]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Input

# Binary classifier: one input per column of X_train, two ReLU hidden layers
# (45 and 23 units) and a single sigmoid output unit.
model = Sequential()

# Declare the input width with an explicit Input object. Passing `input_dim`
# to the first Dense layer still works but is flagged as deprecated by Keras 3.
model.add(Input(shape=(X_train.shape[1],)))

# First hidden layer
model.add(Dense(units=45, kernel_initializer="glorot_normal", activation="relu"))

# Second hidden layer
model.add(Dense(units=23, kernel_initializer="glorot_normal", activation="relu"))

# Output layer: one sigmoid unit producing a value between 0 and 1
model.add(Dense(units=1, kernel_initializer="glorot_normal", activation="sigmoid"))

# Compile with the Adam optimizer and binary cross-entropy loss, tracking accuracy
model.compile(optimizer="adam", loss="binary_crossentropy", metrics=["acc"])

In [None]:
#import HappyML.neural_networks as nn

# Model Compiling
#model = nn.create_seq_model(nodes=[X_train.shape[1], 45, 23, 1], output_activation="sigmoid", loss_name="binary_crossentropy")

# 模型校正

In [None]:
# Load the TensorBoard notebook extension
%load_ext tensorboard

# Give every run its own timestamped folder under "logs" and build the
# TensorBoard callback that writes there (histograms every epoch).
import os
from datetime import datetime
from tensorflow.keras.callbacks import TensorBoard

run_stamp = datetime.now().strftime("%Y%m%d-%H%M%S")
logdir = os.path.join("logs", run_stamp)
tensorboard_callback = TensorBoard(log_dir=logdir, histogram_freq=1)

The tensorboard extension is already loaded. To reload it, use:
  %reload_ext tensorboard


In [None]:
# Start the TensorBoard
%tensorboard --logdir logs

Reusing TensorBoard on port 6006 (pid 29797), started 0:28:46 ago. (Use '!kill 29797' to kill it.)

<IPython.core.display.Javascript object>

In [None]:
# Train for 10 epochs in batches of 10 with a validation_split of 0.2,
# logging to TensorBoard through the callback.
epochs_metrics = model.fit(
    x=X_train,
    y=Y_train,
    validation_split=0.2,
    batch_size=10,
    epochs=10,
    callbacks=[tensorboard_callback],
)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


# 模型訓練

In [None]:
# Train for 20 epochs in batches of 10 with a validation_split of 0.2
# (no TensorBoard callback here).
epochs_metrics = model.fit(
    x=X_train,
    y=Y_train,
    validation_split=0.2,
    batch_size=10,
    epochs=20,
)

# 預測答案

In [None]:
# Predict
import pandas as pd

# The sigmoid output layer yields probabilities between 0 and 1. A bare
# astype(int) truncates toward zero and would turn almost every prediction
# into class 0, so threshold at 0.5 first to obtain 0/1 class labels.
Y_pred = (model.predict(x=X_test) > 0.5).astype(int)
# Reuse Y_test's index and column labels so the two frames align row by row
Y_pred = pd.DataFrame(Y_pred, index=Y_test.index, columns=Y_test.columns)

# Show the true labels and the predicted labels side by side
df = pd.concat([Y_test, Y_pred], axis=1)
print(df)

# 模型評估

In [None]:
# Evaluate on the test split; with one compiled metric, evaluate() returns
# the loss followed by that metric.
evaluation = model.evaluate(X_test, Y_test)
test_loss, test_acc = evaluation
print("Loss of Testing Set:", test_loss)
print("Accuracy of Testing Set:", test_acc)