In [1]:
import numpy as np
import pandas as pd
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.model_selection import train_test_split

from keras.models import Sequential
from keras.layers import Dense
from keras import regularizers

from sklearn.decomposition import PCA

import matplotlib.pyplot as plt
%matplotlib inline


# ignore 'More than 20 figure' warning and others
import warnings
warnings.filterwarnings('ignore')

data = pd.read_csv('data.csv')

genre_list = data.iloc[:, -1]
encoder = LabelEncoder()
y = encoder.fit_transform(genre_list)

scaler = StandardScaler()
X = scaler.fit_transform(np.array(data.iloc[:, 1:-1], dtype=float))


Using TensorFlow backend.


# Regularization or not

In [2]:
def reg_plot(X, y, n):    
    # n ---- The times of selecting different set to compare the function of regularization   
    
    Acc_no=[]
    Acc_yes=[]
    Judge=[]
    for i in range(n):
        
        X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)     
        
        # No regularization
        model = Sequential()
        model.add(Dense(256, activation='relu', input_shape=(X_train.shape[1],)))
        model.add(Dense(128, activation='relu'))
        model.add(Dense(10, activation='softmax')) 
        model.compile(optimizer='adam',
                     loss='sparse_categorical_crossentropy', 
                     metrics=['accuracy'])
        history = model.fit(X_train, 
                y_train, 
                epochs=20,
                batch_size=128,
                verbose=0)
        loss, acc1 = model.evaluate(X_test, y_test, verbose=0)
        Acc_no.append(acc1)
        
        # With regularization
        model = Sequential()
        model.add(Dense(256, activation='relu', input_shape=(X_train.shape[1],)))
        model.add(Dense(128, activation='relu', kernel_regularizer=regularizers.l2(0.01)))
        model.add(Dense(10, activation='softmax')) 
        model.compile(optimizer='adam',
                     loss='sparse_categorical_crossentropy', 
                     metrics=['accuracy'])
        history = model.fit(X_train, 
                y_train, 
                epochs=20,
                batch_size=128,
                verbose=0)        
        loss, acc2 = model.evaluate(X_test, y_test, verbose=0)
        Acc_yes.append(acc2)
        
        if acc1<acc2:
            Judge.append('yes')
        else:
            Judge.append('no')
        
        
    # plotting
    table = {
    'Acc_no': Acc_no,
    'Acc_yes': Acc_yes,  
    'Reg or not': Judge
    }
    
    df = pd.DataFrame(table)
    
    print(df)

In [12]:
reg_plot(X, y, 10)

     Acc_no   Acc_yes Reg or not
0  0.676647  0.634731         no
1  0.694611  0.688623         no
2  0.652695  0.634731         no
3  0.712575  0.658683         no
4  0.682635  0.658683         no
5  0.676647  0.664671         no
6  0.676647  0.658683         no
7  0.688623  0.664671         no
8  0.676647  0.640719         no
9  0.670659  0.664671         no


# Validation or not

In [9]:
def val_plot(X, y, n):
    
    Acc_no=[]
    Acc_yes=[]
    Judge=[]
    for i in range(n):
        
        X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)
        
        # No validation
        model = Sequential()
        model.add(Dense(256, activation='relu', input_shape=(X_train.shape[1],)))
        model.add(Dense(128, activation='relu'))
        model.add(Dense(10, activation='softmax')) 
        model.compile(optimizer='adam',
                     loss='sparse_categorical_crossentropy', 
                     metrics=['accuracy'])
        history = model.fit(X_train, 
                y_train, 
                epochs=200,
                batch_size=128,
                verbose=0)
        loss, acc1 = model.evaluate(X_test, y_test, verbose=0)
        Acc_no.append(acc1)
        
        # With validation
        model = Sequential()
        model.add(Dense(256, activation='relu', input_shape=(X_train.shape[1],)))
        model.add(Dense(128, activation='relu'))
        model.add(Dense(10, activation='softmax')) 
        model.compile(optimizer='adam',
                     loss='sparse_categorical_crossentropy', 
                     metrics=['accuracy'])
        history = model.fit(X_train, 
                y_train, 
                epochs=200,
                batch_size=128,
                validation_split=0.2,
                verbose=0)
        loss, acc2 = model.evaluate(X_test, y_test, verbose=0)
        Acc_yes.append(acc2)
         
        if acc1<acc2:
            Judge.append('yes')
        else:
            Judge.append('no')
        
        
    # plotting
    table = {
    'Acc_no': Acc_no,
    'Acc_yes': Acc_yes,  
    'Val or not': Judge
    }

    df = pd.DataFrame(table)

    print(df)       

In [10]:
val_plot(X, y, 10)

     Acc_no   Acc_yes Val or not
0  0.736527  0.700599         no
1  0.700599  0.676647         no
2  0.580838  0.580838         no
3  0.706587  0.682635         no
4  0.664671  0.664671         no
5  0.730539  0.712575         no
6  0.640719  0.706587        yes
7  0.688623  0.670659         no
8  0.616766  0.586826         no
9  0.652695  0.640719         no


# PCA or not

In [6]:
def pca_plot(X, y, n):
    
    Acc_no=[]
    Acc_yes=[]
    Judge=[]
    for i in range(n):
        
        X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)
        
        # No pca
        model = Sequential()
        model.add(Dense(256, activation='relu', input_shape=(X_train.shape[1],)))
        model.add(Dense(128, activation='relu'))
        model.add(Dense(10, activation='softmax')) 
        model.compile(optimizer='adam',
                     loss='sparse_categorical_crossentropy', 
                     metrics=['accuracy'])
        history = model.fit(X_train, 
                y_train, 
                epochs=100,
                batch_size=128,
                verbose=0)
        loss, acc1 = model.evaluate(X_test, y_test, verbose=0)
        Acc_no.append(acc1)
        
        # With pca
        pca = PCA()
        pca.fit(X)
        pca_data = pca.transform(X)        
        model = Sequential()
        model.add(Dense(256, activation='relu', input_shape=(X_train.shape[1],)))
        model.add(Dense(128, activation='relu'))
        model.add(Dense(10, activation='softmax')) 
        model.compile(optimizer='adam',
                     loss='sparse_categorical_crossentropy', 
                     metrics=['accuracy'])
        history = model.fit(X_train, 
                y_train, 
                epochs=100,
                batch_size=128,
                verbose=0)
        loss, acc2 = model.evaluate(X_test, y_test, verbose=0)
        Acc_yes.append(acc2)
        
        if acc1<acc2:
            Judge.append('yes')
        else:
            Judge.append('no')
        
        
    # plotting
    table = {
    'Acc_no': Acc_no,
    'Acc_yes': Acc_yes,  
    'pca or not': Judge
    }

    df = pd.DataFrame(table)

    print(df)       

In [5]:
pca_plot(X, y, 10)

     Acc_no   Acc_yes pca or not
0  0.712575  0.712575         no
1  0.694611  0.694611         no
2  0.676647  0.664671         no
3  0.694611  0.688623         no
4  0.712575  0.730539        yes
5  0.664671  0.670659        yes
6  0.640719  0.628743         no
7  0.640719  0.640719         no
8  0.718563  0.700599         no
9  0.622755  0.652695        yes


In [10]:
import ssl
ssl._create_default_https_context = ssl._create_unverified_context  # cancle verification

from keras.applications.vgg16 import VGG16

vgg16_model = VGG16()
vgg16_model.layers.pop()

from keras.layers import InputLayer

new_model = Sequential()
new_model.add(InputLayer(input_shape=(224,224,3)))

for layer in vgg16_model.layers:
  new_model.add(layer)

for layer in new_model.layers:
  layer.trainable=False

new_model.add(Dense(10, activation='softmax'))

new_model.summary()

Model: "sequential_43"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_5 (InputLayer)         (None, 224, 224, 3)       0         
_________________________________________________________________
block1_conv1 (Conv2D)        (None, 224, 224, 64)      1792      
_________________________________________________________________
block1_conv2 (Conv2D)        (None, 224, 224, 64)      36928     
_________________________________________________________________
block1_pool (MaxPooling2D)   (None, 112, 112, 64)      0         
_________________________________________________________________
block2_conv1 (Conv2D)        (None, 112, 112, 128)     73856     
_________________________________________________________________
block2_conv2 (Conv2D)        (None, 112, 112, 128)     147584    
_________________________________________________________________
block2_pool (MaxPooling2D)   (None, 56, 56, 128)     