In [1]:
from keras.models import Sequential, Model
from keras.layers import Input, Dense, Reshape, Flatten, LeakyReLU
from keras.layers import BatchNormalization
from keras.optimizers import Adam
from keras.backend import clear_session
from keras.utils import to_categorical
from sklearn.utils import shuffle
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.preprocessing import MinMaxScaler
from tqdm import tqdm_notebook
from tqdm import tqdm
from numpy.random import seed
import tensorflow as tf
from IPython.display import display
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import ipywidgets as widgets

In [2]:
#Data Parameters
num_of_classes = 4
data_shape = (7,1)

#MLP Parameters
times_to_run = 50 #Number of times to run MLP model
mlp_epochs = 40
valid_split = 0.20

#GAN Parameters
latent_dim = 100
gan_epochs = 5000

#Random Seeds
selection_seed = 150
seed_multiplier = 1000000

In [3]:
fraction_of_data = 0.1
data_to_gen = 250

In [4]:
dataset = pd.read_csv("./UJIIndoorLoc/train_sorted_by_BF.csv")
dataset["BF"] = pd.factorize(dataset["BF"])[0].astype(int)#将标签映射到顺序数字上
labels = dataset.BF.values
features = dataset.drop(columns=['TIMESTAMP','PHONEID','USERID','RELATIVEPOSITION',
                                'SPACEID','BUILDINGID','FLOOR','LATITUDE','LONGITUDE',
                                'BF']).values
features,labels

(array([[-110, -110, -110, ..., -110, -110, -110],
        [-110, -110, -110, ..., -110, -110, -110],
        [-110, -110, -110, ..., -110, -110, -110],
        ...,
        [-110, -110, -110, ..., -110, -110, -110],
        [-110, -110, -110, ..., -110, -110, -110],
        [-110, -110, -110, ..., -110, -110, -110]], dtype=int64),
 array([ 0,  0,  0, ..., 12, 12, 12]))

In [5]:
# features[features==-110]=100
# features

In [6]:
#划分数据集
X_train, X_test, Y_train, Y_test = train_test_split(features, 
                                                    labels, 
                                                    test_size=0.3,
                                                    random_state= selection_seed,
                                                    #random_state：可以理解为随机数种子，主要是为了复现结果而设置
                                                    stratify=labels)
X_train,Y_train = shuffle(X_train, Y_train)
Y_train_encoded = to_categorical(Y_train)
Y_test_encoded = to_categorical(Y_test)
scaler = StandardScaler()  #from sklearn.preprocessing import StandardScaler
X_train_transformed = scaler.fit_transform(X_train)
X_test_transformed = scaler.fit_transform(X_test)
Y_test_encoded,Y_test_encoded.shape

(array([[0., 0., 0., ..., 0., 0., 0.],
        [0., 0., 0., ..., 0., 1., 0.],
        [0., 0., 1., ..., 0., 0., 0.],
        ...,
        [0., 0., 0., ..., 0., 0., 1.],
        [0., 0., 0., ..., 0., 0., 0.],
        [0., 0., 0., ..., 0., 0., 0.]], dtype=float32),
 (5982, 13))

In [7]:
model = Sequential()
sae_hidden_layers= [256,128,64,128,256]
model.add(Dense(sae_hidden_layers[0],input_dim=520,activation=tf.keras.layers.LeakyReLU(alpha=0.3),use_bias=False))
for units in sae_hidden_layers[1:]:
    model.add(Dense(units, activation=tf.keras.layers.LeakyReLU(alpha=0.3), use_bias=False))
model.add(Dense(520, activation=tf.keras.layers.LeakyReLU(alpha=0.3), use_bias=False))
model.compile(optimizer='adam', loss='mse')
model.fit(X_train,X_train,batch_size=64,epochs=20,verbose=1)

Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


<keras.callbacks.History at 0x1b385227760>

In [8]:
num_to_remove = (len(sae_hidden_layers) + 1) // 2
for i in range(num_to_remove):
    model.pop()

In [9]:
classifier_hidden_layers = [128,128]
for units in classifier_hidden_layers:
    model.add(Dense(units, activation='relu', use_bias=False))
model.add(Dense(13, activation='sigmoid', use_bias=False))  # 'sigmoid' for multi-label classification
model.compile(optimizer= 'adam', loss='binary_crossentropy', metrics=['accuracy'])
model.fit(X_train_transformed, Y_train_encoded, validation_split=0.2, batch_size=64, epochs=20, verbose=1)

Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


<keras.callbacks.History at 0x1b385555220>

In [10]:
preds = model.predict(X_test_transformed, batch_size=32)
result = np.equal(np.argmax(Y_test_encoded,axis=1), np.argmax(preds,axis=1)).astype(int)
acc = result.mean()
acc



0.9933132731527917