In [46]:
import tensorflow as tf 
import numpy as np
import pandas as pd
from keras.layers import Flatten,concatenate,Dense,Lambda,Dropout
from keras.layers import Input
from keras.models import Model
from keras.utils.vis_utils import plot_model
import matplotlib.pyplot as plt
from keras.optimizers import RMSprop
from keras import backend as K
import random

### 划分数据集

In [47]:
path = "./UJIIndoorLoc/children_13/sorted/00_sorted.csv"
train_df = pd.read_csv(path,header=0)
print(train_df.shape)
a = train_df.mean()
a["WAP004"]
for i in train_df.columns[:520]:
    if a[i]==-110:
        del train_df[i]
print(train_df.shape)
train_df["REF"] = pd.factorize(train_df["REF"])[0].astype(int)#将标签映射到顺序数字上
labels = train_df.REF.values
features = train_df.drop(columns=['TIMESTAMP','PHONEID','USERID','RELATIVEPOSITION',
                                'SPACEID','BUILDINGID','FLOOR','LATITUDE','LONGITUDE',
                                'BF','REF']).values
features,labels,features.shape

(1059, 531)
(1059, 130)


(array([[-110, -110, -110, ..., -110, -110, -110],
        [-110, -110, -110, ..., -110, -110, -110],
        [-110, -110, -110, ..., -110, -110, -110],
        ...,
        [-110, -110, -110, ..., -110, -110, -110],
        [-110, -110, -110, ..., -110, -110, -110],
        [-110, -110, -110, ..., -110, -110, -110]], dtype=int64),
 array([ 0,  0,  0, ..., 53, 53, 53]),
 (1059, 119))

In [48]:
from sklearn.model_selection import train_test_split
from sklearn.utils import shuffle
X_train, X_test, Y_train, Y_test = train_test_split(features, 
                                                    labels, 
                                                    test_size=0.3,
                                                    random_state= 150,
                                                    #random_state：可以理解为随机数种子，主要是为了复现结果而设置
                                                    stratify=labels)#保证划分的test中的所有标签齐全
X_train,Y_train = shuffle(X_train, Y_train)
Y_train,Y_test

(array([12, 20, 51, 15, 41, 48, 37, 11, 33, 21, 27, 46, 31,  5, 30, 13, 41,
        43, 29, 28, 21,  9, 28, 46, 29, 12, 39, 47, 49, 27, 25, 47, 40, 53,
        42, 12, 39, 52, 22, 36,  2, 41, 26,  5, 31,  5, 35, 35, 30, 51, 42,
        26, 36, 51,  0, 31, 34, 38, 28, 50, 14,  2, 33, 36, 41, 33, 49, 25,
        52, 15, 25, 12, 26, 29, 37, 51, 52, 29,  9, 11, 50, 53, 35, 47, 14,
        42, 22, 20, 13, 44, 15, 13, 30, 36, 45,  7, 16, 47,  7,  8, 32, 52,
        15, 29, 28,  0, 50, 36,  5, 40, 20, 32, 13, 49, 35, 13,  9, 47, 27,
        41,  5, 18, 12, 41, 24, 24, 38, 19, 24, 46, 50, 48, 23, 50, 41, 23,
        43, 45, 13,  6, 33,  6,  1, 17, 53, 38, 21, 15, 13, 47, 26,  6,  0,
        25, 26, 34, 39, 22, 18, 25, 51, 35, 35, 51,  6, 12,  7, 23, 29, 14,
        34, 36, 51,  1, 29,  3, 51, 20, 39, 36,  9, 48, 46, 33,  6, 20, 20,
        36, 39, 15,  4, 47, 25, 37, 26,  4,  0, 10,  0, 17,  5, 14, 15,  8,
        48, 11, 49, 26, 13, 16, 20,  7, 34, 31, 20, 24, 36, 13,  5, 12, 20,
         7, 

In [49]:
from sklearn.preprocessing import StandardScaler
from keras.utils import to_categorical
scale = StandardScaler().fit(X_train)
X_train_transform = scale.transform(X_train)
X_test_transform = scale.transform(X_test)
Y_train_encoded = to_categorical(Y_train)
Y_test_encoded = to_categorical(Y_test)
input_dim = (119,)

### 定义特征提取网络

In [50]:
from keras.models import Model
def featureNet(input_dim):
    inp = Input(shape=input_dim ,name = 'ap_feature')
    model = Dense(1024,activation='relu')(inp)
    model = Dense(512,activation='relu')(model)
    model = Dense(256,activation='relu')(model)
    output = Dense(119,activation='relu')(model)
    return Model(inp,output)

In [51]:
def euclidean_distance(vects):
    x, y = vects
    return K.sqrt(K.sum(K.square(x - y), axis=1, keepdims=True))


def eucl_dist_output_shape(shapes):
    shape1, shape2 = shapes
    return (shape1[0], 1)

In [52]:
base_network = featureNet(input_dim)
input_a = Input(shape=input_dim)
input_b = Input(shape=input_dim)

# because we re-use the same instance`base_network`,
# the weights of the network
# will be shared across the two branches
processed_a = base_network(input_a)
processed_b = base_network(input_b)

distance = Lambda(euclidean_distance, output_shape=eucl_dist_output_shape)([processed_a, processed_b])

model = Model(inputs=[input_a, input_b], outputs=distance)

In [53]:
def contrastive_loss(y_true, y_pred):
    '''Contrastive loss from Hadsell-et-al.'06
    http://yann.lecun.com/exdb/publis/pdf/hadsell-chopra-lecun-06.pdf
    '''
    margin = 1
    return K.mean(y_true * K.square(y_pred) + (1 - y_true) * K.square(K.maximum(margin - y_pred, 0)))

### 创建正负样本对

In [54]:
def create_pairs(x,digit_indices):
    pairs = []
    labels = []
    #这里n是所有类别的样本数目的最小值再减1
    n = min([len(digit_indices[d]) for d in range(54)]) - 1
    for d in range(54):
        for i in range(n):
            # 遍历d类的样本，取临近的两个样本为正样本对
            z1, z2 = digit_indices[d][i], digit_indices[d][i+1]
            pairs += [[x[z1], x[z2]]]
            # randrange会产生1~9之间的随机数，含1和9
            inc = random.randrange(1, 10)
            # (d+inc)%10一定不是d，用来保证负样本对的图片绝不会来自同一个类
            dn = (d + inc) % 10
            # 在d类和dn类中分别取i样本构成负样本对
            z1, z2 = digit_indices[d][i], digit_indices[dn][i]
            pairs += [[x[z1], x[z2]]]
            # 添加正负样本标签
            labels += [1.0, 0.0] #标签一定使用浮点数，不然会报错
    return np.array(pairs), np.array(labels)

In [55]:
digit_indices = [np.where(Y_train == i)[0] for i in range(54)]
tr_pairs, tr_y = create_pairs(X_train_transform, digit_indices)

digit_indices = [np.where(Y_test == i)[0] for i in range(54)]
te_pairs, te_y = create_pairs(X_test_transform, digit_indices)

In [56]:
def compute_accuracy(predictions, labels):
    '''Compute classification accuracy with a fixed threshold on distances.
    '''
    return labels[predictions.ravel() < 0.5].mean()

In [57]:
tr_pairs[:, 0],tr_pairs[:, 1],tr_y

(array([[-0.50679351, -0.13210547, -0.15448632, ..., -0.21598904,
         -0.03676073, -0.03676073],
        [-0.50679351, -0.13210547, -0.15448632, ..., -0.21598904,
         -0.03676073, -0.03676073],
        [-0.50679351, -0.13210547, -0.15448632, ..., -0.21598904,
         -0.03676073, -0.03676073],
        ...,
        [-0.50679351, -0.13210547, -0.15448632, ..., -0.21598904,
         -0.03676073, -0.03676073],
        [-0.50679351, -0.13210547, -0.15448632, ..., -0.21598904,
         -0.03676073, -0.03676073],
        [-0.50679351, -0.13210547, -0.15448632, ..., -0.21598904,
         -0.03676073, -0.03676073]]),
 array([[-0.50679351, -0.13210547, -0.15448632, ..., -0.21598904,
         -0.03676073, -0.03676073],
        [-0.50679351, -0.13210547, -0.15448632, ..., -0.21598904,
         -0.03676073, -0.03676073],
        [-0.50679351, -0.13210547, -0.15448632, ..., -0.21598904,
         -0.03676073, -0.03676073],
        ...,
        [-0.50679351, -0.13210547, -0.15448632, ..., -

In [58]:
from keras.optimizers import Adam
rms = RMSprop()
model.compile(loss=contrastive_loss, optimizer=rms)
model.fit([tr_pairs[:, 0], tr_pairs[:, 1]], tr_y,
          validation_data=([te_pairs[:, 0], te_pairs[:, 1]], te_y),
          batch_size=64,
          epochs=50,
          verbose=1)

# compute final accuracy on training and test sets
pred = model.predict([tr_pairs[:, 0], tr_pairs[:, 1]])
tr_acc = compute_accuracy(pred, tr_y)
pred = model.predict([te_pairs[:, 0], te_pairs[:, 1]])
te_acc = compute_accuracy(pred, te_y)

print('* Accuracy on training set: %0.2f%%' % (100 * tr_acc))
print('* Accuracy on test set: %0.2f%%' % (100 * te_acc))

Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50
* Accuracy on training set: 100.00%
* Accuracy on test set: 95.15%
