In [1]:
import numpy as np
import pandas as pd
import pickle
import copy


## Read training [data, data_won ] & Data preprocessing

In [2]:

data_srcs = ['./data/', './data_won']

#################################################
##############  read training data  #############

# read training data
wlan_dfs = []
for wlan in ['wlan0', 'wlan1', 'wlan2']:
    df_cat = None
    for data_src in data_srcs:
        df = pd.read_csv(f'{data_src}/{wlan.strip()}.csv')
        df_cat = df if df is None else pd.concat([df_cat, df])
    wlan_dfs.append(df_cat)

#################################################
##############  data preprocessing  #############

# 取 exp : y = e^(-x/80)
normalized_wlan_dfs = []
for wlan_df in wlan_dfs:
    sel_indexs = list(wlan_df.columns)[4:]
    wlan_df = wlan_df.copy()
    wlan_df[sel_indexs] = (-wlan_df[sel_indexs]/80)#.apply(np.exp)
    normalized_wlan_dfs.append(wlan_df)

# find clean mac list
for i in range(3):
    wlan_df = normalized_wlan_dfs[i]
    
    mac_list = []
    for mac_, na_ in zip(wlan_df.columns[4:], wlan_df.isna().sum()[4:]):
        if na_ < len(wlan_df)*0.60:
            mac_list.append(mac_)

mac_list = sorted(list(set(mac_list)))
    
# make clean mac df
normalized_cleaned_wlan_dfs = []
for i in range(3):
    wlan_df = normalized_wlan_dfs[i]    
    selec_col = ['x','y','z','timestamp'] + mac_list
    df = pd.DataFrame(data={col:wlan_df[col] if col in wlan_df.columns else [np.nan] * len(wlan_df) for col in selec_col})
    normalized_cleaned_wlan_dfs.append(df)

# fill nan to 0
for i in range(3):
    normalized_cleaned_wlan_dfs[i] = normalized_cleaned_wlan_dfs[i].fillna(0)
    
[len(normalized_cleaned_wlan_dfs[i].columns) for i in range(3)]

[61, 61, 61]

In [3]:
print(f'\t[Info] Select Mac List Length: {len(mac_list)}')
print(f'\t[Info] Mac List: {mac_list}')

	[Info] Select Mac List Length: 57
	[Info] Mac List: ['00:0B:86:96:59:A0', '00:0B:86:96:59:A1', '00:0B:86:96:59:A2', '00:0B:86:96:60:80', '00:0B:86:96:60:81', '00:0B:86:96:60:82', '00:0B:86:96:61:C0', '00:0B:86:96:61:C1', '00:0B:86:96:61:C2', '00:0B:86:96:62:60', '00:0B:86:96:62:61', '00:0B:86:96:62:62', '00:0B:86:96:63:A0', '00:0B:86:96:69:00', '00:0B:86:96:69:01', '00:0B:86:96:69:02', '00:0B:86:96:6F:02', '00:0B:86:96:70:C2', '00:11:32:9D:2B:30', '00:11:32:9D:30:3A', '00:11:32:AD:8C:82', '00:11:32:AD:8E:B7', '00:11:32:B6:79:9E', '00:11:32:B6:7A:7F', '00:11:32:B6:86:00', '00:1A:1E:F3:67:A0', '04:8D:38:04:4E:86', '04:D4:C4:B5:E5:28', '0A:28:19:84:B7:DF', '0C:9D:92:02:8B:20', '0C:9D:92:02:8B:40', '16:11:32:9D:2B:30', '16:11:32:9D:30:3A', '16:11:32:B6:7A:7F', '16:11:32:B6:86:00', '40:B0:76:34:03:48', '44:A5:6E:41:9B:BA', '4A:E2:44:27:58:F2', '6C:F3:7F:31:FA:20', '6C:F3:7F:31:FA:21', '6C:F3:7F:31:FA:22', '6C:F3:7F:3A:B7:B0', '6C:F3:7F:3A:B7:B1', '6C:F3:7F:3A:B7:B2', '6C:F3:7F:3A:C2:90', '

## Read Testing [data_won_test]

In [4]:

data_srcs = ['./data_won_test']

#################################################
##############  read training data  #############

# read training data
df = pd.read_csv(f'./data_won_test/wlan012.csv')
wlan_dfs_test = []
for wlan in ['wlan0', 'wlan1', 'wlan2']:
    wlan_dfs_test.append( df.loc[df['name']==f'{wlan}'].iloc[:, 2:] )

#################################################
##############  data preprocessing  #############

# 取 exp : y = e^(-x/80)
normalized_wlan_dfs_test = []
for wlan_df in wlan_dfs_test:
    sel_indexs = list(wlan_df.columns)[4:]
    wlan_df = wlan_df.copy()
    wlan_df[sel_indexs] = (-wlan_df[sel_indexs]/80)#.apply(np.exp)
    normalized_wlan_dfs_test.append(wlan_df)

# # find clean mac list
# for i in range(3):
#     wlan_df = normalized_wlan_dfs[i]
    
#     mac_list = []
#     for mac_, na_ in zip(wlan_df.columns[4:], wlan_df.isna().sum()[4:]):
#         if na_ < len(wlan_df)*0.40:
#             mac_list.append(mac_)

# mac_list = sorted(list(set(mac_list)))
    
# make clean mac df
normalized_cleaned_wlan_dfs_test = []
for i in range(3):
    wlan_df = normalized_wlan_dfs_test[i]    
    selec_col = ['x','y','z','timestamp'] + mac_list
    df = pd.DataFrame(data={col:wlan_df[col] if col in wlan_df.columns else [np.nan] * len(wlan_df) for col in selec_col})
    normalized_cleaned_wlan_dfs_test.append(df)

# fill nan to 0
for i in range(3):
    normalized_cleaned_wlan_dfs_test[i] = normalized_cleaned_wlan_dfs_test[i].fillna(0)
    
[len(normalized_cleaned_wlan_dfs_test[i].columns) for i in range(3)]

[61, 61, 61]


---

## Wlan0,1,2並起來, Random Training data 5x5x5 or 5

In [5]:


###################################################################
########################## Train & Valid ##########################

X, Y = [], []

# 5x5x5
for idx1, row1 in normalized_cleaned_wlan_dfs[0].iterrows():
    x1 = row1.values[0]
    y1 = row1.values[1]
    signals1 = row1.values[4:]
    for idx2, row2 in normalized_cleaned_wlan_dfs[1].iterrows():
        x2 = row2.values[0]
        y2 = row2.values[1]
        signals2 = row2.values[4:]
        if x2!=x1 or y2!=y1:
            continue
        else:
            for idx3, row3 in normalized_cleaned_wlan_dfs[2].iterrows():
                x3 = row3.values[0]
                y3 = row3.values[1]
                signals3 = row3.values[4:]
                if x3!=x2 or y3!=y2:
                    continue
                else:
                    X.append(np.concatenate([signals1,signals2,signals3]))
                    Y.append([x1,y1])

#                     normalized_cleaned_wlan_dfs[2].drop(idx3, inplace=True)
#                     break
#             normalized_cleaned_wlan_dfs[1].drop(idx2, inplace=True)
#             break

X, Y = np.stack(X), np.stack(Y)

###################################################################
############################## Test ###############################

X_test, Y_test = [], []

# 5*5*5
for idx1, row1 in normalized_cleaned_wlan_dfs_test[0].iterrows():
    x1 = row1.values[0]
    y1 = row1.values[1]
    signals1 = row1.values[4:]
    for idx2, row2 in normalized_cleaned_wlan_dfs_test[1].iterrows():
        x2 = row2.values[0]
        y2 = row2.values[1]
        signals2 = row2.values[4:]
        if x2!=x1 or y2!=y1:
            continue
        else:
            for idx3, row3 in normalized_cleaned_wlan_dfs_test[2].iterrows():
                x3 = row3.values[0]
                y3 = row3.values[1]
                signals3 = row3.values[4:]
                if x3!=x2 or y3!=y2:
                    continue
                else:
                    X_test.append(np.concatenate([signals1,signals2,signals3]))
                    Y_test.append([x1,y1])

#                     normalized_cleaned_wlan_dfs_test[2].drop(idx3, inplace=True)
#                     break
#             normalized_cleaned_wlan_dfs_test[1].drop(idx2, inplace=True)
#             break

X_test, Y_test = np.stack(X_test), np.stack(Y_test)

with open('./ckpt/dataset.pkl', 'wb') as fp:
    pickle.dump({'mac_list': mac_list, 'x': X, 'y': Y, 'x_test': X_test, 'y_test': Y_test}, fp, protocol=pickle.HIGHEST_PROTOCOL)


In [6]:
with open('./ckpt/dataset.pkl', 'rb') as fp:
    dic = pickle.load(fp)
    mac_list, X, Y, X_test, Y_test = dic['mac_list'], dic['x'], dic['y'], dic['x_test'], dic['y_test']

In [7]:
len(mac_list), X.shape, Y.shape, X_test.shape, Y_test.shape

(57, (25119, 171), (25119, 2), (5088, 171), (5088, 2))

## Split Train & Test set

In [8]:
from sklearn.model_selection import train_test_split

def make_train_test_split(X, Y, mode='sklearn'):
    test_size = 0.10
    if mode=='sklearn':
        X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=test_size, random_state=42)
    elif mode=='mask':
        point_set = np.unique(Y, axis=0)
        train_point_set = point_set[int(np.ceil(len(point_set)*test_size)):]
        valid_point_set = point_set[:int(np.ceil(len(point_set)*test_size))]
        X_train, X_test, Y_train, Y_test = [], [], [], []
        for x, y in zip(X, Y):
            find = False
            for point in train_point_set:
                if sum(np.abs(y-point)) == 0:
                    X_train.append(x)
                    Y_train.append(y)
                    find = True
            if find:
                continue
            else:
                X_test.append(x)
                Y_test.append(y)
        X_train, X_test, Y_train, Y_test = np.stack(X_train), np.stack(X_test), np.stack(Y_train), np.stack(Y_test)
    return X_train, X_test, Y_train, Y_test
    

In [9]:
X.shape, Y.shape

((25119, 171), (25119, 2))

In [10]:
X

array([[0.    , 0.    , 0.95  , ..., 0.    , 0.    , 0.7625],
       [0.    , 0.    , 0.95  , ..., 0.8875, 0.9125, 0.7875],
       [0.    , 0.    , 0.95  , ..., 0.8875, 0.9125, 0.7625],
       ...,
       [0.    , 0.    , 0.    , ..., 0.    , 0.8875, 0.7375],
       [0.    , 0.    , 0.    , ..., 1.0375, 0.8875, 0.7375],
       [0.    , 0.    , 0.    , ..., 0.8625, 0.8875, 0.7625]])

## Model

In [11]:
import torch 
import torch.nn as nn

class Model(nn.Module):
    def __init__(self, in_dim, hid_dim=64, out_dim=2):
        super().__init__()
        self.mlp = nn.Sequential(
            #nn.Dropout(0.5),
            nn.Linear(in_dim, hid_dim, bias=True),
            nn.Tanh(),
            nn.BatchNorm1d(hid_dim),
            
#             nn.Dropout(0.5),
            nn.Linear(hid_dim, hid_dim, bias=True),
            nn.Tanh(),
            nn.BatchNorm1d(hid_dim),
            
            nn.Dropout(0.5),
            nn.Linear(hid_dim, hid_dim, bias=True),
            nn.Tanh(),
            nn.BatchNorm1d(hid_dim),
                        
            nn.Dropout(0.5),
            nn.Linear(hid_dim, out_dim, bias=True)
        )
        
    def forward(self, x):
        x = self.mlp(x)
        return x
    

## Train

In [12]:
import os, wandb
os.environ["WANDB_SILENT"] = "true"
wandb.init(project='Iot_wifi_location', name='mlp')

In [13]:
from torch.utils.data import TensorDataset, DataLoader
import torch.optim as optim

def weighted_mse_loss(input, target, weight):
    weight = weight.to(target.device)
    return torch.sum(weight * (input - target) ** 2)

class Averager():
    def __init__(self):
        self.sum = 0
        self.n = 0
    def push(self, a):
        self.sum += a
        self.n += 1
    def avg(self):
        return self.sum/(self.n+1e-8)

def train():
    global steps
    model.train()
    
    losses = Averager()
    for x, y_ in train_loader:
        x, y_ = x.cuda(), y_.cuda()
        y = model(x)
        
        optimizer.zero_grad()
        loss = criterion(y, y_)
#         loss = weighted_mse_loss(y, y_, torch.tensor([1.0, 1.0]))
        loss.backward()
        optimizer.step()
        
        steps += 1
        losses.push(loss.item())
        wandb.log({'train_mse':loss.item()})
    return losses.avg()

@torch.no_grad()
def valid():
    model.eval()
    
    losses = Averager()
    for x, y_ in valid_loader:    
        x, y_ = x.cuda(), y_.cuda()    
        y = model(x)
        
        optimizer.zero_grad()
        loss = criterion(y, y_)   
        optimizer.step()
        
        losses.push(loss.item())
    wandb.log({'valid_mse':losses.avg()})
    return losses.avg()
    
# X_train, X_valid, Y_train, Y_valid = make_train_test_split(X, Y, mode='sklearn') # mode: sklearn, mask
# X_train, Y_train, X_valid, Y_valid = torch.Tensor(X_train), torch.Tensor(Y_train),\
#                                     torch.Tensor(X_valid), torch.Tensor(Y_valid)
X_train, Y_train, X_valid, Y_valid = torch.Tensor(X), torch.Tensor(Y), torch.Tensor(X_test), torch.Tensor(Y_test)

train_dataset = TensorDataset(X_train, Y_train)
valid_dataset = TensorDataset(X_valid, Y_valid)

train_loader = DataLoader(train_dataset, 
                          batch_size=256,
                          drop_last=True)
valid_loader = DataLoader(valid_dataset,
                          batch_size=256)

model = Model(in_dim=X.shape[1])
model.cuda()

criterion = nn.MSELoss()

optimizer = optim.Adam(model.parameters(), lr=1e-4, weight_decay=0)

steps = 0
max_steps = 100000
max_epochs = 500 
best_valid_loss = np.inf
for epoch in range(max_epochs):
    print(f'\t[{epoch+1}/{max_epochs}]', end='  \r')
    
    train_loss = train()
    valid_loss = valid()
    
    if valid_loss < best_valid_loss:
        best_model = copy.deepcopy(model)
        torch.save({'mse':valid_loss ,'model':best_model.state_dict()}, './ckpt/best.pt')
        best_valid_loss = valid_loss
        
#     print(f'\t[{steps}/{max_steps}]', end='  \r')
#     if steps > max_steps:
#         break
    

	[500/500]  

## Test_won_test

In [14]:

# data_srcs = ['./data_won']#, './data']

# #################################################
# ##############  read training data  #############

# # read training data
# wlan_dfs = []
# for wlan in ['wlan0', 'wlan1', 'wlan2']:
#     df_cat = None
#     for data_src in data_srcs:
#         df = pd.read_csv(f'{data_src}/{wlan.strip()}.csv')
#         df_cat = df if df is None else pd.concat([df_cat, df])
#     wlan_dfs.append(df_cat)

# #################################################
# ##############  data preprocessing  #############

# # 取 exp : y = e^(-x/80)
# normalized_wlan_dfs = []
# for wlan_df in wlan_dfs:
#     sel_indexs = list(wlan_df.columns)[4:]
#     wlan_df = wlan_df.copy()
#     wlan_df[sel_indexs] = (-wlan_df[sel_indexs]/80)#.apply(np.exp)
#     normalized_wlan_dfs.append(wlan_df)

# # find clean mac list
# # for i in range(3):
# #     wlan_df = normalized_wlan_dfs[i]
    
# #     mac_list = []
# #     for mac_, na_ in zip(wlan_df.columns[4:], wlan_df.isna().sum()[4:]):
# #         if na_ < len(wlan_df)*0.40:
# #             mac_list.append(mac_)

# # mac_list = sorted(list(set(mac_list)))

# # make clean mac df
# normalized_cleaned_wlan_dfs = []
# for i in range(3):
#     wlan_df = normalized_wlan_dfs[i]    
#     selec_col = ['x','y','z','timestamp'] + mac_list
#     df = pd.DataFrame(data={col:wlan_df[col] if col in wlan_df.columns else [np.nan] * len(wlan_df) for col in selec_col})
#     normalized_cleaned_wlan_dfs.append(df)


# # fill nan to 0
# for i in range(3):
#     normalized_cleaned_wlan_dfs[i] = normalized_cleaned_wlan_dfs[i].fillna(0)
    

# ############################################################################
# ############################################################################

# X, Y = [], []

# # 5x5x5
# for idx1, row1 in normalized_cleaned_wlan_dfs[0].iterrows():
#     x1 = row1.values[0]
#     y1 = row1.values[1]
#     signals1 = row1.values[4:]
#     for idx2, row2 in normalized_cleaned_wlan_dfs[1].iterrows():
#         x2 = row2.values[0]
#         y2 = row2.values[1]
#         signals2 = row2.values[4:]
#         if x2!=x1 or y2!=y1:
#             continue
#         else:
#             for idx3, row3 in normalized_cleaned_wlan_dfs[2].iterrows():
#                 x3 = row3.values[0]
#                 y3 = row3.values[1]
#                 signals3 = row3.values[4:]
#                 if x3!=x2 or y3!=y2:
#                     continue
#                 else:
#                     X.append(np.concatenate([signals1,signals2,signals3]))
#                     Y.append([x1,y1])

# #                     normalized_cleaned_wlan_dfs[2].drop(idx3, inplace=True)
# #                     break
# #             normalized_cleaned_wlan_dfs[1].drop(idx2, inplace=True)
# #             break

# X_test, Y_test = np.stack(X), np.stack(Y)

############################################################################
############################################################################
#X_test, Y_test = torch.tensor(X_test), torch.tensor(Y_test)

test_dataset = TensorDataset(torch.tensor(X_test), torch.tensor(Y_test))

test_loader = DataLoader(test_dataset,
                          batch_size=256)


In [21]:

mse, n = 0, 0
model.eval()
pred, answ = [], []
for x, y_ in test_loader:
    y = best_model(x.cuda().float())
    
    pred.append(y.cpu().detach())
    answ.append(y_.cpu().detach())

pred = torch.cat(pred)
answ = torch.cat(answ)
    
# mse 
mse = torch.nn.MSELoss()(pred, answ)
mse_x = torch.nn.MSELoss()(pred[:,0], answ[:,0])
mse_y = torch.nn.MSELoss()(pred[:,1], answ[:,1])
std_x = torch.std(pred[:,0])
std_y = torch.std(pred[:,1])
print('mse:',float(mse), ' mse_x:',float(mse_x), ' mse_y:', float(mse_y), ' ; ', ' std_x:', float(std_x), ' std_y', float(std_y))

mse: 6.779625807306959  mse_x: 5.179384355307586  mse_y: 8.379867259306332  ;   std_x: 2.0091824531555176  std_y 0.4653508961200714


In [22]:
pred_aug = []
answ_aug = []
x,y = 0,0
tmp = []
for i in range(len(answ)):
    x_, y_ = answ[i]
    if x==x_ and y==y_:
        tmp.append(pred[i])
    else:
        if len(tmp)!=0:
            pred_aug.append(torch.stack(tmp).mean(axis=0))
            answ_aug.append(ans)
        tmp = [pred[i]]
        ans = answ[i]
        x, y = x_, y_
        
pred_aug = torch.stack(pred_aug)
answ_aug = torch.stack(answ_aug)

len(pred_aug), len(pred)

(20, 5088)

In [23]:

mse = torch.nn.MSELoss()(pred_aug, answ_aug)
mse_x = torch.nn.MSELoss()(pred_aug[:,0], answ_aug[:,0])
mse_y = torch.nn.MSELoss()(pred_aug[:,1], answ_aug[:,1])
std_x = torch.std(pred_aug[:,0])
std_y = torch.std(pred_aug[:,1])
print('mse:',float(mse), ' mse_x:',float(mse_x), ' mse_y:', float(mse_y), ' ; ', ' std_x:', float(std_x), ' std_y', float(std_y))


mse: 4.882162972777902  mse_x: 4.109006375826632  mse_y: 5.655319569729171  ;   std_x: 1.9143109321594238  std_y 0.47439807653427124


In [24]:
torch.save({'mse':float(mse) ,'model':best_model.state_dict()}, './ckpt/final.pt')

In [25]:
for i in range(len(answ_aug)):
    print(pred_aug[i].numpy(), answ_aug[i].numpy())

[9.67199  3.473061] [9.13  2.436]
[9.109683  3.5988626] [9.235 5.876]
[10.507136   2.5335171] [13.466 -0.19 ]
[11.74102   3.457102] [14.37  7.15]
[10.343455   3.0551696] [8.36  7.122]
[5.806749  3.5191298] [4.17 5.4 ]
[6.7405457 3.3443322] [3.725 1.78 ]
[6.7706885 2.720373 ] [6.63 6.31]
[7.0206285 3.458142 ] [3.9  3.49]
[6.9714355 3.3399568] [4.5  6.07]
[7.1470594 3.3458545] [6.974 4.831]
[11.004126   2.6783493] [11.771  7.524]
[6.8950634 3.5028985] [6.85  2.555]
[6.623708 3.220631] [2.624 0.294]
[9.143018  2.3294158] [11.783  2.433]
[10.338418   2.7148223] [7.94  3.218]
[7.4819956 3.2415762] [6.695 4.186]
[6.4124923 3.541331 ] [4.43 3.25]
[9.321188  3.4856868] [8.907 4.939]
[11.33448   4.442448] [10.579  2.339]
