In [46]:
# Imports
import numpy as np #Mathematical tools
import matplotlib.pyplot as plt #Plots charts
import pandas as pd #Import and manage data sets
from sklearn.neural_network import MLPClassifier 
from sklearn.neural_network import MLPRegressor
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import mean_squared_error
# Preprocessing library
from sklearn.preprocessing import Imputer
# Categorical data encoder library
from sklearn.preprocessing import LabelEncoder, OneHotEncoder
# Splitting the data set into training and testing sets library
from sklearn.model_selection import train_test_split
#Feature scaling library
from sklearn.preprocessing import StandardScaler
%matplotlib inline

In [47]:
# Definitions

def best_output(row):
    """Decide which target (T2 or T3) gave the better download for one run.

    A download time equal to 100 is treated as "the download did not complete".

    Parameters
    ----------
    row : object exposing ``downloadTimeT2``, ``downloadTimeT3``,
        ``rxBytesT2`` and ``rxBytesT3`` as attributes (e.g. a DataFrame row).

    Returns
    -------
    int
        0 when T2 is the better target, 1 when T3 is.
    """
    t2_completed = row.downloadTimeT2 != 100
    t3_completed = row.downloadTimeT3 != 100

    # Both targets completed the download: the shorter time wins (ties go to T2).
    if t2_completed & t3_completed:
        return 0 if row.downloadTimeT2 <= row.downloadTimeT3 else 1

    # Only one target completed the download: that target wins.
    if t2_completed | t3_completed:
        return 0 if t2_completed else 1

    # Neither target completed: the one that received more bytes wins (ties go to T2).
    if (row.downloadTimeT2 == 100) & (row.downloadTimeT3 == 100):
        return 0 if row.rxBytesT2 >= row.rxBytesT3 else 1
        
def download_complete(row, column):
    """Tell whether the target selected in ``row[column]`` completed its download.

    A selection equal to 0 refers to T2; any other value refers to T3.
    A download time of 100.0 is treated as an unfinished download.

    Returns 1 when the selected target's download time differs from 100.0,
    otherwise 0.
    """
    selected_time = row.downloadTimeT2 if row[column] == 0 else row.downloadTimeT3
    return 1 if selected_time != 100.0 else 0

        
def download_time(row, column):
    """Return the download time of the target selected in ``row[column]``.

    The time is returned only when it is below 100: ``downloadTimeT2`` for a
    selection of 0, ``downloadTimeT3`` for a selection of 1. In every other
    case (time not below 100, or a selection other than 0/1) the result is
    ``None``.
    """
    selection = row[column]

    if (selection == 0) & (row.downloadTimeT2 < 100):
        return row.downloadTimeT2

    if (selection == 1) & (row.downloadTimeT3 < 100):
        return row.downloadTimeT3

    return None

    
def throughput(row, column):
    """Compute the throughput of the target selected in ``row[column]``.

    A selection equal to 0 uses the T2 columns; any other value uses T3.
    The result is ``(rxBytes / downloadTime) * 8 / 1e6`` -- presumably a
    bytes-to-megabits conversion; the time unit is not visible here
    (TODO confirm).
    """
    if row[column] == 0:
        received, elapsed = row.rxBytesT2, row.downloadTimeT2
    else:
        received, elapsed = row.rxBytesT3, row.downloadTimeT3

    return (received / elapsed) * 8 / 1e6
    
def optimum_choice(row, column):
    """Return 1 when the choice stored in ``row[column]`` equals ``row.best_output``, else 0."""
    return 1 if row[column] == row.best_output else 0
    
def a2a4rsrp(row):
    """Select a target by comparing ``row.rsrp2`` with ``row.rsrp3``.

    Returns 0 when ``rsrp2`` is greater than or equal to ``rsrp3`` and 1
    otherwise. (The name suggests an A2-A4 RSRP-style rule -- TODO confirm.)
    """
    return 0 if row.rsrp2 >= row.rsrp3 else 1

# Sem shadowing

In [48]:
# Importing data

# Load the simulation results obtained with each target (T2 and T3).
t2 = pd.read_csv('resultados/t2_OkumuraHata_Modificado', delimiter='\t')
t3 = pd.read_csv('resultados/t3_OkumuraHata_Modificado', delimiter='\t')

# Keep only the seeds (nRun) that are present in both datasets.
t2 = t2[t2.nRun.isin(t3.nRun)]
t3 = t3[t3.nRun.isin(t2.nRun)]
t2 = t2.reset_index(drop=True)
t3 = t3.reset_index(drop=True)

# The T3 columns below are attached to the T2 rows purely by row position, so
# both frames must list exactly the same seeds in the same order. Fail loudly
# instead of silently pairing results that belong to different runs.
assert np.array_equal(t2.nRun.values, t3.nRun.values), (
    'T2 and T3 rows are not aligned by nRun; align them (e.g. merge on nRun) before combining'
)

# Combine the datasets: shared columns come from T2, and the per-target
# results are stored side by side with a T2/T3 suffix.
data = t2.drop(['targetCellId', 'downloadTime', 'rxBytes'], axis=1)
data['downloadTimeT2'] = t2.downloadTime
data['downloadTimeT3'] = t3.downloadTime
data['rxBytesT2'] = t2.rxBytes
data['rxBytesT3'] = t3.rxBytes

# Label every run with the better target (0 = T2, 1 = T3).
data['best_output'] = data.apply(best_output, axis=1)
data.head()

Unnamed: 0,nRun,angle,rsrp1,rsrq1,rsrp2,rsrq2,rsrp3,rsrq3,previousrsrp1,previousrsrq1,previousrsrp2,previousrsrq2,previousrsrp3,previousrsrq3,downloadTimeT2,downloadTimeT3,rxBytesT2,rxBytesT3,best_output
0,1,-54.573,-91.4534,-4.36266,-96.7808,-9.69006,-102.921,-15.8302,-91.2293,-4.27418,-96.9354,-9.9803,-102.884,-15.9288,37.0,100.0,15728640,10170752,0
1,2,-24.0392,-91.261,-4.30386,-97.6844,-10.7273,-100.527,-13.5702,-91.0186,-4.21518,-97.8386,-11.0352,-100.562,-13.7581,46.809,100.0,15728640,15106984,0
2,3,-20.0429,-91.3627,-4.33647,-97.8093,-10.783,-100.232,-13.2054,-91.1204,-4.24653,-97.9572,-11.0833,-100.276,-13.4026,48.781,42.7271,15728640,15728640,1
3,4,-49.5133,-91.3211,-4.3304,-96.8836,-9.8929,-102.474,-15.4829,-91.0927,-4.24175,-97.0454,-10.1945,-102.447,-15.5956,38.277,100.0,15728640,10490760,0
4,5,-9.37145,-91.3008,-4.29974,-98.4062,-11.4052,-99.5329,-12.5319,-91.0547,-4.21276,-98.5311,-11.6892,-99.6079,-12.7659,100.0,39.651,15560208,15728640,1


In [49]:
# Build the feature matrix (current and previous RSRP/RSRQ columns) and the label vector
current_measurements = ['rsrp1', 'rsrq1', 'rsrp2', 'rsrq2', 'rsrp3', 'rsrq3']
feature_columns = current_measurements + ['previous' + name for name in current_measurements]
x = data[feature_columns]
y = data['best_output']

# Hold out 20% of the rows for testing; the fixed random_state keeps the split reproducible
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.2, random_state=0)

# Feature scaling (standardization): helps the learning algorithm converge.
# The scaler is fitted on the training rows only and then reused, unchanged,
# on the test rows.
sc_X = StandardScaler()
x_train = sc_X.fit_transform(x_train)
x_test = sc_X.transform(x_test)

In [50]:
# Fit a random forest on the training set, choosing the number of trees
# (1 to 49) by 10-fold cross-validated ROC AUC
from sklearn.ensemble import RandomForestClassifier

param_test = {'n_estimators': list(range(1, 50))}
forest = RandomForestClassifier(criterion='entropy', random_state=0)
classifier = GridSearchCV(estimator=forest, param_grid=param_test, cv=10, scoring='roc_auc')
classifier.fit(x_train, y_train)

GridSearchCV(cv=10, error_score='raise',
       estimator=RandomForestClassifier(bootstrap=True, class_weight=None, criterion='entropy',
            max_depth=None, max_features='auto', max_leaf_nodes=None,
            min_impurity_decrease=0.0, min_impurity_split=None,
            min_samples_leaf=1, min_samples_split=2,
            min_weight_fraction_leaf=0.0, n_estimators=10, n_jobs=1,
            oob_score=False, random_state=0, verbose=0, warm_start=False),
       fit_params=None, iid=True, n_jobs=1,
       param_grid={'n_estimators': [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49]},
       pre_dispatch='2*n_jobs', refit=True, return_train_score='warn',
       scoring='roc_auc', verbose=0)

In [51]:
# Evaluation of the model on the held-out test set
from sklearn.metrics import confusion_matrix

# Predict the test-set labels with the fitted grid search
y_pred = classifier.predict(x_test)

# Confusion matrix of true vs. predicted labels (kept in `cm`, not displayed)
cm = confusion_matrix(y_test, y_pred)

# Fraction of misclassified test samples. Despite its name, `p_knn` holds the
# error rate of whatever `classifier` currently is.
n_misclassified = (y_pred != y_test).sum()
p_knn = n_misclassified / y_pred.size
p_knn

0.0050000000000000001

# Com shadowing

In [52]:
# Importing data

# Load the simulation results obtained with each target (T2 and T3).
t2 = pd.read_csv('resultados/t2_OhBuildings_ComShadowing_Modificado', delimiter='\t')
t3 = pd.read_csv('resultados/t3_OhBuildings_ComShadowing_Modificado', delimiter='\t')

# Keep only the seeds (nRun) that are present in both datasets.
t2 = t2[t2.nRun.isin(t3.nRun)]
t3 = t3[t3.nRun.isin(t2.nRun)]
t2 = t2.reset_index(drop=True)
t3 = t3.reset_index(drop=True)

# The T3 columns below are attached to the T2 rows purely by row position, so
# both frames must list exactly the same seeds in the same order. Fail loudly
# instead of silently pairing results that belong to different runs.
assert np.array_equal(t2.nRun.values, t3.nRun.values), (
    'T2 and T3 rows are not aligned by nRun; align them (e.g. merge on nRun) before combining'
)

# Combine the datasets: shared columns come from T2, and the per-target
# results are stored side by side with a T2/T3 suffix.
data = t2.drop(['targetCellId', 'downloadTime', 'rxBytes'], axis=1)
data['downloadTimeT2'] = t2.downloadTime
data['downloadTimeT3'] = t3.downloadTime
data['rxBytesT2'] = t2.rxBytes
data['rxBytesT3'] = t3.rxBytes

# Label every run with the better target (0 = T2, 1 = T3).
data['best_output'] = data.apply(best_output, axis=1)
data.head()

Unnamed: 0,nRun,angle,rsrp1,rsrq1,rsrp2,rsrq2,rsrp3,rsrq3,previousrsrp1,previousrsrq1,previousrsrp2,previousrsrq2,previousrsrp3,previousrsrq3,downloadTimeT2,downloadTimeT3,rxBytesT2,rxBytesT3,best_output
0,1,-59.5059,-91.2149,-7.4026,-88.8116,-4.99927,-111.473,-27.6603,-91.0255,-7.18842,-88.9649,-5.12783,-111.421,-27.5835,34.5431,100.0,15728640,10065048,0
1,2,-43.5576,-99.2476,-3.50662,-109.401,-13.6605,-116.117,-20.3763,-99.2281,-3.50788,-109.365,-13.6443,-116.108,-20.3875,100.0,100.0,14300448,14300448,0
2,3,-50.6608,-94.044,-9.12913,-104.509,-19.594,-89.2722,-4.35738,-93.8069,-8.96722,-104.664,-19.8248,-89.2456,-4.40588,38.948,100.0,15728640,3330400,0
3,4,-12.9374,-91.5915,-8.67026,-97.4317,-14.5104,-87.7539,-4.83262,-91.2717,-8.38377,-97.5539,-14.666,-87.8278,-4.93989,55.6891,41.7761,15728640,15728640,1
4,5,-20.8164,-119.807,-17.3387,-105.859,-3.39044,-120.685,-18.2173,-119.778,-17.3485,-105.819,-3.38917,-120.652,-18.2227,100.0,100.0,1836064,1836064,0


In [53]:
# Build the feature matrix (current and previous RSRP/RSRQ columns) and the label vector
current_measurements = ['rsrp1', 'rsrq1', 'rsrp2', 'rsrq2', 'rsrp3', 'rsrq3']
feature_columns = current_measurements + ['previous' + name for name in current_measurements]
x = data[feature_columns]
y = data['best_output']

# Hold out 20% of the rows for testing; the fixed random_state keeps the split reproducible
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.2, random_state=0)

# Feature scaling (standardization): helps the learning algorithm converge.
# The scaler is fitted on the training rows only and then reused, unchanged,
# on the test rows.
sc_X = StandardScaler()
x_train = sc_X.fit_transform(x_train)
x_test = sc_X.transform(x_test)

In [54]:
# Fit a random forest on the training set, choosing the number of trees
# (1 to 49) by 10-fold cross-validated ROC AUC
from sklearn.ensemble import RandomForestClassifier

param_test = {'n_estimators': list(range(1, 50))}
forest = RandomForestClassifier(criterion='entropy', random_state=0)
classifier = GridSearchCV(estimator=forest, param_grid=param_test, cv=10, scoring='roc_auc')
classifier.fit(x_train, y_train)

GridSearchCV(cv=10, error_score='raise',
       estimator=RandomForestClassifier(bootstrap=True, class_weight=None, criterion='entropy',
            max_depth=None, max_features='auto', max_leaf_nodes=None,
            min_impurity_decrease=0.0, min_impurity_split=None,
            min_samples_leaf=1, min_samples_split=2,
            min_weight_fraction_leaf=0.0, n_estimators=10, n_jobs=1,
            oob_score=False, random_state=0, verbose=0, warm_start=False),
       fit_params=None, iid=True, n_jobs=1,
       param_grid={'n_estimators': [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49]},
       pre_dispatch='2*n_jobs', refit=True, return_train_score='warn',
       scoring='roc_auc', verbose=0)

In [55]:
# Evaluation of the model on the held-out test set
from sklearn.metrics import confusion_matrix

# Predict the test-set labels with the fitted grid search
y_pred = classifier.predict(x_test)

# Confusion matrix of true vs. predicted labels (kept in `cm`, not displayed)
cm = confusion_matrix(y_test, y_pred)

# Report the hyper-parameters selected by the grid search
print(classifier.best_params_)

# Fraction of misclassified test samples. Despite its name, `p_knn` holds the
# error rate of whatever `classifier` currently is.
n_misclassified = (y_pred != y_test).sum()
p_knn = n_misclassified / y_pred.size
p_knn

{'n_estimators': 45}


0.34794520547945207