In [42]:
# Imports
import numpy as np #Mathematical tools
import matplotlib.pyplot as plt #Plots charts
import pandas as pd #Import and manage data sets
from sklearn.neural_network import MLPClassifier 
from sklearn.neural_network import MLPRegressor
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import mean_squared_error
# Preprocessing library
from sklearn.preprocessing import Imputer
# Categorical data encoder library
from sklearn.preprocessing import LabelEncoder, OneHotEncoder
# Splitting the data set into training and testing sets library
from sklearn.model_selection import train_test_split
#Feature scaling library
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import confusion_matrix
%matplotlib inline

In [43]:
# Imports part 2

from sklearn.neighbors import KNeighborsClassifier
from sklearn.svm import SVC
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier

In [44]:
# Functions part 1

def best_output(row):
    """Label which target gave the better outcome for one row.

    Following the original author's notes, a ``downloadTime`` equal to 100
    means the download did not complete for that target.

    Returns 0 when T2 is the better target and 1 when T3 is:
    * both completed    -> the smaller download time wins (ties go to T2);
    * only one completed -> that target wins;
    * neither completed -> the larger ``rxBytes`` wins (ties go to T2).
    """
    t2_finished = row.downloadTimeT2 != 100
    t3_finished = row.downloadTimeT3 != 100

    # Both targets completed the download: prefer the faster one.
    if t2_finished and t3_finished:
        return 0 if row.downloadTimeT2 <= row.downloadTimeT3 else 1

    # Exactly one target completed the download: prefer that one.
    if t2_finished or t3_finished:
        return 0 if t2_finished else 1

    # Neither target completed: prefer the one that received more bytes.
    return 0 if row.rxBytesT2 >= row.rxBytesT3 else 1
        
def download_complete(row, column):
    """Return 1 if the target selected in ``row[column]`` finished its download, else 0.

    A selection of 0 is checked against ``downloadTimeT2``; any other value
    is checked against ``downloadTimeT3``. A download time of exactly 100.0
    is treated as "did not finish".
    """
    if row[column] == 0:
        elapsed = row.downloadTimeT2
    else:
        elapsed = row.downloadTimeT3
    return 1 if elapsed != 100.0 else 0

        
def download_time(row, column):
    """Return the download time of the target selected in ``row[column]``.

    Only times strictly below 100 are returned. For any other case
    (time >= 100, or a selection that is neither 0 nor 1) the result is None.
    """
    choice = row[column]
    if choice == 0 and row.downloadTimeT2 < 100:
        return row.downloadTimeT2
    if choice == 1 and row.downloadTimeT3 < 100:
        return row.downloadTimeT3
    return None

    
def throughput(row, column):
    """Compute the throughput of the target selected in ``row[column]``.

    The result is ``rxBytes / downloadTime * 8 / 1e6`` for T2 when the
    selection is 0 and for T3 otherwise.
    NOTE(review): presumably Mbit/s, assuming rxBytes is in bytes and
    downloadTime in seconds -- confirm against the simulator output.
    """
    if row[column] == 0:
        received, elapsed = row.rxBytesT2, row.downloadTimeT2
    else:
        received, elapsed = row.rxBytesT3, row.downloadTimeT3
    return (received / elapsed) * 8 / 1e6
    
def optimum_choice(row, column):
    """Return 1 when the selection in ``row[column]`` equals ``row.best_output``, else 0."""
    return 1 if row[column] == row.best_output else 0
    
def a2a4rsrp(row):
    """Choose a target by comparing RSRP: 0 if ``rsrp2 >= rsrp3``, otherwise 1.

    NOTE(review): the name suggests this mimics an A2-A4 RSRP handover
    rule -- confirm; the code itself only compares the two RSRP values.
    """
    return 0 if row.rsrp2 >= row.rsrp3 else 1

In [45]:
# Functions part 2
def percentageOfErrors(y_real, y_predicted):
    """Return the fraction (0..1, not a percentage) of predictions that differ from the truth.

    Both arguments must support element-wise ``!=``; ``y_real`` must expose ``.size``.
    """
    mismatches = (y_predicted != y_real)
    wrong_count = mismatches.sum()
    return wrong_count / y_real.size

def applyingClassifier(classifier, x_test, x_train, y_test, y_train):
    """Fit ``classifier`` on the training data and report its test error rate.

    Parameters
    ----------
    classifier : object with ``fit(X, y)`` and ``predict(X)``
        The estimator (or search wrapper) to train; it is fitted in place.
    x_test, x_train : array-like
        Test and training features (note the test-before-train order).
    y_test, y_train : array-like
        Test and training labels.

    Returns
    -------
    The value of ``percentageOfErrors(y_test, y_pred)``, i.e. the fraction
    of misclassified test samples. The same value is also printed.
    """
    classifier.fit(x_train, y_train)

    # Predict on the held-out data and measure the share of wrong labels.
    # (The confusion matrix that used to be computed here was never used.)
    y_pred = classifier.predict(x_test)
    p = percentageOfErrors(y_real=y_test, y_predicted=y_pred)
    print(p)
    return p
def applyingAllClassifiers(randomState, features=None, labels=None):
    """Train and score KNN, SVM, decision-tree and random-forest models on one split.

    Parameters
    ----------
    randomState : int
        Seed handed to ``train_test_split`` (80 % train / 20 % test).
    features, labels : optional
        Feature matrix and target vector. When omitted, the notebook-level
        variables ``x`` and ``y`` are used, which is the original behaviour.

    Returns
    -------
    numpy.ndarray
        ``[p_knn, p_svm, p_dt, p_rf]`` -- the test-set error fraction that
        ``applyingClassifier`` returns for each model, in that order.
    """
    # Fall back to the notebook globals so existing one-argument calls keep working.
    features = x if features is None else features
    labels = y if labels is None else labels

    # Splitting test data and train data
    x_train, x_test, y_train, y_test = train_test_split(
        features, labels, test_size=0.2, random_state=randomState)

    # Feature scaling: fit the scaler on the training split only and reuse
    # the same statistics for the test split.
    sc_X = StandardScaler()
    x_train = sc_X.fit_transform(x_train)
    x_test = sc_X.transform(x_test)

    # KNN -- note that, unlike the SVM and RF searches below, no ``cv`` is
    # passed here, so GridSearchCV's default number of folds applies.
    knn_grid = {'n_neighbors': list(range(1, 21)), 'p': [1, 2], 'weights': ['uniform', 'distance']}
    knn_search = GridSearchCV(estimator=KNeighborsClassifier(metric='minkowski', n_jobs=10),
                              param_grid=knn_grid, scoring='roc_auc')
    p_knn = applyingClassifier(classifier=knn_search, x_test=x_test, x_train=x_train,
                               y_test=y_test, y_train=y_train)

    # SVM
    # NOTE(review): scikit-learn documents ``decision_function_shape`` as
    # ignored for binary problems, so that grid axis probably doubles the
    # search time without affecting the result -- confirm before removing.
    svm_grid = {'C': list(range(1, 10)),
                'kernel': ['linear', 'poly', 'rbf', 'sigmoid'],
                'decision_function_shape': ['ovo', 'ovr'],
                'shrinking': [True, False]}
    svm_search = GridSearchCV(estimator=SVC(random_state=0), param_grid=svm_grid,
                              cv=10, scoring='roc_auc')
    p_svm = applyingClassifier(classifier=svm_search, x_test=x_test, x_train=x_train,
                               y_test=y_test, y_train=y_train)

    # Decision tree (entropy criterion, no hyper-parameter search)
    tree = DecisionTreeClassifier(criterion='entropy', random_state=0)
    p_dt = applyingClassifier(classifier=tree, x_test=x_test, x_train=x_train,
                              y_test=y_test, y_train=y_train)

    # Random forest
    rf_grid = {'n_estimators': list(range(1, 50))}
    rf_search = GridSearchCV(estimator=RandomForestClassifier(criterion='entropy', random_state=0),
                             param_grid=rf_grid, cv=10, scoring='roc_auc')
    p_rf = applyingClassifier(classifier=rf_search, x_test=x_test, x_train=x_train,
                              y_test=y_test, y_train=y_train)

    return np.array([p_knn, p_svm, p_dt, p_rf])
    

# Sem shadowing

In [46]:
# Importing data

# Load the tab-separated results for each target (T2 and T3).
t2 = pd.read_csv('resultados/t2_OkumuraHata_Modificado', delimiter='\t')
t3 = pd.read_csv('resultados/t3_OkumuraHata_Modificado', delimiter='\t')

# Keep only the seeds (nRun) that are present in both datasets.
t2 = t2[t2.nRun.isin(t3.nRun)].reset_index(drop=True)
t3 = t3[t3.nRun.isin(t2.nRun)].reset_index(drop=True)

# The T3 columns are attached to the T2 rows by index below, so the two
# frames must list the same seeds in the same order; otherwise rows from
# different simulation runs would be paired silently.
assert len(t2) == len(t3) and (t2.nRun.values == t3.nRun.values).all(), \
    "t2 and t3 rows do not pair up by nRun"

# Combine both datasets into one frame with per-target outcome columns.
data = t2.drop(['targetCellId', 'downloadTime', 'rxBytes'], axis=1)
data['downloadTimeT2'] = t2.downloadTime
data['downloadTimeT3'] = t3.downloadTime
data['rxBytesT2'] = t2.rxBytes
data['rxBytesT3'] = t3.rxBytes

# Label each row with the better target (0 = T2, 1 = T3).
data['best_output'] = data.apply(best_output, axis=1)

# Splitting the data between independent variables and dependent variable
y = data['best_output']
x = data[['rsrp1','rsrq1','rsrp2','rsrq2','rsrp3','rsrq3','previousrsrp1','previousrsrq1','previousrsrp2','previousrsrq2','previousrsrp3','previousrsrq3']]

In [47]:
# Applying the classifiers
# Average the [KNN, SVM, DT, RF] error fractions over several train/test splits.
maxRange = 10
avgP = np.zeros(4)
for i in range(maxRange):
    print("RandomState {}".format(i))
    p = applyingAllClassifiers(randomState=i)
    avgP += p
avgP /= maxRange

RandomState 0
0.0
0.0
0.005
0.005
RandomState 1
0.005
0.005
0.005
0.0
RandomState 2
0.005
0.01
0.005
0.005
RandomState 3
0.01
0.005
0.01
0.01
RandomState 4
0.005
0.0
0.005
0.01
RandomState 5
0.005
0.01
0.005
0.005
RandomState 6
0.0
0.0
0.0
0.0
RandomState 7
0.0
0.005
0.005
0.005
RandomState 8
0.01
0.0
0.01
0.0
RandomState 9
0.01
0.005
0.005
0.005


In [48]:
# avgP holds the mean test error fractions in the order [KNN, SVM, DT, RF];
# its complement is the corresponding mean accuracy.
1 - avgP

array([ 0.995 ,  0.996 ,  0.9945,  0.9955])

# Com shadowing

In [49]:
# Importing data

# Load the tab-separated results for each target (T2 and T3).
t2 = pd.read_csv('resultados/t2_OhBuildings_ComShadowing_Modificado', delimiter='\t')
t3 = pd.read_csv('resultados/t3_OhBuildings_ComShadowing_Modificado', delimiter='\t')

# Keep only the seeds (nRun) that are present in both datasets.
t2 = t2[t2.nRun.isin(t3.nRun)].reset_index(drop=True)
t3 = t3[t3.nRun.isin(t2.nRun)].reset_index(drop=True)

# The T3 columns are attached to the T2 rows by index below, so the two
# frames must list the same seeds in the same order; otherwise rows from
# different simulation runs would be paired silently.
assert len(t2) == len(t3) and (t2.nRun.values == t3.nRun.values).all(), \
    "t2 and t3 rows do not pair up by nRun"

# Combine both datasets into one frame with per-target outcome columns.
data = t2.drop(['targetCellId', 'downloadTime', 'rxBytes'], axis=1)
data['downloadTimeT2'] = t2.downloadTime
data['downloadTimeT3'] = t3.downloadTime
data['rxBytesT2'] = t2.rxBytes
data['rxBytesT3'] = t3.rxBytes

# Label each row with the better target (0 = T2, 1 = T3).
data['best_output'] = data.apply(best_output, axis=1)

# Splitting the data between independent variables and dependent variable
y = data['best_output']
x = data[['rsrp1','rsrq1','rsrp2','rsrq2','rsrp3','rsrq3','previousrsrp1','previousrsrq1','previousrsrp2','previousrsrq2','previousrsrp3','previousrsrq3']]

In [50]:
# Applying the classifiers
# Average the [KNN, SVM, DT, RF] error fractions over several train/test splits.
maxRange = 10
avgP = np.zeros(4)
for i in range(maxRange):
    print("RandomState {}".format(i))
    p = applyingAllClassifiers(randomState=i)
    avgP += p
avgP /= maxRange

RandomState 0
0.378082191781
0.312328767123
0.369863013699
0.347945205479
RandomState 1
0.33698630137
0.284931506849
0.372602739726
0.361643835616
RandomState 2
0.372602739726
0.27397260274
0.356164383562
0.364383561644
RandomState 3
0.394520547945
0.339726027397
0.347945205479
0.342465753425
RandomState 4
0.383561643836
0.295890410959
0.378082191781
0.369863013699
RandomState 5
0.356164383562
0.284931506849
0.369863013699
0.350684931507
RandomState 6
0.372602739726
0.284931506849
0.369863013699
0.375342465753
RandomState 7
0.369863013699
0.320547945205
0.334246575342
0.328767123288
RandomState 8
0.361643835616
0.298630136986
0.356164383562
0.416438356164
RandomState 9
0.372602739726
0.33698630137
0.380821917808
0.361643835616


In [52]:
# avgP holds the mean test error fractions in the order [KNN, SVM, DT, RF];
# its complement is the corresponding mean accuracy.
1 - avgP

array([ 0.63013699,  0.69671233,  0.63643836,  0.63808219])