## Loading and pre-processing data

In [1]:
# Importing the libraries
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline  
import pandas as pd
import warnings
warnings.filterwarnings('ignore')

import timeit

# Importing the dataset from the url
url =  "https://raw.githubusercontent.com/uzay00/KaVe/master/Ders3/data/Social_Network_Ads.csv"
data = pd.read_csv(url)

In [2]:
data.head(6)

Unnamed: 0,User ID,Gender,Age,EstimatedSalary,Purchased
0,15624510,Male,19,19000,0
1,15810944,Male,35,20000,0
2,15668575,Female,26,43000,0
3,15603246,Female,27,57000,0
4,15804002,Male,19,76000,0
5,15728773,Male,27,58000,0


In [3]:
X = data.iloc[:, [2, 3]].values
y = data.iloc[:, 4].values

X[:6]

array([[   19, 19000],
       [   35, 20000],
       [   26, 43000],
       [   27, 57000],
       [   19, 76000],
       [   27, 58000]])

In [4]:
y[:6]

array([0, 0, 0, 0, 0, 0])

In [5]:
# Splitting the dataset into the Training set and Test set
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.25, random_state = 0)

# Feature Scaling
from sklearn.preprocessing import StandardScaler
sc = StandardScaler()
X_train = sc.fit_transform(X_train)
X_test = sc.transform(X_test)

## Our own Logistic Regression Class
![alt text](https://raw.githubusercontent.com/uzay00/KaVe/master/Ders3/data/myclassifier.png)

In [6]:
def sigmoid(z):
    return(1 / (1 + np.exp(-z)))
  
class myClassification():
    def __init__(self,X_train, y_train):
        self.m, self.n = X_train.shape
        self.n += 1 # Add one for x_0 column 
        
        self.X_train = np.hstack((np.ones((self.m,1)), X_train))
        self.y_train = y_train.reshape((self.m,1))
        self.W = np.random.randn(self.n,1)
            
    def cost(self):
        h = sigmoid(self.X_train.dot(self.W))
        return np.sum(np.power(h-self.y_train,2))/ (2*self.m)

    def derivative(self):
        h = sigmoid(self.X_train.dot(self.W))
        derivative = np.sum(self.X_train * (h-self.y_train) * h * (1-h) , axis=0)/ self.m
        return derivative.reshape(self.W.shape)

    def gradient_descent(self, alpha = 0.05, number_steps = 10000):
        for i in range(number_steps):
            self.W = self.W - alpha * self.derivative()
        return self.W
    
    def predict(self, X_test, threshold=0.5):
        m, n = X_test.shape
        X_test = np.hstack((np.ones((m,1)), X_test))
        h = sigmoid(X_test.dot(self.W)) 
        p = h >= threshold
        return (p.astype('int'))

    def fit(self):
        self.W = self.gradient_descent()

## Running Logistic Regression on data

In [7]:
# Fitting Logistic Regression to the Training set
from sklearn.linear_model import LogisticRegression

start_time = timeit.default_timer()

classifier = LogisticRegression(random_state = 0)
classifier.fit(X_train, y_train)
# Predicting the Test set results
y_pred = classifier.predict(X_test)

print("Time: ", timeit.default_timer() - start_time)



# Making the Confusion Matrix
from sklearn.metrics import confusion_matrix, accuracy_score
cm = confusion_matrix(y_test, y_pred)
print("\t\t\t\t\t---SKlearn Logistic Regression---")
print("confusion_matrix:\n", cm)
print("accuracy_score: ", accuracy_score(y_test, y_pred))

Time:  0.11048573200059764
					---SKlearn Logistic Regression---
confusion_matrix:
 [[65  3]
 [ 8 24]]
accuracy_score:  0.89


In [8]:
start_time = timeit.default_timer()

me = myClassification(X_train, y_train)
me.fit()
me_pred = me.predict(X_test)

print("Time: ", timeit.default_timer() - start_time)


cm = confusion_matrix(y_test, me_pred)
print("\t\t\t\t---Our Own Logistic Regression with Gradient Descent---")
print("confusion_matrix:\n", cm)
print("accuracy_score: ", accuracy_score(y_test, y_pred))

Time:  0.4278603999991901
				---Our Own Logistic Regression with Gradient Descent---
confusion_matrix:
 [[64  4]
 [ 6 26]]
accuracy_score:  0.89


# Logistic Regression with ABM

In [9]:
def sigmoid(z):
    return(1 / (1 + np.exp(-z)))
 
class agent():
    def __init__(self,ID):
        self.ID = ID
        self.score = 0
        
    def feed(self, X_part, y_part):
        self.m, self.n = X_part.shape
        self.n  += 1 # Add one for x_0 column 
        
        self.X = np.hstack((np.ones((self.m,1)), X_part))
        self.y = y_part.reshape((self.m,1))
        self.W = np.random.randn(self.n ,1)
        
        self.score = self.performance()
    
    def performance(self, threshold = 0.5):
        h = sigmoid(self.X.dot(self.W)) 
        p = h >= threshold
        return 1/(1+np.sum(np.power(self.y - p.astype('int'),2)))
    
    def immitate(self, other, pr = 1): # immitate betters
        if np.random.rand() < pr:
            k =  max(int(self.n * 0.4),1)
            row = np.random.randint(self.n,size =k)
            self.W[row,:] = other.W[row,:]
            self.score = self.performance()
                  
    def innovate(self, pr = 1):
        if np.random.rand() < pr: # Go on your own - mutation
            k =  max(self.n//5,1)
            row = np.random.randint(self.n,size =k)
            self.W[row,:] = np.random.randn(k,1)
            self.score = self.performance()
    
        
class abmClassifier():
    def __init__(self, X, y, N = 500, time = 200000):
        self.X, self.y, self.N, self.time = X, y, N, time
        self.population = [agent(i) for i in range(self.N)]
        
    def feed(self):
        for A in self.population:
            A.feed(self.X, self.y) 
    
    def social_optimisation(self):
        self.feed()
        for i in range(self.time):
            iA, iB = np.random.choice(range(self.N), 2, replace=False)
            A, B = self.population[iA], self.population[iB]
            
            if A.score > B.score: B.immitate(A)
            else: A.immitate(B)
                
            A.innovate();B.innovate()
            
            
    def best_agent(self):   
        scores = [A.score for A in self.population]
        ibest = scores.index(max(scores))
        return self.population[ibest]
        
    def predict(self, X_test, threshold = 0.5):
        m, n = X_test.shape
        X_test = np.hstack((np.ones((m,1)), X_test))
        
        h = sigmoid(X_test.dot(self.best_agent().W)) 
        p = h >= threshold
        return (p.astype('int'))
    
    def fit(self):
        self.social_optimisation()
      

In [10]:
start_time = timeit.default_timer()

abm = abmClassifier(X_train, y_train)
abm.fit()
abm_pred = abm.predict(X_test)

print("Time: ", timeit.default_timer() - start_time)


cm = confusion_matrix(y_test, abm_pred)
print("\t\t\t\t---Our Own Logistic Regression with ABM---")
print("confusion_matrix:\n", cm)
print("accuracy_score: ", accuracy_score(y_test, abm_pred))

Time:  36.77802290799991
				---Our Own Logistic Regression with ABM---
confusion_matrix:
 [[63  5]
 [ 4 28]]
accuracy_score:  0.91


In [11]:
from sklearn.neural_network import MLPClassifier
start_time = timeit.default_timer()


nn=MLPClassifier(hidden_layer_sizes=(7,), max_iter=1000, alpha=0.1,
                     solver='sgd',  random_state=21,tol=0.000000001)

nn.fit(X_train,y_train)
# Predicting the Test set results
y_pred = nn.predict(X_test)

print("Time: ", timeit.default_timer() - start_time)

# Making the Confusion Matrix
from sklearn.metrics import confusion_matrix, accuracy_score
cm = confusion_matrix(y_test, y_pred)
print("\t\t\t\t---SKLearn Neural Networks---")
print("confusion_matrix:\n", cm)
print("accuracy_score: ", accuracy_score(y_test, y_pred))

Time:  0.48711214899958577
				---SKLearn Neural Networks---
confusion_matrix:
 [[63  5]
 [ 5 27]]
accuracy_score:  0.9


# Application to Text Mining

In [12]:
# Importing the libraries
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd

# Importing the dataset
url = 'https://raw.githubusercontent.com/uzay00/KaVe/master/Proje3/Restaurant_Reviews.tsv'
dataset = pd.read_csv(url, delimiter = '\t', quoting = 3)

# Cleaning the texts
import re
import nltk
#nltk.download('stopwords')
from nltk.corpus import stopwords
from nltk.stem.porter import PorterStemmer
corpus = []
for i in range(0, 1000):
    review = re.sub('[^a-zA-Z]', ' ', dataset['Review'][i])
    review = review.lower()
    review = review.split()
    ps = PorterStemmer()
    review = [ps.stem(word) for word in review if not word in set(stopwords.words('english'))]
    review = ' '.join(review)
    corpus.append(review)

# Creating the Bag of Words model
from sklearn.feature_extraction.text import CountVectorizer
cv = CountVectorizer(max_features = 500)
X = cv.fit_transform(corpus).toarray()
y = dataset.iloc[:, -1].values

# Splitting the dataset into the Training set and Test set
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.20, random_state = 0)

# Feature Scaling
from sklearn.preprocessing import StandardScaler
sc = StandardScaler()
X_train = sc.fit_transform(X_train)
X_test = sc.transform(X_test)

# Fitting Naive Bayes to the Training set
from sklearn.naive_bayes import GaussianNB
start_time = timeit.default_timer()
classifier = GaussianNB()
classifier.fit(X_train, y_train)

# Predicting the Test set results
y_pred = classifier.predict(X_test)
print("Time: ", timeit.default_timer() - start_time)


# Making the Confusion Matrix
from sklearn.metrics import confusion_matrix, accuracy_score
cm = confusion_matrix(y_test, y_pred)
print("\t\t\t\t\t---SKlearn Naive Bayes---")
print("confusion_matrix:\n", cm)
print("accuracy_score: ", accuracy_score(y_test, y_pred))

Time:  0.00667131099908147
					---SKlearn Naive Bayes---
confusion_matrix:
 [[50 47]
 [12 91]]
accuracy_score:  0.705


In [13]:
# Fitting Logistic Regression to the Training set
from sklearn.linear_model import LogisticRegression
start_time = timeit.default_timer()
classifier = LogisticRegression(random_state = 0)
classifier.fit(X_train, y_train)

# Predicting the Test set results
y_pred = classifier.predict(X_test)
print("Time: ", timeit.default_timer() - start_time)


# Making the Confusion Matrix
from sklearn.metrics import confusion_matrix, accuracy_score
cm = confusion_matrix(y_test, y_pred)
print("\t\t\t\t\t---SKlearn Logistic Regression---")
print("confusion_matrix:\n", cm)
print("accuracy_score: ", accuracy_score(y_test, y_pred))

Time:  0.1331212040004175
					---SKlearn Logistic Regression---
confusion_matrix:
 [[68 29]
 [29 74]]
accuracy_score:  0.71


In [14]:
start_time = timeit.default_timer()

me = myClassification(X_train, y_train)
me.fit()
me_pred = me.predict(X_test)

print("Time: ", timeit.default_timer() - start_time)



cm = confusion_matrix(y_test, me_pred)
print("\t\t\t\t---Our Own Logistic Regression with Gradient Descent---")
print("confusion_matrix:\n", cm)
print("accuracy_score: ", accuracy_score(y_test, y_pred))

Time:  32.84267134100082
				---Our Own Logistic Regression with Gradient Descent---
confusion_matrix:
 [[57 40]
 [45 58]]
accuracy_score:  0.71


In [15]:
start_time = timeit.default_timer()

abm = abmClassifier(X_train, y_train)
abm.fit()
abm_pred = abm.predict(X_test)

print("Time: ", timeit.default_timer() - start_time)



cm = confusion_matrix(y_test, abm_pred)
print("\t\t\t\t---Our Own Logistic Regression with ABM---")
print("confusion_matrix:\n", cm)
print("accuracy_score: ", accuracy_score(y_test, abm_pred))

Time:  135.16589753599874
				---Our Own Logistic Regression with ABM---
confusion_matrix:
 [[51 46]
 [55 48]]
accuracy_score:  0.495


In [16]:
from sklearn.neural_network import MLPClassifier
start_time = timeit.default_timer()


nn=MLPClassifier(hidden_layer_sizes=(7,), max_iter=1000, alpha=0.1,
                     solver='sgd',  random_state=21,tol=0.000000001)

nn.fit(X_train,y_train)
# Predicting the Test set results
y_pred = nn.predict(X_test)

print("Time: ", timeit.default_timer() - start_time)


# Making the Confusion Matrix
from sklearn.metrics import confusion_matrix, accuracy_score
cm = confusion_matrix(y_test, y_pred)
print("\t\t\t\t---SKLearn Neural Networks---")
print("confusion_matrix:\n", cm)
print("accuracy_score: ", accuracy_score(y_test, y_pred))

Time:  2.3067403200002445
				---SKLearn Neural Networks---
confusion_matrix:
 [[80 17]
 [31 72]]
accuracy_score:  0.76
