## Build a machine learning model to predict whether a user will click an ad, based on their age and estimated salary, for a given dataset

### Data Preprocessing

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

# Load the Social Network Ads CSV into a pandas DataFrame and preview the first rows.
# NOTE(review): this is an absolute, machine-specific path; the notebook will not
# run elsewhere until it is pointed at a local copy of the dataset.
dataset_path = '/home/admin1/PycharmProjects/Machine Learning using libraries/Classification/Datasets/Social_Network_Ads.csv'
social_network_data = pd.read_csv(dataset_path)
social_network_data.head()

In [None]:
# Print each column's dtype and non-null count to check for missing values
social_network_data.info()       # observing datatypes of different columns & checking null values

In [None]:
# 'User ID' and 'Gender' are not used as predictors here, so remove them in place
# and preview the remaining columns.
social_network_data.drop(columns=['User ID', 'Gender'], inplace=True)
social_network_data.head()

#### Separating out feature columns & label column

In [None]:
# Every column except the last is a feature; the last column holds the label.
feature_frame = social_network_data.iloc[:, :-1]
label_column = social_network_data.iloc[:, -1]
x_values, y_values = feature_frame.values, label_column.values

In [None]:
from sklearn.preprocessing import StandardScaler

# Standardise the feature columns (StandardScaler defaults: zero mean, unit variance).
# NOTE(review): the scaler is fitted on the full dataset before the train/test split
# performed later in this notebook, so test-set statistics leak into preprocessing.
# Consider splitting first and fitting the scaler on the training rows only.
sc_x = StandardScaler().fit(x_values)
x_values = sc_x.transform(x_values)

#### Splitting dataset into train set & test set

In [None]:
from sklearn.model_selection import train_test_split
# Hold out 20% of the rows for testing; the fixed seed keeps the split reproducible.
train_x_values, test_x_values, train_y_values, test_y_values = train_test_split(
    x_values,
    y_values,
    train_size=0.8,
    random_state=0,
)

### Building Logistic Regression model

In [None]:
from sklearn.linear_model import LogisticRegression

# Fit a logistic regression model on the training split (fit returns the estimator itself).
classifier = LogisticRegression(random_state=0).fit(train_x_values, train_y_values)
# Trailing expression so the cell still displays the fitted estimator.
classifier

#### Storing predictions for test set

In [None]:
# Predicted class label for every row of the held-out test features
test_prediction = classifier.predict(test_x_values)

### Evaluating model against test set 

In [None]:
from sklearn.metrics import accuracy_score, confusion_matrix
# Fraction of test-set predictions that match the true labels
test_accuracy = accuracy_score(test_y_values, test_prediction)
print(f'Accuracy score: {test_accuracy}')

In [None]:
# Counts of test observations by (true class, predicted class)
test_confusion = confusion_matrix(test_y_values, test_prediction)
print(f'Confusion matrix:\n {test_confusion}')

### Plotting colormap for linear classification

In [None]:
from matplotlib.colors import ListedColormap

def plot_colormap(x_set, y_set, title, model=None, step=0.001):
    """Plot a fitted classifier's decision regions with the observations on top.

    A dense grid is laid over the range of the two feature columns, every grid
    point is classified, and the predicted classes are drawn as filled
    contours. The observations in ``x_set`` are then scattered over those
    regions, coloured by their label in ``y_set``.

    Args:
        x_set: 2-D array; columns 0 and 1 are the plotted features, in the
            same scale the model was trained on.
        y_set: 1-D array of class labels aligned with the rows of ``x_set``.
            Labels are used as indices into a two-colour palette, so they
            must be the integers 0 and 1.
        title: Title shown above the figure.
        model: Fitted estimator exposing ``predict``. Defaults to the
            module-level ``classifier`` so existing calls keep working.
        step: Grid spacing along both axes. The number of grid points (and
            the memory/time for prediction) grows with ``1 / step**2``.

    Raises:
        ValueError: If ``step`` is not strictly positive.
    """
    if step <= 0:
        raise ValueError('step must be a positive number')
    if model is None:
        model = classifier

    class_colors = ('red', 'blue')

    # Grid of points spanning the observed range of each feature; meshgrid
    # returns the coordinate matrices of their cartesian product.
    first_feature, second_feature = x_set[:, 0], x_set[:, 1]
    x1_grids, x2_grids = np.meshgrid(
        np.arange(first_feature.min(), first_feature.max(), step),
        np.arange(second_feature.min(), second_feature.max(), step),
    )

    # One row per grid point, in the (feature 1, feature 2) layout the model expects.
    grid_points = np.column_stack((x1_grids.ravel(), x2_grids.ravel()))

    # Colour the area on either side of the decision boundary by predicted class.
    grid_classes = model.predict(grid_points).reshape(x1_grids.shape)
    plt.contourf(x1_grids, x2_grids, grid_classes,
                 alpha=0.6, cmap=ListedColormap(class_colors))

    # Clamp the axes to the extent of the grid.
    plt.xlim(x1_grids.min(), x1_grids.max())
    plt.ylim(x2_grids.min(), x2_grids.max())

    # Overlay the actual observations, one scatter series per class.
    for j in np.unique(y_set):
        in_class = y_set == j
        plt.scatter(x_set[in_class, 0], x_set[in_class, 1],
                    c=class_colors[j], label=j, s=6)

    # Axis values are in whatever scale x_set was passed in.
    plt.xlabel('Age')
    plt.ylabel('Estimated Salary')
    plt.legend()
    plt.title(title)
    plt.show()

In [None]:
# Decision regions with the training observations overlaid
plot_colormap(train_x_values, train_y_values, 'Train set')

In [None]:
# Decision regions with the held-out test observations overlaid
plot_colormap(test_x_values, test_y_values, 'Test set')