# Data preparation

In [None]:
from random import randint
import pandas as pd
import numpy as np

# Size of the synthetic data set and the inclusive range of each feature value
row_num = 40
min_num, max_num = -100, 100

# Write a header line followed by `row_num` random rows.
# The label y is 1 when x1 + x2 exceeds x3, otherwise 0.
with open(".random.csv", "w+") as f:
    f.write("x1,x2,x3,y\n")
    for _row in range(row_num):
        x1, x2, x3 = (randint(min_num, max_num) for _k in range(3))
        y = int(x1 + x2 > x3)
        f.write("{},{},{},{}\n".format(x1, x2, x3, y))

In [None]:
# Load the generated CSV; header=0 takes the column names from the first row.
df = pd.read_csv(".random.csv",header=0)

In [None]:
#df.head()

In [None]:
#df.info()

# Training & Evaluation

In [None]:
from sklearn.model_selection import train_test_split
from sklearn import tree
from sklearn.metrics import accuracy_score
from sklearn.linear_model import LogisticRegression
from sklearn.naive_bayes import GaussianNB

In [None]:
# Separate features from class
X = df.drop("y", axis=1)
y = df["y"]

# Create a separate train and test set
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.33, random_state=42, stratify=y)

In [None]:
# Build the logistic-regression classifier and fit it on the training split
# (fit returns the estimator itself, so the chained call yields the model)
clf = LogisticRegression(solver='lbfgs', random_state=0).fit(X_train, y_train)

In [None]:
# Predict class labels for the held-out test features.
y_pred = clf.predict(X_test)

In [None]:
# Fraction of test samples whose predicted label matches the true label.
acc = accuracy_score(y_test, y_pred)

In [None]:
# Report the test-set accuracy computed above
print(f"Accuracy = {acc}")

# Example: predicting the class of a new, unseen sample

In [None]:
# Classify one hand-written sample (x1, x2, x3); predict expects a 2-D
# array, so the vector is reshaped into a single row
new_input = np.array([-105, 60, 30])
print(clf.predict(new_input.reshape(1, -1)))

# Visualization

In [None]:
## Logistic regression visualization
# #############################################################################
# Sweep the regularization strength, plot train/test accuracy against it, then
# compare the fitted coefficients with the rule that generated the labels.
import matplotlib.pyplot as plt

# NOTE: this rebinds `clf`, replacing the classifier trained earlier.
# max_iter is raised because the features are unscaled and weak
# regularization can need more than the default number of lbfgs iterations.
clf = LogisticRegression(random_state=0, solver='lbfgs', max_iter=1000)

# The labels were generated as y = 1 if x1 + x2 > x3, so the true decision
# boundary has direction (1, 1, -1) over the columns (x1, x2, x3).
coef = np.array([1.0, 1.0, -1.0])

alphas = np.logspace(-5, 1, 60)
# Despite the names, these hold accuracy scores (higher is better).
train_errors = list()
test_errors = list()
for alpha in alphas:
    # LogisticRegression has no `alpha` parameter; its `C` is the inverse of
    # the regularization strength, so alpha maps to C = 1 / alpha.
    clf.set_params(C=1.0 / alpha)
    clf.fit(X_train, y_train)
    train_errors.append(clf.score(X_train, y_train))
    test_errors.append(clf.score(X_test, y_test))

i_alpha_optim = np.argmax(test_errors)
alpha_optim = alphas[i_alpha_optim]
print("Optimal regularization parameter : %s" % alpha_optim)

# Refit on the full data set with the selected regularization strength.
# coef_ has shape (1, n_features) for a binary problem; flatten it so it can
# be plotted as a single line.
clf.set_params(C=1.0 / alpha_optim)
coef_ = clf.fit(X, y).coef_.ravel()

plt.subplot(2, 1, 1)
plt.semilogx(alphas, train_errors, label='Train')
plt.semilogx(alphas, test_errors, label='Test')
plt.vlines(alpha_optim, plt.ylim()[0], np.max(test_errors), color='k',
           linewidth=3, label='Optimum on test')
plt.legend(loc='lower left')
plt.ylim([0, 1.2])
plt.xlabel('Regularization parameter')
plt.ylabel('Performance')

# Show estimated coef_ vs true coef.
# The scale of logistic-regression coefficients is arbitrary, so both vectors
# are normalized to unit length and only their directions are compared.
plt.subplot(2, 1, 2)
plt.plot(coef / np.linalg.norm(coef), 'o-', label='True coef')
plt.plot(coef_ / np.linalg.norm(coef_), 'o-', label='Estimated coef')
plt.xticks(range(len(coef)), list(X.columns))
plt.legend()
#plt.subplots_adjust(0.09, 0.04, 0.94, 0.94, 0.26, 0.26)
plt.show()



In [None]:
## Visualization for the tree

# export_text is part of the public sklearn.tree namespace; the private
# sklearn.tree.export module is no longer available in current scikit-learn.
from sklearn.tree import export_text

# export_text only accepts a fitted decision tree, so fit one on the training
# split instead of passing the logistic-regression `clf`.
tree_clf = tree.DecisionTreeClassifier(random_state=0)
tree_clf.fit(X_train, y_train)

# Render the learned decision rules as indented text, labelled with the
# feature column names.
r = export_text(tree_clf, feature_names=list(X.columns))
print(r)