In [1]:
import numpy as np

from sklearn.neural_network import MLPClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.svm import SVC
from sklearn.gaussian_process import GaussianProcessClassifier
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.gaussian_process.kernels import RBF
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import ExtraTreesClassifier
from sklearn.ensemble import RandomForestClassifier, AdaBoostClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.discriminant_analysis import QuadraticDiscriminantAnalysis
from sklearn.linear_model import SGDClassifier

import random

fruits = np.random.choice(["Apple", "Orange", "Pear"], size=1000)
diameter = []
weight = []
color = []

for mark in fruits:
    if mark == "Apple":
        d = random.randint(6, 8)
        diameter.append(d)
        w = random.randint(30, 40)
        weight.append(w)
        c = random.uniform(0,1.5)
        color.append(c)
    if mark == "Orange":
        d = random.randint(6, 8)
        diameter.append(d)
        w = random.randint(30, 40)
        weight.append(w)
        c = random.uniform(1,2.5)
        color.append(c)
    if mark == "Pear":
        d = random.randint(6, 8)
        diameter.append(d)
        w = random.randint(25, 35)
        weight.append(w)
        c = random.uniform(2,3.5)
        color.append(c)


In [2]:
# combine the features into a single matrix
X = np.column_stack((diameter, weight, color))

In [3]:
Y = fruits

In [4]:
from sklearn.model_selection import train_test_split

# split the data into training and test sets
X_train, X_test, Y_train, Y_test = train_test_split(X, Y, 
test_size=0.2, random_state=42)


In [5]:
X_train

array([[ 7.        , 36.        ,  1.32831689],
       [ 8.        , 36.        ,  1.69243815],
       [ 8.        , 40.        ,  1.53767005],
       ...,
       [ 7.        , 32.        ,  2.86996098],
       [ 6.        , 35.        ,  2.44976601],
       [ 8.        , 38.        ,  2.37280411]])

In [6]:
names = ["Nearest_Neighbors", "Linear_SVM", "Polynomial_SVM", "RBF_SVM", "Gaussian_Process",
         "Gradient_Boosting", "Decision_Tree", "Extra_Trees", "Random_Forest", "Neural_Net", "AdaBoost",
         "Naive_Bayes", "QDA", "SGD"]
#GaussianProcessClassifier(1.0 * RBF(1.0)),
#GaussianProcessClassifier(kernel=custom_kernel()),
classifiers = [
    KNeighborsClassifier(3),
    SVC(kernel="linear", C=0.025),
    SVC(kernel="poly", degree=3, C=0.025),
    SVC(kernel="rbf", C=1, gamma=2),
    GaussianProcessClassifier(1.0 * RBF(1.0)),
    GradientBoostingClassifier(n_estimators=100, learning_rate=1.0),
    DecisionTreeClassifier(max_depth=5),
    ExtraTreesClassifier(n_estimators=10, min_samples_split=2),
    RandomForestClassifier(max_depth=5, n_estimators=100),
    MLPClassifier(alpha=1, max_iter=1000),
    AdaBoostClassifier(algorithm="SAMME", n_estimators=100),
    GaussianNB(),
    QuadraticDiscriminantAnalysis(),
    SGDClassifier(loss="hinge", penalty="l2")]

In [7]:
scores = []
for name, clf in zip(names, classifiers):
    clf.fit(X_train, Y_train)
    score = clf.score(X_test, Y_test)
    scores.append(score)

In [8]:
df = pd.DataFrame()
df['name'] = names
df['score'] = scores
df = df.sort_values(by=['score'], ascending=False)
df

Unnamed: 0,name,score
5,Gradient_Boosting,0.845
6,Decision_Tree,0.845
9,Neural_Net,0.84
12,QDA,0.84
13,SGD,0.84
11,Naive_Bayes,0.835
1,Linear_SVM,0.83
10,AdaBoost,0.83
7,Extra_Trees,0.825
4,Gaussian_Process,0.82


In [9]:
import seaborn as sns
cm = sns.light_palette("green", as_cmap=True)
s = df.style.background_gradient(cmap=cm)
s

Unnamed: 0,name,score
5,Gradient_Boosting,0.845
6,Decision_Tree,0.845
9,Neural_Net,0.84
12,QDA,0.84
13,SGD,0.84
11,Naive_Bayes,0.835
1,Linear_SVM,0.83
10,AdaBoost,0.83
7,Extra_Trees,0.825
4,Gaussian_Process,0.82
