# Simple Machine Learning demo

In [None]:
from numpy                   import hstack, vstack, mean
from sklearn.model_selection import train_test_split
from plotly.express          import scatter, scatter_3d
from sklearn.datasets        import load_iris
from sklearn.neighbors       import KNeighborsClassifier
from sklearn.linear_model    import LinearRegression
from sklearn.cluster         import KMeans
from sklearn.decomposition   import PCA

In [None]:
# load the iris data set: measurements go to x, species codes to y
iris = load_iris()
x, y = iris.data, iris.target
# human-readable names for the columns of x and for the codes in y
x_names, y_names = iris.feature_names, iris.target_names

In [None]:
# visualize the data
def show_iris(x, y, labels=None, symbol_label=None, **kwargs):
    """Return a 3-D scatter plot of three columns of ``x``, coloured by species.

    Parameters
    ----------
    x : 2-D array
        One row per example; columns 2, 1 and 0 become the plot's
        x, y and z axes.
    y : sequence of int
        Species code per example, used as an index into ``y_names``
        to obtain the colour label.
    labels : dict, optional
        Passed to ``scatter_3d`` as is.  When omitted, the axes are named
        after the matching entries of ``x_names`` and the colour legend
        is called 'species'.
    symbol_label : str, optional
        Legend name for the ``symbol`` dimension.  Only used when
        ``labels`` is omitted.
    **kwargs
        Forwarded unchanged to ``scatter_3d`` (e.g. ``symbol``, ``title``).

    Returns
    -------
    The figure created by ``scatter_3d`` with the scene's ``aspectmode``
    set to "data".
    """
    i, j, k = 2, 1, 0          # choose some coordinates
    if labels is None:
        labels = {'x': x_names[i], 'y': x_names[j], 'z': x_names[k],
                  'color': 'species'}
        # Add the symbol entry only when a name was supplied; otherwise a
        # None label would be handed to plotly whenever `symbol` is used
        # without `symbol_label`.
        if symbol_label is not None:
            labels['symbol'] = symbol_label
    fig = scatter_3d(x   = x[:,i],       # x axis of plot
                     y   = x[:,j],       # y axis of plot
                     z   = x[:,k],       # z axis of plot
                     color  = [y_names[n] for n in y],
                     labels = labels,
                     **kwargs)
    fig['layout']['scene']['aspectmode'] = "data"
    return fig
show_iris(x, y)

In [None]:
# hold out half of the examples for testing (fixed seed, so the split is repeatable)
x_train, x_test, y_train, y_test = train_test_split(
    x, y, test_size=0.5, random_state=42)
# glue both sets back together, training examples first, and tag each row with its set
x_all = vstack((x_train, x_test))
y_all = hstack((y_train, y_test))
train_test = ['train'] * len(x_train) + ['test'] * len(x_test)

In [None]:
# (number of training examples, number of measured features)
x_train.shape

In [None]:
# plot every example; the marker symbol tells which set it belongs to
show_iris(
    x_all,
    y_all,
    symbol=train_test,
    symbol_sequence=['circle', 'circle-open'],
    symbol_label='set',
)

## Classification (supervised learning)

If the output set is discrete and available for training, we have a classification problem.

In [None]:
# predict the species from the measured petal/sepal lengths
# input = x_train, output = y_train
nn = KNeighborsClassifier(n_neighbors=1)     # decide by the single nearest training example
nn.fit(x_train, y_train)                     # fit the classifier on the training set

In [None]:
# calculate the train accuracy
correct_train = nn.predict(x_train) == y_train
print(correct_train.mean())
# Pin each symbol to its value: a symbol_sequence is handed out in order of
# first appearance, so 'circle' would not reliably mean "correct".
show_iris(x_train, y_train,
          symbol       = correct_train.astype(str),
          symbol_map   = {'True': 'circle', 'False': 'x'},
          symbol_label = 'correct?',
          title = 'training')

In [None]:
# calculate the test accuracy
correct_test = nn.predict(x_test) == y_test
print(correct_test.mean())
# Pin each symbol to its value: a symbol_sequence is handed out in order of
# first appearance, so a misclassified first example would swap the markers.
show_iris(x_test, y_test,
          symbol       = correct_test.astype(str),
          symbol_map   = {'True': 'circle', 'False': 'x'},
          symbol_label = 'correct?',
          title='test set')

## Regression (supervised learning)

If the output set is continuous and available for training, we have a regression problem.

In [None]:
# predict one of the measured lengths from the others
# input = x_train[:, :3], output = x_train[:, 3]
lr = LinearRegression().fit(x_train[:, :3], x_train[:, 3])   # last column from the first three
# fitted weights first, then the offset
print(lr.coef_, lr.intercept_, sep='\n')

In [None]:
# mean squared error of the regression on the training set
mean((x_train[:, 3] - lr.predict(x_train[:, :3])) ** 2)

In [None]:
# mean squared error of the regression on the held-out test set
mean((x_test[:, 3] - lr.predict(x_test[:, :3])) ** 2)

## Clustering (unsupervised learning)

If the output set is discrete and not available for training, we have a clustering problem.

In [None]:
# invent clusters that act like labels (y_train is never shown to the algorithm)
# random_state makes the clustering repeatable from run to run, like the seeded
# train/test split; n_init is spelled out because its default is not the same
# in every scikit-learn release.
km = KMeans(n_clusters=3, n_init=10, random_state=42)
km.fit(x_train)
z_train = km.predict(x_train)    # cluster index of every training example
z_test  = km.predict(x_test)     # cluster index of every test example

In [None]:
# colour = true species, marker symbol = cluster found by k-means
show_iris(x_train, y_train,
          symbol       = z_train,
          symbol_label = 'cluster',
          title        = 'training set')

## PCA (unsupervised learning)

If the output set is continuous and not available for training, we have a dimensionality reduction problem.

In [None]:
# fit a 3-component PCA on the training set only
pca = PCA(n_components=3).fit(x_train)
print(pca.explained_variance_ratio_)
# z_train / z_test now hold the projected coordinates
# (they replace the cluster indices from the k-means cell)
z_train, z_test = pca.transform(x_train), pca.transform(x_test)

In [None]:
# plot the projected training data; labels={} skips the default iris feature
# names, which no longer describe the axes
show_iris(z_train, y_train, labels={})