In [None]:
%matplotlib inline


# Classifier comparison


A comparison of several classifiers in scikit-learn on synthetic datasets.
The point of this example is to illustrate the nature of decision boundaries
of different classifiers.
This should be taken with a grain of salt, as the intuition conveyed by
these examples does not necessarily carry over to real datasets.

Particularly in high-dimensional spaces, data can more easily be separated
linearly and the simplicity of classifiers such as naive Bayes and linear SVMs
might lead to better generalization than is achieved by other classifiers.

The plots show training points in solid colors and testing points
semi-transparent. The lower right shows the classification accuracy on the test
set.


In [None]:
import numpy as np 
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
%matplotlib inline 
from sklearn.cluster import KMeans


In [None]:

# Load the compound table and keep only the two columns used for clustering.
# The explicit .copy() makes the selection an independent frame, so adding a
# 'Labels' column in later cells does not raise pandas' SettingWithCopyWarning.
df = pd.read_csv('~/Downloads/mix5_5p_cpds.csv')[['Mass', 'RT']].copy()

# Overview plot of the Mass and RT columns.
sns.pairplot(df, aspect=2)


In [None]:
# Number of clusters requested from KMeans (N is also read by the
# agglomerative-clustering cell).
N = 2

# Fit on the two feature columns only: once this cell has run, df also holds a
# 'Labels' column, and fitting on the whole frame would feed the previous
# cluster ids back in as a feature on re-run.
# A fixed random_state makes the centroid initialisation reproducible.
km = KMeans(n_clusters=N, random_state=0).fit(df[['Mass', 'RT']])

df['Labels'] = km.labels_
plt.figure(figsize=(12, 8))
# x / y / hue are passed by keyword: recent seaborn releases accept only
# `data` positionally and raise TypeError for positional x and y.
sns.scatterplot(x='Mass', y='RT', hue='Labels', data=df,
                palette=sns.color_palette('hls', N))
plt.title('KMeans with {} Clusters'.format(N))
plt.show()

In [None]:
# Independent copy of the feature columns. A plain `X = df` alias would carry
# the KMeans 'Labels' column into the next clustering fit and let later
# `X['Labels'] = ...` assignments silently overwrite df as well.
X = df[['Mass', 'RT']].copy()

In [None]:
from sklearn.cluster import AgglomerativeClustering

# Cluster on Mass / RT only. X may already contain a 'Labels' column (from the
# KMeans cell, or from a previous run of this cell); selecting the feature
# columns explicitly keeps old cluster ids out of the distance computation.
agglom = AgglomerativeClustering(n_clusters=N, linkage='average').fit(X[['Mass', 'RT']])

X['Labels'] = agglom.labels_
plt.figure(figsize=(12, 8))
# Keyword x / y / hue: recent seaborn releases reject positional x and y.
sns.scatterplot(x='Mass', y='RT', hue='Labels', data=X,
                palette=sns.color_palette('hls', N))
plt.title('Agglomerative with {} Clusters'.format(N))
plt.show()

In [None]:
from sklearn.cluster import DBSCAN

# DBSCAN parameters, named so the plot title always reports the values that
# were actually used (the title previously hard-coded different numbers).
DBSCAN_EPS = 100
DBSCAN_MIN_SAMPLES = 10

# Alternative input: '~/Downloads/mix5_5p_cpds.csv'
df = pd.read_csv('~/Studio/seq/tmp.csv')
X = df
# NOTE(review): the fit uses every column of the CSV — confirm that the file
# contains only the numeric features intended for clustering.
db = DBSCAN(eps=DBSCAN_EPS, min_samples=DBSCAN_MIN_SAMPLES).fit(X)

X['Labels'] = db.labels_
plt.figure(figsize=(12, 8))
# One palette colour per distinct label value found by DBSCAN.
# Keyword x / y / hue: recent seaborn releases reject positional x and y.
sns.scatterplot(x='Mass', y='RT', hue='Labels', data=X,
                palette=sns.color_palette('hls', len(np.unique(db.labels_))))
plt.title('DBSCAN with epsilon {}, min samples {}'.format(DBSCAN_EPS, DBSCAN_MIN_SAMPLES))
plt.show()

In [None]:
from sklearn.cluster import MeanShift, estimate_bandwidth

# Drop any 'Labels' column left in X by a previous clustering cell so that old
# cluster ids are not treated as a feature; errors='ignore' keeps the cell
# working when no such column exists.
ms_features = X.drop(columns='Labels', errors='ignore')

# Estimate the MeanShift bandwidth from the data instead of hard-coding it.
bandwidth = estimate_bandwidth(ms_features, quantile=0.1)
# `bandwidth` is keyword-only in current scikit-learn; passing it positionally
# raises TypeError.
ms = MeanShift(bandwidth=bandwidth).fit(ms_features)

X['Labels'] = ms.labels_
plt.figure(figsize=(12, 8))
# One palette colour per distinct label value found by MeanShift.
# Keyword x / y / hue: recent seaborn releases reject positional x and y.
sns.scatterplot(x='Mass', y='RT', hue='Labels', data=X,
                palette=sns.color_palette('hls', len(np.unique(ms.labels_))))
plt.title('MeanShift')
plt.show()

In [None]:
# Order the rows by Mass so the curve below is drawn with increasing x.
X = X.sort_values(by='Mass')
x, y = X['Mass'], X['RT']

# Quadratic in Mass (coefficients look hand-tuned — TODO confirm their origin)
# and the same curve shifted down by 2.0.
y2 = -1.31E-7 * x*x + 0.0021*x + 0.7
y3 = y2 - 2.0

# Scatter of the measured points with the quadratic overlaid in black.
fig, ax = plt.subplots(figsize=(12, 8))
ax.scatter(x, y)
ax.plot(x, y2, color='black')
plt.show()

In [None]:
from __future__ import absolute_import, division, print_function, unicode_literals


import tensorflow as tf

In [None]:
# Fetch the MNIST train / test splits through the Keras dataset helper.
mnist = tf.keras.datasets.mnist
(x_train, y_train), (x_test, y_test) = mnist.load_data()

# Divide both image arrays by 255.0 (presumably to bring 8-bit pixel values
# into the [0, 1] range — confirm against the dataset documentation).
x_train = x_train / 255.0
x_test = x_test / 255.0

In [None]:
# Sequential network built layer by layer:
# flatten (28, 28) input -> 128-unit ReLU layer -> dropout 0.2 -> 10 outputs
# with no activation on the final layer.
model = tf.keras.models.Sequential()
model.add(tf.keras.layers.Flatten(input_shape=(28, 28)))
model.add(tf.keras.layers.Dense(128, activation='relu'))
model.add(tf.keras.layers.Dropout(0.2))
model.add(tf.keras.layers.Dense(10))

In [None]:
# Call the model on a one-element slice of the training images and convert
# the result to a NumPy array; the bare name on the last line displays it.
first_sample = x_train[:1]
predictions = model(first_sample).numpy()
predictions

In [None]:
from __future__ import absolute_import, division, print_function, unicode_literals

# NOTE(review): this cell repeats the __future__ / tensorflow imports of an
# earlier cell in this notebook; it looks like the start of a second,
# independent example pasted in.
try:
  # %tensorflow_version only exists in Colab.
  # The magic is wrapped in try/except so that a failure here is ignored
  # (best effort) instead of stopping the cell.
  %tensorflow_version 2.x
except Exception:
  pass

import tensorflow as tf

In [None]:
# Display the version string of the imported tensorflow module.
tf.__version__

In [None]:
import tensorflow as tf

# Load MNIST and divide the image arrays by 255.0 (presumably scaling 8-bit
# pixel values into [0, 1] — confirm against the dataset documentation).
mnist = tf.keras.datasets.mnist
(x_train, y_train), (x_test, y_test) = mnist.load_data()
x_train = x_train / 255.0
x_test = x_test / 255.0

# Same layer stack as a list literal would give, added one layer at a time:
# flatten -> 128-unit ReLU -> dropout 0.2 -> 10-way softmax output.
model = tf.keras.models.Sequential()
model.add(tf.keras.layers.Flatten(input_shape=(28, 28)))
model.add(tf.keras.layers.Dense(128, activation='relu'))
model.add(tf.keras.layers.Dropout(0.2))
model.add(tf.keras.layers.Dense(10, activation='softmax'))

# Integer class labels are used directly, hence the sparse categorical loss.
model.compile(
    optimizer='adam',
    loss='sparse_categorical_crossentropy',
    metrics=['accuracy'],
)

# Train for five epochs, then report the metrics on the held-out test split.
model.fit(x_train, y_train, epochs=5)
model.evaluate(x_test, y_test)