-
-
Notifications
You must be signed in to change notification settings - Fork 1.8k
/
Copy pathML-Python-utils.py
65 lines (56 loc) · 2.59 KB
/
ML-Python-utils.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
def plot_decision_boundaries(X, y, model_class, **model_params):
    """
    Plot the decision boundaries of a classification model.

    Only the first two columns of the data are used for fitting the model,
    because a predicted value is needed for every point of the 2-D mesh
    that is drawn underneath the scatter plot.

    Arguments:
            X: Feature data as a NumPy-type array. It must be 2-D with at
            least two columns; any further columns are ignored.
            y: Label data as a NumPy-type array. It is flattened to 1-D.
            model_class: A Scikit-learn ML estimator class
            e.g. GaussianNB (imported from sklearn.naive_bayes) or
            LogisticRegression (imported from sklearn.linear_model)
            **model_params: Model parameters to be passed on to the ML estimator

    Returns:
            The ``matplotlib.pyplot`` module (``plt``) after the filled
            contour, the scatter plot, the axis labels and the tick font
            sizes have been issued through it.

    Typical code example:
            plt.figure()
            plt.title("KNN decision boundary with neighbors: 5",fontsize=16)
            plot_decision_boundaries(X_train,y_train,KNeighborsClassifier,n_neighbors=5)
            plt.show()
    """
    try:
        X = np.array(X)
        y = np.array(y).flatten()
    except Exception:
        # Best effort: report the problem and carry on with the inputs as
        # given. ``Exception`` (rather than a bare ``except``) lets
        # KeyboardInterrupt / SystemExit propagate.
        print("Coercing input data to NumPy arrays failed")

    # Reduces to the first two columns of data
    reduced_data = X[:, :2]

    # Instantiate the model object and fit it on the reduced data
    model = model_class(**model_params)
    model.fit(reduced_data, y)

    # Step size of the mesh. Decrease to increase the resolution of the
    # plotted decision regions (at the cost of more predictions).
    step = 0.1

    # Mesh bounds: the data range of each plotted feature, padded by 1 on
    # both sides so the outermost points do not sit on the plot border.
    x_min, x_max = reduced_data[:, 0].min() - 1, reduced_data[:, 0].max() + 1
    y_min, y_max = reduced_data[:, 1].min() - 1, reduced_data[:, 1].max() + 1

    # Meshgrid creation
    xx, yy = np.meshgrid(np.arange(x_min, x_max, step),
                         np.arange(y_min, y_max, step))

    # Predict a label for every mesh point exactly once, then fold the flat
    # prediction vector back into the grid shape expected by contourf.
    Z = model.predict(np.c_[xx.ravel(), yy.ravel()]).reshape(xx.shape)

    # Plotting: decision regions first, training points on top
    plt.contourf(xx, yy, Z, alpha=0.4)
    plt.scatter(X[:, 0], X[:, 1], c=y, alpha=0.8)
    plt.xlabel("Feature-1",fontsize=15)
    plt.ylabel("Feature-2",fontsize=15)
    plt.xticks(fontsize=14)
    plt.yticks(fontsize=14)

    return plt