Skip to content

Commit be69253

Browse files
authored
Add files via upload
week 3 and 4 uploaded
1 parent 5a6e7c2 commit be69253

21 files changed

+5738
-0
lines changed

MLiP-week03and4/03 Introduction to Machine Learning in ScikitLearn.ipynb

Lines changed: 3383 additions & 0 deletions
Large diffs are not rendered by default.
Lines changed: 46 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,46 @@
1+
import numpy as np
2+
import matplotlib.pyplot as plt
3+
4+
5+
def plot_2d_separator(classifier, X, fill=False, ax=None, eps=None):
    """Draw the decision boundary of a two-class classifier over 2-D data.

    The classifier is evaluated on a 100 x 100 grid that covers the range of
    the first two columns of ``X`` plus a margin of ``eps`` on every side.

    Parameters
    ----------
    classifier : object
        Must provide ``decision_function(X_grid)`` or, failing that,
        ``predict_proba(X_grid)``.  With ``decision_function`` the boundary
        is drawn at level 0; with ``predict_proba`` column 1 is used and the
        boundary is drawn at 0.5.
    X : array
        Data indexed as ``X[:, 0]`` and ``X[:, 1]``; only used to derive the
        plotting range (and the default ``eps``).
    fill : bool, default False
        If true, fill the two sides of the boundary in blue and red with
        ``contourf``; otherwise draw only the boundary as a black contour.
    ax : axes-like object, optional
        Target axes.  Defaults to ``plt.gca()``.
    eps : float, optional
        Margin added around the data range.  Defaults to ``X.std() / 2``.

    Returns
    -------
    None
        The function only draws on ``ax``; it also sets the axis limits to
        the grid range and removes all ticks.
    """
    if eps is None:
        eps = X.std() / 2.

    x_min, x_max = X[:, 0].min() - eps, X[:, 0].max() + eps
    y_min, y_max = X[:, 1].min() - eps, X[:, 1].max() + eps
    xx = np.linspace(x_min, x_max, 100)
    yy = np.linspace(y_min, y_max, 100)

    X1, X2 = np.meshgrid(xx, yy)
    X_grid = np.c_[X1.ravel(), X2.ravel()]

    # Only the attribute lookup is guarded.  Wrapping the call itself in
    # ``except AttributeError`` would hide AttributeErrors raised *inside*
    # decision_function and silently switch to predict_proba.
    try:
        decision_function = classifier.decision_function
    except AttributeError:
        # no decision_function
        decision_values = classifier.predict_proba(X_grid)[:, 1]
        levels = [.5]
        fill_levels = [0, .5, 1]
    else:
        decision_values = decision_function(X_grid)
        levels = [0]
        fill_levels = [decision_values.min(), 0, decision_values.max()]

    if ax is None:
        ax = plt.gca()

    # The flat grid predictions are folded back into the meshgrid shape.
    grid_values = decision_values.reshape(X1.shape)
    if fill:
        ax.contourf(X1, X2, grid_values,
                    levels=fill_levels, colors=['blue', 'red'])
    else:
        ax.contour(X1, X2, grid_values, levels=levels,
                   colors="black")

    ax.set_xlim(x_min, x_max)
    ax.set_ylim(y_min, y_max)
    ax.set_xticks(())
    ax.set_yticks(())
37+
38+
39+
if __name__ == '__main__':
    # Demo: fit a logistic regression on two synthetic blobs and show the
    # filled decision regions together with the training points.
    from sklearn.linear_model import LogisticRegression
    from sklearn.datasets import make_blobs

    points, labels = make_blobs(centers=2, random_state=42)

    model = LogisticRegression()
    model.fit(points, labels)

    plot_2d_separator(model, points, fill=True)
    plt.scatter(points[:, 0], points[:, 1], c=labels)
    plt.show()

MLiP-week03and4/imgs/digits.png

24.9 KB
Loading
8.41 KB
Loading

MLiP-week03and4/imgs/faces.png

576 KB
Loading

MLiP-week03and4/imgs/house_price.jpeg

8.47 KB
Loading

MLiP-week03and4/imgs/iris.png

1.43 MB
Loading

MLiP-week03and4/imgs/iris_setosa.jpg

17.8 KB
Loading
21.3 KB
Loading
60.6 KB
Loading

MLiP-week03and4/imgs/knn.png

5.58 KB
Loading

MLiP-week03and4/imgs/ml_taxonomy.png

198 KB
Loading

MLiP-week03and4/imgs/spam1.jpeg

7.33 KB
Loading

MLiP-week03and4/imgs/spam2.jpeg

6.74 KB
Loading

MLiP-week03and4/imgs/supervised_workflow.svg

Lines changed: 558 additions & 0 deletions
Loading

MLiP-week03and4/imgs/train_test_split_matrix.svg

Lines changed: 1650 additions & 0 deletions
Loading
Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,10 @@
1+
# Display the first 64 images returned by fetch_olivetti_faces() in an
# 8 x 8 grid of tick-less subplots.
faces = fetch_olivetti_faces()

# Figure size is given in inches; the outer margins are removed and only a
# small gap is left between neighbouring subplots.
fig = plt.figure(figsize=(6, 6))
fig.subplots_adjust(left=0, right=1, bottom=0, top=1,
                    hspace=0.05, wspace=0.05)

# Subplot positions are 1-based, image indices are 0-based.
for cell in range(1, 65):
    ax = fig.add_subplot(8, 8, cell, xticks=[], yticks=[])
    ax.imshow(faces.images[cell - 1],
              cmap=plt.cm.bone,
              interpolation='nearest')
Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,26 @@
1+
# Report every misclassified test sample, then scatter the test set using
# feature columns 1 and 2 and highlight misclassified samples in dark red.
plt.figure(figsize=(10, 6))

for wrong in incorrect_idx:
    print('%d: Predicted %d | True label %d'
          % (wrong, y_pred[wrong], y_test[wrong]))

# Plot two dimensions, one colour per class label 0..2.
colors = ["darkblue", "darkgreen", "gray"]

for label, shade in enumerate(colors):
    members = np.where(y_test == label)[0]
    plt.scatter(X_test[members, 1],
                X_test[members, 2],
                color=shade,
                label="Class %s" % str(label))

# zip() stops at the shorter sequence, so at most three misclassified
# samples (one per marker shape) are highlighted.
for wrong, shape in zip(incorrect_idx, ['x', 's', 'v']):
    plt.scatter(X_test[wrong, 1],
                X_test[wrong, 2],
                color="darkred",
                marker=shape,
                s=60,
                label=wrong)

plt.xlabel('sepal width [cm]')
plt.ylabel('petal length [cm]')
plt.legend(loc=1, scatterpoints=1)
plt.title("Iris Classification results")
plt.show()
Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,30 @@
1+
# Compare k-nearest-neighbour classifiers on iris for k = 1..19 using a
# held-out validation split, then report test accuracy for k = 9.
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
# KNeighborsClassifier is used below; import it here so the snippet does
# not depend on an earlier import made elsewhere.
from sklearn.neighbors import KNeighborsClassifier


iris = load_iris()
X = iris.data
y = iris.target

# Hold out 25% of the data as the final test set (stratified by class).
X_train, X_test, y_train, y_test = train_test_split(X, y,
                                                    test_size=0.25,
                                                    random_state=1234,
                                                    stratify=y)

# Split the training portion in half: one half to fit, one half to validate.
X_trainsub, X_valid, y_trainsub, y_valid = train_test_split(X_train, y_train,
                                                            test_size=0.5,
                                                            random_state=1234,
                                                            stratify=y_train)

for k in range(1, 20):
    knn = KNeighborsClassifier(n_neighbors=k)
    train_score = knn.fit(X_trainsub, y_trainsub).\
        score(X_trainsub, y_trainsub)
    valid_score = knn.score(X_valid, y_valid)
    print('k: %d, Train/Valid Acc: %.3f/%.3f' %
          (k, train_score, valid_score))


# k is hard-coded to 9 here (presumably picked from the validation scores
# printed above); the final model is refit on the full training split.
knn = KNeighborsClassifier(n_neighbors=9)
knn.fit(X_train, y_train)
print('k=9 Test Acc: %.3f' % knn.score(X_test, y_test))
Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,26 @@
1+
# Compare linear regression with 1-nearest-neighbour regression on the
# Boston housing data, printing train and test scores for both.
#
# NOTE: load_boston was deprecated in scikit-learn 1.0 and removed in 1.2,
# so this snippet requires an older scikit-learn release.
from sklearn.datasets import load_boston
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
# KNeighborsRegressor is used below; import it here so the snippet does
# not depend on an earlier import made elsewhere.
from sklearn.neighbors import KNeighborsRegressor


boston = load_boston()
X = boston.data
y = boston.target

print('X.shape:', X.shape)
# Hold out 25% of the samples for testing.
X_train, X_test, y_train, y_test = train_test_split(X, y,
                                                    test_size=0.25,
                                                    random_state=42)

linreg = LinearRegression()
knnreg = KNeighborsRegressor(n_neighbors=1)

linreg.fit(X_train, y_train)
print('Linear Regression Train/Test: %.3f/%.3f' %
      (linreg.score(X_train, y_train),
       linreg.score(X_test, y_test)))

knnreg.fit(X_train, y_train)
print('KNeighborsRegressor Train/Test: %.3f/%.3f' %
      (knnreg.score(X_train, y_train),
       knnreg.score(X_test, y_test)))
Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,9 @@
1+
# Refit the regressor on inputs extended with sin(4 * X) as extra columns
# and plot its test predictions next to the earlier ones (y_pred_test).
def _with_sine(features):
    """Return *features* with sin(4 * features) appended along axis 1."""
    sine_part = np.sin(4 * features)
    return np.concatenate((features, sine_part), axis=1)


XX_train = _with_sine(X_train)
XX_test = _with_sine(X_test)

regressor.fit(XX_train, y_train)
y_pred_test_sine = regressor.predict(XX_test)

plt.plot(X_test, y_test, 'o', label="data")
plt.plot(X_test, y_pred_test_sine, 'o', label="prediction with sine")
plt.plot(X_test, y_pred_test, label='prediction without sine')
plt.legend(loc='best');

0 commit comments

Comments
 (0)