In [None]:
'''
用MLP解决如下问题
x = [[0.1, 0.7, 0.8, 0.8, 1.0, 0.3, 0.0, -0.3, -0.5, -1.5],
     [1.2, 1.8, 1.6, 0.6, 0.8, 0.5, 0.2, 0.8, -1.5, -1.3]]
所对应的10组二元目标矢量为：
y = [[1, 1, 1, 0, 0, 1, 1, 1, 0, 0],
     [0, 0, 0, 0, 0, 1, 1, 1, 1, 1]]
'''

In [89]:
from sklearn.neural_network import MLPClassifier
import numpy as np

# Training inputs: one row per sample, one column per feature
# (i.e. the transpose of the 2 x 10 matrix `x` in the problem statement).
train_x = np.array([
    [ 0.1, 1.2],
    [ 0.7, 1.8],
    [ 0.8, 1.6],
    [ 0.8, 0.6],
    [ 1. , 0.8],
    [ 0.3, 0.5],
    [ 0. , 0.2],
    [-0.3, 0.8],
    [-0.5,-1.5],
    [-1.5,-1.3]])

# Targets: one row per sample, two binary outputs per row
# (the transpose of `y` in the problem statement).
train_y = np.array([
    [1, 0],
    [1, 0],
    [1, 0],
    [0, 0],
    [0, 0],
    [1, 1],
    [1, 1],
    [1, 1],
    [0, 1],
    [0, 1]])

model = MLPClassifier(
    activation="relu",
    solver="adam",
    learning_rate_init=0.001,
    tol=0.00001,
    max_iter=500,
    hidden_layer_sizes=(100, 100, 100),
    # Fixed seed: weight initialisation is random, so without it the
    # printed score can change between runs of the same notebook.
    random_state=0)
model.fit(train_x, train_y)
# Score on the training samples themselves; no held-out data is used here.
print(model.score(train_x, train_y))

1.0


array([[1, 0],
       [1, 0],
       [1, 0],
       [0, 0],
       [0, 0],
       [1, 1],
       [1, 1],
       [1, 1],
       [0, 1],
       [0, 1]])

In [None]:
'''
用MLP解决Kaggle手写数字识别问题，通过KFold找到你的最优网络
'''

In [2]:
from sklearn.neural_network import MLPClassifier
from sklearn.model_selection import StratifiedKFold
import numpy as np
import pandas as pd

# Load the training CSV as a plain ndarray: column 0 is the label, the
# remaining columns are the features. np.matrix is avoided because NumPy no
# longer recommends it and scikit-learn >= 1.2 raises TypeError for it.
train = np.asarray(pd.read_csv("dr_train.csv"))
train_x = train[:, 1:]
train_y = train[:, 0]  # already 1-D for an ndarray, so no .A1 flattening needed
print(">>> Data loaded.")

# 10 stratified folds over the labels.
kfold = StratifiedKFold(n_splits=10)

def output_csv(filename, y):
    """Write the predictions ``y`` to ``filename`` as a two-column CSV.

    The ``ImageId`` column counts 1..len(y) and the ``Label`` column holds
    the entries of ``y`` in order. The DataFrame index is not written.
    """
    image_ids = range(1, len(y) + 1)
    submission = pd.DataFrame({"ImageId": image_ids, "Label": y})
    submission.to_csv(filename, index=False)

def _choose(pred):
    """ Choose the most    """
    m = {}
    for n in pred:
        m[n] = m.get(n, 0) + 1
    return sorted(m.keys(), key=lambda n:m[n])[-1]

def bulk_predict(trained_models, x_test):
    """Predict labels for ``x_test`` by majority vote over ``trained_models``.

    Parameters
    ----------
    trained_models : iterable
        Fitted models; each must provide ``predict(x_test)`` returning one
        label per sample.
    x_test : array-like
        Samples to classify. An ``np.matrix`` is converted to a plain
        ndarray first; any other input is passed to the models unchanged.

    Returns
    -------
    numpy.ndarray
        One label per sample, selected by ``_choose`` from the models'
        predictions. Empty when ``trained_models`` is empty.
    """
    # scikit-learn >= 1.2 raises TypeError for np.matrix input.
    if isinstance(x_test, np.matrix):
        x_test = np.asarray(x_test)

    predicts = [m.predict(x_test) for m in trained_models]
    if not predicts:
        # No models means no votes; keep returning an empty array.
        return np.array([])

    # Rows are models, columns are samples; transpose to walk per sample.
    votes = np.asarray(predicts)
    return np.array([_choose(sample_votes) for sample_votes in votes.T])

# Train one MLP per stratified fold, print its accuracy on the held-out part
# of that fold, and keep every fitted model in dt_models.
dt_models = []
for idx_train, idx_test in kfold.split(train_x, train_y):
    # np.asarray: scikit-learn >= 1.2 raises TypeError for np.matrix input;
    # for a plain ndarray this is a no-op.
    trn_x = np.asarray(train_x[idx_train])
    trn_y = train_y[idx_train]
    trn_tst_x = np.asarray(train_x[idx_test])
    trn_tst_y = train_y[idx_test]
    model = MLPClassifier(
        activation="relu",
        solver="adam",
        verbose=False,
        learning_rate_init=0.001,
        tol=0.000001,
        max_iter=3000,
        # Tuple form, matching the documented parameter type.
        hidden_layer_sizes=(500, 500, 500),
        # Fixed seed so fold scores are reproducible between runs.
        random_state=0)
    model.fit(trn_x, trn_y)
    print(model.score(trn_tst_x, trn_tst_y))
    dt_models.append(model)

>>> Data loaded.
0.957679505468
0.956945765937
0.963112803427
0.947393477743
0.959523809524
0.894022386282
0.946415813289
0.956635692161
0.963536701621
0.954957102002


In [None]:
# Load the test CSV as a plain ndarray (scikit-learn >= 1.2 raises TypeError
# for np.matrix input), predict with the fold models, and write the CSV.
test_x = np.asarray(pd.read_csv("dr_test.csv"))
test_y = bulk_predict(dt_models, test_x)
output_csv("dt_kaggle.csv", test_y)