1. NN

In [1]:
import pandas as pd

data_SNP = pd.read_table('CIMMYTwheat_markers.txt', header=None,  sep =  ' ')


# 目的変数
data_yields = pd.read_table('CIMMYTwheat_yields.txt', header=None,  sep =  ' ')

#１列目での予測失敗、最大値を使った予測失敗、最小値を使った予測失敗、2・3列目での予測失敗
data_amount_list = []
for i in range(len(data_yields)):
    data_amount_list.append(data_yields.iloc[i, 0])

data_amount = pd.Series(data_amount_list)

from sklearn.model_selection import train_test_split
x_train, x_test, t_train, t_test = train_test_split(data_SNP, data_amount, test_size=0.3) 

In [2]:
import numpy as np

# affine変換
def affine(z, W, b):
    return np.dot(z, W) + b

# affine変換勾配
def affine_back(du, z, W, b):
    dz = np.dot(du, W.T)
    dW = np.dot(z.T, du)
    db = np.dot(np.ones(z.shape[0]).T, du)
    return dz, dW, db

# 活性化関数(ReLU)
def relu(u):
    return np.maximum(0, u)

# 活性化関数(ReLU)勾配
def relu_back(dz, u):
    return dz * np.where(u > 0, 1, 0)

# 活性化関数(恒等関数)
def koutou(u):
    return u

# 誤差(平均二乗誤差）
def mean_sequared_error(y, t):
    return 0.5*np.sum((y-t)**2)

# 誤差(平均二乗誤差）＋活性化関数(恒等関数)勾配
def koutou_mean_sequared_error_back(y, t):
    return y - t


# 要素を1次元の配列に並べる
def flatten(x):
    return x.reshape((-1))

In [3]:
def learn(x, t, W1, b1, W2, b2, W3, b3, lr):
    # 順伝播
    u1 = affine(x, W1, b1)
    z1 = relu(u1)
    u2 = affine(z1, W2, b2)
    z2 = relu(u2)
    u3 = affine(z2, W3, b3)
    y  = flatten(koutou(u3))
    # 逆伝播
    dy = np.expand_dims(koutou_mean_sequared_error_back(y, t), axis=1)
    dz2, dW3, db3 = affine_back(dy, z2, W3, b3)
    du2 = relu_back(dz2, u2)
    dz1, dW2, db2 = affine_back(du2, z1, W2, b2)
    du1 = relu_back(dz1, u1)
    dx, dW1, db1 = affine_back(du1, x, W1, b1)
    # 重み、バイアスの更新
    W1 = W1 - lr * dW1
    b1 = b1 - lr * db1
    W2 = W2 - lr * dW2
    b2 = b2 - lr * db2
    W3 = W3 - lr * dW3
    b3 = b3 - lr * db3

    return W1, b1, W2, b2, W3, b3

In [4]:
def predict(x, W1, b1, W2, b2, W3, b3):
    # 順伝播
    u1 = affine(x, W1, b1)
    z1 = relu(u1)
    u2 = affine(z1, W2, b2)
    z2 = relu(u2)
    u3 = affine(z2, W3, b3)
    y  = koutou(u3)
    return flatten(y)

In [5]:
# 正解率
def accuracy_rate(y, t):
    max_y = np.argmax(y, axis=1)
    max_t = np.argmax(t, axis=1)
    return np.sum(max_y == max_t)/y.shape[0]

In [6]:
#.values.reshape((-1))

In [7]:
# ノード数設定
d0 = x_train.shape[1]
d1 = 100 # 1層目のノード数
d2 = 50  # 2層目のノード数
d3 = 1

# 重みの初期化(-0.1～0.1の乱数)
np.random.seed(8)
W1 = np.random.rand(d0, d1) * 0.2 - 0.1
W2 = np.random.rand(d1, d2) * 0.2 - 0.1
W3 = np.random.rand(d2, d3) * 0.2 - 0.1

# バイアスの初期化(0)
b1 = np.zeros(d1)
b2 = np.zeros(d2)
b3 = np.zeros(d3)

# 学習率
lr = 1e-4

# バッチサイズ
batch_size = 100

# 学習回数
epoch = 50

# 予測（学習データ）
y_train = predict(x_train, W1, b1, W2, b2, W3, b3)
# 予測（テストデータ）
y_test = predict(x_test, W1, b1, W2, b2, W3, b3)
# 正解率、誤差表示
train_rate, train_err = mean_sequared_error(y_train, t_train), mean_sequared_error(y_train, t_train)
test_rate, test_err = mean_sequared_error(y_test, t_test), mean_sequared_error(y_test, t_test)
print("{0:3d} train_rate={1:6.2f}% test_rate={2:6.2f}% train_err={3:8.5f} test_err={4:8.5f}".format((0), train_rate*100, test_rate*100, train_err, test_err))
for i in range(epoch):
    # 学習
    for j in range(0, x_train.shape[0], batch_size):
        W1, b1, W2, b2, W3, b3 = learn(x_train[j:j+batch_size], t_train[j:j+batch_size].values.reshape((-1)), W1, b1, W2, b2, W3, b3, lr)
    # 予測（学習データ）
    y_train = predict(x_train, W1, b1, W2, b2, W3, b3)
    # 予測（テストデータ）
    y_test = predict(x_test, W1, b1, W2, b2, W3, b3)
    # 正解率、誤差表示
    train_rate, train_err = mean_sequared_error(y_train, t_train), mean_sequared_error(y_train, t_train)
    test_rate, test_err = mean_sequared_error(y_test, t_test), mean_sequared_error(y_test, t_test)
    print("{0:3d} train_rate={1:6.2f}% test_rate={2:6.2f}% train_err={3:8.5f} test_err={4:8.5f}".format((i+1), train_rate*100, test_rate*100, train_err, test_err))

  0 train_rate=22954.48% test_rate=7234.33% train_err=229.54481 test_err=72.34326
  1 train_rate=22867.51% test_rate=7202.84% train_err=228.67515 test_err=72.02836
  2 train_rate=22788.26% test_rate=7178.00% train_err=227.88259 test_err=71.77999
  3 train_rate=22712.44% test_rate=7156.15% train_err=227.12440 test_err=71.56151
  4 train_rate=22639.99% test_rate=7137.69% train_err=226.39987 test_err=71.37693
  5 train_rate=22566.51% test_rate=7121.22% train_err=225.66513 test_err=71.21224
  6 train_rate=22493.42% test_rate=7105.17% train_err=224.93423 test_err=71.05173
  7 train_rate=22419.29% test_rate=7088.27% train_err=224.19289 test_err=70.88274
  8 train_rate=22345.16% test_rate=7071.90% train_err=223.45161 test_err=70.71902
  9 train_rate=22273.83% test_rate=7055.59% train_err=222.73835 test_err=70.55591
 10 train_rate=22203.74% test_rate=7040.14% train_err=222.03738 test_err=70.40140
 11 train_rate=22132.24% test_rate=7025.08% train_err=221.32241 test_err=70.25084
 12 train_rate=2

In [8]:
'''
# ノード数設定
d0 = x_train.shape[1]
d1 = 100 # 1層目のノード数
d2 = 50  # 2層目のノード数
d3 = 10

# 重みの初期化(-0.1～0.1の乱数)
np.random.seed(8)
W1 = np.random.rand(d0, d1) * 0.2 - 0.1
W2 = np.random.rand(d1, d2) * 0.2 - 0.1
W3 = np.random.rand(d2, d3) * 0.2 - 0.1

# バイアスの初期化(0)
b1 = np.zeros(d1)
b2 = np.zeros(d2)
b3 = np.zeros(d3)

# 学習率
lr = 0.5

# バッチサイズ
batch_size = 100

# 学習回数
epoch = 50

# 予測（学習データ）
y_train = predict(x_train, W1, b1, W2, b2, W3, b3)

# 予測（テストデータ）
y_test = predict(x_test, W1, b1, W2, b2, W3, b3)

# 正解率、誤差表示
train_rate, train_err = accuracy_rate(y_train, t_train), mean_sequared_error(y_train, t_train)
test_rate, test_err = accuracy_rate(y_test, t_test), mean_sequared_error(y_test, t_test)
print("{0:3d} train_rate={1:6.2f}% test_rate={2:6.2f}% train_err={3:8.5f} test_err={4:8.5f}".format((0), train_rate*100, test_rate*100, train_err, test_err))

for i in range(epoch):
    # 学習
    for j in range(0, x_train.shape[0], batch_size):
        W1, b1, W2, b2, W3, b3 = learn(x_train[j:j+batch_size], t_train[j:j+batch_size], W1, b1, W2, b2, W3, b3, lr)

    # 予測（学習データ）
    y_train = predict(x_train, W1, b1, W2, b2, W3, b3)
    # 予測（テストデータ）
    y_test = predict(x_test, W1, b1, W2, b2, W3, b3)
    # 正解率、誤差表示
    train_rate, train_err = accuracy_rate(y_train, t_train), mean_sequared_error(y_train, t_train)
    test_rate, test_err = accuracy_rate(y_test, t_test), mean_sequared_error(y_test, t_test)
    print("{0:3d} train_rate={1:6.2f}% test_rate={2:6.2f}% train_err={3:8.5f} test_err={4:8.5f}".format((i+1), train_rate*100, test_rate*100, train_err, test_err))
'''

'\n# ノード数設定\nd0 = x_train.shape[1]\nd1 = 100 # 1層目のノード数\nd2 = 50  # 2層目のノード数\nd3 = 10\n\n# 重みの初期化(-0.1～0.1の乱数)\nnp.random.seed(8)\nW1 = np.random.rand(d0, d1) * 0.2 - 0.1\nW2 = np.random.rand(d1, d2) * 0.2 - 0.1\nW3 = np.random.rand(d2, d3) * 0.2 - 0.1\n\n# バイアスの初期化(0)\nb1 = np.zeros(d1)\nb2 = np.zeros(d2)\nb3 = np.zeros(d3)\n\n# 学習率\nlr = 0.5\n\n# バッチサイズ\nbatch_size = 100\n\n# 学習回数\nepoch = 50\n\n# 予測（学習データ）\ny_train = predict(x_train, W1, b1, W2, b2, W3, b3)\n\n# 予測（テストデータ）\ny_test = predict(x_test, W1, b1, W2, b2, W3, b3)\n\n# 正解率、誤差表示\ntrain_rate, train_err = accuracy_rate(y_train, t_train), mean_sequared_error(y_train, t_train)\ntest_rate, test_err = accuracy_rate(y_test, t_test), mean_sequared_error(y_test, t_test)\nprint("{0:3d} train_rate={1:6.2f}% test_rate={2:6.2f}% train_err={3:8.5f} test_err={4:8.5f}".format((0), train_rate*100, test_rate*100, train_err, test_err))\n\nfor i in range(epoch):\n    # 学習\n    for j in range(0, x_train.shape[0], batch_size):\n        W1, b1, W2,

1. 結果　

線形回帰だけでなくNNでも予測がうまくできなかった