## パーセプトロンによる分類学習

### トランプデッキの読み込み

In [1]:
import pandas as pd

In [3]:
# Load the card deck table from a CSV file located next to the notebook.
deck = pd.read_csv("./TrumpDeck.csv")
# Show five randomly chosen rows as a quick sanity check of the columns.
deck.sample(5)

Unnamed: 0,Id,Rank,Suit,Num,SuitOrder,Color,Face,EvenOdd,Royal,Symmetry,Asymmetry
30,5H,5,H,5,3,red,False,odd,False,False,True
34,9H,9,H,9,3,red,False,odd,False,False,True
10,JC,J,C,11,1,black,True,odd,True,True,False
20,8D,8,D,8,2,red,False,even,False,True,False
37,QH,Q,H,12,3,red,True,even,True,True,False


### 教師データの設定

In [4]:
# Columns used as model inputs, and the column the model should predict.
features = ["Suit", "Num", "Color", "Face", "EvenOdd", "Royal"]
target = "Asymmetry"

y = deck[target]
# One-hot encode the string-valued input columns. drop_first=True omits the
# first level of each encoded column, leaving k-1 indicators for k levels.
X = pd.get_dummies(deck[features], drop_first=True)

In [5]:
# Preview five random rows of the encoded feature matrix.
X.sample(5)

Unnamed: 0,Num,Face,Royal,Suit_D,Suit_H,Suit_S,Color_red,EvenOdd_odd
16,4,False,False,1,0,0,1,0
40,2,False,False,0,0,1,0,0
19,7,False,False,1,0,0,1,1
36,11,True,True,0,1,0,1,1
49,11,True,True,0,0,1,0,1


In [6]:
# Preview five random values of the target column.
y.sample(5)

27    False
6      True
42    False
22    False
31     True
Name: Asymmetry, dtype: bool

### 学習データと評価データへの分割

In [9]:
from sklearn.model_selection import train_test_split

# Split the data in half: one half for fitting, the other held out for
# evaluation. The fixed seed makes the split repeatable across runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.5,
    random_state=0,
)

In [12]:

# Show the first rows of the held-out feature matrix.
X_test.head()

Unnamed: 0,Num,Face,Royal,Suit_D,Suit_H,Suit_S,Color_red,EvenOdd_odd
28,3,False,False,0,1,0,1,1
37,12,True,True,0,1,0,1,0
11,12,True,True,0,0,0,0,0
34,9,False,False,0,1,0,1,1
2,3,False,False,0,0,0,0,1


In [13]:
# Show the first held-out target values; the index matches X_test above.
y_test.head()

28     True
37    False
11    False
34     True
2      True
Name: Asymmetry, dtype: bool

### 法則の推測

In [14]:
from sklearn.linear_model import Perceptron
# Create the classifier with scikit-learn's default hyperparameters.
clf = Perceptron()


In [16]:
# Confirm which estimator class was instantiated.
type(clf)

sklearn.linear_model._perceptron.Perceptron

In [17]:
# Fit the perceptron on the training half of the data.
clf.fit(X_train, y_train)

In [18]:
from sklearn.metrics import confusion_matrix

# Predict on the training data itself to see how well the model fits it.
y_train_pred = clf.predict(X_train)

# Pin the label order to clf.classes_ so the matrix always has one row and one
# column per class, in the same order as the MultiIndex labels built below.
# Without `labels`, a class absent from both y_train and the predictions would
# shrink the matrix and make the DataFrame construction fail.
conf_train = confusion_matrix(y_train, y_train_pred, labels=clf.classes_)

# Two-level labels ("Actual"/"Predicted" over the class values) for display.
index = pd.MultiIndex.from_product([["Actual"], clf.classes_])
columns = pd.MultiIndex.from_product([["Predicted"], clf.classes_])
conf_train = pd.DataFrame(conf_train, index=index, columns=columns)

In [19]:
# Display the training confusion matrix (rows: actual, columns: predicted).
conf_train

Unnamed: 0_level_0,Unnamed: 1_level_0,Predicted,Predicted
Unnamed: 0_level_1,Unnamed: 1_level_1,False,True
Actual,False,18,0
Actual,True,5,3


### 回答

In [20]:
# Predict the held-out half and wrap the result in a named Series that shares
# y_test's index, so predictions can be aligned with the original rows.
y_test_pred = pd.Series(
    clf.predict(X_test),
    index=y_test.index,
    name="prediction",
)
y_test_pred

28    False
37    False
11    False
34    False
2      True
30    False
40     True
32    False
26    False
4     False
10    False
22    False
31    False
29    False
42    False
43    False
7     False
14    False
33    False
50    False
18    False
49    False
35    False
15    False
5     False
27    False
Name: prediction, dtype: bool

In [21]:
# Put each held-out row's features side by side with its prediction.
pd.concat([X_test, y_test_pred], axis="columns")

Unnamed: 0,Num,Face,Royal,Suit_D,Suit_H,Suit_S,Color_red,EvenOdd_odd,prediction
28,3,False,False,0,1,0,1,1,False
37,12,True,True,0,1,0,1,0,False
11,12,True,True,0,0,0,0,0,False
34,9,False,False,0,1,0,1,1,False
2,3,False,False,0,0,0,0,1,True
30,5,False,False,0,1,0,1,1,False
40,2,False,False,0,0,1,0,0,True
32,7,False,False,0,1,0,1,1,False
26,1,False,True,0,1,0,1,1,False
4,5,False,False,0,0,0,0,1,False


### 答え合わせ

In [22]:
from sklearn.metrics import confusion_matrix

# Confusion matrix on the held-out data. `labels=clf.classes_` fixes the
# row/column order and shape to the classifier's classes, and the axis labels
# are built here rather than reused from an earlier cell, so this cell does
# not depend on leftover `index`/`columns` variables.
conf_test = pd.DataFrame(
    confusion_matrix(y_test, y_test_pred, labels=clf.classes_),
    index=pd.MultiIndex.from_product([["Actual"], clf.classes_]),
    columns=pd.MultiIndex.from_product([["Predicted"], clf.classes_]),
)

In [23]:
# Display the held-out confusion matrix (rows: actual, columns: predicted).
conf_test

Unnamed: 0_level_0,Unnamed: 1_level_0,Predicted,Predicted
Unnamed: 0_level_1,Unnamed: 1_level_1,False,True
Actual,False,15,1
Actual,True,9,1


### 点数計算

In [24]:
from sklearn.metrics import accuracy_score, precision_score, recall_score

# Score the held-out predictions against the true labels.
acc_test = accuracy_score(y_test, y_test_pred)    # share of correct predictions
pre_test = precision_score(y_test, y_test_pred)   # correct among predicted True
rec_test = recall_score(y_test, y_test_pred)      # found among actual True

# Print accuracy, precision and recall on one line each.
print(
    f"正解率 : {acc_test}",
    f"適合率 : {pre_test}",
    f"検出率 : {rec_test}",
    sep="\n",
)

正解率 : 0.6153846153846154
適合率 : 0.5
検出率 : 0.1


### パーセプトロンの動作原理

In [26]:
# Learned weights of the fitted model, one value per feature column.
clf.coef_

array([[ -7., -12., -14.,  -7.,  -2.,   9.,  -9.,  17.]])

In [27]:
# Show the training columns to read the weights above against their features.
X_train.head()

Unnamed: 0,Num,Face,Royal,Suit_D,Suit_H,Suit_S,Color_red,EvenOdd_odd
16,4,False,False,1,0,0,1,0
51,13,True,True,0,0,1,0,1
20,8,False,False,1,0,0,1,0
45,7,False,False,0,0,1,0,1
8,9,False,False,0,0,0,0,1


In [28]:
# Learned bias term that is added to the weighted feature sum.
clf.intercept_

array([8.])

In [32]:
# Reproduce the prediction by hand: multiply every feature by its weight, sum
# across the columns of each row, add the intercept, and classify as True
# when the result is positive.
((X_test * clf.coef_).sum(axis="columns") + clf.intercept_) > 0

28    False
37    False
11    False
34    False
2      True
30    False
40     True
32    False
26    False
4     False
10    False
22    False
31    False
29    False
42    False
43    False
7     False
14    False
33    False
50    False
18    False
49    False
35    False
15    False
5     False
27    False
dtype: bool

In [None]:
# NOTE(review): this unexecuted cell repeats the expression of cell In [32]
# verbatim; it looks like a leftover and can probably be removed.
((X_test * clf.coef_).sum(axis="columns") + clf.intercept_) > 0

In [33]:
# Show the predict() output again for comparison with the manual calculation.
y_test_pred

28    False
37    False
11    False
34    False
2      True
30    False
40     True
32    False
26    False
4     False
10    False
22    False
31    False
29    False
42    False
43    False
7     False
14    False
33    False
50    False
18    False
49    False
35    False
15    False
5     False
27    False
Name: prediction, dtype: bool