In [17]:
import pandas as pd
from sklearn import preprocessing
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.neural_network import MLPClassifier
from sklearn.metrics import classification_report, confusion_matrix, accuracy_score
from sklearn import tree
from sklearn.naive_bayes import GaussianNB
from sklearn.multiclass import OutputCodeClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.ensemble import AdaBoostClassifier
from sklearn.ensemble import BaggingClassifier
from sklearn.multiclass import OneVsRestClassifier
from sklearn import svm

# Attribute information

Each record is an example of a hand consisting of five playing cards drawn from a standard deck of 52. Each card is described using two attributes (suit and rank), for a total of 10 predictive attributes, plus one class attribute (`hand`) that describes the poker hand.

* ranks (C1, C2, C3, C4, C5)    
  - 1: Ace  
  - 2-10: 2-10  
  - 11: Jack  
  - 12: Queen  
  - 13: King  
* suits (S1, S2, S3, S4, S5)   
  - 1: Hearts  
  - 2: Spades  
  - 3: Diamonds  
  - 4: Clubs  
* hand  
  - 0: Nothing in hand; not a recognized poker hand 
  - 1: One pair; one pair of equal ranks within five cards
  - 2: Two pairs; two pairs of equal ranks within five cards
  - 3: Three of a kind; three equal ranks within five cards
  - 4: Straight; five cards, sequentially ranked with no gaps
  - 5: Flush; five cards with the same suit
  - 6: Full house; a pair plus three of a kind of a different rank
  - 7: Four of a kind; four equal ranks within five cards
  - 8: Straight flush; straight + flush
  - 9: Royal flush; {Ace, King, Queen, Jack, Ten} + flush

# Get the dataset

In [2]:
# Locations of the two CSV files, relative to the notebook's working directory
testing_data_location = "./dataset/poker-hand-testing.data"
training_data_location = "./dataset/poker-hand-training-true.data"

# Column labels: suit (S) then rank (C) for each of the five cards, followed by the class
names = [prefix + str(card) for card in range(1, 6) for prefix in ("S", "C")]
names.append('hand')

# Both files are read with header=None, so the labels above are supplied explicitly
testing = pd.read_csv(testing_data_location, header=None, names=names)
training = pd.read_csv(training_data_location, header=None, names=names)

print("Testing data:", testing.shape)
print("Training data:", training.shape)

# Preview the first five rows of the training set
print("-----")
print("Training Hand")
training.head()

Testing data: (1000000, 11)
Training data: (25010, 11)
-----
Training Hand


Unnamed: 0,S1,C1,S2,C2,S3,C3,S4,C4,S5,C5,hand
0,1,10,1,11,1,13,1,12,1,1,9
1,2,11,2,13,2,10,2,12,2,1,9
2,3,12,3,11,3,13,3,10,3,1,9
3,4,10,4,11,4,1,4,13,4,12,9
4,4,1,4,13,4,12,4,11,4,10,9


In [3]:
# Preview the first five rows of the testing set
print("Testing Hand")
testing.head(5)

Testing Hand


Unnamed: 0,S1,C1,S2,C2,S3,C3,S4,C4,S5,C5,hand
0,1,1,1,13,2,4,2,3,1,12,0
1,3,12,3,2,3,11,4,5,2,5,1
2,1,9,4,6,1,4,3,2,3,9,1
3,1,4,3,13,2,13,2,1,3,6,1
4,3,10,2,7,1,2,2,11,4,9,0


# Preprocessing

### separate dataset attributes (X) from Class (Y)

In [4]:
# The first ten columns hold the card attributes (X); the eleventh holds the hand class (Y)
N_FEATURES = 10

# training split
X_train, Y_train = training.iloc[:, :N_FEATURES], training.iloc[:, N_FEATURES]

# testing split
X_test, Y_test = testing.iloc[:, :N_FEATURES], testing.iloc[:, N_FEATURES]

# preview the first five rows of the training attributes
print("training X")
X_train.head()

training X


Unnamed: 0,S1,C1,S2,C2,S3,C3,S4,C4,S5,C5
0,1,10,1,11,1,13,1,12,1,1
1,2,11,2,13,2,10,2,12,2,1
2,3,12,3,11,3,13,3,10,3,1
3,4,10,4,11,4,1,4,13,4,12
4,4,1,4,13,4,12,4,11,4,10


In [5]:
# Preview the first five class labels of the training set
print("training Y")
Y_train.head(5)

training Y


0    9
1    9
2    9
3    9
4    9
Name: hand, dtype: int64

### show unique Class values

In [6]:
# List the distinct hand classes present in the training labels, in order of first appearance
pd.unique(Y_train)

array([9, 8, 1, 0, 4, 3, 2, 5, 6, 7])

# Feature Scaling

In [10]:
# Fit the scaler on the training attributes only, so that no statistics
# from the testing set influence the transformation
scaler = StandardScaler().fit(X_train)

# Apply the training-set scaling to both splits.
# NOTE: X_train / X_test are rebound to the scaled arrays here, so cells that
# train on them must be executed after this one.
X_train, X_test = scaler.transform(X_train), scaler.transform(X_test)

# Training and Predictions

In [8]:
# Multi-layer perceptron: two hidden layers of 20 units each with tanh
# activation, trained with the Adam solver (initial learning rate 0.02)
# for at most 2000 iterations.
# random_state is fixed so that weight initialisation and batch shuffling
# are repeatable and a Restart & Run All reproduces the reported metrics.
mlp = MLPClassifier(
    solver='adam',
    hidden_layer_sizes=(20, 20),
    activation='tanh',
    learning_rate_init=0.02,
    max_iter=2000,
    random_state=42,
)

# train the network on the training data
mlp.fit(X_train, Y_train.values.ravel())

# predict the hand class for every record of the testing set
predictions = mlp.predict(X_test)

# Check the results

In [9]:
# Confusion matrix of the true testing labels against the model's predictions
mlp_confusion = confusion_matrix(Y_test, predictions)

# Per-class precision / recall / f1-score for the same predictions
mlp_report = classification_report(Y_test, predictions)

print(mlp_confusion)
print(mlp_report)

[[442199  58884      0      0     10    112      0      0      4      0]
 [117441 302291   2169    506     34     37      0      4     16      0]
 [  1697  31549  13362   1007      5      0      2      0      0      0]
 [   863   5465    643  14141      0      0      7      2      0      0]
 [  2216   1653      0      0     14      0      0      0      2      0]
 [  1222    155      0      0      1    555      0      0     52     11]
 [     0    108    516    798      0      0      2      0      0      0]
 [     0      0     12    214      0      0      4      0      0      0]
 [     3      7      0      0      1      0      0      0      1      0]
 [     3      0      0      0      0      0      0      0      0      0]]
              precision    recall  f1-score   support

           0       0.78      0.88      0.83    501209
           1       0.76      0.72      0.73    422498
           2       0.80      0.28      0.42     47622
           3       0.85      0.67      0.75     2112

# Compare with the other models

In [None]:
# Seed shared by every estimator that has a random component, so that the
# comparison is reproducible from one run to the next.
SEED = 42

# Each display name is kept next to the estimator it describes, so labels and
# models cannot drift out of step.
named_models = [
    ("Random Forest", RandomForestClassifier(n_estimators=100, random_state=SEED)),
    ("KNeighbors", KNeighborsClassifier()),
    ("Bagging with DT", BaggingClassifier(random_state=SEED)),
    ("AdaBoost", AdaBoostClassifier(random_state=SEED)),
    ("Naive Bayes", GaussianNB()),
    ("Decision Tree", tree.DecisionTreeClassifier(random_state=SEED)),
    ("Linear SVM", svm.SVC(kernel='linear', C=1)),
    # label corrected: the wrapped estimator is a BaggingClassifier, not a linear SVM
    ("OutputCodeClassifier with Bagging",
     OutputCodeClassifier(BaggingClassifier(random_state=SEED), random_state=SEED)),
    ("OneVsRestClassifier with Linear SVM", OneVsRestClassifier(svm.SVC(kernel='linear'))),
]

# Keep the original list names available for any cell that refers to them
model_names = [name for name, _ in named_models]
models = [model for _, model in named_models]

# Fit every model on the training data and evaluate it on the testing data
for name, model in named_models:
    model.fit(X_train, Y_train.values.ravel())

    # make predictions
    predictions = model.predict(X_test)

    # show accuracy, then the per-class report and the confusion matrix
    acc = accuracy_score(Y_test, predictions)
    print("Accuracy Using", name,": " + str(acc)+'\n')
    print(classification_report(Y_test, predictions))
    print(confusion_matrix(Y_test, predictions))

Accuracy Using Random Forest : 0.614135



  'precision', 'predicted', average, warn_for)


              precision    recall  f1-score   support

           0       0.64      0.81      0.71    501209
           1       0.58      0.49      0.53    422498
           2       0.39      0.00      0.01     47622
           3       0.49      0.00      0.01     21121
           4       0.31      0.00      0.00      3885
           5       1.00      0.00      0.01      1996
           6       0.00      0.00      0.00      1424
           7       0.00      0.00      0.00       230
           8       0.00      0.00      0.00        12
           9       0.00      0.00      0.00         3

   micro avg       0.61      0.61      0.61   1000000
   macro avg       0.34      0.13      0.13   1000000
weighted avg       0.59      0.61      0.58   1000000

[[405379  95814     11      3      2      0      0      0      0      0]
 [213748 208485    225     31      9      0      0      0      0      0]
 [ 12277  35132    201     12      0      0      0      0      0      0]
 [  3450  17566     46

  'precision', 'predicted', average, warn_for)


              precision    recall  f1-score   support

           0       0.55      0.66      0.60    501209
           1       0.46      0.42      0.44    422498
           2       0.16      0.01      0.02     47622
           3       0.11      0.01      0.01     21121
           4       0.04      0.00      0.00      3885
           5       0.58      0.02      0.04      1996
           6       0.00      0.00      0.00      1424
           7       0.00      0.00      0.00       230
           8       0.00      0.00      0.00        12
           9       0.00      0.00      0.00         3

   micro avg       0.51      0.51      0.51   1000000
   macro avg       0.19      0.11      0.11   1000000
weighted avg       0.48      0.51      0.49   1000000

[[331574 168519    883    197     15     21      0      0      0      0]
 [240295 179463   2025    621     79     12      2      0      0      1]
 [ 23705  23082    626    182     25      0      1      0      1      0]
 [  8405  12284    296

  'precision', 'predicted', average, warn_for)


              precision    recall  f1-score   support

           0       0.50      0.98      0.66    501209
           1       0.00      0.00      0.00    422498
           2       0.00      0.00      0.00     47622
           3       0.00      0.00      0.00     21121
           4       0.00      0.00      0.00      3885
           5       0.00      0.00      0.00      1996
           6       0.00      0.00      0.00      1424
           7       0.00      0.00      0.00       230
           8       0.00      0.00      0.00        12
           9       0.00      0.00      0.00         3

   micro avg       0.49      0.49      0.49   1000000
   macro avg       0.05      0.10      0.07   1000000
weighted avg       0.25      0.49      0.33   1000000

[[491576      0      0      0      0      0      0      0      0   9633]
 [414414      0      0      0      0      0      0      0      0   8084]
 [ 46709      0      0      0      0      0      0      0      0    913]
 [ 20729      0      0

  'precision', 'predicted', average, warn_for)


              precision    recall  f1-score   support

           0       0.50      1.00      0.67    501209
           1       0.00      0.00      0.00    422498
           2       0.00      0.00      0.00     47622
           3       0.00      0.00      0.00     21121
           4       0.00      0.00      0.00      3885
           5       0.00      0.00      0.00      1996
           6       0.00      0.00      0.00      1424
           7       0.00      0.00      0.00       230
           8       0.00      0.00      0.00        12
           9       0.00      0.00      0.00         3

   micro avg       0.50      0.50      0.50   1000000
   macro avg       0.05      0.10      0.07   1000000
weighted avg       0.25      0.50      0.33   1000000

[[501209      0      0      0      0      0      0      0      0      0]
 [422498      0      0      0      0      0      0      0      0      0]
 [ 47622      0      0      0      0      0      0      0      0      0]
 [ 21121      0      0