In [2]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score
from sklearn.model_selection import RandomizedSearchCV

In [31]:
# Load the training hands and the hands that will be scored for submission.
train_path, test_path = 'train.csv', 'test.csv'
df = pd.read_csv(train_path)
test_data = pd.read_csv(test_path)


In [15]:
def concat_hand_value_count(data_set):
    """Prepend per-hand multiplicity counts to ``data_set``.

    For every ``k`` in 0..5 a column ``Card_Value_Count_k`` is added holding,
    for each row, how many of the thirteen ``rank<r>_cnt`` columns are equal
    to ``k`` (i.e. how many ranks occur exactly ``k`` times in that hand).

    Parameters
    ----------
    data_set : pandas.DataFrame
        Must contain the columns ``rank1_cnt`` .. ``rank13_cnt``.

    Returns
    -------
    pandas.DataFrame
        A new frame with the six ``Card_Value_Count_*`` columns first,
        followed by all original columns. The row index of ``data_set`` is
        preserved.
    """
    rank_cnt_columns = [f'rank{rank}_cnt' for rank in range(1, 13 + 1)]
    rank_counts = data_set[rank_cnt_columns]

    # Each Series below inherits data_set's index, so the final column-wise
    # concat lines up row for row even when the index is not 0..n-1.
    # NaN entries never compare equal to k, so they are simply not counted.
    multiplicity_counts = {
        'Card_Value_Count_{}'.format(k): (rank_counts == k).sum(axis=1)
        for k in range(0, 6)
    }
    counts_df = pd.DataFrame(multiplicity_counts)

    return pd.concat([counts_df, data_set], axis=1)
        
# Flag hands that contain an ace (rank 1). The comparison has to happen
# element-wise *before* reducing with any(): any(x) on its own is just a bool
# that is True for every hand, because no card rank is 0.
# Keep this identical to the has_ace computation applied to the test set.
df['has_ace'] = (df[['C1', 'C2', 'C3', 'C4', 'C5']] == 1).any(axis=1).astype(int)


In [8]:
# Scratch check: any(...) collapses the row to a single bool, and a bool never
# equals 4, so this expression is False whatever cards the first hand holds.
# To ask "does the first hand contain a 4?", compare element-wise first and
# reduce afterwards.
any(df[['C1', 'C2', 'C3', 'C4', 'C5']].iloc[0]) == 4

False

In [9]:
from collections import Counter

def count_cards(record, rank):
    """Return how many entries of ``record`` are equal to ``rank``.

    Parameters
    ----------
    record : iterable
        The values to inspect (e.g. the five card ranks or suits of a hand).
    rank : hashable
        The value to count.

    Returns
    -------
    int
        Number of occurrences of ``rank``; 0 when it does not occur at all,
        so callers never receive ``None`` (which would turn into NaN when the
        result is stored in a DataFrame column).
    """
    # Counter returns 0 for keys it has never seen.
    return Counter(record)[rank]

In [16]:
# Feature engineering for the training frame. The test frame must receive
# exactly the same transformations so both splits share one feature space.
card_columns = ['C1', 'C2', 'C3', 'C4', 'C5']
suit_columns = ['S1', 'S2', 'S3', 'S4', 'S5']

# How many cards of each rank (1..13) the hand holds.
for rank in range(1, 13+1):
    df[f'rank{rank}_cnt'] = df[card_columns].apply(count_cards, rank=rank, axis=1)

# How many cards of each suit (1..4) the hand holds.
for suit in range(1, 4+1):
    df[f'suit{suit}_cnt'] = df[suit_columns].apply(count_cards, rank=suit, axis=1)

# Spread of the five ranks within each hand. The reduction must run along
# axis=1 (one value per row); reducing per column yields a Series indexed by
# 'C1'..'C5', which aligns to nothing when assigned and leaves the column NaN.
# ddof=0 keeps the population standard deviation that np.std computes.
df['card_std'] = df[card_columns].std(axis=1, ddof=0)
df = concat_hand_value_count(df)

In [17]:
# Replace every missing value in the training frame with 0 before modelling.
df = df.fillna(value=0)

In [18]:
# Hold out part of the labelled data for validation. 'hand' is the target;
# every other column is used as a feature. A fixed random_state makes the
# split (and therefore the reported scores) reproducible across runs.
X_train, X_test, y_train, y_test = train_test_split(
    df.drop('hand', axis=1),
    df['hand'],
    random_state=42,
)

In [19]:
# Fit a small random forest; the seed keeps the fitted trees reproducible.
forest = RandomForestClassifier(n_estimators=20, random_state=42)
forest.fit(X_train, y_train)
# accuracy_score returns the fraction of correct predictions, so the values
# are reported as accuracy rather than error.
print(f'Training accuracy: {accuracy_score(y_train, forest.predict(X_train))}')
print(f'Test accuracy: {accuracy_score(y_test, forest.predict(X_test))}')

Training error: 0.9996801194220825
Test error: 0.9937629937629938


In [20]:

# Feature engineering for the test frame. This must mirror the
# transformations applied to the training frame so both share one feature
# space.
card_columns = ['C1', 'C2', 'C3', 'C4', 'C5']
suit_columns = ['S1', 'S2', 'S3', 'S4', 'S5']

# Flag hands that contain an ace (rank 1). Compare element-wise first, then
# reduce per row; any(x) alone is True for every hand since no rank is 0.
test_data['has_ace'] = (test_data[card_columns] == 1).any(axis=1).astype(int)

# How many cards of each rank (1..13) the hand holds.
for rank in range(1, 13+1):
    test_data[f'rank{rank}_cnt'] = test_data[card_columns].apply(count_cards, rank=rank, axis=1)

# How many cards of each suit (1..4) the hand holds.
for suit in range(1, 4+1):
    test_data[f'suit{suit}_cnt'] = test_data[suit_columns].apply(count_cards, rank=suit, axis=1)

# Per-hand spread of the ranks: reduce along axis=1 so there is one value per
# row. ddof=0 keeps the population standard deviation that np.std computes.
test_data['card_std'] = test_data[card_columns].std(axis=1, ddof=0)
test_data = concat_hand_value_count(test_data)

In [28]:
test_data = test_data.fillna(0)
# Feed the forest exactly the columns it was fitted on, in the same order.
# Selecting by X_train.columns (instead of "everything except id") also keeps
# this cell re-runnable after the 'hand' column has been added.
test_data['hand'] = forest.predict(test_data[X_train.columns])

In [29]:
# Write the submission file: one row per hand with its id and predicted label.
submission = test_data[['id', 'hand']]
submission.to_csv('forest_submission.csv', index=False)

In [24]:
# Show the feature columns that were passed to forest.fit.
X_train.columns

Index(['Card_Value_Count_0', 'Card_Value_Count_1', 'Card_Value_Count_2',
       'Card_Value_Count_3', 'Card_Value_Count_4', 'Card_Value_Count_5', 'S1',
       'C1', 'S2', 'C2', 'S3', 'C3', 'S4', 'C4', 'S5', 'C5', 'has_ace',
       'suit1_cnt', 'suit2_cnt', 'suit3_cnt', 'suit4_cnt', 'card_std',
       'rank1_cnt', 'rank2_cnt', 'rank3_cnt', 'rank4_cnt', 'rank5_cnt',
       'rank6_cnt', 'rank7_cnt', 'rank8_cnt', 'rank9_cnt', 'rank10_cnt',
       'rank11_cnt', 'rank12_cnt', 'rank13_cnt'],
      dtype='object')

In [32]:
def pair(row):
    """Return True when some rank appears exactly twice among C1..C5.

    ``row`` is one hand, indexable by the labels 'C1'..'C5'.
    """
    ranks = row[['C1', 'C2', 'C3', 'C4', 'C5']].values.tolist()
    return any(ranks.count(rank) == 2 for rank in ranks)

def two_pair(row):
    """Return True when two different ranks each appear exactly twice.

    ``row`` is one hand, indexable by the labels 'C1'..'C5'.
    """
    ranks = list(row[['C1', 'C2', 'C3', 'C4', 'C5']].values)
    # Count the distinct ranks that form an exact pair.
    paired_ranks = [rank for rank in set(ranks) if ranks.count(rank) == 2]
    return len(paired_ranks) >= 2

def three(row):
    """Return True when some rank appears exactly three times among C1..C5.

    ``row`` is one hand, indexable by the labels 'C1'..'C5'.
    """
    ranks = list(row[['C1', 'C2', 'C3', 'C4', 'C5']].values)
    occurrences = [ranks.count(rank) for rank in set(ranks)]
    return 3 in occurrences

def straight(row):
    """Return True when the five ranks C1..C5 form a run of consecutive values.

    The ace-high run (sorted ranks 1, 10, 11, 12, 13) is accepted as a
    special case. Suits are not looked at.
    """
    ranks = sorted(row[['C1', 'C2', 'C3', 'C4', 'C5']].values)
    if ranks == [1, 10, 11, 12, 13]:
        return True
    # A plain run is the lowest rank followed by its four successors.
    lowest = ranks[0]
    return ranks == [lowest + step for step in range(5)]

def flush(row):
    """Return True when S1..S5 all hold the same suit value.

    ``row`` is one hand, indexable by the labels 'S1'..'S5'.
    """
    distinct_suits = set(row[['S1', 'S2', 'S3', 'S4', 'S5']].values)
    return len(distinct_suits) == 1

def full_house(row):
    """Return True when one rank appears three times and another twice.

    ``row`` is one hand, indexable by the labels 'C1'..'C5'.
    """
    ranks = list(row[['C1', 'C2', 'C3', 'C4', 'C5']].values)
    occurrences = [ranks.count(rank) for rank in set(ranks)]
    has_triple = 3 in occurrences
    has_pair = 2 in occurrences
    return has_triple and has_pair

def four_pair(row):
    """Return True when some rank appears exactly four times among C1..C5.

    ``row`` is one hand, indexable by the labels 'C1'..'C5'.
    """
    ranks = list(row[['C1', 'C2', 'C3', 'C4', 'C5']].values)
    return 4 in map(ranks.count, ranks)

def straight_flush(row):
    """Return True when all suits match and the ranks are five consecutive values.

    Only plain runs are recognised here; the sorted ranks 1, 10, 11, 12, 13
    are not treated as a run by this function.
    """
    ranks = sorted(row[['C1', 'C2', 'C3', 'C4', 'C5']].values)
    highest = ranks[-1]
    distinct_suits = set(row[['S1', 'S2', 'S3', 'S4', 'S5']].values)

    # Guard: mixed suits can never be a straight flush.
    if len(distinct_suits) != 1:
        return False

    # Distances from the highest card must step down 4, 3, 2, 1, 0.
    gaps_to_highest = [highest - rank for rank in ranks]
    return gaps_to_highest == [4, 3, 2, 1, 0]

def royal(row):
    """Return True when all suits match and the sorted ranks are 1, 10, 11, 12, 13.

    ``row`` is one hand, indexable by 'C1'..'C5' (ranks) and 'S1'..'S5' (suits).
    """
    ranks = sorted(row[['C1', 'C2', 'C3', 'C4', 'C5']].values)
    distinct_suits = set(row[['S1', 'S2', 'S3', 'S4', 'S5']].values)

    if ranks != [1, 10, 11, 12, 13]:
        return False
    return len(distinct_suits) == 1

In [33]:
def poker_combinations(row):
    """
    Map one hand (a row with columns S1..S5 and C1..C5) to its combination code.

    0 - nothing; 1 - one pair; 2 - two pairs; 3 - three of a kind;
    4 - straight; 5 - flush; 6 - full house; 7 - four of a kind;
    8 - straight flush; 9 - royal flush

    The checks run from the strongest combination to the weakest and the
    first one that matches decides the code.

    Apply this function row-wise to the train and test dataframes.
    """
    # (code, detector) pairs, strongest combination first.
    ranked_detectors = (
        (9, royal),
        (8, straight_flush),
        (7, four_pair),
        (6, full_house),
        (5, flush),
        (4, straight),
        (3, three),
        (2, two_pair),
        (1, pair),
    )
    for code, detector in ranked_detectors:
        if detector(row):
            return code
    return 0

In [35]:
# Label every training hand with the rule-based classifier.
rule_based_labels = df.apply(poker_combinations, axis=1)
df['prediction'] = rule_based_labels

In [37]:
# accuracy_score is the fraction of hands labelled correctly, so report it as
# accuracy rather than error.
print(f'Training accuracy: {accuracy_score(df["hand"], df["prediction"])}')

Training error: 1.0


In [38]:
# Label the test hands with the rule-based classifier and write the submission.
# NOTE(review): this reuses the file name 'forest_submission.csv', so it
# overwrites any earlier file of that name even though these labels do not
# come from the forest - confirm that is intended.
test_data['hand'] = test_data.apply(poker_combinations, axis=1)
submission = test_data[['id', 'hand']]
submission.to_csv('forest_submission.csv', index=False)