In [1]:
from poker_project.data import prepare
import poker_project.data.holdem as hd
from matplotlib import pyplot as plt
import poker_project.data.visualize as vis
import numpy as np


In [2]:
import pandas as pd
import glob
import os
from io import StringIO


In [3]:
# Prepare the data set, return a dictionary of hands, player history, and players in games.
hdb = prepare.prepare_holdem()


In [6]:
# For example, get players from the game w/ timestamp '802704132'
hdb.hand_roster('802704132')



KeyError: '802704132'

In [4]:
h = hdb.winning_hands()

In [5]:
def won(df):
    return df[df > 0]


def lost(df):
    return df[df < 0]


def tie(df):
    return df[df == 0]


def stats(df):
    w = won(df['delta'])
    l = lost(df['delta'])
    t = tie(df['delta'])
    pm = df['delta'].mean()
    pc = df['delta'].count()
    pwc = w.count()
    pwm = w.mean()
    pwr = pwc / pc
    plc = l.count()
    plm = l.mean()
    plr = plc / pc
    ptm = t.mean()
    ptc = t.count()
    ptr = ptc / pc
    pev = (pwr * pwm) + (plr * plm)
    ret = {'ev': [pm],
           'count': [pc],
           'won': [pwc],
           'avg win': [pwm],
           'winrate': [pwr],
           'lost': [plc],
           'avg lost': [plm],
           'lossrate': [plr],
           'tie': [plc],
           'avg tie': [plm],
           'tierate': [plr]}
    return pd.DataFrame(ret)

In [6]:
# hg1 = h.reset_index()[['delta', 'c1_val']].groupby('c1_val')
# hg2 = h.reset_index()[['delta', 'c2_val']].groupby('c2_val')
# hc = pd.concat([h[['delta', 'c1_val']].rename(columns={'c1_val': 'c_val'}), h[['delta', 'c2_val']].rename(columns={'c2_val': 'c_val'})], ignore_index=True)
pairs = h.query('c1_val == c2_val')#['amt']
not_pairs = h.query('c1_val != c2_val')#['amt'] 
suited = h.query('c1_suit == c2_suit')#['amt']
not_suited = h.query('c1_suit != c2_suit')#['amt']
not_suited_not_pairs = h.query('(c1_suit != c2_suit) & (c1_val != c2_val)')#['amt']
all_hands = h#['amt']
# hole_cards = h[['amt', 'delta', 'c1_val', 'c1_suit', 'c2_val', 'c2_suit']]
# hole_cards['deal'] = hole_cards['c1_val'] + hole_cards['c2_val']
# hcg = hole_cards.groupby('deal')

# pairs2 = h.query('c1_val == c2_val')
# pairs2.loc[:, 'class'] = 'pair'
# not_pairs2 = h.query('c1_val != c2_val')
# not_pairs2.loc[:, 'class'] = 'not_pair'
# ad = pd.concat([pairs2, not_pairs2])

p = stats(pairs)
np = stats(not_pairs)
s = stats(suited)
ns = stats(not_suited)
nsnp = stats(not_suited_not_pairs)
a = stats(all_hands)

st = [p, np, s, ns, nsnp, a]
stdf = pd.concat(st)
labels = ['Pair', 'No Pair', 'Suited', 'Offsuit', '*Offsuit', 'All Hands']
stdf.index = pd.Index(labels)

plt.clf()
plt.style.use('ggplot')
# ax.bar(height=winrate, x=labels)
stdf.loc[:, ['count']].plot.bar()
# stdf.loc[:, ["winrate", 'lossrate']].plot.bar()
# hcs = hcg['delta'].apply(stats)
# hcd = hcs.reset_index().rename(columns={'deal': 'Hand'}).drop('level_1', axis=1).sort_values('ev', ascending=False)
# hcd.rename(columns={'ev': 'Expected Value'}, inplace=True)
# hcd['count'] = (hcd['count'] - hcd['count'].min()) / (hcd['count'].max() - hcd['count'].min())
# hcd['Expected Value'] = (hcd['Expected Value'] - hcd['Expected Value'].min()) / (hcd['Expected Value'].max() - hcd['Expected Value'].min())
# hcd.plot.scatter(x='count', y='winrate')
# ad.loc[:, ['c1_val', 'c1_suit', 'c2_val', 'c2_suit']] = ad[['c1_val', 'c1_suit', 'c2_val', 'c2_suit']].applymap(lambda x: ord(x))
#hcd['count'].plot.normal()
plt.show()

In [10]:
player_hist = hdb.player

# ax = list(player_hist.values())[0].plot(y=['total_delta'], legend='false')
plt.clf()
ax = plt.axes()
plt.style.use('ggplot')

all_d = []
for p in player_hist.values():
    arr = p['delta'].values
    if arr.size > 400:
        all_d.append(arr[0:401])
    else:
        continue
    # p.plot(y=['total_delta'], ax=ax, legend='false')

mat = np.stack(all_d)
means = np.mean(mat, axis=0)
means.sort()
his = np.histogram(means, bins=200)
df = pd.DataFrame(means, columns=['mean'])
df.plot.hist(bins=his[1])
# ax.hist(means, bins=his[1])#bins=np.arange(min(means), max(means) + 1000, 1000))
# player_hist[['amount_won', 'delta']]
plt.show()

In [5]:
# View the history of 'ShoelessJ' from that game.
hdb.player_data(player_name='ShoelessJ', timestamp="802704132")

action           100
amount_won       150
bank_start     23921
bet_flop           -
bet_preflop        B
bet_river          -
bet_turn           -
card_1          None
card_2          None
dealt_num          2
delta             50
pos                2
Name: 802704132, dtype: object

In [8]:
hdb.player_details(timestamp="802704132")

{'ShoelessJ': action           100
 amount_won       150
 bank_start     23921
 bet_flop           -
 bet_preflop        B
 bet_river          -
 bet_turn           -
 card_1          None
 card_2          None
 dealt_num          2
 delta             50
 pos                2
 Name: 802704132, dtype: object, 'tvp': action            50
 amount_won         0
 bank_start     10263
 bet_flop           -
 bet_preflop       Bf
 bet_river          -
 bet_turn           -
 card_1          None
 card_2          None
 dealt_num          2
 delta            -50
 pos                1
 Name: 802704132, dtype: object}

In [14]:
# Return the hand information for that game.
hdb.find_hand(timestamp='809138957').iloc[:,0:10]

Unnamed: 0,timestamp,game_num,hand_num,dealt_num,num_flop,flop_pot,num_turn,turn_pot,num_river,river_pot
1965,809138957,3,2584,3,2,900,2,2700,2,2700


In [215]:
# Structure of hand history data frame
hdb.hands.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 104387 entries, 0 to 104386
Data columns (total 17 columns):
timestamp       104387 non-null int64
game_num        104387 non-null int64
hand_num        104387 non-null int64
dealt_num       104387 non-null int64
num_flop        104387 non-null int64
flop_pot        104387 non-null int64
num_turn        104387 non-null int64
turn_pot        104387 non-null int64
num_river       104387 non-null int64
river_pot       104387 non-null int64
num_showdown    104387 non-null int64
showdown_pot    104387 non-null int64
card_1          79563 non-null object
card_2          79563 non-null object
card_3          79563 non-null object
card_4          63593 non-null object
card_5          50396 non-null object
dtypes: int64(12), object(5)
memory usage: 13.5+ MB


In [217]:
# Structure of player history dataframe
hdb.player_data(player_name='ShoelessJ').info()

<class 'pandas.core.frame.DataFrame'>
Index: 168 entries, 800283655 to 802595211
Data columns (total 13 columns):
action         168 non-null object
amount_won     168 non-null object
bank_start     168 non-null object
bet_flop       168 non-null object
bet_preflop    168 non-null object
bet_river      168 non-null object
bet_turn       168 non-null object
card_1         31 non-null object
card_2         31 non-null object
dealt_num      168 non-null object
delta          168 non-null float64
pos            168 non-null object
total_delta    168 non-null float64
dtypes: float64(2), object(11)
memory usage: 18.4+ KB


In [8]:
# All hands where hole cards were shown, and how many hands were shown:
shown_hands = hdb.find_hands_cards_shown()
print(shown_hands)
hdb.player


defaultdict(<function DataSet.find_hands_cards_shown.<locals>.<lambda> at 0x11592c510>, {'806821964': 3, '806822388': 2, '806822512': 2, '806822757': 2, '806823885': 2, '806824009': 2, '806824193': 2, '816329175': 2, '816329230': 2, '816329360': 2, '816329819': 3, '816330289': 3, '816330443': 2, '816330948': 2, '816331013': 2, '816331084': 2, '816331419': 2, '816332638': 2, '816332687': 2, '816332868': 2, '816333605': 2, '816334003': 2, '816334171': 2, '816334308': 2, '816334608': 2, '816334735': 2, '816334791': 2, '816334912': 2, '816335172': 2, '816335365': 2, '816336118': 2, '816336231': 2, '816336557': 2, '816336598': 2, '816336648': 2, '816337298': 2, '816337368': 2, '816337707': 2, '816337788': 2, '816337848': 2, '816338338': 2, '816338485': 2, '816338665': 2, '816338963': 2, '816339094': 2, '816339232': 2, '802587475': 2, '803190330': 2, '803190372': 2, '803190482': 2, '803190534': 2, '803942300': 2, '803942734': 3, '803943100': 2, '803943291': 2, '803943448': 2, '806821876': 2,

defaultdict(<function poker_project.data.prepare.__player_df__.<locals>.<lambda>()>,
            {'A8':           action amount_won bank_start bet_flop bet_preflop bet_river  \
             802072665     50          0      15386        -          BQ         -   
             806821964    100        300       8625        k          Bk         k   
             806822003    100          0       8825       kf          Bc         -   
             806822028    400          0       8725        f          cc         -   
             806822062    100          0       8325        -          Bf         -   
             806822072     50          0       8225        -          Bf         -   
             806822079      0          0       8175        -           f         -   
             806822092    450          0       8175       bf          Bc         -   
             806822128    400        500       7725        k          Bc         -   
             806822158      0          0       78

In [11]:
# Total number of games where hands were shown
numGamesShown = len(shown_hands.keys())
print(numGamesShown)

806


In [12]:
# Total number of hands that were shown
numTotalShown = sum(shown_hands.values())
print(numTotalShown)

1649


In [13]:
# Games shown out of all games
print(numGamesShown / hdb.hands.shape[0])

0.08531809039906849


In [14]:
# Hands shown per game
print(numTotalShown / hdb.hands.shape[0])

0.17455276807452103


In [17]:
# Hands shown out of all hands
print(numTotalShown / hdb.hands['num_players_dealt'].sum())

0.06394199077125906


In [18]:
# We found that theres an extremely low rate of hands shown in NoLimit poker. 
# About 8.5% out of all games in our sample show hands, and 6.4% out of all hands are shown.
# This may be problematic and require further research or consideration into widening our scope
# from just NoLimit poker, where there are much much larger datasets.

In [None]:
hdb.