In [1]:
from poker_project.data import prepare
import poker_project.data.holdem as hd


In [2]:
import pandas as pd
import glob
import os
from io import StringIO


In [4]:
# Prepare the data set, return a dictionary of hands, player history, and players in games.
hdb = prepare.prepare_holdem()


In [5]:
# For example, get players from the game w/ timestamp '802704132'
hdb.hand_roster('802704132')



['2', 'ShoelessJ', 'tvp']

In [6]:
# View the history of 'ShoelessJ' from that game.
hdb.player_data(player_name='ShoelessJ', timestamp='802704132')

Unnamed: 0,players_dealt,player_position,bet_preflop,bet_flop,bet_turn,bet_river,bank_start,action,amount_won,card_1,card_2
802704118,2,1,Br,b,-,-,23321,2600,3200,,
802704132,2,2,B,-,-,-,23921,100,150,,
802704137,2,1,Br,-,-,-,23971,1100,1200,,
802704152,2,2,Bc,f,-,-,24071,600,0,,
802704201,2,1,Bc,k,k,k,23471,100,0,6c,Kc
802704250,2,2,B,-,-,-,23371,100,150,,
802704256,2,1,Bf,-,-,-,23421,50,0,,
802704266,2,2,B,-,-,-,23371,100,150,,
802704268,2,1,Br,-,-,-,23421,600,700,,
802704284,2,2,B,-,-,-,23521,100,150,,


In [7]:
# Return the hand information for that game.
print(hdb.find_hand(timestamp='802704132'))

   timestamp  game_num  hand_num  num_players_dealt  num_players_flop  \
5  802704132         2         6                  2                 0   

   flop_pot  num_players_turn  turn_pot  num_players_river  river_pot  \
5         0                 0         0                  0          0   

   num_players_showdown  showdown_pot card_1 card_2 card_3 card_4 card_5  
5                     1           150   None   None   None   None   None  


In [8]:
# Structure of hand history data frame
hdb.hands.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 9447 entries, 0 to 9446
Data columns (total 17 columns):
timestamp               9447 non-null int64
game_num                9447 non-null int64
hand_num                9447 non-null int64
num_players_dealt       9447 non-null int64
num_players_flop        9447 non-null int64
flop_pot                9447 non-null int64
num_players_turn        9447 non-null int64
turn_pot                9447 non-null int64
num_players_river       9447 non-null int64
river_pot               9447 non-null int64
num_players_showdown    9447 non-null int64
showdown_pot            9447 non-null int64
card_1                  3371 non-null object
card_2                  3371 non-null object
card_3                  3371 non-null object
card_4                  1776 non-null object
card_5                  1157 non-null object
dtypes: int64(12), object(5)
memory usage: 1.2+ MB


In [9]:
# Structure of player history dataframe
hdb.player_data(player_name='ShoelessJ').info()

<class 'pandas.core.frame.DataFrame'>
Index: 94 entries, 802704118 to 802706246
Data columns (total 11 columns):
players_dealt      94 non-null object
player_position    94 non-null object
bet_preflop        94 non-null object
bet_flop           94 non-null object
bet_turn           94 non-null object
bet_river          94 non-null object
bank_start         94 non-null object
action             94 non-null object
amount_won         94 non-null object
card_1             14 non-null object
card_2             14 non-null object
dtypes: object(11)
memory usage: 8.8+ KB


In [10]:
# All hands where hole cards were shown, and how many hands were shown:
shown_hands = hdb.find_hands_cards_shown()
print(shown_hands)


defaultdict(<function DataSet.find_hands_cards_shown.<locals>.<lambda> at 0x11593d7b8>, {'806821964': 3, '806822388': 2, '806822512': 2, '806822757': 2, '806823885': 2, '806824009': 2, '806824193': 2, '816329175': 2, '816329230': 2, '816329360': 2, '816329819': 3, '816330289': 3, '816330443': 2, '816330948': 2, '816331013': 2, '816331084': 2, '816331419': 2, '816332638': 2, '816332687': 2, '816332868': 2, '816333605': 2, '816334003': 2, '816334171': 2, '816334308': 2, '816334608': 2, '816334735': 2, '816334791': 2, '816334912': 2, '816335172': 2, '816335365': 2, '816336118': 2, '816336231': 2, '816336557': 2, '816336598': 2, '816336648': 2, '816337298': 2, '816337368': 2, '816337707': 2, '816337788': 2, '816337848': 2, '816338338': 2, '816338485': 2, '816338665': 2, '816338963': 2, '816339094': 2, '816339232': 2, '802587475': 2, '803190330': 2, '803190372': 2, '803190482': 2, '803190534': 2, '803942300': 2, '803942734': 3, '803943100': 2, '803943291': 2, '803943448': 2, '806821876': 2,

In [11]:
# Total number of games where hands were shown
numGamesShown = len(shown_hands.keys())
print(numGamesShown)

806


In [12]:
# Total number of hands that were shown
numTotalShown = sum(shown_hands.values())
print(numTotalShown)

1649


In [13]:
# Games shown out of all games
print(numGamesShown / hdb.hands.shape[0])

0.08531809039906849


In [14]:
# Hands shown per game
print(numTotalShown / hdb.hands.shape[0])

0.17455276807452103


In [17]:
# Hands shown out of all hands
print(numTotalShown / hdb.hands['num_players_dealt'].sum())

0.06394199077125906


In [18]:
# We found that theres an extremely low rate of hands shown in NoLimit poker. 
# About 8.5% out of all games in our sample show hands, and 6.4% out of all hands are shown.
# This may be problematic and require further research or consideration into widening our scope
# from just NoLimit poker, where there are much much larger datasets.