In [1]:
import blackjack_engine
import random
import numpy as np
import pandas as pd
# random.seed(4) # can use this for repeatable results

In [2]:
def padarray(A, size):
    if len(A) == 0:
        A = ['']
    t = size - len(A)
    arr = np.pad(A, pad_width=(0, t), mode='empty')
    
    return arr.astype('<U32')

def player_bot(hand):
    """ makes random choice, or makes decisions using neural net etc"""
    # decision making code goes here
    return random.choice(['hit', 'stay']) 

def play_blackjack_game():
    # initialize a game with shuffled deck and hands dealt to dealer and player:
    game = blackjack_engine.BlackjackGame()
    moves = []
    #game_states = []
    dealer_cards = []
    player_hands = []
    game_results = []
    if game.is_finished:
        # somebody got dealt a blackjack...need to deal with that case
        pass
    while not game.is_finished: # if nobody got dealt a blackjack, loop through decisions
        player_hand = game.player_hands[0].cards[:]
        dealer_card = game.dealer_hand.cards[0]
        #game_state_row = {'player_hand':player_hand,'dealer_card':dealer_card}
        #game_state_row = [player_hand, dealer_card]
        player_hands.append(player_hand)
        dealer_cards.append(dealer_card)
        hit_or_stay = player_bot(game.player_hands)
        moves.append(hit_or_stay)
        game.player_move(hit_or_stay)

    [game_results.append(game.result) for game_result in np.arange(len(moves))]
        
    return player_hands, dealer_cards, moves, game_results

# implement a training set generation routine: give the API random actions and see how games turn out
def build_training_set(n_pts):
    '''
    state_of_play is an n x m array of values where n is the number of data points, and m is the max number of features in a hand 
    y is an n x 1 array, where n is the number of data points (number of times the player drew a card, basically) 
    '''
    all_player_hands = []
    all_dealer_cards = []
    all_moves = []
    all_game_results = []

    for pt in np.linspace(0,n_pts,n_pts+1):
        [player_hands, dealer_cards, moves,game_results] = play_blackjack_game()
        all_player_hands.append(player_hands)
        all_dealer_cards.append(dealer_cards)
        all_moves.append(moves)
        all_game_results.append(game_results)
    return all_player_hands, all_dealer_cards, all_moves, all_game_results

def flatten_list(input_list):
    flat_list = []
    for element in input_list:
        for sub_element in element:
            flat_list.append(sub_element)
    return flat_list

In [3]:
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.preprocessing import OrdinalEncoder
from sklearn.preprocessing import normalize

def reshape_data(input_data):
    # flatten and preprocess all of the input data
    [all_player_hands, all_dealer_cards, all_moves, all_game_results] = input_data
    flat_player_hands = flatten_list(all_player_hands)
    flat_game_results = flatten_list(all_game_results)
    flat_moves = flatten_list(all_moves)
    flat_dealer_cards = flatten_list(all_dealer_cards)
    
    # format X to be a n x m matrix where n is the number of examples and m is the max number of features
    X = []
    for instance in np.arange(len(flat_moves)):
        Xrow = [flat_dealer_cards[int(instance)], flat_game_results[int(instance)]]
        for card in flat_player_hands[int(instance)]:
            Xrow.append(card)
        while len(Xrow) < 9:
            Xrow.append('pad')
        X.append(Xrow)

    return X, flat_moves

def encode_inputs(X):
    oe = OrdinalEncoder()
    oe.fit(X)
    X_enc = oe.transform(X)

    return X_enc

def encode_outputs(y):
    le = LabelEncoder()
    le.fit(y)
    y_enc = le.transform(y)
    return y_enc

def preprocessing_pipeline(input_data):
    # separate and shape the input data
    X,y = reshape_data(input_data)
    
    # encode all variables numerically
    X = encode_inputs(X)
    y = encode_outputs(y)
    
    # normalize the inputs
    X = normalize(X)
    
    # split into test and train sets
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
    
    return X_train, X_test, y_train, y_test


# Build a training set with a random player of a specified size

In [4]:
n_examples = 1000
[all_player_hands, all_dealer_cards, all_moves, all_game_results] = build_training_set(n_examples)
input_data = [all_player_hands, all_dealer_cards, all_moves, all_game_results]

# Preprocess the input data (reshape, encode, normalize)

In [5]:
X_train, X_test, y_train, y_test = preprocessing_pipeline(input_data)

print('X_train:',np.shape(X_train))
print('X_test:',np.shape(X_test))
print('y_train:',np.shape(y_train))
print('y_test:',np.shape(y_test))

preprocessed_input_data = [X_train, X_test, y_train, y_test]

X_train: (950, 9)
X_test: (238, 9)
y_train: (950,)
y_test: (238,)


In [6]:
from keras.models import Sequential
from keras.layers import Dense
from keras.callbacks import EarlyStopping
# train the neural network on the previously generated data
def build_model(input_data):
    [X_train, X_test, y_train, y_test] = input_data
    [n_pts,input_len] = np.shape(X_train)
    model = Sequential()
    model.add(Dense(100,input_shape=(input_len,),activation = 'relu'))
    model.add(Dense(175,activation = 'relu'))
    model.add(Dense(1))
    model.compile(loss='binary_crossentropy', optimizer='adam')
    model.summary()
    return model

Using TensorFlow backend.
  _np_qint8 = np.dtype([("qint8", np.int8, 1)])
  _np_quint8 = np.dtype([("quint8", np.uint8, 1)])
  _np_qint16 = np.dtype([("qint16", np.int16, 1)])
  _np_quint16 = np.dtype([("quint16", np.uint16, 1)])
  _np_qint32 = np.dtype([("qint32", np.int32, 1)])
  np_resource = np.dtype([("resource", np.ubyte, 1)])


In [7]:
model = build_model(preprocessed_input_data)

Instructions for updating:
Colocations handled automatically by placer.
Model: "sequential_1"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_1 (Dense)              (None, 100)               1000      
_________________________________________________________________
dense_2 (Dense)              (None, 175)               17675     
_________________________________________________________________
dense_3 (Dense)              (None, 1)                 176       
Total params: 18,851
Trainable params: 18,851
Non-trainable params: 0
_________________________________________________________________


In [10]:
callbacks = [EarlyStopping(patience = 10)]
model.fit(np.array(X_train), np.array(y_train), batch_size=32, epochs=100, callbacks = callbacks)

Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch 71/100
Epoch 72/100
Epoch 73/100
Epoch 74/100
Epoch 75/100
Epoch 76/100
Epoch 77/100
Epoch 78

Epoch 99/100
Epoch 100/100


<keras.callbacks.callbacks.History at 0x1b94ded6bc8>

# Use the model to make decisions, test the win %? 

In [None]:

def use_model_to_make_decision(trained_model,state_of_play):

    decision = trained_model.predict(state_of_play)
    while game_is_still_in_play:
        state_of_play = play_blackjack_hand(state_of_play,decision)
        game_is_still_in_play = check_result(state_of_play)
    return state_of_play
    