In [1]:
# Face label -> numeric rank, ace low (1) through king high (13).
reverse_value_dict = dict(
    (label, rank)
    for rank, label in enumerate(
        ['A', '2', '3', '4', '5', '6', '7', '8', '9', '10', 'J', 'Q', 'K'], 1)
)


class Card(object):
    """A single playing card.

    Attributes:
        value: face label such as 'A', '7' or 'K'.
        suit: suit symbol.
        score: numeric rank looked up from ``reverse_value_dict``.
    """

    def __init__(self, value='A', suit='♠'):
        self.value, self.suit = value, suit
        # Raises KeyError for a label that is not in the rank table.
        self.score = reverse_value_dict[value]

    def __str__(self):
        # Rendered as label immediately followed by suit, e.g. "K♥".
        return "{}{}".format(self.value, self.suit)

    def __repr__(self):
        # Lists of cards should print the same compact form as a single card.
        return self.__str__()



In [105]:
import random

def build_deck():
    """Return a new, unshuffled 52-card deck as a list of ``Card`` objects.

    Cards are grouped by suit (clubs, diamonds, hearts, spades) and run from
    ace to king inside each suit; shuffling is left to the caller.
    """
    ranks = ('A', '2', '3', '4', '5', '6', '7', '8', '9', '10', 'J', 'Q', 'K')
    suits = ('♣', '♦', '♥', '♠')
    # Suit is the outer loop so the ordering matches a suit-by-suit deal.
    return [Card(rank, suit) for suit in suits for rank in ranks]

def ai(state):
    """Baseline rule-based policy; returns 'stay' or 'pass'.

    Stays when the ``state['passed']`` card scores lower than the card now
    held, or when the held card scores 4 or more; otherwise passes.
    """
    held = state['card']
    given_away = state['passed']
    if given_away and given_away.score < held.score:
        return 'stay'
    return 'stay' if held.score >= 4 else 'pass'

def dumb(state):
    """Random policy: ignores ``state`` and picks 'stay' or 'pass' by coin flip."""
    return 'stay' if random.random() > 0.5 else 'pass'
    
def transform(state):
    """Encode a decision state as the string key used by the statistics tables.

    Layout: two digits each for the player count, the number of actions taken
    so far, the held card's score and the ``passed`` card's score ('00' when
    there is none), followed by a count of earlier turns that were either
    'blocked' or a 'pass' immediately followed by a 'stay'.
    """
    fields = ['{:02d}'.format(state['players'])]

    history = state['history']
    fields.append('{:02d}'.format(len(history)))
    fields.append('{:02}'.format(state['card'].score))

    passed = state['passed']
    fields.append('{:02}'.format(passed.score) if passed else '00')

    # An earlier scheme (previously left here as commented-out code) spelled
    # every action out as a 'p'/'s' letter for small games; only this count
    # is emitted now.
    # Only turns that have a successor are examined, so a trailing entry is
    # never counted on its own.
    ps = sum(
        1
        for current, following in zip(history, history[1:])
        if current == 'blocked' or (current == 'pass' and following == 'stay')
    )
    fields.append(str(ps))
    return ''.join(fields)

def game(players,ais = None , Q_d = None):
    """Play one round and fold its outcome into the statistics table ``Q_d``.

    Every seat is dealt one card from a shuffled deck and the seats act in
    order.  A seat's policy is called with a state dict (keys ``history``,
    ``card``, ``players``, ``passed``) and its answer is appended to the
    history.  A 'pass' swaps cards with the next seat unless that seat holds
    a king; in that case the action is recorded as 'blocked' and the king
    holder's own turn is recorded as 'king' without consulting its policy.
    A 'pass' by the last seat replaces its card with the next card from the
    deck and is recorded as 'cut'.  Any other answer leaves the hands alone.

    Args:
        players: number of seats in the round.
        ais: optional list of policy callables, one per seat.  Defaults to
            ``ai`` in every seat.
        Q_d: optional dict mapping state hash -> ``{'pass'|'blocked'|'stay':
            {'V': ..., 'N': ...}}``.  Updated in place.  When omitted, a
            fresh dict is used for this call only (the previous mutable
            default was silently shared between calls).

    Returns:
        Tuple ``(score, history, starting_hand, hands, history_hash)``.
        ``score[i]`` is -1 for every seat holding the lowest final card and
        0 otherwise; ``history_hash[i]`` is the state key seat ``i`` acted on.
    """
    if not ais:
        ais = [ai] * players
    if Q_d is None:
        # A `{}` default is created once at definition time and would be
        # shared by every call that omits Q_d.
        Q_d = {}

    deck = build_deck()
    random.shuffle(deck)
    hands = [deck.pop() for _ in range(players)]
    starting_hand = list(hands)

    history = []
    history_hash = []
    last_seat = players - 1
    for i in range(players):
        if len(history) > i:
            # King block: this seat's turn was already recorded as 'king' when
            # the previous seat's pass was refused, so only the state key is
            # logged here.
            state = {'history':history,'card':hands[i],'players':players,'passed':None}
            history_hash.append(transform(state))
            continue

        old_card = None
        if i > 0 and history[i-1] == 'pass':
            # After the swap the previous seat holds the card taken from
            # this seat.
            old_card = hands[i-1]
        state = {'history':history,'card':hands[i],'players':players,'passed':old_card}
        history_hash.append(transform(state))
        history.append(ais[i](state))
        assert len(history) == i + 1

        if history[i] == 'pass':
            if i == last_seat:
                # Last seat has nobody to trade with: draw from the deck.
                hands[i] = deck.pop()
                history[i] = 'cut'
            elif hands[i+1].value == 'K':
                history[i] = 'blocked'
                history.append('king')
                assert len(history) == i + 2
            else:
                hands[i], hands[i+1] = hands[i+1], hands[i]

    smallest = min(card.score for card in hands)
    # -True == -1 marks each loser; every other seat scores 0.
    score = [-(card.score == smallest) for card in hands]
    normsumscore = sum(score) / float(players)

    # Checked before touching Q_d so a broken round cannot corrupt the table.
    assert len(score)==len(hands)==len(starting_hand)==len(history)

    for h, outcome, reward in zip(history_hash, history, score):
        if h not in Q_d:
            Q_d[h] = {'pass':{'V':0,'N':0} , 'blocked' : {'V':0,'N':0} ,'stay': {'V':0,'N':0}}
        if outcome == 'pass' or outcome == 'cut':
            action = 'pass'
        elif outcome == 'blocked':
            action = 'blocked'
        else:
            # 'stay', 'king' and any unrecognised answer are all filed as stay.
            action = 'stay'
        entry = Q_d[h][action]
        # Value is the seat's score relative to the table average.
        entry['V'] += reward - normsumscore
        entry['N'] += 1

    return score , history , starting_hand , hands , history_hash
    

In [106]:
def learning_ai(state):
    """Exploring policy used during training; returns 'stay' or 'pass'.

    Stays outright when the ``passed`` card scores lower than the held card.
    Otherwise looks the state up in the module-level table ``Q``: unseen
    states defer to ``ai``; seen states pick the action with the highest
    average value plus an exploration bonus that shrinks as the action's
    visit count grows.
    """
    given_away = state['passed']
    if given_away and given_away.score < state['card'].score:
        return 'stay'

    puct_constant = 5
    h = transform(state)
    if h not in Q:
        return ai(state)

    entry = Q[h]
    sqrt_sum = (entry['pass']['N'] + entry['stay']['N']) ** 0.5

    best_key, best = None, -float('inf')
    for key in ('pass', 'stay'):
        visits = entry[key]['N']
        # An untried action gets a neutral average of 0.
        q = float(entry[key]['V']) / visits if visits > 0 else 0
        candidate = q + puct_constant * sqrt_sum / (1 + visits)
        # Strict comparison: on a tie the earlier action ('pass') wins.
        if candidate > best:
            best_key, best = key, candidate
    return best_key

def smart_ai(state):
    """Greedy policy over the learned table ``Q``; returns 'stay' or 'pass'.

    Stays outright when the ``passed`` card scores lower than the held card.
    Unseen states defer to ``ai``; otherwise the action with the higher
    average recorded value is chosen, with no exploration bonus.
    """
    given_away = state['passed']
    if given_away and given_away.score < state['card'].score:
        return 'stay'

    h = transform(state)
    if h not in Q:
        return ai(state)

    entry = Q[h]
    best_key, best = None, -float('inf')
    for key in ('pass', 'stay'):
        visits = entry[key]['N']
        # An untried action gets a neutral average of 0.
        q = float(entry[key]['V']) / visits if visits > 0 else 0
        # Strict comparison: on a tie the earlier action ('pass') wins.
        if q > best:
            best_key, best = key, q
    return best_key

In [124]:
import re
def read_dict(text):
    """Parse the ``str(dict)`` form of a statistics entry back into a dict.

    Handles the subset written by this notebook: quoted keys whose values are
    either numbers or nested dicts of the same shape.  Anything before the
    first '{' is ignored.

    Args:
        text: string such as ``"{'pass': {'V': 0, 'N': 3}, 'stay': {...}}"``.

    Returns:
        dict with the surrounding quote characters stripped from each key.
        Every number is returned as a ``float`` (integers included); nested
        dicts are parsed recursively.

    Raises:
        ValueError: if a scalar value is not something ``float()`` accepts.

    Truncated input is handled on a best-effort basis: parsing stops at the
    point where the text runs out and whatever was read so far is returned.
    Previously a scalar with no closing ',' or '}' looped forever and a key
    with no value raised IndexError.
    """
    d = {}
    text = text[text.find('{')+1:]
    while True:
        key, separator, value = text.partition(':')
        if not separator:
            # No further "key: value" pair.
            break
        key = key.strip()[1:-1]   # drop the surrounding quote characters
        value = value.strip()
        if not value:
            # Key without a value: the input was cut short.
            break

        if value[0] == '{':
            # Find the brace that closes this nested dict.
            depth = 0
            end = None
            for position, char in enumerate(value):
                if char == '{':
                    depth += 1
                elif char == '}':
                    depth -= 1
                    if depth == 0:
                        end = position + 1
                        break
            if end is None:
                # Unbalanced braces: the incomplete entry is dropped.
                break
            d[key] = read_dict(value[:end])
            # Skip the single separator character that follows the '}'.
            text = value[end+1:]
            continue

        # Scalar: runs up to the next ',' or the closing '}'.
        end = len(value)
        for position, char in enumerate(value):
            if char == ',' or char == '}':
                end = position
                break
        d[key] = float(value[:end])
        if end < len(value) and value[end] == ',':
            text = value[end+1:]
            continue
        # Either the closing '}' or the end of a truncated input.
        break
    return d

    
# Load the learned statistics table written by the training cell.  Each line
# has the form "'<state hash>': <str() of the stats dict>".
Q={}
with open('statistics.txt','r') as stats_file:   # closed even if parsing fails
    for line in stats_file:
        if ':' not in line:
            # Blank or malformed line: nothing to unpack.
            continue
        key,value = line.split(':',1)
        key = key.strip()[1:-1]   # drop the surrounding quote characters
        Q[key] = read_dict(value)


In [127]:
# List every learned state key that belongs to a two-player game, followed by
# how many there are.
two_player_keys = [key for key in Q if key.startswith('02')]
for key in two_player_keys:
    print(key)
count = len(two_player_keys)
print(count)

020109070
020110070
020105080
020101090
020113070
020003000
020111080
020107120
020110000
020103110
020113100
020112100
020102060
020107050
020004000
020108070
020111030
020110090
020101040
020112030
020106060
020012000
020102010
020108100
020104080
020104100
020105040
020112080
020109000
020110040
020110120
020013000
020113020
020111070
020107010
020103020
020101120
020112070
020108090
020111110
020106020
020102050
020001000
020108020
020105000
020101010
020104020
020106110
020213001
020106050
020103050
020105090
020113060
020107110
020103100
020110010
020113110
020109040
020102090
020111020
020107060
020108060
020101050
020101110
020112000
020111120
020007000
020109110
020106010
020102000
020108110
020011000
020103080
020105050
020101020
020104110
020008000
020104050
020112090
020106120
020109010
020002000
020110050
020110110
020109080
020109120
020113010
020105120
020107020
020103030
020108080
020112040
020005000
020102040
020111010
020108010
020105010
020104010
020101060
020102100


In [134]:
# Self-play training: for every table size from 2 to 12 seats, play
# `training_time` rounds with the exploring policy in each seat, accumulating
# into Q, then checkpoint Q to disk.
# To train from scratch instead of resuming, reset the table first: Q = {}
training_time = 10000000
from IPython.display import clear_output
import time
import sys

for players in xrange(2, 13):
    ais = players * [learning_ai]
    saved_results = players * [0]

    for i in xrange(training_time):
        results = game(players, ais, Q)
        # results[0] is the per-seat score list for this round.
        for j, outcome in enumerate(results[0]):
            saved_results[j] += outcome
        if i % 100 == 0:
            # Progress line, overwritten in place by the next one.
            print('%s %s' % ('{:6d}'.format(i), saved_results))
            clear_output(wait=True)

    print(saved_results)
    with open('statistics.txt', 'w') as file_:
        count = 0
        file_.writelines("'{}': {}\n".format(key, str(Q[key])) for key in Q)

[-474838, -734726, -910005, -1034416, -1128199, -1192529, -1244648, -1280653, -1303789, -1318544, -1331543, -942081]


In [136]:
# open('statistics.txt','w').close()
# Persist the learned table: one "'<state hash>': <stats dict>" line per state.
with open('statistics.txt', 'w') as file_:
    count = 0
    file_.writelines("'{}': {}\n".format(key, str(Q[key])) for key in Q)

In [137]:
# Validation run: replay the same schedule with the greedy policy in every
# seat, collecting fresh statistics in Q_valid.
training_time = 10000000
Q_valid = {}
from IPython.display import clear_output
import time
import sys

for players in xrange(2, 13):
    ais = players * [smart_ai]
    saved_results = players * [0]

    for i in xrange(training_time):
        results = game(players, ais, Q_valid)
        # results[0] is the per-seat score list for this round.
        for j, outcome in enumerate(results[0]):
            saved_results[j] += outcome
        if i % 100 == 0:
            # Progress line, overwritten in place by the next one.
            print('%s %s' % ('{:6d}'.format(i), saved_results))
            clear_output(wait=True)

    print(saved_results)

[-455147, -709167, -887223, -1016858, -1111345, -1178167, -1246140, -1284609, -1308500, -1320686, -1326409, -912630]


In [138]:
# Persist the validation table: one "'<state hash>': <stats dict>" line per state.
with open('validation.txt', 'w') as file_:
    count = 0
    file_.writelines(
        "'{}': {}\n".format(key, str(Q_valid[key])) for key in Q_valid)

In [629]:
# for key in Q:
#     if key[:2] == '02' and key[2:4] == '01' and key[4:6] == '10':
#         action = 'pass' if Q[key]['pass']['N'] > Q[key]['stay']['N'] else 'stay'
#         print key, action, Q[key][action]['N'] , float(Q[key][action]['V']) / float(Q[key][action]['N'])

# Positive infinity; used below as the "no value recorded yet" sentinel.
INF = float('inf')
        
def allowed(h):
    """Check state hash ``h`` against the majority action of the preceding state.

    Returns True when ``h`` does not end in 'p'.  Otherwise the hash of the
    preceding decision is rebuilt (same first two characters, characters 2-4
    decremented by one, same characters 4-6) and the result is whether ``Q``
    records more 'pass' than 'stay' visits for it.  A rebuilt hash that is
    missing from ``Q`` counts as not allowed.

    NOTE(review): this relies on hashes whose trailing characters are
    per-action letters ('p'/'s').  transform() in this notebook ends every
    hash with a digit, for which this function always returns True -- confirm
    which hash format is intended before re-enabling the call site.
    """
    if h[-1] == 'p':
        # Rebuild the first six characters of the predecessor's hash.
        last_hash = ''
        last_hash += h[:2]
        last_hash += '{:02d}'.format(int(h[2:4])-1)
        last_hash += h[4:6]
        if h[-2] == 's':
            # Action before the final 'p' was 's': the predecessor's own
            # passed-card field is filled with '00'.
            last_hash += '00' + h[8:-1]
#             print last_hash
            if not last_hash in Q:
                return False
            if Q[last_hash]['pass']['N'] > Q[last_hash]['stay']['N']:
                return True
            else:
                return False
        elif h[-2] == 'p':        
            # Two trailing 'p's: try every possible passed-card score (1..13)
            # for the predecessor and recurse on each candidate that exists.
            for i in xrange(1,14):
                last_hash += '{:02d}'.format(i) + h[8:-1]
#                 print last_hash
                if not last_hash in Q:
                    # Trim back to the six-character prefix before the next try.
                    last_hash = last_hash[:6]
                    continue
                if Q[last_hash]['pass']['N'] > Q[last_hash]['stay']['N']:
                    if allowed(last_hash):
                        return True
                last_hash = last_hash[:6]
            return False
        else:
            # h[-2] is neither 's' nor 'p': no action letter precedes the
            # final 'p', so the predecessor hash ends right after '00'.
            last_hash += '00'
#             print last_hash
            if not last_hash in Q:
                print 'not there'
                return False
            if Q[last_hash]['pass']['N'] > Q[last_hash]['stay']['N']:
                return True
            else:
                return False
    else:
        return True
    
def convert_hash(h):
    """Return characters 2 through 5 of state hash ``h``.

    This drops the two leading characters and everything from index 6 on.
    """
    start, stop = 2, 6
    return h[start:stop]

# Summarise the validation table for n-seat games.  For each reduced key
# (convert_hash) Qn holds [largest trailing count whose majority action was
# 'pass', smallest trailing count whose majority action was 'stay'].
Qn = {}
n = 7
wanted_prefix = '{:02d}'.format(n)
for key in Q_valid:
#     if not allowed(key):
#         continue
    # Skip states whose passed-card field is non-zero and lower than the
    # held-card field.
    passed_score = int(key[6:8])
    if passed_score and passed_score < int(key[4:6]):
        continue
    h = convert_hash(key)
    if key[:2] != wanted_prefix:
        continue
    pass_n = Q_valid[key]['pass']['N']
    stay_n = Q_valid[key]['stay']['N']
    # Ignore rarely visited states.
    if pass_n + stay_n < 25:
        continue
    ps = int(key[-1])
    bounds = Qn.setdefault(h, [-INF, INF])
    if pass_n > stay_n:
        action = 'pass'
        bounds[0] = max(bounds[0], ps)
    else:
        action = 'stay'
        bounds[1] = min(bounds[1], ps)

for key in Qn:
    highest_pass, lowest_stay = Qn[key]
    if highest_pass == -INF and lowest_stay != INF:
        # Never saw a majority 'pass' for this key.
        print('%s %s' % (key, 'stay'))
    elif highest_pass != -INF and lowest_stay == INF:
        # Never saw a majority 'stay' for this key.
        print('%s %s' % (key, 'pass'))
    elif lowest_stay - highest_pass == 1:
        # Clean threshold: pass below it, stay from it upwards.
        print('%s %s' % (key, 'stay on {}'.format(lowest_stay)))
    else:
        print('%s %s' % (key, Qn[key]))

0309 stay
0308 stay
0307 stay
0306 stay
0305 stay on 1
0304 pass
0303 pass
0302 pass
0301 pass
0602 pass
0603 pass
0601 pass
0606 stay on 1
0607 stay
0604 stay on 2
0605 stay on 1
0608 stay
0612 stay
0112 stay
0113 stay
0110 stay
0111 stay
0206 stay
0207 stay
0204 stay on 1
0205 stay
0202 pass
0203 pass
0201 pass
0611 stay
0610 stay
0208 stay
0209 stay
0109 stay
0108 stay
0105 stay
0104 pass
0107 stay
0106 stay
0101 pass
0103 pass
0102 pass
0609 stay
0512 stay
0513 stay
0510 stay
0511 stay
0211 stay
0210 stay
0213 stay
0212 stay
0509 stay
0508 stay
0501 pass
0503 pass
0502 pass
0505 stay on 1
0504 [1, 1]
0507 stay
0506 stay
0004 pass
0005 stay
0006 stay
0007 stay
0001 pass
0002 pass
0003 pass
0008 stay
0009 stay
0613 stay
0013 stay
0012 stay
0011 stay
0010 stay
0401 pass
0402 pass
0403 pass
0404 [1, 1]
0405 stay on 1
0406 stay
0407 stay
0408 stay
0409 stay
0713 stay
0413 stay
0412 stay
0411 stay
0410 stay
0310 stay
0311 stay
0312 stay
0313 stay


In [630]:
# Inspect every validation state whose hash starts with '070504' and show the
# majority action with its visit count and average value.
for key in Q_valid:
    if key[:6] != '070504':
        continue
    # Keep states whose passed-card field is '00' or not lower than the
    # held-card field (both are zero-padded, so string comparison is safe).
    if not (key[6:8] == '00' or key[6:8] >= key[4:6]):
        continue
    pass_n = Q_valid[key]['pass']['N']
    stay_n = Q_valid[key]['stay']['N']
    if pass_n + stay_n < 25:
        continue
    action = 'pass' if pass_n > stay_n else 'stay'
    chosen = Q_valid[key][action]
    # The majority action of a state with at least 25 visits always has a
    # positive count, so the division cannot fail.  The former bare `except:`
    # was unreachable for that case and would only have hidden real errors.
    print('%s %s %s %s' % (key, action, chosen['N'],
                           float(chosen['V']) / float(chosen['N'])))

070504002 stay 734 0.176333203581
070504000 pass 8947 -0.287726133261
070504111 pass 1502 0.0194027011604
070504110 pass 5819 -0.263668278791
070504001 pass 17417 0.00789048466605
070504090 pass 5971 -0.246883747637
070504091 pass 1601 0.0149906308557
070504070 pass 5730 -0.236748940414
070504071 pass 1525 0.00290398126464
070504050 pass 5903 -0.18813678275
070504051 pass 1522 -0.00929228458795
070504100 pass 5851 -0.268061625607
070504101 pass 1548 0.00802879291251
070504120 pass 5864 -0.25068212824
070504121 pass 1515 -0.010843941537
070504081 stay 1673 0.0923063786184
070504080 pass 5860 -0.263773768893
070504061 pass 1545 -0.00277392510402
070504060 pass 5945 -0.227393968521
070504041 pass 1105 0.0168067226891
070504040 pass 4804 -0.16620078506


In [591]:
# Inspect every learned state whose hash starts with '020107': majority
# action, both visit counts and both average values.
for key in Q:
    if key[:6] != '020107':
        continue
    # Skip states whose passed-card field is non-zero and lower than the
    # held-card field.
    if key[6:8] != '00' and key[6:8] < key[4:6]:
        continue
    pass_stats = Q[key]['pass']
    stay_stats = Q[key]['stay']
    action = 'pass' if pass_stats['N'] > stay_stats['N'] else 'stay'
    row = (key, action, pass_stats['N'], stay_stats['N'],
           float(pass_stats['V']) / float(pass_stats['N']),
           float(stay_stats['V']) / float(stay_stats['N']))
    print(row)

('02010709p', 'pass', 3329, 714, -0.15049564433763893, -0.5)
('02010711p', 'pass', 3023, 1082, -0.3099569963612306, -0.5)
('02010700s', 'pass', 32436, 5293, -0.24093599704032556, -0.39448327980351405)
('02010708p', 'pass', 3403, 604, -0.06964443138407288, -0.5)
('02010712p', 'pass', 2632, 1365, -0.38848784194528874, -0.5)
('02010710p', 'pass', 3196, 842, -0.22230913642052566, -0.5)
('02010707p', 'stay', 1399, 1643, -0.028234453180843458, 0.0)


In [536]:
# Spot-check the raw per-action statistics stored in Q for one state hash.
Q['04020500ps']

{'blocked': {'N': 313, 'V': 10.75},
 'pass': {'N': 3625, 'V': 110.0},
 'stay': {'N': 6099, 'V': 520.0}}

In [309]:
# Benchmark: eight seats playing the baseline heuristic, except that seat
# `smart_loc` uses the greedy learned policy.  Scores are summed per seat.
players = 8
smart_loc = 4
ais = [ai] * smart_loc + [smart_ai] + [ai] * (players - smart_loc - 1)
saved_results = players * [0]
Q_valid = {}
for i in xrange(100000):
    results = game(players, ais, Q_valid)
    # results[0] is the per-seat score list for this round.
    for j, outcome in enumerate(results[0]):
        saved_results[j] += outcome
    if i % 100 == 0:
        print('%s %s' % ('{:6d}'.format(i), saved_results))
        sys.stdout.flush()
        clear_output(wait=True)
print(saved_results)

[-8694, -11772, -14124, -15887, -16430, -19304, -18841, -14336]


In [310]:
# Benchmark: eight seats all playing the baseline heuristic.  Scores are
# summed per seat.
players = 8
ais = players * [ai]
saved_results = players * [0]
Q_valid = {}
for i in xrange(100000):
    results = game(players, ais, Q_valid)
    # results[0] is the per-seat score list for this round.
    for j, outcome in enumerate(results[0]):
        saved_results[j] += outcome
    if i % 100 == 0:
        print('%s %s' % ('{:6d}'.format(i), saved_results))
        sys.stdout.flush()
        clear_output(wait=True)
print(saved_results)

[-8715, -11815, -14152, -15849, -17329, -18266, -18854, -14637]


In [397]:
# Benchmark: eight seats all playing the greedy learned policy.  Scores are
# summed per seat.
players = 8
ais = players * [smart_ai]
saved_results = players * [0]
Q_valid = {}
for i in xrange(100000):
    results = game(players, ais, Q_valid)
    # results[0] is the per-seat score list for this round.
    for j, outcome in enumerate(results[0]):
        saved_results[j] += outcome
    if i % 100 == 0:
        print('%s %s' % ('{:6d}'.format(i), saved_results))
        sys.stdout.flush()
        clear_output(wait=True)
print(saved_results)

KeyboardInterrupt: 

In [427]:
# Play one eight-seat round with the baseline heuristic in every seat and
# display the full result tuple (scores, history, starting and final hands,
# state hashes).
players = 8
ais = players * [ai]
game(players, ais=ais)

([0, 0, -1, 0, 0, 0, 0, 0],
 ['pass', 'pass', 'blocked', 'king', 'pass', 'pass', 'pass', 'cut'],
 [3♠, 5♠, J♠, K♥, A♥, Q♣, 4♠, 9♦],
 [5♠, J♠, 3♠, K♥, Q♣, 4♠, 9♦, 7♦],
 ['080003000',
  '080103050',
  '080203110',
  '080413001',
  '080401001',
  '080501121',
  '080601041',
  '080701091'])

In [404]:
# Display the whole learned statistics table.
Q

{'04030102spp': {'blocked': {'N': 0, 'V': 0},
  'pass': {'N': 2537, 'V': 440.75},
  'stay': {'N': 261, 'V': -191.0}},
 '06011005': {'blocked': {'N': 0, 'V': 0},
  'pass': {'N': 0, 'V': 0},
  'stay': {'N': 377, 'V': 71.16666666666657}},
 '07060112': {'blocked': {'N': 0, 'V': 0},
  'pass': {'N': 14048, 'V': -725.4285714285567},
  'stay': {'N': 746, 'V': -613.8571428571398}},
 '07060111': {'blocked': {'N': 0, 'V': 0},
  'pass': {'N': 14021, 'V': -727.7142857142818},
  'stay': {'N': 747, 'V': -613.9999999999966}},
 '07060110': {'blocked': {'N': 0, 'V': 0},
  'pass': {'N': 13934, 'V': -769.0000000000189},
  'stay': {'N': 746, 'V': -614.2857142857123}},
 '05041006': {'blocked': {'N': 0, 'V': 0},
  'pass': {'N': 0, 'V': 0},
  'stay': {'N': 234, 'V': 51.60000000000014}},
 '05041007': {'blocked': {'N': 0, 'V': 0},
  'pass': {'N': 0, 'V': 0},
  'stay': {'N': 201, 'V': 45.000000000000036}},
 '02011108p': {'blocked': {'N': 0, 'V': 0},
  'pass': {'N': 0, 'V': 0},
  'stay': {'N': 210, 'V': 105.0}},


In [368]:
def q_value(score):
    """Average value of one action entry (``score['V'] / score['N']``).

    Returns -1 for an entry that has never been visited.
    """
    visits = score['N']
    return -1 if visits == 0 else float(score['V']) / visits
# For every learned state whose held-card field is '02', show the total visit
# count, the action with the better average value, and both averages.
for key in Q:
    if key[4:6] != '02':
        continue
    pass_q = q_value(Q[key]['pass'])
    stay_q = q_value(Q[key]['stay'])
    row = (key, Q[key]['stay']['N'] + Q[key]['pass']['N'],
           'stay' if stay_q > pass_q else 'pass',
           pass_q, stay_q)
    print(row)

('02010210p', 2358, 'pass', -0.22514124293785312, -0.5)
('02010204p', 2366, 'pass', 0.257183908045977, -0.5)
('02010209p', 2395, 'pass', -0.13440294581799053, -0.5)
('02010207p', 2405, 'pass', -0.003235440517670483, -0.5)
('02010201p', 2412, 'stay', -1, 0.5)
('02010211p', 2390, 'pass', -0.30815347721822545, -0.5)
('02010206p', 2391, 'pass', 0.0898876404494382, -0.5)
('02010203p', 2395, 'pass', 0.32052483598875353, -0.5)
('02010208p', 2383, 'pass', -0.0686375321336761, -0.5)
('02010200s', 15990, 'pass', -0.22199125510716078, -0.486758214811182)
('02010212p', 2352, 'pass', -0.389530931339225, -0.5)
('02010205p', 2425, 'pass', 0.16073968705547653, -0.5)
('02010202p', 1762, 'pass', 0.41258992805755396, 0.0)
('02000200', 29851, 'pass', 0.025558250695583935, -0.4188222344523941)


In [306]:
# Show the validation statistics of every state hash that starts with '8204'.
for key, stats in Q_valid.items():
    if key.startswith('8204'):
        print('%s %s' % (key, stats))

820400 {'blocked': {'N': 0, 'V': 0}, 'stay': {'N': 466, 'V': -195}, 'pass': {'N': 0, 'V': 0}}


In [339]:
# Spot-check the raw per-action statistics stored in Q for one state hash.
Q['2103011']

{'blocked': {'N': 0, 'V': 0},
 'pass': {'N': 0, 'V': 0},
 'stay': {'N': 529, 'V': 0}}