In [2]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
pd.set_option('display.max_rows', 1500)

from copy import deepcopy
from collections import defaultdict

import camels
from camels import Game, State, flatten_state

# Probability stats

In [3]:
def amplitude_sampler(init_state):
    """Build a zero-argument function that samples one leaderboard.

    Every call of the returned function constructs a new ``Game`` with
    ``init_state`` as its initial state, calls ``finish_round()`` on it and
    returns whatever ``leaderboard()`` reports afterwards.
    """
    def _play_one_round():
        round_game = Game(init_state=init_state)
        round_game.finish_round()
        standings = round_game.leaderboard()
        return standings

    return _play_one_round

In [6]:
def amplitude_stats(num, init_state):
    """Estimate how often each camel occupies each leaderboard slot.

    Parameters
    ----------
    num : int
        Number of independent samples to draw; must be positive.
    init_state
        Starting state handed to ``amplitude_sampler`` and ``flatten_state``.

    Returns
    -------
    dict
        Maps every camel in ``flatten_state(init_state)`` to a float64 array
        whose ``i``-th entry is the fraction of samples in which that camel
        sat at index ``i`` of the sampled leaderboard.

    Raises
    ------
    ValueError
        If ``num`` is not positive; the counts could not be normalised into
        frequencies (division by zero would silently yield NaN).
    """
    if num <= 0:
        raise ValueError(f"num must be a positive sample count, got {num!r}")
    sample_one = amplitude_sampler(init_state)
    camel_order = flatten_state(init_state)
    n_slots = len(camel_order)
    res = {camel: np.zeros(n_slots, dtype=np.float64) for camel in camel_order}
    for _ in range(num):
        for slot, camel in enumerate(sample_one()):
            res[camel][slot] += 1
    # Normalise the raw counts in place so each array holds frequencies.
    for counts in res.values():
        counts /= num
    return res

In [7]:
# Example: 10000 samples from a state with "a5" at position 1 and "a2", "a1", "a0" at position 3.
# Each returned array is indexed by leaderboard slot (which end is first place depends on
# Game.leaderboard() -- TODO confirm).
amplitude_stats(10000, init_state=State.from_dict({1: ["a5"], 3: ["a2", "a1", "a0"]}))

{'a5': array([0.7274, 0.1056, 0.0676, 0.0994]),
 'a2': array([0.1131, 0.4819, 0.2515, 0.1535]),
 'a1': array([0.0917, 0.2299, 0.3772, 0.3012]),
 'a0': array([0.0678, 0.1826, 0.3037, 0.4459])}

# Topology analysis

In [8]:
def extract_topologies(state, die_state):
    """Return the set of distinct topology hashes found in ``state``.

    Positions are clustered by ``group_positions``; every cluster is encoded
    with ``hash_group`` and identical encodings collapse in the resulting set.
    """
    return {
        hash_group(cluster, state, die_state)
        for cluster in group_positions(state, die_state)
    }
        
def group_positions(state, die_state, max_gap=3):
    """Cluster occupied positions that lie close to each other.

    Parameters
    ----------
    state : mapping
        Maps a position to a stack object exposing a ``camels`` iterable.
    die_state : iterable
        Camels to ignore when deciding whether a position is occupied.
    max_gap : int, optional
        Largest distance between two consecutive occupied positions that still
        keeps them in the same group. Defaults to 3, the value that was
        previously hard-coded.

    Returns
    -------
    list of list
        Groups of positions in ascending order. Positions whose camels are all
        contained in ``die_state`` are skipped entirely.
    """
    excluded = set(die_state)  # built once instead of once per position
    groups = []
    current = []
    for pos in sorted(state):
        # A position counts only if a camel outside ``die_state`` stands on it.
        if not set(state[pos].camels) - excluded:
            continue
        # A gap wider than ``max_gap`` closes the current group.
        if current and pos - current[-1] > max_gap:
            groups.append(current)
            current = []
        current.append(pos)
    if current:
        groups.append(current)
    return groups

def hash_group(group, state, die_state):
    """Encode one group of positions as a comma-separated string of stack sizes.

    Every position from ``group[0]`` through ``group[-1]`` inclusive contributes
    one number: the count of distinct camels on that position that are not in
    ``die_state``, or 0 when ``state`` has no (truthy) stack there.
    """
    first, last = group[0], group[-1]
    sizes = []
    for pos in range(first, last + 1):
        stack = state.get(pos)
        if stack:
            sizes.append(len(set(stack.camels) - set(die_state)))
        else:
            sizes.append(0)
    return ",".join(str(size) for size in sizes)

In [9]:
def sampler(camels):
    """Build a zero-argument function that plays one game and profiles its topologies.

    Each call of the returned function creates a ``Game`` with ``camels`` and
    calls ``one_step()`` until the largest key of ``pos_to_stack`` reaches 16.
    After every step the current topologies are extracted and tallied. The
    result maps each topology hash to the fraction of steps in which it was
    observed.
    """
    def _play_one_game():
        game = Game(camels=camels)
        seen = defaultdict(int)
        steps = 0
        while max(game.board.state.state.pos_to_stack) < 16:
            game.one_step()
            pos_to_stack = game.board.state.state.pos_to_stack
            for top in extract_topologies(pos_to_stack, game.die.state):
                seen[top] += 1
            steps += 1
        return {top: hits / steps for top, hits in seen.items()}

    return _play_one_game

In [29]:
def process_hash(camel_hash, num_samples=10000):
    """Simulate a topology hash and report each camel's top-two slot frequencies.

    Parameters
    ----------
    camel_hash : str
        Comma-separated stack sizes, one per consecutive position, as produced
        by ``hash_group`` (e.g. ``"2,0,1"``).
    num_samples : int, optional
        Number of samples passed to ``amplitude_stats``. Defaults to 10000.

    Returns
    -------
    dict
        For every camel ``k``: ``"k_1"`` is the frequency of the last
        leaderboard slot and, when there is more than one slot, ``"k_2"`` is
        the frequency of the second-to-last slot. (Presumably these are first
        and second place -- confirm against ``Game.leaderboard()``.)

    Camels are named ``a<total>`` down to ``a1`` while walking the positions in
    increasing order, so ``a1`` is the last camel placed on the highest
    occupied position.
    """
    # The split pieces are strings, so compare their integer value: a str is
    # never equal to the int 0, and empty positions must be left out.
    counts = {
        pos: int(size)
        for pos, size in enumerate(camel_hash.split(","))
        if int(size) != 0
    }
    next_id = sum(counts.values())
    pos_to_camels = {}
    for pos, count in counts.items():
        pos_to_camels[pos] = [f"a{next_id - offset}" for offset in range(count)]
        next_id -= count
    state = State.from_dict(pos_to_camels)
    amplitude = amplitude_stats(num_samples, init_state=state)
    res = {}
    for camel, freqs in amplitude.items():
        if len(freqs) > 1:
            res[f"{camel}_2"] = freqs[-2]
        res[f"{camel}_1"] = freqs[-1]
    return res

In [30]:
# Sanity check: lexicographic order that sorted() gives the "<camel>_<n>" column names.
sorted(["a1_1", "a2_1", "a1_2"])

['a1_1', 'a1_2', 'a2_1']

In [31]:
from multiprocessing import Pool

In [32]:
def _process_topology(camel_hash, freq):
    """Build the one-row DataFrame for a single topology.

    The row is indexed by ``camel_hash`` and holds ``freq`` under ``"nums"``
    followed by every entry returned by ``process_hash(camel_hash)``.
    """
    row = {"nums": freq, **process_hash(camel_hash)}
    return pd.DataFrame(row, index=[camel_hash])

In [33]:
def stats(num, camels, processes=10):
    """Tabulate topology frequencies and per-camel slot frequencies.

    Parameters
    ----------
    num : int
        Number of games to sample; must be positive.
    camels
        Camel identifiers passed to ``sampler``.
    processes : int, optional
        Size of the worker pool that evaluates the topologies. Defaults to 10.

    Returns
    -------
    pandas.DataFrame
        One row per topology hash, sorted by descending ``"nums"`` (the
        per-game topology frequency averaged over all sampled games), followed
        by the columns from ``process_hash`` in sorted order.

    Raises
    ------
    ValueError
        If ``num`` is not positive, or (from ``pandas.concat``) if no topology
        was observed at all.
    """
    if num <= 0:
        raise ValueError(f"num must be a positive number of games, got {num!r}")
    sample_one = sampler(camels)
    topologies = defaultdict(int)
    for _ in range(num):
        for top, share in sample_one().items():
            topologies[top] += share
    # The context manager shuts the workers down even if a task raises;
    # starmap blocks until every result is available, so nothing is lost.
    with Pool(processes) as pool:
        frames = pool.starmap(_process_topology, topologies.items())
    res = pd.concat(frames)
    res["nums"] /= num
    columns = ["nums"] + sorted(col for col in res.columns if col != "nums")
    return res[columns].sort_values("nums", ascending=False)

In [34]:
# Run the topology study on 10000 sampled games with five camels and show the table.
# `res` is reused by the cells below.
res = stats(10000, ["a5", "a4", "a3", "a2", "a1"])
display(res)

Unnamed: 0,nums,a1_1,a1_2,a2_1,a2_2,a3_1,a3_2,a4_1,a4_2,a5_1,a5_2
1,0.336691,1.0,,,,,,,,,
2,0.078135,0.6671,0.3329,0.3329,0.6671,,,,,,
11,0.06703,0.6223,0.3777,0.3777,0.6223,,,,,,
101,0.051781,0.7766,0.2234,0.2234,0.7766,,,,,,
1001,0.035979,0.8376,0.1624,0.1624,0.8376,,,,,,
3,0.023264,0.5089,0.292,0.3156,0.4103,0.1755,0.2977,,,,
12,0.022227,0.5376,0.2726,0.2556,0.4547,0.2068,0.2727,,,,
21,0.020651,0.3114,0.432,0.4852,0.2554,0.2034,0.3126,,,,
111,0.019656,0.4871,0.3438,0.3052,0.3754,0.2077,0.2808,,,,
102,0.016675,0.5668,0.3249,0.2981,0.5588,0.1351,0.1163,,,,


In [35]:
# Table with an explicit column order, rows as returned by stats() (descending `nums`).
# The trailing comment is the disabled TSV export.
res[['nums', 'a1_1', 'a1_2', 'a2_1', 'a2_2', 'a3_1', 'a3_2', 'a4_1', 'a4_2', 'a5_1', 'a5_2']]#.to_csv("amplitudes_byfreq.tsv", sep="\t", index_label="comb")

In [37]:
# Same columns, rows sorted by index label (the topology hash).
# The trailing comment is the disabled TSV export.
res[['nums', 'a1_1', 'a1_2', 'a2_1', 'a2_2', 'a3_1', 'a3_2', 'a4_1', 'a4_2', 'a5_1', 'a5_2']].sort_index()#.to_csv("amplitudes_byname.tsv", sep="\t", index_label="comb")

Unnamed: 0,nums,a1_1,a1_2,a2_1,a2_2,a3_1,a3_2,a4_1,a4_2,a5_1,a5_2
1,0.334953,1.0,,,,,,,,,
1001,0.035395,0.8305,0.1695,0.1695,0.8305,,,,,,
1001001,0.002906,0.833,0.1496,0.1496,0.6971,0.0174,0.1533,,,,
1001001001,0.000146,0.8347,0.1448,0.1448,0.7049,0.0188,0.1321,0.0017,0.0182,,
1001001001001,7e-06,0.833,0.1498,0.1498,0.6986,0.0154,0.1334,0.0018,0.0159,0.0,0.0023
1001001002,9.7e-05,0.5849,0.3352,0.303,0.5968,0.1008,0.0611,0.0108,0.0063,0.0005,0.0006
100100101,0.00021,0.7854,0.183,0.183,0.6784,0.029,0.1224,0.0026,0.0162,,
100100101001,3.2e-05,0.8413,0.1409,0.1409,0.6553,0.0165,0.1752,0.0012,0.0257,0.0001,0.0029
10010010101,1.1e-05,0.7793,0.1879,0.1879,0.6161,0.0296,0.1663,0.0028,0.0262,0.0004,0.0035
1001001011,2.1e-05,0.5052,0.3516,0.312,0.4544,0.1634,0.1701,0.0177,0.0219,0.0017,0.002
