In [2]:
from collections import namedtuple
from functools import partial
from itertools import product
import pandas as pd
from toolz import groupby, pipe
from typing import Iterator, List, Text, Tuple

In [3]:
# Load the first-innings win-probability table and reduce it to one cleaned
# row per (Over, Wickets, Runs) state.
df = pd.read_csv('ODIs/win_probability_1st_innings.csv')
# we only want the elements at the end of the over
# (.copy() makes the filtered frame independent, so adding a column below
# does not write into a slice of the raw frame / raise SettingWithCopyWarning)
df = df[df.Ball == 6].copy()
# there seems to be some spurious data in the Runs column - Cleaning that up:
# Runs1 is overs_balls * RunRate truncated to an int, and only rows whose
# recorded Runs equals Runs1 are kept.
df['Runs1'] = (df['overs_balls'] * df['RunRate']).astype(int)
df = df[['Over', 'Wickets', 'Runs1', 'Runs',
         'win_probability', 'score',
         'score_lo', 'score_hi',
         'overs_balls', 'RunRate']].drop_duplicates()
df = df.loc[df.Runs == df.Runs1,
            ['Over', 'Wickets', 'Runs', 'win_probability', 'score', 'score_lo', 'score_hi']]
# sort_values returns a fresh frame, so nothing is mutated in place
df = df.sort_values(by=['Over', 'Wickets', 'Runs'])
# If there are repeated elements, get the max win probability element
# ('max' is the built-in aggregation; same result as a per-group lambda)
df['max_repeat'] = df.groupby(['Over', 'Wickets', 'Runs'])['win_probability'].transform('max')
df = df[df.win_probability == df.max_repeat]

In [None]:
# Load the second-innings win-probability table, clean it the same way as the
# first-innings one, then re-express each state as "what is still left".
# NOTE(review): the file name says "2st" - confirm this matches the file on disk.
df = pd.read_csv('ODIs/win_probability_2st_innings.csv')
# we only want the elements at the end of the over
# (.copy() makes the filtered frame independent, so adding a column below
# does not write into a slice of the raw frame / raise SettingWithCopyWarning)
df = df[df.Ball == 6].copy()
# there seems to be some spurious data in the Runs column - Cleaning that up:
# Runs1 is overs_balls * RunRate truncated to an int, and only rows whose
# recorded Runs equals Runs1 are kept.
df['Runs1'] = (df['overs_balls'] * df['RunRate']).astype(int)
# 'Target' has to survive this projection: it is selected again below and is
# needed to compute the runs still required.
df = df[['Over', 'Wickets', 'Runs1', 'Runs', 'Target',
         'win_probability', 'score',
         'score_lo', 'score_hi',
         'overs_balls', 'RunRate']].drop_duplicates()
df = df.loc[df.Runs == df.Runs1,
            ['Over', 'Wickets', 'Runs', 'Target', 'win_probability', 'score', 'score_lo', 'score_hi']]
# sort_values returns a fresh frame, so nothing is mutated in place
df = df.sort_values(by=['Over', 'Wickets', 'Runs'])
# If there are repeated elements, get the max win probability element
# NOTE(review): repeats are keyed on the raw (Over, Wickets, Runs) and ignore
# Target, so rows with different targets compete for the same slot - confirm
# that this is intended.
df['max_repeat'] = df.groupby(['Over', 'Wickets', 'Runs'])['win_probability'].transform('max')
# .copy() again: the columns of this filtered frame are overwritten below
df = df[df.win_probability == df.max_repeat].copy()

# Now we want to change the meaning of the state:
# Over -> 50 - Over, Runs -> Target - Runs, Wickets -> 10 - Wickets
df['Over'] = 50 - df['Over']
df['Runs'] = df['Target'] - df['Runs']
df['Wickets'] = 10 - df['Wickets']

# Sanity checks on the re-encoded state
assert max(df.Wickets) == 10
assert max(df.Over) == 49
assert max(df.Runs) == max(df.Target)

In [4]:
# Key of the lookup table: the match situation.
State = namedtuple('State', 'Over Wicket Runs')
# Value of the lookup table: win probability plus the score estimate and its bounds.
WinProbability = namedtuple('WinProbability', 'win_probability mean lo hi')


def extract_win_probability(row) -> Tuple[State, WinProbability]:
    """Split one table row into a ``(State, WinProbability)`` pair.

    ``row`` is any object exposing the attributes ``Over``, ``Wickets``,
    ``Runs``, ``win_probability``, ``score``, ``score_lo`` and ``score_hi``
    (for example a tuple produced by ``DataFrame.itertuples()``).

    Note the renames: the row's ``Wickets`` becomes ``State.Wicket``, and
    ``score`` / ``score_lo`` / ``score_hi`` become ``mean`` / ``lo`` / ``hi``.
    """
    return (
        State(Over=row.Over, Wicket=row.Wickets, Runs=row.Runs),
        WinProbability(win_probability=row.win_probability,
                       mean=row.score,
                       lo=row.score_lo,
                       hi=row.score_hi),
    )
# One (State, WinProbability) pair per row of the cleaned frame.
pairs = list(map(extract_win_probability, df.itertuples()))

In [None]:
def match(pairs: List[Tuple[State, WinProbability]],
          attribute_value: int,
          attribute_name: Text) -> List[Tuple[State, WinProbability]]:
    """Return the pairs whose state attribute is nearest to ``attribute_value``.

    The pairs are grouped by ``getattr(state, attribute_name)``; the group
    whose key has the smallest absolute difference from ``attribute_value``
    is returned as a whole.

    Args:
        pairs: ``(State, WinProbability)`` pairs to search.
        attribute_value: The value to get as close to as possible.
        attribute_name: Name of the ``State`` field to compare on.

    Returns:
        All pairs sharing the nearest attribute value, or an empty list when
        ``pairs`` is empty.
    """
    grped = groupby(lambda pair: getattr(pair[0], attribute_name), pairs)
    if not grped:
        # Nothing to choose from; let the caller decide how to handle it.
        return []
    # min() picks the nearest key in a single pass; on a tie it keeps the key
    # that comes first in the grouping, exactly as the stable sort it replaces.
    nearest = min(grped, key=lambda attr_val: abs(attr_val - attribute_value))
    return grped[nearest]

def closest(state: State, pairs: List[Tuple[State, WinProbability]]) -> WinProbability:
    """Look up the win probability recorded for the state nearest to ``state``.

    The candidate pairs are narrowed one attribute at a time with ``match``:
    first by ``Over``, then by ``Runs``, and finally by ``Wicket``. The win
    probability of the first surviving pair is returned.
    """
    candidates = pairs
    # The order matters: each step only searches what the previous one kept.
    for field in ('Over', 'Runs', 'Wicket'):
        candidates = match(candidates,
                           attribute_value=getattr(state, field),
                           attribute_name=field)
    assert len(candidates) > 0
    return candidates[0][1]

def fill_wp_matrix(pairs: List[Tuple[State, WinProbability]],
                   overs: range = range(1, 51),
                   wickets: range = range(0, 11),
                   runs: range = range(0, 500)) -> Iterator[Tuple[State, WinProbability]]:
    """Yield a win probability for every state on an over x wicket x run grid.

    For each ``(over, wicket)`` combination the runs are walked in the order
    given, and every state is filled from the nearest recorded state
    (``closest``). Within one ``(over, wicket)`` row the win probability is
    never allowed to drop: if the looked-up value is lower than the one
    yielded for the previous run, the previous ``WinProbability`` tuple
    (including its mean/lo/hi) is yielded again instead.

    Args:
        pairs: Recorded ``(State, WinProbability)`` pairs to fill from.
        overs: Over values of the grid. Defaults to 1..50.
        wickets: Wicket values of the grid. Defaults to 0..10.
        runs: Run values of the grid, walked once per ``(over, wicket)``
            combination, so it must be re-iterable. Defaults to 0..499.

    Yields:
        ``(State, WinProbability)`` for every grid state, grouped by
        ``(over, wicket)`` with runs in the given order.
    """
    for over, wicket in product(overs, wickets):
        # Highest-so-far value within this (over, wicket) row.
        prev_win_probability = None
        for run in runs:
            state = State(over, wicket, run)
            win_probability = closest(state, pairs)
            if (prev_win_probability is not None
                    and win_probability.win_probability < prev_win_probability.win_probability):
                # Enforce "non-decreasing in runs" by carrying the previous value forward.
                win_probability = prev_win_probability
            yield state, win_probability
            prev_win_probability = win_probability


# Write the filled grid as tab-separated lines:
# over, wicket, run, win probability, score, lo, hi
with open("ODIs/probs_i1_odi_non_decreasing_in_run.txt", "w") as f:
    for c, (state, win_probability) in enumerate(fill_wp_matrix(pairs)):
        # Print the running count every 27500 states as a progress marker.
        if not c % 27500:
            print(c)
        # Both values are namedtuples, so they unpack in field order.
        over, wicket, run = state
        wp, score, lo, hi = win_probability
        row = (over, wicket, run, wp, score, lo, hi)
        f.write("{0}\t{1}\t{2}\t{3}\t{4}\t{5}\t{6}\n".format(*row))

0
27500
55000
82500
110000
137500
165000
192500
