In [1]:
import pandas as pd
import numpy as np
import random

In [2]:
def sim_search(lost_in, look_in):
    """Simulate one search of a single forest for every trial.

    Parameters
    ----------
    lost_in : array-like of str
        Forest label per trial giving where the dog actually is.
    look_in : str
        Label of the forest being searched.

    Returns
    -------
    numpy.ndarray of bool
        One entry per trial. An entry can only be True when the dog is in
        the searched forest; in that case it is True with the detection
        probability listed for that forest in ``prob_dict``.

    Raises
    ------
    KeyError
        If ``look_in`` matches at least one trial but has no entry in
        ``prob_dict``.
    """
    prob_dict = {'a': 0.25, 'b': 0.15}  # prob of found, given the right forest is searched
    lost_in = np.asarray(lost_in)
    res = np.zeros(lost_in.shape[0], dtype=bool)

    # Trials where the searched forest is the one the dog is in.
    hit = lost_in == look_in
    n_hit = int(np.count_nonzero(hit))
    if n_hit:
        # One uniform draw per matching trial, in trial order. Drawing them in a
        # single call avoids assigning a length-1 array into a scalar slot.
        res[hit] = np.random.rand(n_hit) <= prob_dict[look_in]
    return res

In [3]:
N = 50_000  # number of simulated trials

# a) In which forest should Oscar look on the first day of the search to maximize the probability he finds his dog that day?

In [4]:
# Draw where the dog is for each trial: forest 'a' w.p. 0.4, forest 'b' w.p. 0.6.
forest = np.random.choice(['a', 'b'], p=[0.4, 0.6], size=N)

# Simulate a search of each forest over the same set of dog locations.
found_a = sim_search(look_in='a', lost_in=forest)
found_b = sim_search(look_in='b', lost_in=forest)

# Wide format: one row per trial, one outcome column per searched forest.
df = pd.DataFrame({
    'Forest': forest,
    'Found_a': found_a,
    'Found_b': found_b,
})

In [5]:
# Fraction of all trials in which the search of each forest succeeded.
prob_a = df['Found_a'].sum() / N
prob_b = df['Found_b'].sum() / N

print('Prob of finding dog in A: {0}'.format(prob_a))
print('Prob of finding dog in B: {0}'.format(prob_b))

Prob of finding dog in A: 0.1004
Prob of finding dog in B: 0.08984


**Theory**

In [6]:
# Total probability over the dog's location:
#   P(found) = P(in A) * P(found | in A) + P(in B) * P(found | in B)
# where the detection probability is zero for the forest not being searched.
theory_a = 0.4 * 0.25 + 0.6 * 0
theory_b = 0.4 * 0 + 0.6 * 0.15
print((theory_a, theory_b))

(0.1, 0.09)


# b) Oscar looked in forest A on the first day but didn't find his dog. What is the probability that the dog is in forest A?

In [7]:
# Keep only the trials where the search of forest A failed, then tabulate
# the share of those trials by the forest the dog was actually in.
tmp_df = df.loc[~df['Found_a']]
tmp_df['Forest'].value_counts(normalize=True)

b    0.668764
a    0.331236
Name: Forest, dtype: float64

**Theory**

In [8]:
# Bayes' rule: P(in A | not found when searching A).
# Numerator: P(in A) * P(not found | in A).
num = 0.4 * 0.75
# Denominator adds P(in B) * P(not found | in B); a search of A never finds a dog in B.
den = num + 0.6 * 1.0
num / den

0.33333333333333337

# c) Oscar flips a fair coin to determine where to look on the first day and finds the dog on the first day. What is the probability that he looked in forest A?

In [9]:
# Coin-flip search location: give each search arm half of the trials and
# stack the two arms in long format (one row per trial and searched forest).
half_N = int(N / 2)
forest = np.random.choice(['a', 'b'], p=[0.4, 0.6], size=half_N)
found_a = sim_search(look_in='a', lost_in=forest)
found_b = sim_search(look_in='b', lost_in=forest)

df_a = pd.DataFrame({'Forest': forest, 'Look': 'a', 'Found': found_a})
df_b = pd.DataFrame({'Forest': forest, 'Look': 'b', 'Found': found_b})
df = pd.concat([df_a, df_b])

In [10]:
# Among the rows where the dog was found, the share of each searched forest.
df.loc[df['Found'], 'Look'].value_counts(normalize=True)

a    0.527382
b    0.472618
Name: Look, dtype: float64

**Theory**

In [11]:
# Bayes' rule: P(looked in A | found).
# Numerator: P(in A) * P(coin picks A) * P(found | in A, search A).
num = 0.4 * 0.5 * 0.25
# Denominator adds the matching term for forest B.
den = num + 0.6 * 0.5 * 0.15
num / den

0.5263157894736842

# d) Oscar decides to look in forest A for the first two days. What is the probability that he finds his dog alive for the first time on the second day?

In [18]:
# Dog location per trial.
forest = np.random.choice(['a', 'b'], p=[.4, .6], size=N)

# Two consecutive days of searching forest A.
found_1 = sim_search(look_in='a', lost_in=forest)
found_2 = sim_search(look_in='a', lost_in=forest)

# Per-day alive indicators, drawn independently of the search outcomes.
alive_1 = np.random.rand(N) <= 2/2
alive_2 = np.random.rand(N) <= 2/3

df = pd.DataFrame({
    'Forest': forest,
    'F1': found_1,
    'F2': found_2,
    'A1': alive_1,
    'A2': alive_2,
})

In [19]:
# Trials where day 1 failed, day 2 succeeded, and the dog was alive on day 2.
fin_df = df[~df['F1'] & df['F2'] & df['A2']]

len(fin_df) / N

0.0497

**Theory**

In [15]:
# P(in A) * P(not found day 1 | in A) * P(found day 2 | in A) * P(alive on day 2)
0.4*.75 * 0.25 * 2/3

0.05000000000000001

In [None]:
# P(in A) * P(missed on day 1) * P(alive on day 2) * P(found on day 2)
my_guess = 0.4 * 0.75 * 2 / 3 * 0.25
my_guess

# e) Oscar decides to look in forest A for the first two days. Given that he did not find his dog on the first day, find the probability that he does not find his dog dead on the second day.

In [None]:
# Dog location per trial.
forest = np.random.choice(['a', 'b'], p=[.4, .6], size=N)

# Two consecutive days of searching forest A.
found_1 = sim_search(look_in='a', lost_in=forest)
found_2 = sim_search(look_in='a', lost_in=forest)

# Per-day alive indicators, drawn independently of the search outcomes.
alive_1 = np.random.rand(N) <= 2/2
alive_2 = np.random.rand(N) <= 2/3

df = pd.DataFrame({
    'Forest': forest,
    'F1': found_1,
    'F2': found_2,
    'A1': alive_1,
    'A2': alive_2,
})


In [None]:
# Condition on the day-1 search having failed.
given_df = df[~df['F1']]
# Within those trials: found on day 2 while the dog was not alive.
find_dead_df = given_df[given_df['F2'] & ~given_df['A2']]

# Complement of the conditional frequency of "found dead on day 2".
prob_e = 1 - len(find_dead_df) / len(given_df)
prob_e

In [None]:
# Row counts behind the estimate: conditioning set, then found-dead subset.
print(len(given_df))
print(len(find_dead_df))

**Theoretical**

In [None]:
# P(not found on day 1) when searching A: missed in A, or the dog is in B.
p_not_f1 = 0.4*.75 + 0.6*1
# Marginal P(found on day 2). Kept for reference only: the two days' outcomes
# both depend on which forest the dog is in, so they are NOT independent and
# this marginal must not be multiplied by p_not_f1.
p_f2 = 0.4*.25
# Joint P(not found day 1 AND found day 2): dog in A, missed once, then found.
p_not_f1_and_f2 = 0.4 * .75 * .25
# P(dog is dead on day 2), independent of the search outcomes.
p_dead_2 = 1/3

# P(found dead on day 2 | not found on day 1) = num / den
num = p_not_f1_and_f2 * p_dead_2
den = p_not_f1
ans = 1 - num/den

print(ans)

# f) Oscar finally finds his dog on the fourth day of the search. He looked in forest A for the first 3 days and in forest B on the fourth day. Given this information, what is the probability that he found his dog alive?

In [None]:
# Dog location per trial.
forest = np.random.choice(['a', 'b'], p=[.4, .6], size=N)

# Days 1-3 search forest A; day 4 searches forest B.
found_1 = sim_search(look_in='a', lost_in=forest)
found_2 = sim_search(look_in='a', lost_in=forest)
found_3 = sim_search(look_in='a', lost_in=forest)
found_4 = sim_search(look_in='b', lost_in=forest)

# Per-day alive indicators, drawn independently of the search outcomes.
alive_1 = np.random.rand(N) <= 2/2
alive_2 = np.random.rand(N) <= 2/3
alive_3 = np.random.rand(N) <= 2/4
alive_4 = np.random.rand(N) <= 2/5

df = pd.DataFrame({
    'Forest': forest,
    'F1': found_1,
    'F2': found_2,
    'F3': found_3,
    'F4': found_4,
    'A1': alive_1,
    'A2': alive_2,
    'A3': alive_3,
    'A4': alive_4,
})


In [None]:
#df.groupby(by='Forest').F4.value_counts(normalize=True)
#df.A4.value_counts(normalize=True)

In [None]:
# dog alive on all days
df['A1_A4'] = df.A1 & df.A2 & df.A3 & df.A4

f4_df = df[(df.F4==True)]
f4_a1t04_df = f4_df[f4_df.A1_A4 == True]

print(f4_a1t04_df.shape[0]/f4_df.shape[0])
print(f4_a1t04_df.shape[0]/N)

In [None]:
# Product of the four per-day alive probabilities: 1, 2/3, 1/2, 2/5.
my_guess = 1 * 2 / 3 * 1 / 2 * 2 / 5
my_guess

In [None]:
# Joint probability: alive on all four days, dog in B, and found when B is searched.
my_guess2 = 1 * 2 / 3 * 1 / 2 * 2 / 5 * 0.6 * 0.15
my_guess2