In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt


In [None]:
# Load the raw table from 'chart.csv' and echo it as this cell's output.
df = pd.read_csv(filepath_or_buffer='chart.csv')

df

## Mean and Std - incomplete data!

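What should we do about the missing values? For now, NaN entries are skipped when computing the mean and standard deviation.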

In [None]:
# Pull out the two observed columns once. NaN entries are skipped
# (skipna=True), so gaps in the data do not turn the statistics into NaN.
recruitment_obs = df['Recruitment']
survival_obs = df['Survival Rate']

recruitment_mean, recruitment_std = (
    recruitment_obs.mean(skipna=True),
    recruitment_obs.std(skipna=True),
)
survival_mean, survival_std = (
    survival_obs.mean(skipna=True),
    survival_obs.std(skipna=True),
)

# Report the mean and standard deviation of recruitment and survival rate,
# one labelled line per statistic.
summary = (
    ("Recruitment Mean: ", recruitment_mean),
    ("Recruitment Standard Deviation: ", recruitment_std),
    ("Survival Rate Mean: ", survival_mean),
    ("Survival Rate Standard Deviation: ", survival_std),
)
for label, value in summary:
    print(label, value)

## Part 3 - Simulation using normal distributions

Randomly sample from a normal distribution for both the recruitment and the survival rate, for each of 25 years.

In [None]:
# Simulation settings shared by the cells that follow.

num_years = 25             # number of yearly steps in each simulated trajectory
num_simulations = 10       # how many independent trajectories to generate
starting_population = 275  # population assigned to every trajectory at Year 0

In [None]:
# Initialise the results table: one row per simulated trajectory and a single
# 'Year 0' column holding the common starting population as floats.

starting_populations = pd.Series(
    np.full(num_simulations, float(starting_population)),
    name='Year 0',
)
simulations = starting_populations.to_frame()

# Display the freshly initialised table.
simulations

In [None]:
# Simulate the population year by year using normally distributed
# recruitment and survival rates.

# Fixed seed so the draws are the same on every run of this cell.
rng = np.random.default_rng(seed=3290751)

for year in range(1, num_years + 1):
    # One recruitment draw per trajectory for this year.
    recruits = rng.normal(loc=recruitment_mean, scale=recruitment_std, size=num_simulations)

    # One survival-rate draw per trajectory for this year (drawn after
    # recruitment, so the random stream is consumed in the same order).
    survival_rate = rng.normal(loc=survival_mean, scale=survival_std, size=num_simulations)

    # Survivors of last year's population plus this year's recruits become
    # the new column for this year.
    previous = simulations[f"Year {year - 1}"]
    simulations[f"Year {year}"] = previous * survival_rate + recruits

# Rounding is applied once, after all years are computed.
simulations = simulations.round(1)

# Display the completed table.
simulations

In [None]:
# Plot every simulated trajectory (one table row each) in gray, then overlay
# the observed population for comparison.

fig, ax = plt.subplots()

years_index = list(range(num_years + 1))
for i in range(num_simulations):
    # Label only the first trajectory so the legend gets exactly one
    # 'Simulations' entry, including when there is just a single simulation
    # (checking for index 1 would never fire in that case). Labels that start
    # with an underscore are left out of the legend.
    label = 'Simulations' if i == 0 else '_nolegend_'
    ax.plot(years_index, simulations.iloc[i, :], color='tab:gray', label=label)

# Overlay the actual data as a thicker, highlighted line.
ax.plot(df['Actual Population'], color='tab:orange', linewidth=3, label='Actual Population')

ax.set_xlabel("Year")
ax.set_ylabel("Population")
ax.set_title("Population over Time (Using normal distribution)")
ax.legend()
plt.show()
plt.close(fig)

## Bootstrapping!

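Is there a better way to run the simulations? One option we learned in Stats Inference is bootstrapping: instead of assuming a normal distribution, resample the observed recruitment and survival values with replacement.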

In [None]:
# Initialise the bootstrap results table: one row per simulated trajectory and
# a single 'Year 0' column holding the common starting population as floats.

starting_populations = pd.Series(
    np.full(num_simulations, float(starting_population)),
    name='Year 0',
)
bootstrap_sims = starting_populations.to_frame()

# Display the freshly initialised table.
bootstrap_sims

In [None]:
# Simulate the population year by year, resampling the observed recruitment
# and survival values instead of drawing from a fitted distribution.

# Fixed seed so the draws are the same on every run of this cell.
rng = np.random.default_rng(seed=29365701)

# Observed values with the NaNs removed, each column filtered on its own.
# TODO decide if we drop entire rows with NaN or just skip the specific NaNs
recruitment_for_bootstrap = df['Recruitment'].dropna()
survival_for_bootstrap = df['Survival Rate'].dropna()


for year in range(1, num_years + 1):
    # Resample (with replacement) one observed recruitment per trajectory.
    recruits = rng.choice(recruitment_for_bootstrap, size=num_simulations, replace=True)

    # Resample (with replacement) one observed survival rate per trajectory,
    # drawn after recruitment so the random stream is consumed in the same order.
    survival_rate = rng.choice(survival_for_bootstrap, size=num_simulations, replace=True)

    # Survivors of last year's population plus this year's recruits become
    # the new column for this year.
    previous = bootstrap_sims[f"Year {year - 1}"]
    bootstrap_sims[f"Year {year}"] = previous * survival_rate + recruits

# Rounding is applied once, after all years are computed.
bootstrap_sims = bootstrap_sims.round(1)

# Display the completed table.
bootstrap_sims

In [None]:
# Plot every bootstrap trajectory (one table row each) in gray, then overlay
# the observed population for comparison.

fig, ax = plt.subplots()

years_index = list(range(num_years + 1))
for i in range(num_simulations):
    # Label only the first trajectory so the legend gets exactly one
    # 'Simulations' entry, including when there is just a single simulation
    # (checking for index 1 would never fire in that case). Labels that start
    # with an underscore are left out of the legend.
    label = 'Simulations' if i == 0 else '_nolegend_'
    ax.plot(years_index, bootstrap_sims.iloc[i, :], color='tab:gray', label=label)

# Overlay the actual data as a thicker, highlighted line.
ax.plot(df['Actual Population'], color='tab:orange', linewidth=3, label='Actual Population')

ax.set_xlabel("Year")
ax.set_ylabel("Population")
ax.set_title("Population over Time (Bootstrap method)")
ax.legend()
plt.show()
plt.close(fig)