In [300]:
import pandas as pd
import numpy as np
import scipy.stats as stats
import matplotlib.pyplot as plt
from scipy.stats import gaussian_kde

drill_costs = pd.read_excel("Analysis_Data.xlsx", header=2, sheet_name="Drilling Cost")

drill_costs.head(10)

Unnamed: 0,Date,U.S. Nominal Cost per Crude Oil Well Drilled (Thousand Dollars per Well),U.S. Nominal Cost per Natural Gas Well Drilled (Thousand Dollars per Well),U.S. Nominal Cost per Dry Well Drilled (Thousand Dollars per Well),Arithmetic Return - Crude Oil,Arithmetic Return - Natural Gas,Arithmetic Return - Dry Well
0,1960-06-30,52.2,102.7,44.0,.,.,.
1,1961-06-30,51.3,94.7,45.2,-0.017241,-0.077897,0.027273
2,1962-06-30,54.2,97.1,50.8,0.05653,0.025343,0.123894
3,1963-06-30,51.8,92.4,48.2,-0.04428,-0.048404,-0.051181
4,1964-06-30,50.6,104.8,48.5,-0.023166,0.134199,0.006224
5,1965-06-30,56.6,101.9,53.1,0.118577,-0.027672,0.094845
6,1966-06-30,62.2,133.8,56.9,0.09894,0.313052,0.071563
7,1967-06-30,66.6,141.0,61.5,0.07074,0.053812,0.080844
8,1968-06-30,79.1,148.5,66.2,0.187688,0.053191,0.076423
9,1969-06-30,86.5,154.3,70.2,0.093552,0.039057,0.060423


In [301]:
drill_costs = drill_costs[
    (drill_costs["Date"].dt.year >= 1991) & (drill_costs["Date"].dt.year <= 2006)
]
drill_costs

Unnamed: 0,Date,U.S. Nominal Cost per Crude Oil Well Drilled (Thousand Dollars per Well),U.S. Nominal Cost per Natural Gas Well Drilled (Thousand Dollars per Well),U.S. Nominal Cost per Dry Well Drilled (Thousand Dollars per Well),Arithmetic Return - Crude Oil,Arithmetic Return - Natural Gas,Arithmetic Return - Dry Well
31,1991-06-30,346.9,506.6,441.2,0.077999,0.074899,0.200544
32,1992-06-30,362.3,426.1,357.6,0.044393,-0.158902,-0.189483
33,1993-06-30,356.6,521.2,387.7,-0.015733,0.223187,0.084172
34,1994-06-30,409.5,535.1,491.5,0.148345,0.026669,0.267733
35,1995-06-30,415.8,629.7,481.2,0.015385,0.176789,-0.020956
36,1996-06-30,341.0,616.0,541.0,-0.179894,-0.021756,0.124273
37,1997-06-30,445.6,728.6,655.6,0.306745,0.182792,0.21183
38,1998-06-30,566.0,815.6,973.2,0.270197,0.119407,0.484442
39,1999-06-30,783.0,798.4,1115.5,0.383392,-0.021089,0.146219
40,2000-06-30,593.4,756.9,1075.4,-0.242146,-0.051979,-0.035948


In [302]:
drill_costs["avg_return"] = drill_costs.filter(regex="^Arithmetic Return").mean(axis=1)
drill_costs["avg_cost"] = drill_costs.filter(regex="^U\.S\.").mean(axis=1)
drill_costs["avg_return"], drill_costs["avg_cost"]

(31    0.117814
 32   -0.101331
 33    0.097209
 34    0.147582
 35    0.057073
 36   -0.025793
 37    0.233789
 38    0.291349
 39    0.169507
 40   -0.110024
 41    0.306636
 42    0.116643
 43    0.174706
 44    0.299779
 45    0.138219
 46    0.190702
 Name: avg_return, dtype: object,
 31     431.566667
 32     382.000000
 33     421.833333
 34     478.700000
 35     508.900000
 36     499.333333
 37     609.933333
 38     784.933333
 39     898.966667
 40     808.566667
 41    1082.000000
 42    1182.700000
 43    1402.800000
 44    1711.833333
 45    1936.966667
 46    2279.800000
 Name: avg_cost, dtype: float64)

In [303]:
stats.shapiro(drill_costs["avg_return"])

ShapiroResult(statistic=0.9324272871017456, pvalue=0.2662917971611023)

In [304]:
rng = np.random.default_rng(12345)
kernel = gaussian_kde(drill_costs["avg_return"])

initial_cost = drill_costs["avg_cost"].iloc[-1]

mean_return = drill_costs["avg_return"].mean()
std_return = drill_costs["avg_return"].std()

years_1 = 2012 - 2006
years_2 = 2015 - 2012
years_3 = 2024 - 2015

In [305]:
normal_return = rng.normal(mean_return, std_return, years_1)
kernel_return = kernel.resample(years_1, seed=12345).flatten()

phase_2 = rng.triangular(left=-0.22, mode=-0.0917, right=-0.07, size=years_2)
phase_3 = rng.triangular(left=0.02, mode=0.05, right=0.06, size=years_3)

normal_phases = np.concatenate((normal_return, phase_2, phase_3))
kernel_phases = np.concatenate((kernel_return, phase_2, phase_3))

In [306]:
normal_future_vals = initial_cost * np.cumprod(1 + normal_phases)
kernel_future_vals = initial_cost * np.cumprod(1 + kernel_phases)

In [307]:
print(f"Normal Future Values for 2024: {normal_future_vals[-1]}")
print(f"Kernel Future Values for 2024: {kernel_future_vals[-1]}")

Normal Future Values for 2024: 3631.0810284452045
Kernel Future Values for 2024: 7978.544059801507
