# Tobit

In [78]:
import numpy as np
import pandas as pd 
from numpy import linalg as la
from scipy.stats import norm
from scipy.stats import chi2
from scipy import optimize
from tabulate import tabulate
from matplotlib import pyplot as plt
import seaborn as sns 
sns.set_theme()

%load_ext autoreload
%autoreload 2

import tiboT as tiboT
import estimation
import LinearModels as lm

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


## Load Data

In [79]:
# name columns
cols = ['y', 'x']

# read dataset 
dat = pd.read_csv('Data.csv', header = 0, names = cols)

# add constant term 
dat['cnst'] = 1.0

# Declare labels
lbly = 'y'
lblx = ['cnst', 'x']

# pandas to numpy 
x = dat[lblx].values
y = dat[lbly].values

dat.head(10)

Unnamed: 0,y,x,cnst
0,-2.949143,-0.617175,1.0
1,0.0,0.357731,1.0
2,-0.255997,0.600539,1.0
3,0.0,0.123566,1.0
4,0.0,-0.490995,1.0
5,-3.044403,-1.239924,1.0
6,0.0,-1.25876,1.0
7,-1.896223,-0.653037,1.0
8,-2.744344,-1.61695,1.0
9,0.0,0.793135,1.0


In [80]:
print(x)

[[ 1.         -0.61717459]
 [ 1.          0.35773073]
 [ 1.          0.60053901]
 ...
 [ 1.          0.63586568]
 [ 1.         -0.25705614]
 [ 1.          0.60642989]]


## Estimate tiboT model 

In [81]:
theta0 = tiboT.starting_values(y, x)
theta0

array([-0.60068275,  0.57252733,  0.87964049])

In [82]:
result = estimation.estimate(tiboT.q, theta0, y, x, cov_type='Sandwich')

Optimization terminated successfully.
         Current function value: 1.006679
         Iterations: 12
         Function evaluations: 52
         Gradient evaluations: 13


In [83]:
mu = result.get('theta')[0] 
beta = result.get('theta')[1] 
sig = result.get('theta')[2] 

se  = result.get('se')
t_values  = result.get('t')

In [84]:
# Create a dictionary with the results
results_dict = {
    'Parameter': ['mu', 'beta', 'sigma'],
    'Estimate': [mu, beta, sig],
    'Standard Error': se,
    't-values': t_values
}

# Create the DataFrame
df = pd.DataFrame(results_dict)

# Print the DataFrame
print(df)

  Parameter  Estimate  Standard Error   t-values
0        mu  0.543006        0.078948   6.878001
1      beta  1.436538        0.074785  19.209006
2     sigma  1.536482        0.060303  25.479382


## Partial effects on the probability of censoring

In [85]:
b = result.get('theta')[:-1] 
phi = norm.pdf((x@b)/sig)
PE = (beta/sig) * phi
print(PE) 


[0.36378172 0.29441034 0.24543953 0.33415543 0.37091621 0.26957606
 0.26573626 0.3608614  0.19069108 0.20481291 0.05167967 0.25763529
 0.18242682 0.18777391 0.30803271 0.35343811 0.26585436 0.28842288
 0.34512312 0.37125606 0.1503742  0.37005855 0.3346022  0.35915606
 0.24307704 0.14378973 0.30538344 0.35291308 0.34550019 0.22169346
 0.10835202 0.22869649 0.35380883 0.36288436 0.30148995 0.29156516
 0.31994184 0.36825434 0.23990149 0.2090281  0.19272078 0.36682416
 0.3729687  0.20272545 0.36710291 0.12312519 0.37265065 0.17097938
 0.37206404 0.31123033 0.01653706 0.21226234 0.35521222 0.28646974
 0.02615341 0.32540213 0.08050928 0.20578307 0.17149217 0.32645182
 0.3611074  0.12689474 0.26137073 0.37267646 0.3668256  0.22365052
 0.36173823 0.34899358 0.17672062 0.3729655  0.06414516 0.17295952
 0.36245999 0.17979031 0.3658071  0.19263615 0.05562109 0.03429497
 0.16709272 0.16510186 0.0984041  0.2188928  0.37295906 0.34813636
 0.04408202 0.28253917 0.26985777 0.12030369 0.361571   0.0323

In [86]:
phi = norm.pdf((mu+beta*x[:, 1])/sig) 
PE = (beta/sig) * phi

print(PE)

[0.36378172 0.29441034 0.24543953 0.33415543 0.37091621 0.26957606
 0.26573626 0.3608614  0.19069108 0.20481291 0.05167967 0.25763529
 0.18242682 0.18777391 0.30803271 0.35343811 0.26585436 0.28842288
 0.34512312 0.37125606 0.1503742  0.37005855 0.3346022  0.35915606
 0.24307704 0.14378973 0.30538344 0.35291308 0.34550019 0.22169346
 0.10835202 0.22869649 0.35380883 0.36288436 0.30148995 0.29156516
 0.31994184 0.36825434 0.23990149 0.2090281  0.19272078 0.36682416
 0.3729687  0.20272545 0.36710291 0.12312519 0.37265065 0.17097938
 0.37206404 0.31123033 0.01653706 0.21226234 0.35521222 0.28646974
 0.02615341 0.32540213 0.08050928 0.20578307 0.17149217 0.32645182
 0.3611074  0.12689474 0.26137073 0.37267646 0.3668256  0.22365052
 0.36173823 0.34899358 0.17672062 0.3729655  0.06414516 0.17295952
 0.36245999 0.17979031 0.3658071  0.19263615 0.05562109 0.03429497
 0.16709272 0.16510186 0.0984041  0.2188928  0.37295906 0.34813636
 0.04408202 0.28253917 0.26985777 0.12030369 0.361571   0.0323

## Partial effects on the conditional mean

In [87]:
max_value = max(x[:, 1])
min_value = min(x[:, 1])

# Print the results
print(f"Maximum value: {max_value}")
print(f"Minimum value: {min_value}")

Maximum value: 3.2649743449819217
Minimum value: -3.2708791597089197


In [88]:
x_values = [-2, 0, 2]

partial_effects = []

for x in x_values:
    Phi = norm.cdf((mu + beta * x) / sig)
    PE = beta * (1 - Phi)
    partial_effects.append([PE])  # Add as a row to maintain column format

# Convert the list to a NumPy array
partial_effects_array = np.array(partial_effects)

# Display the result
print(partial_effects_array)

[[1.34359785]
 [0.51987004]
 [0.01881482]]


In [89]:
# List to store the partial derivatives
partial_derivatives = []

# Loop over each x value to compute partial derivatives
for x in x_values:
    # Compute phi and Phi for each x
    phi = norm.pdf((mu + beta * x) / sig)  # Standard normal pdf
    Phi = norm.cdf((mu + beta * x) / sig)  # Standard normal cdf
    
    # Compute the partial derivatives
    dmu_0 = -(beta / sig) * phi
    dbeta_0 = 1 - Phi - ((beta * x) / sig) * phi
    dsigma_0 = (beta * (mu + beta * x) / sig**2) * phi
    
    # Append the partial derivatives as a row
    partial_derivatives.append([dmu_0, dbeta_0, dsigma_0])

# Convert the list of partial derivatives to a NumPy array
partial_derivatives_matrix = np.array(partial_derivatives)

# Display the result
print(partial_derivatives_matrix)

[[-0.11811716  1.17153733 -0.1791242 ]
 [-0.35041149  0.36189103  0.12383847]
 [-0.03150026 -0.04990319  0.07003495]]


In [90]:
def get_avar(grad, cov):
    cov_me = grad@cov@grad.T
    return cov_me 

In [91]:
avarrho = get_avar(combined_matrix, result['cov'])
print(avarrho)

[[ 1.08219066e-03 -2.98500196e-03 -3.35508046e-05]
 [-2.98500196e-03  1.14193413e-02 -5.44758544e-04]
 [-3.35508046e-05 -5.44758544e-04  1.32668815e-04]]


In [92]:
def wald_test(b_hat, cov, R, r, df):
    """
    Performs the Wald test for the hypothesis R @ b_hat = r.
    """
    
    # Calculate the Wald test statistic
    w_stat = ((R @ rho - r).T @ la.inv(R @ cov @ R.T) @ (R @ rho  - r))
    
    # Degrees of freedom is the number of restrictions (number of rows in R)
    df = df
    
    # Calculate p-value and critical value
    crit_val = chi2.ppf(0.95, df)
    p_value = 1 - chi2.cdf(w_stat.item(), df)
    
    return w_stat, crit_val, p_value

In [94]:
# Define null hypothesis: R * b_hat = 1 
R = np.array([[1, -1, 0],
                [0, 1, -1]])
r = np.zeros((2, 1))

# Extract b_hat and covariance matrix
rho  = partial_effects_array  # Estimated coefficients
cov = result['cov']     # Covariance matrix of coefficients

# Perform Wald test
w_stat, crit_val, p_value = wald_test(rho, cov, R, r, df = 3)

print(f'The test statistic is {w_stat.item():.2f}.')
print(f'The critical value at a 5% significance level is {crit_val:.2f}.')
print(f'The p-value is {p_value:.8f}.')

if w_stat > crit_val:
    print(f"Reject null hypothesis: We reject the null of constant effects n - P-value of: {p_value:.4f}.")
else:
    print(f"Fail to reject null hypothesis: We cannot reject the null of constant effects - P-value of: {p_value:.4f}.")

The test statistic is 412.57.
The critical value at a 5% significance level is 7.81.
The p-value is 0.00000000.
Reject null hypothesis: We reject the null of constant effects n - P-value of: 0.0000.
