# Project 3

### How strong is the “home bias” in the demand for cars, and how does that affect the own-price elasticity of demand?

In [65]:
import numpy as np 
import pandas as pd 
import matplotlib.pyplot as plt 
import seaborn as sns 
sns.set_theme()
import clogit_project3
import estimation as est
from numpy import linalg as la
from scipy import optimize
import LinearModels as lm

import statsmodels.formula.api as smf

# Read in data

The dataset, `cars.csv`, contains cleaned and processed data. If you want to make changes, the notebook, `materialize.ipynb`, creates the data from the raw source datsets. 

In [68]:
# read data
cars = pd.read_csv('cars.csv')
lbl_vars = pd.read_csv('labels_variables.csv')
lbl_vals = pd.read_csv('labels_values.csv')

# convert from dataframe to dict
lbl_vals = {c: lbl_vals[c].dropna().to_dict() for c in lbl_vals.columns}
lbl_vars.set_index('variable', inplace=True)

# Set up for analysis

In [69]:
price_var = 'princ'
cars['logp'] = np.log(cars[price_var])
# new variable: price elasticity heterogeneous for home-region 
cars['logp_x_home'] = cars[price_var] * cars['home']
cars['size'] = cars['he'] * cars['le'] * cars['wi']

In [70]:
brand_dummies = pd.get_dummies(cars['brand']) # creates a matrix of dummies for each value of dummyvar
brand_dummies_list = list(brand_dummies.columns[1:].values) # omit a reference category, here it is the first (hence columns[1:])

loc_dummies = pd.get_dummies(cars['loc']) 
loc_dummies_list = list(loc_dummies.columns[1:].values)

# add dummies to the dataframe 
#assert dummies.columns[0] not in cars.columns, f'It looks like you have already added this dummy to the dataframe. Avoid duplicates! '
cars = pd.concat([cars,brand_dummies,loc_dummies], axis=1)

# select x_vars
standardize = [
    'size'
    , 'li'
    , 'hp'
]

dont_standardize = [
    'logp'
    , 'logp_x_home'
    , 'home'
]

x_vars = standardize + dont_standardize + loc_dummies_list #+ brand_dummies_list #

print(f'K = {len(x_vars)} variables selected.')

K = len(x_vars)
N = cars.ma.nunique() * cars.ye.nunique()
J = 40
x = cars[x_vars].values.reshape((N,J,K))
y = (cars['s'].values.reshape((N,J)))

# standardize x
stop = len(standardize)
start = 0
x[:, :, start:stop] = ((x[:, :, start:stop] - x[:, :, start:stop].mean(0).mean(0))/(x[:, :, start:stop].std(0).std(0)))
# "bange for at nogen variable driver det for meget"
# "singular matrix = collinearity"

K = 18 variables selected.


In [71]:
pairs = [
    ('Nelder-Mead', 'Outer Product')
    , ('BFGS', 'Hessian')
    , ('BFGS', 'Sandwich')
]

list_of_dfs = []

for pair in pairs:
    print('____________________________________________')
    print(f'              \n{pair}\n')
    method = pair[0]
    cov_type = pair[1]
    
    res = est.estimate(clogit_project3.q
                       , clogit_project3.starting_values(y, x)
                       , y
                       , x
                       , method=method
                       , cov_type=cov_type
                       , options={
                           'disp':True
                           ,'maxiter':30000
                       }
                      )
    
    temp = pd.DataFrame({v:res[v] for v in ['theta', 'se', 't']})
    temp['method'] = [method for i in range(temp.shape[0])]
    temp['cov_type'] = [cov_type for i in range(temp.shape[0])]
    temp['x_var'] = x_vars
    
    list_of_dfs.append(temp)
    
res_df = pd.concat(list_of_dfs, ignore_index=True)

____________________________________________
              
('Nelder-Mead', 'Outer Product')

Optimization terminated successfully.
         Current function value: 3.518328
         Iterations: 4765
         Function evaluations: 6062
____________________________________________
              
('BFGS', 'Hessian')

Optimization terminated successfully.
         Current function value: 3.492595
         Iterations: 124
         Function evaluations: 2413
         Gradient evaluations: 127
____________________________________________
              
('BFGS', 'Sandwich')

Optimization terminated successfully.
         Current function value: 3.492595
         Iterations: 124
         Function evaluations: 2413
         Gradient evaluations: 127


In [72]:
df_neld = res_df[(res_df.method == 'Nelder-Mead')].copy()
df_neld

Unnamed: 0,theta,se,t,method,cov_type,x_var
0,0.025538,0.145818,0.175137,Nelder-Mead,Outer Product,size
1,0.005767,0.122322,0.047145,Nelder-Mead,Outer Product,li
2,-0.00749,0.253341,-0.029566,Nelder-Mead,Outer Product,hp
3,-1.415335,6.338415,-0.223295,Nelder-Mead,Outer Product,logp
4,-0.269573,7.93602,-0.033968,Nelder-Mead,Outer Product,logp_x_home
5,1.301397,5.391229,0.241392,Nelder-Mead,Outer Product,home
6,-0.488261,27.783383,-0.017574,Nelder-Mead,Outer Product,2
7,0.041992,9.021455,0.004655,Nelder-Mead,Outer Product,3
8,0.229127,8.689037,0.02637,Nelder-Mead,Outer Product,4
9,0.26631,9.085874,0.02931,Nelder-Mead,Outer Product,5


In [73]:
df_hess = res_df[(res_df.method == 'BFGS') & (res_df.cov_type == 'Hessian')].copy()
df_hess

Unnamed: 0,theta,se,t,method,cov_type,x_var
18,0.00906,0.002715,3.337491,BFGS,Hessian,size
19,0.001252,0.002366,0.52901,BFGS,Hessian,li
20,-0.040624,0.00476,-8.533505,BFGS,Hessian,hp
21,-0.047645,0.122943,-0.387539,BFGS,Hessian,logp
22,-0.359541,0.092759,-3.876093,BFGS,Hessian,logp_x_home
23,1.578211,0.071147,22.18239,BFGS,Hessian,home
24,-1.000428,0.354974,-2.818316,BFGS,Hessian,2
25,-0.247952,0.086195,-2.876622,BFGS,Hessian,3
26,0.003416,0.083479,0.040916,BFGS,Hessian,4
27,-0.261975,0.093231,-2.809966,BFGS,Hessian,5


In [74]:
#df_sand = res_df[(res_df.method == 'BFGS') & (res_df.cov_type == 'Sandwich')].copy()
#df_sand

In [75]:
#df_hess['diff_in_se'] = df_hess.se - df_sand.se

In [76]:
desired_pair = ('BFGS', 'Sandwich')
#desired_pair = ('Nelder-Mead', 'Outer Product')
thetahat = res_df[(res_df.method == desired_pair[0]) & (res_df.cov_type == desired_pair[1])].theta.to_numpy().flatten()

choice1 = clogit_project3.choice_prob(thetahat, x)
no_home_x = x.copy()
no_home_x[:, :, 5] = np.zeros((N,J))

In [77]:
choice2 = clogit_project3.choice_prob(thetahat, no_home_x)

In [78]:
with_home = np.zeros((5,40))
without_home = np.zeros((5,40))

for idx, market in enumerate(np.split(choice1, 5)):
    with_home[idx, :] = market.mean(0, keepdims=True)
    
for idx, market in enumerate(np.split(choice2, 5)):
    without_home[idx, :] = market.mean(0, keepdims=True)  

### Partial effect
$$
    \frac{\partial}{\partial x_{kl}} Pr(j) = Pr(j) \left[\boldsymbol{1}_{k=j} \beta_{l} -  Pr(l) \beta_{l} \right]
$$

In [79]:
def labels(x):
	# labels and dimensions for plotting
	N, J, K = x.shape
	palt=['p' + str(i)  for i in range(J)]; 
	xalt=['alt' + str(i)  for i in range(J)]; 
	xvars=['var' + str(i)  for i in range(K)]; 
	return N, J, K, palt, xalt, xvars

N, J, K, palt, xalt, xvars = labels(x)

In [80]:
def APE_var(theta, x, m=0, quiet=False): 
    p=clogit_project3.choice_prob(thetahat, x)
    E=np.empty((J,J))
    for j in range(J):
        for k in range(J):
            E[k, j]=np.mean(p[:,j]*theta[m]*(1*(j==k)-p[:,k]), axis=0)
    return E

dydx=APE_var(thetahat, x, m=5)
#print('\nckeck: derivatibes should sum to 0 over all alterntives \n', np.sum(dydx,axis=1))

In [81]:
test=pd.DataFrame(dydx, columns=[f'p{i+1}' for i in range(J)], index=[f'alt_{i+1}' for i in range(J)])

In [82]:
test

Unnamed: 0,p1,p2,p3,p4,p5,p6,p7,p8,p9,p10,...,p31,p32,p33,p34,p35,p36,p37,p38,p39,p40
alt_1,0.03334,-0.000878,-0.000808,-0.000843,-0.000837,-0.000771,-0.000824,-0.000906,-0.000982,-0.001123,...,-0.000696,-0.000697,-0.000716,-0.000706,-0.00078,-0.00076,-0.001042,-0.001108,-0.000922,-0.000772
alt_2,-0.000878,0.03432,-0.000893,-0.000908,-0.000871,-0.000784,-0.000807,-0.00087,-0.000918,-0.001024,...,-0.000772,-0.00077,-0.000804,-0.0008,-0.000864,-0.000785,-0.001032,-0.00113,-0.000935,-0.000779
alt_3,-0.000808,-0.000893,0.036537,-0.001098,-0.00099,-0.000962,-0.000832,-0.000867,-0.000933,-0.000947,...,-0.000817,-0.000833,-0.000835,-0.000856,-0.000868,-0.000909,-0.001068,-0.001127,-0.001011,-0.000846
alt_4,-0.000843,-0.000908,-0.001098,0.038483,-0.001106,-0.001082,-0.000995,-0.001021,-0.001029,-0.000995,...,-0.000904,-0.000939,-0.000887,-0.000875,-0.000951,-0.000897,-0.001143,-0.00115,-0.001021,-0.000897
alt_5,-0.000837,-0.000871,-0.00099,-0.001106,0.041042,-0.001319,-0.001269,-0.001091,-0.000977,-0.001003,...,-0.001077,-0.001066,-0.000965,-0.000923,-0.000912,-0.000912,-0.001052,-0.001153,-0.001097,-0.000875
alt_6,-0.000771,-0.000784,-0.000962,-0.001082,-0.001319,0.039998,-0.001323,-0.00115,-0.001133,-0.001017,...,-0.00105,-0.000985,-0.000951,-0.000965,-0.000874,-0.000869,-0.000983,-0.00104,-0.001046,-0.000861
alt_7,-0.000824,-0.000807,-0.000832,-0.000995,-0.001269,-0.001323,0.040258,-0.001321,-0.001282,-0.001167,...,-0.001064,-0.001047,-0.000976,-0.000876,-0.00091,-0.000857,-0.001003,-0.001091,-0.001023,-0.000888
alt_8,-0.000906,-0.00087,-0.000867,-0.001021,-0.001091,-0.00115,-0.001321,0.041605,-0.001563,-0.001329,...,-0.001085,-0.001073,-0.000969,-0.000885,-0.000905,-0.000948,-0.001038,-0.001072,-0.000981,-0.000872
alt_9,-0.000982,-0.000918,-0.000933,-0.001029,-0.000977,-0.001133,-0.001282,-0.001563,0.042819,-0.001582,...,-0.00108,-0.001045,-0.001013,-0.000913,-0.000928,-0.000966,-0.001076,-0.001159,-0.000958,-0.000845
alt_10,-0.001123,-0.001024,-0.000947,-0.000995,-0.001003,-0.001017,-0.001167,-0.001329,-0.001582,0.043556,...,-0.001045,-0.001006,-0.000962,-0.000903,-0.000913,-0.00095,-0.001195,-0.001331,-0.001095,-0.000913


### Price-elasticity of demand
$$
\boldsymbol{\epsilon}_{jj}(\boldsymbol{X}_{i}) = \frac{\partial s_j(\boldsymbol{X}_{i}, \boldsymbol{\beta})}{\partial p_{ij}} \frac{p_{ij}}{s_j(\boldsymbol{X}_{i}, \boldsymbol{\beta})}
$$

In [83]:
# Extract parameter estimates
desired_pair = ('BFGS', 'Sandwich')
#desired_pair = ('Nelder-Mead', 'Outer Product')
thetahat = res_df[(res_df.method == desired_pair[0]) & (res_df.cov_type == desired_pair[1])].theta.to_numpy().flatten()

# Original choice probabilites
ccp1 = clogit_project3.choice_prob(thetahat, x)
ccp1[0].sum()

1.0

In [84]:
E_own   = np.zeros((N, J))
E_cross = np.zeros((N, J))
dpdx    = np.zeros((N, J))
k_price = 3

for j in range(J):
    # A. copy 
    x2 = x.copy()
    
    # B. increase price just for car j 
    rel_change_x = 0.001
    x2[:, j, k_price] = x2[:, j, k_price] * (1+rel_change_x)

    # C. evaluate CCPs
    ccp2 = clogit_project3.choice_prob(thetahat, x2)
    
    # D. percentage change in CCPs 
    rel_change_y = ccp2 / ccp1 - 1
    
    # E. elasticities 
    elasticity = rel_change_y / rel_change_x
    
    E_own[:, j] = elasticity[:, j]

    k_not_j = [k for k in range(J) if k != j] # indices for all other cars than j 
    E_cross[:, j] = elasticity[:, k_not_j].mean(axis=1) # Fill in: Avg. among the cars k_not_j

In [85]:
print(f'Own-price elasticity:  {np.mean(E_own).round(7)}')
print(f'Cross-price elasticity: {np.mean(E_cross).round(7)}')

Own-price elasticity:  0.0166822
Cross-price elasticity: -0.0005


In [86]:
home = 5
print(x_vars[home]) # check that we found the right one 
assert x_vars[home] == 'home'

# Create two indexed, from where idx1 is for electric cars and idx0 is for non-electric cars.
idx1 = x[:, :, home]==1
idx0 = x[:, :, home]==0 
print(f'Elasticity, home:  {np.mean(E_own[idx1]).round(4)}')
print(f'Elasticity, other: {np.mean(E_own[idx0]).round(4)}')

home
Elasticity, home:  0.0134
Elasticity, other: 0.0182


In [47]:
def Ematrix_var(theta, x, m=1, quiet=False):
    N, J, K, palt, xalt, xvars = labels(x)
    p=clogit_project3.choice_prob(thetahat, x)
    E=np.empty((J,J))
    for j in range(J):
        for k in range(J):
            E[k, j]=np.mean(x[:,k,m]*theta[m]*(1*(j==k)-p[:,k]), axis=0)
    return E
E=Ematrix_var(thetahat, x, m=0)

### Compensating Variation

We can compare welfare under $v_{ij}$ and $\tilde{v}_{ij}$:

$$
CV = \frac{1}{\beta_1} \log \sum_{j=1}^{J} \exp{v_{ij}} - \frac{1}{\beta_1} \log \sum_{j=1}^{J} \exp{\tilde{v}_{ij}}
$$