# Project 3

### How strong is the “home bias” in the demand for cars, and how does that affect the own-price elasticity of demand?

In [1]:
import numpy as np 
import pandas as pd 
import matplotlib.pyplot as plt 
import seaborn as sns 
sns.set_theme()
import clogit_project3
import estimation as est
from numpy import linalg as la
from scipy import optimize
import LinearModels as lm

import statsmodels.formula.api as smf

# Read in data

The dataset, `cars.csv`, contains cleaned and processed data. If you want to make changes, the notebook, `materialize.ipynb`, creates the data from the raw source datsets. 

In [2]:
# read data
cars = pd.read_csv('cars.csv')
lbl_vars = pd.read_csv('labels_variables.csv')
lbl_vals = pd.read_csv('labels_values.csv')

# convert from dataframe to dict
lbl_vals = {c: lbl_vals[c].dropna().to_dict() for c in lbl_vals.columns}
lbl_vars.set_index('variable', inplace=True)

# Set up for analysis

In [3]:
price_var = 'princ'
cars['logp'] = np.log(cars[price_var])
# new variable: price elasticity heterogeneous for home-region 
cars['logp_x_home'] = cars[price_var] * cars['home']
cars['size'] = cars['he'] * cars['le'] * cars['wi']

In [4]:
brand_dummies = pd.get_dummies(cars['brand']) # creates a matrix of dummies for each value of dummyvar
brand_dummies_list = list(brand_dummies.columns[1:].values) # omit a reference category, here it is the first (hence columns[1:])

loc_dummies = pd.get_dummies(cars['loc']) 
loc_dummies_list = list(loc_dummies.columns[1:].values)

# add dummies to the dataframe 
#assert dummies.columns[0] not in cars.columns, f'It looks like you have already added this dummy to the dataframe. Avoid duplicates! '
cars = pd.concat([cars,brand_dummies,loc_dummies], axis=1)

# select x_vars
standardize = [
    'size'
    , 'li'
    , 'hp'
]

dont_standardize = [
    'logp'
    , 'logp_x_home'
    , 'home'
]

x_vars = standardize + dont_standardize #+ brand_dummies_list #+ loc_dummies_list

print(f'K = {len(x_vars)} variables selected.')

K = len(x_vars)
N = cars.ma.nunique() * cars.ye.nunique()
J = 40
x = cars[x_vars].values.reshape((N,J,K))
y = (cars['s'].values.reshape((N,J)))

# standardize x
stop = len(standardize)
start = 0
x[:, :, start:stop] = ((x[:, :, start:stop] - x[:, :, start:stop].mean(0).mean(0))/(x[:, :, start:stop].std(0).std(0)))
# "bange for at nogen variable driver det for meget"
# "singular matrix = collinearity"

K = 6 variables selected.


In [5]:
pairs = [
    ('Nelder-Mead', 'Outer Product')
    , ('BFGS', 'Hessian')
    , ('BFGS', 'Sandwich')
]

list_of_dfs = []

for pair in pairs:
    print('____________________________________________')
    print(f'              \n{pair}\n')
    method = pair[0]
    cov_type = pair[1]
    
    res = est.estimate(clogit_project3.q
                       , clogit_project3.starting_values(y, x)
                       , y
                       , x
                       , method=method
                       , cov_type=cov_type
                       , options={
                           'disp':True
                           ,'maxiter':30000
                       }
                      )
    
    temp = pd.DataFrame({v:res[v] for v in ['theta', 'se', 't']})
    temp['method'] = [method for i in range(temp.shape[0])]
    temp['cov_type'] = [cov_type for i in range(temp.shape[0])]
    temp['x_var'] = x_vars
    
    list_of_dfs.append(temp)
    
res_df = pd.concat(list_of_dfs, ignore_index=True)

____________________________________________
              
('Nelder-Mead', 'Outer Product')

Optimization terminated successfully.
         Current function value: 3.499303
         Iterations: 1007
         Function evaluations: 1568
____________________________________________
              
('BFGS', 'Hessian')

Optimization terminated successfully.
         Current function value: 3.499303
         Iterations: 26
         Function evaluations: 203
         Gradient evaluations: 29
____________________________________________
              
('BFGS', 'Sandwich')

Optimization terminated successfully.
         Current function value: 3.499303
         Iterations: 26
         Function evaluations: 203
         Gradient evaluations: 29


In [6]:
#df_neld = res_df[(res_df.method == 'Nelder-Mead')].copy()
#df_neld

In [7]:
#df_hess = res_df[(res_df.method == 'BFGS') & (res_df.cov_type == 'Hessian')].copy()
#df_hess

In [8]:
#df_sand = res_df[(res_df.method == 'BFGS') & (res_df.cov_type == 'Sandwich')].copy()
#df_sand

In [9]:
#df_hess['diff_in_se'] = df_hess.se - df_sand.se

In [10]:
desired_pair = ('BFGS', 'Sandwich')
#desired_pair = ('Nelder-Mead', 'Outer Product')
thetahat = res_df[(res_df.method == desired_pair[0]) & (res_df.cov_type == desired_pair[1])].theta.to_numpy().flatten()

choice1 = clogit_project3.choice_prob(thetahat, x)
no_home_x = x.copy()
no_home_x[:, :, 5] = np.zeros((N,J))

In [11]:
choice2 = clogit_project3.choice_prob(thetahat, no_home_x)

In [12]:
with_home = np.zeros((5,40))
without_home = np.zeros((5,40))

for idx, market in enumerate(np.split(choice1, 5)):
    with_home[idx, :] = market.mean(0, keepdims=True)
    
for idx, market in enumerate(np.split(choice2, 5)):
    without_home[idx, :] = market.mean(0, keepdims=True)  

### Partial effect
$$
    \frac{\partial}{\partial x_kl} Pr(j) = Pr(j) \left[\boldsymbol{1}_{k=j} \beta_{l} -  Pr(l) \beta_{l} \right]
$$

### Price-elasticity of demand
$$
\boldsymbol{\epsilon}_{jj}(\boldsymbol{X}_{i}) = \frac{\partial s_j(\boldsymbol{X}_{i}, \boldsymbol{\beta})}{\partial p_{ij}} \frac{p_{ij}}{s_j(\boldsymbol{X}_{i}, \boldsymbol{\beta})}
$$

In [13]:
# Extract parameter estimates
desired_pair = ('BFGS', 'Sandwich')
#desired_pair = ('Nelder-Mead', 'Outer Product')
thetahat = res_df[(res_df.method == desired_pair[0]) & (res_df.cov_type == desired_pair[1])].theta.to_numpy().flatten()

# Original choice probabilites
ccp1 = clogit_project3.choice_prob(thetahat, x)
ccp1[0].sum()

1.0

In [14]:
E_own   = np.zeros((N, J))
E_cross = np.zeros((N, J))
dpdx    = np.zeros((N, J))
k_price = 3

for j in range(J):
    # A. copy 
    x2 = x.copy()
    
    # B. increase price just for car j 
    rel_change_x = 0.001
    x2[:, j, k_price] = x2[:, j, k_price] * (1+rel_change_x)

    # C. evaluate CCPs
    ccp2 = clogit_project3.choice_prob(thetahat, x2)
    
    # D. percentage change in CCPs 
    rel_change_y = ccp2 / ccp1 - 1
    
    # E. elasticities 
    elasticity = rel_change_y / rel_change_x
    
    E_own[:, j] = elasticity[:, j] # Fill in 

    k_not_j = [k for k in range(J) if k != j] # indices for all other cars than j 
    E_cross[:, j] = elasticity[:, k_not_j].mean(axis=1) # Fill in: Avg. among the cars k_not_j

In [15]:
print(f'Own-price elasticity:  {np.mean(E_own).round(7)}')
print(f'Cross-price elasticity: {np.mean(E_cross).round(7)}')

Own-price elasticity:  -0.107316
Cross-price elasticity: 0.0032166


In [16]:
home = 5
print(x_vars[home]) # check that we found the right one 
assert x_vars[home] == 'home'

# Create two indexed, from where idx1 is for electric cars and idx0 is for non-electric cars.
idx1 = x[:, :, home]==1
idx0 = x[:, :, home]==0 
print(f'Elasticity, home:  {np.mean(E_own[idx1]).round(4)}')
print(f'Elasticity, other: {np.mean(E_own[idx0]).round(4)}')

home
Elasticity, home:  -0.0861
Elasticity, other: -0.1171


### Compensating Variation

We can compare welfare under $v_{ij}$ and $\tilde{v}_{ij}$:

$$
CV = \frac{1}{\beta_1} \log \sum_{j=1}^{J} \exp{v_{ij}} - \frac{1}{\beta_1} \log \sum_{j=1}^{J} \exp{\tilde{v}_{ij}}
$$