In [2]:
import numpy as np
from numpy import random
from scipy.stats import norm
from scipy.stats import genextreme
import pandas as pd 

%load_ext autoreload
%autoreload 2

import clogit 
import estimation as est

In [3]:
# Raw inputs: the car panel and the two label lookup tables.
cars = pd.read_csv('cars.csv')
lbl_vars = pd.read_csv('labels_variables.csv').set_index('variable')
lbl_vals = pd.read_csv('labels_values.csv')

# Replace the value-label frame by a dict of dicts: one entry per column,
# each mapping row index -> label, with missing entries dropped.
lbl_vals = {col: lbl_vals[col].dropna().to_dict() for col in lbl_vals.columns}

# Variable labels side by side with the formatted mean of every numeric
# column in `cars`; non-numeric variables get no mean.
abc = lbl_vars.join(
    cars.mean(numeric_only=True)
        .apply(lambda x: f'{x: .2f}')
        .to_frame('Mean')
)
abc

Unnamed: 0_level_0,label,Mean
variable,Unnamed: 1_level_1,Unnamed: 2_level_1
ye,year (=first dimension of panel),84.5
ma,market (=second dimension of panel),3.0
co,model code (=third dimension of panel),207.5
zcode,alternative model code (predecessors and succe...,177.76
brd,brand code,16.79
type,name of brand and model,
brand,name of brand,
model,name of model,
org,"origin code (demand side, country with which c...",2.72
loc,"location code (production side, country where ...",5.17


In [6]:
# Look up the labels of the three fuel-efficiency variables.
abc.loc[abc.index.isin(['li1', 'li2', 'li3']), 'label'].tolist()

['measure 1 for fuel efficiency (liter per km, at 90 km/h)',
 'measure 2 for fuel efficiency (liter per km, at 120 km/h)',
 'measure 3 for fuel efficiency (liter per km, at city speed)']

In [3]:
# Name of the column in `cars` used as the price measure.
price_var = 'princ'

# Log of the chosen price variable.
cars['logp'] = np.log(cars[price_var])

# new variable: price elasticity heterogeneous for home-region
# The interaction is built from the *log* price so that it matches both its
# name and the `logp` regressor it is meant to interact with; multiplying the
# raw price level by `home` would put the two price terms on different scales.
# NOTE(review): `home` is presumably a 0/1 home-market indicator — confirm.
cars['logp_x_home'] = cars['logp'] * cars['home']

In [4]:
categorical_var = 'brand' # name of categorical variable

# One 0/1 column per distinct value of the categorical variable.
# The dtype is pinned to int: pandas >= 2.0 returns boolean dummies by default,
# and a frame mixing boolean and float columns converts to an `object` numpy
# array, which breaks numeric code further down.
dummies = pd.get_dummies(cars[categorical_var], dtype=int)
x_vars_dummies = list(dummies.columns[1:].values) # omit a reference category, here it is the first (hence columns[1:])

# add dummies to the dataframe
# (the assert guards against re-running this cell, which would append the same columns twice)
assert dummies.columns[0] not in cars.columns, 'It looks like you have already added this dummy to the dataframe. Avoid duplicates! '
cars = pd.concat([cars,dummies], axis=1)

# x_vars
x_vars = ['logp', 'home', 'cy', 'hp', 'we', 'li'] + x_vars_dummies # <--- !!! choose your preferred variables here 
print(f'K = {len(x_vars)} variables selected.')

K = len(x_vars)                           # number of explanatory variables
N = cars.ma.nunique() * cars.ye.nunique() # number of market-year combinations (assumes every pair occurs)
J = 40                                    # rows per market-year block

# Fail early, with a readable message, if the frame cannot be cut into N blocks of J rows.
assert len(cars) == N * J, f'Expected N*J = {N * J} rows but cars has {len(cars)}.'

# NOTE(review): the reshape relies on the rows being ordered so that each
# consecutive run of J rows belongs to one market-year — confirm the sort order.
x = cars[x_vars].to_numpy(dtype=float).reshape((N,J,K))
y = np.log(cars['s'].values.reshape((N,J)))

# standardize x (currently disabled)
# x = ((x - x.mean(0).mean(0))/(x.std(0).std(0)))

K = 38 variables selected.


In [6]:
cars

Unnamed: 0,ye,ma,co,zcode,brd,type,brand,model,org,loc,...,seat,skoda,suzuki,tal/hillman,tal/matra,tal/simca,tal/sunb,talbot,toyota,volvo
0,70,1,15,14,2,audi 100/200,audi,100/200,2,4,...,0,0,0,0,0,0,0,0,0,0
1,70,1,26,35,4,citroen 2 CV 6 - 2 CV 4,citroen,2CV6,1,3,...,0,0,0,0,0,0,0,0,0,0
2,70,1,36,36,4,citroen dyane,citroen,dyane,1,3,...,0,0,0,0,0,0,0,0,0,0
3,70,1,64,67,7,fiat 128,fiat,128,3,5,...,0,0,0,0,0,0,0,0,0,0
4,70,1,71,80,8,ford escort,ford,escort,2,4,...,0,0,0,0,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
5995,99,5,914,161,16,mercedes A,mercedes,A,2,4,...,0,0,0,0,0,0,0,0,0,0
5996,99,5,936,80,8,ford focus,ford,focus,7,4,...,0,0,0,0,0,0,0,0,0,0
5997,99,5,951,208,20,peugeot 206,peugeot,206,1,3,...,0,0,0,0,0,0,0,0,0,0
5998,99,5,953,282,29,toyota avensis,toyota,avensis,4,12,...,0,0,0,0,0,0,0,0,1,0
