In [23]:
!pip install -r requirements.txt 
!pip install CFEDemands --upgrade --pre -q
!pip install eep153_tools --upgrade -q
!pip install python-gnupg



In [24]:
InputFiles = {'Food Expenditures':('1gcAb2jlGQNrD2zrrTEbjL47vbXoxCHkkjHSYzD0-Tiw','Food Expenditures'),
              'Food Prices':('1gcAb2jlGQNrD2zrrTEbjL47vbXoxCHkkjHSYzD0-Tiw','Food Prices'),
              'Household Characteristics':('1gcAb2jlGQNrD2zrrTEbjL47vbXoxCHkkjHSYzD0-Tiw','Household Characteristics'),
              'FCT':('1gcAb2jlGQNrD2zrrTEbjL47vbXoxCHkkjHSYzD0-Tiw','FCT'),
              'Copy of RDI':('1gcAb2jlGQNrD2zrrTEbjL47vbXoxCHkkjHSYzD0-Tiw','Copy of RDI'),}

In [25]:

import numpy as np
import pandas as pd
from eep153_tools.sheets import read_sheets
import cfe
from cfe.estimation import drop_columns_wo_covariance
from cfe import Regression


In [35]:
def get_clean_sheet(key,sheet=None):

    df = read_sheets(key,sheet=sheet)
    df.columns = [c.strip() for c in df.columns.tolist()]

    df = df.loc[:,~df.columns.duplicated(keep='first')]   

    df = df.drop([col for col in df.columns if col.startswith('Unnamed')], axis=1)

    df = df.loc[~df.index.duplicated(), :]

    return df

In [36]:
# Get food expenditures 
expenditures = get_clean_sheet(InputFiles['Food Expenditures'][0],
                    sheet=InputFiles['Food Expenditures'][1])

if 'm' not in expenditures.columns:
    expenditures['m'] = 1

expenditures = expenditures.set_index(['i','t','m'])
expenditures.columns.name = 'j'

expenditures = expenditures.apply(lambda x: pd.to_numeric(x,errors='coerce'))
expenditures = expenditures.replace(0,np.nan)

In [28]:
expenditures = expenditures.loc[~expenditures.index.duplicated()]

In [29]:
# Get Household characteristics 
hh_characteristics = get_clean_sheet(InputFiles['Household Characteristics'][0],
                    sheet=InputFiles['Household Characteristics'][1])

if 'm' not in hh_characteristics.columns:
    hh_characteristics['m'] = 1

hh_characteristics = hh_characteristics.set_index(['i','t','m'])
hh_characteristics.columns.name = 'k'
#hh_characteristics.name = 'value'  

hh_characteristics = hh_characteristics.apply(lambda x: pd.to_numeric(x,errors='coerce'))

In [30]:
hh_characteristics

Unnamed: 0_level_0,Unnamed: 1_level_0,k,women,boys,men,girls,Males 00-03,Females 00-03,Males 04-08,Females 04-08,Males 09-13,Females 09-13,Males 14-18,Females 14-18,Males 19-30,Females 19-30,Males 31-50,Females 31-50,Males 51-99,Females 51-99,log HSize
i,t,m,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1997125,1997,Herrera,1,1,1,0,0,0,0,0,1,0,0,0,0,0,0,1,1,0,1.098612
1997160,1997,Herrera,2,0,1,0,0,0,0,0,0,0,0,0,0,0,0,1,1,1,1.098612
1997170,1997,Bocas Del Toro,1,3,1,4,1,2,1,0,0,1,1,1,0,0,0,1,1,0,2.197225
1997177,1997,Bocas Del Toro,4,9,1,5,2,1,4,1,1,3,2,0,0,2,0,2,1,0,2.944439
1997178,1997,Bocas Del Toro,2,1,2,2,0,0,0,1,1,0,0,1,2,0,0,1,0,1,1.945910
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
20087041,2008,Comarca Ngobe Bugle,3,3,1,1,2,1,1,0,0,0,0,0,1,2,0,0,0,1,2.079442
20087042,2008,Comarca Ngobe Bugle,1,3,1,3,2,0,0,1,1,1,0,1,0,0,1,1,0,0,2.079442
20087043,2008,Comarca Ngobe Bugle,2,3,1,3,2,0,0,1,1,1,0,2,0,0,1,1,0,0,2.197225
20087044,2008,Comarca Ngobe Bugle,2,1,2,3,1,2,0,1,0,0,0,0,2,2,0,0,0,0,2.079442


In [37]:
hh_characteristics.index.is_unique 

True

In [38]:
from eep153_tools.sheets import read_sheets
# Get food prices and call it p
url = 'https://docs.google.com/spreadsheets/d/1gcAb2jlGQNrD2zrrTEbjL47vbXoxCHkkjHSYzD0-Tiw/edit#gid=2085637103'
p = read_sheets(url,sheet='Food Prices',nheaders=2)

p.columns.names = ['t','m']
p.groupby(level='j').mean()

#use tranpose to switch columns and rows as the original structure of the dataframe was not applicable to use.
p = p.transpose()
if 'm' not in p.columns:
    p['m']=1

p=p.apply(lambda x: pd.to_numeric(x,errors='coerce'))
p=p.replace(0,np.nan)

In [39]:
p=p.T

In [40]:
pd.options.display.max_rows=1000

In [41]:
#p.xs('Aceite Vegetal',level='j').groupby('t',axis=1).median()

In [42]:
p_per_lb=p.groupby('t',axis=1).median().xs('pound',level='u')

  p_per_lb=p.groupby('t',axis=1).median().xs('pound',level='u')


In [43]:
# Get FCT
fct = get_clean_sheet(InputFiles['FCT'][0],
                    sheet=InputFiles['FCT'][1])

fct = fct.set_index('j')
fct.columns.name = 'n'

fct = fct.apply(lambda x: pd.to_numeric(x,errors='coerce'))

In [44]:
# Get RDI
rdi = get_clean_sheet(InputFiles['Copy of RDI'][0],
                    sheet=InputFiles['Copy of RDI'][1])
rdi = rdi.set_index('n')
rdi.columns.name = 'k'

In [45]:
expenditures.head()


Unnamed: 0_level_0,Unnamed: 1_level_0,j,Aceite Vegetal,Aguacates,Ahí Verde,Ajo,Alimento Infantil,Apio,Arroz,Arvejas,Azúcar,Café Y Té,...,"Sodas, Refrescos Y Jugos",Sopa Enlatada,Tercer otro,Tomate,Viscera De Res,Visceras De Pollo O Gallina,Yuca,Zanahoria,Zapallo / Chayote,Ñame
i,t,m,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1
19971000,1997,Chíriqui,6.0,,0.5,,2.7,0.3,7.25,,1.6,,...,8.0,0.7,,,,,,0.8,,0.4
19971001,1997,Chíriqui,,,,,,,4.8,,,1.4,...,1.5,,,0.3,,,,,,
19971002,1997,Chíriqui,3.5,,,,,,6.0,,2.0,3.6,...,,3.5,,,,,,,2.0,
19971003,1997,Chíriqui,3.5,,,,,,,,,,...,,,,,,,,,,
19971005,1997,Chíriqui,3.7,,,,,,,,4.5,1.5,...,,0.7,,,,,1.0,,,


In [46]:
hh_characteristics.head()


Unnamed: 0_level_0,Unnamed: 1_level_0,k,women,boys,men,girls,Males 00-03,Females 00-03,Males 04-08,Females 04-08,Males 09-13,Females 09-13,Males 14-18,Females 14-18,Males 19-30,Females 19-30,Males 31-50,Females 31-50,Males 51-99,Females 51-99,log HSize
i,t,m,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1997125,1997,Herrera,1,1,1,0,0,0,0,0,1,0,0,0,0,0,0,1,1,0,1.098612
1997160,1997,Herrera,2,0,1,0,0,0,0,0,0,0,0,0,0,0,0,1,1,1,1.098612
1997170,1997,Bocas Del Toro,1,3,1,4,1,2,1,0,0,1,1,1,0,0,0,1,1,0,2.197225
1997177,1997,Bocas Del Toro,4,9,1,5,2,1,4,1,1,3,2,0,0,2,0,2,1,0,2.944439
1997178,1997,Bocas Del Toro,2,1,2,2,0,0,0,1,1,0,0,1,2,0,0,1,0,1,1.94591


In [47]:
p_per_lb.columns=[1997,2003]

In [50]:
from cfe import regression as rgsn
import numpy as np

# Load a regression model from a pickle file that was named 'estimates.pickle' earlier.
r = rgsn.read_pickle('estimates.pickle')

# Use the loaded model to predict expenditures based on the model's estimates.
xhat = r.predicted_expenditures()

# Implied relative prices
p = np.exp(r.get_Ar())

In [51]:
xhat

i         t     m                    j                        
20081     2008  Bocas Del Toro       Aceite Vegetal               5.326744
                                     Ajo                          0.686368
                                     Arroz                        7.549828
                                     Azúcar                       2.972885
                                     Café Y Té                    2.636834
                                                                    ...   
20087040  2008  Comarca Ngobe Bugle  Queso (Blanco Y Amarillo)    1.720973
                                     Raspados Y Duros             1.391364
                                     Sal                          0.630284
                                     Sardina Y Atún               3.746857
                                     Sodas, Refrescos Y Jugos     2.617251
Length: 417378, dtype: float64

In [52]:
xhat.unstack('j').xs(1997,level='t')/p_per_lb[1997]

Unnamed: 0_level_0,j,Aceite Vegetal,Aguacates,Ahí Verde,Ajo,Alimento Infantil,Apio,Arroz,Arvejas,Azúcar,Café Y Té,...,Segundo otro,"Sodas, Refrescos Y Jugos",Tercer otro,Tomate,Viscera De Res,Visceras De Pollo O Gallina,Yuca,Zanahoria,Zapallo / Chayote,Ñame
i,m,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1
1997125,Herrera,1.302591,,,0.648874,,,27.405115,,6.432036,1.402235,...,,7.047068,,,,,,,,
1997160,Herrera,1.081041,,,0.557456,,,22.629801,,5.316965,1.311137,...,,4.416370,,,,,,,,
1997170,Bocas Del Toro,0.686893,,,0.373462,,,23.068933,,12.643680,0.926063,...,,1.304553,,,,,,,,
1997180,Bocas Del Toro,1.650482,,,0.796181,,,33.546540,,16.484101,1.615524,...,,7.256709,,,,,,,,
1997269,Chíriqui,1.291344,,,0.641654,,,18.348149,,5.514160,0.977074,...,,4.017732,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
19976665,Panamá,0.540010,,,0.362979,,,9.046374,,2.643939,0.668320,...,,1.795628,,,,,,,,
19976666,Panamá,0.743526,,,0.443518,,,12.815101,,3.625360,0.852673,...,,2.562676,,,,,,,,
19976667,Panamá,0.711743,,,0.428997,,,13.161689,,3.388701,0.693344,...,,2.632959,,,,,,,,
19976668,Panamá,0.706572,,,0.426155,,,13.119580,,3.370511,0.689509,...,,2.592718,,,,,,,,


In [None]:
#xhat.unstack('j').xs(2003,level='t')/p_per_lb[2003]

In [64]:
#xhat.index

In [63]:
fct.head()


n,Agua,Energía,Proteina,Grasa Total,Carbo-hidratos,Fibra Diet. total,Ceniza,Calcio,Fosforo,Hierro,...,Colesterol,Potasio,Sodio,Zinc,Magnesio,Vit. B6,Vit. B12,Ac. Fólico,Folato Equiv. FD,Fracción Comestible
j,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
Aceite Vegetal,0.0,884,0.0,100.0,0.0,0.0,0.0,0,0,0.0,...,0,0,0,0.0,0,0.0,0.0,0,0,1.0
Aguacates,73.23,160,2.0,14.66,8.53,6.7,1.58,12,52,0.55,...,0,485,7,0.64,29,0.26,0.0,0,81,0.74
Ahí Verde,93.89,20,0.86,0.17,3.64,1.7,0.43,10,20,0.34,...,0,175,3,0.13,10,0.22,0.0,0,11,0.82
Ajo,63.8,134,5.3,0.2,29.3,0.0,1.4,38,134,1.4,...,0,529,19,1.0,0,0.0,0.0,3,0,0.94
Alimento Infantil,3.0,510,12.5,27.0,54.3,0.0,3.2,530,420,9.0,...,0,600,180,6.0,55,0.3,1.5,80,136,1.0


In [62]:
rdi.head()


k,F 00-03,M 00-03,F 04-08,M 04-08,F 09-13,M 09-13,F 14-18,M 14-18,F 19-30,M 19-30,F 31-50,M 31-50,F 51+,M 51+
n,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1
Energy,1000.0,1000.0,1200.0,1400.0,1600.0,1800.0,1800.0,2200.0,2000.0,2400.0,1800.0,2200.0,1600.0,2000.0
Protein,13.0,13.0,19.0,19.0,34.0,34.0,46.0,52.0,46.0,56.0,46.0,56.0,46.0,56.0
Fiber,14.0,14.0,16.8,19.6,22.4,25.2,25.2,30.8,28.0,33.6,25.2,30.8,22.4,28.0
Folate,150.0,150.0,200.0,200.0,300.0,300.0,400.0,400.0,400.0,400.0,400.0,400.0,400.0,400.0
Calcium,700.0,700.0,1000.0,1000.0,1300.0,1300.0,1300.0,1300.0,1000.0,1000.0,1000.0,1000.0,1200.0,1000.0


In [61]:
# log_expenditures represents the logarithm of food expenditures.
log_expenditures = np.log(expenditures)

log_expenditures.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,j,Aceite Vegetal,Aguacates,Ahí Verde,Ajo,Alimento Infantil,Apio,Arroz,Arvejas,Azúcar,Café Y Té,...,"Sodas, Refrescos Y Jugos",Sopa Enlatada,Tercer otro,Tomate,Viscera De Res,Visceras De Pollo O Gallina,Yuca,Zanahoria,Zapallo / Chayote,Ñame
i,t,m,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1
19971000,1997,Chíriqui,1.791759,,-0.693147,,0.993252,-1.203973,1.981001,,0.470004,,...,2.079442,-0.356675,,,,,,-0.223144,,-0.916291
19971001,1997,Chíriqui,,,,,,,1.568616,,,0.336472,...,0.405465,,,-1.203973,,,,,,
19971002,1997,Chíriqui,1.252763,,,,,,1.791759,,0.693147,1.280934,...,,1.252763,,,,,,,0.693147,
19971003,1997,Chíriqui,1.252763,,,,,,,,,,...,,,,,,,,,,
19971005,1997,Chíriqui,1.308333,,,,,,,,1.504077,0.405465,...,,-0.356675,,,,,0.0,,,


In [66]:
# use is the intersection of indexs between log_expenditures and household characteristics.
use = log_expenditures.index.intersection(hh_characteristics.index)

log_expenditures = log_expenditures.loc[use,:]
hh_characteristics = hh_characteristics.loc[use,:]


ValueError: Length of names must match number of levels in MultiIndex.

In [60]:
log_expenditures = log_expenditures.stack()
hh_characteristics = hh_characteristics.stack()

# Check that indices are in right places!
assert log_expenditures.index.names == ['i','t','m','j']
assert hh_characteristics.index.names == ['i','t','m','k']

AttributeError: 'Series' object has no attribute 'stack'

In [None]:
#set up regression
result = Regression(y = log_expenditures,d = hh_characteristics)

In [None]:
#get regression results
result.predicted_expenditures()

In [None]:
# use the graph to compare log food expenditures and actual expenditures
%matplotlib widget
df = pd.DataFrame({'y':log_expenditures,'yhat':result.get_predicted_log_expenditures()})
df.plot.scatter(x='yhat',y='y')

In [None]:
#get the value of beta - As shown above, beta captures how the household's overall wealth affects its expenditures on food. 
result.get_beta().sort_values()

In [None]:
#graph beta
result.graph_beta()

In [None]:
#get the value of gamma - As shown above, gamma captures how household characteristics affects its expenditures on food. 
result.gamma

In [None]:
#To save result
result.to_pickle('estimates.pickle')

result = cfe.regression.read_pickle('estimates.pickle')

In [None]:
import cfe

result = cfe.Regression(y=np.log(expenditures.stack()),d=hh_characteristics)

result.get_beta().sort_values(ascending=False) # Check sanity...

In [None]:
result.to_pickle('./foo.pickle')

In [None]:
p

In [None]:
from cfe import regression as rgsn
import numpy as np

# Load a regression model from a pickle file that was named 'estimates.pickle' earlier.
r = rgsn.read_pickle('estimates.pickle')

# Use the loaded model to predict expenditures based on the model's estimates.
xhat = r.predicted_expenditures()

# Implied relative prices
p = np.exp(r.get_Ar())

In [None]:
import matplotlib.pyplot as plt
%matplotlib notebook

use = 'Arroz'  # Put the food item that we want demand curve for here. 
               # 'Arroz' is translated to 'rice' in English. 

# Generates an array of 20 evenly spaced numbers from 0.5 to 2.
scale = np.linspace(.5,2,20)

# Total food expenditures per household
xbar = xhat.groupby(['i','t','m']).sum()

# Reference budget
xref = xbar.quantile(0.5)  # Household at 0.5 quantile is median

# Reference prices - Calculate the average price for each food item across all observations.
pbar = p.groupby(level='j').mean()

def my_prices(p0,p=pbar,j='Arroz'):
    """
    This changes the food prices of good j while holding other food prices unchanged
    """
    p = p.copy()
    p.loc[j] = p0
    return p


In [None]:
import matplotlib.pyplot as plt
%matplotlib inline

# Demand for Rice for household at median budget
plt.plot([r.demands(xref,my_prices(pbar[use]*s,pbar))[use] for s in scale],scale)

# Demand for Rice for household at 25% percentile
plt.plot([r.demands(xbar.quantile(0.25),my_prices(pbar[use]*s,pbar))[use] for s in scale],scale)

# Demand for Rice for household at 75% percentile
plt.plot([r.demands(xbar.quantile(0.75),my_prices(pbar[use]*s,pbar))[use] for s in scale],scale)

plt.ylabel(f"Price (relative to base of {pbar[use]:.2f})")
plt.xlabel(f"Quantities of {use} Demanded")

In [None]:
p=p.median()

In [None]:
# Get food prices and call it p
url = 'https://docs.google.com/spreadsheets/d/1gcAb2jlGQNrD2zrrTEbjL47vbXoxCHkkjHSYzD0-Tiw/edit#gid=2085637103'
p = read_sheets(url,sheet='Food Prices',nheaders=2)

p.columns.names = ['t','m']
p.groupby(level='j').mean()

#use tranpose to switch columns and rows as the original structure of the dataframe was not applicable to use.
p = p.transpose()
if 'm' not in p.columns:
    p['m']=1

p=p.apply(lambda x: pd.to_numeric(x,errors='coerce'))
p=p.replace(0,np.nan)

In [None]:
p

In [None]:

# Get FCT
fct = get_clean_sheet(InputFiles['FCT'][0],
                    sheet=InputFiles['FCT'][1])

fct = fct.set_index('j')
fct.columns.name = 'n'

fct = fct.apply(lambda x: pd.to_numeric(x,errors='coerce'))
fct.head(10)

In [None]:
#use = fct.index.intersection(qhat.columns)


#nutrients = qhat[use]@fct.loc[use,:]
#nutrients.mean()    # NB: Nutrients are for past /week/ for entire household. (Depends on dataset)

In [None]:
xhat

In [None]:
fct

In [None]:
# Get RDI
rdi = get_clean_sheet(InputFiles['Copy of RDI'][0],
                    sheet=InputFiles['Copy of RDI'][1])
rdi = rdi.set_index('n')
rdi.columns.name = 'k'
rdi

In [None]:
# Get Household characteristics 
hh_characteristics = get_clean_sheet(InputFiles['Household Characteristics'][0],
                    sheet=InputFiles['Household Characteristics'][1])

if 'm' not in hh_characteristics.columns:
    hh_characteristics['m'] = 1

hh_characteristics = hh_characteristics.set_index(['i','t','m'])
hh_characteristics.columns.name = 'k'
#hh_characteristics.name = 'value'  

hh_characteristics = hh_characteristics.apply(lambda x: pd.to_numeric(x,errors='coerce'))
print(hh_characteristics)

In [None]:
# Get the intersection of column names between rdi and hh_characteristics DataFrames
common_columns = list(set(rdi.columns) & set(hh_characteristics.columns))

# Select only the common columns from hh_characteristics DataFrame
hh_characteristics = hh_characteristics[common_columns]

# Debugging: Print out common columns
print("Common columns:", common_columns)

# Rename columns in hh_characteristics DataFrame using the mapping dictionary
hh_characteristics = hh_characteristics.rename(columns=column_mapping)

# Debugging: Print out mapping dictionary and renamed columns
print("Column mapping dictionary:", column_mapping)
print("Columns in hh_characteristics DataFrame after renaming:", hh_characteristics.columns.tolist())

# Now hh_characteristics DataFrame should have column names matching the rdi DataFrame
print("Columns in rdi DataFrame:", rdi.columns.tolist())
print("Columns in hh_characteristics DataFrame:", hh_characteristics.columns.tolist())

# Print out hh_characteristics DataFrame
print("hh_characteristics DataFrame:")
print(hh_characteristics)


In [None]:
# Get the intersection of column names between rdi and hh_characteristics DataFrames
common_columns = list(set(rdi.columns) & set(hh_characteristics.columns))

# Select only the common columns from hh_characteristics DataFrame
hh_characteristics = hh_characteristics[common_columns]

# Now both DataFrames have the same columns

# Rename columns in hh_characteristics DataFrame using the mapping dictionary
hh_characteristics = hh_characteristics.rename(columns=column_mapping)

# Now hh_characteristics DataFrame should have column names matching the rdi DataFrame
print("Columns in rdi DataFrame:", rdi.columns.tolist())
print("Columns in hh_characteristics DataFrame:", hh_characteristics.columns.tolist())
hh_characteristics

# Define a mapping dictionary to rename columns in hh_characteristics DataFrame
column_mapping = {
    'women': 'F 51+',
    'girls': 'F 14-18',
    'men': 'M 51+',
    'boys': 'M 14-18',
    'Males 00-03': 'M 00-03',
    'Females 00-03': 'F 00-03',
    'Males 04-08': 'M 04-08',
    'Females 04-08': 'F 04-08',
    'Males 09-13': 'M 09-13',
    'Females 09-13': 'F 09-13',
    'Males 14-18': 'M 14-18',
    'Females 14-18': 'F 14-18',
    'Males 19-30': 'M 19-30',
    'Females 19-30': 'F 19-30',
    'Males 31-50': 'M 31-50',
    'Females 31-50': 'F 31-50',
    'Males 51-99': 'M 51+',
    'Females 51-99': 'F 51+',
    'log HSize': 'log HSize'}
hh_characteristics

In [None]:
new_column_names = [
"Females 00-03", "Males 00-03", "Females 04-08", "Males 04-08", "Females 09-13", "Males 09-13", "Females 14-18", "Males 14-18",  "Females 19-30", "Males 19-30", "Females 31-50", "Males 31-50",  "Females 51-99", "Males 51-99"]
column_name_mapping = {old: new for old, new in zip(rdi.columns, new_column_names)}
rdi.rename(columns=column_name_mapping, inplace=True)

print (rdi.head())

new_column_names = [
"Females 00-03", "Males 00-03", "Females 04-08", "Males 04-08", "Females 09-13", "Males 09-13", "Females 14-18", "Males 14-18",  "Females 19-30", "Males 19-30", "Females 31-50", "Males 31-50",  "Females 51-99", "Males 51-99"]
column_name_mapping = {old: new for old, new in zip(hh_characteristics.columns, new_column_names)}
hh_characteristics.rename(columns=column_name_mapping, inplace=True)

print (hh_characteristics.head())


In [None]:
xhat = r.predicted_expenditures()
prices_kg = read_sheets(key='https://docs.google.com/spreadsheets/d/1P7OQG9cmBSS4dY6ZPI1HOPHW9shCTSkguDVnUCuh3Is/edit#gid=199254621 ', sheet="Food Prices in Kilograms 1")
if 'm' not in prices_kg.columns:  
    prices_kg['m'] = 1
prices_kg

prices_kg = prices_kg.set_index(['t','m'])
prices_kg.columns.name = 'j'

prices_kg = prices_kg.apply(lambda x: pd.to_numeric(x,errors='coerce'))
prices_kg = prices_kg.replace(0,np.nan)

qhat = (xhat.unstack('j')/prices_kg).dropna(how='all')
qhat = qhat.loc[:,qhat.count()>0]

use = fct.index.intersection(qhat.columns)

nutrients = qhat[use]@fct.loc[use,:]

nutrients.mean()

hh_characteristics = get_clean_sheet(InputFiles['Household Characteristics'][0],
                    sheet=InputFiles['Household Characteristics'][1])

if 'm' not in hh_characteristics.columns:
    hh_characteristics['m'] = 1

hh_characteristics = hh_characteristics.set_index(['i','t','m'])
hh_characteristics.columns.name = 'k'

hh_characteristics = hh_characteristics.apply(lambda x: pd.to_numeric(x,errors='coerce'))

hh_characteristics = hh_characteristics[rdi.columns.tolist()]

hh_rdi = hh_characteristics@rdi.T

#check recommended weekly intake?
hh_rwi = hh_rdi*7
use_nutrients = nutrients.columns.intersection(hh_rwi.columns)
nutrient_ratio = (nutrients[use_nutrients]/hh_rwi[use_nutrients]).dropna()

nutrient_ratio