## A concept for forecasting inflation with online listed item prices
Can we use online prices as a predictor of a nation's inflation and CPI?

Online resources indicate that not only is this possible, but it has been done over the past decade by those who have the right tools - http://www.mit.edu/~afc/papers/Cavallo_Online_Offline.pdf

While it isn't entirely surprising that online and offline prices are similar,
finding the correct data points in an ever-growing pool of resources is the challenge that this model attempts to address.

Technology used: Jupyter Notebook, plot.ly, Python, d3.js, JavaScript
<br><br><i>
    <b>Output</b> = df
</i>

In [None]:
# Developed modules
# cpi_aux is the helper module this notebook is built on. It is imported and
# then immediately reloaded so that edits made to the module while the kernel is
# running are picked up when this cell is re-run. The builtin `reload` and the
# `print` statement mean this cell targets Python 2.
# Best effort: any failure in the import/reload is only printed, and execution
# continues with the star import below.
try:
    import cpi_aux
    reload (cpi_aux)
except Exception as e: print e
    
# NOTE(review): names used by later cells without a visible import (os, random,
# pd, widgets, Layout, display, iplot, ...) presumably arrive through this star
# import -- confirm against cpi_aux.
from cpi_aux import *

### First, set the variables that affect the output of the model
(Time shift adjustment, CPI shown, percentile to filter)

In [None]:
# Variables and Formatting definition

# Time-shift window bounds. No other visible cell reads them.
# NOTE(review): presumably consumed by cpi_aux helpers or later cells -- confirm.
START_OFFSET=-160
END_OFFSET=0

# CPI series selector. 'CPI' and 'diff' are the two values handed to
# plotCompareCPIMetrics further down; 'diff' is the active choice here.
# (The earlier `cpi_metric='CPI'` line was overwritten immediately and is gone.)
cpi_metric='diff'

# Percentile to filter on; 1 is rendered as "100%" in the price chart title.
percentile=1

# The former module-level `global metrics, items, ...` statement was removed:
# `global` has no effect outside a function body and bound none of those names.

### Formatting 
# pd.options.display.float_format = '{:,.2f}'.format

In [None]:
# Build the category dropdown from the sub-directories of `categorical_path`.
categorical_path='ref/categories/'
skipped_dirs=['ipynb_checkpoints', 'Misc Items']

# Keep directories only, and drop any whose name contains a skipped marker.
# os.path.join avoids the doubled slash that plain string concatenation produced.
categories = [sub for sub in os.listdir(categorical_path)
              if os.path.isdir(os.path.join(categorical_path, sub))
              and not any(xs in sub for xs in skipped_dirs)]

# Category name -> position in `categories`.
options={c: i for i,c in enumerate(categories)}

# Pick the initial dropdown value: a random category whenever nothing was
# preselected or random_cat is switched on.
# random.choice raises IndexError if no category directory was found.
random_cat=True
selected=None
if selected is None or random_cat:
    selected=random.choice(categories)
category=widgets.Dropdown(options=categories,description='Category',
   layout=Layout(width='40%'),value=selected)

# category.layout.max_width='400'
display(category)

# Pin the category used by the rest of the notebook. This overrides the random
# pick above, so the cells below always run against 'Footwear - Men'.
category.value='Footwear - Men'

#### Prime data from files, establish fundamental variables

In [None]:
# reload(cpi_aux)

# Load the product data and the CPI series for the category chosen in the
# dropdown. (A hard-coded 'Food - Tea' path used to be assigned first and was
# overwritten on the very next line; it has been dropped.)
basepath='ref/categories/%s/'%(category.value)
price_df=getProducts(basepath)
cpi_df,cpi_category=getCPI(basepath)

# Work on a copy so price_df keeps the data exactly as loaded.
df=price_df.copy()

categorical_df,total_df = getSummary(df)
display(categorical_df)
display(total_df)

# Split the columns into metric groups by substring match on the column name.
# Plain comprehensions replace the filter/reduce one-liners, which relied on
# Python 2's list-returning `filter` and builtin `reduce`.
# pricecolumns , salescolumns , weighcolumns = getMetricColumns(df=df)
all_columns = list(df.columns.values)
weighcolumns = [col for col in all_columns if 'weigh' in col]
salescolumns = [col for col in all_columns if 'sales' in col]

# As price columns don't have a 'price' in the column name, they are whatever
# remains once 'date' and the two groups above are removed.
non_price_columns = set(['date'] + weighcolumns + salescolumns)
pricecolumns = [col for col in all_columns if col not in non_price_columns]

### Interactive frames
# qw=qgrid.show_grid(total_df,show_toolbar=True)
# qw
# total_df.total = total_df.total.astype(float)


# First Plot - Seeing the data first hand
The data has not been changed or modified yet.

In [None]:
# Bucketing interval shared by the three raw-data charts below.
frequency='M'

# One (columns, chart title) pair per chart, in display order:
# average price, units sold, sales revenue.
raw_charts=[
    (pricecolumns,
     '<b>Average Price ($)</b><br>Outliers not filtered<br>Interpolated and bucketing interval set to: <i>%s</i>'%(frequency)),
    (weighcolumns,
     "<b>Units Sold</b><br>Outliers not filtered"),
    (salescolumns,
     "<b>Sales revenue of items ($)</b><br>Outliers not filtered"),
]

# Interpolate each column group at `frequency` and plot it.
for chart_columns,title in raw_charts:
    interpolateDF(df,columns=chart_columns,frequency=frequency).iplot(title=title)

## CPI stats
Sourced from rba.gov.au
<br>Data available with quarterly updates
<br>The file is in CSV format and is used as our model's benchmark and reference

In [None]:
# Enrich a copy of the loaded CPI frame, so cpi_df itself is never handed to enrichCPI.
cpi=enrichCPI(cpi_df.copy())

# Plot the enriched frame; plotdouble is given the series names 'diff' and 'CPI'.
title="<b>CPI - %s</b><br>Obtain CPI for the next step, including each point's derivative"%(decorateText(cpi_category))
cpi_figure=plotdouble(cpi,'diff','CPI',title=title)
iplot(cpi_figure)


##### Price vs Units Sold vs Revenue

In [None]:
# Either use prices or normalized prices
# `prices`: the raw frame re-indexed by its 'date' column (the column itself is dropped).
prices=df.set_index(pd.DatetimeIndex(df['date'])).drop(columns=['date'])
normalized_prices=normalizeDF(df)
ndf=normalized_prices.copy()

# Attach the 'weighted_mean' series returned by get_mean_weighted_rating as the
# 'mean' column of the normalized frame.
n2df = get_mean_weighted_rating(ndf, df)
ndf['mean'] = n2df['weighted_mean']

# Chart from a copy so the total columns added below do not end up in ndf.
graph_ndf=ndf.copy()
display(graph_ndf.head(2))

#Show chart of Prices
interpolateDF(graph_ndf,pricecolumns + ['mean'],frequency='M').iplot(
    title='<b>Price - ('+str(int(percentile*100))+ '%)</b><br>Interpolated and bucketing interval set to 1 month')

# NOTE(review): the two charts below call interpolateDF without a frequency while
# their titles state "1 month" -- confirm that interpolateDF defaults to monthly.

#Show chart of Items Sold
graph_ndf['total_units']=graph_ndf[weighcolumns].sum(axis=1)
interpolateDF(graph_ndf,weighcolumns+['total_units']).iplot(
    title='<b>Items Sold</b><br>Interpolated and bucketing interval set to 1 month')

#Show chart of Total Revenue (title typo "Outliters" corrected)
graph_ndf['total_revenue']=graph_ndf[salescolumns].sum(axis=1)
interpolateDF(graph_ndf,salescolumns + ['total_revenue']).iplot(
    title='<b>Revenue - Outliers Filtered</b><br>Interpolated and bucketing interval set to 1 month')

In [None]:
# Compare the weighted mean price, units sold and revenue on one chart.
frequency_u_vs_r='14D'   # interval passed to interpolateDF
rolling_freq='90D'       # window of the rolling average of the mean price

# Normalize the three series together, then add the rolling average of 'mean'.
rolling_label='Rolling Avg %s'%(rolling_freq)
u_vs_r=normalizeDF(graph_ndf[['total_units','total_revenue','mean']])
u_vs_r[rolling_label]=u_vs_r['mean'].rolling(rolling_freq).mean()

# Interpolate, then swap the internal column names for display labels.
display_labels={
    'mean' : 'Weighted Mean Price',
    'total_units':'Sold Units',
    'total_revenue':'Revenue',
}
u_vs_r=interpolateDF(u_vs_r,frequency=frequency_u_vs_r).rename(columns=display_labels)

chart_title='<b>Price vs Total Sold vs Revenue</b><br>Interval: %s'%(frequency_u_vs_r)
u_vs_r.iplot(title=chart_title)

# print ndf.columns

In [None]:
# Compare the CPI series against the price and revenue metrics.
input_cpi=cpi.copy()

# Rebuild graph_ndf from the normalized frame and a fresh copy of the loaded prices.
# (The former `grap_ndf=normalizeDF(price_df.copy())` line bound a misspelled
# name that nothing read, so it has been removed.)
graph_ndf=get_mean_weighted_rating(normalized_df_input=ndf,original_df_input=price_df.copy())

interpolate_graph=False
graph_frequency='90D'   # '14D' was assigned first and overwritten immediately
rolling_freq='90D'
# graph_ndf[['total_revenue_rolling']] = graph_ndf[['total_revenue']].rolling(graph_frequency).mean()

# Keyword arguments shared by all four comparisons below.
compare_kwargs=dict(frequency=graph_frequency,cpi_category=cpi_category,getRollingMean=True,
    interpolate=interpolate_graph,rolling_freq=rolling_freq)

# Default price metric against the CPI level and its 'diff' series.
cpi_vs_price = plotCompareCPIMetrics(graph_ndf,input_cpi,cpi_metric='CPI',**compare_kwargs)

cpidelta_vs_price = plotCompareCPIMetrics(graph_ndf,input_cpi,cpi_metric='diff',**compare_kwargs)

# Same two comparisons with price_metric='total_revenue'.
# NOTE(review): graph_ndf was rebuilt above, so 'total_revenue' presumably has to
# come from get_mean_weighted_rating's output -- confirm.
cpidelta_vs_total_revenue = plotCompareCPIMetrics(graph_ndf,input_cpi,cpi_metric='diff',
    price_metric='total_revenue',**compare_kwargs)

cpi_vs_total_revenue = plotCompareCPIMetrics(graph_ndf,input_cpi,cpi_metric='CPI',
    price_metric='total_revenue',**compare_kwargs)

In [None]:
# Show which CPI category getCPI returned for the selected product category.
print(cpi_category)

In [None]:
# Reload cpi_aux so edits made to the module since it was first imported take
# effect, then rebind plot_final and enrichCPI to the reloaded definitions.
# Names brought in earlier by `from cpi_aux import *` are not rebound here.
# import cpi_aux
reload(cpi_aux)
# from cpi_aux import *
from cpi_aux import plot_final, enrichCPI

In [None]:
# Reload cpi_aux and rebind the two helpers so this cell uses the module's
# current code even when it is re-run on its own.
reload(cpi_aux)
from cpi_aux import plot_final, enrichCPI

# Final chart: the enriched CPI frame against the normalized prices interpolated
# at frequency 'M', with the correlation and offset displays switched off.
monthly_normalized_prices=interpolateDF(ndf,frequency='M')
plot_final(icpi=cpi,normalized_prices=monthly_normalized_prices,
           show_corr=False,show_offset=False)