We need to build a dataset with prices of commodities.

In [1]:
import pandas as pd
import numpy as np

In [2]:
# Read the '<CLOSE>' column of each price file, indexed by its '<DATE>' column.
# The order of the paths matches the order of the names being unpacked.
price_files = (
    'data/ICE.BRN_150101_190201.txt',
    'data/NYMEX.CL_150101_190201.txt',
    'data/NYMEX.HO_150101_190201.txt',
    'data/NYMEX.XRB_150101_190201.txt',
)
brn, cl, ho, xrb = (
    pd.read_csv(path, index_col='<DATE>')['<CLOSE>'] for path in price_files
)

Combine the price series into a single dataset.

In [9]:
# One column per ticker; building the frame from a dict of Series aligns the
# four close-price series on their <DATE> index.
close_by_ticker = dict(brn=brn, cl=cl, ho=ho, xrb=xrb)
prices = pd.DataFrame(close_by_ticker)
prices.head()

Unnamed: 0_level_0,brn,cl,ho,xrb
<DATE>,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
20150105,49.38,47.57,1.6967,1.3146
20150112,49.91,48.8,1.6573,1.358
20150119,47.97,44.99,1.6435,1.3333
20150126,52.1,46.88,1.681,1.455
20150202,58.6,52.8,1.842,1.5697


Calculate the log return between consecutive prices:

$$r_t = \log \frac{p_{t+1}}{p_{t}}$$

where $p_t$ is the price at time $t$.

In [10]:
# Pair every row with the prices of the row that follows it.
# `assign` returns a new frame, so `prices` itself is never modified; this
# avoids adding columns to a `prices.loc[:]` slice, whose copy-vs-view
# behaviour depends on the pandas version.
tickers = ['brn', 'cl', 'ho', 'xrb']
prices_next = prices.assign(
    # shift(-1) moves each series up one position: row t receives the value
    # from row t+1, and the final row receives NaN.
    **{ticker + '_next': prices[ticker].shift(-1) for ticker in tickers}
).iloc[:-1]  # the last row has no successor, so drop it
prices_next.head()

Unnamed: 0_level_0,brn,cl,ho,xrb,brn_next,cl_next,ho_next,xrb_next
<DATE>,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
20150105,49.38,47.57,1.6967,1.3146,49.91,48.8,1.6573,1.358
20150112,49.91,48.8,1.6573,1.358,47.97,44.99,1.6435,1.3333
20150119,47.97,44.99,1.6435,1.3333,52.1,46.88,1.681,1.455
20150126,52.1,46.88,1.681,1.455,58.6,52.8,1.842,1.5697
20150202,58.6,52.8,1.842,1.5697,62.03,53.47,1.9755,1.6322


In [11]:
# Log return per ticker: log(p_{t+1} / p_t), computed from each price column
# and its matching '*_next' column in `prices_next`.
# `Index.rename` returns a new Index, so naming the axis 'date' here cannot
# change the index name of `prices_next` (assigning `.index.name` on an Index
# object that is shared between two frames renames it for both).
log_prices = pd.DataFrame(index=prices_next.index.rename('date'))
for ticker in ['brn', 'cl', 'ho', 'xrb']:
    log_prices[ticker] = np.log(prices_next[ticker + '_next'] / prices_next[ticker])
log_prices.head()

Unnamed: 0_level_0,brn,cl,ho,xrb
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
20150105,0.010676,0.025528,-0.023495,0.032481
20150112,-0.039646,-0.08129,-0.008362,-0.018356
20150119,0.082589,0.041151,0.022561,0.087349
20150126,0.11757,0.11892,0.091463,0.075879
20150202,0.056883,0.01261,0.06997,0.039044


In [12]:
# Save the computed log returns to disk as CSV (the index is written too).
log_prices.to_csv(path_or_buf='data/log_prices.csv')