In [2]:
import pandas as pd
pd.options.display.float_format = '{:,.3f}'.format
import numpy as np

import warnings
warnings.filterwarnings('ignore')

#plotting
import plotly
from plotly.offline import download_plotlyjs, init_notebook_mode, plot, iplot
import plotly.graph_objs as go
import plotly.figure_factory as ff


import cufflinks as cf
# Enable inline plotly rendering in the notebook; connected=True asks plotly to
# load plotly.js from the CDN instead of embedding it in the notebook file.
init_notebook_mode(connected=True)
# Switch cufflinks to offline plotting for this session.
cf.go_offline()
# NOTE(review): offline=False here looks at odds with go_offline() above —
# confirm which mode is intended.
cf.set_config_file(offline=False, world_readable=True)

import matplotlib.pyplot as plt
%matplotlib inline

import json
import datetime as dt
import os

In [3]:
from IPython.display import Image
from IPython.core.display import display, HTML, Markdown

In [4]:
def format_graph(data_graph):
    """Apply the notebook's common look to a figure and hand it back.

    The figure's ``layout`` is modified in place: white backgrounds, fixed
    margins, and black 'Bree Serif' fonts for the figure title, both axis
    titles, the legend and the axis tick labels.

    Parameters
    ----------
    data_graph
        Figure-like object supporting nested item access
        (``data_graph['layout']['title']['font'][...]`` and so on).

    Returns
    -------
    The same ``data_graph`` object that was passed in.
    """
    serif = 'Bree Serif'
    layout = data_graph['layout']

    # Canvas: white inside and outside the plotting area, fixed margins.
    for background_key in ('paper_bgcolor', 'plot_bgcolor'):
        layout[background_key] = 'white'
    layout['margin'] = dict(l=120, r=10, t=140, b=80)

    # The figure title and the two axis titles all share one font.
    title_style = (('family', serif), ('size', 18), ('color', 'black'))
    for owner in (None, 'yaxis', 'xaxis'):
        title = layout['title'] if owner is None else layout[owner]['title']
        for attribute, value in title_style:
            title['font'][attribute] = value

    # Legend entries are slightly smaller than the tick labels.
    layout['legend']['font'] = dict(color='black', size=14, family=serif)
    for axis in ('yaxis', 'xaxis'):
        layout[axis]['tickfont'] = dict(color='black', size=16, family=serif)

    return data_graph

# Exploring the data

In [5]:
# Folder the CSV files are read from, relative to the working directory.
# Built with os.path.join so the path is not tied to the Windows '\' separator.
data_folder = os.path.join('getting_the_data', 'data')

In [32]:
# Load a single CSV from data_folder (the second one, via the [1:2] slice) and
# derive timestamp / date / time columns from its string 'date' and 'time' columns.
# The listing is sorted because os.listdir() returns entries in arbitrary
# order, which would otherwise make the slice pick a different file between runs.
csv_names = sorted(name for name in os.listdir(data_folder) if name.endswith('.csv'))
for filename in csv_names[1:2]:
    df = (
        pd.read_csv(os.path.join(data_folder, filename), encoding='utf-8')
        .drop(columns='Unnamed: 0')
    )
    # 'time' holds text containing the word 'days' (e.g. a timedelta rendering);
    # the part after 'days' is the clock time, which is appended to the date string.
    clock_text = df['time'].str.split('days').str[1]
    df['timestamp'] = pd.to_datetime(df['date'] + clock_text)
    df['date'] = df['timestamp'].dt.date
    # Same clock time as parsed above, as datetime.time objects.
    df['time'] = df['timestamp'].dt.time

    df = df.sort_values(by='timestamp').reset_index(drop=True)

In [73]:
# Open-to-close change of each row, expressed in percent.
df['price_move'] = df['close_p'].div(df['open_p']).sub(1).mul(100)

In [87]:
# Reshape the long frame into a wide one: one row per date and one
# 'price_move_<N>' column per distinct time of day, where N is the whole number
# of minutes between that time and the latest time in the data.
df_features = pd.DataFrame()

settlement_time = df['time'].max()
anchor_day = dt.date.today()  # any date works; it only lets two time-of-day values be subtracted
settlement_moment = dt.datetime.combine(anchor_day, settlement_time)

for slot in df['time'].unique():
    gap = settlement_moment - dt.datetime.combine(anchor_day, slot)
    minute_change = str(gap.seconds // 60)

    slot_moves = df.loc[df['time'] == slot, ['date', 'price_move']].set_index('date')
    slot_moves.columns = ['price_move_' + minute_change]
    df_features = df_features.join(slot_moves, how='outer')

In [88]:
# Pairwise correlation between the per-time-of-day price-move columns.
df_features.corr()

Unnamed: 0,price_move_4,price_move_3,price_move_2,price_move_1,price_move_0
price_move_4,1.0,-0.076,-0.017,-0.078,-0.014
price_move_3,-0.076,1.0,0.038,0.09,-0.024
price_move_2,-0.017,0.038,1.0,0.061,0.004
price_move_1,-0.078,0.09,0.061,1.0,-0.06
price_move_0,-0.014,-0.024,0.004,-0.06,1.0


In [89]:
# Summary statistics (count, mean, std, min/max, quartiles) for each price-move column.
df_features.describe()

Unnamed: 0,price_move_4,price_move_3,price_move_2,price_move_1,price_move_0
count,809.0,805.0,808.0,812.0,812.0
mean,0.007,-0.006,-0.002,-0.003,0.001
std,0.069,0.063,0.068,0.092,0.141
min,-0.293,-0.247,-0.239,-0.411,-0.671
25%,-0.035,-0.035,-0.035,-0.059,-0.091
50%,0.0,0.0,0.0,0.0,0.0
75%,0.036,0.032,0.034,0.036,0.073
max,0.46,0.318,0.338,0.403,0.669


In [90]:
# Add an indicator column next to every price-move column:
# 1.0 when the move is >= 0, 0.0 when it is negative, NaN where the move is missing.
# The 'is_inrease_' spelling is kept because it is the column name other code may look up.
for column in [x for x in df_features.columns if 'price_move' in x]:
    moves = df_features[column]
    # Build the float indicator in one vectorized step instead of writing NaN
    # into an integer column afterwards, which newer pandas flags as an
    # incompatible-dtype assignment.
    df_features[column.replace('price_move_', 'is_inrease_')] = (
        moves.ge(0).astype(float).where(moves.notna())
    )

In [100]:
# Heatmap of the correlation matrix of every column in df_features.
# NOTE(review): plot_heatmap is defined in the cell *below* this one, so on a
# fresh kernel this cell raises NameError unless that cell is run first.
plot_heatmap(df_features.corr())

In [99]:
import seaborn as sns
def plot_heatmap(df_graph, width=800, height=400, annotation_threshold=1):
    """Display a DataFrame as a styled plotly heatmap in the notebook.

    The frame is transposed, turned into a heatmap figure through its
    ``.iplot`` method, restyled (white background, 'Bree Serif' fonts, x axis
    on top, no grid) and shown with ``iplot``.

    Parameters
    ----------
    df_graph : pandas.DataFrame
        Matrix of values to plot. Must expose ``.iplot`` (the cufflinks
        extension enabled at the top of the notebook).
    width, height : int, optional
        Figure size in pixels. Defaults are the previously hard-coded 800x400.
    annotation_threshold : float, optional
        A cell gets a text label only when its value is non-null and its
        absolute value is strictly greater than this. Defaults to 1, the
        previously hard-coded limit.

    Returns
    -------
    None
        The figure is displayed as a side effect.

    Notes
    -----
    Labels are produced by ``human_format``, which is not defined in this part
    of the notebook; it has to be in scope whenever a cell exceeds the
    threshold. NOTE(review): confirm where it is defined.
    """
    font_family = 'Bree Serif'

    figure = df_graph.transpose().iplot(kind='heatmap', colorscale='Rdbu', asFigure=True)
    layout = figure['layout']

    # Canvas and size.
    layout['paper_bgcolor'] = '#FFFFFF'
    layout['plot_bgcolor'] = '#FFFFFF'
    layout['width'] = width
    layout['height'] = height

    # No margin on the top, bottom and left; 120px kept on the right.
    layout['margin']['t'] = 0
    layout['margin']['b'] = 0
    layout['margin']['l'] = 0
    layout['margin']['r'] = 120

    # Both axes share the same styling: no grid, blank title, common fonts.
    for axis in ('yaxis', 'xaxis'):
        layout[axis]['showgrid'] = False
        layout[axis]['tickfont']['size'] = 15
        layout[axis]['tickfont']['family'] = font_family
        layout[axis]['title']['text'] = ''
        layout[axis]['title']['font']['family'] = font_family
        layout[axis]['title']['font']['size'] = 18
    layout['yaxis']['tickformat'] = ".0%"
    layout['xaxis']['side'] = 'top'

    layout['legend']['font']['family'] = font_family
    layout['legend']['font']['size'] = 18

    # Label every cell whose magnitude exceeds the threshold.
    trace = figure['data'][0]
    annotations = []
    for i, y_value in enumerate(trace['y']):
        for j, x_value in enumerate(trace['x']):
            value = trace['z'][i][j]
            if pd.notnull(value) and abs(value) > annotation_threshold:
                annotations.append(dict(
                    xref='x', yref='y',
                    x=x_value, y=y_value, xanchor='center',
                    text=human_format(value),
                    font=dict(family=font_family, size=16, color='black'),
                    showarrow=False, align='center'))
    layout['annotations'] = annotations

    iplot(figure)

In [None]:
# Fit one single-regressor OLS per (dependent, explanatory) pair and collect the
# fitted coefficient, its p-value, the adjusted R2 and the number of observations
# into df_results (one row per pair).
# NOTE(review): dependent_variables, explanatory_variables, df_reg and smf
# (presumably statsmodels.formula.api) are not defined in the visible cells —
# confirm they are set up before this cell runs.
regression_rows = []
for dependent_variable in dependent_variables:
    for explanatory_variable in explanatory_variables:
        # Q("...") quotes the column name so names that are not valid
        # identifiers can still be used in the formula.
        explanatory_variable_reg = 'Q("{0}")'.format(explanatory_variable)
        formula = '{0}~{1}'.format(dependent_variable, explanatory_variable_reg)
        ols_results = smf.ols(formula, df_reg).fit()
        regression_rows.append({
            'explanatory_variable': explanatory_variable,
            'dependent_variable': dependent_variable,
            # Second fitted term by position (presumably the first is the
            # intercept); .iloc makes the positional lookup explicit.
            'beta': ols_results.params.iloc[1],
            'pvalue': ols_results.pvalues.iloc[1],
            'R2': ols_results.rsquared_adj,
            'observations': ols_results.nobs,
        })
# Build the frame once instead of concatenating inside the loop; rows get a
# regular 0..n-1 index rather than every row being labelled 0.
df_results = pd.DataFrame(regression_rows)