<img src='http://hilpisch.com/taim_logo.png' width="350px" align="right">

# Artificial Intelligence in Finance

## Data-Driven Finance (b)

Dr Yves J Hilpisch | The AI Machine

http://aimachine.io | http://twitter.com/dyjh

## Debunking Central Assumptions

## Normality

#### Sample Data Sets

In [None]:
import numpy as np
import pandas as pd
from pylab import plt, mpl
# Fix the NumPy seed so that the simulated samples are reproducible.
np.random.seed(100)
# Matplotlib 3.6 renamed the bundled 'seaborn' style to 'seaborn-v0_8' and
# later releases no longer accept the old name; use whichever is installed.
plt.style.use('seaborn' if 'seaborn' in plt.style.available
              else 'seaborn-v0_8')
# Resolution for saved figures and the font family for all plots.
mpl.rcParams['savefig.dpi'] = 300
mpl.rcParams['font.family'] = 'serif'

In [None]:
# Sample size shared by the two synthetic data sets.
N = 10_000

In [None]:
# Draw N standard normal variates and rescale the sample so that its
# first two moments match 0 and 1 (moment matching).
snrn = np.random.standard_normal(size=N)
snrn = snrn - snrn.mean()  # center the sample
snrn = snrn / snrn.std()  # scale to unit standard deviation

In [None]:
# Sanity check: after moment matching the sample mean should be (close to) 0.
round(snrn.mean(), 4)

In [None]:
# Sanity check: after moment matching the standard deviation should be
# (close to) 1.
round(snrn.std(), 4)

In [None]:
# Histogram (35 bins) of the moment-matched normal sample; the trailing
# semicolon suppresses the textual return value of plt.hist in the notebook.
plt.figure(figsize=(10, 6))
plt.hist(snrn, bins=35);

In [None]:
# Discrete sample with three distinct values: entries [0, split) keep 1.5,
# entries [split, 3 * split) become -1 and entries [3 * split, 4 * split)
# become 0.
split = int(0.25 * N)
numbers = np.full(N, 1.5)
numbers[split:3 * split] = -1
numbers[3 * split:4 * split] = 0

In [None]:
# Standardize the discrete sample to mean 0 and standard deviation 1.
numbers = numbers - numbers.mean()
numbers = numbers / numbers.std()

In [None]:
# Sanity check: the standardized sample mean should be (close to) 0.
round(numbers.mean(), 4)

In [None]:
# Sanity check: the standardized sample standard deviation should be
# (close to) 1.
round(numbers.std(), 4)

In [None]:
# Histogram (35 bins) of the standardized discrete sample; the trailing
# semicolon suppresses the textual return value of plt.hist in the notebook.
plt.figure(figsize=(10, 6))
plt.hist(numbers, bins=35);

In [None]:
import math
import scipy.stats as scs
import statsmodels.api as sm

In [None]:
def dN(x, mu, sigma):
    ''' Probability density function of a normal random variable x.

    Parameters
    ----------
    x: float or np.ndarray
        point(s) at which the density is evaluated
    mu: float or np.ndarray
        expected value of the distribution
    sigma: float or np.ndarray
        standard deviation of the distribution (must be non-zero)

    Returns
    -------
    pdf: float or np.ndarray
        density value(s), broadcast over x, mu and sigma
    '''
    z = (x - mu) / sigma
    # np.sqrt (not math.sqrt) so that an array-valued sigma broadcasts
    # like x and mu instead of raising a TypeError
    pdf = np.exp(-0.5 * z ** 2) / np.sqrt(2 * np.pi * sigma ** 2)
    return pdf

In [None]:
def return_histogram(rets, title=''):
    ''' Draws a density-normalized histogram of the returns and overlays
    the normal PDF that has the sample's mean and standard deviation.
    '''
    plt.figure(figsize=(10, 6))
    # evaluation grid spanning the observed range of the sample
    grid = np.linspace(min(rets), max(rets), 100)
    plt.hist(np.array(rets), bins=50, density=True, label='frequency')
    normal_pdf = dN(grid, np.mean(rets), np.std(rets))
    plt.plot(grid, normal_pdf, linewidth=2, label='PDF')
    plt.title(title)
    plt.xlabel('log returns')
    plt.ylabel('frequency/probability')
    plt.legend()

In [None]:
# Histogram with fitted normal PDF for the standard normal sample.
return_histogram(snrn)

In [None]:
# Histogram with fitted normal PDF for the discrete sample.
return_histogram(numbers)

In [None]:
def return_qqplot(rets, title=''):
    ''' Draws a Q-Q plot of the returns with statsmodels, including a
    reference line (line='s'), on a 10x6 inch figure.
    '''
    qq_fig = sm.qqplot(rets, line='s', alpha=0.5)
    qq_fig.set_size_inches(10, 6)
    plt.ylabel('sample quantiles')
    plt.xlabel('theoretical quantiles')
    plt.title(title)

In [None]:
# Q-Q plot for the standard normal sample.
return_qqplot(snrn)

In [None]:
# Q-Q plot for the discrete sample.
return_qqplot(numbers)

In [None]:
def print_statistics(rets):
    ''' Prints skewness and kurtosis of the returns together with the
    p-values of the SciPy skewness, kurtosis and normality tests.
    '''
    rule = '---------------------------------------------'
    print('RETURN SAMPLE STATISTICS')
    print(rule)
    print(f'Skew of Sample Log Returns {scs.skew(rets):9.6f}')
    print(f'Skew Normal Test p-value   {scs.skewtest(rets)[1]:9.6f}')
    print(rule)
    print(f'Kurt of Sample Log Returns {scs.kurtosis(rets):9.6f}')
    print(f'Kurt Normal Test p-value   {scs.kurtosistest(rets)[1]:9.6f}')
    print(rule)
    print(f'Normal Test p-value        {scs.normaltest(rets)[1]:9.6f}')
    print(rule)

In [None]:
# Normality statistics for the standard normal sample.
print_statistics(snrn)

In [None]:
# Normality statistics for the discrete sample.
print_statistics(numbers)

#### Real Financial Returns

In [None]:
# Load the CSV data set with the first column parsed as a date index,
# then discard every row that contains a missing value.
raw = pd.read_csv('http://hilpisch.com/aiif_eikon_eod_data.csv',
                  index_col=0, parse_dates=True)
raw = raw.dropna()

In [None]:
# Log returns ln(p_t / p_{t-1}) per column; the first row has no
# predecessor, is NaN and is therefore dropped.
rets = np.log(raw / raw.shift(1))
rets = rets.dropna()

In [None]:
# Column of rets that is inspected in the next two cells.
symbol = '.SPX'

In [None]:
# Histogram of the real log returns against the fitted normal PDF.
return_histogram(rets[symbol].values, symbol)

In [None]:
# Q-Q plot of the real log returns.
return_qqplot(rets[symbol].values, symbol)

In [None]:
# Columns of rets for which the normality statistics are printed.
symbols = ['.SPX', 'AMZN.O', 'EUR=', 'GLD']

In [None]:
# Print the normality statistics for each selected instrument under a
# header line with its symbol.
for sym in symbols:
    print(f'\n{sym}')
    print('=' * 45)
    print_statistics(rets[sym].values)

## Linear Relationships

In [None]:
# Rate r entering the CAPM formula mu_capm = r + beta * (muM - r).
r = 0.005

In [None]:
# Column of rets that serves as the market portfolio in the beta estimates.
market = '.SPX'

In [None]:
# Empty frame that collects one result row per symbol and year.
res = pd.DataFrame()

In [None]:
# For each of the first four instruments: estimate beta from one calendar
# year of data, then compare the CAPM-implied return for the following
# year with the return realized in that year.
capm_rows = []
capm_years = []
for sym in rets.columns[:4]:
    for year in range(2010, 2019):
        # Estimation window: beta = Cov(sym, market) / Var(market).
        rets_ = rets.loc[f'{year}-01-01':f'{year}-12-31']
        cov = rets_.cov().loc[sym, market]
        var = rets_[market].var()
        beta = cov / var
        # Evaluation window: the subsequent calendar year.
        rets_ = rets.loc[f'{year + 1}-01-01':f'{year + 1}-12-31']
        muM = rets_[market].mean() * 252  # mean daily return scaled by 252
        mu_capm = r + beta * (muM - r)
        mu_real = rets_[sym].mean() * 252  # mean daily return scaled by 252
        capm_rows.append({'beta': beta,
                          'mu_capm': mu_capm,
                          'mu_real': mu_real,
                          'symbol': sym})
        capm_years.append(year + 1)
# DataFrame.append() was removed in pandas 2.0 and copied the whole frame
# on every call. Build the frame once instead; the columns are listed in
# the alphabetical order that append(..., sort=True) produced. res is
# rebuilt from scratch, so re-running this cell does not duplicate rows.
res = pd.DataFrame(capm_rows, index=capm_years,
                   columns=['beta', 'mu_capm', 'mu_real', 'symbol'])

In [None]:
from sklearn.metrics import r2_score

In [None]:
# Degree-1 least-squares fit of the CAPM-predicted return on beta; the
# fitted values are stored in a new column.
reg = np.polyfit(res['beta'], res['mu_capm'], deg=1)
res['mu_capm_ols'] = np.polyval(reg, res['beta'])

In [None]:
# R^2 of the linear fit for the CAPM-predicted returns.
r2_score(res['mu_capm'], res['mu_capm_ols'])

In [None]:
# Scatter plot of CAPM-predicted return against beta, with the fitted
# regression line drawn over the observed beta range.
res.plot(kind='scatter', x='beta', y='mu_capm', figsize=(10, 6))
x = np.linspace(res['beta'].min(), res['beta'].max())
plt.plot(x, np.polyval(reg, x), 'g--', label='regression')
plt.legend();

In [None]:
# Same degree-1 fit, now for the realized return on beta; this overwrites
# reg from the earlier fit.
reg = np.polyfit(res['beta'], res['mu_real'], deg=1)
res['mu_real_ols'] = np.polyval(reg, res['beta'])

In [None]:
# R^2 of the linear fit for the realized returns.
r2_score(res['mu_real'], res['mu_real_ols'])

In [None]:
# Scatter plot of realized return against beta, with the fitted regression
# line drawn over the observed beta range.
res.plot(kind='scatter', x='beta', y='mu_real', figsize=(10, 6))
x = np.linspace(res['beta'].min(), res['beta'].max())
plt.plot(x, np.polyval(reg, x), 'g--', label='regression')
plt.legend();

<img src='http://hilpisch.com/taim_logo.png' width="350px" align="right">

<br><br><br><a href="http://tpq.io" target="_blank">http://tpq.io</a> | <a href="http://twitter.com/dyjh" target="_blank">@dyjh</a> | <a href="mailto:ai@tpq.io">ai@tpq.io</a>