<img src='http://hilpisch.com/taim_logo.png' width="350px" align="right">

# Artificial Intelligence in Finance

## Data-Driven Finance (a)

Dr Yves J Hilpisch | The AI Machine

http://aimachine.io | http://twitter.com/dyjh

## Financial Econometrics and Regression

In [None]:
import numpy as np

In [None]:
def f(x):
    """Evaluate the straight line with intercept 2 and slope 1/2 at x.

    Works for scalars and, through broadcasting, for NumPy arrays.
    """
    intercept, slope = 2, 0.5
    return intercept + slope * x

In [None]:
x = np.arange(-4, 5)
x

In [None]:
y = f(x)
y

In [None]:
x

In [None]:
y

In [None]:
# Regression slope: covariance of x and y divided by the variance of x,
# both computed with ddof=0 so the normalizations match.
beta = np.cov(x, y, ddof=0)[0][1] / np.var(x)
beta

In [None]:
alpha = y.mean() - beta * x.mean()
alpha

In [None]:
y_ = alpha + beta * x

In [None]:
np.allclose(y_, y)

## Data Availability

In addition to a (paid) subscription to the Eikon Data API (https://developers.refinitiv.com/eikon-apis/eikon-data-apis), the following code requires the `eikon` Python package:

    pip install eikon

In [None]:
import eikon as ek
import configparser

In [None]:
c = configparser.ConfigParser()
c.read('../../../data/aiif.cfg')  # adjust path

In [None]:
ek.set_app_key(c['eikon']['app_id'])

In [None]:
ek.__version__

In [None]:
symbols = ['AAPL.O', 'MSFT.O', 'NFLX.O', 'AMZN.O']

In [None]:
data = ek.get_timeseries(symbols,
                         fields='CLOSE',
                         start_date='2019-07-01',
                         end_date='2020-07-01')

In [None]:
data.info()

In [None]:
data.tail()

In [None]:
data = ek.get_timeseries('AMZN.O',
                         fields='*',
                         start_date='2020-09-24',
                         end_date='2020-09-25',
                         interval='minute')

In [None]:
data.info()

In [None]:
data.head()

In [None]:
data_grid, err = ek.get_data(['AAPL.O', 'IBM', 'GOOG.O', 'AMZN.O'],
                             ['TR.TotalReturnYTD', 'TR.WACCBeta',
                              'YRHIGH', 'YRLOW',
                              'TR.Ebitda', 'TR.GrossProfit'])

In [None]:
data_grid

In addition to a (free paper trading) account with Oanda (http://oanda.com), the following code requires the `tpqoa` package:

    pip install --upgrade git+https://github.com/yhilpisch/tpqoa.git

In [None]:
import tpqoa

In [None]:
oa = tpqoa.tpqoa('../../../data/pyalgo.cfg')

In [None]:
oa.stream_data('BTC_USD', stop=5)

In [None]:
data = ek.get_timeseries('AAPL.O',
                         fields='*',
                         start_date='2020-09-25 15:00:00',
                         end_date='2020-09-25 15:15:00',
                         interval='tick')

In [None]:
data.info()

In [None]:
data.head(8)

In [None]:
# Request news headlines for the Tesla production query within the
# June-July 2020 window (count=7).
news = ek.get_news_headlines(
    'R:TSLA.O PRODUCTION',
    count=7,
    date_from='2020-06-01',
    date_to='2020-08-01',
)

In [None]:
news

In [None]:
# Pick the second headline's story id by position. The headlines frame is
# presumably indexed by timestamp (verify against the eikon output), in which
# case a bare integer key is not a label and pandas' positional fallback for
# it is deprecated -- .iloc states the intent explicitly.
storyId = news['storyId'].iloc[1]

In [None]:
from IPython.display import HTML

In [None]:
HTML(ek.get_news_story(storyId))

In [None]:
import nlp
import requests

In [None]:
sources = [
    'https://nr.apple.com/dE0b1T5G3u',  # iPad Pro
    'https://nr.apple.com/dE4c7T6g1K',  # MacBook Air
    'https://nr.apple.com/dE4q4r8A2A',  # Mac Mini
]

In [None]:
# Download the raw HTML of every source page. The explicit timeout keeps a
# stalled server from blocking the notebook indefinitely (requests has no
# default timeout).
html = [requests.get(url, timeout=30).text for url in sources]

In [None]:
data = [nlp.clean_up_text(t) for t in html]

In [None]:
data[0][0:1001]

In [None]:
from twitter import Twitter, OAuth

In [None]:
t = Twitter(auth=OAuth(c['twitter']['access_token'],
                       c['twitter']['access_secret_token'],
                       c['twitter']['api_key'],
                       c['twitter']['api_secret_key']),
            retry=True)

In [None]:
l = t.statuses.home_timeline(count=15)

In [None]:
for e in l:
    print(e['text'])

In [None]:
l = t.statuses.user_timeline(screen_name='dyjh', count=5)

In [None]:
for e in l:
    print(e['text'])

In [None]:
d = t.search.tweets(q='#Python', count=7)

In [None]:
for e in d['statuses']:
    print(e['text'])

In [None]:
l = t.statuses.user_timeline(screen_name='elonmusk', count=50)

In [None]:
tl = [e['text'] for e in l]

In [None]:
tl[:5]

In [None]:
wc = nlp.generate_word_cloud(' '.join(tl), 35)

## Normative Theories Revisited

### Mean-Variance Portfolio Theory

In [None]:
import numpy as np
import pandas as pd
from pylab import plt, mpl
from scipy.optimize import minimize
# Matplotlib 3.6 renamed its bundled seaborn style sheets to 'seaborn-v0_8*'
# and 3.8 removed the old names; use whichever this installation provides.
plt.style.use('seaborn-v0_8' if 'seaborn-v0_8' in plt.style.available
              else 'seaborn')
mpl.rcParams['savefig.dpi'] = 300
mpl.rcParams['font.family'] = 'serif'
# Print floats with three decimals in a fixed-width field.
np.set_printoptions(precision=5, suppress=True,
                   formatter={'float': lambda x: f'{x:6.3f}'})

In [None]:
url = 'http://hilpisch.com/aiif_eikon_eod_data.csv'

In [None]:
raw = pd.read_csv(url, index_col=0, parse_dates=True).dropna()

In [None]:
raw.info()

In [None]:
symbols = ['AAPL.O', 'MSFT.O', 'INTC.O', 'AMZN.O', 'GLD']

In [None]:
rets = np.log(raw[symbols] / raw[symbols].shift(1)).dropna()

In [None]:
# Plot the selected price series, each normalized by its first observation.
raw[symbols].div(raw[symbols].iloc[0]).plot(figsize=(10, 6));

In [None]:
weights = len(rets.columns) * [1 / len(rets.columns)]
weights

In [None]:
def port_return(rets, weights):
    """Return the annualized portfolio return.

    The column-wise mean of ``rets`` is combined with ``weights`` via a dot
    product and scaled by 252 (trading days per year).
    """
    mean_daily = rets.mean()
    return 252 * np.dot(mean_daily, weights)

In [None]:
port_return(rets, weights)

In [None]:
def port_volatility(rets, weights):
    """Return the annualized portfolio volatility.

    The covariance matrix of ``rets`` is scaled by 252, the quadratic form
    with ``weights`` gives the portfolio variance, and its square root is
    returned.
    """
    annual_cov = rets.cov() * 252
    variance = np.dot(weights, np.dot(annual_cov, weights))
    return variance ** 0.5

In [None]:
port_volatility(rets, weights)

In [None]:
def port_sharpe(rets, weights):
    """Return the portfolio's annualized return divided by its volatility."""
    annual_return = port_return(rets, weights)
    annual_vol = port_volatility(rets, weights)
    return annual_return / annual_vol

In [None]:
port_sharpe(rets, weights)

In [None]:
# Draw 1,000 random weight vectors (one column per symbol) and rescale
# every row so that its weights sum to one.
w = np.random.random((1000, len(symbols)))
w /= w.sum(axis=1, keepdims=True)

In [None]:
w[:5]

In [None]:
w[:5].sum(axis=1)

In [None]:
# One (volatility, return) row per simulated weight vector.
pvr = np.array([(port_volatility(rets[symbols], weights),
                 port_return(rets[symbols], weights))
                for weights in w])

In [None]:
psr = pvr[:, 1] / pvr[:, 0]

In [None]:
# Scatter the simulated portfolios in volatility/return space, colored by
# their Sharpe ratio.
plt.figure(figsize=(10, 6))
fig = plt.scatter(x=pvr[:, 0], y=pvr[:, 1], c=psr, cmap='coolwarm')
cb = plt.colorbar(fig)
cb.set_label('Sharpe ratio')
plt.title(' | '.join(symbols))
plt.xlabel('expected volatility')
plt.ylabel('expected return');

In [None]:
# One (0, 1) bound pair per symbol.
bnds = [(0, 1) for _ in symbols]
bnds

In [None]:
cons = {'type': 'eq', 'fun': lambda weights: weights.sum() - 1}

In [None]:
# For every calendar year 2010-2018, find the weights that maximize the
# Sharpe ratio (by minimizing its negative), starting from equal weights.
opt_weights = {}
for year in range(2010, 2019):
    rets_ = rets[symbols].loc[f'{year}-01-01':f'{year}-12-31']
    ow = minimize(lambda weights: -port_sharpe(rets_, weights),
                  x0=len(symbols) * [1 / len(symbols)],
                  bounds=bnds,
                  constraints=cons).x
    opt_weights[year] = ow

In [None]:
opt_weights

In [None]:
# Compare the statistics of each year's optimal portfolio on the year it was
# fitted to ("expected": epv, epr, esr) with the statistics the same weights
# produce in the following year ("realized": rpv, rpr, rsr).
frames = []
for year in range(2010, 2019):
    # statistics on the year the weights were optimized for
    rets_ = rets[symbols].loc[f'{year}-01-01':f'{year}-12-31']
    epv = port_volatility(rets_, opt_weights[year])
    epr = port_return(rets_, opt_weights[year])
    esr = epr / epv
    # statistics of the same weights on the following year
    rets_ = rets[symbols].loc[f'{year + 1}-01-01':f'{year + 1}-12-31']
    rpv = port_volatility(rets_, opt_weights[year])
    rpr = port_return(rets_, opt_weights[year])
    rsr = rpr / rpv
    frames.append(pd.DataFrame({'epv': epv, 'epr': epr, 'esr': esr,
                                'rpv': rpv, 'rpr': rpr, 'rsr': rsr},
                               index=[year + 1]))
# DataFrame.append was removed in pandas 2.0; concatenate the rows once.
res = pd.concat(frames)

In [None]:
res

In [None]:
res.mean()

In [None]:
res[['epv', 'rpv']].corr()

In [None]:
res[['epv', 'rpv']].plot(kind='bar', figsize=(10, 6),
        title='Expected vs. Realized Portfolio Volatility');

In [None]:
res[['epr', 'rpr']].corr()

In [None]:
res[['epr', 'rpr']].plot(kind='bar', figsize=(10, 6),
        title='Expected vs. Realized Portfolio Return');

In [None]:
res[['esr', 'rsr']].corr()

In [None]:
res[['esr', 'rsr']].plot(kind='bar', figsize=(10, 6),
        title='Expected vs. Realized Sharpe Ratio');

### Capital Asset Pricing Model

In [None]:
r = 0.005

In [None]:
market = '.SPX'

In [None]:
rets = np.log(raw / raw.shift(1)).dropna()

In [None]:
res = pd.DataFrame()

In [None]:
# CAPM check: estimate beta on one year, then compare the CAPM-implied
# return with the realized return in the following year.
frames = []
for sym in rets.columns[:4]:
    print('\n' + sym)
    print(54 * '=')
    for year in range(2010, 2019):
        # beta from the estimation year
        rets_ = rets.loc[f'{year}-01-01':f'{year}-12-31']
        cov = rets_.cov().loc[sym, market]
        var = rets_[market].var()
        beta = cov / var
        # market and stock returns from the following year
        rets_ = rets.loc[f'{year + 1}-01-01':f'{year + 1}-12-31']
        muM = rets_[market].mean() * 252
        mu_capm = r + beta * (muM - r)
        mu_real = rets_[sym].mean() * 252
        frames.append(pd.DataFrame({'symbol': sym,
                                    'mu_capm': mu_capm,
                                    'mu_real': mu_real},
                                   index=[year + 1]))
        print('{} | beta: {:.3f} | mu_capm: {:6.3f} | mu_real: {:6.3f}'
              .format(year + 1, beta, mu_capm, mu_real))
# DataFrame.append was removed in pandas 2.0; build the result in one
# concat. sort=True keeps the alphabetical column order used before.
res = pd.concat(frames, sort=True)

In [None]:
sym = 'AMZN.O'

In [None]:
# Correlate the numeric columns only: from pandas 2.0 on, DataFrame.corr()
# no longer drops the string 'symbol' column silently and raises instead.
res[res['symbol'] == sym].drop(columns='symbol').corr()

In [None]:
res[res['symbol'] == sym].plot(kind='bar',
                figsize=(10, 6), title=sym);

In [None]:
grouped = res.groupby('symbol').mean()
grouped

In [None]:
grouped.plot(kind='bar', figsize=(10, 6), title='Average Values');

### Arbitrage-Pricing Theory

In [None]:
factors = ['.SPX', '.VIX', 'EUR=', 'XAU=']

In [None]:
res = pd.DataFrame()

In [None]:
np.set_printoptions(formatter={'float': lambda x: f'{x:5.2f}'})

In [None]:
# APT check: regress each stock on the factors for one year, then use the
# factor loadings to predict the following year's return.
frames = []
for sym in rets.columns[:4]:
    print('\n' + sym)
    print(71 * '=')
    for year in range(2010, 2019):
        # factor loadings from a least-squares fit on the estimation year
        rets_ = rets.loc[f'{year}-01-01':f'{year}-12-31']
        reg = np.linalg.lstsq(rets_[factors],
                              rets_[sym], rcond=-1)[0]
        # prediction and realized return for the following year
        rets_ = rets.loc[f'{year + 1}-01-01':f'{year + 1}-12-31']
        mu_apt = np.dot(rets_[factors].mean() * 252, reg)
        mu_real = rets_[sym].mean() * 252
        frames.append(pd.DataFrame({'symbol': sym,
                                    'mu_apt': mu_apt, 'mu_real': mu_real},
                                   index=[year + 1]))
        print('{} | fl: {} | mu_apt: {:6.3f} | mu_real: {:6.3f}'
              .format(year + 1, reg.round(2), mu_apt, mu_real))
# DataFrame.append was removed in pandas 2.0; concatenate the rows once.
res = pd.concat(frames)

In [None]:
sym = 'AMZN.O'

In [None]:
# Correlate the numeric columns only: from pandas 2.0 on, DataFrame.corr()
# no longer drops the string 'symbol' column silently and raises instead.
res[res['symbol'] == sym].drop(columns='symbol').corr()

In [None]:
res[res['symbol'] == sym].plot(kind='bar',
                figsize=(10, 6), title=sym);

In [None]:
grouped = res.groupby('symbol').mean()
grouped

In [None]:
grouped.plot(kind='bar', figsize=(10, 6), title='Average Values');

In [None]:
factors = pd.read_csv('http://hilpisch.com/aiif_eikon_eod_factors.csv',
                      index_col=0, parse_dates=True)

In [None]:
factors.info()

In [None]:
(factors / factors.iloc[0]).plot(figsize=(10, 6));

In [None]:
start = '2017-01-01'
end = '2020-01-01'

In [None]:
retsd = rets.loc[start:end].copy()
retsd.dropna(inplace=True)

In [None]:
# Log returns of the factors, restricted to the start/end window and then to
# the dates present in retsd; rows with missing values are dropped.
retsf = (np.log(factors / factors.shift(1))
         .loc[start:end]
         .dropna()
         .loc[retsd.index]
         .dropna())

In [None]:
retsf.corr()

In [None]:
res = pd.DataFrame()

In [None]:
np.set_printoptions(formatter={'float': lambda x: f'{x:5.2f}'})

In [None]:
# Fit the factor loadings on the first half of the sample and evaluate the
# implied annualized return on the second half.
# NOTE: sym, reg, retsf_ and retsd_ keep their values from the last
# iteration and are read by the cells that follow.
split = int(len(retsf) * 0.5)
frames = []
for sym in rets.columns[:4]:
    print('\n' + sym)
    print(74 * '=')
    # first half: estimate the loadings
    retsf_, retsd_ = retsf.iloc[:split], retsd.iloc[:split]
    reg = np.linalg.lstsq(retsf_, retsd_[sym], rcond=-1)[0]
    # second half: predicted vs. realized return
    retsf_, retsd_ = retsf.iloc[split:], retsd.iloc[split:]
    mu_apt = np.dot(retsf_.mean() * 252, reg)
    mu_real = retsd_[sym].mean() * 252
    frames.append(pd.DataFrame({'mu_apt': mu_apt,
                                'mu_real': mu_real}, index=[sym]))
    print('fl: {} | apt: {:.3f} | real: {:.3f}'
          .format(reg.round(1), mu_apt, mu_real))
# DataFrame.append was removed in pandas 2.0; concatenate the rows once.
res = pd.concat(frames, sort=True)

In [None]:
res.plot(kind='bar', figsize=(10, 6));

In [None]:
sym

In [None]:
rets_sym = np.dot(retsf_, reg)

In [None]:
rets_sym = pd.DataFrame(rets_sym,
                        columns=[sym + '_apt'],
                        index=retsf_.index)

In [None]:
rets_sym[sym + '_real'] = retsd_[sym]

In [None]:
rets_sym.mean() * 252

In [None]:
rets_sym.std() * 252 ** 0.5

In [None]:
rets_sym.corr()

In [None]:
rets_sym.cumsum().apply(np.exp).plot(figsize=(10, 6));

In [None]:
rets_sym['same'] = (np.sign(rets_sym[sym + '_apt']) ==
                    np.sign(rets_sym[sym + '_real']))

In [None]:
rets_sym['same'].value_counts()

In [None]:
# Fraction of rows where the two return signs agree. The mean of the boolean
# column equals count(True) / len, and unlike value_counts()[True] it does
# not raise a KeyError when no row is True.
rets_sym['same'].mean()

<img src='http://hilpisch.com/taim_logo.png' width="350px" align="right">

<br><br><br><a href="http://tpq.io" target="_blank">http://tpq.io</a> | <a href="http://twitter.com/dyjh" target="_blank">@dyjh</a> | <a href="mailto:ai@tpq.io">ai@tpq.io</a>