In [None]:
import pandas as pd
pd.set_option('display.max_rows', 500)
pd.set_option('display.max_columns', 500)
pd.set_option('display.width', 1000)
import numpy as np
import matplotlib.pyplot as plt
from statsmodels.tsa.stattools import coint
import seaborn
import yfinance as yf

In [None]:
# Tickers whose price history is loaded and screened for cointegrated pairs.
symbolsIds = [
    'SPY', 'AAPL', 'ADBE', 'LUV', 'MSFT',
    'SKYW', 'QCOM',
    'HPQ', 'JNPR', 'AMD', 'IBM',
]

In [None]:
def load_financial_data(symbols, start_date, end_date, output_file):
    """Return price data for ``symbols``, using ``output_file`` as a pickle cache.

    If ``output_file`` exists it is unpickled and returned as-is (the
    ``symbols`` / date arguments are NOT checked against the cached content).
    Otherwise the data is downloaded with ``yf.download`` and, when the
    download produced at least one row, written to ``output_file``.

    Parameters
    ----------
    symbols : list of str
        Ticker symbols passed straight to ``yf.download``.
    start_date, end_date : str
        Date bounds passed as ``start=`` / ``end=`` to ``yf.download``.
    output_file : str
        Path of the pickle cache to read from / write to.

    Returns
    -------
    pandas.DataFrame
        The cached or freshly downloaded frame (possibly empty when the
        download returned nothing).
    """
    try:
        # SECURITY NOTE: unpickling can execute arbitrary code; only point this
        # at cache files that were produced locally by this notebook.
        df = pd.read_pickle(output_file)
        print('File data found...reading symbols data')
    except FileNotFoundError:
        print('File not found...downloading the symbols data')
        df = yf.download(symbols, start=start_date, end=end_date)
        if df.empty:
            # Presumably a failed request (network error, bad ticker) -- TODO confirm
            # for the installed yfinance version. Caching an empty frame would make
            # every later run silently "succeed" with no data, so skip the write.
            print('Download returned no data...cache file not written')
        else:
            df.to_pickle(output_file)
    return df

In [None]:
# Price history for the ticker universe between the two dates below;
# read from the local pickle when it exists, downloaded otherwise.
data = load_financial_data(
    symbolsIds,
    start_date='2001-01-01',
    end_date='2018-01-01',
    output_file='multi_data_large.pkl',
)

In [None]:
def find_cointegrated_pairs(data, significance=0.02):
    """Run ``coint`` on every unordered pair of columns in ``data``.

    Parameters
    ----------
    data : pandas.DataFrame
        One price series per column.
    significance : float, optional
        A pair is reported when its p-value is strictly below this level.
        Defaults to 0.02.

    Returns
    -------
    pvalue_matrix : numpy.ndarray of shape (n, n)
        Entry ``[i, j]`` with ``i < j`` holds the p-value for columns ``i``
        and ``j``; the diagonal and lower triangle keep their initial value 1.
    pairs : list of tuple
        ``(column_i, column_j)`` labels of every pair that passed the cutoff,
        in column order.
    """
    columns = list(data.columns)
    n = len(columns)
    pvalue_matrix = np.ones((n, n))
    pairs = []
    for i, first in enumerate(columns):
        # Only the upper triangle is tested: each unordered pair once.
        for j in range(i + 1, n):
            second = columns[j]
            pvalue = coint(data[first], data[second])[1]
            pvalue_matrix[i, j] = pvalue
            if pvalue < significance:
                pairs.append((first, second))
    return pvalue_matrix, pairs

In [None]:
# Screen every pair of 'Close' price columns; keeps the p-value matrix and the
# list of column pairs whose p-value passed the cutoff inside the function.
pvalues, pairs = find_cointegrated_pairs(data['Close'])

In [None]:
# Label the axes with the columns the p-value matrix was actually built from.
# The column order of data['Close'] is not guaranteed to match the order in
# which the tickers were listed in symbolsIds, so labelling with symbolsIds
# can attach the wrong ticker to a row/column.
tested_symbols = list(data['Close'].columns)
# The mask hides cells with p >= 0.98, which includes the diagonal and lower
# triangle that find_cointegrated_pairs leaves at their initial value of 1.
seaborn.heatmap(pvalues, xticklabels=tested_symbols,
                yticklabels=tested_symbols, cmap='RdYlGn_r',
                mask=(pvalues >= 0.98))
plt.show()

In [None]:
# Prefer adjusted closes; fall back to 'Close' when the loaded frame has no
# 'Adj Close' field (presumably the case when the download was made with
# auto-adjusted prices -- TODO confirm against the yfinance version used).
price_field = 'Adj Close' if 'Adj Close' in data.columns.get_level_values(0) else 'Close'
Symbol1_prices = data[price_field]['AAPL']
Symbol2_prices = data[price_field]['HPQ']
# Title previously misspelled the ticker as "APPL".
plt.title("AAPL and HPQ prices")
Symbol1_prices.plot()
Symbol2_prices.plot()
plt.legend()
plt.show()

In [None]:
# Synthetic pair used to illustrate the trading logic, because the real symbol
# data is too noisy to read the signals from.

# Fixed seed so the experiment is reproducible.
np.random.seed(123)

# Symbol1: cumulative sum of 100 standard-normal daily returns, shifted up by 10.
Symbol1_returns = np.random.normal(0, 1, 100)
Symbol1_prices = pd.Series(np.cumsum(Symbol1_returns), name='Symbol1') + 10

# Symbol2: Symbol1 shifted up by a further 10 plus standard-normal noise.
noise = np.random.normal(0, 1, 100)
Symbol2_prices = (Symbol1_prices + 10 + noise).rename('Symbol2')

plt.title("Symbol 1 and Symbol 2 prices")
Symbol1_prices.plot()
Symbol2_prices.plot()
plt.show()

In [None]:
def zscore(series):
    """Standardise ``series``: subtract its mean and divide by ``np.std(series)``.

    ``np.std`` is called with its defaults, i.e. ddof=0 (population
    standard deviation), not the pandas ``Series.std()`` default of ddof=1.
    """
    centered = series - series.mean()
    spread = np.std(series)
    return centered / spread

In [None]:
# Cointegration test on the synthetic pair. The result is unpacked as
# (test statistic, p-value, third element discarded) -- per the statsmodels
# documentation the third element holds the critical values.
score, pvalue, _ = coint(Symbol1_prices, Symbol2_prices)

In [None]:
# Element-wise price ratio Symbol1 / Symbol2; this is the series the z-score
# signals below are computed from.
ratios = Symbol1_prices.div(Symbol2_prices)
ratios.plot()

In [None]:
# Z-score of the ratio with reference lines at its mean (black), +1 (red) and -1 (green).
# A z-score reaching -1 or +1 is used as the trading signal. These two thresholds
# are arbitrary; they should be chosen from the study run to design the strategy.
ratio_zscores = zscore(ratios)
ratio_zscores.plot()
plt.axhline(ratio_zscores.mean(), color="black")
plt.axhline(1.0, color="red")
plt.axhline(-1.0, color="green")
plt.show()

In [None]:
# Mark the ratio values that trigger a signal: z-score <= -1 is a buy, z-score >= 1 a sell.
ratios.plot()
ratio_z = zscore(ratios)
# Days without a signal are set to 0 so their markers fall outside the clamped y-range below.
buy = ratios.mask(ratio_z > -1, 0)
sell = ratios.mask(ratio_z < 1, 0)
buy.plot(color="g", linestyle="None", marker="^")
sell.plot(color="r", linestyle="None", marker="v")
# Keep the automatic x-limits but restrict the y-axis to the range of the ratio itself.
x_low, x_high, y_low, y_high = plt.axis()
plt.axis((x_low, x_high, ratios.min(), ratios.max()))
plt.legend(["Ratio", "Buy Signal", "Sell Signal"])

![title](zscore_enter_exit.png)

In [None]:
# This code demonstrates when to enter and exit a position

symbol1_buy = Symbol1_prices.copy()
symbol1_sell = Symbol1_prices.copy()
symbol2_buy = Symbol2_prices.copy()
symbol2_sell = Symbol2_prices.copy()

Symbol1_prices.plot()
# Days without a signal are zeroed; a non-zero entry therefore means "signal active".
symbol1_buy[zscore(ratios) > -1] = 0
symbol1_sell[zscore(ratios) < 1] = 0
symbol1_buy.plot(color="g", linestyle="None", marker="^")
symbol1_sell.plot(color="r", linestyle="None", marker="v")

pair_correlation_trading_strategy = pd.DataFrame(index=Symbol1_prices.index)
pair_correlation_trading_strategy["symbol1_price"] = Symbol1_prices
pair_correlation_trading_strategy["symbol1_buy"] = np.zeros(len(Symbol1_prices))
pair_correlation_trading_strategy["symbol1_sell"] = np.zeros(len(Symbol1_prices))
pair_correlation_trading_strategy["symbol2_buy"] = np.zeros(len(Symbol1_prices))
pair_correlation_trading_strategy["symbol2_sell"] = np.zeros(len(Symbol1_prices))

# Keep track of whether the strategy is currently in a buy position (1), sell position (-1), or neutral (0)
# We will limit the number of orders by reducing the position to one share.
#
# Orders are written with DataFrame.at[row, column] rather than the chained form
# frame[column][i] = value: chained assignment raises a warning on recent pandas
# and does not update the frame at all when Copy-on-Write is enabled.
# Prices and signals are read positionally (.iloc) so the loop does not depend on
# the index being 0..n-1.
position = 0
last_step = len(Symbol1_prices) - 1
for i, row in enumerate(pair_correlation_trading_strategy.index):
    s1price = Symbol1_prices.iloc[i]
    s2price = Symbol2_prices.iloc[i]
    buy_signal = symbol1_buy.iloc[i] != 0
    sell_signal = symbol1_sell.iloc[i] != 0

    # If no current position (position == 0) and a buy signal is identified:
    # Buy Symbol1 and sell Symbol2. This action is logged in the pair_correlation_trading_strategy DataFrame, and position is set to 1.
    if not position and buy_signal:
        pair_correlation_trading_strategy.at[row, "symbol1_buy"] = s1price
        pair_correlation_trading_strategy.at[row, "symbol2_sell"] = s2price
        position = 1
    # If no current position (position == 0) and a sell signal is identified:
    # Sell Symbol1 and buy Symbol2. This action is logged, and position is set to -1.
    elif not position and sell_signal:
        pair_correlation_trading_strategy.at[row, "symbol1_sell"] = s1price
        pair_correlation_trading_strategy.at[row, "symbol2_buy"] = s2price
        position = -1
    # If currently in a sell position (position == -1) and the sell signal is no longer valid or it's the last iteration:
    # Close the position by buying Symbol1 and selling Symbol2. Update position to 0.
    elif position == -1 and (not sell_signal or i == last_step):
        pair_correlation_trading_strategy.at[row, "symbol1_buy"] = s1price
        pair_correlation_trading_strategy.at[row, "symbol2_sell"] = s2price
        position = 0
    # If currently in a buy position (position == 1) and the buy signal is no longer valid or it's the last iteration:
    # Close the position by selling Symbol1 and buying Symbol2. Update position to 0.
    elif position == 1 and (not buy_signal or i == last_step):
        pair_correlation_trading_strategy.at[row, "symbol1_sell"] = s1price
        pair_correlation_trading_strategy.at[row, "symbol2_buy"] = s2price
        position = 0


Symbol2_prices.plot()
# Symbol2 is the opposite leg: it is bought when the z-score is >= 1 and sold when it is <= -1.
symbol2_buy[zscore(ratios) < 1] = 0
symbol2_sell[zscore(ratios) > -1] = 0
symbol2_buy.plot(color="g", linestyle="None", marker="^")
symbol2_sell.plot(color="r", linestyle="None", marker="v")

# Clamp the y-axis to the price range so the zeroed (no-signal) markers are not drawn.
x1, x2, y1, y2 = plt.axis()
plt.axis((x1, x2, Symbol1_prices.min(), Symbol2_prices.max()))
plt.legend(["Symbol1", "Buy Signal", "Sell Signal", "Symbol2"])
plt.show()

Now use MSFT and JNPR to implement the strategy based on real symbols.
The chart reveals a large number of orders: without any limits, the pair correlation strategy sends too many of them. We can limit the number of orders in the same ways as we did previously:

- Limiting positions
- Limiting the number of orders
- Setting a higher Z-score threshold

In [None]:
# Per-step traded value of each symbol (buy value minus sell value), their sum,
# and the running totals of all three, presented as the strategy's profit-and-loss view.
# NOTE(review): with this sign convention a round trip that buys low and sells
# high pushes the running sum negative -- confirm how the chart is meant to be read.
for leg in ('symbol1', 'symbol2'):
    pair_correlation_trading_strategy[leg + '_position'] = (
        pair_correlation_trading_strategy[leg + '_buy']
        .sub(pair_correlation_trading_strategy[leg + '_sell'])
    )

for leg in ('symbol1', 'symbol2'):
    pair_correlation_trading_strategy[leg + '_position'].cumsum().plot()

pair_correlation_trading_strategy['total_position'] = (
    pair_correlation_trading_strategy['symbol1_position']
    .add(pair_correlation_trading_strategy['symbol2_position'])
)
pair_correlation_trading_strategy['total_position'].cumsum().plot()
plt.title("Symbol 1 and Symbol 2 positions")
plt.legend()
plt.show()

在此之前，我们只交易了一股。在常规交易中，我们会交易数百或数千股。让我们来分析一下使用配对相关交易策略时会发生什么。

假设我们有一对两个标的（Symbol 1 和 Symbol 2）。假设 Symbol 1 的价格是 \$100，Symbol 2 的价格是 \$10。如果我们交易一定数量的 Symbol 1 和 Symbol 2 股票，可以使用 100 股。如果我们对 Symbol 1 有一个做多信号，我们将以 \$100 买入 Symbol 1。名义头寸将是 100 x \$100 = \$10,000。由于这是 Symbol 1 的做多信号，那么对于 Symbol 2 就是一个做空信号。我们将有一个 Symbol 2 的名义头寸为 100 x \$10 = \$1,000。这两个头寸之间将有 \$9,000 的差额。

在两个标的之间存在较大的价格差异时，会导致它们之间的影响不平衡。具体来说，价格较高的标的将会在整体回报中占据更大的比重。这是因为在配对交易中，回报通常受价格较高的资产的价格变动所驱动。因此，当这个价格较高的标的出现显著价格变化时，它会对整个配对策略的表现产生更大的影响。

在市场上进行交易和投资时，重要的是对冲头寸，以防止不利的市场波动，进而降低风险。例如，如果我们创建一个投资组合，通过买入多只股票形成多头头寸，我们的预期是这些精选的股票将表现优于整个市场。然而，如果整个市场下跌，即便这些股票相对优于其他股票，它们的价格仍可能因整体市场趋势而下跌。如果我们在市场低迷期间卖出这些头寸，可能会因市场整体下滑而遭受损失。

为了防范这种风险，投资者通常会通过投资一些预期会与主要持仓相反方向波动的资产来对冲他们的头寸。这种方法有助于在不利的市场条件下抵消潜在的损失。在配对交易的背景下，实现中性头寸对于最小化市场风险暴露至关重要。这可以通过在 Symbol 1 和 Symbol 2 上投资相同的名义金额（即美元价值），而不是简单地购买相同数量的股票来实现。

例如，如果 Symbol 1 的价格显著高于 Symbol 2，仅购买相同数量的两只标的股票并不会创建一个平衡的对冲。价格较高的 Symbol 1 将主导投资组合的回报，使对冲无效。相反，通过在两只标的上投资相同的美元金额，可以平衡每个标的的影响力，从而使配对交易策略保持中性，更好地抵御市场整体的波动。
因为我们不想处于上述两种情况中的任何一种，我们将投资相同的名义金额在 Symbol 1 和 Symbol 2 上。假设我们想买入 100 股 Symbol 1。我们将拥有的名义头寸是 100 x \$100 = \$10,000。要获得 Symbol 2 相同的名义头寸，我们需要 \$10,000 / \$10 = 1,000 股。如果我们买入 100 股 Symbol 1 和 1,000 股 Symbol 2，我们的投资将处于中性头寸状态，我们不会给予 Symbol 1 比 Symbol 2 更多的重视。

现在，假设 Symbol 2 的价格不是 \$10 而是 \$3。当我们将 \$10,000 除以 \$3 时，我们得到 3,333 + 1/3。这意味着我们将下单买入 3,333 股，这将使我们在 Symbol 1 上有 \$10,000 的头寸，在 Symbol 2 上有 3,333 x \$3 = \$9,999 的头寸，结果相差 \$1。现在假设交易金额不是 \$10,000，而是 \$10,000,000。这将导致 \$1,000 的差额。因为在购买股票时我们需要去掉小数部分，这种差额会出现在任何标的中。如果我们交易大约 200 对标的，我们可能会有 \$200,000 (200 x \$1,000) 的头寸未被对冲。我们将暴露于市场波动。因此，如果市场下跌，我们可能会在这 \$200,000 上亏损。这就是为什么重要的是使用与这 \$200,000 头寸相反方向的金融工具进行对冲。如果我们有许多标的的头寸，导致有 \$200,000 的未覆盖的多头头寸，我们将建立一个与市场走势相同的 SPY ETF 的空头头寸。

In [None]:
# We replace s1prices with s1positions from the earlier code by taking into account the number of shares we want to allocate for the trading of this pair

pair_correlation_trading_strategy['symbol1_price']=Symbol1_prices
pair_correlation_trading_strategy['symbol1_buy']=np.zeros(len(Symbol1_prices))
pair_correlation_trading_strategy['symbol1_sell']=np.zeros(len(Symbol1_prices))
pair_correlation_trading_strategy['symbol2_buy']=np.zeros(len(Symbol1_prices))
pair_correlation_trading_strategy['symbol2_sell']=np.zeros(len(Symbol1_prices))
pair_correlation_trading_strategy['delta']=np.zeros(len(Symbol1_prices))


# Orders are written with DataFrame.at[row, column] rather than the chained form
# frame[column][i] = value: chained assignment raises a warning on recent pandas
# and does not update the frame at all when Copy-on-Write is enabled.
# Prices and signals are read positionally (.iloc) so the loop does not depend on
# the index being 0..n-1.
position = 0
s1_shares = 1000000
last_step = len(Symbol1_prices) - 1
for i, row in enumerate(pair_correlation_trading_strategy.index):
    s1price = Symbol1_prices.iloc[i]
    s2price = Symbol2_prices.iloc[i]
    # Notional of the Symbol1 leg, and the closest Symbol2 notional reachable
    # with a whole number of shares (int() drops the fractional share).
    s1positions = s1price * s1_shares
    s2positions = s2price * int(s1positions / s2price)
    # Notional left unmatched between the two legs because of that truncation.
    delta_position = s1positions - s2positions
    buy_signal = symbol1_buy.iloc[i] != 0
    sell_signal = symbol1_sell.iloc[i] != 0

    if not position and buy_signal:
        # Open: buy Symbol1, sell Symbol2; the mismatch is recorded on entries only.
        pair_correlation_trading_strategy.at[row, 'symbol1_buy'] = s1positions
        pair_correlation_trading_strategy.at[row, 'symbol2_sell'] = s2positions
        pair_correlation_trading_strategy.at[row, 'delta'] = delta_position
        position = 1
    elif not position and sell_signal:
        # Open: sell Symbol1, buy Symbol2.
        pair_correlation_trading_strategy.at[row, 'symbol1_sell'] = s1positions
        pair_correlation_trading_strategy.at[row, 'symbol2_buy'] = s2positions
        pair_correlation_trading_strategy.at[row, 'delta'] = delta_position
        position = -1
    elif position == -1 and (not sell_signal or i == last_step):
        # Close the sell position once the signal is gone or the data ends.
        pair_correlation_trading_strategy.at[row, 'symbol1_buy'] = s1positions
        pair_correlation_trading_strategy.at[row, 'symbol2_sell'] = s2positions
        position = 0
    elif position == 1 and (not buy_signal or i == last_step):
        # Close the buy position once the signal is gone or the data ends.
        pair_correlation_trading_strategy.at[row, 'symbol1_sell'] = s1positions
        pair_correlation_trading_strategy.at[row, 'symbol2_buy'] = s2positions
        position = 0


pair_correlation_trading_strategy['symbol1_position']=\
pair_correlation_trading_strategy['symbol1_buy']-pair_correlation_trading_strategy['symbol1_sell']

pair_correlation_trading_strategy['symbol2_position']=\
pair_correlation_trading_strategy['symbol2_buy']-pair_correlation_trading_strategy['symbol2_sell']

pair_correlation_trading_strategy['symbol1_position'].cumsum().plot()
pair_correlation_trading_strategy['symbol2_position'].cumsum().plot()

pair_correlation_trading_strategy['total_position']=\
pair_correlation_trading_strategy['symbol1_position']+pair_correlation_trading_strategy['symbol2_position']
pair_correlation_trading_strategy['total_position'].cumsum().plot()
plt.title("Symbol 1 and Symbol 2 positions")
plt.legend()
plt.show()

In [None]:
# Notional mismatch between the two legs, recorded on the steps where a position was opened.
delta_positions = pair_correlation_trading_strategy.loc[:, 'delta']
delta_positions.plot()
plt.title("Delta Position")
plt.show()