In [5]:
# Load Kedro's IPython extension and (re)load the project; per the log output this
# defines the globals `context`, `session`, `catalog` and `pipelines` used below.
%load_ext kedro.extras.extensions.ipython
# NOTE(review): absolute, machine-specific project path — this cell will not run on
# another machine without editing it.
%reload_kedro /Users/yeungadrian/Documents/repo/TimeSeries

2022-06-05 21:56:04,555 - kedro.framework.session.store - INFO - `read()` not implemented for `BaseSessionStore`. Assuming empty store.
2022-06-05 21:56:05,472 - root - INFO - ** Kedro project timeseries
2022-06-05 21:56:05,474 - root - INFO - Defined global variable `context`, `session`, `catalog` and `pipelines`
2022-06-05 21:56:05,696 - kedro.framework.session.store - INFO - `read()` not implemented for `BaseSessionStore`. Assuming empty store.
2022-06-05 21:56:05,766 - root - INFO - ** Kedro project timeseries
2022-06-05 21:56:05,768 - root - INFO - Defined global variable `context`, `session`, `catalog` and `pipelines`


## To Do:
- Add the ability to use leverage
  - Cost of leverage
  - Margin
- Add a benchmark to compare the portfolio against

## Libraries

In [71]:
import numpy as np
import pandas as pd
from pydantic import BaseModel
from typing import List, TypeVar, Optional, Dict


In [7]:
# Placeholder type used to annotate DataFrame-valued fields (see `Portfolio.df`).
# The TypeVar's declared name now matches the variable it is bound to, as the typing
# conventions require; previously it was the dotted string "pandas.core.frame.DataFrame".
# NOTE(review): an unconstrained TypeVar carries no DataFrame-specific checking —
# presumably pydantic treats it like `Any`; confirm before relying on validation.
PandasDataFrame = TypeVar("PandasDataFrame")

## Data

In [8]:
# Load the `fund_prices` dataset through the Kedro `catalog` global created by
# `%reload_kedro` above (the log below reports it as a ParquetDataSet).
price_df = catalog.load("fund_prices")
# Bare last expression so the notebook renders the frame; the output shows a `date`
# column followed by one price column per ticker.
price_df

2022-06-05 21:56:06,118 - kedro.io.data_catalog - INFO - Loading data from `fund_prices` (ParquetDataSet)...


Unnamed: 0,date,ABMD,ATVI,AMD,AMZN,AAPL,DTE,EBAY
0,2020-12-31,324.200,92.850000,91.710,3256.93,132.690000,121.410000,50.250000
1,2020-12-30,323.920,91.580000,92.290,3285.85,133.720000,119.660000,50.550000
2,2020-12-29,320.930,91.370000,90.620,3322.00,134.870000,119.240000,50.860000
3,2020-12-28,312.910,91.430000,91.600,3283.96,136.690000,119.230000,50.240000
4,2020-12-24,303.410,90.960000,91.810,3172.69,131.970000,119.720000,50.120000
...,...,...,...,...,...,...,...,...
5322,1999-11-05,11.315,1.148739,10.625,64.94,0.680100,13.626682,6.904808
5323,1999-11-04,10.470,1.091975,10.315,63.06,0.643981,13.360490,6.733871
5324,1999-11-03,10.250,1.083012,10.655,65.81,0.627654,13.626682,6.583654
5325,1999-11-02,10.315,1.064339,10.280,66.44,0.618028,13.626682,6.801210


## Backtesting Portfolio

In [286]:
class Portfolio(BaseModel):
    """Back-test a buy-and-hold or periodically rebalanced portfolio of funds.

    Typical use: construct the model, call ``prepare_data()`` once, then call
    ``backtest_strategy()``.

    Attributes:
        fund_codes: Names of the columns in ``df`` that hold each fund's prices.
        fund_amounts: Initial amount invested in each fund, in the same order as
            ``fund_codes``.
        start_date: First date of the back-test (inclusive). Written as
            ``YYYY-MM-DD`` so that it can be matched against the generated
            rebalancing dates, which use that format.
        end_date: Last date of the back-test (inclusive), same format.
        df: Price history with a ``date`` column plus one column per fund code.
        rebalance: When true, the portfolio is reset to its initial weights at
            every rebalancing date.
        rebalance_frequency: Key of ``frequency_map`` (case-insensitive). Required
            when ``rebalance`` is true.
        frequency_map: For each frequency code, how many month-ends lie between
            two consecutive rebalancing dates.
    """

    fund_codes: List[str]
    fund_amounts: List[float]
    start_date: str
    end_date: str
    df: PandasDataFrame
    rebalance: bool
    rebalance_frequency: Optional[str] = None
    # Was `frequency_map = Dict = {...}`: a chained assignment that rebound the name
    # `Dict` inside the class body and left `frequency_map` without an annotation.
    frequency_map: Dict[str, int] = {"y": 12, "q": 3, "m": 1}

    def prepare_data(self):
        """Parse, sort and trim the price history to the back-test window.

        Converts ``date`` to datetimes, sorts ascending by date and keeps the rows
        with ``start_date <= date <= end_date``. The result replaces ``self.df``.
        A new frame is built instead of assigning into the existing one, so a
        DataFrame shared with the caller is left untouched.
        """
        prices = self.df.assign(date=pd.to_datetime(self.df["date"]))
        prices = prices.sort_values(by="date")
        in_window = (prices["date"] >= self.start_date) & (
            prices["date"] <= self.end_date
        )
        self.df = prices.loc[in_window].reset_index(drop=True)

    def normalise_index(self, fund_history):
        """Rebase every fund's prices so that the first row equals 1.

        Args:
            fund_history: Frame with a ``date`` column and the ``fund_codes``
                columns; must contain at least one row.

        Returns:
            A new frame with the rebased fund columns followed by ``date``.
        """
        prices = fund_history[self.fund_codes]
        result = prices.divide(prices.iloc[0], axis=1)
        result["date"] = fund_history["date"]
        return result

    def backtest_portfolio(self, fund_amount, fund_history):
        """Scale normalised fund prices by the invested amounts.

        Args:
            fund_amount: Amount held in each fund, ordered like ``fund_codes``
                (a single scalar applies the same amount to every fund).
            fund_history: Frame with a ``date`` column and the ``fund_codes``
                columns, typically the output of ``normalise_index``.

        Returns:
            Frame with one value column per fund, a ``portfolio`` column holding
            the row-wise total, and the ``date`` column.

        Raises:
            ValueError: If ``fund_amount`` is a sequence whose length differs from
                the number of funds.
        """
        num_funds = len(self.fund_codes)
        amounts = np.asarray(fund_amount, dtype=float)
        # The previous diagonal-matrix product silently repeated a too-short list
        # of amounts; reject the mismatch instead of mis-allocating money.
        if amounts.ndim > 0 and amounts.shape != (num_funds,):
            raise ValueError(
                f"Expected {num_funds} fund amounts, got shape {amounts.shape}"
            )

        prices = fund_history[self.fund_codes]
        # Element-wise column scaling. Unlike a matrix product with a diagonal
        # matrix, a NaN price in one fund does not turn the other funds' values
        # in that row into NaN (0 * NaN is NaN).
        result = pd.DataFrame(
            prices.to_numpy() * amounts,
            index=prices.index,
            columns=list(self.fund_codes),
        )

        result["portfolio"] = result.sum(axis=1)
        result["date"] = fund_history["date"]

        return result

    def _rebalancing_dates(self):
        """Return the period boundaries as ``YYYY-MM-DD`` strings, start to end."""
        if self.rebalance_frequency is None:
            raise ValueError(
                "rebalance_frequency must be set when rebalance is True"
            )
        # An unknown frequency code raises KeyError from this lookup.
        months_between = self.frequency_map[self.rebalance_frequency.lower()]

        calendar = pd.Series(
            pd.date_range(start=self.start_date, end=self.end_date, freq="D")
        )
        month_ends = calendar[calendar.dt.is_month_end].reset_index(drop=True)
        # Keep the first month-end in the window and every `months_between`-th after it.
        selected = month_ends[month_ends.index % months_between == 0]
        boundaries = selected.dt.strftime("%Y-%m-%d").tolist()

        if self.start_date not in boundaries:
            boundaries.insert(0, self.start_date)
        if self.end_date not in boundaries:
            boundaries.append(self.end_date)
        return boundaries

    def backtest_strategy(self):
        """Run the back-test over ``self.df``.

        ``prepare_data()`` is expected to have been called first: the rebalancing
        branch uses the ``.dt`` accessor on the ``date`` column.

        Without rebalancing, prices are normalised once and scaled by
        ``fund_amounts``. ``self.df`` is left unchanged, so repeated calls start
        from the same data.

        With rebalancing, the window is cut at the rebalancing dates. Each period
        is normalised on its own and the portfolio value at the end of one period
        is redistributed over the funds by the initial weights for the next.
        The last row of a period is also the first row of the following one, so
        boundary dates appear twice in the result and the index restarts at 0 for
        every period.

        Returns:
            Frame with one column per fund, ``portfolio`` and ``date``. In the
            rebalancing branch ``date`` is formatted as ``YYYY-MM-DD`` strings; in
            the other branch it is returned as stored in ``self.df``.

        Raises:
            ValueError: If rebalancing is requested without a frequency, or if no
                rebalancing period contains any price rows.
            KeyError: If ``rebalance_frequency`` is not a key of ``frequency_map``.
        """
        if not self.rebalance:
            # Work on a local so the stored price history is not overwritten.
            normalised = self.normalise_index(self.df)
            return self.backtest_portfolio(self.fund_amounts, normalised)

        total_invested = sum(self.fund_amounts)
        fund_weights = [amount / total_invested for amount in self.fund_amounts]
        rebalancing_dates = self._rebalancing_dates()

        current_amount = total_invested
        period_frames = []
        start_period = rebalancing_dates[0]

        for end_period in rebalancing_dates[1:]:
            in_period = (self.df.date >= start_period) & (self.df.date <= end_period)
            fund_history = self.df.loc[in_period].reset_index(drop=True)

            # A period without any price rows has nothing to normalise; carry the
            # current start date and amount over to the next period.
            if fund_history.empty:
                continue

            fund_history = self.normalise_index(fund_history)
            period_amounts = np.multiply(fund_weights, current_amount)
            slice_df = self.backtest_portfolio(period_amounts, fund_history)

            # The closing value and date of this period seed the next one.
            current_amount = slice_df.iloc[-1]["portfolio"]
            start_period = slice_df.iloc[-1]["date"]
            period_frames.append(slice_df)

        if not period_frames:
            raise ValueError(
                f"No price data between {self.start_date} and {self.end_date}"
            )

        result_df = pd.concat(period_frames)
        result_df["date"] = result_df["date"].dt.strftime("%Y-%m-%d")
        return result_df


In [287]:
# Example back-test configuration: two funds with equal initial amounts, rebalanced
# with the "m" frequency code over the given date window.
external_data = dict(
    fund_codes=["EBAY", "AAPL"],
    fund_amounts=[30, 30],
    start_date="2012-12-27",
    end_date="2019-12-31",
    df=price_df,
    rebalance=True,
    rebalance_frequency="m",
)

In [288]:
# Build the Portfolio model from the configuration dict defined above.
mock_portfolio = Portfolio(**external_data)

In [289]:
# Trim and sort the price history first: this replaces `mock_portfolio.df`.
mock_portfolio.prepare_data()
# Bare last expression so the result frame is rendered. In the output below each
# rebalancing date appears twice (last row of one period, first row of the next)
# and the index restarts at 0 for every period.
mock_portfolio.backtest_strategy()

Unnamed: 0,EBAY,AAPL,portfolio,date
0,30.000000,30.000000,60.000000,2012-12-27
1,29.713661,29.681350,59.395011,2012-12-28
2,30.422171,30.996752,61.418923,2012-12-31
0,30.709462,30.709462,61.418923,2012-12-31
1,32.270476,31.682214,63.952690,2013-01-02
...,...,...,...,...
17,88.028200,91.850086,179.878286,2019-12-24
18,88.125442,93.672419,181.797861,2019-12-26
19,87.687853,93.636877,181.324730,2019-12-27
20,87.031471,94.192624,181.224095,2019-12-30
