# Import

In [1]:
import os
import ast
import requests
import logging

import yfinance as yf
import pandas as pd
import numpy as np
import finnhub
from dotenv import load_dotenv
from pathlib import Path    
import sys
import time
sys.path.append('../') # Change the python path at runtime

# Self-created modules
from src.utils import path as path_yq
from src.backtesting import Backtest, Strategy

# Load variables from a local .env file into the process environment.
load_dotenv()
POLYGON_API_KEY = os.environ.get('POLYGON_API_KEY')  # None when the variable is not set

# Back-test window. The compact *_STR spellings are derived from the ISO dates
# so the two forms of each date cannot drift apart when the window is edited.
BT_START_DATE = '2023-11-01'
BT_START_STR = BT_START_DATE.replace('-', '')
BT_END_DATE = '2024-01-31'
BT_END_STR = BT_END_DATE.replace('-', '')

cur_dir = Path.cwd()
root_dir = path_yq.get_root_dir(cur_dir)

# logging.basicConfig(filename=...) opens the log file immediately and raises
# FileNotFoundError when the parent folder is missing, so create it first.
log_dir = Path(root_dir).joinpath('logs')
log_dir.mkdir(parents=True, exist_ok=True)

logging.basicConfig(filename=log_dir.joinpath('trading_system.log'),
                    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
                    level=logging.DEBUG)

  from .autonotebook import tqdm as notebook_tqdm


Test

In [None]:
def kwargs_func(**kwargs):
    """Print each keyword argument on its own line as ``name: value``."""
    for name in kwargs:
        value = kwargs[name]
        print(f"{name}: {value}")


# Demonstration call: prints one line per keyword argument.
kwargs_func(test='a', num=1)

In [15]:
class SimpleStmStrat(Strategy):
    """
    Use a proportional amount of cash to trade with the sentiment score indicator.

    On every bar the latest value of the configured sentiment column decides the
    direction: a positive score opens a long, a negative score opens a short, and
    a score of zero places no order. Each order carries a stop-loss and a
    take-profit placed at fixed percentages around the latest close.

    Parameters (class variables, overridable through keyword arguments):
        col: name of the sentiment-score column in ``data``; must be provided.
        sl_pct: stop-loss distance as a fraction of the latest close.
        tp_pct: take-profit distance as a fraction of the latest close.
        risk_per_trade: scaling factor applied to the order size.
    """
    # Strategy class should define parameters as class variables before they can be optimized or run with.
    col = None
    sl_pct = 0.01
    tp_pct = 0.02
    risk_per_trade = 0.5 # Maximum of the portfolio on one trade

    # Names of the tunable parameters picked up from **kwargs in __init__.
    _param_names = ('col', 'sl_pct', 'tp_pct', 'risk_per_trade')

    # Add the parameters in init
    def __init__(self, broker, data, **kwargs):
        super().__init__(broker, data, **kwargs)  # Make sure the parent class can handle **kwargs appropriately
        # Fall back to the current (class-level) value when a parameter is not passed.
        for name in self._param_names:
            setattr(self, name, kwargs.get(name, getattr(self, name)))

    # Initialize additional indicators here if needed
    def init(self):
        # Fail early with a clear message instead of a lookup error on the first bar.
        if self.col is None:
            raise ValueError("SimpleStmStrat requires a sentiment column: pass col='<column name>'.")
        # self.trade_size = 40 # This times the next open price cannot exceed equity
        # Column names listed in the original notes (presumably the available
        # sentiment-score columns - verify against the merged CSV):
        # ['cln_hdl_pol_stc_score', 'cln_smr_pol_stc_score', 'cln_hdl_pol_blob_score', 'cln_smr_pol_blob_score', 'cln_news_pol_blob_score', 'cln_hdl_lemma_pol_blob_score', 'cln_smr_lemma_pol_blob_score', 'cln_news_lemma_pol_blob_score', 'cln_hdl_pol_sid_score', 'cln_smr_pol_sid_score', 'cln_news_pol_sid_score', 'cln_hdl_lemma_pol_sid_score', 'cln_smr_lemma_pol_sid_score', 'cln_news_lemma_pol_sid_score', 'cln_hdl_pol_bert_score', 'cln_smr_pol_bert_score', 'cln_news_pol_bert_score', 'cln_hdl_lemma_pol_bert_score', 'cln_smr_lemma_pol_bert_score', 'cln_news_lemma_pol_bert_score', 'cln_hdl_pol_finbert_score', 'cln_smr_pol_finbert_score', 'cln_news_pol_finbert_score', 'cln_hdl_lemma_pol_finbert_score', 'cln_smr_lemma_pol_finbert_score', 'cln_news_lemma_pol_finbert_score']

    def next(self):
        cur_stm = self.data[self.col][-1]
        # print(self.data['closest_date'][-1])
        cur_price = self.data['Close'][-1]

        # print(f"-----{self.data['Datetime'][-1]}-----")
        # Size grows quadratically with the score magnitude, starting from half of risk_per_trade.
        trade_size = (0.5 * (abs(cur_stm) ** 2) + 0.5) * self.risk_per_trade
        if cur_stm > 0: # Many losses if I don't take
            self.buy(size=trade_size, sl=(1 - self.sl_pct) * cur_price, tp=(1 + self.tp_pct) * cur_price)
            # If size is a value between 0 and 1, it is interpreted as a fraction of current available liquidity (cash plus Position.pl minus used margin). A value greater than or equal to 1 indicates an absolute number of units.
        elif cur_stm < 0:
            self.sell(size=trade_size, sl=(1 + self.sl_pct) * cur_price, tp=(1 - self.tp_pct) * cur_price)
        # A score of exactly 0 (or NaN, for which both comparisons are False) places no order.

# Location of the pre-merged CSV for the configured back-test window.
merge_path = root_dir / 'data' / 'proc' / f'BA_merged_{BT_START_STR}_{BT_END_STR}.csv'
merged2 = pd.read_csv(merge_path, index_col=False)
# NOTE(review): the recorded output of this cell shows Start 0.0 / End 31397.0 and
# NaN annualised statistics, which suggests the frame is indexed by row number
# rather than by timestamp - confirm whether a datetime index is intended.

# Running the backtest
bt_settings = dict(
    cash=10000,
    margin=1,
    commission=.0,  # TODO: Adjust commission
    trade_on_close=False,
    hedging=True,
)
bt = Backtest(data=merged2, strategy=SimpleStmStrat, **bt_settings)
results, returns = bt.run(col='cln_hdl_pol_stc_score')

# Show the summary statistics, then the second value returned by bt.run.
display(results)
print(type(returns))
display(returns)
bt.plot(results=results, plot_return=True) # TODO: Can have filename, plot in html

# These are the main results that we need
headline_keys = ('Return [%]', 'Max. Drawdown [%]', '# Trades', 'Win Rate [%]')
print(*(results.get(key) for key in headline_keys))


  bt = Backtest(data=merged2,


Start                                     0.0
End                                   31397.0
Duration                              31397.0
Exposure Time [%]                   85.970444
Equity Final [$]                 10137.533592
Equity Peak [$]                   11350.94807
Return [%]                           1.375336
Buy & Hold Return [%]                6.905782
Return (Ann.) [%]                         0.0
Volatility (Ann.) [%]                     NaN
Sharpe Ratio                              NaN
Sortino Ratio                             NaN
Calmar Ratio                              0.0
Max. Drawdown [%]                  -11.141218
Avg. Drawdown [%]                   -0.307805
Max. Drawdown Duration                15198.0
Avg. Drawdown Duration             187.690909
# Trades                                421.0
Win Rate [%]                        37.054632
Best Trade [%]                       2.722772
Worst Trade [%]                     -6.827858
Avg. Trade [%]                    

<class 'pandas.core.series.Series'>


0     -0.009718
1     -0.010218
2     -0.012548
3      0.019279
4     -0.010051
         ...   
416   -0.001859
417   -0.001949
418   -0.002946
419   -0.006715
420   -0.004784
Name: ReturnPct, Length: 421, dtype: float64

  fig = gridplot(
  fig = gridplot(


1.3753359200000523 -11.141218224249972 421.0 37.05463182897862


# Code

In [None]:
hist_start = pd.to_datetime('2023-08-09') # Historical start date
hist_end = pd.to_datetime('2023-12-14')   # Passed as `end` to yf.download
# Define the ticker list
ticker_list = ['BA']

# Fetch the data.
# auto_adjust is passed explicitly instead of relying on the library default:
# the next cell reads the 'Adj Close' column, so unadjusted prices plus the
# separate adjusted-close column must be requested.
dl_data = yf.download(ticker_list, start=hist_start, end=hist_end, auto_adjust=False)


In [None]:
# Keep only the adjusted close as a DataFrame; a column called 'Adj Close'
# (if present after the conversion) is renamed to 'BA'.
data = pd.DataFrame(dl_data['Adj Close']).rename(columns={'Adj Close': 'BA'})

# Missing-value count per column.
display(data.isna().sum())

# Make sure the index holds datetimes, then show the last 20 rows.
data.index = pd.to_datetime(data.index)
display(data.tail(20))



In [None]:
# Create a yfinance Ticker handle for 'BA' and show its `news` attribute
# as the cell output (last expression of the cell).
boeing = yf.Ticker('BA')
boeing.news

# yfinance doesn't provide a lot of news

- Assumption
    - The entire downloaded price history is used to estimate the drift and the standard deviation (volatility)

In [None]:
# Calculate log returns for the entire dataframe.
# The first row has no previous price, so its log return is NaN and is dropped.
# (dropna() returns a new frame, which keeps this cell safe to re-run.)
log_returns = np.log(data / data.shift(1)).dropna()

display(log_returns)

# Reduce the 'BA' column explicitly. np.mean / np.std applied to a whole
# DataFrame return either a Series or a scalar depending on the pandas version,
# which made BA_mu and BA_vol inconsistent and the ['BA'] lookup fragile.
ba_log_returns = log_returns['BA']

# Calculate the drift (mean log return per observation)
BA_mu = ba_log_returns.mean()

# Calculate the volatility (standard deviation of log returns).
# ddof=0 matches NumPy's default, which np.std used before.
BA_vol = ba_log_returns.std(ddof=0)


display(data)
print(f"BA_mu, BA_vol: {BA_mu}, {BA_vol}")

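- Assumptions
    - Holidays are not removed: every simulated step is treated as one trading day (`dt = 1/252`)
    - The assumed annual interest rate is 0.0175; it is used as the drift of the simulated paths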

In [None]:
def simulate_gbm(S0, mu, sigma, T, dt, N, r=0.0175, rng=None):
    """
    Simulate stock prices using Geometric Brownian Motion.

    Each step multiplies the previous price by
    ``exp((r - 0.5 * sigma**2) * dt + sigma * sqrt(dt) * Z)`` with ``Z`` drawn
    from a standard normal distribution, independently per path.

    Parameters:
    S0 (float): Initial stock price
    mu (float): Expected return. Accepted for interface compatibility but NOT
        used: the drift of the simulated paths is ``r``.
    sigma (float): Volatility, in the same time unit as ``T`` and ``dt``
    T (float): Time horizon in years
    dt (float): Time step
    N (int): Number of simulations
    r (float): Rate used as the drift term (default 0.0175, the value that was
        previously hard-coded)
    rng (numpy.random.Generator, optional): Source of the normal draws. When
        omitted, NumPy's global random state is used, as before.

    Returns:
    DataFrame: Simulated stock price paths, one row per time step (row 0 holds
        S0) and one column per simulation

    Raises:
    ValueError: If ``dt`` is not positive or ``T`` is negative
    """
    if dt <= 0:
        raise ValueError("dt must be positive")
    if T < 0:
        raise ValueError("T must be non-negative")

    # Floor with a small tolerance: plain int(T / dt) silently loses a step when
    # the ratio lands just below an integer (0.3 / 0.1 == 2.9999999999999996).
    time_steps = int(np.floor(T / dt + 1e-9))
    draw_normals = np.random.standard_normal if rng is None else rng.standard_normal

    stock_paths = np.zeros((time_steps + 1, N))
    stock_paths[0] = S0

    # Loop-invariant parts of the exponent.
    drift = (r - 0.5 * sigma**2) * dt
    diffusion = sigma * np.sqrt(dt)

    for t in range(1, time_steps + 1):
        Z = draw_normals(N)
        stock_paths[t] = stock_paths[t - 1] * np.exp(drift + diffusion * Z)

    return pd.DataFrame(stock_paths)

# GBM parameters (example values, you should estimate these from real data)
TRADING_DAYS_PER_YEAR = 252
GBM_SEED = 42 # Fixed seed so the simulated paths are reproducible on re-run

S0 = data['BA'].iloc[-1] # Initial stock price
dt = 1 / TRADING_DAYS_PER_YEAR # Daily time step, expressed in years
# BA_mu and BA_vol are estimated from per-day log returns, while the simulation
# steps in years, so both are annualised before use.
mu = BA_mu * TRADING_DAYS_PER_YEAR # Expected return (annualised)
sigma = BA_vol * np.sqrt(TRADING_DAYS_PER_YEAR) # Volatility (annualised)
T = 0.1 # Time horizon in years
N = 10 # Number of simulations

np.random.seed(GBM_SEED)
simulated_data = simulate_gbm(S0, mu, sigma, T, dt, N)
display(simulated_data)

# Average across the simulated paths at every time step.
mean_sim_data = simulated_data.mean(axis=1)
display(mean_sim_data)

# Plotting the mean of the simulated stock price paths.
# pandas' own plotting API is used because `plt` is never imported in this
# notebook; the original plt.* calls raised NameError on a fresh kernel.
ax = mean_sim_data.plot(figsize=(10, 6), title='GBM - Simulated Stock Price Paths')
ax.set_xlabel('Time Steps')
ax.set_ylabel('Stock Price');

In [None]:
# Small toy frame for experimenting with groupby/apply.
# The source dict is deliberately not called `data`: that name holds the BA
# price frame used by the earlier cells, and rebinding it here would break them
# when cells are re-run. pandas is already imported in the first cell.
toy_records = {
    'cat': [5, 5, 6, 6],
    'val': [2, 3, 0, 1]
}
df = pd.DataFrame(toy_records)
df

In [None]:
def custom_agg(grp):
    """Return the column-wise mean of the rows of ``grp`` whose 'val' is above zero."""
    positive_rows = grp.loc[grp['val'] > 0]
    return positive_rows.mean()

# Run custom_agg once per 'cat' group of df; the result is the cell output.
df.groupby('cat').apply(custom_agg)