In [2]:
from utilities import database as db
import pandas as pd
import numpy as np
import logging

from tqdm import tqdm

In [3]:
### Additional settings ###

# Logging: records at INFO level and above are written to the file
# "data_collection.log" (path is relative to the kernel's working directory).
# Each line looks like "2024-01-31 12:00:00 [INFO] message".
logging.basicConfig(
    filename="data_collection.log",
    level=logging.INFO,
    format="%(asctime)s [%(levelname)s] %(message)s",
    datefmt="%Y-%m-%d %H:%M:%S"
)

CS = 1000 # DB load chunk size (NOTE(review): not referenced in the cells visible here)

# Calculate Indicators + Signals
### Technical Indicators Glossary

- `SMA (Simple Moving Average)`  
  A basic moving average calculated by taking the arithmetic mean of a given set of prices over a specific period of time.

- `EMA (Exponential Moving Average)`  
  A type of moving average that gives more weight to recent prices, making it more responsive to new information.

- `RSI (Relative Strength Index)`  
  A momentum oscillator that measures the speed and change of price movements, typically used to identify overbought or oversold conditions.

- `MACD (Moving Average Convergence Divergence)`  
  A trend-following momentum indicator that shows the relationship between two EMAs and helps identify bullish or bearish momentum.

- `Bollinger Bands`  
  A volatility indicator consisting of a middle SMA and two price bands above and below it, typically two standard deviations away.

- `ATR (Average True Range)`  
  A volatility indicator that measures the degree of price movement or volatility for an asset over a specific time period.

- `OBV (On-Balance Volume)` 
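  A cumulative volume indicator: a period's volume is added to the running total when the close rises and subtracted when it falls; it is used to gauge the buying or selling pressure behind price moves. <br>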
<br>
---

### Trading Signal Descriptions

- `Signal_SMA`  
  A short-term trend signal: `1` if SMA(20) is above SMA(50), indicating upward momentum; `0` otherwise.

- `Signal_MACD`  
  A momentum signal: `1` if the MACD line is above the signal line (bullish); `0` otherwise.

- `Signal_RSI`  
  A mean-reversion signal based on the RSI value:  
  - `1` if RSI < 30 (oversold, potential Buy)  
  - `-1` if RSI > 70 (overbought, potential Sell)  
  - `0` otherwise

- `Signal_SMA_Cross`  
  A long-term trend signal:  
  - `1` if SMA(50) > SMA(200) (bullish "Golden Cross")  
  - `-1` if SMA(50) < SMA(200) (bearish "Death Cross")  
  - `0` if the two averages are equal, or if either is not yet available (SMA(200) needs at least 200 rows of history)


In [35]:
def add_indicators(df):
    '''Append technical-indicator columns to a price DataFrame.

    The helpers run in this order: SMA/EMA, MACD, RSI, Bollinger Bands, OBV,
    momentum, ATR. Each one adds its columns to ``df`` directly.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain the columns 'Close', 'High', 'Low' and 'Volume'.
        Every indicator looks back over preceding rows and nothing here
        sorts the frame, so rows must already be ordered oldest-first.

    Returns
    -------
    pandas.DataFrame
        The same object that was passed in (it is modified in place).
        Rolling-window columns are NaN until a full window is available.
        A frame with zero rows is returned with the new (empty) columns.
    '''

    def add_sma_ema(df):
        '''Simple Moving Average (SMA) and Exponential Moving Average (EMA)'''
        for window in [7, 14, 20, 50, 100, 200]:
            df[f'SMA_{window}'] = df['Close'].rolling(window).mean()
            # ewm() is called with pandas' default weighting (adjust=True).
            df[f'EMA_{window}'] = df['Close'].ewm(span=window).mean()
        return df

    def add_macd(df):
        '''Moving Average Convergence Divergence (MACD)'''
        df['EMA_12'] = df['Close'].ewm(span=12).mean()
        df['EMA_26'] = df['Close'].ewm(span=26).mean()
        df['MACD'] = df['EMA_12'] - df['EMA_26']
        # Signal line: 9-span EMA of the MACD line itself.
        df['MACD_Signal'] = df['MACD'].ewm(span=9).mean()
        return df

    def add_rsi(df):
        '''Relative Strength Index (RSI) over 14 rows, using plain rolling means'''
        delta = df['Close'].diff()
        gain = delta.clip(lower=0)
        loss = -delta.clip(upper=0)
        avg_gain = gain.rolling(14).mean()
        avg_loss = loss.rolling(14).mean()
        # avg_loss == 0 with gains gives rs = inf and therefore RSI = 100;
        # a completely flat window (0 / 0) leaves RSI as NaN.
        rs = avg_gain / avg_loss
        df['RSI'] = 100 - (100 / (1 + rs))
        return df

    def add_bollinger_bands(df):
        '''Bollinger Bands: 20-row mean +/- 2 rolling standard deviations'''
        df['BB_Mid'] = df['Close'].rolling(20).mean()
        df['BB_Std'] = df['Close'].rolling(20).std()
        df['BB_Upper'] = df['BB_Mid'] + 2 * df['BB_Std']
        df['BB_Lower'] = df['BB_Mid'] - 2 * df['BB_Std']
        return df

    def add_momentum(df):
        '''Momentum: Close minus the Close `period` rows earlier'''
        for period in [10, 20, 50]:
            df[f'mom_{period}'] = df['Close'] - df['Close'].shift(period)
        return df

    def add_atr(df):
        '''Average True Range (ATR) over 14 and 30 rows'''
        high_low = df['High'] - df['Low']
        high_close = np.abs(df['High'] - df['Close'].shift())
        low_close = np.abs(df['Low'] - df['Close'].shift())
        # Row-wise max skips NaN, so the first row (no previous close)
        # falls back to High - Low.
        tr = pd.concat([high_low, high_close, low_close], axis=1).max(axis=1)
        df['ATR_14'] = tr.rolling(14).mean()
        df['ATR_30'] = tr.rolling(30).mean()
        return df

    def add_obv(df):
        '''On-Balance Volume (OBV)'''
        # +1 when Close rose, -1 when it fell, 0 when unchanged. The first
        # row (and any row whose change is NaN) counts as unchanged.
        direction = np.sign(df['Close'].diff()).fillna(0).astype('int64')
        # Unchanged rows contribute exactly 0 whatever their volume is.
        signed_volume = (direction * df['Volume']).where(direction != 0, 0)
        # Vectorised running total. Unlike seeding a Python list with [0],
        # this also works for a zero-row frame. skipna=False keeps a missing
        # volume visible in every later row instead of silently skipping it.
        df['OBV'] = signed_volume.cumsum(skipna=False)
        return df

    for func in [add_sma_ema, add_macd, add_rsi, add_bollinger_bands, add_obv, add_momentum, add_atr]:
        df = func(df)

    return df

def generate_signals(df):
    '''Derive integer trading-signal columns from existing indicator columns.

    Reads 'SMA_20', 'SMA_50', 'SMA_200', 'MACD', 'MACD_Signal' and 'RSI'
    and adds four columns to ``df`` (modified in place and returned):

    - Signal_SMA:       1 where SMA_20 > SMA_50, else 0
    - Signal_MACD:      1 where MACD > MACD_Signal, else 0
    - Signal_RSI:       1 where RSI < 30, -1 where RSI > 70, else 0
    - Signal_SMA_Cross: 1 where SMA_50 > SMA_200, -1 where SMA_50 < SMA_200,
                        else 0

    Comparisons against NaN are False, so rows whose inputs are missing
    end up with 0.
    '''
    sma_short, sma_mid, sma_long = df['SMA_20'], df['SMA_50'], df['SMA_200']
    macd_line, macd_signal = df['MACD'], df['MACD_Signal']
    rsi = df['RSI']

    # Two-state signals
    df['Signal_SMA'] = np.where(sma_short > sma_mid, 1, 0)
    df['Signal_MACD'] = np.where(macd_line > macd_signal, 1, 0)

    # Three-state signals: the two conditions are mutually exclusive and
    # anything matching neither falls through to the default of 0.
    df['Signal_RSI'] = np.select([rsi < 30, rsi > 70], [1, -1], default=0)
    df['Signal_SMA_Cross'] = np.select(
        [sma_mid > sma_long, sma_mid < sma_long],
        [1, -1],
        default=0,
    )

    return df


In [None]:
# Get the list of tickers from the database
# NOTE(review): one engine is fetched once and reused for every read and write
# below; this assumes db.get_engine() returns a reusable engine -- confirm.
engine = db.get_engine()
tickers_query = 'SELECT distinct ticker from sp500_constituents_hist_prices_raw'
tickers_df = pd.read_sql(tickers_query, engine)
tickers = tickers_df['ticker'].tolist()

# Calculate indicators for each ticker.
# The first write replaces any existing output table; every later ticker is
# appended, so the table ends up containing all tickers rather than only the
# last one processed.
strategy = 'replace'
for ticker in tqdm(tickers):

    # NOTE(review): the ticker is interpolated directly into the SQL text.
    # The values come from this same table, but a symbol containing a quote
    # would break the statement -- switch to bound parameters once the
    # driver's placeholder style is known.
    query = f"""
        SELECT * 
        FROM sp500_constituents_hist_prices_raw 
        WHERE ticker = '{ticker}' 
        order by date asc
        """
    df = pd.read_sql(query, engine)

    df = add_indicators(df)
    df = generate_signals(df)
    # Normalise column names for the output table (lower-case, no spaces).
    # This must read df.columns: no variable called `data` exists here.
    df.columns = [col.lower().replace(' ', '_') for col in df.columns]

    df.to_sql(
        'sp500_constituents_hist_prices_proc',
        engine,
        if_exists=strategy,
        index=False
        )
    strategy = 'append'