# Library Import

In [1]:
import pandas as pd 
import yfinance as yf
import polygon 
import numpy as np
from dotenv import load_dotenv
import os
import requests
import duckdb

In [2]:
import psycopg2
# Load environment variables
load_dotenv()

# Get the postgres url
postgres_url = os.environ.get('POSTGRES_URL')
user = os.environ.get('POSTGRES_USER')
password = os.environ.get('POSTGRES_PASSWORD')
# Connect to the postgres database
try:
    conn = psycopg2.connect(postgres_url)
    cursor = conn.cursor()
    print("Connected to the timescaledb database")
except Exception as e:
    print(f"Error connecting to the database: {e}")
    conn = None
    cursor = None
    

Connected to the timescaledb database


## Connect DuckDB to Timescale Postgres

In [3]:
## Connect Polar to Timescale Postgres
duck_query = duckdb.sql(f"""
    INSTALL postgres_scanner;
    LOAD postgres_scanner;

    SELECT * FROM postgres_scan(
        'host=localhost port=5432 user={user} password={password} dbname=condvest',
        'public', 'raw'
    ) ORDER BY symbol, date DESC;
""")

duck_df = duck_query.df()

In [None]:
query = f"""
WITH raw_data AS (
    SELECT * FROM postgres_scan(
        'host=localhost port=5432 user={user} password={password} dbname=condvest',
        'public', 'raw' 
    )
),
ranked AS (
    SELECT *,
        row_number() OVER (PARTITION BY symbol ORDER BY date) as rn
    FROM raw_data
),
grouped AS (
    SELECT *,
        (rn - 1) / 3 as group_id
    FROM ranked
)
SELECT 
    symbol,
    min(date) as date,
    first(open) as open,
    max(high) as high,
    min(low) as low,
    last(close) as close,
    sum(volume) as volume
FROM grouped
GROUP BY symbol, group_id
ORDER BY symbol, date;
"""

%time duckdb_result = duckdb.sql(query)

In [None]:
duckdb_result.show()

## Connect Polar to Timescale Postgres

In [38]:
import polars as pl

# Load from Timescale using ConnectorX
polars_df = pl.read_database_uri(
    "SELECT date, symbol, open, high, low, close, volume FROM raw ORDER BY symbol ASC, date ASC",
    uri=postgres_url
)

In [None]:
# Ensure datetime column is properly cast
polars_df = polars_df.with_columns([
    pl.col("date").cast(pl.Datetime("us"))
])

# Resample to 3-day OHLCV candles using Polars' groupby_dynamic
resampled_3d_df = (
    polars_df.group_by_dynamic(
        index_column="date",
        every="3d",
        by="symbol",
        closed="left",
        period="3d"
    )
    .agg([
        pl.col("open").first().alias("open"),
        pl.col("high").max().alias("high"),
        pl.col("low").min().alias("low"),
        pl.col("close").last().alias("close"),
        pl.col("volume").sum().alias("volume")
    ])
    .sort(["symbol", "date"])
)


In [None]:
resampled_3d_df

## DuckDB + Polar Add Indicator

In [3]:
import duckdb
import polars as pl
import time

# Step 1: Connect and load Postgres data into DuckDB
con = duckdb.connect()

combined_results = []
intervals = [1,3,5,8,13]
for interval in intervals:
    start_time = time.time()

    query = f"""
    WITH raw_data AS (
        SELECT * FROM postgres_scan(
            'host=localhost port=5432 user={user} password={password} dbname=condvest',
            'public', 'raw'
        )
    ),
    ranked AS (
        SELECT *,
            row_number() OVER (PARTITION BY symbol ORDER BY date) as rn
        FROM raw_data
    ),
    grouped AS (
        SELECT *,
            (rn - 1) / {interval} as group_id
        FROM ranked
    )
    SELECT 
        symbol,
        min(date) as date,
        first(open) as open,
        max(high) as high,
        min(low) as low,
        last(close) as close,
        sum(volume) as volume,
        '{interval}'::INT as interval
    FROM grouped
    GROUP BY symbol, group_id
    ORDER BY symbol, date;
    """
    
    df = duckdb.sql(query)
    print(f"DuckDB query execution time: {time.time() - start_time:.2f} seconds")
    combined_results.append(df.df())
    print(f"Duck to dataframe conversion time: {time.time() - start_time:.2f} seconds")

# Step 2: Convert to Polars DataFrame
start_time = time.time()
pl_resampled_df = pl.from_pandas(pd.concat(combined_results))
print(f"Pandas to Polars conversion time: {time.time() - start_time:.2f} seconds")

# Step 3: Convert to Polars and add indicators
def add_indicators(df: pl.DataFrame) -> pl.DataFrame:
    df = df.sort("date")
    
    # Step 1: Compute EMAs
    df = df.with_columns([
        pl.col("close").ewm_mean(span=8).alias("EMA_8"),
        pl.col("close").ewm_mean(span=13).alias("EMA_13"),
        pl.col("close").ewm_mean(span=21).alias("EMA_21"),
        pl.col("close").ewm_mean(span=144).alias("EMA_144"),
        pl.col("close").ewm_mean(span=169).alias("EMA_169"),
        pl.col("close").ewm_mean(span=55).alias("EMA_55"),
        pl.col("close").ewm_mean(span=89).alias("EMA_89"),
    ])
    
    # Step 2: Compute MACD and RSI using already-created columns
    df = df.with_columns([
        (pl.col("EMA_13") - pl.col("EMA_21")).alias("macd_fast"),
        (pl.col("EMA_55") - pl.col("EMA_89")).alias("macd_slow"),
    ])
    
    return df

start_time = time.time()
df_with_indicators = pl_resampled_df.group_by("symbol", maintain_order=True).map_groups(add_indicators)
print(f"Indicator calculation time: {time.time() - start_time:.2f} seconds")

print("\nFirst 10 rows of result:")
print(df_with_indicators.head(10))

DuckDB query execution time: 0.06 seconds
Duck to dataframe conversion time: 19.79 seconds
DuckDB query execution time: 0.03 seconds
Duck to dataframe conversion time: 12.24 seconds
DuckDB query execution time: 0.03 seconds
Duck to dataframe conversion time: 12.14 seconds
DuckDB query execution time: 0.03 seconds
Duck to dataframe conversion time: 13.52 seconds
DuckDB query execution time: 0.03 seconds
Duck to dataframe conversion time: 14.02 seconds
Pandas to Polars conversion time: 15.60 seconds
Indicator calculation time: 22.44 seconds

First 10 rows of result:
shape: (10, 17)
┌────────┬─────────────┬───────────┬───────────┬───┬───────────┬───────────┬───────────┬───────────┐
│ symbol ┆ date        ┆ open      ┆ high      ┆ … ┆ EMA_55    ┆ EMA_89    ┆ macd_fast ┆ macd_slow │
│ ---    ┆ ---         ┆ ---       ┆ ---       ┆   ┆ ---       ┆ ---       ┆ ---       ┆ ---       │
│ str    ┆ datetime[μs ┆ f64       ┆ f64       ┆   ┆ f64       ┆ f64       ┆ f64       ┆ f64       │
│        

In [9]:
import polars as pl
import numpy as np
from typing import List

class TrendAlertProcessor:
    """
    TrendAlertProcessor using Polars for efficient processing of financial time series data.
    Incorporates advanced trend detection algorithms from the dictionary-based implementation.
    """
    def __init__(self, df: pl.DataFrame, intervals: List[int]):
        self.df = df
        self.intervals = intervals
        self.rolling_window = 50
    
    def _add_velocity_alert(self, df: pl.DataFrame) -> pl.DataFrame:
        """
        Add velocity alerts based on the relationship between price and various EMAs.
        Similar to velocity_alert_dict in the original implementation.
        """
        # Add velocity status
        df = df.with_columns([
            pl.when(
                (pl.col("close") > pl.col("open")) & 
                (pl.col("close") > pl.max_horizontal("EMA_8", "EMA_13")) & 
                (pl.col("close") > pl.max_horizontal("EMA_144", "EMA_169")) &
                (pl.min_horizontal("EMA_8", "EMA_13") > pl.max_horizontal("EMA_144", "EMA_169"))
            ).then(pl.lit("velocity_maintained"))
            .when(
                (pl.col("close") < pl.col("EMA_13")) & 
                (pl.col("close") > pl.col("EMA_169"))
            ).then(pl.lit("velocity_weak"))
            .when(
                (pl.col("close") < pl.col("EMA_13")) & 
                (pl.col("close") < pl.col("EMA_169"))
            ).then(pl.lit("velocity_loss"))
            .otherwise(pl.lit("velocity_negotiating"))
            .alias("velocity_status")
        ])
        
        return df
    
    def _add_accel_decel_alert(self, df: pl.DataFrame, interval: int) -> pl.DataFrame:
        """
        Add acceleration/deceleration alerts based on EMA relationships and velocity status history.
        """
        window_dict = {
            1: 28, 3: 20, 5: 20, 8: 14, 13: 14
        }
        obs_window = window_dict.get(interval, 7)
        
        # First get velocity status
        df = self._add_velocity_alert(df)
        
        # Count velocity statuses in the observation window
        df = df.with_columns([
            pl.col("velocity_status").map_elements(
                lambda s: 1 if s in ["velocity_loss", "velocity_weak", "velocity_negotiating"] else 0,
                return_dtype=pl.Int32
            ).alias("loss_flag"),
            pl.col("velocity_status").map_elements(
                lambda s: 1 if s == "velocity_maintained" else 0,
                return_dtype=pl.Int32
            ).alias("maintain_flag")
        ])
        
        df = df.with_columns([
            pl.col("loss_flag").rolling_sum(window_size=obs_window).alias("count_velocity_loss"),
            pl.col("maintain_flag").rolling_sum(window_size=obs_window).alias("count_velocity_maintained")
        ])
        
        # Add acceleration/deceleration signals
        df = df.with_columns([
            pl.when(
                (pl.max_horizontal("EMA_144", "EMA_169") <= pl.max_horizontal("EMA_8", "EMA_13")) &
                (pl.col("open") < pl.col("close")) &
                (pl.col("count_velocity_loss") > pl.col("count_velocity_maintained"))
            ).then(pl.lit("accelerated"))
            .when(
                (pl.col("close") < pl.min_horizontal("EMA_8", "EMA_13")) &
                (pl.col("count_velocity_maintained") < pl.col("count_velocity_loss"))
            ).then(pl.lit("decelerated"))
            .otherwise(None).alias("momentum_signal")
        ])
        
        # Create alert
        momentum_alerts = df.filter(pl.col("momentum_signal").is_not_null())
        momentum_alerts = momentum_alerts.with_columns([
            pl.lit("momentum_alert").alias("alert_type"),
            pl.col("momentum_signal").alias("signal"),
            pl.lit(interval).alias("interval")
        ])
        
        return momentum_alerts.select("symbol", "date", "interval", "alert_type", "signal")
    
    def _add_ema_touch_alert(self, df: pl.DataFrame, interval: int) -> pl.DataFrame:
        """
        Add alerts for when price touches or comes close to important EMAs.
        """
        tolerance_dict = {
            1: 0.002, 3: 0.02, 5: 0.05, 8: 0.07, 13: 0.1
        }
        tolerance = tolerance_dict.get(interval, 0.02)
        
        # Calculate tolerance bands around EMAs
        df = df.with_columns([
            pl.min_horizontal(
                pl.col("EMA_144"), pl.col("EMA_169")
            ).fill_null(pl.col("EMA_13")).alias("long_term_min"),
            
            pl.max_horizontal(
                pl.col("EMA_144"), pl.col("EMA_169")
            ).fill_null(pl.col("EMA_13")).alias("long_term_max"),
            
            pl.min_horizontal(
                pl.col("EMA_8"), pl.col("EMA_13")
            ).alias("short_term_min"),
            
            pl.max_horizontal(
                pl.col("EMA_8"), pl.col("EMA_13")
            ).alias("short_term_max")
        ])
        
        # Calculate tolerance bands
        df = df.with_columns([
            (pl.col("long_term_min") * (1 - tolerance)).alias("lower_bound"),
            (pl.col("long_term_max") * (1 + tolerance)).alias("upper_bound")
        ])
        
        # Detect touches
        df = df.with_columns([
            pl.when(
                ((pl.col("low") <= pl.col("upper_bound")) & (pl.col("low") >= pl.col("lower_bound"))) |
                ((pl.col("EMA_13") <= pl.col("upper_bound")) & (pl.col("EMA_13") >= pl.col("lower_bound"))) |
                ((pl.col("EMA_8") <= pl.col("upper_bound")) & (pl.col("EMA_8") >= pl.col("lower_bound")))
            ).then(
                pl.when(
                    (pl.col("short_term_min") > pl.col("long_term_max")) &
                    (pl.min_horizontal(pl.col("close"), pl.col("open")) > pl.col("long_term_min"))
                ).then(pl.lit("support"))
                .when(
                    (pl.col("short_term_max") < pl.col("long_term_max")) &
                    (pl.col("close") < pl.col("long_term_max"))
                ).then(pl.lit("resistance"))
                .otherwise(pl.lit("neutral"))
            ).otherwise(None).alias("ema_touch_type")
        ])
        
        # Filter for touches and create alert
        ema_touch_alerts = df.filter(pl.col("ema_touch_type").is_not_null())
        ema_touch_alerts = ema_touch_alerts.with_columns([
            pl.lit("ema_touch").alias("alert_type"),
            pl.col("ema_touch_type").alias("signal"),
            pl.lit(interval).alias("interval")
        ])
        
        return ema_touch_alerts.select("symbol", "date", "interval", "alert_type", "signal")
    
    def apply(self) -> pl.DataFrame:
        """
        Apply all alert detection algorithms and return a combined DataFrame of alerts.
        """
        all_alerts = []
        
        for interval in self.intervals:
            df_interval = self.df.filter(pl.col("interval") == interval)
            
            # No empty DataFrames
            if df_interval.height == 0:
                continue
                
            # Add velocity alerts
            velocity_df = self._add_velocity_alert(df_interval)
            velocity_alerts = velocity_df.with_columns([
                pl.lit("velocity_alert").alias("alert_type"),
                pl.col("velocity_status").alias("signal"),
                pl.lit(interval).alias("interval")
            ]).select("symbol", "date", "interval", "alert_type", "signal")
            
            # Add momentum alerts
            momentum_alerts = self._add_accel_decel_alert(df_interval, interval)
            
            # Add EMA touch alerts
            ema_touch_alerts = self._add_ema_touch_alert(df_interval, interval)
            
            # Combine all alerts for this interval
            all_alerts.extend([
                velocity_alerts,
                momentum_alerts,
                ema_touch_alerts
            ])
        
        # Combine all alerts into a single DataFrame
        if all_alerts:
            return pl.concat(all_alerts)
        else:
            # Return empty DataFrame with correct schema if no alerts
            return pl.DataFrame({
                "symbol": [],
                "date": [],
                "interval": [],
                "alert_type": [],
                "signal": []
            })

## Add Alerts

In [10]:
trend_alert = TrendAlertProcessor(df_with_indicators, intervals=[1, 3, 5, 8, 13])
alert_df = trend_alert.apply()
print(alert_df.head())


shape: (5, 5)
┌────────┬────────────────────────────────┬──────────┬────────────────┬──────────────────────┐
│ symbol ┆ date                           ┆ interval ┆ alert_type     ┆ signal               │
│ ---    ┆ ---                            ┆ ---      ┆ ---            ┆ ---                  │
│ str    ┆ datetime[μs, America/Edmonton] ┆ i32      ┆ str            ┆ str                  │
╞════════╪════════════════════════════════╪══════════╪════════════════╪══════════════════════╡
│ A      ┆ 1999-11-18 00:00:00 MST        ┆ 1        ┆ velocity_alert ┆ velocity_negotiating │
│ A      ┆ 1999-11-19 00:00:00 MST        ┆ 1        ┆ velocity_alert ┆ velocity_loss        │
│ A      ┆ 1999-11-22 00:00:00 MST        ┆ 1        ┆ velocity_alert ┆ velocity_negotiating │
│ A      ┆ 1999-11-23 00:00:00 MST        ┆ 1        ┆ velocity_alert ┆ velocity_loss        │
│ A      ┆ 1999-11-24 00:00:00 MST        ┆ 1        ┆ velocity_alert ┆ velocity_loss        │
└────────┴──────────────────────────

## Add Signals