## Library Import

In [1]:
import duckdb
import polars as pl
import dotenv
import os

## Load Silver Data

In [5]:
# Smoke-test that the gold-layer parquet file can be read.
# Despite the "connection" wording in the messages, no database connection is
# opened here: the file is read straight into a polars DataFrame.
try:
    # The location may be overridden with the GOLD_DATA_PATH environment
    # variable; the original absolute path is kept as the default so existing
    # setups behave exactly as before.
    parquet_path = os.environ.get(
        "GOLD_DATA_PATH",
        '/Users/yiukitcheung/Documents/Projects/condvest/analytics/data/gold_data.parquet',
    )
    print(f"Attempting to connect to: {parquet_path}")

    # Read the whole file, then order it newest-first on the "date" column.
    pl_df = pl.read_parquet(parquet_path)
    pl_df = pl_df.sort(["date"], descending=True)
    print("Connection Successful!")
    print(f"Test query result: {pl_df}")

except Exception as e:
    # Best-effort check: report the failure instead of stopping the notebook.
    # pl_df is not (re)assigned when this branch runs.
    print(f"Connection Failed {e}")

Attempting to connect to: /Users/yiukitcheung/Documents/Projects/condvest/analytics/data/gold_data.parquet
Connection Succesfful!
Test query result: shape: (1_257, 5)
┌────────────────────┬────────────────────┬────────────────────┬────────────────────┬──────────────┐
│ accelerating       ┆ long_accelerating  ┆ long_accumulating  ┆ velocity_maintaine ┆ date         │
│ ---                ┆ ---                ┆ ---                ┆ d                  ┆ ---          │
│ list[str]          ┆ list[str]          ┆ list[str]          ┆ ---                ┆ list[date]   │
│                    ┆                    ┆                    ┆ list[str]          ┆              │
╞════════════════════╪════════════════════╪════════════════════╪════════════════════╪══════════════╡
│ ["JPM", "IMAX", …  ┆ ["FTNT", "MFIN", … ┆ ["MFIN", "NEPH", … ┆ ["BANF", "IBN", …  ┆ [2025-05-02] │
│ "MGIC"]            ┆ "QD"]              ┆ "QD"]              ┆ "CQP"]             ┆              │
│ ["SMA", "FLG", …   ┆ ["

In [None]:
# Enumerate the tables reachable through `con`.
# NOTE(review): `con` is not created in any cell shown here -- presumably a
# duckdb connection opened elsewhere; confirm it exists on a fresh kernel.
show_tables_result = con.execute("SHOW TABLES")
tables = show_tables_result.fetchall()
print("Available tables:", tables)

Available tables: [('raw_data',), ('resampled',), ('silver_1',), ('silver_13',), ('silver_2',), ('silver_21',), ('silver_3',), ('silver_34',), ('silver_5',), ('silver_8',)]


In [None]:
# Fetch the column definitions of the silver_1 table
describe_sql = "DESCRIBE silver_1"
table_info = con.execute(describe_sql).fetchall()
print("Table schema:", table_info)

Table schema: [('symbol', 'VARCHAR', 'YES', None, None, None), ('date', 'TIMESTAMP WITH TIME ZONE', 'YES', None, None, None), ('open', 'DOUBLE', 'YES', None, None, None), ('high', 'DOUBLE', 'YES', None, None, None), ('low', 'DOUBLE', 'YES', None, None, None), ('close', 'DOUBLE', 'YES', None, None, None), ('volume', 'DOUBLE', 'YES', None, None, None)]


In [None]:
# Pull five rows from silver_1 as a quick look at the data
sample_result = con.execute("SELECT * FROM silver_1 LIMIT 5")
sample_data = sample_result.fetchall()
print("Sample data:", sample_data)

Sample data: [('AAME', datetime.datetime(1980, 4, 14, 0, 0, tzinfo=<DstTzInfo 'America/Edmonton' MST-1 day, 17:00:00 STD>), 0.0, 3.0887098831, 2.9122121334, 2.9122121334, 13250.0), ('AAME', datetime.datetime(1980, 6, 25, 0, 0, tzinfo=<DstTzInfo 'America/Edmonton' MDT-1 day, 18:00:00 DST>), 0.0, 3.265207578, 3.0887098312, 3.0887098312, 2250.0), ('AAME', datetime.datetime(1980, 8, 22, 0, 0, tzinfo=<DstTzInfo 'America/Edmonton' MDT-1 day, 18:00:00 DST>), 0.0, 3.9270740055, 3.8829498291, 3.8829498291, 28750.0), ('AAME', datetime.datetime(1980, 10, 6, 0, 0, tzinfo=<DstTzInfo 'America/Edmonton' MDT-1 day, 18:00:00 DST>), 0.0, 4.4565677609, 4.412443161, 4.412443161, 37250.0), ('AAME', datetime.datetime(1980, 11, 5, 0, 0, tzinfo=<DstTzInfo 'America/Edmonton' MST-1 day, 17:00:00 STD>), 0.0, 4.5006919401, 4.412443161, 4.412443161, 11250.0)]


In [None]:
# Count every row in silver_1; fetchone() yields the single result row,
# so row_count is that row rather than a bare integer
count_sql = "SELECT COUNT(*) FROM silver_1"
row_count = con.execute(count_sql).fetchone()
print("Total rows:", row_count)

Total rows: (22384625,)


In [None]:
# Leading rows: no ORDER BY, so these are simply the first five rows returned
first_rows_result = con.execute("SELECT * FROM silver_1 LIMIT 5")
first_5_rows = first_rows_result.fetchall()
print("First 5 rows:", first_5_rows)
# Trailing rows: the five rows with the most recent date values
last_rows_result = con.execute("SELECT * FROM silver_1 ORDER BY date DESC LIMIT 5")
last_5_rows = last_rows_result.fetchall()
print("Last 5 rows:", last_5_rows)


First 5 rows: [('AAME', datetime.datetime(1980, 4, 14, 0, 0, tzinfo=<DstTzInfo 'America/Edmonton' MST-1 day, 17:00:00 STD>), 0.0, 3.0887098831, 2.9122121334, 2.9122121334, 13250.0), ('AAME', datetime.datetime(1980, 6, 25, 0, 0, tzinfo=<DstTzInfo 'America/Edmonton' MDT-1 day, 18:00:00 DST>), 0.0, 3.265207578, 3.0887098312, 3.0887098312, 2250.0), ('AAME', datetime.datetime(1980, 8, 22, 0, 0, tzinfo=<DstTzInfo 'America/Edmonton' MDT-1 day, 18:00:00 DST>), 0.0, 3.9270740055, 3.8829498291, 3.8829498291, 28750.0), ('AAME', datetime.datetime(1980, 10, 6, 0, 0, tzinfo=<DstTzInfo 'America/Edmonton' MDT-1 day, 18:00:00 DST>), 0.0, 4.4565677609, 4.412443161, 4.412443161, 37250.0), ('AAME', datetime.datetime(1980, 11, 5, 0, 0, tzinfo=<DstTzInfo 'America/Edmonton' MST-1 day, 17:00:00 STD>), 0.0, 4.5006919401, 4.412443161, 4.412443161, 11250.0)]
Last 5 rows: [('CALX', datetime.datetime(2025, 5, 2, 0, 0, tzinfo=<DstTzInfo 'America/Edmonton' MDT-1 day, 18:00:00 DST>), 41.73, 42.1133, 41.43, 42.0, 8087

In [None]:
# Per-symbol summary of the open column: row count, mean, minimum and maximum
stats_sql = """
    SELECT 
        symbol,
        COUNT(*) as count,
        AVG(open) as avg,
        MIN(open) as min,
        MAX(open) as max
    FROM silver_1
    GROUP BY symbol
"""
stats = con.execute(stats_sql).fetchall()
print("Column statistics:", stats)

Column statistics: [('FEDU', 1881, 30.41787509111223, 5.1036841834, 164.8540235017), ('GS', 6541, 160.79319930554402, 39.6061683956, 665.7786754361), ('PRO', 4472, 25.365575816778186, 3.2100000381, 74.4899978638), ('SVC', 7477, 10.939898756604201, 1.7699999809, 20.4072547013), ('TRIP', 3370, 40.645777915432845, 10.6999998093, 97.8292056391), ('MSI', 15941, 47.00182682609802, 0.0, 503.4975902375), ('KBR', 4643, 27.414540257007303, 8.2977607972, 71.5790658811), ('DFLI', 927, 42.944322876848005, 0.49, 228.6900024414), ('GDRX', 1158, 16.960712407973922, 3.9400000572, 59.3699989319), ('EVER', 1721, 20.53362057798199, 4.1599998474, 63.1399993896), ('SCYX', 2768, 22.40692961770621, 0.7799999714, 144.1999969482), ('KWE', 1084, 5471.9361707259495, 0.2169999927, 29400.0), ('OSUR', 9689, 7.698674690967777, 0.0, 22.8999996185), ('GAME', 1827, 157.31756484637106, 0.5040000081, 2893.5), ('ASND', 2582, 88.69523781390056, 12.5, 179.0500030518), ('IRS', 7643, 10.34675480360242, 1.9219697171, 27.5213818

In [None]:
# Build a typed polars DataFrame from `stats`, a list of row tuples
# (symbol, count, avg, min, max).
# orient="row" states explicitly that each tuple is one record; without it
# polars has to infer the orientation and emits a warning during construction.
stats_df = pl.DataFrame(
    stats,
    schema={
        "symbol": pl.Utf8,
        "count": pl.Int64,
        "avg": pl.Float64,
        "min": pl.Float64,
        "max": pl.Float64,
    },
    orient="row",
)
print(stats_df)

shape: (5_186, 5)
┌────────┬───────┬────────────┬───────────┬────────────┐
│ symbol ┆ count ┆ avg        ┆ min       ┆ max        │
│ ---    ┆ ---   ┆ ---        ┆ ---       ┆ ---        │
│ str    ┆ i64   ┆ f64        ┆ f64       ┆ f64        │
╞════════╪═══════╪════════════╪═══════════╪════════════╡
│ FEDU   ┆ 1881  ┆ 30.417875  ┆ 5.103684  ┆ 164.854024 │
│ GS     ┆ 6541  ┆ 160.793199 ┆ 39.606168 ┆ 665.778675 │
│ PRO    ┆ 4472  ┆ 25.365576  ┆ 3.21      ┆ 74.489998  │
│ SVC    ┆ 7477  ┆ 10.939899  ┆ 1.77      ┆ 20.407255  │
│ TRIP   ┆ 3370  ┆ 40.645778  ┆ 10.7      ┆ 97.829206  │
│ …      ┆ …     ┆ …          ┆ …         ┆ …          │
│ RCUS   ┆ 1794  ┆ 19.853535  ┆ 6.54      ┆ 48.060001  │
│ MWYN   ┆ 36    ┆ 4.224694   ┆ 3.11      ┆ 4.85       │
│ SAGT   ┆ 40    ┆ 3.015775   ┆ 2.2       ┆ 4.76       │
│ SNWV   ┆ 44    ┆ 30.719182  ┆ 25.67     ┆ 38.709999  │
│ KFII   ┆ 36    ┆ 9.964356   ┆ 9.91      ┆ 10.04      │
└────────┴───────┴────────────┴───────────┴────────────┘


  return dispatch(args[0].__class__)(*args, **kw)


In [None]:
# Close `con` now that the exploratory queries above are finished
con.close()
