## Library Import

In [4]:
import duckdb
import polars as p

import sys
import os
sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath('__file__'))))
from config import load_setting
settings = load_setting()

## Load Silver Data

In [5]:
# Test connection to Silver Data db
try:
    db_path = settings['process']['silver_db_path']
    print(f"Attempting to connect to: {db_path}")
    
    # Try connection
    con = duckdb.connect(f"../{db_path}")
    
    # Test Connection with a simple query
    result = con.execute("SELECT 1").fetchone()
    print("Connection Succesfful!")
    print(f"Test query result: {result}")
    
    
    # List all tables in the database
    tables = con.execute("SHOW TABLES").fetchall()
    print("\nAvailable tables:", tables)
    
except Exception as e:
    print(f"Connection Failed {str(e)}")

Attempting to connect to: process/storage/silver/resampled.duckdb
Connection Succesfful!
Test query result: (1,)

Available tables: [('gold_data',), ('raw_data',), ('silver_1',), ('silver_13',), ('silver_21',), ('silver_3',), ('silver_34',), ('silver_5',), ('silver_55',), ('silver_8',)]


In [6]:
# List all tables
tables = con.execute("SHOW TABLES").fetchall()
print("Available tables:", tables)

Available tables: [('gold_data',), ('raw_data',), ('silver_1',), ('silver_13',), ('silver_21',), ('silver_3',), ('silver_34',), ('silver_5',), ('silver_55',), ('silver_8',)]


In [7]:
# Get schema information for a specific table
# Replace 'your_table_name' with an actual table name
table_info = con.execute("DESCRIBE silver_1").fetchall()
print("Table schema:", table_info)

Table schema: [('symbol', 'VARCHAR', 'YES', None, None, None), ('date', 'TIMESTAMP WITH TIME ZONE', 'YES', None, None, None), ('open', 'DOUBLE', 'YES', None, None, None), ('high', 'DOUBLE', 'YES', None, None, None), ('low', 'DOUBLE', 'YES', None, None, None), ('close', 'DOUBLE', 'YES', None, None, None), ('volume', 'DOUBLE', 'YES', None, None, None)]


In [8]:
# Get sample data from the table
sample_data = con.execute("SELECT * FROM silver_1 LIMIT 5").fetchall()
print("Sample data:", sample_data)

Sample data: [('TANH', datetime.datetime(2024, 8, 20, 0, 0, tzinfo=<DstTzInfo 'America/Edmonton' MDT-1 day, 18:00:00 DST>), 6.4000000954, 6.4000000954, 5.2399997711, 5.3200001717, 1012550.0), ('TANH', datetime.datetime(2024, 10, 11, 0, 0, tzinfo=<DstTzInfo 'America/Edmonton' MDT-1 day, 18:00:00 DST>), 7.0, 7.2800002098, 6.8400001526, 7.1599998474, 59665.0), ('TANH', datetime.datetime(2024, 10, 28, 0, 0, tzinfo=<DstTzInfo 'America/Edmonton' MDT-1 day, 18:00:00 DST>), 6.9600000381, 7.1999998093, 6.5599999428, 6.7199997902, 74850.0), ('TBBB', datetime.datetime(2024, 2, 15, 0, 0, tzinfo=<DstTzInfo 'America/Edmonton' MST-1 day, 17:00:00 STD>), 21.0100002289, 21.8799991608, 20.2800006866, 20.4300003052, 1155800.0), ('TBBB', datetime.datetime(2024, 3, 19, 0, 0, tzinfo=<DstTzInfo 'America/Edmonton' MDT-1 day, 18:00:00 DST>), 21.6900005341, 21.9449996948, 21.5100002289, 21.6800003052, 340200.0)]


In [9]:
# Get the date range of the silver_1 table
con.execute("SELECT MIN(date), MAX(date) FROM silver_1").fetchall() 

[(datetime.datetime(1962, 1, 2, 0, 0, tzinfo=<DstTzInfo 'America/Edmonton' MST-1 day, 17:00:00 STD>),
  datetime.datetime(2025, 6, 27, 0, 0, tzinfo=<DstTzInfo 'America/Edmonton' MDT-1 day, 18:00:00 DST>))]

In [10]:
# Get the first 5 rows of Resampled Data
first_5_rows = con.execute("SELECT * FROM silver_1 ORDER BY date DESC LIMIT 5").fetchall()
print("First 5 rows:", first_5_rows)

First 5 rows: [('BTDR', datetime.datetime(2025, 6, 27, 0, 0, tzinfo=<DstTzInfo 'America/Edmonton' MDT-1 day, 18:00:00 DST>), 11.32, 11.35, 10.82, 11.09, 15676663.0), ('KTB', datetime.datetime(2025, 6, 27, 0, 0, tzinfo=<DstTzInfo 'America/Edmonton' MDT-1 day, 18:00:00 DST>), 66.17, 66.27, 64.9, 65.79, 985907.0), ('AGCO', datetime.datetime(2025, 6, 27, 0, 0, tzinfo=<DstTzInfo 'America/Edmonton' MDT-1 day, 18:00:00 DST>), 102.74, 103.42, 101.69, 102.41, 1049201.0), ('INFA', datetime.datetime(2025, 6, 27, 0, 0, tzinfo=<DstTzInfo 'America/Edmonton' MDT-1 day, 18:00:00 DST>), 24.3, 24.37, 24.24, 24.25, 3398648.0), ('CHPT', datetime.datetime(2025, 6, 27, 0, 0, tzinfo=<DstTzInfo 'America/Edmonton' MDT-1 day, 18:00:00 DST>), 0.7667, 0.8099, 0.7366, 0.7366, 67455274.0)]


In [11]:
# Get total row count
row_count = con.execute("SELECT COUNT(*) FROM silver_1").fetchone()
print("Total rows:", row_count)

Total rows: (22579490,)


In [12]:
# Get the first 5 rows of the table
first_5_rows = con.execute("SELECT * FROM silver_1 LIMIT 5").fetchall()
print("First 5 rows:", first_5_rows)
# Get the last 5 rows of the table
last_5_rows = con.execute("SELECT * FROM silver_1 ORDER BY date DESC LIMIT 5").fetchall()
print("Last 5 rows:", last_5_rows)


First 5 rows: [('TANH', datetime.datetime(2024, 8, 20, 0, 0, tzinfo=<DstTzInfo 'America/Edmonton' MDT-1 day, 18:00:00 DST>), 6.4000000954, 6.4000000954, 5.2399997711, 5.3200001717, 1012550.0), ('TANH', datetime.datetime(2024, 10, 11, 0, 0, tzinfo=<DstTzInfo 'America/Edmonton' MDT-1 day, 18:00:00 DST>), 7.0, 7.2800002098, 6.8400001526, 7.1599998474, 59665.0), ('TANH', datetime.datetime(2024, 10, 28, 0, 0, tzinfo=<DstTzInfo 'America/Edmonton' MDT-1 day, 18:00:00 DST>), 6.9600000381, 7.1999998093, 6.5599999428, 6.7199997902, 74850.0), ('TBBB', datetime.datetime(2024, 2, 15, 0, 0, tzinfo=<DstTzInfo 'America/Edmonton' MST-1 day, 17:00:00 STD>), 21.0100002289, 21.8799991608, 20.2800006866, 20.4300003052, 1155800.0), ('TBBB', datetime.datetime(2024, 3, 19, 0, 0, tzinfo=<DstTzInfo 'America/Edmonton' MDT-1 day, 18:00:00 DST>), 21.6900005341, 21.9449996948, 21.5100002289, 21.6800003052, 340200.0)]
Last 5 rows: [('UBXG', datetime.datetime(2025, 6, 27, 0, 0, tzinfo=<DstTzInfo 'America/Edmonton' MD

In [13]:
# Get column statistics
stats = con.execute("""
    SELECT 
        symbol,
        COUNT(*) as count,
        AVG(open) as avg,
        MIN(open) as min,
        MAX(open) as max
    FROM silver_1
    GROUP BY symbol
""").fetchall()
print("Column statistics:", stats)

Column statistics: [('LCUT', 8578, 8.811501803904534, 0.7924176098, 23.190848642), ('LANV', 1078, 5.8948294060202215, 1.0299999714, 10.1999998093), ('WTS', 9788, 44.73684643093635, 4.4101483865, 248.16), ('BSY', 1196, 45.92714419208645, 26.7628081127, 70.174308238), ('GREEL', 929, 6.337589075596125, 0.2524187271, 10.0035001517), ('GEO', 7783, 7.2767282611272925, 0.404037714, 35.8199996948), ('DSGX', 6646, 25.439989482790466, 0.8000000119, 122.5199966431), ('DVA', 7463, 46.75336046860808, 0.8333330154, 178.4700012207), ('BILI', 1823, 31.444278638186947, 8.7449998856, 156.4499969482), ('AVBP', 356, 22.586098283061236, 15.3199996948, 35.8699989319), ('CYBR', 2706, 124.41422760635719, 25.0, 421.0), ('ECBK', 732, 13.762644809181287, 9.9619998932, 16.75), ('FSM', 4648, 3.9851678174096605, 0.3300000131, 9.4899997711), ('ALLY', 2872, 25.513343007674337, 9.8787193651, 49.7135112067), ('POWW', 2109, 3.1820483637931254, 0.9700000286, 9.8999996185), ('HOTH', 1600, 38.39341574809482, 0.5910000205, 

In [14]:
import polars as pl

# Turn stats into a pl dataframe with mixed types allowed
stats_df = pl.DataFrame(stats, 
                        schema={
                            "symbol": pl.Utf8,
                            "count": pl.Int64,
                            "avg": pl.Float64,
                            "min": pl.Float64,
                            "max": pl.Float64
                        })
print(stats_df)

shape: (5_186, 5)
┌────────┬───────┬──────────────┬───────────┬─────────────┐
│ symbol ┆ count ┆ avg          ┆ min       ┆ max         │
│ ---    ┆ ---   ┆ ---          ┆ ---       ┆ ---         │
│ str    ┆ i64   ┆ f64          ┆ f64       ┆ f64         │
╞════════╪═══════╪══════════════╪═══════════╪═════════════╡
│ LCUT   ┆ 8578  ┆ 8.811502     ┆ 0.792418  ┆ 23.190849   │
│ LANV   ┆ 1078  ┆ 5.894829     ┆ 1.03      ┆ 10.2        │
│ WTS    ┆ 9788  ┆ 44.736846    ┆ 4.410148  ┆ 248.16      │
│ BSY    ┆ 1196  ┆ 45.927144    ┆ 26.762808 ┆ 70.174308   │
│ GREEL  ┆ 929   ┆ 6.337589     ┆ 0.252419  ┆ 10.0035     │
│ …      ┆ …     ┆ …            ┆ …         ┆ …           │
│ ACIC   ┆ 4437  ┆ 8.075549     ┆ 0.288856  ┆ 22.851451   │
│ EPRT   ┆ 1764  ┆ 20.698955    ┆ 6.078198  ┆ 33.982451   │
│ APVO   ┆ 2248  ┆ 28976.274635 ┆ 0.285     ┆ 188945.6875 │
│ SNDK   ┆ 93    ┆ 41.987312    ┆ 29.219999 ┆ 56.509998   │
│ TOPW   ┆ 37    ┆ 4.116649     ┆ 2.15      ┆ 10.73       │
└────────┴───────┴────

  return dispatch(args[0].__class__)(*args, **kw)


In [15]:
con.close()
