In [3]:
import dai
import pandas as pd
import numpy as np
import time
from datetime import timedelta

In [None]:
# End of the window: the largest `date` found in cn_stock_bar1d among rows dated
# 2024-01-01 or later, rendered as a "YYYY-MM-DD" string.
ed = (
    dai.query("SELECT DISTINCT date FROM cn_stock_bar1d WHERE date >= '2024-01-01'")
    .df()["date"]
    .max()
    .strftime("%Y-%m-%d")
)

# Start of the window: 45 calendar days before `ed`, in the same string format.
sd = (
    pd.Timestamp(ed) - timedelta(days=45)
).strftime("%Y-%m-%d")

In [4]:
def calc_data(instrument, sd, ed):
    """Compute the alpha_hf_0003xxxx intraday-volume factors for one instrument.

    The query reads minute bars from ``cn_stock_bar1m`` and runs four CTE stages:

    * ``data1`` - per minute row, windowed over ``(day, instrument)``:
        - ``ACMA1..ACMA5``: correlation between ``volume`` and its 1..5-row lag
          within the day.
        - ``V39 / V40 / V41``: volume of the bars stamped 10:09:00, 10:10:00 and
          10:11:00.
        - ``APL18..APL22``: share of the day's total volume traded at or after
          14:43:00 .. 14:39:00 respectively (0 when the day's total volume is 0).
          NOTE(review): the 18..22 suffixes presumably count the closing
          one-minute bars included - confirm.
    * ``data2`` - ``SELECT DISTINCT`` collapses the minute rows to one row per
      ``(day, instrument)``; every selected column is constant inside a
      partition.
    * ``data3`` - 15-period ``m_ta_sma`` / ``m_ta_ema`` of each APLxx and
      15-period ``m_variance`` / ``m_skewness`` / ``m_kurtosis`` of V39..V41.
      NOTE(review): these are DAI built-ins, presumably rolling per instrument
      in date order - confirm against the DAI function reference.
    * ``data4`` - casts ``date`` to TIMESTAMP and renames the columns to
      ``alpha_hf_00030001`` .. ``alpha_hf_00030029``.

    Args:
        instrument: Instrument code; passed to ``dai.query`` as a one-element
            ``instrument`` filter.
        sd: Start date as a ``"YYYY-MM-DD"`` string; ``" 00:00:00.000"`` is
            appended to form the lower bound of the ``date`` filter.
        ed: End date as a ``"YYYY-MM-DD"`` string; ``" 23:59:59.999"`` is
            appended to form the upper bound of the ``date`` filter.

    Returns:
        Whatever ``dai.query(...).df()`` yields for the query: one row per
        ``(date, instrument)`` with the 29 ``alpha_hf_0003xxxx`` columns,
        ordered by date then instrument.
    """

    # Plain (non-f) string: the SQL has no Python placeholders; the date range
    # and instrument are supplied through `filters` below.
    sql = """
    WITH 
    data1 AS (
        SELECT 
            instrument,
            date,
            make_date(year(date), month(date),  day(date))    AS day,
            make_time(hour(date), minute(date), second(date)) AS _time,
            LAG(volume, 1) OVER (PARTITION BY day, instrument ORDER BY date) AS _volume_1,
            LAG(volume, 2) OVER (PARTITION BY day, instrument ORDER BY date) AS _volume_2,
            LAG(volume, 3) OVER (PARTITION BY day, instrument ORDER BY date) AS _volume_3,
            LAG(volume, 4) OVER (PARTITION BY day, instrument ORDER BY date) AS _volume_4,
            LAG(volume, 5) OVER (PARTITION BY day, instrument ORDER BY date) AS _volume_5,
            CORR(volume, _volume_1) OVER (PARTITION BY day, instrument) AS ACMA1,
            CORR(volume, _volume_2) OVER (PARTITION BY day, instrument) AS ACMA2,
            CORR(volume, _volume_3) OVER (PARTITION BY day, instrument) AS ACMA3,
            CORR(volume, _volume_4) OVER (PARTITION BY day, instrument) AS ACMA4,
            CORR(volume, _volume_5) OVER (PARTITION BY day, instrument) AS ACMA5,
            IF(_time = '10:09:00',  1, 0) AS _is39,
            IF(_time = '10:10:00',  1, 0) AS _is40,
            IF(_time = '10:11:00',  1, 0) AS _is41,
            IF(_time >= '14:43:00', 1, 0) AS _isL18,
            IF(_time >= '14:42:00', 1, 0) AS _isL19,
            IF(_time >= '14:41:00', 1, 0) AS _isL20,
            IF(_time >= '14:40:00', 1, 0) AS _isL21,
            IF(_time >= '14:39:00', 1, 0) AS _isL22,
            SUM(volume * _is39) OVER(PARTITION BY day, instrument) AS V39,
            SUM(volume * _is40) OVER(PARTITION BY day, instrument) AS V40,
            SUM(volume * _is41) OVER(PARTITION BY day, instrument) AS V41,
            IF(SUM(volume) OVER(PARTITION BY day, instrument) = 0, 0, SUM(volume * _isL18) OVER(PARTITION BY day, instrument) / SUM(volume) OVER(PARTITION BY day, instrument)) AS APL18,
            IF(SUM(volume) OVER(PARTITION BY day, instrument) = 0, 0, SUM(volume * _isL19) OVER(PARTITION BY day, instrument) / SUM(volume) OVER(PARTITION BY day, instrument)) AS APL19,
            IF(SUM(volume) OVER(PARTITION BY day, instrument) = 0, 0, SUM(volume * _isL20) OVER(PARTITION BY day, instrument) / SUM(volume) OVER(PARTITION BY day, instrument)) AS APL20,
            IF(SUM(volume) OVER(PARTITION BY day, instrument) = 0, 0, SUM(volume * _isL21) OVER(PARTITION BY day, instrument) / SUM(volume) OVER(PARTITION BY day, instrument)) AS APL21,
            IF(SUM(volume) OVER(PARTITION BY day, instrument) = 0, 0, SUM(volume * _isL22) OVER(PARTITION BY day, instrument) / SUM(volume) OVER(PARTITION BY day, instrument)) AS APL22,
        FROM cn_stock_bar1m
        ORDER BY date, instrument
    ),

    data2 AS (
        SELECT DISTINCT 
            day AS date,
            instrument,
            V39,
            V40,
            V41,
            APL18,
            APL19,
            APL20,
            APL21,
            APL22,
            ACMA1,
            ACMA2,
            ACMA3,
            ACMA4,
            ACMA5,
        FROM data1
    ),

    data3 AS (
        SELECT
            date,
            instrument,
            V39 AS _V39,
            V40 AS _V40,
            V41 AS _V41,
            APL18,
            APL19,
            APL20,
            APL21,
            APL22,
            ACMA1,
            ACMA2,
            ACMA3,
            ACMA4,
            ACMA5,
            m_ta_sma(APL18, 15)  AS APL18_SMA,
            m_ta_sma(APL19, 15)  AS APL19_SMA,
            m_ta_sma(APL20, 15)  AS APL20_SMA,
            m_ta_sma(APL21, 15)  AS APL21_SMA,
            m_ta_sma(APL22, 15)  AS APL22_SMA,
            m_ta_ema(APL18, 15)  AS APL18_EMA,
            m_ta_ema(APL19, 15)  AS APL19_EMA,
            m_ta_ema(APL20, 15)  AS APL20_EMA,
            m_ta_ema(APL21, 15)  AS APL21_EMA,
            m_ta_ema(APL22, 15)  AS APL22_EMA,
            m_variance(_V39, 15) AS VMA_39,
            m_variance(_V40, 15) AS VMA_40,
            m_variance(_V41, 15) AS VMA_41,
            m_skewness(_V39, 15) AS SMA_39,
            m_skewness(_V40, 15) AS SMA_40,
            m_skewness(_V41, 15) AS SMA_41,
            m_kurtosis(_V39, 15) AS KMA_39,
            m_kurtosis(_V40, 15) AS KMA_40,
            m_kurtosis(_V41, 15) AS KMA_41,
        FROM data2
    ),

    data4 AS (
        SELECT
            CAST(date AS TIMESTAMP) AS date,
            instrument,
            APL18     AS alpha_hf_00030001,
            APL19     AS alpha_hf_00030002,
            APL20     AS alpha_hf_00030003,
            APL21     AS alpha_hf_00030004,
            APL22     AS alpha_hf_00030005,
            APL18_SMA AS alpha_hf_00030006,
            APL19_SMA AS alpha_hf_00030007,
            APL20_SMA AS alpha_hf_00030008,
            APL21_SMA AS alpha_hf_00030009,
            APL22_SMA AS alpha_hf_00030010,
            APL18_EMA AS alpha_hf_00030011,
            APL19_EMA AS alpha_hf_00030012,
            APL20_EMA AS alpha_hf_00030013,
            APL21_EMA AS alpha_hf_00030014,
            APL22_EMA AS alpha_hf_00030015,
            VMA_39    AS alpha_hf_00030016,
            VMA_40    AS alpha_hf_00030017,
            VMA_41    AS alpha_hf_00030018,
            SMA_39    AS alpha_hf_00030019,
            SMA_40    AS alpha_hf_00030020,
            SMA_41    AS alpha_hf_00030021,
            KMA_39    AS alpha_hf_00030022,
            KMA_40    AS alpha_hf_00030023,
            KMA_41    AS alpha_hf_00030024,
            ACMA1     AS alpha_hf_00030025,
            ACMA2     AS alpha_hf_00030026,
            ACMA3     AS alpha_hf_00030027,
            ACMA4     AS alpha_hf_00030028,
            ACMA5     AS alpha_hf_00030029,
        FROM data3
        ORDER BY date, instrument
    )

    SELECT * 
    FROM data4

    """

    # Restrict the scan to the requested instrument and the full span of the
    # [sd, ed] days (first to last millisecond).
    query_filters = {
        'instrument': [instrument],
        "date": [sd + ' 00:00:00.000', ed + ' 23:59:59.999'],
    }
    return dai.query(sql, filters=query_filters).df()