In [None]:
-- Return the session's current database and schema as a single row with the
-- columns DATABASE_NAME and SCHEMA_NAME.
SELECT current_database() AS DATABASE_NAME, current_schema() AS SCHEMA_NAME


In [None]:
# Import python packages
import logging
from snowflake.snowpark.context import get_active_session

# Named logger used by this notebook.
logger = logging.getLogger("fred_logger")

# `cells.sql_get_context` is the result of another notebook cell (presumably the
# SQL cell that selects current_database()/current_schema() — confirm the cell
# name). The first row carries the database in column 0 and the schema in column 1.
current_context_df = cells.sql_get_context.to_pandas()
database_name, schema_name = (
    current_context_df.iloc[0, 0],
    current_context_df.iloc[0, 1],
)

# Snowpark session supplied by the notebook runtime.
session = get_active_session()

logger.info("02_raw_to_harmonized start")

In [None]:
import snowflake.snowpark.functions as F

# First three characters of the schema name; create_fred_view uses this value as
# the prefix of the `<env>_RAW_FRED` source schema. Presumably an environment
# tag (e.g. a 3-letter prefix of the schema) — TODO confirm the naming convention.
env = schema_name[:3]
def create_fred_view(session):
    """Create or replace the FRED_FLATTENED_V view from the two raw FRED tables.

    Reads FRED_DGS10Y and FRED_DGS2Y from `{database_name}.{env}_RAW_FRED`,
    converts `observation_date` to a date and the yield column to float, joins
    the two series on the observation date and publishes the result as the
    view FRED_FLATTENED_V in `{database_name}.{schema_name}`.

    Relies on the notebook-level variables `database_name`, `schema_name` and
    `env`. As a side effect it switches the session's current schema.

    Args:
        session: Snowpark session used to read the tables and create the view.
    """
    # 10-year series: one row per observation date with its yield.
    ten_year_source = session.table(f"{database_name}.{env}_RAW_FRED.FRED_DGS10Y")
    ten_year = ten_year_source.select(
        F.to_date(F.col('"observation_date"')).alias("OBSERVATION_DATE"),
        F.col('"DGS10"').cast("float").alias("10Y_YIELD"),
    )

    # 2-year series, shaped the same way.
    two_year_source = session.table(f"{database_name}.{env}_RAW_FRED.FRED_DGS2Y")
    two_year = two_year_source.select(
        F.to_date(F.col('"observation_date"')).alias("OBSERVATION_DATE"),
        F.col('"DGS2"').cast("float").alias("2Y_YIELD"),
    )

    # Pair the two series by date; only the 10-year side's date column is kept
    # so the view exposes a single OBSERVATION_DATE. No null filtering is applied.
    same_day = ten_year['OBSERVATION_DATE'] == two_year['OBSERVATION_DATE']
    flattened = ten_year.join(two_year, same_day).select(
        ten_year["OBSERVATION_DATE"].alias("OBSERVATION_DATE"),
        F.col("10Y_YIELD"),
        F.col("2Y_YIELD"),
    )

    # The view name is unqualified, so select the target schema first.
    session.use_schema(f"{database_name}.{schema_name}")
    flattened.create_or_replace_view('FRED_FLATTENED_V')

def create_fred_view_stream(session):
    """Create or replace the stream FRED_FLATTENED_V_STREAM on FRED_FLATTENED_V.

    The stream is created with SHOW_INITIAL_ROWS = TRUE. Both object names are
    unqualified, so they resolve against the session's current schema.

    Args:
        session: Snowpark session used to execute the DDL statement.
    """
    # Backslash continuations keep this a single string literal.
    stream_ddl = 'CREATE OR REPLACE STREAM FRED_FLATTENED_V_STREAM \
                        ON VIEW FRED_FLATTENED_V \
                        SHOW_INITIAL_ROWS = TRUE'
    session.sql(stream_ddl).collect()
    


In [None]:

# Order matters: the stream is defined ON VIEW FRED_FLATTENED_V, and
# create_fred_view also sets the session schema in which the stream is created.
create_fred_view(session)
create_fred_view_stream(session)

In [None]:
-- Registers the Python stored procedure merge_fred_updates_sp. Its handler is
-- the `main` function in the body below, which receives the two STRING
-- arguments (DATABASE_NAME, SCHEMA_NAME) and returns a status string.
CREATE OR REPLACE PROCEDURE merge_fred_updates_sp(DATABASE_NAME STRING, SCHEMA_NAME STRING)
 RETURNS STRING
 LANGUAGE PYTHON
 RUNTIME_VERSION=3.9
 PACKAGES=('snowflake-snowpark-python')
 HANDLER='main'
AS
$$
from snowflake.snowpark import Session
import snowflake.snowpark.functions as F


def table_exists(session, schema='', name=''):
    """Return whether INFORMATION_SCHEMA.TABLES lists table `name` in `schema`.

    The comparison is done by the database on the exact strings supplied, so
    callers must pass the names in the case they are stored in.

    Args:
        session: Snowpark session used to run the lookup query.
        schema: Value compared against TABLE_SCHEMA.
        name: Value compared against TABLE_NAME.

    Returns:
        The TABLE_EXISTS value of the single result row.
    """
    def _sql_literal(value):
        # Both values are embedded in single-quoted SQL string constants and
        # ultimately come from procedure arguments. Escape backslashes and
        # single quotes so a stray character cannot terminate the literal and
        # change the statement.
        return str(value).replace("\\", "\\\\").replace("'", "''")

    query = (
        "SELECT EXISTS (SELECT * FROM INFORMATION_SCHEMA.TABLES "
        "WHERE TABLE_SCHEMA = '{}' AND TABLE_NAME = '{}') AS TABLE_EXISTS"
    ).format(_sql_literal(schema), _sql_literal(name))
    return session.sql(query).collect()[0]['TABLE_EXISTS']

def create_fred_table(session):
    """Create the FRED_10Y_2Y target table.

    The table is created LIKE FRED_FLATTENED_V and then extended with a
    META_UPDATED_AT TIMESTAMP column. Names are unqualified and resolve
    against the session's current schema.

    Args:
        session: Snowpark session used to execute the DDL statements.
    """
    ddl_statements = (
        "CREATE TABLE FRED_10Y_2Y LIKE FRED_FLATTENED_V",
        "ALTER TABLE FRED_10Y_2Y ADD COLUMN META_UPDATED_AT TIMESTAMP",
    )
    # Run in order: the column can only be added once the table exists.
    for statement in ddl_statements:
        session.sql(statement).collect()

def merge_fred_updates(session):
    """Merge rows from FRED_FLATTENED_V_STREAM into the FRED_10Y_2Y table.

    Rows are matched on OBSERVATION_DATE. Every stream column whose name does
    not contain "METADATA" is written to the target, and META_UPDATED_AT is
    set to the current timestamp, for both matched (update) and unmatched
    (insert) rows.

    Args:
        session: Snowpark session; both objects are resolved by unqualified
            name in its current schema.
    """
    stream_df = session.table('FRED_FLATTENED_V_STREAM')
    fred_table = session.table('FRED_10Y_2Y')

    # TODO: Is the if clause supposed to be based on "META_UPDATED_AT"?
    # Copy the data columns, skipping any column with "METADATA" in its name.
    assignments = {}
    for column_name in stream_df.schema.names:
        if "METADATA" in column_name:
            continue
        assignments[column_name] = stream_df[column_name]
    assignments["META_UPDATED_AT"] = F.current_timestamp()

    # Upsert into FRED_10Y_2Y keyed on the observation date.
    same_observation = fred_table['OBSERVATION_DATE'] == stream_df['OBSERVATION_DATE']
    fred_table.merge(
        stream_df,
        same_observation,
        [
            F.when_matched().update(assignments),
            F.when_not_matched().insert(assignments),
        ],
    )
    
def main(session: Session, DATABASE_NAME: str, SCHEMA_NAME: str) -> str:
    """Stored-procedure handler: ensure FRED_10Y_2Y exists, then merge stream rows.

    Args:
        session: Snowpark session supplied to the handler.
        DATABASE_NAME: Database that holds the target schema.
        SCHEMA_NAME: Schema that holds FRED_FLATTENED_V, its stream and the
            FRED_10Y_2Y table.

    Returns:
        A fixed success message once the merge has been issued.
    """
    # Use the handler arguments: notebook-level variables such as
    # `database_name` / `schema_name` are not defined inside the procedure body,
    # so referencing them raises NameError.
    session.use_schema(f"{DATABASE_NAME}.{SCHEMA_NAME}")

    # Create the target table on first run only.
    if not table_exists(session, schema=SCHEMA_NAME, name='FRED_10Y_2Y'):
        create_fred_table(session)

    # Process data incrementally
    merge_fred_updates(session)
    return "FRED_10Y_2Y table updated successfully!"

$$;