In [1]:
import os
import yaml
import nbimporter
import warnings
import logging
import pandas as pd
from datetime import datetime, date
import pyspark
from pyspark.sql import SparkSession

from lab_database_manager import PgDBManager
from lab_schema_manager import SchemaManager
from lab_raw_yahoo import get_raw_yahoo, get_raw_yahoo_by_looping_groups
from lab_iceberg_manager import IcebergManager
from lab_iceberg_pg_operator import IcebergPgOperator


# Get Finalytics Connection and Create a PgDBManager

In [2]:
# Get finalytics connection info: PgDBManager is built from the connection
# config file name and the target database name.
conn_config_file = "cfg_connections.yaml"
pg_db = "finalytics"
pg_db_mgr = PgDBManager(conn_config_file, pg_db)

# JDBC URL and properties exposed by the manager; both are handed to
# IcebergPgOperator later in the notebook.
pg_jdbc_url = pg_db_mgr.jdbc_url
pg_jdbc_properties = pg_db_mgr.jdbc_properties

### Get the (group_id, group_start_date, symbol) list from finalytics

In [3]:
# Fetch (group_id, group_start_date, symbol) rows from the grouped start-date
# view in finalytics.
# NOTE(review): the cutoff date in the WHERE clause is hard-coded — confirm it
# is intentional and not a leftover debugging filter.
query = (
    "SELECT group_id, group_start_date, symbol "
    "from fin.vw_etl_stock_eod_start_date_grouped "
    "where group_start_date <'2025-1-8'"
)
query_result = pg_db_mgr.get_sql_script_result_list(query)

# Get Yahoo Hist Data

In [4]:
# Fetch Yahoo history through the "yahooquery" backend for the rows in
# query_result, then stamp every record with the time this import started.
yahoo_api = "yahooquery"
import_time = datetime.now()

hist_data = get_raw_yahoo_by_looping_groups(yahoo_api, query_result)

# datetime.now() is already naive; tz_localize(None) keeps the stamp tz-free.
import_stamp = pd.to_datetime(import_time).tz_localize(None)
hist_data["import_time"] = import_stamp

# Get Iceberg Connection and Create an IcebergManager

In [5]:
# Settings for the Iceberg side of the pipeline. conn_config_file is assigned
# again here with the same value used for the Postgres manager above.
conn_config_file = "cfg_connections.yaml"
schema_config_file = "cfg_schemas.yaml"
spark_app_name = "raw_yfinance"
iceberg_raw_stock_eod_table = "nessie.raw.stock_eod_yahooquery"

# Build the IcebergManager from the two config files and the Spark app name.
# NOTE(review): constructing it appears to start Spark (this cell's output
# shows the Ivy dependency resolution) — confirm against IcebergManager.
my_iceberg_manager = IcebergManager(
    conn_config_file,
    schema_config_file,
    spark_app_name,
)

:: loading settings :: url = jar:file:/opt/spark/jars/ivy-2.5.1.jar!/org/apache/ivy/core/settings/ivysettings.xml


Ivy Default Cache set to: /root/.ivy2/cache
The jars for the packages stored in: /root/.ivy2/jars
org.postgresql#postgresql added as a dependency
org.apache.iceberg#iceberg-spark-runtime-3.5_2.12 added as a dependency
org.projectnessie.nessie-integrations#nessie-spark-extensions-3.5_2.12 added as a dependency
software.amazon.awssdk#bundle added as a dependency
software.amazon.awssdk#url-connection-client added as a dependency
:: resolving dependencies :: org.apache.spark#spark-submit-parent-453252d7-a617-4a6a-bff7-fd5316d3b87c;1.0
	confs: [default]
	found org.postgresql#postgresql;42.7.3 in central
	found org.checkerframework#checker-qual;3.42.0 in central
	found org.apache.iceberg#iceberg-spark-runtime-3.5_2.12;1.5.0 in central
	found org.projectnessie.nessie-integrations#nessie-spark-extensions-3.5_2.12;0.77.1 in central
	found software.amazon.awssdk#bundle;2.24.8 in central
	found software.amazon.awssdk#url-connection-client;2.24.8 in central
	found software.amazon.awssdk#utils;2.24

### Load Iceberg Table

In [6]:
# Take the Spark session from the Iceberg manager and turn hist_data into a
# Spark DataFrame (hist_data is presumably a pandas DataFrame — TODO confirm).
my_spark_session = my_iceberg_manager.get_spark_session()
hist_df = my_spark_session.createDataFrame(hist_data)

# Full refresh of the raw table: truncate first, then insert the new batch.
# NOTE(review): the two calls are separate steps; if the insert fails after
# the truncate, the table is presumably left empty — confirm this is acceptable.
my_iceberg_manager.truncate_iceberg_table(iceberg_raw_stock_eod_table)
my_iceberg_manager.insert_into_iceberg_table(hist_df, iceberg_raw_stock_eod_table)

# Setup statement kept for reference only (not executed by this notebook);
# note that no `spark` variable is defined here — the session is my_spark_session:
# spark.sql("CREATE NAMESPACE IF NOT EXISTS nessie.raw;")

SLF4J: Failed to load class "org.slf4j.impl.StaticLoggerBinder".
SLF4J: Defaulting to no-operation (NOP) logger implementation
SLF4J: See http://www.slf4j.org/codes.html#StaticLoggerBinder for further details.


Iceberg table nessie.raw.stock_eod_yahooquery truncated successfully.


                                                                                

nessie.raw.stock_eod_yahooquery was loaded with 30 records, totally 30 records.


# Load data from Iceberg to Pg with an IcebergPgOperator

In [7]:
# Postgres stage table that receives the Iceberg rows, and the JDBC write mode.
pg_table = "stage.stock_eod_quote_yahoo"
jdbc_mode = "append"

# Empty the stage table first so the append below amounts to a full reload.
pg_truncate_script = f"TRUNCATE TABLE {pg_table}"
pg_db_mgr.execute_sql_script(pg_truncate_script)

# Copy the Iceberg table into the stage table using the Spark session and the
# JDBC settings obtained from the Postgres manager.
my_iceberg_pg_operator = IcebergPgOperator(
    my_spark_session,
    pg_jdbc_url,
    pg_jdbc_properties,
)
my_iceberg_pg_operator.insert_iceberg_data_into_pg(
    iceberg_raw_stock_eod_table,
    pg_table,
    jdbc_mode,
)


                                                                                

# Merge Pg stage into fin

In [8]:
# Call the fin.usp_load_stock_eod stored procedure through the Postgres
# manager; per this section's heading it merges the stage data into fin
# (procedure body not visible here — TODO confirm).
pg_merge_script = "call fin.usp_load_stock_eod();"
pg_db_mgr.execute_sql_script(pg_merge_script)