In [1]:
import yaml
from pyspark.sql.types import StructType, StructField, StringType, FloatType, IntegerType

# Load the table definitions from the YAML configuration file.
# The encoding is pinned to UTF-8 so the file is decoded the same way on every
# platform instead of depending on the locale's default encoding.
config_file_path = "table_config.yaml"
with open(config_file_path, "r", encoding="utf-8") as file:
    # safe_load only builds plain Python objects (dicts, lists, scalars).
    config = yaml.safe_load(file)

In [2]:
# Pick the entry for the target table out of the loaded configuration.
table_name = "iceberg.stock_eod_data"
table_config = config["tables"][table_name]

# "schema" is mandatory (KeyError if absent); "partition_by" is optional and
# falls back to an empty list.
schema_config, partition_by = (
    table_config["schema"],
    table_config.get("partition_by", []),
)

In [3]:
# Map the type names used in the YAML config to the PySpark type classes.
type_mapping = {
    "StringType": StringType,
    "FloatType": FloatType,
    "IntegerType": IntegerType,
}

# Fail early with a readable message when the config names a type that is not
# in type_mapping, instead of a bare KeyError from inside the comprehension.
unsupported_types = sorted(
    {str(field["type"]) for field in schema_config if field["type"] not in type_mapping}
)
if unsupported_types:
    raise KeyError(
        f"Unsupported type(s) in schema config: {unsupported_types}; "
        f"supported types are {sorted(type_mapping)}"
    )

# Build the StructType schema: one StructField per configured column.
# "nullable" is optional in the config and defaults to True, matching the
# default of StructField itself.
fields = [
    StructField(
        field["name"],
        type_mapping[field["type"]](),  # instantiate the mapped type class
        field.get("nullable", True),
    )
    for field in schema_config
]
struct_type = StructType(fields)

In [4]:
# Print the StructType assembled from the YAML schema for a visual check.
print(struct_type)

StructType([StructField('date', StringType(), False), StructField('open', FloatType(), True), StructField('close', FloatType(), True), StructField('volume', IntegerType(), True)])


In [13]:
# TableManager is defined in another notebook (lab_registered_tables);
# nbimporter must be imported first because it is required to import a class
# from another Jupyter notebook.
import nbimporter
from lab_registered_tables import TableManager

# YAML file holding the registered table schemas, and the table to generate
# DDL for.
# NOTE: this rebinds config_file_path and table_name, which earlier cells set
# to different values.
config_file_path = "registered_table_schemas.yaml"
table_name = "raw.stock_eod_yfinance"

# Build the manager from the YAML file, request the CREATE TABLE statement
# for the chosen table, and show it.
table_manager = TableManager(config_file_path)
create_table_query = table_manager.get_create_table_query(table_name)
print(create_table_query)

# Optional: run the generated statement in Spark. The session setup below is
# left commented out; uncomment it and point the warehouse at a real location.
from pyspark.sql import SparkSession

# spark = SparkSession.builder \
#     .appName("IcebergTableManager") \
#     .config("spark.sql.catalog.iceberg", "org.apache.iceberg.spark.SparkCatalog") \
#     .config("spark.sql.catalog.iceberg.type", "hadoop") \
#     .config("spark.sql.catalog.iceberg.warehouse", "path/to/warehouse") \
#     .getOrCreate()

# # Execute the query
# spark.sql(create_table_query)

CREATE TABLE IF NOT EXISTS raw.stock_eod_yfinance (date string, open float, high float, low float, close float, volume int, dividends float, stock_splits float, symbol string, import_time string)
        USING iceberg
