In [4]:
import os
from pyspark.sql import SparkSession
import snowflake.connector

# Spark session
spark = SparkSession.builder \
    .appName("GoldToSnowflake") \
    .master("local[*]") \
    .config("spark.jars.packages", "io.delta:delta-spark_2.12:3.1.0") \
    .config("spark.sql.extensions", "io.delta.sql.DeltaSparkSessionExtension") \
    .config("spark.sql.catalog.spark_catalog", "org.apache.spark.sql.delta.catalog.DeltaCatalog") \
    .getOrCreate()

# Load delta table
fact_sales_path = r"C:/Users/User/Desktop/E-Commerce Data Lakaehouse with AI-Powered Self-Healing Pipelines/Gold_layer/Gold_data/fact_sales/data"
fact_sales_df = spark.read.format("delta").load(fact_sales_path)

# Ensure columns are lowercase
fact_sales_df = fact_sales_df.toDF(*[c.lower() for c in fact_sales_df.columns])

# Save to CSV locally
output_path = r"C:/Users/User/Desktop/fact_sales_temp"
fact_sales_df.coalesce(1).write.option("header", True).csv(output_path, mode="overwrite")

# Connect to Snowflake
conn = snowflake.connector.connect(
    user="xxxxxxxxxxx",
    password="xxxxxxxxxx",
    account="xxxxxxxxxx",
    warehouse="COMPUTE_WH",
    database="fact_sales_db",
    schema="public",
    role="accountadmin"
)
cur = conn.cursor()

# Upload CSVs to stage
for f in os.listdir(output_path):
    file_path = os.path.join(output_path, f)
    if file_path.endswith(".csv"):
        cur.execute(f"PUT file://{file_path} @my_stage")
        print(f"Uploaded {file_path} to Snowflake stage")

# Copy into table
cur.execute("""
    COPY INTO fact_sales
    FROM @my_stage
    FILE_FORMAT = (TYPE=CSV FIELD_OPTIONALLY_ENCLOSED_BY='"' SKIP_HEADER=1)
""")

cur.close()
conn.close()
print("Data loaded into fact_sales table successfully")


Uploaded C:/Users/User/Desktop/fact_sales_temp\part-00000-2029e251-27f3-432a-a724-71172b28c86e-c000.csv to Snowflake stage
Data loaded into fact_sales table successfully


In [7]:
import snowflake.connector
import pandas as pd

# Connect to Snowflake
conn = snowflake.connector.connect(
    user="xxxxxxxxx",
    password="xxxxxxxx",
    account="xxxxxxxxxxxxx",
    warehouse="COMPUTE_WH",
    database="fact_sales_db",
    schema="public",
    role="accountadmin"
)

# Query into Pandas DataFrame
query = "SELECT * FROM fact_sales LIMIT 10"
fact_sales_df = pd.read_sql(query, conn)

# Display table
print(fact_sales_df)

# Close connection
conn.close()


  fact_sales_df = pd.read_sql(query, conn)


   CUSTOMER_SK   PRODUCT_SK    CUSTOMER_ID  \
0  51539615028   8589944437  CUST-2FCF789E   
1  34359765205  51539624528  CUST-F2DE1714   
2   8589960657  17179893249  CUST-F53E09E0   
3  17179894808  51539624154  CUST-F13FFE8A   
4  17179894808  51539624154  CUST-F13FFE8A   
5  17179894808  51539624154  CUST-F13FFE8A   
6  60129572589  17179881204  CUST-F5836608   
7  60129567733   8589950062  CUST-C99B0647   
8  51539629846        17431  CUST-CD9BB60D   
9  25769819102   8589940222  CUST-7C151EE9   

                               ORDER_ID                        TRANSACTION_ID  \
0  0028e904-f8b5-402a-a802-45c683207dcf  937c07ac-5fd8-4040-9cfc-6922ab54867f   
1  003e7a1e-f9e0-4fe9-9aad-b8b0a55660fc  1a281fcc-c8e2-48b6-856e-bc971fe28809   
2  00467baf-6703-44f7-b192-b9973f7ff247  15e9bd05-eb98-447d-a7aa-3a717dc9e73f   
3  00a45630-cd9a-4a2d-8ec4-d770d34e90c4  4e5ec230-cb11-47ba-ae70-2bc91b91be67   
4  00a45630-cd9a-4a2d-8ec4-d770d34e90c4  4e5ec230-cb11-47ba-ae70-2bc91b91be67   
5  00a4