In [None]:
# Import required libraries
import google.cloud.bigquery as bq
from google.cloud import storage
import pandas as pd
import json

# Initialize the BigQuery and Cloud Storage clients.
# Both are created without explicit arguments.
client = bq.Client()
storage_client = storage.Client()

# Configuration: every later cell should build table and bucket paths from
# these constants rather than repeating the literal values.
PROJECT_ID = client.project  # project reported by the BigQuery client
DATASET_ID = 'EA_DEMO_RAW'
BUCKET_NAME = 'ea-demo-1raw'

# Echo the resolved configuration so the notebook output records what was used.
print("BigQuery client initialized")
print(f"Project ID: {PROJECT_ID}")
print(f"Dataset: {DATASET_ID}")
print(f"Bucket: {BUCKET_NAME}")

BigQuery client initialized
Project ID: xm497-2025-09-16-sqvyh-1
Dataset: EA_DEMO
Bucket: ea-demo-1raw


In [None]:
# Load customers data from JSON
def load_customers_json():
    """Run a BigQuery load job that fills T_CUSTOMERS_RAW from customers.json.

    The source is ``gs://{BUCKET_NAME}/customers.json`` and the destination is
    ``{PROJECT_ID}.{DATASET_ID}.T_CUSTOMERS_RAW``; both are built from the
    notebook-level configuration constants. Progress messages are printed to
    stdout.

    Returns:
        The job object returned by ``client.load_table_from_uri``, after
        ``result()`` has been called on it.
    """
    source_uri = f"gs://{BUCKET_NAME}/customers.json"
    destination = f"{PROJECT_ID}.{DATASET_ID}.T_CUSTOMERS_RAW"

    load_options = {
        "source_format": bq.SourceFormat.NEWLINE_DELIMITED_JSON,
        # Schema autodetection is off: the existing table schema is used.
        "autodetect": False,
        # Replace whatever data the table currently holds.
        "write_disposition": bq.WriteDisposition.WRITE_TRUNCATE,
        # Tolerate no bad records: a single one fails the job.
        "max_bad_records": 0,
    }
    job_config = bq.LoadJobConfig(**load_options)

    print(f"Loading customers data from: {source_uri}")
    print(f"Target table: {destination}")

    # Submit the job, then wait for it to complete.
    job = client.load_table_from_uri(source_uri, destination, job_config=job_config)
    job.result()

    # Re-read the table metadata to report the row count after the load.
    row_count = client.get_table(destination).num_rows
    print(f"Loaded {row_count:,} rows into T_CUSTOMERS_RAW")

    return job


In [None]:
# Run the customers load and keep the returned job object.
customers_job = load_customers_json()

Loading customers data from: gs://ea-demo-1raw/customers.json
Target table: xm497-2025-09-16-sqvyh-1.EA_DEMO.T_CUSTOMERS_RAW
Loaded 500 rows into T_CUSTOMERS_RAW


In [None]:
# Count the rows now in T_CUSTOMERS_RAW. The table path is built from the
# notebook configuration (PROJECT_ID / DATASET_ID) instead of a hard-coded
# project and dataset, so this check always reads the table the load wrote to.
count_sql = f"SELECT COUNT(*) AS cnt FROM `{PROJECT_ID}.{DATASET_ID}.T_CUSTOMERS_RAW`"

# Result is a DataFrame with a single `cnt` column, as the later check expects.
cust_count = client.query(count_sql).to_dataframe()

Query is running:   0%|          |

Downloading:   0%|          |

In [None]:
# Row count the customers load is expected to produce.
EXPECTED_CUSTOMER_ROWS = 500

# The COUNT(*) query yields a single row; read it by position rather than by
# index label so the check does not depend on how the frame is indexed.
loaded_rows = cust_count["cnt"].iloc[0]

# Report-only check: it prints the outcome and does not raise on a mismatch.
if loaded_rows == EXPECTED_CUSTOMER_ROWS:
    print("The customer Json data load is done as expected")
else:
    print("The customer Json data load is not done as expected")

The customer Json data load is done as expected
