In [0]:
import json

# Read the JSON payload passed to this notebook through the 'bronze_params'
# widget and unpack the run parameters from it.
bronze_params = dbutils.widgets.get('bronze_params')

output_data = json.loads(bronze_params)

# Date window of the run. Only start_date is used below (to build the input
# file name); end_date is read for completeness.
start_date = output_data.get('start_date', '')
end_date = output_data.get('end_date', '')

# Base locations of the storage layers. They are concatenated directly with
# file/folder names further down, so they are expected to end with '/'.
bronze_adls = output_data.get('bronze_adls', '')
silver_adls = output_data.get('silver_adls', '')
gold_adls = output_data.get('gold_adls', '')

# Fail fast when a parameter this notebook actually depends on is absent or
# empty. Otherwise the read path would degrade to e.g. '_earthquake_data.json'
# and the write would silently land on the relative path
# 'earthquake_events_silver/'.
_required_params = ('start_date', 'bronze_adls', 'silver_adls')
_missing_params = [key for key in _required_params if not output_data.get(key)]
if _missing_params:
    raise ValueError(
        f"bronze_params is missing required value(s): {', '.join(_missing_params)}"
    )

print(f'Start Date: {start_date}, Bronze ADLS: {bronze_adls}')

In [0]:
from pyspark.sql.functions import *
from pyspark.sql.types import *
from datetime import date, timedelta

In [0]:
# Load the bronze JSON file for start_date. The 'multiline' option lets a
# single JSON document span several lines of the file.
df = (
    spark.read
    .option('multiline', True)
    .json(f'{bronze_adls}{start_date}_earthquake_data.json')
)

In [0]:
# Flatten the nested records into one flat row per event:
#   * 'id' is kept as-is,
#   * positions 0/1/2 of geometry.coordinates become longitude/latitude/elevation,
#   * selected properties.* fields are exposed under top-level names.
# NOTE(review): if the feed is USGS-style GeoJSON, the third coordinate is a
# depth rather than an elevation -- confirm before relying on the column name.
df = df.select(
    'id',
    *[
        col('geometry.coordinates').getItem(position).alias(axis_name)
        for position, axis_name in enumerate(('longitude', 'latitude', 'elevation'))
    ],
    *[
        col(source_path).alias(target_name)
        for source_path, target_name in (
            ('properties.title', 'title'),
            ('properties.place', 'place_description'),
            ('properties.mag', 'mag'),
            ('properties.magType', 'magType'),
            ('properties.time', 'time'),
            ('properties.updated', 'updated'),
            ('properties.sig', 'sig'),
        )
    ],
)

In [0]:
# Replace missing longitude / latitude / time values with 0 and keep every
# non-null value untouched. A time of 0 corresponds to the Unix epoch once the
# column is converted to a timestamp.
df = (
    df
    .withColumn(
        'longitude',
        when(col('longitude').isNotNull(), col('longitude')).otherwise(0),
    )
    .withColumn(
        'latitude',
        when(col('latitude').isNotNull(), col('latitude')).otherwise(0),
    )
    .withColumn(
        'time',
        when(col('time').isNotNull(), col('time')).otherwise(0),
    )
)

In [0]:
# 'time' and 'updated' are divided by 1000 before the cast, i.e. they are
# treated as epoch milliseconds: a numeric value cast to timestamp is read as
# seconds since the epoch.
df = (
    df
    .withColumn('time', (col('time') / 1000).cast('timestamp'))
    .withColumn('updated', (col('updated') / 1000).cast('timestamp'))
)

In [0]:
# Sanity check: show the first transformed row (this triggers a Spark job).
df.first()

Row(id='nc75162167', longitude=-122.818168640137, latitude=38.8136672973633, elevation=1.41999995708466, title='M 0.8 - 7 km NW of The Geysers, CA', place_description='7 km NW of The Geysers, CA', mag=0.75, magType='md', time=datetime.datetime(2025, 4, 7, 23, 58, 49, 490000), updated=datetime.datetime(2025, 4, 8, 0, 0, 24, 268000), sig=9)

In [0]:
# Destination folder of the silver dataset, directly under the silver base path.
silver_output_path = f'{silver_adls}earthquake_events_silver/'

# Append this run's rows as Parquet. Append mode adds to whatever is already
# stored there, so re-running with the same input writes the same rows again.
df.write.parquet(silver_output_path, mode='append')

In [0]:
# End the notebook run and hand the silver output path back as its exit value.
dbutils.notebook.exit(silver_output_path)