In [0]:
from pyspark.sql import SparkSession
# Build (or reuse) the Spark session for this notebook. The storage account key
# is read from the secret scope at call time instead of being written in the cell.
spark = (
    SparkSession.builder
    .appName("ExtractJsonWithPython")
    .config(
        "fs.azure.account.key.ustcasestudyvsksa.blob.core.windows.net",
        dbutils.secrets.get(scope="casestudy-swiggy", key="deltalake-account-key"),
    )
    .getOrCreate()
)

In [0]:
%pip install azure-storage-file-datalake

Note: you may need to restart the kernel using %restart_python or dbutils.library.restartPython() to use updated packages.


In [0]:
import json
from azure.storage.filedatalake import DataLakeServiceClient, FileSystemClient, DataLakeFileClient
 
# The connection string is read from the "casestudy-swiggy" secret scope rather than written in the notebook.
CONNECTION_STRING = dbutils.secrets.get(scope="casestudy-swiggy", key="storage-connection-string")

# Initialize DataLakeServiceClient for Azure Data Lake Storage Gen2.
# This is a module-level client: download_and_parse_json reads it as a global.
data_lake_service_client = DataLakeServiceClient.from_connection_string(CONNECTION_STRING)
 
# Name of the filesystem (similar to container) where files are stored
file_system_name = "swiggy-casestudy"
 
 
def download_and_parse_json(file_system_name, file_path):
    """Download one file from the data lake and parse its content as JSON.

    The file is fetched through the module-level ``data_lake_service_client``.

    Args:
        file_system_name: Name of the file system (container) holding the file.
        file_path: Path of the file inside that file system.

    Returns:
        The parsed JSON value, or ``None`` when the download, decoding or
        parsing fails. Failures are reported with ``print`` instead of being
        raised, so callers must check the result.
    """
    file_client = data_lake_service_client.get_file_system_client(file_system_name).get_file_client(file_path)
    try:
        # Download the whole file as bytes
        file_data = file_client.download_file().readall()

        # 'utf-8-sig' decodes plain UTF-8 and also strips a leading byte-order
        # mark; with plain 'utf-8' the BOM stays in the text and json.loads
        # rejects it, which would be reported as a failed fetch.
        json_data = json.loads(file_data.decode('utf-8-sig'))

        print("JSON data successfully retrieved and parsed")
        return json_data
    except Exception as e:
        # Best-effort contract: report the reason and signal failure with None.
        print(f"Failed to download and parse JSON: {e}")
        return None
 

In [0]:

# Fetch the raw Swiggy JSON from the file system configured in `file_system_name`
# instead of repeating the container name as a second literal.
data = download_and_parse_json(file_system_name, "/raw_data/swiggy.json")

# The helper returns None on failure; an empty payload is rejected as well,
# which is what the 'Returned Empty' message refers to.
if not data:
    raise RuntimeError('Data Fetch Failed: Returned Empty')
print("fetch completed")

JSON data successfully retrieved and parsed
fetch completed


In [0]:
def _append_restaurant_records(city_label, restaurants, restaurant_rows, menu_rows):
    """Flatten one group of restaurants into restaurant rows and menu rows.

    Args:
        city_label: Value stored in the 'city' column for every restaurant here.
        restaurants: Mapping of restaurant id -> restaurant dict; a missing or
            null value is treated as "no restaurants".
        restaurant_rows: List that receives one dict per restaurant.
        menu_rows: List that receives one dict per menu item.
    """
    for restaurant_id, restaurant_data in (restaurants or {}).items():
        if not isinstance(restaurant_data, dict):
            # Without a dict there are no attributes to read; skip the entry.
            continue

        restaurant_rows.append({
            'restaurant_id': restaurant_id,
            'name': restaurant_data.get('name'),
            'city': city_label,
            'rating': restaurant_data.get('rating'),
            'rating_count': restaurant_data.get('rating_count'),
            'cost': restaurant_data.get('cost'),
            'cuisine': restaurant_data.get('cuisine'),
            'lic_no': restaurant_data.get('lic_no'),
            'link': restaurant_data.get('link'),
            'address': restaurant_data.get('address'),
        })

        # Menu layout: category -> item name -> item attributes.
        # `or {}` covers a menu that is present but null.
        for category, items in (restaurant_data.get('menu') or {}).items():
            if not isinstance(items, dict):
                continue
            for item_name, item_data in items.items():
                if not isinstance(item_data, dict):
                    continue
                menu_rows.append({
                    'restaurant_id': restaurant_id,
                    'category': category,
                    'item_name': item_name,
                    'price': item_data.get('price'),
                    'veg_or_non_veg': item_data.get('veg_or_non_veg'),
                })


records_restaurant = []
records_menu = []

# Loop through the top-level cities
for city_name, city_data in data.items():
    if not isinstance(city_data, dict):
        # Nothing to flatten for non-dict entries.
        continue

    if 'restaurants' in city_data:
        # No sub-areas: the restaurants sit directly under the city.
        _append_restaurant_records(
            city_name, city_data.get('restaurants'), records_restaurant, records_menu
        )
    else:
        # The city is split into sub-areas; each one carries its own restaurants
        # and is labelled "<city>, <sub-area>".
        for sub_area_name, sub_area_data in city_data.items():
            if not isinstance(sub_area_data, dict):
                # A non-dict value here used to raise AttributeError on `.get`.
                continue
            _append_restaurant_records(
                f"{city_name}, {sub_area_name}",
                sub_area_data.get('restaurants'),
                records_restaurant,
                records_menu,
            )

# Convert to Spark DataFrames.
# No schema argument is passed, so column names and types come from the record dicts.
df_restaurant = spark.createDataFrame(records_restaurant)
df_menu = spark.createDataFrame(records_menu)

# Show the results (optional) - a visual check only; the return value is not used.
df_restaurant.display()



address,city,cost,cuisine,lic_no,link,name,rating,rating_count,restaurant_id
"AB FOODS POINT, NEAR RISHI NARANG DENTAL CLINIC , NEAR IDBI BANK, ABOHAR",Abohar,₹ 200,"Beverages,Pizzas",22122652000138,https://www.swiggy.com/restaurants/ab-foods-point-central-abohar-abohar-567335,AB FOODS POINT,--,Too Few Ratings,567335
"Janta Sweet House, Bazar No.9, Circullar Road, Abohar",Abohar,₹ 200,"Sweets,Bakery",12117201000112,https://www.swiggy.com/restaurants/janta-sweet-house-central-abohar-abohar-531342,Janta Sweet House,4.4,50+ ratings,531342
"theka coffee desi, sahtiya sadan road city",Abohar,₹ 100,Beverages,22121652000190,https://www.swiggy.com/restaurants/theka-coffee-desi-central-raipura-abohar-158203,theka coffee desi,3.8,100+ ratings,158203
"Singh Hut, CIRCULAR ROAD NEAR NEHRU PARK ABOHAR",Abohar,₹ 250,"Fast Food,Indian",22119652000167,https://www.swiggy.com/restaurants/singh-hut-nehru-park-main-bazar-abohar-187912,Singh Hut,3.7,20+ ratings,187912
"GRILL MASTERS, ADA Heights, Abohar - Hanumangarh Rd, Raipura, Abohar, Punjab 152116, India",Abohar,₹ 250,"Italian-American,Fast Food",12122201000053,https://www.swiggy.com/restaurants/grill-masters-central-abohar-abohar-543530,GRILL MASTERS,--,Too Few Ratings,543530
"Sam Uncle, hanumangarh road near raja bajaj showroom abohar",Abohar,₹ 200,Continental,22119652000052,https://www.swiggy.com/restaurants/sam-uncle-central-main-bazar-abohar-158204,Sam Uncle,3.6,20+ ratings,158204
"shere punjab veg, major surinder chowk near verma sons petrol pump and lic building abohar",Abohar,₹ 150,North Indian,22120652000021,https://www.swiggy.com/restaurants/shere-punjab-veg-central-krishna-nagri-abohar-156588,shere punjab veg,4.0,100+ ratings,156588
"Shri Balaji Vaishno Dhaba, St no 13,6th chowk,main Bazar,near jain bhawan, abohar",Abohar,₹ 100,North Indian,22119652000389,https://www.swiggy.com/restaurants/shri-balaji-vaishno-dhaba-central-main-bazar-abohar-244866,Shri Balaji Vaishno Dhaba,--,Too Few Ratings,244866
"Hinglaj Kachori Bhandhar, street no 11 circular road ,Abohar",Abohar,₹ 100,"Snacks,Chaat",22119652000042,https://www.swiggy.com/restaurants/hinglaj-kachori-bhandhar-central-main-bazar-abohar-156602,Hinglaj Kachori Bhandhar,4.2,20+ ratings,156602
"yummy hub, hanumangarh road near dr naveen sethi hospitalabohar",Abohar,₹ 200,Indian,22119652000045,https://www.swiggy.com/restaurants/yummy-hub-central-abohar-abohar-158193,yummy hub,--,Too Few Ratings,158193


In [0]:
# Preview the flattened menu rows (one row per restaurant / category / item).
df_menu.display()

category,item_name,price,restaurant_id,veg_or_non_veg
Burger,Aloo Tikki Burger,40.0,567335,Veg
Burger,Veg Creamy Burger,50.0,567335,Veg
Burger,Cheese Burst Burger,65.0,567335,Veg
Burger,Paneer Creamy Burger,80.0,567335,Veg
Burger,Maxican Burger,80.0,567335,Veg
Burger,Bbq Chicken Burger,105.0,567335,Non-veg
Burger,Peri Peri Chicken Burger,105.0,567335,Non-veg
Pasta Must Try,White Sauce,100.0,567335,Veg
Pasta Must Try,Red Sauce,100.0,567335,Veg
Pasta Must Try,Pink Sauce,125.0,567335,Veg


In [0]:
# Cache both frames; each result is re-bound to the same name so the write cells use it.
# NOTE(review): presumably meant to avoid recomputing the frames for the later writes - confirm it is needed.
df_restaurant = df_restaurant.cache()
df_menu = df_menu.cache()

In [0]:

# Persist the restaurant frame as parquet, replacing whatever is already at the target path.
(
    df_restaurant.write
    .format("parquet")
    .mode("overwrite")
    .save("wasbs://swiggy-casestudy@ustcasestudyvsksa.blob.core.windows.net/bronze_restaurant_data")
)

In [0]:

# Persist the menu frame as parquet, replacing whatever is already at the target path.
(
    df_menu.write
    .format("parquet")
    .mode("overwrite")
    .save("wasbs://swiggy-casestudy@ustcasestudyvsksa.blob.core.windows.net/bronze_menu_data")
)