#### Extract the following fields and store the data as restaurants.csv: Restaurant Id, Restaurant Name, Country, City, User Rating Votes, User Aggregate Rating, Cuisines.

In [9]:
%pip install requests, openpyxl


Collecting openpyxl
  Downloading openpyxl-3.1.2-py2.py3-none-any.whl.metadata (2.5 kB)
Collecting et-xmlfile (from openpyxl)
  Downloading et_xmlfile-1.1.0-py3-none-any.whl (4.7 kB)
Downloading openpyxl-3.1.2-py2.py3-none-any.whl (249 kB)
   ---------------------------------------- 0.0/250.0 kB ? eta -:--:--
   ---- ---------------------------------- 30.7/250.0 kB 640.0 kB/s eta 0:00:01
   ---------------------------------------  245.8/250.0 kB 3.7 MB/s eta 0:00:01
   ---------------------------------------- 250.0/250.0 kB 2.2 MB/s eta 0:00:00
Installing collected packages: et-xmlfile, openpyxl
Successfully installed et-xmlfile-1.1.0 openpyxl-3.1.2
Note: you may need to restart the kernel to use updated packages.


In [10]:
import json
import csv
import requests
import pandas as pd

# URL of the JSON data
url = "https://raw.githubusercontent.com/Papagoat/brain-assessment/main/restaurant_data.json"

# Fetch the JSON data from the URL. The timeout stops the cell from hanging on a
# stalled connection, and raise_for_status() reports HTTP errors (404, 5xx)
# directly instead of as a confusing JSON decode failure on the error page.
response = requests.get(url, timeout=30)
response.raise_for_status()
data = response.json()

# Load the country code -> country name lookup from the local Excel sheet.
# The dict is built column-wise instead of looping over rows with iterrows().
country_code_file = 'Country-Code.xlsx'
country_code_df = pd.read_excel(country_code_file)
country_mapping = dict(zip(country_code_df['Country Code'], country_code_df['Country']))

# Extract fields and write to CSV
with open('restaurants.csv', 'w', newline='', encoding='utf-8') as csvfile:
    writer = csv.writer(csvfile)
    # Write header
    writer.writerow(['Restaurant Id', 'Restaurant Name', 'Country', 'City', 'User Rating Votes', 'User Aggregate Rating', 'Cuisines'])

    # Each top-level result holds a 'restaurants' list whose items wrap the
    # actual record under the 'restaurant' key.
    for result in data:
        for restaurant_info in result.get('restaurants', []):
            restaurant = restaurant_info.get('restaurant', {})
            # Look the nested sections up once instead of once per field.
            location = restaurant.get('location', {})
            user_rating = restaurant.get('user_rating', {})

            # Missing fields are written as empty strings; an unknown country
            # code also maps to an empty string.
            writer.writerow([
                restaurant.get('R', {}).get('res_id', ''),
                restaurant.get('name', ''),
                country_mapping.get(location.get('country_id', ''), ''),
                location.get('city', ''),
                user_rating.get('votes', ''),
                user_rating.get('aggregate_rating', ''),
                restaurant.get('cuisines', ''),
            ])

print("Data has been extracted and stored in restaurants.csv")


Data has been extracted and stored in restaurants.csv


#### Extract the list of restaurants that had past events in April 2019 and store the data as restaurant_events.csv.

In [11]:
import json
import csv
import requests
from datetime import datetime

# Function to check if the event falls within April 2019
def is_april_2019(event_date):
    """Return True when ``event_date`` is a 'YYYY-MM-DD' string dated in April 2019.

    Args:
        event_date: Date string in ``%Y-%m-%d`` format. Empty strings,
            malformed strings and non-string values such as ``None`` are
            accepted and treated as "not in April 2019".

    Returns:
        bool: True only if the value parses and its year is 2019 and month is 4.
    """
    try:
        date_obj = datetime.strptime(event_date, '%Y-%m-%d')
    except (TypeError, ValueError):
        # ValueError: empty or malformed string.
        # TypeError: non-string input, e.g. None from a JSON null.
        return False
    return date_obj.year == 2019 and date_obj.month == 4

# Function to replace empty values with "NA"
def replace_empty(value):
    """Return ``value`` unchanged when it is truthy, otherwise the placeholder "NA"."""
    if not value:
        return "NA"
    return value

# URL of the JSON data
url = "https://raw.githubusercontent.com/Papagoat/brain-assessment/main/restaurant_data.json"

# Fetch the JSON data from the URL. The timeout stops the cell from hanging on a
# stalled connection, and raise_for_status() reports HTTP errors (404, 5xx)
# directly instead of as a confusing JSON decode failure on the error page.
response = requests.get(url, timeout=30)
response.raise_for_status()
data = response.json()

# First and last day of the target month, kept as datetimes so they compare
# directly with the values returned by datetime.strptime.
april_2019_start = datetime(2019, 4, 1)
april_2019_end = datetime(2019, 4, 30)


def _parse_event_date(raw_date):
    """Parse a 'YYYY-MM-DD' string; return None when it is missing or malformed."""
    try:
        return datetime.strptime(raw_date, '%Y-%m-%d')
    except (TypeError, ValueError):
        return None


def _ran_during_april_2019(start_raw, end_raw):
    """Return True when the event's start..end range includes any day of April 2019.

    Checking the start date alone drops events that began before April but
    were still running during it. When only one of the two dates can be
    parsed, the event is treated as a single-day event on that date.
    """
    start = _parse_event_date(start_raw)
    end = _parse_event_date(end_raw)
    if start is None and end is None:
        return False
    start = start or end
    end = end or start
    return start <= april_2019_end and end >= april_2019_start


# Extract fields and write to CSV
with open('restaurant_events.csv', 'w', newline='', encoding='utf-8') as csvfile:
    writer = csv.writer(csvfile)
    # Write header
    writer.writerow(['Event Id', 'Restaurant Id', 'Restaurant Name', 'Photo URL', 'Event Title', 'Event Start Date', 'Event End Date'])

    # Extract information for each restaurant with events that ran in April 2019
    for result in data:
        for restaurant_info in result.get('restaurants', []):
            restaurant = restaurant_info.get('restaurant', {})
            for event_info in restaurant.get('zomato_events', []):
                event = event_info.get('event', {})
                event_start_date = event.get('start_date', '')
                event_end_date = event.get('end_date', '')
                if not _ran_during_april_2019(event_start_date, event_end_date):
                    continue

                # Use the first photo only; tolerate a missing or empty
                # 'photos' list and entries without a 'photo' object.
                photos = event.get('photos') or []
                photo_url = photos[0].get('photo', {}).get('url', '') if photos else ''

                row = [
                    event.get('event_id', ''),
                    restaurant.get('R', {}).get('res_id', ''),
                    restaurant.get('name', ''),
                    photo_url,
                    event.get('title', ''),
                    event_start_date,
                    event_end_date,
                ]
                # Fill every empty column with "NA", not just the photo URL.
                writer.writerow([replace_empty(field) for field in row])

print("Data has been extracted and stored in restaurant_events.csv")


Data has been extracted and stored in restaurant_events.csv


#### From the dataset (restaurant_data.json), determine the thresholds for the different rating texts based on aggregate rating. Return aggregates for the following ratings only: Excellent, Very Good, Good, Average, Poor.

In [11]:
import requests

# URL of the JSON data
url = "https://raw.githubusercontent.com/Papagoat/brain-assessment/main/restaurant_data.json"

# Fetch the JSON data from the URL. The timeout stops the cell from hanging on a
# stalled connection, and raise_for_status() reports HTTP errors (404, 5xx)
# directly instead of as a confusing JSON decode failure on the error page.
response = requests.get(url, timeout=30)
response.raise_for_status()
data = response.json()

# Running min/max of the aggregate rating for each rating text of interest.
# The sentinels start at +inf / -inf so the first observed rating replaces them.
rating_thresholds = {
    tracked_text: {"min": float('inf'), "max": float('-inf')}
    for tracked_text in ("Excellent", "Very Good", "Good", "Average", "Poor")
}

# Iterate through the data to find rating thresholds
for result in data:
    for restaurant_info in result.get('restaurants', []):
        user_rating = restaurant_info.get('restaurant', {}).get('user_rating', {})
        rating_text = user_rating.get('rating_text', '')

        # Only the rating texts listed above are aggregated.
        if rating_text not in rating_thresholds:
            continue

        # Skip records whose rating is missing or not numeric. Defaulting them
        # to 0 would drag the category's minimum down to 0.0.
        try:
            aggregate_rating = float(user_rating.get('aggregate_rating'))
        except (TypeError, ValueError):
            continue

        bounds = rating_thresholds[rating_text]
        bounds["min"] = min(bounds["min"], aggregate_rating)
        bounds["max"] = max(bounds["max"], aggregate_rating)

# Print the rating thresholds. A category with no observations keeps its
# inf / -inf sentinels.
for rating_text, thresholds in rating_thresholds.items():
    print(f"{rating_text}: Min - {thresholds['min']}, Max - {thresholds['max']}")


Excellent: Min - 4.5, Max - 4.9
Very Good: Min - 4.0, Max - 4.4
Good: Min - 3.5, Max - 3.9
Average: Min - 2.5, Max - 3.4
Poor: Min - 2.2, Max - 2.2
