### Install required packages

In [None]:
%pip install requests openpyxl

### 1a. Extract the following fields and store the data as restaurants.csv.

In [2]:
import json
import csv
import requests
import pandas as pd

# URL of the JSON data
url = "https://raw.githubusercontent.com/Papagoat/brain-assessment/main/restaurant_data.json"

# Fetch the JSON data from the URL
response = requests.get(url)
data = response.json()

# Load country codes using pandas
country_mapping = {}
country_code_file = 'Country-Code.xlsx'
country_code_df = pd.read_excel(country_code_file)
for index, row in country_code_df.iterrows():
    country_mapping[row['Country Code']] = row['Country']

# Extract fields and write to CSV
with open('restaurants.csv', 'w', newline='', encoding='utf-8') as csvfile:
    writer = csv.writer(csvfile)
    # Write header
    writer.writerow(['Restaurant Id', 'Restaurant Name', 'Country', 'City', 'User Rating Votes', 'User Aggregate Rating', 'Cuisines'])

    # Extract information for each restaurant
    for result in data:
        restaurants = result.get('restaurants', [])
        for restaurant_info in restaurants:
            restaurant = restaurant_info.get('restaurant', {})
            restaurant_id = restaurant.get('R', {}).get('res_id', '')
            restaurant_name = restaurant.get('name', '')
            country_code = restaurant.get('location', {}).get('country_id', '')
            country = country_mapping.get(country_code, '')
            city = restaurant.get('location', {}).get('city', '')
            user_rating_votes = restaurant.get('user_rating', {}).get('votes', '')
            user_aggregate_rating = restaurant.get('user_rating', {}).get('aggregate_rating', '')
            cuisines = restaurant.get('cuisines', '')

            # Write data to CSV
            writer.writerow([restaurant_id, restaurant_name, country, city, user_rating_votes, user_aggregate_rating, cuisines])

print("Data has been extracted and stored in restaurants.csv")

# Read the CSV file and print the first few rows
df = pd.read_csv('restaurants.csv')
print("First few rows of restaurants.csv:")
print(df.head())


Data has been extracted and stored in restaurants.csv
First few rows of restaurants.csv:
   Restaurant Id       Restaurant Name Country       City  User Rating Votes  \
0       18649486  The Drunken Botanist   India    Gurgaon               4765   
1         308322      Hauz Khas Social   India  New Delhi              13627   
2       18856789  AIR- An Ivory Region   India  New Delhi               1819   
3         307374              AMA Cafe   India  New Delhi               3252   
4       18238278               Tamasha   India  New Delhi               8112   

   User Aggregate Rating                                           Cuisines  
0                    4.4        Continental, Italian, North Indian, Chinese  
1                    4.6  Continental, American, Asian, North Indian, Ch...  
2                    4.1          North Indian, Chinese, Continental, Asian  
3                    4.4                                       Cafe, Juices  
4                    4.4    Finger Food,

### 2a. Extract the list of restaurants that have past event in the month of April 2019 and store the data as restaurant_events.csv.

In [3]:
import json
import csv
import requests
from datetime import datetime

# Function to check if the event falls within April 2019
def is_april_2019(event_date):
    try:
        date_obj = datetime.strptime(event_date, '%Y-%m-%d')
        return date_obj.year == 2019 and date_obj.month == 4
    except ValueError:
        return False

# Function to replace empty values with "NA"
def replace_empty(value):
    return value if value else "NA"

# URL of the JSON data
url = "https://raw.githubusercontent.com/Papagoat/brain-assessment/main/restaurant_data.json"

# Fetch the JSON data from the URL
response = requests.get(url)
data = response.json()

# Extract fields and write to CSV
with open('restaurant_events.csv', 'w', newline='', encoding='utf-8') as csvfile:
    writer = csv.writer(csvfile)
    # Write header
    writer.writerow(['Event Id', 'Restaurant Id', 'Restaurant Name', 'Photo URL', 'Event Title', 'Event Start Date', 'Event End Date'])

    # Extract information for each restaurant with past events in April 2019
    for result in data:
        restaurants = result.get('restaurants', [])
        for restaurant_info in restaurants:
            restaurant = restaurant_info.get('restaurant', {})
            events = restaurant.get('zomato_events', [])
            for event_info in events:
                event = event_info.get('event', {})
                event_start_date = event.get('start_date', '')
                event_end_date = event.get('end_date', '')
                if is_april_2019(event_start_date):
                    event_id = event.get('event_id', '')
                    restaurant_id = restaurant.get('R', {}).get('res_id', '')
                    restaurant_name = restaurant.get('name', '')
                    event_title = event.get('title', '')
                    photo_url = replace_empty(event['photos'][0]['photo'].get('url', '') if event.get('photos') else '')

                    # Write data to CSV
                    writer.writerow([event_id, restaurant_id, restaurant_name, photo_url, event_title, event_start_date, event_end_date])

print("Data has been extracted and stored in restaurant_events.csv")

# Read the CSV file and print the first few rows
df = pd.read_csv('restaurant_events.csv')
print("First few rows of restaurant_events.csv:")
print(df.head())


Data has been extracted and stored in restaurant_events.csv
First few rows of restaurant_events.csv:
   Event Id  Restaurant Id       Restaurant Name  \
0    336644       18856789  AIR- An Ivory Region   
1    336889       18382360                 Local   
2    336888       18382360                 Local   
3    336890       18382360                 Local   
4    336894       18382360                 Local   

                                           Photo URL         Event Title  \
0  https://b.zmtcdn.com/data/zomato_events/photos...  Dhol Bhangra Night   
1                                                NaN  Cocktail Wednesday   
2                                                NaN  Cocktail Wednesday   
3  https://b.zmtcdn.com/data/zomato_events/photos...  Cocktail Wednesday   
4  https://b.zmtcdn.com/data/zomato_events/photos...      Hip Hop Friday   

  Event Start Date Event End Date  
0       2019-04-10     2019-04-11  
1       2019-04-10     2019-04-10  
2       2019-04-10   

##### 2b. Unit Test

In [4]:
import unittest
from datetime import datetime

def is_april_2019(event_date):
    try:
        date_obj = datetime.strptime(event_date, '%Y-%m-%d')
        return date_obj.year == 2019 and date_obj.month == 4
    except ValueError:
        return False

def replace_empty(value):
    return value if value else "NA"

# unit tests
class TestFunctions(unittest.TestCase):

    def test_is_april_2019(self):
        # Test with a date in April 2019
        self.assertTrue(is_april_2019('2019-04-15'))
        print("Test is_april_2019 passed successfully!")

        # Test with a date outside of April 2019
        self.assertFalse(is_april_2019('2019-05-15'))
        print("Test is_april_2019 passed successfully!")

        # Test with an invalid date format
        self.assertFalse(is_april_2019('invalid_date_format'))
        print("Test is_april_2019 passed successfully!")

    def test_replace_empty(self):
        # Test with a non-empty value
        self.assertEqual(replace_empty('value'), 'value')
        print("Test replace_empty passed successfully!")

        # Test with an empty value
        self.assertEqual(replace_empty(''), 'NA')
        print("Test replace_empty passed successfully!")

        # Test with None
        self.assertEqual(replace_empty(None), 'NA')
        print("Test replace_empty passed successfully!")

# Run the unit tests
unittest.main(argv=[''], exit=False)


..
----------------------------------------------------------------------
Ran 2 tests in 0.002s

OK


Test is_april_2019 passed successfully!
Test is_april_2019 passed successfully!
Test is_april_2019 passed successfully!
Test replace_empty passed successfully!
Test replace_empty passed successfully!
Test replace_empty passed successfully!


<unittest.main.TestProgram at 0x280725ca750>

### 3a. From the dataset, determine the threshold for the different rating text based on aggregate rating. Return aggregates for the following ratings only

In [5]:
import requests

# URL of the JSON data
url = "https://raw.githubusercontent.com/Papagoat/brain-assessment/main/restaurant_data.json"

# Fetch the JSON data from the URL
response = requests.get(url)
data = response.json()

# Initialize dictionaries to store aggregate ratings for different rating texts
rating_thresholds = {
    "Excellent": {"min": float('inf'), "max": float('-inf')},
    "Very Good": {"min": float('inf'), "max": float('-inf')},
    "Good": {"min": float('inf'), "max": float('-inf')},
    "Average": {"min": float('inf'), "max": float('-inf')},
    "Poor": {"min": float('inf'), "max": float('-inf')}
}

# Iterate through the data to find rating thresholds
for result in data:
    restaurants = result.get('restaurants', [])
    for restaurant_info in restaurants:
        restaurant = restaurant_info.get('restaurant', {})
        user_rating = restaurant.get('user_rating', {})
        aggregate_rating = float(user_rating.get('aggregate_rating', 0))
        rating_text = user_rating.get('rating_text', '')

        # Update rating thresholds
        if rating_text in rating_thresholds:
            if aggregate_rating < rating_thresholds[rating_text]["min"]:
                rating_thresholds[rating_text]["min"] = aggregate_rating
            if aggregate_rating > rating_thresholds[rating_text]["max"]:
                rating_thresholds[rating_text]["max"] = aggregate_rating

# Print the rating thresholds
for rating_text, thresholds in rating_thresholds.items():
    print(f"{rating_text}: Min - {thresholds['min']}, Max - {thresholds['max']}")


Excellent: Min - 4.5, Max - 4.9
Very Good: Min - 4.0, Max - 4.4
Good: Min - 3.5, Max - 3.9
Average: Min - 2.5, Max - 3.4
Poor: Min - 2.2, Max - 2.2


##### 3b. Unit Test

In [6]:
def get_rating_thresholds(data):
    rating_thresholds = {
        "Excellent": {"min": float('inf'), "max": float('-inf')},
        "Very Good": {"min": float('inf'), "max": float('-inf')},
        "Good": {"min": float('inf'), "max": float('-inf')},
        "Average": {"min": float('inf'), "max": float('-inf')},
        "Poor": {"min": float('inf'), "max": float('-inf')}
    }

    for result in data:
        restaurants = result.get('restaurants', [])
        for restaurant_info in restaurants:
            restaurant = restaurant_info.get('restaurant', {})
            user_rating = restaurant.get('user_rating', {})
            aggregate_rating = float(user_rating.get('aggregate_rating', 0))
            rating_text = user_rating.get('rating_text', '')

            if rating_text in rating_thresholds:
                if aggregate_rating < rating_thresholds[rating_text]["min"]:
                    rating_thresholds[rating_text]["min"] = aggregate_rating
                if aggregate_rating > rating_thresholds[rating_text]["max"]:
                    rating_thresholds[rating_text]["max"] = aggregate_rating

    return rating_thresholds

# Unit tests
mock_data = [
    {
        "restaurants": [
            {
                "restaurant": {
                    "user_rating": {
                        "aggregate_rating": "4.0",
                        "rating_text": "Good"
                    }
                }
            },
            {
                "restaurant": {
                    "user_rating": {
                        "aggregate_rating": "4.8",
                        "rating_text": "Excellent"
                    }
                }
            },
            {
                "restaurant": {
                    "user_rating": {
                        "aggregate_rating": "3.5",
                        "rating_text": "Average"
                    }
                }
            }
        ]
    }
]

rating_thresholds = get_rating_thresholds(mock_data)

# Assert the results
expected_rating_thresholds = {
    "Excellent": {"min": 4.8, "max": 4.8},
    "Very Good": {"min": float('inf'), "max": float('-inf')},
    "Good": {"min": 4.0, "max": 4.0},
    "Average": {"min": 3.5, "max": 3.5},
    "Poor": {"min": float('inf'), "max": float('-inf')}
}

if rating_thresholds == expected_rating_thresholds:
    print("Unit test passed: Rating thresholds match expected values.")
else:
    print("Unit test failed: Rating thresholds do not match expected values.")


Unit test passed: Rating thresholds match expected values.
