### Data Exploration of the Given data 

In [1]:
import json
import pandas as pd

pd.set_option("display.max_columns", None)
pd.set_option("display.width", 120)


### Loading the json data from the data directory

In [2]:
def load_json(path):
    with open(path, "r", encoding="utf-8") as f:
        return json.load(f)

flights_data = load_json("../data/flights.json")
hotels_data = load_json("../data/hotels.json")
places_data = load_json("../data/places.json")


#### Finding the Lengths of the Records

In [3]:
print("Flights records:", len(flights_data))
print("Hotels records:", len(hotels_data))
print("Places records:", len(places_data))


Flights records: 30
Hotels records: 40
Places records: 40


### Understanding the Data 

In [4]:
print("Flight sample:")
flights_data[0]


Flight sample:


{'flight_id': 'FL0001',
 'airline': 'IndiGo',
 'from': 'Hyderabad',
 'to': 'Delhi',
 'departure_time': '2025-01-04T11:32:00',
 'arrival_time': '2025-01-04T15:32:00',
 'price': 2907}

In [5]:
print("Hotel sample:")
hotels_data[0]


Hotel sample:


{'hotel_id': 'HOT0001',
 'name': 'Grand Palace Hotel',
 'city': 'Delhi',
 'stars': 4,
 'price_per_night': 3897,
 'amenities': ['wifi', 'pool']}

In [28]:
print("Place sample:")
places_data[0]

Place sample:


{'place_id': 'PLC0001',
 'name': 'Famous Fort',
 'city': 'Delhi',
 'type': 'lake',
 'rating': 4.6}

#### Coverting the data to the dataframe

In [6]:
df_flights = pd.DataFrame(flights_data)
df_hotels = pd.DataFrame(hotels_data)
df_places = pd.DataFrame(places_data)


In [7]:
print("Flights Columns & Types")
df_flights.dtypes


Flights Columns & Types


flight_id         object
airline           object
from              object
to                object
departure_time    object
arrival_time      object
price              int64
dtype: object

In [8]:
print("Hotels Columns & Types")
df_hotels.dtypes


Hotels Columns & Types


hotel_id           object
name               object
city               object
stars               int64
price_per_night     int64
amenities          object
dtype: object

In [9]:
print("Places Columns & Types")
df_places.dtypes


Places Columns & Types


place_id     object
name         object
city         object
type         object
rating      float64
dtype: object

In [11]:
df_flights.head(3)

Unnamed: 0,flight_id,airline,from,to,departure_time,arrival_time,price
0,FL0001,IndiGo,Hyderabad,Delhi,2025-01-04T11:32:00,2025-01-04T15:32:00,2907
1,FL0002,Air India,Delhi,Kolkata,2025-11-26T05:34:00,2025-11-26T09:34:00,3779
2,FL0003,SpiceJet,Chennai,Hyderabad,2025-06-03T00:26:00,2025-06-03T01:26:00,5473


In [13]:
print("Unique source cities:", df_flights["from"].unique())
print("Unique destination cities:", df_flights["to"].unique())


Unique source cities: ['Hyderabad' 'Delhi' 'Chennai' 'Bangalore' 'Goa' 'Kolkata' 'Jaipur'
 'Mumbai']
Unique destination cities: ['Delhi' 'Kolkata' 'Hyderabad' 'Mumbai' 'Bangalore' 'Jaipur' 'Goa'
 'Chennai']


In [14]:
df_flights["price"].describe()


count      30.000000
mean     5510.866667
std      1758.539240
min      2792.000000
25%      4088.750000
50%      5422.000000
75%      6760.250000
max      8981.000000
Name: price, dtype: float64

In [15]:
df_flights.sort_values(by="price").head(5)


Unnamed: 0,flight_id,airline,from,to,departure_time,arrival_time,price
17,FL0018,Go First,Mumbai,Hyderabad,2025-09-22T18:45:00,2025-09-22T19:45:00,2792
0,FL0001,IndiGo,Hyderabad,Delhi,2025-01-04T11:32:00,2025-01-04T15:32:00,2907
14,FL0015,Air India,Hyderabad,Kolkata,2025-05-22T22:32:00,2025-05-23T00:32:00,2909
23,FL0024,SpiceJet,Mumbai,Goa,2025-07-15T14:38:00,2025-07-15T18:38:00,3304
4,FL0005,Air India,Chennai,Bangalore,2025-02-08T03:08:00,2025-02-08T05:08:00,3695


In [16]:
df_hotels.head(3)


Unnamed: 0,hotel_id,name,city,stars,price_per_night,amenities
0,HOT0001,Grand Palace Hotel,Delhi,4,3897,"[wifi, pool]"
1,HOT0002,Comfort Suites,Delhi,5,3650,"[gym, breakfast, wifi, parking]"
2,HOT0003,Green Leaf Resort,Delhi,4,6123,"[pool, parking]"


In [17]:
df_hotels["city"].unique()


array(['Delhi', 'Mumbai', 'Goa', 'Bangalore', 'Chennai', 'Hyderabad',
       'Kolkata', 'Jaipur'], dtype=object)

In [18]:
df_hotels["price_per_night"].describe()


count      40.000000
mean     4129.800000
std      1638.216153
min      1232.000000
25%      2806.750000
50%      4069.500000
75%      5652.000000
max      6481.000000
Name: price_per_night, dtype: float64

In [19]:
df_hotels["stars"].value_counts().sort_index()


stars
2    14
3     6
4     9
5    11
Name: count, dtype: int64

In [29]:
for column in df_places.select_dtypes(include=["object"]).columns:
    print(f"Unique values in '{column}':", df_places[column].unique())

Unique values in 'place_id': ['PLC0001' 'PLC0002' 'PLC0003' 'PLC0004' 'PLC0005' 'PLC0006' 'PLC0007'
 'PLC0008' 'PLC0009' 'PLC0010' 'PLC0011' 'PLC0012' 'PLC0013' 'PLC0014'
 'PLC0015' 'PLC0016' 'PLC0017' 'PLC0018' 'PLC0019' 'PLC0020' 'PLC0021'
 'PLC0022' 'PLC0023' 'PLC0024' 'PLC0025' 'PLC0026' 'PLC0027' 'PLC0028'
 'PLC0029' 'PLC0030' 'PLC0031' 'PLC0032' 'PLC0033' 'PLC0034' 'PLC0035'
 'PLC0036' 'PLC0037' 'PLC0038' 'PLC0039' 'PLC0040']
Unique values in 'name': ['Famous Fort' 'Beautiful Temple' 'Historic Fort' 'Popular Museum'
 'Famous Museum' 'Beautiful Fort' 'Historic Temple' 'Famous Lake'
 'Popular Lake' 'Famous Park' 'Historic Park' 'Beautiful Park'
 'Beautiful Lake' 'Popular Fort' 'Scenic Museum' 'Historic Lake'
 'Scenic Temple' 'Popular Temple' 'Scenic Park' 'Famous Temple']
Unique values in 'city': ['Delhi' 'Mumbai' 'Goa' 'Bangalore' 'Chennai' 'Hyderabad' 'Kolkata'
 'Jaipur']
Unique values in 'type': ['lake' 'temple' 'museum' 'park' 'fort' 'beach' 'market' 'monument']


In [31]:
for column in df_flights.select_dtypes(include=["object"]).columns:
    print(f"Unique values in '{column}':", df_flights[column].unique())

Unique values in 'flight_id': ['FL0001' 'FL0002' 'FL0003' 'FL0004' 'FL0005' 'FL0006' 'FL0007' 'FL0008'
 'FL0009' 'FL0010' 'FL0011' 'FL0012' 'FL0013' 'FL0014' 'FL0015' 'FL0016'
 'FL0017' 'FL0018' 'FL0019' 'FL0020' 'FL0021' 'FL0022' 'FL0023' 'FL0024'
 'FL0025' 'FL0026' 'FL0027' 'FL0028' 'FL0029' 'FL0030']
Unique values in 'airline': ['IndiGo' 'Air India' 'SpiceJet' 'Go First' 'Vistara']
Unique values in 'from': ['Hyderabad' 'Delhi' 'Chennai' 'Bangalore' 'Goa' 'Kolkata' 'Jaipur'
 'Mumbai']
Unique values in 'to': ['Delhi' 'Kolkata' 'Hyderabad' 'Mumbai' 'Bangalore' 'Jaipur' 'Goa'
 'Chennai']
Unique values in 'departure_time': ['2025-01-04T11:32:00' '2025-11-26T05:34:00' '2025-06-03T00:26:00'
 '2025-05-13T13:18:00' '2025-02-08T03:08:00' '2025-12-27T22:52:00'
 '2025-04-17T13:11:00' '2025-12-11T21:38:00' '2025-01-25T06:00:00'
 '2025-02-27T08:06:00' '2025-05-25T09:41:00' '2025-06-16T10:33:00'
 '2025-12-22T01:36:00' '2025-10-13T23:41:00' '2025-05-22T22:32:00'
 '2025-08-28T14:26:00' '2025-12-06T1

In [32]:
for column in df_hotels.select_dtypes(include=["object"]).columns:
    print(f"Unique values in '{column}':", df_hotels[column].unique())

Unique values in 'hotel_id': ['HOT0001' 'HOT0002' 'HOT0003' 'HOT0004' 'HOT0005' 'HOT0006' 'HOT0007'
 'HOT0008' 'HOT0009' 'HOT0010' 'HOT0011' 'HOT0012' 'HOT0013' 'HOT0014'
 'HOT0015' 'HOT0016' 'HOT0017' 'HOT0018' 'HOT0019' 'HOT0020' 'HOT0021'
 'HOT0022' 'HOT0023' 'HOT0024' 'HOT0025' 'HOT0026' 'HOT0027' 'HOT0028'
 'HOT0029' 'HOT0030' 'HOT0031' 'HOT0032' 'HOT0033' 'HOT0034' 'HOT0035'
 'HOT0036' 'HOT0037' 'HOT0038' 'HOT0039' 'HOT0040']
Unique values in 'name': ['Grand Palace Hotel' 'Comfort Suites' 'Green Leaf Resort' 'Sunrise Hotel'
 'Blue Lagoon Resort' 'Budget Stay Inn' 'Royal Heritage' 'Sea View Resort'
 'City Center Hotel']
Unique values in 'city': ['Delhi' 'Mumbai' 'Goa' 'Bangalore' 'Chennai' 'Hyderabad' 'Kolkata'
 'Jaipur']


TypeError: unhashable type: 'list'

In [20]:
df_hotels.sort_values(
    by=["stars", "price_per_night"],
    ascending=[False, True]
).head(5)


Unnamed: 0,hotel_id,name,city,stars,price_per_night,amenities
13,HOT0014,Royal Heritage,Goa,5,1232,"[wifi, parking, breakfast, pool]"
10,HOT0011,Comfort Suites,Goa,5,2828,"[spa, pool, wifi, gym]"
1,HOT0002,Comfort Suites,Delhi,5,3650,"[gym, breakfast, wifi, parking]"
39,HOT0040,Sea View Resort,Jaipur,5,3673,"[breakfast, parking, pool]"
25,HOT0026,City Center Hotel,Hyderabad,5,4332,"[spa, wifi, pool]"


In [23]:
df_places.head(3)

Unnamed: 0,place_id,name,city,type,rating
0,PLC0001,Famous Fort,Delhi,lake,4.6
1,PLC0002,Beautiful Temple,Delhi,temple,4.2
2,PLC0003,Historic Fort,Delhi,museum,4.2


In [21]:
df_places["city"].unique()


array(['Delhi', 'Mumbai', 'Goa', 'Bangalore', 'Chennai', 'Hyderabad',
       'Kolkata', 'Jaipur'], dtype=object)

In [24]:
df_places["type"].value_counts()


type
park        8
market      7
museum      6
temple      5
lake        4
fort        4
beach       4
monument    2
Name: count, dtype: int64

In [25]:
df_places.sort_values(by="rating", ascending=False).head(10)


Unnamed: 0,place_id,name,city,type,rating
32,PLC0033,Scenic Museum,Kolkata,temple,4.9
30,PLC0031,Famous Fort,Kolkata,beach,4.9
31,PLC0032,Historic Temple,Kolkata,temple,4.9
21,PLC0022,Scenic Temple,Chennai,beach,4.8
0,PLC0001,Famous Fort,Delhi,lake,4.6
28,PLC0029,Popular Lake,Hyderabad,park,4.6
39,PLC0040,Scenic Park,Jaipur,park,4.6
3,PLC0004,Popular Museum,Delhi,lake,4.5
27,PLC0028,Scenic Park,Hyderabad,museum,4.5
34,PLC0035,Scenic Museum,Kolkata,museum,4.5


In [27]:
set(df_flights["to"]) \
.intersection(set(df_hotels["city"])) \
.intersection(set(df_places["city"]))


{'Bangalore',
 'Chennai',
 'Delhi',
 'Goa',
 'Hyderabad',
 'Jaipur',
 'Kolkata',
 'Mumbai'}