In [148]:
import json
import os

import pandas as pd
import requests


def get_response_for_city(id, page_num):
    """
    Fetch one page of hotel search results for a destination from the
    RapidAPI ``properties/list`` endpoint.

    The search is fixed to one adult, 25 results per page, a stay from
    2020-01-08 to 2020-01-15, sorted by price, with locale en_US and
    currency USD. The raw response text is printed before it is parsed.

    Parameters
    ----------
    id :
        Destination id, sent as the ``destinationId`` query parameter.
    page_num :
        Page to fetch, sent as the ``pageNumber`` query parameter.

    Returns
    -------
    The decoded JSON response body.

    Raises
    ------
    requests.HTTPError
        If the API answers with a 4xx/5xx status.
    ValueError
        If the response body is not valid JSON.
    """
    url = "https://hotels4.p.rapidapi.com/properties/list"

    querystring = {"adults1":"1","pageNumber":page_num,"destinationId":id,"pageSize":"25","checkOut":"2020-01-15","checkIn":"2020-01-08","sortOrder":"PRICE","locale":"en_US","currency":"USD"}

    headers = {
        # Prefer a key supplied through the environment so the secret does not
        # have to live in the notebook; the literal is kept as the fallback.
        'x-rapidapi-key': os.environ.get("RAPIDAPI_KEY", "******"),
        'x-rapidapi-host': "hotels4.p.rapidapi.com"
        }

    # Without a timeout a stalled connection would block the notebook forever.
    response = requests.get(url, headers=headers, params=querystring, timeout=30)

    print(response.text)

    # Fail loudly on an error status instead of handing an error payload to
    # the caller as if it were a search result.
    response.raise_for_status()

    return json.loads(response.text)


def get_review(hotel_id):
    """
    Fetch the first page of guest reviews for a hotel from the RapidAPI
    ``reviews/list`` endpoint.

    The lookup is best-effort: the badge text and the review list are read
    independently, so a response that lacks one of them still yields the other.

    Parameters
    ----------
    hotel_id :
        Hotel identifier, sent as the ``id`` query parameter.

    Returns
    -------
    tuple
        ``(quality, reviews)``. ``quality`` is the ``qualitativeBadgeText`` of
        the review overview, or '' when it is absent. ``reviews`` is the list
        of ``summary`` values from the first guest-review group; entries
        without a ``summary`` are skipped and the list is empty when no
        reviews are present.

    Raises
    ------
    ValueError
        If the response body is not valid JSON.
    """
    url = "https://hotels4.p.rapidapi.com/reviews/list"

    querystring = {"id":hotel_id,"page":"1","loc":"en_US"}

    headers = {
        # Prefer a key supplied through the environment so the secret does not
        # have to live in the notebook; the literal is kept as the fallback.
        'x-rapidapi-key': os.environ.get("RAPIDAPI_KEY", "*******"),
        'x-rapidapi-host': "hotels4.p.rapidapi.com"
        }

    # Without a timeout a stalled connection would block the notebook forever.
    response = requests.get(url, headers=headers, params=querystring, timeout=30)
    data = json.loads(response.text)

    try:
        groups = data['reviewData']['guestReviewGroups']
    except (KeyError, TypeError):
        return '', []

    try:
        quality = groups['guestReviewOverview']['qualitativeBadgeText']
    except (KeyError, TypeError):
        quality = ''

    try:
        raw_reviews = groups['guestReviews'][0]['reviews'] or []
    except (KeyError, IndexError, TypeError):
        raw_reviews = []

    reviews = [
        entry['summary']
        for entry in raw_reviews
        if isinstance(entry, dict) and 'summary' in entry
    ]

    return quality, reviews

def get_tagline(hotel_id):
    """
    Fetch amenities, tag-line and address for a hotel from the RapidAPI
    ``properties/get-details`` endpoint.

    The lookup is best-effort: the overview sections and the address are read
    independently, any parsing error is printed, and fields that could not be
    read are returned as ''.

    Parameters
    ----------
    hotel_id :
        Hotel identifier, sent as the ``id`` query parameter.

    Returns
    -------
    tuple
        ``(amenities, tagline, address)``. ``amenities`` is the content of the
        ``HOTEL_FEATURE`` overview section joined with spaces. ``tagline`` is
        the first content entry of the ``TAGLINE`` section with an enclosing
        ``<b>``/``</b>`` pair removed. ``address`` is the property's
        ``fullAddress``.

    Raises
    ------
    ValueError
        If the response body is not valid JSON.
    """
    url = "https://hotels4.p.rapidapi.com/properties/get-details"

    querystring = {"id":hotel_id,"checkIn":"2020-01-08","checkOut":"2020-01-15","currency":"USD","locale":"en_US","adults1":"1"}

    headers = {
        # Prefer a key supplied through the environment so the secret does not
        # have to live in the notebook; the literal is kept as the fallback.
        'x-rapidapi-key': os.environ.get("RAPIDAPI_KEY", "*******"),
        'x-rapidapi-host': "hotels4.p.rapidapi.com"
        }

    # Without a timeout a stalled connection would block the notebook forever.
    response = requests.get(url, headers=headers, params=querystring, timeout=30)
    data = json.loads(response.text)

    amenities = ''
    tagline = ''
    address = ''

    try:
        body = data['data']['body']
    except Exception as e:
        print(e)
        return amenities, tagline, address

    try:
        for section in body['overview']['overviewSections']:
            if section['type'] == 'HOTEL_FEATURE':
                amenities = " ".join(section['content'])
            if section['type'] == 'TAGLINE':
                text = section['content'][0]
                # str.strip('<b></b>') would strip the characters '<', 'b',
                # '/', '>' from both ends and eat leading/trailing letters
                # such as the 'b' in "best"; remove the tags themselves.
                if text.startswith('<b>'):
                    text = text[len('<b>'):]
                if text.endswith('</b>'):
                    text = text[:-len('</b>')]
                tagline = text
    except Exception as e:
        print(e)

    # Read the address separately so a malformed overview does not lose it.
    try:
        address = body['propertyDescription']['address']['fullAddress']
    except Exception as e:
        print(e)

    return amenities, tagline, address


def _nested_get(record, *path):
    """Walk ``path`` through nested containers; return '' if any step is missing."""
    current = record
    for key in path:
        try:
            current = current[key]
        except (KeyError, IndexError, TypeError):
            return ''
    return current


def property_details(df, response):
    """
    Append one row per hotel in a search response to ``df``.

    For every entry in ``response['data']['body']['searchResults']['results']``
    the hotel id is printed, the listing fields are read, and reviews and
    details are fetched through ``get_review`` and ``get_tagline``. Optional
    listing fields (star rating, guest rating and total, price, thumbnail) are
    stored as '' when absent. Review summaries are joined with tab characters.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame to extend; it is not modified in place.
    response : dict
        Parsed search response with the nesting described above.

    Returns
    -------
    pandas.DataFrame
        The rows of ``df`` followed by one new row per hotel, re-indexed from
        0. ``df`` itself is returned when the response holds no hotels.

    Raises
    ------
    KeyError
        If the results list, or a hotel's ``id`` or ``name``, is missing.
    """
    records = []

    for hotel in response['data']['body']['searchResults']['results']:
        hotel_id = hotel['id']
        print(hotel_id)

        hotel_name = hotel['name']

        quality, review = get_review(hotel_id)
        amenities, tagline, hotel_address = get_tagline(hotel_id)

        # Keep only real text so one malformed entry cannot blank the whole cell.
        conc_review = '\t'.join(text for text in review if isinstance(text, str))

        records.append({
            'id': hotel_id,
            'name': hotel_name,
            'address': hotel_address,
            'star_rating': _nested_get(hotel, 'starRating'),
            'guest_ranting': _nested_get(hotel, 'guestReviews', 'rating'),
            'total_rating': _nested_get(hotel, 'guestReviews', 'total'),
            'price': _nested_get(hotel, 'ratePlan', 'price', 'current'),
            'thumbnail': _nested_get(hotel, 'optimizedThumbUrls', 'srpDesktop'),
            'quality': quality,
            'review': conc_review,
            'amenities': amenities,
            'tagline': tagline,
        })

    if not records:
        return df

    # DataFrame.append was removed in pandas 2.0 and copied the frame on every
    # row; build all rows first and combine once.
    new_rows = pd.DataFrame(records)

    if len(df) == 0:
        # Nothing to concatenate: keep the caller's column order, followed by
        # any keys the frame did not declare.
        columns = list(df.columns) + [c for c in new_rows.columns if c not in df.columns]
        return new_rows.reindex(columns=columns)

    return pd.concat([df, new_rows], ignore_index=True)

In [194]:
# Destination id to scrape; see the RapidAPI documentation for other ids.
# 1439028 resolves to Los Angeles in the API response.
LA_DESTINATION_ID = "1439028"

page_1_response_city = get_response_for_city(LA_DESTINATION_ID, "1")
page_2_response_city = get_response_for_city(LA_DESTINATION_ID, "2")

{"result":"OK","data":{"body":{"header":"Los Angeles, California, United States of America","query":{"destination":{"id":"1439028","value":"Los Angeles, California, United States of America","resolvedLocation":"CITY:1439028:UNKNOWN:UNKNOWN"}},"searchResults":{"totalCount":1541,"results":[{"id":1567333856,"name":"Nirvana~ Loft Studio @venice Short/long Studio Bedroom Apts","starRating":3.0,"urls":{},"address":{"locality":"Marina del Rey","region":"CA","countryName":"United States","countryCode":"us","obfuscate":false},"landmarks":[{"label":"City center","distance":"12 miles"},{"label":"Universal Studios Hollywood","distance":"12 miles"}],"geoBullets":[],"ratePlan":{"price":{"current":"$15","exactCurrent":15.0},"features":{"paymentPreference":false,"noCCRequired":false}},"neighbourhood":"Marina del Rey","deals":{},"messaging":{},"badging":{},"coordinate":{"lat":33.9766,"lon":-118.4516},"providerType":"LOCAL","supplierHotelId":48947933,"vrBadge":"Apartment","isAlternative":false,"optimize

{"result":"OK","data":{"body":{"header":"Los Angeles, California, United States of America","query":{"destination":{"id":"1439028","value":"Los Angeles, California, United States of America","resolvedLocation":"CITY:1439028:UNKNOWN:UNKNOWN"}},"searchResults":{"totalCount":1541,"results":[{"id":128560,"name":"Days Inn by Wyndham Whittier Los Angeles","starRating":2.0,"urls":{},"address":{"streetAddress":"14330 Telegraph Rd","extendedAddress":"","locality":"Whittier","postalCode":"90604","region":"CA","countryName":"United States","countryCode":"us","obfuscate":false},"guestReviews":{"unformattedRating":7.4,"rating":"7.4","total":513,"scale":10,"badge":"good","badgeText":"Good"},"landmarks":[{"label":"City center","distance":"16 miles"},{"label":"Universal Studios Hollywood","distance":"24 miles"}],"geoBullets":[],"ratePlan":{"price":{"current":"$67","exactCurrent":67.15},"features":{"paymentPreference":false,"noCCRequired":false}},"neighbourhood":"Whittier","deals":{},"messaging":{},"ba

In [195]:

# Save the raw search responses to disk. Create the output folder first so the
# cell also works on a fresh checkout where hotel_data/ does not exist yet.
os.makedirs('hotel_data', exist_ok=True)

with open('hotel_data/page_1_hotel_25_LA.json', 'w') as f:
    json.dump(page_1_response_city, f)

with open('hotel_data/page_2_hotel_25_LA.json', 'w') as f:
    json.dump(page_2_response_city, f)

In [196]:
hotel_ids = []

# Column order of the exported CSV. 'guest_ranting' is spelled this way in the
# rows produced by property_details, so it must stay as is to line up.
hotel_df_cols = ['id', 'name', 'address', 'star_rating','guest_ranting', 'total_rating', 'price', 'thumbnail',
                 'quality', 'review', 'amenities', 'tagline']

hotel_df = pd.DataFrame(columns = hotel_df_cols)

# Collect one row per hotel from both result pages.
hotel_df = property_details(hotel_df, page_1_response_city)
hotel_df = property_details(hotel_df, page_2_response_city)

# index=False: otherwise the row index is written as an unnamed first column,
# which comes back as a spurious 'Unnamed: 0' data column when the file is
# re-read with pd.read_csv.
hotel_df.to_csv("hotel_data/LA_hotel_data.csv", index=False)

1567333856
1846327776
1456488320
1576718848
121693
1249928704
701676608
1204948448
871060096
1071922240
648343
354563
470065
454786
537697
639030080
613065696
592970
532979
451454
622282624
255853
595119
2027428416
201624
128560
38855040
419435
562129
509981
1268959712
451452
451453
554264
550829
416845
1055393312
111568
256292
482126
540977
303889
1577388896
1054137088
615338720
653537
470292
262165
613765408
485252


### Sentiment analyser over reviews

In [159]:
import nltk
from nltk.sentiment.vader import SentimentIntensityAnalyzer
# Download NLTK's 'vader_lexicon' package, the lexicon data for the VADER
# analyzer imported above.
nltk.download('vader_lexicon')

[nltk_data] Downloading package vader_lexicon to
[nltk_data]     C:\Users\tushar\AppData\Roaming\nltk_data...


True

In [160]:
# Single analyzer instance; calculate_polarity reads it as a module-level global.
sia = SentimentIntensityAnalyzer()

In [192]:
import numpy as np

def calculate_polarity(row, threshold=0.2):
    """
    Compute the share of positive and negative reviews in a tab-separated
    string of review texts.

    Each non-blank review is scored once with the module-level ``sia``
    analyzer. A review counts as positive when its compound score is
    ``>= threshold`` and as negative when it is ``<= -threshold``; anything in
    between is neutral and counts towards neither share.

    Parameters
    ----------
    row : str
        Review texts separated by tab characters.
    threshold : float, optional
        Absolute compound score a review must reach to be counted as
        positive or negative. Defaults to 0.2.

    Returns
    -------
    dict
        ``{'pos': <fraction>, 'neg': <fraction>}`` as plain floats rounded to
        three decimals; both are 0.0 when ``row`` holds no review text.
    """
    reviews = [review for review in row.split('\t') if review.strip()]
    if not reviews:
        # Nothing to score; also avoids dividing by zero below.
        return {'pos': 0.0, 'neg': 0.0}

    compounds = [sia.polarity_scores(review)['compound'] for review in reviews]
    total = len(compounds)
    positive = sum(1 for score in compounds if score >= threshold)
    # The negative cut-off mirrors the positive one; comparing against
    # +threshold would count every neutral review as negative.
    negative = sum(1 for score in compounds if score <= -threshold)

    # Built-in round keeps the values plain floats, so the dict's text form
    # stays "0.333" rather than a numpy scalar repr.
    return {'pos': round(positive / total, 3), 'neg': round(negative / total, 3)}

In [197]:
# Per-city hotel files to annotate; each one is rewritten in place with an
# added 'sentiment' column.
csv_list = ['Chicago_hotel_data.csv', 'LA_hotel_data.csv', 'NY_hotel_data.csv', 'sf_hotel_data.csv']

for city in csv_list:
    print(city)
    hotel_df = pd.read_csv('hotel_data/' + city)
    # Hotels without reviews load as NaN; blank them first so astype(str)
    # does not pass the literal text 'nan' on as if it were a review.
    hotel_df['sentiment'] = hotel_df['review'].fillna('').astype(str).apply(calculate_polarity)
    hotel_df.to_csv('hotel_data/' + city, index=False)

Chicago_hotel_data.csv
LA_hotel_data.csv
NY_hotel_data.csv
sf_hotel_data.csv
