In [1]:
import json
import os
import pandas as pd

from airbnb_api import Airbnb

# Data

In [2]:
# Directory holding the saved data files, given as a path relative to the
# working directory. It is joined with each file name in the cells below
# (neighborhood list, listings JSON, reviews JSON).
data_path = '../../data'

In [3]:
# Read the neighborhood names identified in the Airbnb data: one name per
# line of the text file, with line endings stripped.
neighborhoods_file = os.path.join(data_path, 'SDneighborhoods.txt')

with open(neighborhoods_file, 'r') as handle:
    raw_text = handle.read()

neighborhoods = raw_text.splitlines()

In [4]:
# Load previously saved listings, de-duplicated on listing id.
# When no saved file exists yet, start from an empty DataFrame.
listings_file = os.path.join(data_path, 'airbnb-listings-full.json')

listings = (
    pd.read_json(listings_file).drop_duplicates(subset='id')
    if os.path.exists(listings_file)
    else pd.DataFrame()
)

# remember how many listings were on disk so later cells can detect new ones
num_listings = len(listings)
num_listings

7320

In [5]:
# preview the first five saved listings
listings.head(n=5)

Unnamed: 0,badges,bathroom_label,bathrooms,bed_label,bedroom_label,bedrooms,beds,city,guest_label,host_languages,...,price_string,rate,rate_type,rate_with_service_fee,weekly_price_factor,should_show_from_label,china_promotion_display_types,license,trust_signals,summary
0,[],1 shared bath,1.0,1 bed,1 bedroom,1.0,1.0,Chula Vista,5 guests,[],...,$35,"{'amount': 35.0, 'amount_formatted': '$35', 'c...",nightly,"{'amount': 35.0, 'amount_formatted': '$35', 'c...",0.93,False,[],,,
1,[NEW],1 private bath,1.0,1 bed,1 bedroom,1.0,1.0,San Diego,2 guests,[],...,$30,"{'amount': 30.0, 'amount_formatted': '$30', 'c...",nightly,"{'amount': 30.0, 'amount_formatted': '$30', 'c...",0.95,False,[],,,
2,[],1 bath,1.0,1 bed,Studio,0.0,1.0,San Diego,2 guests,[],...,$77,"{'amount': 77.0, 'amount_formatted': '$77', 'c...",nightly,"{'amount': 77.0, 'amount_formatted': '$77', 'c...",1.0,False,[],,,
3,[],1 bath,1.0,1 bed,1 bedroom,1.0,1.0,San Diego,1 guest,"[en, es]",...,$25,"{'amount': 25.0, 'amount_formatted': '$25', 'c...",nightly,"{'amount': 25.0, 'amount_formatted': '$25', 'c...",1.0,False,[],,,
4,[],1 bath,1.0,1 bed,Studio,0.0,1.0,San Diego,2 guests,"[en, es]",...,$67,"{'amount': 67.0, 'amount_formatted': '$67', 'c...",nightly,"{'amount': 67.0, 'amount_formatted': '$67', 'c...",0.95,False,[],,,


In [6]:
# Load previously saved reviews, de-duplicated on review id.
# When no saved file exists yet, start from an empty DataFrame.
reviews_file = os.path.join(data_path, 'airbnb-reviews-full.json')

reviews = (
    pd.read_json(reviews_file).drop_duplicates(subset='id')
    if os.path.exists(reviews_file)
    else pd.DataFrame()
)

# remember how many reviews were on disk so later cells can detect new ones
num_reviews = len(reviews)
num_reviews

426207

In [7]:
# preview the first five saved reviews
reviews.head(n=5)

Unnamed: 0,author,author_id,can_be_edited,comments,created_at,id,id_str,listing_id,recipient_id,collection_tag,listing,rating,recipient,response,role,language,user_flag
0,"{'first_name': 'Reginald', 'has_profile_pic': ...",328321073,False,magnificent,2020-08-20 22:48:25+00:00,653954608,653954608,13301630,40442827,,"{'id': 13301630, 'listing_id_str': '13301630',...",5,"{'first_name': 'Gary', 'has_profile_pic': True...",,guest,en,
1,"{'first_name': 'Reginald', 'has_profile_pic': ...",328321073,False,I have been Garry's guest for the last 6 month...,2020-06-15 23:46:10+00:00,629997649,629997649,13301630,40442827,,"{'id': 13301630, 'listing_id_str': '13301630',...",5,"{'first_name': 'Gary', 'has_profile_pic': True...",,guest,en,
2,"{'first_name': 'John', 'has_profile_pic': True...",31735215,False,Gary is a wonderful host.\nHe is very nice and...,2019-12-29 00:59:02+00:00,583538843,583538843,13301630,40442827,,"{'id': 13301630, 'listing_id_str': '13301630',...",5,"{'first_name': 'Gary', 'has_profile_pic': True...",,guest,en,
3,"{'first_name': 'Lawang', 'has_profile_pic': Tr...",212339508,False,Recommended to all for a cozy place with great...,2018-09-03 22:56:57+00:00,318521505,318521505,13301630,40442827,,"{'id': 13301630, 'listing_id_str': '13301630',...",5,"{'first_name': 'Gary', 'has_profile_pic': True...",,guest,en,
4,"{'first_name': 'Brittany', 'has_profile_pic': ...",124868001,False,Gary was very friendly and welcoming! And the ...,2018-07-22 23:49:03+00:00,295091676,295091676,13301630,40442827,,"{'id': 13301630, 'listing_id_str': '13301630',...",5,"{'first_name': 'Gary', 'has_profile_pic': True...",,guest,en,


# API

In [8]:
# Instantiate the Airbnb API wrapper imported from the project-local
# airbnb_api module; every query in the cells below goes through this object.
airbnb = Airbnb()

In [9]:
# Query listings for the whole city
# (306 appears to be the maximum for a single query).
search_location = 'San Diego, CA'
new_listings = airbnb.get_listings(search_location)

No results for San Diego, CA on page 1


In [10]:
# show whatever the city-wide query above returned
new_listings

In [11]:
def _json_default(value):
    """Convert a value the json module cannot encode into a JSON-friendly one."""
    if value is pd.NaT:
        # missing timestamp -> JSON null
        return None
    if hasattr(value, 'isoformat'):
        # datetime-like objects (e.g. pandas.Timestamp) -> ISO 8601 string
        return value.isoformat()
    if hasattr(value, 'tolist'):
        # numpy scalars / arrays -> plain Python numbers / lists
        return value.tolist()
    raise TypeError(
        'Object of type {} is not JSON serializable'.format(type(value).__name__)
    )


def save_data(df, filename):
    """
    Save DataFrame as JSON in nicer format than pandas.DataFrame.to_json

    The frame is written as a list of row records, indented by four spaces
    and followed by a trailing newline.

    Parameters:
        df (pandas.DataFrame): DataFrame to save (e.g. listings or reviews)
        filename (str): Name of file

    Raises:
        TypeError: If a cell holds a value that cannot be converted to JSON.
            The target file is left untouched in that case.
    """

    records = df.to_dict(orient='records')

    # Serialize BEFORE opening the file: opening with 'w' truncates it, so a
    # serialization error after that point would wipe the previously saved data.
    # The default hook handles Timestamps (pd.read_json parses date-like columns
    # such as created_at into them) which plain json.dumps rejects.
    payload = json.dumps(records, indent=4, default=_json_default)

    with open(filename, 'w') as f:
        print(payload, file=f)

In [12]:
# Merge the freshly fetched listings into the saved ones.
# pd.concat is used instead of DataFrame.append, which was deprecated in
# pandas 1.4 and removed in pandas 2.0. concat silently drops a None entry,
# which covers a query that returned None (TODO confirm what airbnb_api
# returns when there are no results).
listings = pd.concat([listings, new_listings])

# De-duplicate on listing id. Skipped when there is no 'id' column at all
# (no saved file and nothing fetched), where drop_duplicates would raise KeyError.
if 'id' in listings.columns:
    listings = listings.drop_duplicates(subset='id')

# persist only when the merge actually added listings
if len(listings) > num_listings:
    print('Found new listings!')
    save_data(listings, listings_file)
    num_listings = len(listings)

In [13]:
# Query listings for a couple of neighborhoods within the city.
sample_neighborhoods = ['North Park', 'South Park']
new_listings = airbnb.get_neighborhood_listings(sample_neighborhoods, 'San Diego, CA')

No results for North Park, San Diego, CA on page 1
No results for South Park, San Diego, CA on page 1


In [14]:
# show whatever the neighborhood query above returned
new_listings

In [15]:
# Merge the neighborhood listings into the saved ones.
# pd.concat is used instead of DataFrame.append, which was deprecated in
# pandas 1.4 and removed in pandas 2.0. concat silently drops a None entry,
# which covers a query that returned None (TODO confirm what airbnb_api
# returns when there are no results).
listings = pd.concat([listings, new_listings])

# De-duplicate on listing id. Skipped when there is no 'id' column at all
# (no saved file and nothing fetched), where drop_duplicates would raise KeyError.
if 'id' in listings.columns:
    listings = listings.drop_duplicates(subset='id')

# persist only when the merge actually added listings
if len(listings) > num_listings:
    print('Found new listings!')
    save_data(listings, listings_file)
    # refresh the baseline so re-running this cell does not re-save unchanged data
    num_listings = len(listings)

In [16]:
# Fetch all reviews for the first two saved listings and display them.
sample_listing_ids = listings['id'].values[:2]
new_reviews = airbnb.get_all_reviews(sample_listing_ids)
new_reviews

Unnamed: 0,author,author_id,can_be_edited,comments,created_at,id,id_str,listing_id,recipient_id,collection_tag,listing,rating,recipient,response,role,language,user_flag
0,"{'first_name': 'Reginald', 'has_profile_pic': ...",328321073,False,magnificent,2020-08-20T22:48:25Z,653954608,653954608,13301630,40442827,,"{'id': 13301630, 'listing_id_str': '13301630',...",5,"{'first_name': 'Gary', 'has_profile_pic': True...",,guest,en,
1,"{'first_name': 'Reginald', 'has_profile_pic': ...",328321073,False,I have been Garry's guest for the last 6 month...,2020-06-15T23:46:10Z,629997649,629997649,13301630,40442827,,"{'id': 13301630, 'listing_id_str': '13301630',...",5,"{'first_name': 'Gary', 'has_profile_pic': True...",,guest,en,
2,"{'first_name': 'John', 'has_profile_pic': True...",31735215,False,Gary is a wonderful host.\nHe is very nice and...,2019-12-29T00:59:02Z,583538843,583538843,13301630,40442827,,"{'id': 13301630, 'listing_id_str': '13301630',...",5,"{'first_name': 'Gary', 'has_profile_pic': True...",,guest,en,
3,"{'first_name': 'Lawang', 'has_profile_pic': Tr...",212339508,False,Recommended to all for a cozy place with great...,2018-09-03T22:56:57Z,318521505,318521505,13301630,40442827,,"{'id': 13301630, 'listing_id_str': '13301630',...",5,"{'first_name': 'Gary', 'has_profile_pic': True...",,guest,en,
4,"{'first_name': 'Brittany', 'has_profile_pic': ...",124868001,False,Gary was very friendly and welcoming! And the ...,2018-07-22T23:49:03Z,295091676,295091676,13301630,40442827,,"{'id': 13301630, 'listing_id_str': '13301630',...",5,"{'first_name': 'Gary', 'has_profile_pic': True...",,guest,en,
5,"{'first_name': 'Dylan', 'has_profile_pic': Tru...",105593023,False,Gary is so kind and friendly. He would like to...,2017-08-13T20:05:09Z,181982904,181982904,13301630,40442827,,"{'id': 13301630, 'listing_id_str': '13301630',...",5,"{'first_name': 'Gary', 'has_profile_pic': True...",,guest,en,
6,"{'first_name': 'Iris', 'has_profile_pic': True...",85397302,False,Gary was kind. He gave us recommendations on b...,2017-07-17T23:35:17Z,171519718,171519718,13301630,40442827,,"{'id': 13301630, 'listing_id_str': '13301630',...",5,"{'first_name': 'Gary', 'has_profile_pic': True...",,guest,en,
7,"{'first_name': 'Aliki', 'has_profile_pic': Tru...",90637792,False,"This space is unbelievable, perfect for a fami...",2017-02-15T18:29:40Z,132077682,132077682,13301630,40442827,,"{'id': 13301630, 'listing_id_str': '13301630',...",5,"{'first_name': 'Gary', 'has_profile_pic': True...",,guest,en,
8,"{'first_name': 'Imane', 'has_profile_pic': Tru...",66956170,False,Gary is by far the best Airbnb host I've ever ...,2016-11-13T19:58:06Z,113796917,113796917,13301630,40442827,,"{'id': 13301630, 'listing_id_str': '13301630',...",5,"{'first_name': 'Gary', 'has_profile_pic': True...",,guest,en,
9,"{'first_name': 'Dan', 'has_profile_pic': True,...",47112954,False,"Gary is a great host, with a calm relaxing hom...",2016-09-05T16:49:21Z,99693193,99693193,13301630,40442827,,"{'id': 13301630, 'listing_id_str': '13301630',...",5,"{'first_name': 'Gary', 'has_profile_pic': True...",,guest,en,


In [17]:
# Merge the freshly fetched reviews into the saved ones.
# pd.concat is used instead of DataFrame.append, which was deprecated in
# pandas 1.4 and removed in pandas 2.0. concat silently drops a None entry,
# which covers a query that returned None (TODO confirm what airbnb_api
# returns when there are no results).
reviews = pd.concat([reviews, new_reviews])

# De-duplicate on review id. Skipped when there is no 'id' column at all
# (no saved file and nothing fetched), where drop_duplicates would raise KeyError.
if 'id' in reviews.columns:
    reviews = reviews.drop_duplicates(subset='id')

# persist only when the merge actually added reviews
if len(reviews) > num_reviews:
    print('Found new reviews!')
    save_data(reviews, reviews_file)
    # refresh the baseline so re-running this cell does not re-save unchanged data
    num_reviews = len(reviews)