In [5]:
import requests
import json
import time
import pandas as pd


In [2]:
# specify the URL and headers
url = "https://zillow56.p.rapidapi.com/search"


headers = {
	"x-rapidapi-key": "543cfe5e40msha66e48862afef4cp137a66jsn75ce65836e39",
	"x-rapidapi-host": "zillow56.p.rapidapi.com"
}

In [3]:
def get_all_pages_for_zipcode(zipcode: str) -> list:
    '''
    Given a zipcode, this function will retrieve all zillow rental listing data for that zipcode.
    '''
    all_properties = []
    page = 1
    
    while True:
        params = {
            "location": zipcode,
            "output": "json",
            "status": "forRent",
            "doz": "any",
            "page": str(page),
            "sortSelection": "days",
        }
        
        try:
            if page > 1:
                time.sleep(1)
                
            print(f"Retriving zipcode: {zipcode}, page {page} ...")
            response = requests.get(url, headers=headers, params=params)
            response.raise_for_status()
            
            data = response.json()

            results = data.get("results")
            
            # check if there are any results
            if not results:
                print(f"All data by {zipcode} retrieved. Total page number retrieved: {page-1}.")
                break
            
            for result in results:
                variables = {
                        "zpid": result.get("zpid"),
                        "longitude": result.get("longitude"),
                        "latitude": result.get("latitude"),
                        "bathrooms": result.get("bathrooms"),
                        "bedrooms": result.get("bedrooms"),
                        "address": result.get("streetAddress"),
                        "price": result.get("price"),
                        "zipcode": result.get("zipcode"),
                        "living_area": result.get("livingArea"),
                        "home_type": result.get("homeType"),
                        "rent_zestimate": result.get("rentZestimate"),
                        "unit": result.get("unit"),
                        "time_on_zillow": result.get("timeOnZillow")
                    }
                all_properties.append(variables)
    

            print(f"Results retrieved: {len(results)}. Total number of results: {len(all_properties)}.")
            
            
            page += 1
            
        except requests.exceptions.RequestException as e:
            print(f"Cannot retrieve data: {e}")
            break
            
        except Exception as e:
            print(f"Error processing data: {e}")
            break
    
    return all_properties


In [4]:
# test the function
all_listing = get_all_pages_for_zipcode("60610")

Retriving zipcode: 60610, page 1 ...
Results retrieved: 11. Total number of results: 11.
Retriving zipcode: 60610, page 2 ...
Results retrieved: 21. Total number of results: 32.
Retriving zipcode: 60610, page 3 ...
Results retrieved: 24. Total number of results: 56.
Retriving zipcode: 60610, page 4 ...
Results retrieved: 6. Total number of results: 62.
Retriving zipcode: 60610, page 5 ...
Results retrieved: 11. Total number of results: 73.
Retriving zipcode: 60610, page 6 ...
Results retrieved: 11. Total number of results: 84.
Retriving zipcode: 60610, page 7 ...
Results retrieved: 11. Total number of results: 95.
Retriving zipcode: 60610, page 8 ...
Results retrieved: 11. Total number of results: 106.
Retriving zipcode: 60610, page 9 ...
Results retrieved: 11. Total number of results: 117.
Retriving zipcode: 60610, page 10 ...
Results retrieved: 11. Total number of results: 128.
Retriving zipcode: 60610, page 11 ...
Results retrieved: 11. Total number of results: 139.
Retriving zipcod

In [7]:
# gather all chicago zipcodes according to USPS website: https://tools.usps.com/zip-code-lookup.htm?bycitystate#page-1 
chicago_zipcode = ["60693", "60694", "60695", "60696", "60697", "60699", "60701", "60706", "60707", "60803", "60804", "60805", "60827"]

for i in range(60601, 60692):
    chicago_zipcode.append(str(i))

to_remove = ["60627", "60635", "60648", "60650", "60658", "60662", "60663", "60665", "60667", "60671", "60672", "60676", "60679", "60683"]

for tm in to_remove:
    chicago_zipcode.remove(tm)

sorted(chicago_zipcode)


['60601',
 '60602',
 '60603',
 '60604',
 '60605',
 '60606',
 '60607',
 '60608',
 '60609',
 '60610',
 '60611',
 '60612',
 '60613',
 '60614',
 '60615',
 '60616',
 '60617',
 '60618',
 '60619',
 '60620',
 '60621',
 '60622',
 '60623',
 '60624',
 '60625',
 '60626',
 '60628',
 '60629',
 '60630',
 '60631',
 '60632',
 '60633',
 '60634',
 '60636',
 '60637',
 '60638',
 '60639',
 '60640',
 '60641',
 '60642',
 '60643',
 '60644',
 '60645',
 '60646',
 '60647',
 '60649',
 '60651',
 '60652',
 '60653',
 '60654',
 '60655',
 '60656',
 '60657',
 '60659',
 '60660',
 '60661',
 '60664',
 '60666',
 '60668',
 '60669',
 '60670',
 '60673',
 '60674',
 '60675',
 '60677',
 '60678',
 '60680',
 '60681',
 '60682',
 '60684',
 '60685',
 '60686',
 '60687',
 '60688',
 '60689',
 '60690',
 '60691',
 '60693',
 '60694',
 '60695',
 '60696',
 '60697',
 '60699',
 '60701',
 '60706',
 '60707',
 '60803',
 '60804',
 '60805',
 '60827']

In [8]:
len(chicago_zipcode)

90

In [71]:
# retrieve all listings for all chicago zipcodes
all_listing_info = []

for zipcode in chicago_zipcode:
    zipcode_listing = get_all_pages_for_zipcode(zipcode)
    all_listing_info.extend(zipcode_listing)
    print(f"Updated; total number of listings: {len(all_listing_info)}.")
    time.sleep(0.1)

Retriving zipcode: 60693, page 1 ...
Results retrieved: 1. Total number of results: 1.
Retriving zipcode: 60693, page 2 ...
Results retrieved: 1. Total number of results: 2.
Retriving zipcode: 60693, page 3 ...
Results retrieved: 15. Total number of results: 17.
Retriving zipcode: 60693, page 4 ...
Results retrieved: 12. Total number of results: 29.
Retriving zipcode: 60693, page 5 ...
Results retrieved: 17. Total number of results: 46.
Retriving zipcode: 60693, page 6 ...
Results retrieved: 20. Total number of results: 66.
Retriving zipcode: 60693, page 7 ...
Results retrieved: 26. Total number of results: 92.
Retriving zipcode: 60693, page 8 ...
Results retrieved: 32. Total number of results: 124.
Retriving zipcode: 60693, page 9 ...
Results retrieved: 19. Total number of results: 143.
Retriving zipcode: 60693, page 10 ...
Results retrieved: 1. Total number of results: 144.
Retriving zipcode: 60693, page 11 ...
Results retrieved: 1. Total number of results: 145.
Retriving zipcode: 60

In [None]:
all_listing_info

In [77]:
len(all_listing_info)

40967

In [73]:
import pandas as pd

In [83]:
df_zillow = pd.DataFrame(all_listing_info)
df_zillow

Unnamed: 0,zpid,longitude,latitude,bathrooms,bedrooms,address,price,zipcode
0,446735593,-87.616850,41.885540,1.0,1.0,450 E Benton Pl #2308,2534.0,60601
1,2112016331,-87.628944,41.874878,1.0,0.0,525 S Dearborn St APT 605,1695.0,60605
2,3868964,-87.617935,41.884876,2.0,2.0,360 E Randolph St APT 3602,4800.0,60601
3,65546971,-87.648830,41.879482,2.0,2.0,842 W Adams St FLOOR 2,2750.0,60607
4,446702980,-87.624670,41.869102,1.0,0.0,1100 S Michigan Ave #2090,2525.0,60605
...,...,...,...,...,...,...,...,...
40962,446735593,-87.616850,41.885540,1.0,1.0,450 E Benton Pl #2308,2534.0,60601
40963,446749902,-87.619350,41.892628,1.0,1.0,333 E Ohio St #2305,2063.0,60611
40964,446735593,-87.616850,41.885540,1.0,1.0,450 E Benton Pl #2308,2534.0,60601
40965,446749902,-87.619350,41.892628,1.0,1.0,333 E Ohio St #2305,2063.0,60611


In [89]:
df_zillow.shape

(40967, 8)

In [90]:
# save raw data
df_zillow.to_csv("zillow_rental_listings_raw.csv", index=False)

In [91]:
df_deduped_zillow = df_zillow.drop_duplicates()

In [92]:
df_deduped_zillow.shape

(4746, 8)

In [94]:
df_deduped_zillow.to_csv("zillow_chicago.csv", index=False)