In [1]:
import requests
import json
import time


In [51]:
# specify the URL and headers
url = "https://zillow56.p.rapidapi.com/search"


headers = {
	"x-rapidapi-key": "543cfe5e40msha66e48862afef4cp137a66jsn75ce65836e39",
	"x-rapidapi-host": "zillow56.p.rapidapi.com"
}

In [64]:
def get_all_pages_for_zipcode(zipcode: str) -> list:
    '''
    Given a zipcode, this function will retrieve all zillow rental listing data for that zipcode.
    '''
    all_properties = []
    page = 1
    
    while True:
        params = {
            "location": zipcode,
            "output": "json",
            "status": "forRent",
            "doz": "any",
            "page": str(page),
            "sortSelection": "days",
        }
        
        try:
            if page > 1:
                time.sleep(1)
                
            print(f"Retriving zipcode: {zipcode}, page {page} ...")
            response = requests.get(url, headers=headers, params=params)
            response.raise_for_status()
            
            data = response.json()

            results = data.get("results")
            
            # check if there are any results
            if not results:
                print(f"All data by {zipcode} retrieved. Total page number retrieved: {page-1}.")
                break
            
            for result in results:
                variables = {
                        "zpid": result.get("zpid"),
                        "longitude": result.get("longitude"),
                        "latitude": result.get("latitude"),
                        "bathrooms": result.get("bathrooms"),
                        "bedrooms": result.get("bedrooms"),
                        "address": result.get("streetAddress"),
                        "price": result.get("price"),
                        "zipcode": result.get("zipcode")
                    }
                all_properties.append(variables)
    

            print(f"Results retrieved: {len(results)}. Total number of results: {len(all_properties)}.")
            
            
            page += 1
            
        except requests.exceptions.RequestException as e:
            print(f"Cannot retrieve data: {e}")
            break
            
        except Exception as e:
            print(f"Error processing data: {e}")
            break
    
    return all_properties


In [65]:
# test the function
all_listing = get_all_pages_for_zipcode("60610")

Retriving zipcode: 60610, page 1 ...
Results retrieved: 10. Total number of results: 10.
Retriving zipcode: 60610, page 2 ...
Results retrieved: 23. Total number of results: 33.
Retriving zipcode: 60610, page 3 ...
Results retrieved: 22. Total number of results: 55.
Retriving zipcode: 60610, page 4 ...
Results retrieved: 6. Total number of results: 61.
Retriving zipcode: 60610, page 5 ...
Results retrieved: 10. Total number of results: 71.
Retriving zipcode: 60610, page 6 ...
Results retrieved: 10. Total number of results: 81.
Retriving zipcode: 60610, page 7 ...
Results retrieved: 10. Total number of results: 91.
Retriving zipcode: 60610, page 8 ...
Results retrieved: 10. Total number of results: 101.
Retriving zipcode: 60610, page 9 ...
Results retrieved: 10. Total number of results: 111.
Retriving zipcode: 60610, page 10 ...
Results retrieved: 10. Total number of results: 121.
Retriving zipcode: 60610, page 11 ...
Results retrieved: 10. Total number of results: 131.
Retriving zipcod

In [54]:
# gather all chicago zipcodes according to USPS website: https://tools.usps.com/zip-code-lookup.htm?bycitystate#page-1 
chicago_zipcode = ["60693", "60694", "60695", "60696", "60697", "60699", "60701", "60706", "60707", "60803", "60804", "60805", "60827"]

for i in range(60601, 60692):
    chicago_zipcode.append(str(i))

to_remove = ["60627", "60635", "60648", "60650", "60658", "60662", "60663", "60665", "60667", "60671", "60672", "60676", "60679", "60683"]

for tm in to_remove:
    chicago_zipcode.remove(tm)

sorted(chicago_zipcode)


['60601',
 '60602',
 '60603',
 '60604',
 '60605',
 '60606',
 '60607',
 '60608',
 '60609',
 '60610',
 '60611',
 '60612',
 '60613',
 '60614',
 '60615',
 '60616',
 '60617',
 '60618',
 '60619',
 '60620',
 '60621',
 '60622',
 '60623',
 '60624',
 '60625',
 '60626',
 '60628',
 '60629',
 '60630',
 '60631',
 '60632',
 '60633',
 '60634',
 '60636',
 '60637',
 '60638',
 '60639',
 '60640',
 '60641',
 '60642',
 '60643',
 '60644',
 '60645',
 '60646',
 '60647',
 '60649',
 '60651',
 '60652',
 '60653',
 '60654',
 '60655',
 '60656',
 '60657',
 '60659',
 '60660',
 '60661',
 '60664',
 '60666',
 '60668',
 '60669',
 '60670',
 '60673',
 '60674',
 '60675',
 '60677',
 '60678',
 '60680',
 '60681',
 '60682',
 '60684',
 '60685',
 '60686',
 '60687',
 '60688',
 '60689',
 '60690',
 '60691',
 '60693',
 '60694',
 '60695',
 '60696',
 '60697',
 '60699',
 '60701',
 '60706',
 '60707',
 '60803',
 '60804',
 '60805',
 '60827']

In [55]:
len(chicago_zipcode)

90

In [71]:
# retrieve all listings for all chicago zipcodes
all_listing_info = []

for zipcode in chicago_zipcode:
    zipcode_listing = get_all_pages_for_zipcode(zipcode)
    all_listing_info.extend(zipcode_listing)
    print(f"Updated; total number of listings: {len(all_listing_info)}.")
    time.sleep(0.3)

Retriving zipcode: 60693, page 1 ...
Results retrieved: 1. Total number of results: 1.
Retriving zipcode: 60693, page 2 ...
Results retrieved: 1. Total number of results: 2.
Retriving zipcode: 60693, page 3 ...
Results retrieved: 15. Total number of results: 17.
Retriving zipcode: 60693, page 4 ...
Results retrieved: 12. Total number of results: 29.
Retriving zipcode: 60693, page 5 ...
Results retrieved: 17. Total number of results: 46.
Retriving zipcode: 60693, page 6 ...
Results retrieved: 20. Total number of results: 66.
Retriving zipcode: 60693, page 7 ...
Results retrieved: 26. Total number of results: 92.
Retriving zipcode: 60693, page 8 ...
Results retrieved: 32. Total number of results: 124.
Retriving zipcode: 60693, page 9 ...
Results retrieved: 19. Total number of results: 143.
Retriving zipcode: 60693, page 10 ...
Results retrieved: 1. Total number of results: 144.
Retriving zipcode: 60693, page 11 ...
Results retrieved: 1. Total number of results: 145.
Retriving zipcode: 60

In [72]:
all_listing_info

[{'zpid': 446735593,
  'longitude': -87.61685,
  'latitude': 41.88554,
  'bathrooms': 1.0,
  'bedrooms': 1.0,
  'address': '450 E Benton Pl #2308',
  'price': 2534.0,
  'zipcode': '60601'},
 {'zpid': 2112016331,
  'longitude': -87.628944,
  'latitude': 41.874878,
  'bathrooms': 1.0,
  'bedrooms': 0.0,
  'address': '525 S Dearborn St APT 605',
  'price': 1695.0,
  'zipcode': '60605'},
 {'zpid': 3868964,
  'longitude': -87.617935,
  'latitude': 41.884876,
  'bathrooms': 2.0,
  'bedrooms': 2.0,
  'address': '360 E Randolph St APT 3602',
  'price': 4800.0,
  'zipcode': '60601'},
 {'zpid': 65546971,
  'longitude': -87.64883,
  'latitude': 41.879482,
  'bathrooms': 2.0,
  'bedrooms': 2.0,
  'address': '842 W Adams St FLOOR 2',
  'price': 2750.0,
  'zipcode': '60607'},
 {'zpid': 446702980,
  'longitude': -87.62467,
  'latitude': 41.869102,
  'bathrooms': 1.0,
  'bedrooms': 0.0,
  'address': '1100 S Michigan Ave #2090',
  'price': 2525.0,
  'zipcode': '60605'},
 {'zpid': 2077324985,
  'longitu

In [77]:
len(all_listing_info)

40967

In [73]:
import pandas as pd

In [74]:
df_zillow = pd.DataFrame(all_listing_info).drop_duplicates()
df_zillow

Unnamed: 0,zpid,longitude,latitude,bathrooms,bedrooms,address,price,zipcode
0,446735593,-87.616850,41.885540,1.0,1.0,450 E Benton Pl #2308,2534.0,60601
1,2112016331,-87.628944,41.874878,1.0,0.0,525 S Dearborn St APT 605,1695.0,60605
2,3868964,-87.617935,41.884876,2.0,2.0,360 E Randolph St APT 3602,4800.0,60601
3,65546971,-87.648830,41.879482,2.0,2.0,842 W Adams St FLOOR 2,2750.0,60607
4,446702980,-87.624670,41.869102,1.0,0.0,1100 S Michigan Ave #2090,2525.0,60605
...,...,...,...,...,...,...,...,...
36399,2107163413,,,1.0,0.0,(undisclosed Address),1250.0,60660
36400,345039747,,,1.0,1.0,(undisclosed Address),1350.0,60660
36401,344944878,-87.665470,41.991070,1.0,1.0,6200-48 N Clark St W #1600-24-16554796,1500.0,60660
36402,2089847542,-87.658070,41.992477,1.0,2.0,6101 N Winthrop Ave #1,2200.0,60660


In [75]:
df_deduped_zillow = df_zillow.drop_duplicates(subset=["zpid"], keep="first")
df_deduped_zillow

Unnamed: 0,zpid,longitude,latitude,bathrooms,bedrooms,address,price,zipcode
0,446735593,-87.616850,41.885540,1.0,1.0,450 E Benton Pl #2308,2534.0,60601
1,2112016331,-87.628944,41.874878,1.0,0.0,525 S Dearborn St APT 605,1695.0,60605
2,3868964,-87.617935,41.884876,2.0,2.0,360 E Randolph St APT 3602,4800.0,60601
3,65546971,-87.648830,41.879482,2.0,2.0,842 W Adams St FLOOR 2,2750.0,60607
4,446702980,-87.624670,41.869102,1.0,0.0,1100 S Michigan Ave #2090,2525.0,60605
...,...,...,...,...,...,...,...,...
36399,2107163413,,,1.0,0.0,(undisclosed Address),1250.0,60660
36400,345039747,,,1.0,1.0,(undisclosed Address),1350.0,60660
36401,344944878,-87.665470,41.991070,1.0,1.0,6200-48 N Clark St W #1600-24-16554796,1500.0,60660
36402,2089847542,-87.658070,41.992477,1.0,2.0,6101 N Winthrop Ave #1,2200.0,60660


In [76]:
df_deduped_zillow.to_csv("zillow_chicago.csv", index=False)