# Write a function to scrape the front page of Apartments.com and get a distribution of low-end and high-end housing costs

In [1]:
# Import Packages
from bs4 import BeautifulSoup
import requests
import pandas as pd
import numpy as np

In [2]:
# User agents are covered later on -- for now just run this cell.
# Each entry is a browser-like User-Agent string.
user_agent_list = [
    "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/42.0.2311.135 Safari/537.36 Edge/12.246",
    "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/47.0.2526.111 Safari/537.36 ",
    "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_11_2) AppleWebKit/601.3.9 (KHTML, like Gecko) Version/9.0.2 Safari/601.3.9 ",
    "Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:15.0) Gecko/20100101 Firefox/15.0.1",
    "Mozilla/5.0 (X11; CrOS x86_64 8172.45.0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/51.0.2704.64 Safari/537.36",
]

# Request headers: identify ourselves with the third entry (index 2) of the list.
headers = {"User-Agent": user_agent_list[2]}

### Pull from and Explore Beautiful Soup Objects

In [3]:
# Listing page to scrape.
URL = "https://www.apartments.com/los-angeles-ca/"

# Without a timeout, requests waits indefinitely if the server never answers,
# which would freeze the notebook; give up after 10 seconds instead.
page = requests.get(URL, headers = headers, timeout = 10)

In [4]:
page

<Response [200]>

In [5]:
print(type(page))

<class 'requests.models.Response'>


In [6]:
# Build a BeautifulSoup tree from the raw response body, using the "html.parser" backend.
soup = BeautifulSoup(page.content, "html.parser")

In [7]:
# Preview only the first 2,000 characters: printing the whole page floods the
# notebook output and bloats the saved file.
print(soup.prettify()[:2000])

<!DOCTYPE html>
<html data-placeholder-focus="false" id="appContainer" lang="en">
 <head>
  <meta charset="utf-8"/>
  <meta content="width=device-width, initial-scale=1.0" name="viewport"/>
  <meta content="telephone=no" name="format-detection"/>
  <meta content="email=no" name="format-detection"/>
  <meta content="true" name="HandheldFriendly"/>
  <title>
   Apartments For Rent in Los Angeles CA - 28,550 Rentals | Apartments.com
  </title>
  <link href="/a/05c8f3/faviconv2.ico" rel="icon"/>
  <link href="https://www.apartments.com/los-angeles-ca/" rel="canonical">
   <meta content="max-image-preview:large" name="robots"/>
   <meta content="Get a great Los Angeles, CA rental on Apartments.com! Use our search filters to browse all 28,550 apartments and score your perfect place! " name="description"/>
   <meta content="en" name="language"/>
   <meta content="en" http-equiv="language"/>
   <meta content="en-US" http-equiv="content-language"/>
   <meta content="739299276152207" property="f

In [8]:
soup.find("p")

<p class="header3" id="modal-header">Language</p>

In [9]:
# find_all is the current name; findAll is a deprecated legacy alias.
soup.find_all("p")

[<p class="header3" id="modal-header">Language</p>,
 <p> Select two tiles for min/max range</p>,
 <p>
                         Select two tiles for min/max range
                     </p>,
 <p class="error" data-bind="visible: !sqFeetIsValid(), text: squareFeetErrorMessage"></p>,
 <p class="keywordDirections"></p>,
 <p>
         Set a destination, transportation method, and your ideal commute time to see results.
     </p>,
 <p class="errorMessage" data-bind="visible: toShowTAError">*Please enter your address or point of interest</p>,
 <p>
         Select your school and find the perfect place nearby or on-campus.
     </p>,
 <p class="loading-text"></p>,
 <p class="property-pricing">$1,200 - 6,235</p>,
 <p class="property-beds">3-4 Beds</p>,
 <p class="property-specials">
 <i class="storyicon saleStoryIcon"></i>
 <span>Specials</span>
 </p>,
 <p class="property-amenities">
 <span>In Unit Washer &amp; Dryer</span>
 <span>Stainless Steel Appliances</span>
 <span>Controlled Access</span>

In [10]:
# Every listing's price lives in a <p class="property-pricing"> tag.
# find_all is the current name; findAll is a deprecated legacy alias.
price_elements = soup.find_all("p", class_ = "property-pricing")

### Find the pricing inside of the website

In [11]:
# Keep only the text of each pricing tag, e.g. '$1,200 - 6,235'.
clean_p = [price_tag.text for price_tag in price_elements]

In [16]:
clean_p[0]

'$1,200 - 6,235'

In [13]:
# low price: the text before the "-", with "$" and "," removed, converted to a float
float(clean_p[0].split("-")[0].replace("$", "").replace(",", ""))

1200.0

In [14]:
# high price: the text after the "-", with "$" and "," removed, converted to a float
float(clean_p[0].split("-")[1].replace("$", "").replace(",", ""))

6235.0

### Clean the Prices

In [17]:
# Parallel lists: lows[k] / highs[k] are the two ends of the k-th price string.
lows = []
highs = []

for price_text in clean_p:
    # '$1,200 - 6,235' -> ['1200 ', ' 6235']; float() ignores the surrounding spaces.
    parts = price_text.replace("$", "").replace(",", "").split("-")

    # Catch only the failures parsing can produce, so that real problems
    # (e.g. KeyboardInterrupt) are no longer swallowed by a bare except.
    try:
        lows.append(float(parts[0]))
    except ValueError:
        # Non-numeric text: record a missing value.
        lows.append(np.nan)

    try:
        highs.append(float(parts[1]))
    except (IndexError, ValueError):
        # IndexError: a single price with no "-", so there is no upper bound.
        highs.append(np.nan)

### Make a function out of it

In [18]:
def _parse_price_bound(price_text, position):
    """Return the `position`-th "-"-separated number of `price_text` as a float, or NaN."""
    parts = price_text.replace("$", "").replace(",", "").split("-")
    try:
        return float(parts[position])
    except (IndexError, ValueError):
        # IndexError: no such bound (e.g. a single price has no high end).
        # ValueError: the piece is not numeric.
        return np.nan


def get_prices(url="https://www.apartments.com/los-angeles-ca/", user_agent=None, timeout=10):
    """Scrape a listing page and return the low and high end of every price range.

    Parameters
    ----------
    url : str
        Page to scrape. Defaults to the Los Angeles listing page.
    user_agent : str or None
        Value sent in the ``User-Agent`` header. ``None`` uses the same
        Safari-on-macOS string this function has always sent.
    timeout : float
        Seconds to wait for the server before giving up.

    Returns
    -------
    tuple of (list of float, list of float)
        ``(lows, highs)``, one entry per ``<p class="property-pricing">`` tag,
        in page order. A bound that is missing or not numeric is ``np.nan``.

    Raises
    ------
    requests.exceptions.HTTPError
        If the server answers with a 4xx/5xx status (for example when the
        request is blocked), instead of silently returning empty lists.
    requests.exceptions.RequestException
        On connection problems or when the timeout expires.
    """
    if user_agent is None:
        user_agent = "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_11_2) AppleWebKit/601.3.9 (KHTML, like Gecko) Version/9.0.2 Safari/601.3.9 "

    page = requests.get(url, headers={"User-Agent": user_agent}, timeout=timeout)
    # An error page contains no pricing tags, so fail loudly rather than
    # returning an empty distribution.
    page.raise_for_status()

    soup = BeautifulSoup(page.content, "html.parser")
    price_texts = [tag.text for tag in soup.find_all("p", class_="property-pricing")]

    lows = [_parse_price_bound(text, 0) for text in price_texts]
    highs = [_parse_price_bound(text, 1) for text in price_texts]

    return (lows, highs)

In [19]:
lows, highs = get_prices()

In [20]:
highs

[4995.0,
 8150.0,
 5470.0,
 9431.0,
 3391.0,
 5830.0,
 12610.0,
 9773.0,
 3998.0,
 2405.0,
 2879.0,
 2798.0,
 4338.0,
 8441.0,
 12750.0,
 27799.0,
 25000.0,
 3200.0,
 6571.0,
 5100.0,
 3998.0,
 6300.0,
 4804.0,
 5632.0,
 4398.0,
 3966.0,
 20500.0,
 3395.0,
 12352.0,
 9106.0,
 10000.0,
 8210.0,
 nan,
 6414.0,
 8709.0,
 6235.0,
 7975.0,
 3750.0,
 5070.0,
 4465.0]