In [1]:
import requests
from bs4 import BeautifulSoup
import pandas as pd

In [3]:
# URL of the webpage to scrape
url = "https://dlai-lc-dag.s3.us-east-2.amazonaws.com/apartment_finder.html"

### START CODE HERE ###

# send a GET request to the url
import requests
response = requests.get(url)

# get the status of the response for troubleshooting
status = response.status_code


### END CODE HERE ###
print(status)

200


If the above code succeeded, you can print out the HTML of the webpage to inspect it and find out what you are actually searching for. You should be able to find the structure describes at the beginning of the lab.

In [4]:
print(response.text)

<!DOCTYPE html>
<html lang="en">
<head>
    <meta charset="UTF-8">
    <meta name="viewport" content="width=device-width, initial-scale=1.0">
    <title>Welcome to CityDwellers</title>
    <link rel="stylesheet" href="styles_green.css">
</head>
<body>

    <div class="content">
        <header>
            <h1>Welcome to CityDwellers - Your Ultimate Apartment Finder</h1>
        </header>
        <section>
            <p>At CityDwellers, we believe that finding your dream apartment should be exciting, not exhausting. Our platform is designed to simplify your search, offering a seamless experience from browsing to signing the lease. Whether you're looking for a cozy studio, a spacious family home, or a luxurious penthouse, your perfect match is just a few clicks away.</p>
        </section>
        <section>
            <h2>Why Choose CityDwellers?</h2>
            <ul>
                <li>Vast Selection: Browse thousands of listings in prime locations. From bustling city centers to qui

By closely inspecting the HTML, you can find that there is a HTML structure that repeats over and over and looks something like this:
```
<div class="grid-item" onclick="toggleDetails(this)">
    <div class="info">
        <p><strong>3 Bedroom</strong></p>
        <p>Location: Southern Suburbs</p>
        <p>Price: $1272</p>
        <div class="details">
            <p>Area: 52 sqm</p>
            <p>Floor: 2</p>
            <p>Furnishing: Unfurnished</p>
            <p>Facing: East</p>
            <p>Parking: Yes</p>
            <p>Bathrooms: 1</p>
            <p>Balcony: 1</p>
            <p>Overlooking: Garden/Park, Pool</p>
        </div>
    </div>
    <div class="photo">
        <img src="imgs/unfurnished/med/2.jpg" alt="House">
        <button class="arrow left" onclick="prevImage(event, this)">&#10094;</button>
        <button class="arrow right" onclick="nextImage(event, this)">&#10095;</button>
    </div>
</div>
```

In [5]:
### START CODE HERE ###

from bs4 import BeautifulSoup

# Parse the HTML content
soup = BeautifulSoup(response.text, "html.parser")

# Find all the grid items
grid_items = soup.find_all("div", class_="grid-item")

### END CODE HERE ###

# Print the number of grid items that were found
print(len(grid_items))


72


In [6]:
# List to store apartment data
apartments = []

# Loop through each grid item and extract the details
for item in grid_items:
    paragraphs = item.find_all("p")
    
    apartment = {
        "number_of_bedrooms": paragraphs[0].text.split(" ")[0].strip(),
        "location": paragraphs[1].text.split(": ")[1].strip(),
        "price": paragraphs[2].text.split(": ")[1].strip(),
        "area": paragraphs[3].text.split(": ")[1].strip(),
        
        ### START CODE HERE ###

        "furnishing": paragraphs[4].text.split(": ")[1].strip(),
        "parking": paragraphs[5].text.split(": ")[1].strip(),
        # Optionally you can extract other columns for practice
        
        ### END CODE HERE ###
    }
    
    apartments.append(apartment)

# Create a pandas DataFrame from the list of apartments
import pandas as pd
df = pd.DataFrame(apartments)

# Display the DataFrame
df.head()


Unnamed: 0,number_of_bedrooms,location,price,area,furnishing,parking
0,3,Southern Suburbs,$1272,52 sqm,2,Unfurnished
1,3,Central,$6120,154 sqm,8,Unfurnished
2,1,Other,$745,34 sqm,7,Unfurnished
3,1,Southeastern Suburbs,$1048,43 sqm,2,Unfurnished
4,3,Central,$2200,63 sqm,10,Partially Furnished


In [7]:
# Display the information about the DataFrame
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 72 entries, 0 to 71
Data columns (total 6 columns):
 #   Column              Non-Null Count  Dtype 
---  ------              --------------  ----- 
 0   number_of_bedrooms  72 non-null     object
 1   location            72 non-null     object
 2   price               72 non-null     object
 3   area                72 non-null     object
 4   furnishing          72 non-null     object
 5   parking             72 non-null     object
dtypes: object(6)
memory usage: 3.5+ KB


In [8]:
### START CODE HERE ###

# Convert the number_of_bedrooms column to integer
df["number_of_bedrooms"] = df["number_of_bedrooms"].astype(int)

### END CODE HERE ###


In [9]:
### START CODE HERE ###

# Remove the dollar sign from the price column
df["price"] = df["price"].str.replace("$", "", regex=False)

# Convert the price column to integer
df["price"] = df["price"].str.replace(",", "", regex=False).astype(int)

### END CODE HERE ###


In [10]:
### START CODE HERE ###
print(df["location"].unique())
### END CODE HERE ###


['Southern Suburbs' 'Central' 'Other' 'Southeastern Suburbs' 'Periphery'
 'Northern Suburbs' 'Western Suburbs']


<details open>
<summary style="background-color: #c6e2ff6c; padding: 10px; border-radius: 3px; box-shadow: 0 2px 4px rgba(0, 0, 0, 0.01); width: 95%; text-align: left; cursor: pointer; font-weight: bold;">
Expected output:</summary> 


```
['Southern Suburbs' 'Central' 'Other' 'Southeastern Suburbs' 'Periphery'
 'Northern Suburbs' 'Western Suburbs']
```

</details>

<details>
<summary style="background-color: #FDBFC7; padding: 10px; border-radius: 3px; box-shadow: 0 2px 4px rgba(0, 0, 0, 0.1); width: 95%; text-align: left; cursor: pointer; font-weight: bold;">
Click here to see the solution</summary> 

<ul style="background-color: #FFF8F8; padding: 10px; border-radius: 3px; margin-top: 5px; width: 95%; box-shadow: inset 0 2px 4px rgba(0, 0, 0, 0.1);">
   
Your solution should look something like this:

```python
# Find the unique values in the location column
print(df["location"].unique())
```
</details>

It looks like you are good to go. The `location` column has only a few different values in it and it is only the `Central` that you are interested in.

Lastly, check for all the values in the `parking` column.


<div style="background-color: #C6E2FF; padding: 10px; border-radius: 3px; box-shadow: 0 2px 4px rgba(0, 0, 0, 0.1); width:95%
">
    <strong>▶▶▶ Directions</strong> 
        <ol>
            <li>Find all the unique values in the <code>parking</code> column and print them out.</li>
        </ol>
</div>

In [12]:
### START CODE HERE ###
print(df["parking"].unique())
### END CODE HERE ###


['Unfurnished' 'Partially Furnished' 'Furnished']


<details open>
<summary style="background-color: #c6e2ff6c; padding: 10px; border-radius: 3px; box-shadow: 0 2px 4px rgba(0, 0, 0, 0.01); width: 95%; text-align: left; cursor: pointer; font-weight: bold;">
Expected output:</summary> 


```
['Yes' 'No']
```

</details>

<details>
<summary style="background-color: #FDBFC7; padding: 10px; border-radius: 3px; box-shadow: 0 2px 4px rgba(0, 0, 0, 0.1); width: 95%; text-align: left; cursor: pointer; font-weight: bold;">
Click here to see the solution</summary> 

<ul style="background-color: #FFF8F8; padding: 10px; border-radius: 3px; margin-top: 5px; width: 95%; box-shadow: inset 0 2px 4px rgba(0, 0, 0, 0.1);">
   
Your solution should look something like this:

```python
# Find the unique values in the parking column
print(df["parking"].unique())
```
</details>

The `parking` column has only two different values in it and you are interested in all the rows that have `Yes` in them.

<a id="find-the-apartments"></a>

## Step 6: Find the right apartments

Now you can finally filter and sort your DataFrame to find the apartments that you are looking for.

<div style="background-color: #C6E2FF; padding: 10px; border-radius: 3px; box-shadow: 0 2px 4px rgba(0, 0, 0, 0.1); width:95%
">
    <strong>▶▶▶ Directions</strong> 
        <ol>
            <li>Filter and sort the DataFrame to find the cheapest five apartments with central location, two bedrooms and a parking space.</li>
            <ul>
                <li>Filter the DataFrame for apartments with central location.</li>
                <li>Filter the DataFrame for apartments with two or more bedrooms.</li>
                <li>Filter the DataFrame for apartments with parking.</li>
                <li>Sort the DataFrame by price in ascending order. Hint: use <code>.sort_values()</code> and pass <code>price</code> to the named argument <code>by</code>. This returns a sorted DataFrame.</li>
            </ul>
        </ol>
</div>

In [15]:
### START CODE HERE ###

# Filter the DataFrame for apartments with central location
central_apartments_df = df[df["location"] == "Central"]

# Filter the DataFrame for apartments with two or more bedrooms
two_bedroom_apartments_df = central_apartments_df[central_apartments_df["number_of_bedrooms"] >= 2]

# Filter the DataFrame for apartments with parking
apartments_with_parking_df = two_bedroom_apartments_df[two_bedroom_apartments_df["parking"] == "Yes"]

# Sort the DataFrame by price in ascending order
sorted_apartments_df = apartments_with_parking_df.sort_values(by="price")

### END CODE HERE ###

# Get the cheapest five apartments
cheapest_five_apartments_df = sorted_apartments_df.head(5)

# Display the result
cheapest_five_apartments_df

Unnamed: 0,number_of_bedrooms,location,price,area,furnishing,parking


<details open>
<summary style="background-color: #c6e2ff6c; padding: 10px; border-radius: 3px; box-shadow: 0 2px 4px rgba(0, 0, 0, 0.01); width: 95%; text-align: left; cursor: pointer; font-weight: bold;">
Expected output:</summary> 


<img src="./imgsL3/output_step6.png" width=500>

</details>

<details>
<summary style="background-color: #FDBFC7; padding: 10px; border-radius: 3px; box-shadow: 0 2px 4px rgba(0, 0, 0, 0.1); width: 95%; text-align: left; cursor: pointer; font-weight: bold;">
Click here to see the solution</summary> 

<ul style="background-color: #FFF8F8; padding: 10px; border-radius: 3px; margin-top: 5px; width: 95%; box-shadow: inset 0 2px 4px rgba(0, 0, 0, 0.1);">
   
Your solution should look something like this:

```python
# Filter the DataFrame for apartments with central location
central_apartments_df = df[df["location"] == "Central"]

# Filter the DataFrame for apartments with two or more bedrooms
two_bedroom_apartments_df = central_apartments_df[central_apartments_df["number_of_bedrooms"] >= 2]

# Filter the DataFrame for apartments with parking
apartments_with_parking_df = two_bedroom_apartments_df[two_bedroom_apartments_df["parking"] == "Yes"]

# Sort the DataFrame by price in ascending order
sorted_apartments_df = apartments_with_parking_df.sort_values(by="price")
```
</details>

Congratulations for finishing this lab.

You have used `requests` to get the HTML of a webpage, parsed it using `BeautifulSoup` and created a table of available apartments. Then you have found the cheapest five central apartments with minimum two bedrooms and a parking.

Hope you enjoyed it! 