In [1]:
import requests
from bs4 import BeautifulSoup
import csv
import logging

In [2]:
def fetch_page(url):
    headers = {"User-Agent": "Mozilla/5.0"}
    resp = requests.get(url, headers=headers)
    resp.raise_for_status()
    return BeautifulSoup(resp.text, "html.parser")

In [9]:
url = "https://streeteasy.com/for-rent/manhattan/beds:1?page=1"
headers = {
    "User-Agent": (
        "Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:109.0) Gecko/20100101 Firefox/118.0"
    )
}

resp = requests.get(url, headers=headers)
soup = BeautifulSoup(resp.text, "html.parser")

cards = soup.select('li.sc-541ed69f-1')
print(f"Found {len(cards)} listings")

Found 14 listings


In [20]:
card = cards[0]
# print(card.prettify())

In [11]:
title_tag = card.select_one("p.ListingDescription-module__title___B9n4Z")
if title_tag:
    type_and_neighborhood = title_tag.get_text(strip=True)
    print("Type & Neighborhood:", type_and_neighborhood)

Type & Neighborhood: Rental unitin Chelsea


In [22]:
def parse_title(text: str) -> tuple[str, str]:
    # Fix common missing spaces like 'unitin' or 'condoin'
    fixed_text = text.replace("unitin", "unit in").replace("condoin", "condo in")
    
    # Then split on " in " — this assumes 'type' always precedes 'in'
    if " in " in fixed_text:
        type_part, neighborhood = fixed_text.split(" in ", 1)
        return type_part.strip(), neighborhood.strip()
    else:
        # fallback: just return full string as type, no neighborhood
        return fixed_text.strip(), ""

In [23]:
examples = [
    "Rental unitin Chelsea",
    "Co-op in Kips Bay",
    "Condoin Hell's Kitchen",
    "Studio in Midtown",
    "Mixed-use Building in Midtown",
]

for example in examples:
    t, n = parse_title(example)
    print(f"Type: {t} | Neighborhood: {n}")

Type: Rental unit | Neighborhood: Chelsea
Type: Co-op | Neighborhood: Kips Bay
Type: Condoin Hell's Kitchen | Neighborhood: 
Type: Studio | Neighborhood: Midtown
Type: Mixed-use Building | Neighborhood: Midtown


In [21]:
address_tag = card.select_one("a.ListingDescription-module__addressTextAction___xAFZJ")
if address_tag:
    address = address_tag.get_text(strip=True)
    url = address_tag["href"]
    print("Address:", address)
    print("URL:", url)

Address: 243 West 28th Street #S-16E
URL: https://streeteasy.com/building/ruby-chelsea/s16e?featured=1


In [19]:
price_raw = card.select_one('span[class*="PriceInfo-module__price"]')
price_text = price_raw.get_text(strip=True) if price_raw else None

bed = card.select_one('ul[class*="BedsBathsSqft"] li:nth-of-type(1) span')
beds = bed.get_text(strip=True) if bed else None

bath = card.select_one('ul[class*="BedsBathsSqft"] li:nth-of-type(2) span')
baths = bath.get_text(strip=True) if bath else None

print(f"Price: {price_text}")
print(f"Beds: {beds}")
print(f"Baths: {baths}")

Price: $6,795
Beds: 1 bed
Baths: 1 bath
