In [13]:
import time
import httpx
import lxml.html


# ---------------------------- Utility Functions ----------------------------

def complete_link(zip_code: str) -> str:
    """
    Build the AARP Livability Index search URL for a Chicago ZIP code.

    The ZIP code is inserted verbatim between a fixed prefix
    (``.../search/Chicago,%20Illinois%20``) and a fixed suffix
    (``,%20United%20States#scores``); no escaping is applied to it.

    :param zip_code: The ZIP code to embed in the URL.
    :return: The full search URL.
    """
    url_parts = (
        "https://livabilityindex.aarp.org/search/Chicago,%20Illinois%20",
        zip_code,
        ",%20United%20States#scores",
    )
    return "".join(url_parts)

# --------------------------- Scraper Function ------------------------------
def make_request(zip_code: str):
    """
    Fetch the livability page for one ZIP code and parse it into an HTML tree.

    The URL comes from ``complete_link(zip_code)``. The request is sent with a
    fixed Chrome-style User-Agent header and redirects are followed.

    :param zip_code: The ZIP code whose page should be fetched.
    :return: The root element returned by ``lxml.html.fromstring`` for the
        response body.
    :raises httpx.HTTPStatusError: If the final response has a 4xx or 5xx
        status code.
    """
    headers = {
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36"
    }
    url = complete_link(zip_code)
    with httpx.Client(headers=headers, follow_redirects=True) as client:
        response = client.get(url)
        # Fail loudly on HTTP errors: without this check an error page would
        # be parsed and returned as though it were real data for the ZIP code.
        response.raise_for_status()
        zip_data = lxml.html.fromstring(response.text)
        # Progress output; this prints the element's repr, not the page content.
        print(zip_data)
        return zip_data
    
# ---------------------------- Main Function ----------------------------
def main(zip_codes: list):
    """
    Scrape every ZIP code in ``zip_codes`` and collect the results.

    For each ZIP code, in order, a progress line is printed,
    ``make_request`` is called, and the function then sleeps for one second
    before moving on (including after the final ZIP code).

    :param zip_codes: The ZIP codes to scrape.
    :return: A dict mapping each ZIP code to the value returned by
        ``make_request`` for it. A ZIP code that appears more than once keeps
        only the result of its last request.
    """
    results = {}
    for code in zip_codes:
        print(f"\n📌 Scraping ZIP Code: {code}\n")
        parsed_page = make_request(code)
        results[code] = parsed_page
        # Pause between consecutive requests.
        time.sleep(1)
    return results

# ---------------------------- Example Usage ----------------------------

if __name__ == "__main__":
    # Full list of Chicago ZIP codes. It is immediately replaced by the
    # two-entry subset assigned below, so only that subset is scraped.
    chicago_zip_codes = [
        "60601", "60602", "60603", "60604", "60605", "60606", "60607", "60608", "60609",
        "60610", "60611", "60612", "60613", "60614", "60615", "60616", "60617", "60618", "60619",
        "60620", "60621", "60622", "60623", "60624", "60625", "60626", "60628", "60629", "60630",
        "60631", "60632", "60633", "60634", "60636", "60637", "60638", "60639", "60640", "60641",
        "60642", "60643", "60644", "60645", "60646", "60647", "60649", "60651", "60652", "60653",
        "60654", "60655", "60656", "60657", "60659", "60660", "60661",  # "60664" is left out
        "60666", "60668",
        "60669", "60670", "60673", "60674", "60675", "60677", "60678", "60680", "60681", "60682",
        "60684", "60685", "60686", "60687", "60688", "60689", "60690", "60691", "60693", "60694",
        "60695", "60696", "60697", "60699", "60701",
    ]
    # Overrides the full list above with a two-ZIP subset.
    chicago_zip_codes = ["60601", "60602"]

    livability_data = main(chicago_zip_codes)



📌 Scraping ZIP Code: 60601

<Element html at 0x114670d60>

📌 Scraping ZIP Code: 60602

<Element html at 0x11462d8b0>
