In [3]:
from homeharvest import scrape_property
from datetime import datetime, timedelta
import pandas as pd

### Example: how to pull data with `scrape_property`

In [4]:
# Example for how to pull data: a single scrape_property call for one city.
# The result is bound to `properties` and previewed in the next cell.
properties = scrape_property(
    location="Logan, UT",
    listing_type="sold", # or (for_sale, for_rent, pending)
    past_days=90, # sold in last 90 days; listed in last 90 days if (for_sale, for_rent)

    # Optional arguments, left commented out here:
    # date_from="2023-05-01", # explicit date range, alternative to past_days
    # date_to="2023-05-28",
    # foreclosure=True,
    # mls_only=True, # only fetch MLS listings
)

In [5]:
# Preview the first rows of the sample pull to see which columns come back.
properties.head()

Unnamed: 0,property_url,mls,mls_id,status,text,style,full_street_line,street,unit,city,...,parking_garage,agent,agent_email,agent_phones,broker,broker_phone,broker_website,nearby_schools,primary_photo,alt_photos
0,https://www.realtor.com/realestateandhomes-det...,SAUT,2004357,SOLD,Brimming with potential and waiting for your p...,MOBILE,438 S 780 W,438 S 780,,Logan,...,,Fox Homes Utah,Leslie@FoxHomesUtah.com,"[{'number': '8018723465', 'type': 'Mobile', 'e...",,,,"Logan City District, Bear River Charter School",,
1,https://www.realtor.com/realestateandhomes-det...,SAUT,2006345,SOLD,Just steps away from Historic Center Street si...,SINGLE_FAMILY,29 S 300 W,29 S 300,,Logan,...,,Anna Sears,,"[{'number': '8016802662', 'type': 'BUSINESS_MO...",,,,"Logan City District, Bear River Charter School",,
2,https://www.realtor.com/realestateandhomes-det...,SAUT,2001770,SOLD,Charming townhouse in Cache Valley! Minutes fr...,TOWNHOMES,377 W 1100 N Apt 16,377 W 1100,Apt 16,Logan,...,,Elvis Ryan,,,,,,"Intech Collegiate High School, Fast Forward Hi...",,
3,https://www.realtor.com/realestateandhomes-det...,SAUT,1996692,SOLD,In the heart of Logan within walking distance ...,SINGLE_FAMILY,530 E 800 N,530 E 800,,Logan,...,2.0,Curtis Orr,curtisorrhomes@gmail.com,"[{'number': '4355590698', 'type': 'Mobile', 'e...",,,,"Edith Bowen Laboratory School, Intech Collegia...",,
4,https://www.realtor.com/realestateandhomes-det...,SAUT,2002252,SOLD,Beautifully maintained home w inviting living ...,SINGLE_FAMILY,1918 S 1395 W,1918 S 1395,,Logan,...,2.0,David Anderson,david@a5realestate.com,"[{'number': '8014491842', 'type': None, 'ext':...",,,,"Cache District, Thomas Edison - North",,


### Create a function to pull an entire year of data, month by month to avoid rate limits

In [6]:
# function to scrape yearly data
def scrape_yearly_data(year, location, listing_type="sold"):
    """Scrape one calendar year of listings for ``location``, one month at a time.

    The year is split into its 12 calendar months and ``scrape_property`` is
    called once per month with an explicit ``date_from``/``date_to`` window
    (formatted as ``YYYY-MM-DD``), so no single request spans the whole year.

    Parameters
    ----------
    year : int
        Calendar year to scrape (e.g. ``2023``).
    location : str
        Location string forwarded unchanged to ``scrape_property``.
    listing_type : str, default "sold"
        Listing type forwarded unchanged to ``scrape_property``.

    Returns
    -------
    pandas.DataFrame
        All monthly results stacked in month order with a fresh 0..n-1 index.
        Empty if no month returned any rows.
    """
    monthly_frames = []

    # iterate through each month of the given year
    for month in range(1, 13):
        date_from = datetime(year, month, 1)

        # last day of the month = first day of the following month minus one
        # day; December rolls over into January of the next year
        if month == 12:
            next_month_start = datetime(year + 1, 1, 1)
        else:
            next_month_start = datetime(year, month + 1, 1)
        date_to = next_month_start - timedelta(days=1)

        # scrape data for the current month
        monthly_data = scrape_property(
            location=location,
            listing_type=listing_type,
            date_from=date_from.strftime('%Y-%m-%d'),
            date_to=date_to.strftime('%Y-%m-%d'),
        )

        monthly_frames.append(pd.DataFrame(monthly_data))

    # Concatenate once instead of growing a DataFrame inside the loop (which
    # re-copies all accumulated rows every month). Empty months are left out
    # when there is real data, because concatenating empty frames is deprecated
    # in recent pandas and can disturb column dtypes; if every month is empty
    # we still concatenate them so any column headers are preserved.
    non_empty_frames = [frame for frame in monthly_frames if not frame.empty]
    return pd.concat(non_empty_frames or monthly_frames, ignore_index=True)

### Scrape all of Utah's 'Sold' homes from 2023
- output: any home sold in Utah in 2023
- if the 'list_date' is later in time than the 'last_sold_date', that means the home was relisted for sale after being sold

In [7]:
# Pull every month of 2023 for the whole state. listing_type is left at its
# default ("sold"); this issues one scrape_property call per month (12 total).
sold_properties_2023 = scrape_yearly_data(2023, "Utah")

In [8]:
# Check the length of the dataframe to see how many properties were scraped
# (row count of the combined 12-month result).
len(sold_properties_2023)

30018

In [9]:
# Preview the first rows of the combined full-year dataset.
sold_properties_2023.head()

Unnamed: 0,property_url,mls,mls_id,status,text,style,full_street_line,street,unit,city,...,parking_garage,agent,agent_email,agent_phones,broker,broker_phone,broker_website,nearby_schools,primary_photo,alt_photos
0,https://www.realtor.com/realestateandhomes-det...,ICUT,100473,SOLD,Enjoy gorgeous mountain views from this beauti...,SINGLE_FAMILY,840 S Peachtree Dr,840 S Peachtree Dr,,Toquerville,...,2,The Ames Team,Bri@amesteamutah.com,"[{'number': '435-668-3349', 'type': 'Mobile', ...",,,,Washington District,http://ap.rdcpix.com/697436474f0b38b7b7edc663a...,http://ap.rdcpix.com/697436474f0b38b7b7edc663a...
1,https://www.realtor.com/realestateandhomes-det...,SAUT,1827010,SOLD,PROJECTED COMPLETION DATE OF January 2023. 5-P...,TOWNHOMES,5502 W Straights Ln S # 151,5502 W Straights Ln,# 151,West Valley City,...,2,Louise & David Gunther,agunther@remax.net,"[{'number': '(801) 518-7000', 'type': 'Home', ...",,,,"Granite District, Ascent Academies Of Utah",,
2,https://www.realtor.com/realestateandhomes-det...,SAUT,1820270,SOLD,Utah County's fastest-growing home builder! Ou...,TOWNHOMES,809 N Hilltop Dr # 100,809 N Hilltop Dr,# 100,Lehi,...,2,Jamie Lamb,jamielamb@bhginfluence.com,"[{'number': '8018775770', 'type': 'Office', 'e...",,,,,,
3,https://www.realtor.com/realestateandhomes-det...,SAUT,1848218,SOLD,Come see this charming home now on the market!...,SINGLE_FAMILY,3822 S Chatterleigh Rd Vly W,3822 S Chatterleigh Rd Vly,,West Valley City,...,1,Michelle L Holmes,,"[{'number': '(480) 351-6622', 'type': 'BUSINES...",,,,"Granite District, Ascent Academies Of Utah",,
4,https://www.realtor.com/realestateandhomes-det...,SAUT,1849453,SOLD,BACK ON THE MARKET | BUYERS FINANCING FELL THR...,SINGLE_FAMILY,1504 N 1830 W,1504 N 1830,,Provo,...,2,Bryan Crown,bryan@mcewenrealtors.com,"[{'number': '8015201424', 'type': 'Mobile', 'e...",,,,"Provo District, Freedom Academy",,


In [11]:
# Count listings per property style. The output contains overlapping labels
# (CONDOS, CONDO, CONDO_TOWNHOME_ROWHOME_COOP), so these categories may need to
# be consolidated before any per-style analysis.
sold_properties_2023['style'].value_counts()

style
SINGLE_FAMILY                  18770
TOWNHOMES                       4236
LAND                            2641
CONDOS                          1834
MOBILE                           689
OTHER                            633
CONDO                            492
CONDO_TOWNHOME_ROWHOME_COOP      376
MULTI_FAMILY                     298
FARM                               9
COMMERCIAL                         3
APARTMENT                          1
Name: count, dtype: int64

### Output unfiltered data to a CSV file for future use

In [12]:
# Save the raw, unfiltered 2023 results to disk for future use.
# NOTE(review): to_csv writes the DataFrame index as an unnamed first column by
# default; pass index=False here (or index_col=0 when reading the file back) if
# that extra column is unwanted.
sold_properties_2023.to_csv('utah_sold_properties_2023.csv')