In [None]:
########################################################################################################################
#File: DataPull_Redfine_county.ipynb 
#Source: https://redfin-public-data.s3.us-west-2.amazonaws.com/redfin_market_tracker/
#Status: Active - Main real estate data pull
#Notes: Pull county-level data from Redfin
#Last updated = 11/20/2022 
########################################################################################################################

In [6]:
import requests
import json
from datetime import datetime
import pandas as pd
import warnings
import os

# NOTE(review): installs a blanket "ignore" filter, so warnings raised by any
# later cell (including pandas and deprecation warnings) are not displayed.
# Consider narrowing this to specific categories once the notebook is clean.
warnings.filterwarnings('ignore')

In [7]:
#Data listing page: https://www.redfin.com/news/data-center/
#Loads the national county-level real estate market tracker
#Duration period: 30

url = 'https://redfin-public-data.s3.us-west-2.amazonaws.com/redfin_market_tracker/county_market_tracker.tsv000.gz'

#Read the gzipped, tab-separated file straight from the URL into a dataframe;
#lines that cannot be parsed are skipped rather than raising
df = pd.read_csv(
    url,
    sep='\t',
    compression='gzip',
    on_bad_lines='skip',
)

#Report the dimensions of the loaded dataframe
print('Num of rows:', df.shape[0])
print('Num of cols:', df.shape[1])

#Preview the first five rows
df.head(5)

Num of rows: 784430
Num of cols: 58


Unnamed: 0,period_begin,period_end,period_duration,region_type,region_type_id,table_id,is_seasonally_adjusted,region,city,state,...,sold_above_list_yoy,price_drops,price_drops_mom,price_drops_yoy,off_market_in_two_weeks,off_market_in_two_weeks_mom,off_market_in_two_weeks_yoy,parent_metro_region,parent_metro_region_metro_code,last_updated
0,2014-12-01,2014-12-31,30,county,5,2243,f,"Washington County, OH",,Ohio,...,-0.021552,,,,0.0,-0.055556,0.0,"Marietta, OH",31930.0,2022-11-20 14:36:43
1,2013-04-01,2013-04-30,30,county,5,3085,f,"Island County, WA",,Washington,...,0.03594,0.177496,0.001771,-0.056232,0.447059,0.027704,0.172549,"Oak Harbor, WA",36020.0,2022-11-20 14:36:43
2,2020-03-01,2020-03-31,30,county,5,730,f,"DeKalb County, IL",,Illinois,...,0.045822,0.20073,0.006931,-0.056696,0.604167,0.044845,0.066352,"Elgin, IL",20994.0,2022-11-20 14:36:43
3,2017-05-01,2017-05-31,30,county,5,2382,f,"Dauphin County, PA",,Pennsylvania,...,0.0,,,,,,,"Harrisburg, PA",25420.0,2022-11-20 14:36:43
4,2016-05-01,2016-05-31,30,county,5,996,f,"Warren County, IA",,Iowa,...,0.128527,,,,0.081081,-0.108108,0.081081,"Des Moines, IA",19780.0,2022-11-20 14:36:43


In [9]:
#Filter data and clean-up

#columns not needed downstream; dropped by name (not by position) so a change
#in the upstream column order cannot silently remove the wrong columns
unused_cols = [
    'period_duration',
    'region_type',
    'region_type_id',
    'table_id',
    'is_seasonally_adjusted',
    'city',
]

#list of counties to focus on
counties_list = ['Williamson County', 'Travis County', 'Hays County', 'Bastrop County', 'Caldwell County']

#filter real estate dataset on Texas, drop the unused columns, and isolate the
#county name (text before the comma in 'region', e.g. "Williamson County, TX"
#-> "Williamson County") into new column 'county'
#built as a new frame instead of dropping/assigning in place on a filtered
#selection of df, so re-running this cell always starts from the raw data
texas_df = (
    df.loc[df['state'] == 'Texas']
    .drop(columns=unused_cols)
    .assign(county=lambda frame: frame['region'].str.split(',').str[0].str.strip())
)

#filter texas dataset to counties we are interested in
texas_counties_df = texas_df.loc[texas_df['county'].isin(counties_list)]


texas_counties_df.head()

#csv export
#texas_counties_df.to_csv('redfin_texas_counties.csv')



Unnamed: 0,period_begin,period_end,region,state,state_code,property_type,property_type_id,median_sale_price,median_sale_price_mom,median_sale_price_yoy,...,price_drops,price_drops_mom,price_drops_yoy,off_market_in_two_weeks,off_market_in_two_weeks_mom,off_market_in_two_weeks_yoy,parent_metro_region,parent_metro_region_metro_code,last_updated,county
139,2012-01-01,2012-01-31,"Williamson County, TX",Texas,TX,Multi-Family (2-4 Unit),4,152500.0,0.155303,0.270833,...,0.043478,-0.164855,-0.096307,0.4,-0.4,0.066667,"Austin, TX",12420.0,2022-11-20 14:36:43,Williamson County
299,2019-05-01,2019-05-31,"Williamson County, TX",Texas,TX,Condo/Co-op,3,237000.0,0.013904,0.068771,...,0.212766,-0.187234,-0.103024,0.571429,-0.21118,0.071429,"Austin, TX",12420.0,2022-11-20 14:36:43,Williamson County
474,2019-06-01,2019-06-30,"Williamson County, TX",Texas,TX,Single Family Residential,6,291000.0,-0.029863,-0.017722,...,0.336561,0.040325,-0.032895,0.505759,-0.006931,0.056208,"Austin, TX",12420.0,2022-11-20 14:36:43,Williamson County
505,2012-04-01,2012-04-30,"Williamson County, TX",Texas,TX,Multi-Family (2-4 Unit),4,154950.0,0.029568,-0.12309,...,0.113636,-0.021499,-0.069462,0.571429,0.171429,0.428571,"Austin, TX",12420.0,2022-11-20 14:36:43,Williamson County
1400,2015-08-01,2015-08-31,"Williamson County, TX",Texas,TX,Multi-Family (2-4 Unit),4,219000.0,0.120205,0.084158,...,,,,0.777778,0.215278,0.777778,"Austin, TX",12420.0,2022-11-20 14:36:43,Williamson County


In [12]:
#Preview: row count of the county subset, followed by its first five rows
print('Num of rows:', texas_counties_df.shape[0])
texas_counties_df.head(5)

Num of rows: 2573


Unnamed: 0,period_begin,period_end,region,state,state_code,property_type,property_type_id,median_sale_price,median_sale_price_mom,median_sale_price_yoy,...,price_drops,price_drops_mom,price_drops_yoy,off_market_in_two_weeks,off_market_in_two_weeks_mom,off_market_in_two_weeks_yoy,parent_metro_region,parent_metro_region_metro_code,last_updated,county
139,2012-01-01,2012-01-31,"Williamson County, TX",Texas,TX,Multi-Family (2-4 Unit),4,152500.0,0.155303,0.270833,...,0.043478,-0.164855,-0.096307,0.4,-0.4,0.066667,"Austin, TX",12420.0,2022-11-20 14:36:43,Williamson County
299,2019-05-01,2019-05-31,"Williamson County, TX",Texas,TX,Condo/Co-op,3,237000.0,0.013904,0.068771,...,0.212766,-0.187234,-0.103024,0.571429,-0.21118,0.071429,"Austin, TX",12420.0,2022-11-20 14:36:43,Williamson County
474,2019-06-01,2019-06-30,"Williamson County, TX",Texas,TX,Single Family Residential,6,291000.0,-0.029863,-0.017722,...,0.336561,0.040325,-0.032895,0.505759,-0.006931,0.056208,"Austin, TX",12420.0,2022-11-20 14:36:43,Williamson County
505,2012-04-01,2012-04-30,"Williamson County, TX",Texas,TX,Multi-Family (2-4 Unit),4,154950.0,0.029568,-0.12309,...,0.113636,-0.021499,-0.069462,0.571429,0.171429,0.428571,"Austin, TX",12420.0,2022-11-20 14:36:43,Williamson County
1400,2015-08-01,2015-08-31,"Williamson County, TX",Texas,TX,Multi-Family (2-4 Unit),4,219000.0,0.120205,0.084158,...,,,,0.777778,0.215278,0.777778,"Austin, TX",12420.0,2022-11-20 14:36:43,Williamson County


In [12]:
# Print the column names, non-null counts and dtypes of the county subset.
texas_counties_df.info()
# Summary statistics (count, mean, std, min, quartiles, max); left as the bare
# last expression so the notebook renders the resulting table.
texas_counties_df.describe()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 36694 entries, 3883 to 352612
Data columns (total 52 columns):
 #   Column                          Non-Null Count  Dtype  
---  ------                          --------------  -----  
 0   period_begin                    36694 non-null  object 
 1   period_end                      36694 non-null  object 
 2   region                          36694 non-null  object 
 3   state                           36694 non-null  object 
 4   state_code                      36694 non-null  object 
 5   property_type                   36694 non-null  object 
 6   property_type_id                36694 non-null  int64  
 7   median_sale_price               36694 non-null  float64
 8   median_sale_price_mom           35328 non-null  float64
 9   median_sale_price_yoy           33193 non-null  float64
 10  median_list_price               34670 non-null  float64
 11  median_list_price_mom           33219 non-null  float64
 12  median_list_price_yoy       

Unnamed: 0,property_type_id,median_sale_price,median_sale_price_mom,median_sale_price_yoy,median_list_price,median_list_price_mom,median_list_price_yoy,median_ppsf,median_ppsf_mom,median_ppsf_yoy,...,sold_above_list_mom,sold_above_list_yoy,price_drops,price_drops_mom,price_drops_yoy,off_market_in_two_weeks,off_market_in_two_weeks_mom,off_market_in_two_weeks_yoy,parent_metro_region_metro_code,zipcode
count,36694.0,36694.0,35328.0,33193.0,34670.0,33219.0,31536.0,36690.0,35324.0,33188.0,...,35328.0,33193.0,25827.0,23249.0,21598.0,29533.0,26598.0,26081.0,36694.0,36694.0
mean,3.789148,349937.3,0.018734,0.155398,370184.2,0.041076,0.168039,193.231848,0.0142,0.140672,...,0.000372,0.033266,0.586626,0.008908,-0.001757,0.43372,-0.002602,-0.001654,12634.648716,78615.785524
std,4.09199,220950.1,0.177104,0.400311,245357.8,2.724775,2.673111,122.643604,0.138051,0.557558,...,0.137281,0.266363,0.236425,0.185772,0.246642,0.284525,0.304983,0.313095,2497.801108,430.55569
min,-1.0,2750.0,-0.996642,-0.996382,1300.0,-0.99784,-0.998071,1.286249,-0.993643,-0.993149,...,-1.0,-1.0,0.02,-0.875,-0.952381,0.0,-1.0,-1.0,12420.0,76527.0
25%,-1.0,207000.0,-0.017335,0.017647,219900.0,-0.019737,0.014146,122.561359,-0.009943,0.028887,...,-0.027778,-0.048469,0.41791,-0.079406,-0.136657,0.25,-0.129533,-0.141324,12420.0,78648.0
50%,4.0,295000.0,0.0,0.097143,310000.0,0.0,0.089285,161.959221,0.000957,0.089781,...,0.0,0.002994,0.578947,0.006352,0.0,0.435714,0.0,0.0,12420.0,78720.0
75%,6.0,432000.0,0.034556,0.219958,450000.0,0.036628,0.202802,237.448547,0.026829,0.188508,...,0.029412,0.113321,0.75,0.100733,0.133887,0.6,0.120728,0.1417,12420.0,78744.0
max,13.0,3875000.0,5.235731,14.825243,8000000.0,454.555556,257.322692,8021.944405,5.630782,58.622673,...,1.0,1.0,1.0,0.875,0.947368,1.0,1.0,1.0,41700.0,78957.0
