## Dependencies

In [3]:
# Confirmed needed dependencies
import random
import pprint
import pandas as pd
import requests
import json

# Dependencies for geocoordinates generator
import sys
import math
import gmplot

# Dependencies for conversion of coordinates to addresses
import geopy
from geopy.geocoders import Nominatim

# Dependencies for Zillow data
from pyzillow.pyzillow import ZillowWrapper, GetDeepSearchResults

# Dependency for Heat Mapper
import gmaps


# Add config.py file with the following variables and cooresponding Zillow API keys
# from config import Ztroy, Zseth, Zkat, Zval, Zyuta
# from config import google_API_Key


################# ONGOING EDITS TO REQUIREMENTS.MD #################
###### IF ANY ERRORS OCCUR RELATING TO MODULES OR CONFIG.PY #######
### REFER TO requirements.md TO ENSURE YOU ARE PROPERLY SETUP ####

## File inputs/outputs

In [4]:
randLatLon_csv = "./Data/randomLatLon.csv" 
addressList_csv = "./Data/addressList.csv"
masterData_csv = "./Data/masterData.csv"

## Geocoordinates of Austin

In [None]:
##########################################
# this section written by troy bailey.   #
# enter uservariables below to determine #
# center location, radius of circle, and #
# number of geocoordinates to generate.  #
##########################################

In [16]:
########################
#### USER VARIABLES ####
########################

x0 = 30.27444       #### Set center coordiantes in decimal degrees
y0 = -97.74028      #### initial coordiantes are location of Texas State Capitol Building

radius = 20         #### Set radius in miles

points = 40000        #### Set number of lat,lon points to generate

In [17]:
# variables and inputs for coordinate calculations
lat_lon_list = []
radiusInDegrees=radius/69           
r = radiusInDegrees
points += 1

In [18]:
# calculate each coordiante point and build a list of lat and lon
for i in range(1, points):
    u = float(random.uniform(0.0,1.0)) #random number for radius length
    v = float(random.uniform(0.0,1.0)) #random number for pi radians
    
    w = r * math.sqrt(u) #radius length
    t = 2 * math.pi * v  #radians
    x = w * math.cos(t)  #calculate x coord distance
    y = w * math.sin(t)  #calculate y coord distance
    
    xLat  = x + x0       #offset x by center x
    yLon = y + y0        #offset y by center y
    
    lat_lon_list.append([xLat,yLon])

# convert list to dataframe
lat_lon_df = pd.DataFrame(lat_lon_list, columns=['lat','lon'])

lat_lon_df.head()

len(lat_lon_df)

40000

In [16]:
# write a CSV file of coordinate points
lat_lon_df.to_csv(randLatLon_csv, index=False, header=True)

## Plot coordinate points on map


In [17]:
# This section will plot points on a Google map centered at centerPointLat and centerPointLon with a magnification of magFactor
# It assumes there is a dataframe with "lat" and "lon" columns
# The resulting map is saved to a file called 

centerPointLat = 30.27444  #these are the coordinates of the Texas State Capitol building
centerPointLon = -97.74028 #these are the coordinates of the Texas State Capitol building
magnificationFactor = 10
pointColor = "red"
pointSize = 100
mapOutputFile = "mymap.html"
df = lat_lon_df

gmap = gmplot.GoogleMapPlotter(centerPointLat, centerPointLon, magnificationFactor)

gmap.scatter(df["lat"], df["lon"], pointColor, size=pointSize, marker=False)

gmap.draw("./Visuals/" + mapOutputFile)

## Convert Coordinates to Residential Addresses

In [5]:
#########################
##### Yuta's Blocks #####
#########################

##### Geopy Nominatim API #####
geopy.geocoders.options.default_user_agent = "ut-group-EPIC"

pp = pprint.PrettyPrinter(indent=4)

url = "https://nominatim.openstreetmap.org/reverse?"

In [None]:
# Test API - Known Residential Address
params_1 = {
    "format": "jsonv2",
    "lat": 30.440777,
    "lon": -97.777048
}

print("===== Test Home Response:")
response = requests.get(url, params=params_1).json()
pp.pprint(response)
print("\n" + "="*60 + "\n")

In [6]:
# Import CSV, put into DataFrame
latlon_df = pd.read_csv(randLatLon_csv)
latlon_df.head()

Unnamed: 0,lat,lon
0,30.127584,-97.746072
1,30.032038,-97.784494
2,30.116258,-97.829903
3,30.060355,-97.848218
4,30.030435,-97.87691


In [7]:
# Put latitudes and longitudes into a zip object
lats = latlon_df.iloc[:, 0]
lons = latlon_df.iloc[:, 1]
lat_lons = []
lat_lons = zip(lats, lons)

In [None]:
# Loop Request API / Append to lists
query_url = "https://nominatim.openstreetmap.org/reverse?"

house_num = []
road = []
postcode = []
lat = []
lon = []
neighborhood = []

counter = 1
numRequests = latlon_df["lat"].count()
rSuccess = []
rFailure = []

print(f"Processing {numRequests} Requests...")

# Nominatim API Request

for lat_lon in lat_lons:
    params = {
        "format": "jsonv2",
        "lat": lat_lon[0],
        "lon": lat_lon[1]
    }

    time.sleep(1.1)
    response = requests.get(query_url, params=params).json()
    

    if response['type'] == 'house' or response['type'] == 'yes':
        lat.append(response['lat'])
        lon.append(response['lon'])
        postcode.append(response['address']['postcode'])
        
        try:
            house_num.append(response['address']['house_number'])
        except (KeyError, IndexError):
            house_num.append("NA")
        try:
            road.append(response['address']['road'])
        except (KeyError, IndexError):
            road.append("NA")
        try:
            neighborhood.append(response['address']['neighbourhood'])
        except (KeyError, IndexError):
            neighborhood.append("NA")
        
        print(f"Processed Record {counter} of {numRequests}.")
        rSuccess.append(counter)
        counter += 1
        
    else:
        print(f"Wrong Type - Skipped Record {counter} of {numRequests}.")
        rFailure.append(counter)
        counter += 1
        
print(f"Finished Requests !!!")

In [None]:
print("Request Results:")
print("Success #:" + str(len(rSuccess)))
print("Skipped #:" + str(len(rFailure)))

In [None]:
# Create dataframe with addresses from API requests
address_df = pd.DataFrame({
    "house #": house_num,
    "street": road,
    "zipcode": postcode,
    "lat": lat,
    "lon": lon,
    "neighborhood": neighborhood,
})

# Clean up Dataframe Columns before output (Drop incomplete zipcodes, Highway streets, and Null house # or streets)
address_df = address_df[address_df['zipcode'].str.len() == 5]
address_df = address_df[address_df['zipcode'].apply(lambda x: len(str(x)) > 3)]
address_df = address_df[address_df['street'].str.contains("Highway") == False]
address_df = address_df[address_df['house #'].str.contains("NA") == False]
address_df = address_df[address_df['street'].str.contains("NA") == False]
address_df

In [None]:
# write a CSV file of addresses
address_df.to_csv(addressList_csv, index=False, header=True)

In [None]:
# Map out the sample with gmplot

gmap = gmplot.GoogleMapPlotter(30.27444, -97.74028, 10)

gmap.scatter(address_sample["lat"], address_sample["lon"], 'red', size=40, marker=False)

gmap.draw("./Visuals/myaddressmap.html")

## Zillow API Calls using Address and Zipcode

In [None]:
# Val create function for read in csv, check for headers?, and append csv


In [25]:
#############################
##### VALERIE'S BLOCKS #####
###########################

# Tiny sample to work with looping without exhausting API call limits
addressListTiny_csv = "./Data/addressListTiny.csv"

address_sample = pd.read_csv(addressListTiny_csv)
address_df = pd.DataFrame(address_sample)
print(len(address_df))
address_df.head()

442


Unnamed: 0,house #,street,zipcode,lat,lon,neighborhood
0,8109,Boggy Ridge Drive,78748,30.182018,-97.795088,Dittmar - Cooper
1,8507,Caspian Drive,78749,30.199637,-97.841057,Woodstone Village
2,950,Westbank Drive,78746,30.278182,-97.809151,Ledgeway
3,4208,Steck Avenue,78759,30.377557,-97.75756,Mesa Forest
4,807,Park Boulevard,78751,30.301572,-97.722516,Hyde Park


In [None]:
####################### NO LONGER USING THIS API ########################
################# HOWEVER, CODE MAY BE USED ELSEWHERE ##################
########### OR WE MAY END UP NEEDING THIS API FOR ISD DATA ############

# pp = pprint.PrettyPrinter(indent=4)
# api = zillow.ValuationApi()
# # Insert your Zillow API key here
# zwsid = "X1-ZWz1gm14kn7d3f_2kpit"

# valuation = []
# sqft = []

# for row, home in address_df.iterrows():
#     print(row)
#     address = address_df["house #"][row] + " " + address_df["street"][row] 
#     print(address)
#     zipcode = address_df["zipcode"][row]
#     print(zipcode)
    
#     try:
#         z_deep_results = api.GetDeepSearchResults(zwsid, address, zipcode)
#         print(z_deep_results)
#         pp.pprint(deep_results.get_dict())
#         print(z_deep_results['zestimate']['amount'])
#         valuation.append(z_deep_results['zestimate']['amount'])
#         print(z_deep_results['extended_data']['finished_sqft'])
#         sqft.append(z_deep_results['extended_data']['finished_sqft'])
#     except:
#         print(f"No record found for {address}, {zipcode}.")



In [13]:
zid = []
alats = []
alons = []
addresses = []
valuation = []
sqft = []
isd = []


############## LOOPING FUNCTION FULLY OPERATIONAL ###############
###### HOWEVER, ZILLOW ONLY ALLOWS 1000 API CALLS PER DAY ######

for row, home in address_df.iterrows():
    address = str(address_df["house #"][row]) + " " + address_df["street"][row]
    addresses.append(address)
    zipcode = address_df["zipcode"][row]
    print(f"Processing {address}, {zipcode} (index {row}).")

    try:
        zillow_data = ZillowWrapper(Zyuta)
        deep_search_response = zillow_data.get_deep_search_results(address, zipcode)
        result = GetDeepSearchResults(deep_search_response)
    except:
        print(f"No record found for {address}, {zipcode} (index {row}). Appending lists with null values")
        zid.append(None)
        alats.append(None)
        alons.append(None)
        valuation.append(None)
        sqft.append(None)
        isd.append(None)
        continue

    try:
        zillowID = result.zillow_id
        zid.append(zillowID)
    except:
        print(f"No zid found for {address}, {zipcode} (index {row}). Appending list with null values")
        zid.append(None)
        
    try:
        alat = result.latitude
        alats.append(alat)
    except:
        print(f"No alat found for {address}, {zipcode} (index {row}). Appending list with null values")
        alats.append(None)

    try:
        alon = result.longitude
        alons.append(alon)
    except:
        print(f"No alon found for {address}, {zipcode} (index {row}). Appending list with null values")
        alons.append(None)
        
    try:    
        val = int(result.zestimate_amount)
        valuation.append(val)
    except:
        print(f"No valuation found for {address}, {zipcode} (index {row}). Appending list with null values")
        valuation.append(None)

    try:
        zsqft = int(result.home_size)
        sqft.append(zsqft)
    except:
        print(f"No sqft found for {address}, {zipcode} (index {row}). Appending list with null values")
        sqft.append(None)


    try:
        zillow_data = ZillowWrapper(Zyuta)
        updated_property_details_response = zillow_data.get_updated_property_details(zid[row])
        new_result = GetUpdatedPropertyDetails(updated_property_details_response)

        try:
            zisd = new_result.school_district
            isd.append(zisd)
        except:
            isd.append(None)
            
    except:
        print(f"No updated property info for this listing (index {row}).")
        isd.append(None)


Processing 8109 Boggy Ridge Drive, 78748 (index 0).
Processing 8507 Caspian Drive, 78749 (index 1).
No updated property info for this listing (index 1).
Processing 950 Westbank Drive, 78746 (index 2).
Processing 4208 Steck Avenue, 78759 (index 3).
No record found for 4208 Steck Avenue, 78759 (index 3). Appending lists with null values
Processing 807 Park Boulevard, 78751 (index 4).
No updated property info for this listing (index 4).
Processing 10109 Slaughter Creek Drive, 78748 (index 5).
No record found for 10109 Slaughter Creek Drive, 78748 (index 5). Appending lists with null values
Processing 601 Jet Lane, 78742 (index 6).
No updated property info for this listing (index 6).
Processing 4409 Jessamine Hollow, 78731 (index 7).
No updated property info for this listing (index 7).
Processing 8800 Daffan Lane, 78724 (index 8).
No record found for 8800 Daffan Lane, 78724 (index 8). Appending lists with null values
Processing 4501 Ridge Oak Drive, 78731 (index 9).
No updated property inf

Processing 11200 Taylor Draper Lane, 78759 (index 78).
Processing 7201 RM 2222, 78730 (index 79).
No record found for 7201 RM 2222, 78730 (index 79). Appending lists with null values
Processing 9333 Brown Lane, 78754 (index 80).
No record found for 9333 Brown Lane, 78754 (index 80). Appending lists with null values
Processing 2639 Gwendolyn Lane, 78748 (index 81).
No updated property info for this listing (index 81).
Processing 11901 River Oaks Trail, 78753 (index 82).
No updated property info for this listing (index 82).
Processing 5409 Wasson Road, 78745 (index 83).
No record found for 5409 Wasson Road, 78745 (index 83). Appending lists with null values
Processing 2928 Norfolk Drive, 78745 (index 84).
Processing 9428 Parkfield Drive, 78758 (index 85).
No record found for 9428 Parkfield Drive, 78758 (index 85). Appending lists with null values
Processing 1701 Victoria Drive, 78721 (index 86).
No updated property info for this listing (index 86).
Processing 728 Barton Creek Boulevard, 

Processing 2936 Oestrick Lane, 78733 (index 153).
No record found for 2936 Oestrick Lane, 78733 (index 153). Appending lists with null values
Processing 5701 Springdale Road, 78723 (index 154).
No record found for 5701 Springdale Road, 78723 (index 154). Appending lists with null values
Processing 4820 Trail Crest Circle, 78735 (index 155).
Processing 1504 East 37th Street, 78722 (index 156).
Processing 5816 Harold Court, 78721 (index 157).
No record found for 5816 Harold Court, 78721 (index 157). Appending lists with null values
Processing 10514 Wylie Drive, 78748 (index 158).
No updated property info for this listing (index 158).
Processing 2205 Rabb Glen Street, 78704 (index 159).
Processing 6315 Pathfinder Drive, 78759 (index 160).
Processing 10001 Middle Fiskville Road, 78753 (index 161).
No record found for 10001 Middle Fiskville Road, 78753 (index 161). Appending lists with null values
Processing 4016 Pinckney Street, 78722 (index 162).
Processing 203 Canyon Rim Drive, 78746 (in

No record found for 6600 West William Cannon Drive, 78735 (index 228). Appending lists with null values
Processing 713 Kinney Avenue, 78704 (index 229).
Processing 5600 Decker Lane, 78724 (index 230).
No record found for 5600 Decker Lane, 78724 (index 230). Appending lists with null values
Processing 1707 Overhill Drive, 78721 (index 231).
No updated property info for this listing (index 231).
Processing 2412 Burleson Court, 78741 (index 232).
Processing 5634 Sedona Drive, 78759 (index 233).
Processing 8507 Bell Mountain Drive, 78730 (index 234).
No updated property info for this listing (index 234).
Processing 7164 Ridge Oak Road, 78749 (index 235).
No updated property info for this listing (index 235).
Processing 1507 Tall Shadows Drive, 78617 (index 236).
No valuation found for 1507 Tall Shadows Drive, 78617 (index 236). Appending list with null values
No sqft found for 1507 Tall Shadows Drive, 78617 (index 236). Appending list with null values
No updated property info for this list

Processing 8425 Asmara Drive, 78750 (index 302).
Processing 9020 Bluff Springs Road, 78747 (index 303).
No updated property info for this listing (index 303).
Processing 201 Edwin Lane, 78742 (index 304).
No updated property info for this listing (index 304).
Processing 2900 Susquehanna Lane, 78723 (index 305).
No updated property info for this listing (index 305).
Processing 8501 Lava Hill Road, 78744 (index 306).
No record found for 8501 Lava Hill Road, 78744 (index 306). Appending lists with null values
Processing 6201 Bolm Road, 78721 (index 307).
No record found for 6201 Bolm Road, 78721 (index 307). Appending lists with null values
Processing 2721 Lost Creek Boulevard, 78735 (index 308).
No record found for 2721 Lost Creek Boulevard, 78735 (index 308). Appending lists with null values
Processing 3703 Turkey Creek Drive, 78730 (index 309).
No updated property info for this listing (index 309).
Processing 7443 Bee Caves Road, 78746 (index 310).
No record found for 7443 Bee Caves Ro

No record found for 9383 New Airport Drive, 78719 (index 371). Appending lists with null values
Processing 6523 Twin Creek Hollow, 78750 (index 372).
No record found for 6523 Twin Creek Hollow, 78750 (index 372). Appending lists with null values
Processing 4604 Island Cove, 78731 (index 373).
No updated property info for this listing (index 373).
Processing 4702 Trail West Drive, 78735 (index 374).
No updated property info for this listing (index 374).
Processing 12030 Samsung Boulevard, 78754 (index 375).
No record found for 12030 Samsung Boulevard, 78754 (index 375). Appending lists with null values
Processing 5701 East Martin Luther King Jr Boulevard, 78721 (index 376).
No record found for 5701 East Martin Luther King Jr Boulevard, 78721 (index 376). Appending lists with null values
Processing 3103 Sweet Autumn Cove, 78735 (index 377).
No updated property info for this listing (index 377).
Processing 113 Tumbleweed Trail South, 78733 (index 378).
No record found for 113 Tumbleweed T

TypeError: must be str, not float

In [None]:
######### ISD IS DRAWN FROM GET_UPDATED_PROPERTY_DETAILS ##########
########### WHICH HAS NOT BEEN ABLE TO RETURN VALUES  ############
############## FOR ANY PROPERTIES IN TINY SAMPLE ################
### MAY NEED TO APPROACH THESE VALUES FROM A DIFFERENT ROUTE ###

## Calculate Value per Sqft

In [14]:
valsqft = []
for row, value in enumerate(valuation):
    try:
        vsqft = round((valuation[row] / sqft[row]), 2)
        valsqft.append(vsqft)
    except:
        print("Cannot perform math with NoneType")
        valsqft.append(None)

Cannot perform math with NoneType
Cannot perform math with NoneType
Cannot perform math with NoneType
Cannot perform math with NoneType
Cannot perform math with NoneType
Cannot perform math with NoneType
Cannot perform math with NoneType
Cannot perform math with NoneType
Cannot perform math with NoneType
Cannot perform math with NoneType
Cannot perform math with NoneType
Cannot perform math with NoneType
Cannot perform math with NoneType
Cannot perform math with NoneType
Cannot perform math with NoneType
Cannot perform math with NoneType
Cannot perform math with NoneType
Cannot perform math with NoneType
Cannot perform math with NoneType
Cannot perform math with NoneType
Cannot perform math with NoneType
Cannot perform math with NoneType
Cannot perform math with NoneType
Cannot perform math with NoneType
Cannot perform math with NoneType
Cannot perform math with NoneType
Cannot perform math with NoneType
Cannot perform math with NoneType
Cannot perform math with NoneType
Cannot perform

## Master Dataframe Creation

In [26]:
# Checking to ensure lists are appropriate lengths
print(len(zid))
print(len(alats))
print(len(alons))
print(len(addresses))
print(len(valuation))
print(len(sqft))
print(len(valsqft))
print(len(isd)) 

# Referring back to addressListTiny_csv generated dataframe for relevant info
address_df.head()

442
442
442
442
442
442
442
442


Unnamed: 0,house #,street,zipcode,lat,lon,neighborhood
0,8109,Boggy Ridge Drive,78748,30.182018,-97.795088,Dittmar - Cooper
1,8507,Caspian Drive,78749,30.199637,-97.841057,Woodstone Village
2,950,Westbank Drive,78746,30.278182,-97.809151,Ledgeway
3,4208,Steck Avenue,78759,30.377557,-97.75756,Mesa Forest
4,807,Park Boulevard,78751,30.301572,-97.722516,Hyde Park


In [36]:
# CURRENTLY ONLY SHOWING TINY SAMPLE

masterDF = pd.DataFrame({
    "Zillow ID": zid,
    "address": addresses,
    "zipcode": address_df["zipcode"],
    "alat": alats,
    "alon": alons,
    "valuation": valuation,
    "sqft": sqft,
    "value sqft": valsqft,
    "neighborhood": address_df["neighborhood"],
})
len(masterDF)

442

In [35]:
masterDFclean = masterDF.dropna(how="any", subset=["Zillow ID"])
len(masterDFclean)

277

In [37]:
# masterDF to csv
masterDFclean.to_csv(masterData_csv, index=False, header=True)
masterDFclean.head(30)

Unnamed: 0,Zillow ID,address,zipcode,alat,alon,valuation,sqft,value sqft,neighborhood
0,58316011,8109 Boggy Ridge Drive,78748,30.182015,-97.795063,262808.0,1476.0,178.05,Dittmar - Cooper
1,29498766,8507 Caspian Drive,78749,30.199658,-97.841024,310834.0,1654.0,187.93,Woodstone Village
2,2132336893,950 Westbank Drive,78746,30.278708,-97.808911,805208.0,1582.0,508.98,Ledgeway
4,29400629,807 Park Boulevard,78751,30.301584,-97.722499,583926.0,1905.0,306.52,Hyde Park
6,29462460,601 Jet Lane,78742,30.235276,-97.677214,250568.0,1015.0,246.87,
7,29349645,4409 Jessamine Hollow,78731,30.348537,-97.776564,521321.0,2900.0,179.77,Cat Mountain
9,29343151,4501 Ridge Oak Drive,78731,30.329848,-97.769843,2954219.0,10778.0,274.1,Balcones Park
12,70339207,11201 Long Summer Drive,78754,30.359344,-97.640621,292348.0,3183.0,91.85,Pioneer Crossing East
13,58302468,504 East Braker Lane,78753,30.378232,-97.67872,260715.0,1264.0,206.26,Eubank Acres
14,29402786,5703 Coolbrook Drive,78724,30.304843,-97.656442,357860.0,2282.0,156.82,Las Cimas


## Crime Data

In [None]:
# Kat's section

## School Data

In [None]:
# Seth's section

## Heat Mapper

In [None]:
# Troy's section


gmaps.configure(api_key=google_API_Key)

In [None]:
# This cell creates a test masterData_df by pulling in Yuta's address file and adds a column as a testm "value to map"
# This cell can be deleted as soon as there is a master data file that includes a property value column or some other value to plot
# The last digit of the zipcode is used as a value that will vary by area and a random number between 0 and 1 is added to create variation in the weights

masterData_df = pd.read_csv(addressList_csv)
zips = masterData_df["zipcode"]
valueToMap = []

for zip in zips:
    lastDigit = zip[-1:]
#    print(last2Digits)
    valueToMap.append(int(lastDigit) + random.uniform(0.0,1.0))
    
masterData_df["valueToMap"] = valueToMap
masterData_df.head()

In [None]:
# This cell uses gmaps library to create a google heat map from the data in a master data file.
# The masterData csv file is taken as input
# The lat and lon columns are taken as the coordinates for hte heatmap 
# The user specified column is taken as the weighting valies fo each coordinate point

df = masterData_df
columnToMap = 'valueToMap'
max_intensity = df[columnToMap].max()

fig = gmaps.figure()
heatmap_layer = gmaps.heatmap_layer(df[['lat', 'lon']], weights=df[columnToMap], max_intensity=max_intensity, point_radius=10.0)
fig.add_layer(heatmap_layer)
fig

In [None]:
# this is a function version of the cell above
# the function takes columnToMap as the weights for the points defined by 'lat' and 'lon' columns in the dataframe
# the dataframe can be included as a parameter, if it is not included masterData_df is assumed

def heatMapper(columnToMap, df = masterData_df):
    
    max_intensity = df[columnToMap].max()
    
    fig = gmaps.figure()
    heatmap_layer = gmaps.heatmap_layer(df[['lat', 'lon']], weights=df[columnToMap], max_intensity=max_intensity, point_radius=10.0)
    fig.add_layer(heatmap_layer)

    return;

In [None]:
heatMapper(columnToMap = 'valueToMap')
fig