## Dependencies

In [5]:
# Confirmed needed dependencies
import random
import pprint
import pandas as pd
import requests
import json

# Dependencies for geocoordinates generator
import sys
import math
import gmplot

# Dependencies for conversion of coordinates to addresses
import geopy
from geopy.geocoders import Nominatim

# Dependencies for Zillow data
from pyzillow.pyzillow import ZillowWrapper, GetDeepSearchResults

# zillow id, needed for the GetUpdatedPropertyDetails (ISD info)
## CURRENTLY GetUpdatedPropertyDetails IS NOT RETURNING VALUES ##
############# MAY END UP SCRAPPING THIS DEPENDENCY #############
from pyzillow.pyzillow import ZillowWrapper, GetUpdatedPropertyDetails

# Dependency for Heat Mapper
import gmaps


# Add a config.py file with the following variables and corresponding Zillow API keys
#from config import Ztroy, Zseth, Zkat, Zval, Zyuta
from config import google_API_Key


################# ONGOING EDITS TO REQUIREMENTS.MD #################
###### IF ANY ERRORS OCCUR RELATING TO MODULES OR CONFIG.PY #######
### REFER TO requirements.md TO ENSURE YOU ARE PROPERLY SETUP ####

## File inputs/outputs

In [2]:
# CSV paths used by this notebook, relative to the working directory.
# randLatLon_csv: random coordinates written by the generator and read back for geocoding.
# addressList_csv: addresses written after reverse geocoding and read back by later cells.
# masterData_csv: NOTE(review): defined here but not used by any cell visible in this notebook.
randLatLon_csv = "./Data/randomLatLon.csv" 
addressList_csv = "./Data/addressList.csv"
masterData_csv = "./Data/masterData.csv"

In [None]:
# Valerie

# Further setting up Notebook with Markdown sections, file outputs, action comments, etc.
# Create pandas dataframe 

## Geocoordinates of Austin

In [None]:
##########################################
# this section written by troy bailey.   #
# enter uservariables below to determine #
# center location, radius of circle, and #
# number of geocoordinates to generate.  #
##########################################

In [None]:
########################
#### USER VARIABLES ####
########################

x0 = 30.27444       #### Set center coordinates in decimal degrees (x0 is added to the latitude offset)
y0 = -97.74028      #### y0 is added to the longitude offset; initial coordinates are the location of the Texas State Capitol Building

radius = 10         #### Set radius in miles

points = 1000        #### Set number of lat,lon points to generate

In [None]:
# Working state for the coordinate generator.
lat_lon_list = list()

# Convert the radius from miles to degrees (69 miles per degree).
radiusInDegrees = radius / 69
r = radiusInDegrees

# The generator loop runs over range(1, points), so the count is bumped by one
# to produce the requested number of points.
# NOTE: re-running this cell without re-running the user-variables cell bumps it again.
points = points + 1

In [None]:
# Calculate each coordinate point and build a list of [lat, lon] pairs that are
# uniformly distributed inside a circle of radius `r` (degrees of latitude)
# around the center (x0, y0).

# Start from an empty list so that re-running this cell replaces the previous
# batch of points instead of appending a second batch to it.
lat_lon_list = []

# A degree of longitude covers less ground than a degree of latitude by a
# factor of cos(latitude). Stretch the east-west offset accordingly so the
# region is a circle on the ground rather than an ellipse squashed east-west.
lon_scale = 1 / math.cos(math.radians(x0))

for i in range(1, points):
    u = random.uniform(0.0, 1.0)  # random number for radius length
    v = random.uniform(0.0, 1.0)  # random number for the angle

    w = r * math.sqrt(u)          # radius length; sqrt keeps the density uniform over the area
    t = 2 * math.pi * v           # angle in radians
    x = w * math.cos(t)               # north-south offset in degrees of latitude
    y = w * math.sin(t) * lon_scale   # east-west offset in degrees of longitude

    xLat = x + x0                 # offset x by center latitude
    yLon = y + y0                 # offset y by center longitude

    lat_lon_list.append([xLat, yLon])

# convert list to dataframe
lat_lon_df = pd.DataFrame(lat_lon_list, columns=['lat', 'lon'])

# Only the last expression of a cell is rendered, so the row count below is
# what the notebook displays; the head() call is kept for interactive use.
lat_lon_df.head()

len(lat_lon_df)

In [None]:
# Save the generated coordinate points to disk (header row, no index column)
lat_lon_df.to_csv(path_or_buf=randLatLon_csv, header=True, index=False)

## Plot coordinate points on map


In [None]:
# Plot the generated points on a Google map centered at
# (centerPointLat, centerPointLon) with a magnification of magnificationFactor.
# Assumes a dataframe with "lat" and "lon" columns.
# The resulting map is saved to ./Visuals/<mapOutputFile>.

# --- map settings ---
centerPointLat, centerPointLon = 30.27444, -97.74028  # Texas State Capitol building
magnificationFactor = 10
pointColor, pointSize = "red", 100
mapOutputFile = "mymap.html"

# --- data to plot ---
df = lat_lon_df

# --- build and save the map ---
gmap = gmplot.GoogleMapPlotter(centerPointLat, centerPointLon, magnificationFactor)
gmap.scatter(df["lat"], df["lon"], pointColor, size=pointSize, marker=False)
gmap.draw(f"./Visuals/{mapOutputFile}")

## Convert Coordinates to Residential Addresses

In [None]:
#########################
##### Yuta's Blocks #####
#########################


##### General Analysis on the Geopy Nominatim Method Below #####
# - You can filter the address by type='house', but some houses are labeled with type='yes'
# - 'Yes' type means it's an unlabeled building.
# - Quite a bit of 'yes' type buildings actually hit a residential building, so we may get a high percentage of
#   residential buildings if we filter by 'house' or 'yes'.
# - Out of roughly 100 requests, around 70% will result in either 'house' or 'yes' type.
# - I can't seem to find the limit of API request, but we are throttled to 1 request per second.

# Testing Geopy Nominatim API Response: send one reverse-geocoding request for a
# known residential coordinate and pretty-print the JSON that comes back.


# NOTE(review): this sets geopy's default user agent, but the request below is
# sent with `requests`, not geopy -- confirm whether this line has any effect here.
geopy.geocoders.options.default_user_agent = "my-application"

pp = pprint.PrettyPrinter(indent=4)

# Nominatim reverse-geocoding endpoint; the query string is built from `params`.
url = "https://nominatim.openstreetmap.org/reverse?"

# Params 1 - Known Residential Address
params_1 = {
    "format": "jsonv2",
    "lat": 30.440777,
    "lon": -97.777048
}

print("===== #01 - Home Response:")
response_1 = requests.get(url, params=params_1).json()
pp.pprint(response_1)
print("\n" + "="*60 + "\n")

In [None]:
# Load the saved random coordinates back into a DataFrame and preview them
latlon_df = pd.read_csv(filepath_or_buffer=randLatLon_csv)
latlon_df.head(5)

In [None]:
# Pair up latitudes (first column) and longitudes (second column)
lats = latlon_df.iloc[:, 0]
lons = latlon_df.iloc[:, 1]

# Materialize the pairs as a list: a bare zip object is exhausted after a
# single pass, so any cell looping over it would silently process nothing
# when run a second time.
lat_lons = list(zip(lats, lons))

# Might not need to do this step, see below

In [None]:
# Reverse-geocode every coordinate pair with Nominatim and collect the results
# that look like buildings (type 'house', or the unlabeled-building type 'yes')
# into parallel lists that become the address dataframe.
query_url = "https://nominatim.openstreetmap.org/reverse?"

house_num = []
road = []
postcode = []
lat = []
lon = []
neighborhood = []
# TODO: 'type' and 'addresstype' are not stored; verify they are unneeded.

counter = 1
numRequests = len(lats)

print(f"Processing {numRequests} Requests...")

# Iterate over lats/lons directly rather than the shared `lat_lons` object, so
# this cell gives the same result every time it is run.
# TODO: check whether requests can be batched to make this faster.
for req_lat, req_lon in zip(lats, lons):
    params = {
        "format": "jsonv2",
        "lat": req_lat,
        "lon": req_lon
    }

    try:
        response = requests.get(query_url, params=params, timeout=30).json()
    except (requests.RequestException, ValueError):
        # Network problem or a body that is not JSON: count the record as a
        # miss instead of aborting the whole run.
        response = {}

    # Use .get(): a response that reports an error has no 'type' key.
    is_building = isinstance(response, dict) and response.get('type') in ('house', 'yes')

    if is_building:
        address = response.get('address', {})
        # Read both coordinates before appending anything so the lists can
        # never end up with different lengths.
        found_lat, found_lon = response['lat'], response['lon']

        lat.append(found_lat)
        lon.append(found_lon)
        # Any address part may be absent; record "NA" so every list still
        # gets exactly one entry per accepted record.
        postcode.append(address.get('postcode', "NA"))
        house_num.append(address.get('house_number', "NA"))
        road.append(address.get('road', "NA"))
        neighborhood.append(address.get('neighbourhood', "NA"))

        # TODO: printing every record is slow; consider collecting successes
        # and failures in two lists and reporting counts instead.
        print(f"Processed Record {counter} of {numRequests}.")
    else:
        print(f"Wrong Type - Skipped Record {counter} of {numRequests}.")

    counter += 1

In [None]:
# Assemble the per-address lists collected from the API requests into one dataframe.
# (type and address type are not gathered, so they are not columns here.)
address_columns = dict()
address_columns["house #"] = house_num
address_columns["street"] = road
address_columns["zipcode"] = postcode
address_columns["lat"] = lat
address_columns["lon"] = lon
address_columns["neighborhood"] = neighborhood

address_df = pd.DataFrame(address_columns)
address_df

In [None]:
# Save the address list to disk (header row, no index column)
address_df.to_csv(path_or_buf=addressList_csv, header=True, index=False)

In [None]:
# Reload the saved address list and preview the first rows
address_sample = pd.read_csv(filepath_or_buffer=addressList_csv)
address_sample.head(5)

In [None]:
# Map out the address sample with gmplot and save it as an HTML file.
# The map uses the same center coordinates and magnification as the
# random-points map above.
addressMapCenter = (30.27444, -97.74028)
addressMapFile = "./Visuals/myaddressmap.html"

gmap = gmplot.GoogleMapPlotter(addressMapCenter[0], addressMapCenter[1], 10)
gmap.scatter(
    address_sample["lat"],
    address_sample["lon"],
    'red',
    size=80,
    marker=False,
)
gmap.draw(addressMapFile)

## Zillow API Calls using Address and Zipcode

In [None]:
# Val create function for read in csv, check for headers?, and append csv


In [3]:
#############################
##### VALERIE'S BLOCKS #####
###########################

# A tiny sample keeps the looping below from exhausting the API call limits
addressListTiny_csv = "./Data/addressListTiny.csv"

# NOTE: this rebinds address_sample and address_df, replacing the frames
# built from the full address list in the cells above.
address_sample = pd.read_csv(filepath_or_buffer=addressListTiny_csv)
address_df = pd.DataFrame(data=address_sample)
address_df.head(5)

Unnamed: 0,house #,street,zipcode,lat,lon,neighborhood
0,4704,Carter Lane,78744,30.205996,-97.735289,Southeast Austin
1,9302,Creeks Edge Circle,78733,30.295867,-97.884472,
2,5350,West US Highway 290,78735,30.237296,-97.83759,Sunset Oaks
3,1404,Green Pastures Cove,78725,30.222968,-97.628323,Garden Valley Village
4,7308,Carver Avenue,78752,30.332523,-97.698403,St. Johns


In [None]:
####################### NO LONGER USING THIS API ########################
################# HOWEVER, CODE MAY BE USED ELSEWHERE ##################
########### OR WE MAY END UP NEEDING THIS API FOR ISD DATA ############

# pp = pprint.PrettyPrinter(indent=4)
# api = zillow.ValuationApi()
# # Insert your Zillow API key here (load it from config.py; never commit the real key)
# zwsid = "YOUR_ZWSID_HERE"

# valuation = []
# sqft = []

# for row, home in address_df.iterrows():
#     print(row)
#     address = address_df["house #"][row] + " " + address_df["street"][row] 
#     print(address)
#     zipcode = address_df["zipcode"][row]
#     print(zipcode)
    
#     try:
#         z_deep_results = api.GetDeepSearchResults(zwsid, address, zipcode)
#         print(z_deep_results)
#         pp.pprint(deep_results.get_dict())
#         print(z_deep_results['zestimate']['amount'])
#         valuation.append(z_deep_results['zestimate']['amount'])
#         print(z_deep_results['extended_data']['finished_sqft'])
#         sqft.append(z_deep_results['extended_data']['finished_sqft'])
#     except:
#         print(f"No record found for {address}, {zipcode}.")



In [4]:
# Look up every address in address_df on Zillow and collect the results into
# parallel lists (one entry per address, None where a value is unavailable).

# The notebook-level import of the Zillow keys is commented out, so bring the
# key this cell uses into scope here. Without it every lookup below fails with
# a NameError that used to be reported as "No record found".
from config import Zyuta

zid = []
alats = []
alons = []
addresses = []
valuation = []
sqft = []
isd = []


############## LOOPING FUNCTION FULLY OPERATIONAL ###############
###### HOWEVER, ZILLOW ONLY ALLOWS 1000 API CALLS PER DAY ######


def _as_int_if_whole(value):
    """Return int(value) for whole-number floats (e.g. 4704.0), else value unchanged.

    A CSV column that contains blanks is read back as float, which would
    otherwise turn a house number or zipcode into text such as "4704.0".
    """
    if isinstance(value, float) and value.is_integer():
        return int(value)
    return value


# One wrapper serves every request; it does not need rebuilding per row.
zillow_data = ZillowWrapper(Zyuta)

# (label used in the message, target list, attribute on the result, converter)
deep_search_fields = [
    ("zid", zid, "zillow_id", None),
    ("alat", alats, "latitude", None),
    ("alon", alons, "longitude", None),
    ("valuation", valuation, "zestimate_amount", int),
    ("sqft", sqft, "home_size", int),
]

for row, home in address_df.iterrows():
    house_no = _as_int_if_whole(home["house #"])
    street = home["street"]
    zipcode = _as_int_if_whole(home["zipcode"])

    # Build the address from whichever parts are present; a missing part
    # (NaN) must not crash the string concatenation.
    address = " ".join(str(part) for part in (house_no, street) if not pd.isna(part))
    addresses.append(address)
    print(f"Processing {address}, {zipcode} (index {row}).")

    try:
        if pd.isna(house_no) or pd.isna(street) or pd.isna(zipcode):
            # Zillow needs a full street address and zipcode; skip the API call.
            raise ValueError("incomplete address")
        deep_search_response = zillow_data.get_deep_search_results(address, zipcode)
        result = GetDeepSearchResults(deep_search_response)
    except Exception:
        print(f"No record found for {address}, {zipcode} (index {row}). Appending lists with null values")
        for bucket in (zid, alats, alons, valuation, sqft, isd):
            bucket.append(None)
        continue

    # Each field is read independently so one missing value does not discard the rest.
    for label, bucket, attr, convert in deep_search_fields:
        try:
            value = getattr(result, attr)
            bucket.append(convert(value) if convert else value)
        except Exception:
            print(f"No {label} found for {address}, {zipcode} (index {row}). Appending list with null values")
            bucket.append(None)

    # School district comes from a second API call keyed by the Zillow id just
    # recorded. Use zid[-1] (the entry for this row) rather than zid[row]:
    # `row` is a dataframe index label, not a list position.
    current_zid = zid[-1]
    zisd = None
    if current_zid is None:
        # Nothing to look up; do not spend an API call on it.
        print(f"No updated property info for this listing (index {row}).")
    else:
        try:
            updated_property_details_response = zillow_data.get_updated_property_details(current_zid)
            new_result = GetUpdatedPropertyDetails(updated_property_details_response)
            zisd = getattr(new_result, "school_district", None)
        except Exception:
            print(f"No updated property info for this listing (index {row}).")
    isd.append(zisd)

# Every list must hold exactly one entry per address so they line up as columns.
assert len({len(col) for col in (zid, alats, alons, addresses, valuation, sqft, isd)}) == 1


Processing 4704 Carter Lane, 78744 (index 0).
No updated property info for this listing (index 0).
Processing 9302 Creeks Edge Circle, 78733 (index 1).
Processing 5350 West US Highway 290, 78735 (index 2).
No record found for 5350 West US Highway 290, 78735 (index 2). Appending lists with null values
Processing 1404 Green Pastures Cove, 78725 (index 3).
No updated property info for this listing (index 3).
Processing 7308 Carver Avenue, 78752 (index 4).
No updated property info for this listing (index 4).
Processing 1203 Cometa Street, 78721 (index 5).
No valuation found for 1203 Cometa Street, 78721 (index 5). Appending list with null values
No updated property info for this listing (index 5).
Processing 3955 Westlake Drive, 78731 (index 6).
No updated property info for this listing (index 6).
Processing 9909 Thaxton Road, 78747 (index 7).
No updated property info for this listing (index 7).
Processing 1200 Capital of Texas Highway, 78746 (index 8).
No record found for 1200 Capital of 

In [None]:
######### ISD IS DRAWN FROM GET_UPDATED_PROPERTY_DETAILS ##########
########### WHICH HAS NOT BEEN ABLE TO RETURN VALUES  ############
############## FOR ANY PROPERTIES IN TINY SAMPLE ################
### MAY NEED TO APPROACH THESE VALUES FROM A DIFFERENT ROUTE ###

## Calculate Value per Sqft

In [5]:
# Value per square foot for each property, in the same order as `valuation`
# and `sqft`; None where it cannot be computed.
valsqft = []
for home_value, home_sqft in zip(valuation, sqft):
    try:
        valsqft.append(round(home_value / home_sqft, 2))
    except (TypeError, ZeroDivisionError):
        # TypeError: the valuation or the square footage is None.
        # ZeroDivisionError: the square footage is 0.
        # Other exceptions (e.g. KeyboardInterrupt) are deliberately not caught.
        print("Cannot perform math with NoneType")
        valsqft.append(None)

Cannot perform math with NoneType
Cannot perform math with NoneType
Cannot perform math with NoneType
Cannot perform math with NoneType


## Master Dataframe Creation

In [6]:
# Sanity check: print the length of each collected list (they should all match)
for collected in (zid, alats, alons, addresses, valuation, sqft, valsqft, isd):
    print(len(collected))

# Look back at the dataframe loaded from addressListTiny_csv for relevant info
address_df.head()

10
10
10
10
10
10
10
10


Unnamed: 0,house #,street,zipcode,lat,lon,neighborhood
0,4704,Carter Lane,78744,30.205996,-97.735289,Southeast Austin
1,9302,Creeks Edge Circle,78733,30.295867,-97.884472,
2,5350,West US Highway 290,78735,30.237296,-97.83759,Sunset Oaks
3,1404,Green Pastures Cove,78725,30.222968,-97.628323,Garden Valley Village
4,7308,Carver Avenue,78752,30.332523,-97.698403,St. Johns


In [7]:
# CURRENTLY ONLY SHOWING TINY SAMPLE

# Build the master dataframe from the Zillow result lists plus the zipcode and
# neighborhood columns of address_df.
# NOTE(review): `isd` is collected by the Zillow loop but is not added as a column here.
masterDF = pd.DataFrame({
    "Zillow ID": zid,
    "address": addresses,
    "zipcode": address_df["zipcode"],
    "alat": alats,
    "alon": alons,
    "valuation": valuation,
    "sqft": sqft,
    "value sqft": valsqft,
    "neighborhood": address_df["neighborhood"],
})

In [8]:
# masterData.csv
# Preview the first 10 rows of the master dataframe.
# NOTE(review): no cell visible here writes masterDF to masterData_csv -- confirm where that happens.
masterDF.head(10)

Unnamed: 0,Zillow ID,address,zipcode,alat,alon,valuation,sqft,value sqft,neighborhood
0,29465950.0,4704 Carter Lane,78744,30.205973,-97.735315,313965.0,2327.0,134.92,Southeast Austin
1,29334638.0,9302 Creeks Edge Circle,78733,30.295879,-97.884475,769441.0,3800.0,202.48,
2,,5350 West US Highway 290,78735,,,,,,Sunset Oaks
3,29465131.0,1404 Green Pastures Cove,78725,30.22298,-97.628333,168004.0,1568.0,107.15,Garden Valley Village
4,64485013.0,7308 Carver Avenue,78752,30.332576,-97.698503,288286.0,1182.0,243.9,St. Johns
5,29390251.0,1203 Cometa Street,78721,30.278913,-97.689313,,896.0,,MLK
6,58298026.0,3955 Westlake Drive,78731,30.328837,-97.781045,3198013.0,2911.0,1098.6,Balcones Park
7,64919630.0,9909 Thaxton Road,78747,30.130684,-97.73513,212848.0,1344.0,158.37,Thaxton Place
8,,1200 Capital of Texas Highway,78746,,,,,,Ledgeway
9,,3242 Lockhart Highway,78744,,,,,,


## Crime Data

In [None]:
# Kat's section

## School Data

In [None]:
# Seth's section

## Commute Data

In [17]:
# Troy's section
# Pull the mean commute time for a given geography from the DataUSA API.

targetUrl = "http://api.datausa.io/api/?show=geo&required=mean_commute_minutes&geo=16000US4805000&sumlevel=all&year=2016"

# Fetch, then decode the JSON body
commuteResponse = requests.get(targetUrl)
results = commuteResponse.json()

# Pretty-print the decoded JSON
commuteJson = json.dumps(results, sort_keys=True, indent=4)
print(commuteJson)

{
    "data": [
        [
            2016,
            "16000US4805000",
            22.7391
        ]
    ],
    "headers": [
        "year",
        "geo",
        "mean_commute_minutes"
    ],
    "logic": [
        {
            "dataset": "ACS 1-year Estimate",
            "link": "http://www.census.gov/programs-surveys/acs/",
            "org": "Census Bureau",
            "supported_levels": {
                "geo": [
                    "nation",
                    "state",
                    "county",
                    "msa",
                    "place",
                    "all"
                ]
            },
            "table": "acs_1yr.yg"
        }
    ],
    "source": {
        "dataset": "ACS 1-year Estimate",
        "link": "http://www.census.gov/programs-surveys/acs/",
        "org": "Census Bureau",
        "supported_levels": {
            "geo": [
                "nation",
                "state",
                "county",
                "msa",
           

In [7]:
# This cell looks up US Census geography for a given lat, lon
# (the aim is to get the census "place" code).

#!pip install censusgeocode

# NOTE(review): imported here rather than in the Dependencies cell at the top.
import censusgeocode as cg

# x is the longitude and y is the latitude.
# NOTE(review): this rebinds `result`, which the Zillow loop above also uses.
result = cg.coordinates(x=-97.777048, y=30.27444)

#result

In [15]:
# This cell tries to get geography data for one coordinate from the
# US Census geocoder API.

targetUrl = "https://geocoding.geo.census.gov/geocoder/geographies/coordinates?x=-97.96056&y=30.17&benchmark=4&vintage=4&format=json&layers=all"

# Fetch, then decode the JSON body
geoResponse = requests.get(targetUrl)
results = geoResponse.json()

# Print the json (pretty printed)
#print(json.dumps(results, indent=4, sort_keys=True))

#results


In [19]:
# Reverse-geocode one coordinate with Geocodio and request the "census" field
# so the response includes census identifiers for that location.

#! pip install pygeocodio

# NOTE(review): these imports live in this cell rather than in the Dependencies cell.
from config import geocode_API_Key

# Already imported in the Dependencies cell; repeated here.
from config import google_API_Key

from geocodio import GeocodioClient

client = GeocodioClient(geocode_API_Key)

# The point is passed as a (lat, lon) tuple.
location = client.reverse((30.345833, -97.714722),fields=["census"])
location


{'results': [{'address_components': {'number': '822',
    'street': 'Taulbee',
    'suffix': 'Ln',
    'formatted_street': 'Taulbee Ln',
    'city': 'Austin',
    'county': 'Travis County',
    'state': 'TX',
    'zip': '78757',
    'country': 'US'},
   'formatted_address': '822 Taulbee Ln, Austin, TX 78757',
   'location': {'lat': 30.345769, 'lng': -97.715047},
   'accuracy': 1,
   'accuracy_type': 'rooftop',
   'source': 'Capcog',
   'fields': {'census': {'census_year': 2015,
     'state_fips': '48',
     'county_fips': '48453',
     'place_fips': '4805000',
     'tract_code': '001504',
     'block_code': '1006',
     'block_group': '1',
     'metro_micro_statistical_area': {'name': 'Austin-Round Rock, TX',
      'area_code': '12420',
      'type': 'metropolitan'},
     'combined_statistical_area': None,
     'source': 'US Census Bureau'}}},
  {'address_components': {'number': '7702',
    'predirectional': 'N',
    'street': 'Lamar Blvd Sb',
    'formatted_street': 'N Lamar Blvd Sb',

In [None]:
# Query the US Census ACS 5-year profile API.
# NOTE: the URL still contains the YOUR_KEY_GOES_HERE placeholder; replace it
# with a real Census API key before running this cell.
targetUrl = "https://api.census.gov/data/2016/acs/acs5/profile?get=DP04_0001E,NAME&for=tract:000100&in=state:01%20county:073&key=YOUR_KEY_GOES_HERE"
#targetUrl = "http://api.datausa.io/api/?show=geo&required=mean_commute_minutes&geo=16000US4805000&sumlevel=all&year=2016"

# Fetch, then decode the JSON body
acsResponse = requests.get(targetUrl)
results = acsResponse.json()

# Pretty-print the decoded JSON
acsJson = json.dumps(results, sort_keys=True, indent=4)
print(acsJson)

## Heat Mapper

In [None]:
# Troy's section

# Give the gmaps library the Google API key (imported from config.py in the
# Dependencies cell) before any figures or layers are created.
gmaps.configure(api_key=google_API_Key)

In [None]:
# This cell creates a test masterData_df by pulling in Yuta's address file and adds a column as a test "value to map"
# This cell can be deleted as soon as there is a master data file that includes a property value column or some other value to plot
# The last digit of the zipcode is used as a value that will vary by area and a random number between 0 and 1 is added to create variation in the weights

masterData_df = pd.read_csv(addressList_csv)
zips = masterData_df["zipcode"]

# read_csv normally parses the zipcode column as numbers, and a number cannot
# be sliced like a string. Convert to text and take the first run of digits
# (so "78744", "78744.0" and "78744-1234" all yield "78744"), then its last digit.
zipDigits = zips.astype(str).str.extract(r"(\d+)", expand=False)
lastDigits = pd.to_numeric(zipDigits.str[-1], errors="coerce")

# Rows without a usable zipcode cannot be given a weight, so leave them out
# of this test frame.
hasZip = lastDigits.notna()
masterData_df = masterData_df[hasZip].copy()

valueToMap = [int(lastDigit) + random.uniform(0.0, 1.0) for lastDigit in lastDigits[hasZip]]

masterData_df["valueToMap"] = valueToMap
masterData_df.head()

In [None]:
# This cell uses the gmaps library to create a Google heat map from the master data.
# masterData_df is taken as input
# The lat and lon columns are taken as the coordinates for the heatmap
# The user specified column is taken as the weighting values for each coordinate point

columnToMap = 'valueToMap'
df = masterData_df

# Scale the colors so the largest weight maps to full intensity
max_intensity = df[columnToMap].max()

heatmap_layer = gmaps.heatmap_layer(
    df[['lat', 'lon']],
    weights=df[columnToMap],
    max_intensity=max_intensity,
    point_radius=10.0,
)

fig = gmaps.figure()
fig.add_layer(heatmap_layer)
fig

In [None]:
# this is a function version of the cell above
# the function takes columnToMap as the weights for the points defined by 'lat' and 'lon' columns in the dataframe
# the dataframe can be included as a parameter, if it is not included masterData_df is assumed

def heatMapper(columnToMap, df = masterData_df):
    
    max_intensity = df[columnToMap].max()
    
    fig = gmaps.figure()
    heatmap_layer = gmaps.heatmap_layer(df[['lat', 'lon']], weights=df[columnToMap], max_intensity=max_intensity, point_radius=10.0)
    fig.add_layer(heatmap_layer)

    return;

In [None]:
heatMapper(columnToMap = 'valueToMap')
fig