## Dependencies

In [61]:
# Confirmed needed dependencies
import random
import sys

import pandas as pd
import statsmodels.formula.api as smf

# Dependencies for mapping
import gmplot

# Dependency for Heat Mapper
import gmaps

from config import google_API_Key

from ipywidgets.embed import embed_minimal_html

# Enable the widgetsnbextension and gmaps notebook extensions for this environment (--sys-prefix).
!jupyter nbextension enable --py --sys-prefix widgetsnbextension
!jupyter nbextension enable --py --sys-prefix gmaps

Enabling notebook extension jupyter-js-widgets/extension...
      - Validating: OK
Enabling notebook extension jupyter-gmaps/extension...
      - Validating: OK


## File inputs/outputs

In [4]:
# Relative paths (from the notebook's working directory) to the CSV files used below.
# NOTE: variable names are case-sensitive -- the master data path is `masterDataCLEAN_csv`.
randLatLon_csv = "./Data/Archived/randomLatLon.csv" 
addressList_csv = "./Data/Archived/addressList.csv"
masterDataCLEAN_csv = "./Data/masterDataCLEAN.csv"


## Helpful Functions

In [None]:
#############################
##### VALERIE'S BLOCKS #####
###########################

# Function for reading a CSV in as a DataFrame
def csvDF(oldCSVfilepath):
    """Load the CSV file at `oldCSVfilepath` and return its contents as a DataFrame."""
    return pd.DataFrame(pd.read_csv(oldCSVfilepath))

# Function for converting DataFrame to CSV
def DFcsv(dataframe, newCSVfilepath):
    """Write `dataframe` to `newCSVfilepath` as CSV and print a confirmation.

    The header row is written; the DataFrame index is not.
    """
    writeOptions = {"index": False, "header": True}
    dataframe.to_csv(newCSVfilepath, **writeOptions)
    confirmation = f"Successfully written to '{newCSVfilepath}'"
    print(confirmation)
    
# Function for reading in csv, checking for headers, and appending if appropriate
def csvDFappend(oldCSVfilepath, newColumn, newValues=None):
    """Read a CSV into a DataFrame and append one new column to it.

    Two calling forms are supported:
      * csvDFappend(path, series)         -- `newColumn` is a named pandas Series;
                                             its name is the header, its values the data.
      * csvDFappend(path, header, values) -- `newColumn` is the header name and
                                             `newValues` is a list-like of values.

    Values are assigned by position (row order), not by index label.

    Returns the DataFrame with the column appended. If the header already
    exists, the number of values differs from the number of rows, or no
    header/values pair can be determined, a message is printed and the
    DataFrame is returned unchanged.
    """
    DF = pd.read_csv(oldCSVfilepath)

    # Work out the header name and the values separately; the original code
    # used the single `newColumn` argument for both, which could never work.
    if newValues is not None:
        columnName = newColumn
        columnValues = list(newValues)
    elif isinstance(newColumn, pd.Series) and newColumn.name is not None:
        columnName = newColumn.name
        columnValues = newColumn.tolist()
    else:
        print("Cannot append column without both a header name and values")
        return DF

    # Checking to ensure new header name does not match any current headers
    if columnName in DF.columns:
        print("Cannot append column that matches an existing column name")
        return DF

    # Check to ensure length of the new values matches length of current dataframe columns
    if len(columnValues) != len(DF):
        print("Cannot append column that is not the same length as existing dataframe")
        return DF

    # Append the new column to the DataFrame
    DF[columnName] = columnValues
    return DF

## Heat Mapper

In [34]:
# Troy's section
# Pass the Google API key imported from config to gmaps.


gmaps.configure(api_key=google_API_Key)

In [None]:
# This cell creates a test masterData_df by pulling in Yuta's address file and adding a test "value to map" column.
# This cell can be deleted as soon as there is a master data file that includes a property value column or some other value to plot.
# The last digit of the zipcode is used as a value that will vary by area, and a random number between 0 and 1 is added to create variation in the weights.

masterData_df = pd.read_csv(addressList_csv)
zips = masterData_df["zipcode"]

# Private, seeded generator so the test weights are identical on every Restart & Run All
rng = random.Random(42)

# str() first: read_csv may parse numeric zipcodes as integers, which cannot be sliced.
# The loop variable is `zipcode` (not `zip`) so the built-in zip() is not shadowed.
valueToMap = [int(str(zipcode).strip()[-1]) + rng.uniform(0.0, 1.0) for zipcode in zips]

masterData_df["valueToMap"] = valueToMap
masterData_df.head()

In [None]:
# This cell uses the gmaps library to draw a Google heat map from the master data.
# masterData_df is taken as input.
# The 'lat' and 'lon' columns are taken as the coordinates for the heat map.
# The column named in columnToMap supplies the weighting value for each coordinate point.

columnToMap = 'valueToMap'
df = masterData_df
max_intensity = df[columnToMap].max()

heatmap_layer = gmaps.heatmap_layer(
    df[['lat', 'lon']],
    weights=df[columnToMap],
    max_intensity=max_intensity,
    point_radius=10.0,
)
fig = gmaps.figure()
fig.add_layer(heatmap_layer)
fig

In [None]:
# Function version of the heat map cell above.
# The earlier, commented-out draft did not work as a function because it built the
# figure but never returned it (and bound its default dataframe when it was defined).

def heatMapper(columnToMap, df=None, latColumn='lat', lonColumn='lon', point_radius=10.0):
    """Build a gmaps heat map figure weighted by one column of a dataframe.

    Parameters
    ----------
    columnToMap : str
        Name of the column whose values weight each point; its maximum is
        used as the layer's max_intensity.
    df : DataFrame, optional
        Data to plot. When omitted, the global masterData_df is looked up at
        call time, so the function can be defined before that frame exists.
    latColumn, lonColumn : str
        Names of the coordinate columns (default 'lat' and 'lon').
    point_radius : float
        Radius passed through to gmaps.heatmap_layer.

    Returns
    -------
    The gmaps figure with the heat map layer added. Make the call the last
    expression of a cell, or assign the result and display it, to render it.
    """
    if df is None:
        df = masterData_df

    weights = df[columnToMap]

    fig = gmaps.figure()
    heatmap_layer = gmaps.heatmap_layer(
        df[[latColumn, lonColumn]],
        weights=weights,
        max_intensity=weights.max(),
        point_radius=point_radius,
    )
    fig.add_layer(heatmap_layer)

    return fig

In [None]:
# Requires `heatMapper` to have been defined by an earlier cell; otherwise this raises NameError.
# NOTE(review): the call's result is not assigned, so the `fig` displayed below is whatever
# figure is currently bound to the global name `fig`.
heatMapper(columnToMap = 'valueToMap')
fig

## Commute Time Analysis

In [71]:
# Troy's section
# This section prepares the commute data that is displayed on a heatmap

# Read the master data file into a dataframe.
# The path variable is `masterDataCLEAN_csv` (case-sensitive), as defined in the file inputs/outputs cell.
data_df = pd.read_csv(masterDataCLEAN_csv)

# Named commuteMin/commuteMax so the built-in min()/max() are not shadowed for later cells
commuteMin = data_df.commuteTime.min()
commuteMax = data_df.commuteTime.max()

# Linear rescale of commuteTime: the shortest commute scores 100, the longest scores 0
data_df["CommuteScore"] = 100 - ((data_df.commuteTime - commuteMin) * (100 / (commuteMax - commuteMin)))

data_df.head()


Unnamed: 0,Zillow ID,address,zipcode,alat,alon,valuation,sqft,neighborhood,tractCode,countyFips,stateFips,commuteTime,CommuteScore
0,29498621,8113 West Gate Boulevard,78745,30.197589,-97.828106,265572.0,940.0,Cherry Creek,1728,453,48,27.7,46.04811
1,29440641,2109 Red Stone Lane,78727,30.418454,-97.69619,249696.0,1359.0,Tomanet Estates,1848,453,48,21.4,67.697595
2,29345444,9 Glen Rock Drive,78738,30.341187,-97.984049,472040.0,2896.0,,1773,453,48,30.8,35.395189
3,29613796,8333 Alvin High Lane,78729,30.460441,-97.761118,307384.0,1983.0,Hunter's Chase,20411,491,48,27.0,48.453608
4,29358809,5300 Valburn Circle,78731,30.374096,-97.775259,941669.0,4376.0,North Cat Mountain,1705,453,48,20.3,71.477663


In [72]:
# Creating a heat map of the commute scores

# Settings for this map
columnToMap = 'CommuteScore'
point_radius = 5
figure_layout = {'width': '500px', 'margin': '0 auto 0 auto'}

# Data and intensity ceiling
df = data_df
max_intensity = df[columnToMap].max()

# Build the weighted layer, then the figure that hosts it
heatmap_layer = gmaps.heatmap_layer(
    df[['alat', 'alon']],
    weights=df[columnToMap],
    max_intensity=max_intensity,
    point_radius=point_radius,
)
fig = gmaps.figure(layout=figure_layout)
fig.add_layer(heatmap_layer)

fig

In [77]:
# Change the heat map colors by assigning a new gradient to the existing layer
heatmap_layer.gradient = ['white', 'green', 'blue']

In [63]:
# Export the current `fig` widget to an HTML file under ./Visuals.
# NOTE(review): assumes the ./Visuals directory already exists -- confirm.
embed_minimal_html('./Visuals/commuteHeatMap.html', views=[fig])

## Regression Analysis

In [68]:
# Ordinary least squares fit of valuation on square footage and commute time.
# statsmodels.formula.api is imported as `smf` in the dependencies cell at the top of the notebook.
# The path variable is `masterDataCLEAN_csv` (case-sensitive), as defined in the file inputs/outputs cell.
data_df = pd.read_csv(masterDataCLEAN_csv)

result = smf.ols(formula="valuation ~ sqft + commuteTime", data=data_df).fit()
print(result.params)

Intercept      250139.857625
sqft              308.018961
commuteTime    -15954.874641
dtype: float64
