# Banking Deserts
---
The below script uncovers the well-known phenomenon of [Banking Deserts](https://en.wikipedia.org/wiki/Banking_desert). The concept is simple: many neighborhoods with predominantly low-income and elderly populations tend to have inadequate coverage of banking services. This leads such communities to be  vulnerable to predatory loan and pricey check casher providers.

In this script, we retrieved and plotted data from the 2013 US Census and Google Places API to show the relationship between various socioeconomic parameters and bank count across 700 randomly selected zip codes. We used Pandas, Numpy, Matplotlib, Requests, Census API, and Google API to accomplish our task.

In [1]:
# Dependencies
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import requests
import time
from pprint import pprint
# Google Places API Key from config
from config import gkey

## Data Retrieval

In [2]:
# Import the census data into a pandas DataFrame
census_df = pd.read_csv('Census_Data.csv')

# Preview the data
census_df.head()

Unnamed: 0,Zipcode,Address,Population,Median Age,Household Income,Per Capita Income,Poverty Rate
0,15081,"South Heights, PA 15081, USA",342,50.2,31500.0,22177,20.760234
1,20615,"Broomes Island, MD 20615, USA",424,43.4,114375.0,43920,5.188679
2,50201,"Nevada, IA 50201, USA",8139,40.4,56619.0,28908,7.777368
3,84020,"Draper, UT 84020, USA",42751,30.4,89922.0,33164,4.39288
4,39097,"Louise, MS 39097, USA",495,58.0,26838.0,17399,34.949495


In [3]:
# Randomly select 700 zip codes locations that have at least 100 residents
# Hint: `pd.sample()`
# Hint: `pd[pd[astype(int) > 100`]]`

census_df = census_df[census_df['Population'].astype(int) > 100].sample(n=700)

# Visualize the DataFrame
#census_df.count()

In [4]:
# Create a DataFrame with only a subset of the zipcodes for testing purposes
# One your code runs successfully, run it on all 700.
census_df = census_df[census_df['Zipcode'].astype(int) > 80000]
census_df.count()

Zipcode              95
Address              95
Population           95
Median Age           95
Household Income     95
Per Capita Income    95
Poverty Rate         95
dtype: int64

In [66]:
# Create blank columns in DataFrame for lat/lng
census_df['lat'], census_df['lng'] = '',''

# Loop through and grab the lat/lng for each of the selected zips using Google maps
# Inside the loop add the lat/lng to our DataFrame
# Note: Be sure to use try/except to handle cities with missing data

for index, row in census_df.iterrows():
    base_url = 'https://maps.googleapis.com/maps/api/geocode/json?'
    params = {
        'address':f'{row["Zipcode"]}',
        "key": gkey
    }
    cities_lat_lng = requests.get(base_url, params=params).json()
    
    census_df.at[index,'lat'] = cities_lat_lng['results'][0]['geometry']['location']['lat']
    census_df.at[index,'lng'] = cities_lat_lng['results'][0]['geometry']['location']['lng']


# Visualize the DataFrame
census_df.head()

Unnamed: 0,Zipcode,Address,Population,Median Age,Household Income,Per Capita Income,Poverty Rate,lat,lng
287,98362,"Port Angeles, WA 98362, USA",22052,47.5,43827.0,25914,14.388718,48.0962,-123.301
77,89316,"Eureka, NV 89316, USA",1513,38.7,63603.0,26280,14.871117,39.5649,-115.994
239,92252,"Joshua Tree, CA 92252, USA",10047,45.0,42338.0,22951,18.771773,34.1938,-116.254
380,83841,"Laclede, ID 83841, USA",171,60.8,26618.0,23912,0.0,48.1659,-116.758
344,90210,"Beverly Hills, CA 90210, USA",21548,46.4,132254.0,111364,7.420642,34.103,-118.41


In [152]:
# Create an empty column for bank count
census_df['bank_count'] = ''

# Re-loop through the DataFrame and run a Google Places search to get all banks in 5 mile radius (8000 meters)
# Inside the loop add the bank count to our DataFrame

# Set up params
params = {
    "radius": 8000, 
    "type": "bank",
    "key": gkey
}

#set up base_url
base_url = 'https://maps.googleapis.com/maps/api/place/nearbysearch/json?'

#set up placehorder of urls for debugging
urls = []

def requester(base_url, params, next_page_token=None):
    """
    Returns the 'results' list of google places API request
    including multipage responses (up to 60 results).
    base_url: Google Places API Nearby Search request as HTTP URL 
    params: dictionary. Required keys: key, location, radius and (keyword|name|type)
    """
    #add next page key to params if next page token exists
    if next_page_token:
        params['pagetoken'] = next_page_token
    
    #get request
    r = requests.get(base_url, params=params)
    
    #uncomment line below to print request urls to console
    #print(f'Just got the response from: {r.url}')
    urls.append(r.url)
    
    #read as json
    response = r.json()
    
    #check response status
    if response['status'] == 'ZERO_RESULTS':
        #print('Zero results')
        #if no results fount return an empty list (length of 0)
        return []
    elif response['status'] == 'INVALID_REQUEST':
        raise Exception('INVALID_REQUEST: operation stopped') 
    elif response['status'] == 'OK':
        #if response if OK save results list to a variable
        results = response['results'] 

        #check for next page token in response
        if 'next_page_token' in response:
            #uncomment line below to print the token to the console
            #print(f'Next token found : {response["next_page_token"]}')

            #google won't return next page if the request is made in less 
            #than 2 seconds so we wait for 2.1
            time.sleep(2.1)

            #make a recursive call to the same 'requester' function
            #this call will continue to happen until there is a next page 
            #token in the response
            next_page = requester(base_url, params, response['next_page_token'])

            #since 'requester' returns a list we just add the returning list 
            #to existing list of results
            results = results + next_page
         
        #return list of places as a list
        return results
    
  
#variable to limit number of requests
k = 1

for index, row in census_df.iterrows():
    
    # update params with zipcode each loop
    params['location'] = f"{row['lat']},{row['lng']}"
    
    #uncomment to see the requests flow
    #print(f'Running request for row #:{k}')
    
    #assign resulting list to a variable 'results'
    results = requester(base_url, params)
    
    #add number of banks to the DF
    #we don't need to check for errors because error handling
    #is done inside 'requester' function
    census_df.at[index,'bank_count'] = len(results)
    
    #increase k 
    k += 1 
    
    #uncomment and change k to number of runs you need
    if k == 10:
        break

# Visualize the DataFrame
census_df.head()

Zero results


Unnamed: 0,Zipcode,Address,Population,Median Age,Household Income,Per Capita Income,Poverty Rate,lat,lng,bank_count
380,83841,"Laclede, ID 83841, USA",171,60.8,26618.0,23912,0.0,48.1659,-116.758,0
344,90210,"Beverly Hills, CA 90210, USA",21548,46.4,132254.0,111364,7.420642,34.103,-118.41,60
669,85553,"Tonto Basin, AZ 85553, USA",1441,64.6,36442.0,28245,16.932686,33.8103,-111.237,20
481,95051,"Santa Clara, CA 95051, USA",53152,36.1,100504.0,43487,6.865217,37.3598,-121.981,20
561,97366,"Newport, OR 97366, USA",1301,58.8,53135.0,36010,4.996157,44.5772,-124.054,20


## Save to a CSV

In [None]:
# Save the DataFrame as a csv


## Plot & Save Graphs

In [None]:
# Build a scatter plot for each data type 


In [None]:
# Build a scatter plot for each data type
plt.scatter(selected_zips["Bank Count"], 
            selected_zips["Median Age"],
            edgecolor="black", linewidths=1, marker="o", 
            alpha=0.8, label="Zip Codes")

# Incorporate the other graph properties
plt.title("Median Age vs. Bank Count by Zip Code")
plt.ylabel("Median Age")
plt.xlabel("Bank Count")
plt.grid(True)
plt.xlim([-2.5, 202])

# Save the figure
plt.savefig("output_analysis/Age_BankCount.png")

# Show plot
plt.show()

In [None]:
# Build a scatter plot for each data type
plt.scatter(selected_zips["Bank Count"], 
            selected_zips["Household Income"],
            edgecolor="black", linewidths=1, marker="o", 
            alpha=0.8, label="Zip Codes")

# Incorporate the other graph properties
plt.title("Household Income vs. Bank Count by Zip Code")
plt.ylabel("Household Income ($)")
plt.xlabel("Bank Count")
plt.grid(True)
plt.xlim([-2.5, 202])
plt.ylim([-2.5, 230000])

# Save the figure
plt.savefig("output_analysis/HouseholdIncome_BankCount.png")

# Show plot
plt.show()

In [None]:
# Build a scatter plot for each data type
plt.scatter(selected_zips["Bank Count"], 
            selected_zips["Per Capita Income"],
            edgecolor="black", linewidths=1, marker="o", 
            alpha=0.8, label="Zip Codes")

# Incorporate the other graph properties
plt.title("Per Capita Income vs. Bank Count by Zip Code")
plt.ylabel("Per Capita Income (%)")
plt.xlabel("Bank Count")
plt.grid(True)
plt.xlim([-2.5, 202])
plt.ylim([0, 165000])

# Save the figure
plt.savefig("output_analysis/PerCapitaIncome_BankCount.png")

# Show plot
plt.show()

In [None]:
# Build a scatter plot for each data type
plt.scatter(selected_zips["Bank Count"], 
            selected_zips["Poverty Rate"],
            edgecolor="black", linewidths=1, marker="o", 
            alpha=0.8, label="Zip Codes")

# Incorporate the other graph properties
plt.title("Poverty Rate vs. Bank Count by Zip Code")
plt.ylabel("Poverty Rate (%)")
plt.xlabel("Bank Count")
plt.grid(True)
plt.xlim([-2.5, 202])
plt.ylim([-2.5, 102])

# Save the figure
plt.savefig("output_analysis/PovertyRate_BankCount.png")

# Show plot
plt.show()