# Analysis of Starbucks Data

In [33]:
import pandas as pd
import geopandas as gpd
import numpy as np
import folium
from folium import Marker
from geopy.geocoders import Nominatim

In [34]:
# Load and preview Starbucks locations in California
starbucks_data = pd.read_csv("C:/Users/hzerr/spatialdata/starbucks_locations.csv")
starbucks_data.head()

Unnamed: 0,Store Number,Store Name,Address,City,Longitude,Latitude
0,10429-100710,Palmdale & Hwy 395,14136 US Hwy 395 Adelanto CA,Adelanto,-117.4,34.51
1,635-352,Kanan & Thousand Oaks,5827 Kanan Road Agoura CA,Agoura,-118.76,34.16
2,74510-27669,Vons-Agoura Hills #2001,5671 Kanan Rd. Agoura Hills CA,Agoura Hills,-118.76,34.15
3,29839-255026,Target Anaheim T-0677,8148 E SANTA ANA CANYON ROAD AHAHEIM CA,AHAHEIM,-117.75,33.87
4,23463-230284,Safeway - Alameda 3281,2600 5th Street Alameda CA,Alameda,-122.28,37.79


### Exploratory Data Analysis

In [35]:
starbucks_data.isnull().sum()

Store Number    0
Store Name      0
Address         0
City            0
Longitude       5
Latitude        5
dtype: int64

In [36]:
starbucks_data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 2821 entries, 0 to 2820
Data columns (total 6 columns):
 #   Column        Non-Null Count  Dtype  
---  ------        --------------  -----  
 0   Store Number  2821 non-null   object 
 1   Store Name    2821 non-null   object 
 2   Address       2821 non-null   object 
 3   City          2821 non-null   object 
 4   Longitude     2816 non-null   float64
 5   Latitude      2816 non-null   float64
dtypes: float64(2), object(4)
memory usage: 132.4+ KB


There are 5 rows missing the longitude and latitude. All other columns contain all necessary data.

In [37]:
starbucks_data[starbucks_data.isnull().any(axis=1)]

Unnamed: 0,Store Number,Store Name,Address,City,Longitude,Latitude
153,5406-945,2224 Shattuck - Berkeley,2224 Shattuck Avenue Berkeley CA,Berkeley,,
154,570-512,Solano Ave,1799 Solano Avenue Berkeley CA,Berkeley,,
155,17877-164526,Safeway - Berkeley #691,1444 Shattuck Place Berkeley CA,Berkeley,,
156,19864-202264,Telegraph & Ashby,3001 Telegraph Avenue Berkeley CA,Berkeley,,
157,9217-9253,2128 Oxford St.,2128 Oxford Street Berkeley CA,Berkeley,,


The stores with null values are located in Berkeley. Missing values for latitude and longitude will be filled in.

In [38]:
missing_data = starbucks_data[starbucks_data.isnull().any(axis=1)]

In [51]:
geolocator = Nominatim(user_agent="zhogan")

missing_data = missing_data.copy() 

for addy in range (0, len(missing_data)):
    location = geolocator.geocode(missing_data.iloc[addy]["Address"]).point

    if location:
        # Append latitude and longitude to the DataFrame
        missing_data.loc[addy, 'Longitude'] = location.longitude 
        missing_data.loc[addy, 'Latitude'] = location.latitude
    else:
        print(f"Latitude and longitude for {missing_data.iloc[addy]['Address']} not found")

In [52]:
missing_data

Unnamed: 0,Store Number,Store Name,Address,City,Longitude,Latitude
153,5406-945,2224 Shattuck - Berkeley,2224 Shattuck Avenue Berkeley CA,Berkeley,,
154,570-512,Solano Ave,1799 Solano Avenue Berkeley CA,Berkeley,,
155,17877-164526,Safeway - Berkeley #691,1444 Shattuck Place Berkeley CA,Berkeley,,
156,19864-202264,Telegraph & Ashby,3001 Telegraph Avenue Berkeley CA,Berkeley,,
157,9217-9253,2128 Oxford St.,2128 Oxford Street Berkeley CA,Berkeley,,
0,,,,,-122.26823,37.868839
1,,,,,-122.280013,37.891477
2,,,,,-122.269679,37.880907
3,,,,,-122.259406,37.855903
4,,,,,-122.266095,37.870253
