In [1]:
# Import
import requests
import json
import pandas as pd
from sqlalchemy import create_engine
from cfg import google_key

#set db_path
database_path = "../da_job_data.sqlite"

#create sql engine for the sqlite file
engine = create_engine(f"sqlite:///{database_path}")

#store db data to a pandas dataframe
#the connection is only needed for this single read, so it is opened in a
#context manager and closed again as soon as the query has been loaded
with engine.connect() as conn:
    data_df = pd.read_sql("SELECT location FROM da_data", conn)

#create new dataframe for the locations of the job postings and the count of appearances in the dataset
#(value_counts drops missing locations, so every remaining 'location' is a string)
location_df = data_df['location'].value_counts(dropna=True).rename_axis('location').reset_index(name='count')

#filter out only locations with city and state provided for location mapping
#the pattern looks for "<word chars>, <two word chars>" anywhere in the string
#NOTE(review): the pattern is unanchored, so values such as "City, Oklahoma" would also pass -- confirm this is intended
has_city_state = location_df['location'].str.contains(r'\w+,\s\w{2}')

#build the filtered frame with a fresh 0..n-1 index in one step instead of
#resetting the index in place on a filtered slice
clean_location_df = location_df[has_city_state].reset_index(drop=True)
clean_location_df

Unnamed: 0,location,count
0,"Kansas City, MO",353
1,"Oklahoma City, OK",253
2,"Jefferson City, MO",207
3,"Tulsa, OK",129
4,"Columbia, MO",124
...,...,...
324,"Shawnee, OK",1
325,"Shawnee County, KS",1
326,"Bismarck, AR",1
327,"Chickasha, OK",1


In [2]:
#location names to geocode, in the same order as the rows of clean_location_df
cities = clean_location_df["location"].tolist()
lats = []
lons = []

geocode_url = "https://maps.googleapis.com/maps/api/geocode/json"

#iterate through cities list, pass in city to google api, store latitude and longitude to lists from api response
#lats and lons always receive exactly one entry per city so the three lists stay aligned
for city in cities:

    #pass the query through `params` so requests URL-encodes spaces and commas in the city name;
    #the timeout keeps one stalled request from hanging the whole cell
    reply = requests.get(
        geocode_url,
        params={"address": city, "key": google_key},
        timeout=10,
    )
    reply.raise_for_status()

    response = reply.json()
    status = response.get("status")
    results = response.get("results", [])

    #anything other than a successful lookup or an empty match is an API-level failure
    #(bad key, quota, malformed request): stop instead of silently recording no coordinates
    if status not in ("OK", "ZERO_RESULTS"):
        raise RuntimeError(
            f"Geocoding failed for {city!r}: {status} {response.get('error_message', '')}"
        )

    #no match for this location: record missing coordinates rather than raising IndexError
    if not results:
        lats.append(None)
        lons.append(None)
        continue

    #use the first (best) match returned for the address
    point = results[0]["geometry"]["location"]
    lats.append(point["lat"])
    lons.append(point["lng"])


In [3]:

#create dataframe with cities, lats and lons (one row per geocoded location)
df = pd.DataFrame(list(zip(cities, lats, lons)), columns=['location', 'latitude', 'longitude'])

#merge this with the previous df to add lats and lons to the per-location counts
final_location_df = pd.merge(clean_location_df, df, on='location', how='inner')

#export to json
#forward slashes avoid the invalid "\d" / "\m" escape sequences of the old literal and
#resolve to the ../data directory on every platform, matching the style of database_path
final_location_df.to_json("../data/map_db.json")

#show the merged frame as the cell output
final_location_df