In [16]:
# import the library
import json
import requests
from matplotlib import pyplot as plt
from pymongo import MongoClient
import os
from dotenv import load_dotenv
load_dotenv()
import pandas as pd
import numpy as np

In [17]:
def geocode(address):
    res = requests.get(f"https://geocode.xyz/{address}", params={"json":1})
    data = res.json()
    return {
        "type":"Point",
        "coordinates":[float(data["longt"]),float(data["latt"])]
    }

In [18]:
# https://geojson.io/#map=16/40.3959/-3.7039
# https://geojson.org/
sanFrancisco = "San Francisco"
geocode(sanFrancisco) 

{'type': 'Point', 'coordinates': [-122.40438, 37.66873]}

In [19]:
#Latitud: 37.779026 | Longitud: -122.419906 These coordinates are more accurate

In [20]:
def getData(query, page_token=None):
    url = "https://maps.googleapis.com/maps/api/place/textsearch/json"
    params = {"key":os.getenv("APIKEY_GOOGLE"), "query":query}
    if page_token:
        params["pageToken"]=page_token
    res = requests.get(url, params=params)
    res = res.json()
    results = res["results"]
    if 'next_page_token' in res.keys():
        page_token = res['next_page_token']
        results=[results, page_token]
    else: 
        results=[results,False]
    return results
    
    
def getLocations(query, limit=50):
    data = []
    page_token = None
    while len(data)<limit:
        results, page_token = getData(query, page_token)
        data += results
        if not page_token: 
            break
    return data



In [21]:
len(getLocations("starbucks in San Francisco", limit = 1000))

1000

In [22]:
df_starbucks = pd.DataFrame(getLocations("starbucks in San Francisco", limit = 1000))

In [23]:
df_starbucks["name"].value_counts()

Starbucks                        950
Starbucks California & Kearny     50
Name: name, dtype: int64

In [24]:
df_schools = pd.DataFrame(getLocations("schools in San Francisco", limit = 1000))

In [25]:
df_vegan = pd.DataFrame(getLocations("vegan restaurants in San Francisco", limit = 1000))


In [26]:
df_pepe = pd.DataFrame(getLocations("dog hairdresser in San Francisco", limit = 1000))


In [27]:
def getlat(x):
    try:
        return x["location"]["lat"]
    except:
        return"unknown"
    
def getlong(x):
    try:
        return x["location"]["lng"]
    except:
        return"unknown"
    
    
def getlatlong(df):
    df['latitude'] = df["geometry"].apply(getlat)
    df['longitude'] = df["geometry"].apply(getlong)

In [28]:
def transformToGeoPoint(s):
    if np.isnan(s.latitude) or np.isnan(s.longitude):
        return None
    return {
        "type":"Point",
        "coordinates":[s.longitude, s.latitude]
    }
    

In [29]:
getlatlong(df_pepe)
df_pepe["geopoint"] = df_pepe.apply(transformToGeoPoint, axis=1)

getlatlong(df_vegan)
df_vegan["geopoint"] = df_vegan.apply(transformToGeoPoint, axis=1)

getlatlong(df_schools)
df_schools["geopoint"] = df_schools.apply(transformToGeoPoint, axis=1)

getlatlong(df_starbucks)
df_starbucks["geopoint"] = df_starbucks.apply(transformToGeoPoint, axis=1)

In [30]:
df_pepe.to_json("INPUT/pepe.json",orient="records")
df_vegan.to_json("INPUT/vegan.json",orient="records")
df_schools.to_json("INPUT/schools.json",orient="records")
df_starbucks.to_json("INPUT/starbucks.json",orient="records")