In [1]:
import pandas as pd

In [11]:
import requests
import pandas as pd
from concurrent.futures import ThreadPoolExecutor, as_completed
from tqdm import tqdm

# define list of URLs to scrape
urls = [
    "https://api.geoapify.com/v2/places?categories=tourism,commercial.food_and_drink,healthcare,entertainment.theme_park&filter=place:51d9301bcd0f9a5dc05918c8f31a750b4140f00101f901ff29030000000000c0020692030b4c6f7320416e67656c6573&limit={limit}&offset={offset}&apiKey=ce750d3b8d0e4578990391f5ffe87ba4",
    "https://api.geoapify.com/v2/places?categories=tourism,entertainment.theme_park,healthcare,commercial.food_and_drink&filter=place:512aa13f6eea6f58c059f8ed998f2a4c3e40f00101f901a2ba010000000000c0020692030641757374696e&limit={limit}&offset={offset}&apiKey=ce750d3b8d0e4578990391f5ffe87ba4",
    "https://api.geoapify.com/v2/places?categories=tourism,commercial.food_and_drink,healthcare,entertainment.theme_park&filter=place:51d78dcdee167c52c059a5d22785e3544440f00101f90121af020000000000c0020a92031043697479206f66204e657720596f726b&limit={limit}&offset={offset}&apiKey=ce750d3b8d0e4578990391f5ffe87ba4"
]

# initialize empty lists to store data
attraction_names = []
attraction_types = []
addresses = []
cities = []
states = []
websites = []
opening_hours = []
latitudes = []
longitudes = []

# set limit and initial offset
limit = 500
offset = 0

# set number of threads
num_threads = 10

# define function to scrape data for a given URL, limit, and offset
def scrape_data(url, limit, offset):
    # set URL with limit and offset
    url = url.format(limit=limit, offset=offset)

    # make GET request and get JSON response
    response = requests.get(url)
    response_json = response.json()

    # check if no more results
    if len(response_json['features']) == 0:
        return None

    # loop through results and append data to respective lists
    for result in response_json['features']:
        attraction_names.append(result['properties']['name'])
        if len(result['properties']['categories']) > 0:
            attraction_types.append(result['properties']['categories'][0])
        else:
            attraction_types.append(None)
        addresses.append(result['properties'].get('address_line1'))
        cities.append(result['properties'].get('city'))
        states.append(result['properties'].get('state'))
        websites.append(result['properties'].get('website'))
        opening_hours.append(result['properties'].get('opening_hours'))
        latitudes.append(result['geometry']['coordinates'][1])
        longitudes.append(result['geometry']['coordinates'][0])

    # return number of results scraped
    return len(response_json['features'])

# create thread pool executor
with ThreadPoolExecutor(max_workers=num_threads) as executor:
    for url in urls:
        # reset initial page number and offset for each URL
        page_number = 1
        offset = 0

        # scrape 5000 unique places for each URL
        for i in range(10):
            # submit tasks for each offset
            tasks = [executor.submit(scrape_data, url, page_number, offset + limit*i) for page_number in range(1, 101)]

            # iterate over completed tasks and print progress using tqdm
            for task in tqdm(as_completed(tasks), total=len(tasks)):
                if task.result() is None:
                    break

            # increment page number and offset
            page_number += 100
            offset += limit*100

# create pandas DataFrame from scraped data
attractions_df = pd.DataFrame({
    'attraction_name': attraction_names,
    'attraction_type': attraction_types,
    'address': addresses,
    'city': cities,
    'state': states,
    'website': websites,
    'opening_hours': opening_hours,
    'latitude': latitudes,
    'longitude': longitudes
})

# drop duplicates and reset index
attractions_df = attractions_df.drop_duplicates().reset_index(drop=True)

# display first 5 rows of DataFrame
print(attractions_df.head())


       


100%|██████████| 100/100 [01:17<00:00,  1.28it/s]
  0%|          | 0/100 [00:01<?, ?it/s]
  0%|          | 0/100 [00:14<?, ?it/s]
  0%|          | 0/100 [00:14<?, ?it/s]
  0%|          | 0/100 [00:14<?, ?it/s]
  0%|          | 0/100 [00:13<?, ?it/s]
  0%|          | 0/100 [00:13<?, ?it/s]
  0%|          | 0/100 [00:14<?, ?it/s]
  0%|          | 0/100 [00:14<?, ?it/s]
  0%|          | 0/100 [00:14<?, ?it/s]
100%|██████████| 100/100 [00:38<00:00,  2.59it/s]
  0%|          | 0/100 [00:00<?, ?it/s]
  0%|          | 0/100 [00:07<?, ?it/s]
  0%|          | 0/100 [00:07<?, ?it/s]
  0%|          | 0/100 [00:07<?, ?it/s]
  0%|          | 0/100 [00:07<?, ?it/s]
  0%|          | 0/100 [00:07<?, ?it/s]
  0%|          | 0/100 [00:07<?, ?it/s]
  0%|          | 0/100 [00:07<?, ?it/s]
  0%|          | 0/100 [00:07<?, ?it/s]
  7%|▋         | 7/100 [00:47<10:28,  6.76s/it]
  0%|          | 0/100 [01:45<?, ?it/s]
  0%|          | 0/100 [00:26<?, ?it/s]
  0%|          | 0/100 [00:24<?, ?it/s]
  0%|       

                                     attraction_name attraction_type  \
0                  Ronald Reagan UCLA Medical Center        building   
1                            Marina Del Rey Hospital      healthcare   
2               Providence Holy Cross Medical Center      healthcare   
3  Los Angeles County + University of Southern Ca...      healthcare   
4                      White Memorial Medical Center      healthcare   

                                             address         city       state  \
0                  Ronald Reagan UCLA Medical Center  Los Angeles  California   
1                            Marina Del Rey Hospital         None  California   
2               Providence Holy Cross Medical Center  Los Angeles  California   
3  Los Angeles County + University of Southern Ca...  Los Angeles  California   
4                      White Memorial Medical Center  Los Angeles  California   

  website opening_hours   latitude   longitude  
0    None          None  34.066

In [12]:
attractions_df

Unnamed: 0,attraction_name,attraction_type,address,city,state,website,opening_hours,latitude,longitude
0,Ronald Reagan UCLA Medical Center,building,Ronald Reagan UCLA Medical Center,Los Angeles,California,,,34.066459,-118.446341
1,Marina Del Rey Hospital,healthcare,Marina Del Rey Hospital,,California,,,33.982156,-118.439305
2,Providence Holy Cross Medical Center,healthcare,Providence Holy Cross Medical Center,Los Angeles,California,,,34.280298,-118.459135
3,Los Angeles County + University of Southern Ca...,healthcare,Los Angeles County + University of Southern Ca...,Los Angeles,California,,,34.059431,-118.209376
4,White Memorial Medical Center,healthcare,White Memorial Medical Center,Los Angeles,California,,,34.049639,-118.217133
...,...,...,...,...,...,...,...,...,...
295,Kirby Forensic Psychiatric Center,access_limited,Kirby Forensic Psychiatric Center,New York,New York,,,40.786528,-73.932257
296,New York Eye Surgery Center,healthcare,New York Eye Surgery Center,New York,New York,,,40.858433,-73.855339
297,Lenox Health Greenwich Village,healthcare,Lenox Health Greenwich Village,New York,New York,,,40.737778,-74.000833
298,Ryan/NENA Comprehensive Heath Service Center,healthcare,Ryan/NENA Comprehensive Heath Service Center,New York,New York,,,40.721615,-73.980043


In [21]:
pip install googlemaps

Collecting googlemaps
  Downloading googlemaps-4.10.0.tar.gz (33 kB)
Building wheels for collected packages: googlemaps
  Building wheel for googlemaps (setup.py) ... [?25ldone
[?25h  Created wheel for googlemaps: filename=googlemaps-4.10.0-py3-none-any.whl size=40716 sha256=bbc554b3e27336ea84e295f767f0e70ddb7077a4490f3c2bc0c028057c240a11
  Stored in directory: /Users/gokulnair/Library/Caches/pip/wheels/d9/5f/46/54a2bdb4bcb07d3faba4463d2884865705914cc72a7b8bb5f0
Successfully built googlemaps
Installing collected packages: googlemaps
Successfully installed googlemaps-4.10.0
Note: you may need to restart the kernel to use updated packages.


In [23]:
import googlemaps
import pandas as pd

# Replace API_KEY with your own API key obtained from the Google Cloud Console
gmaps = googlemaps.Client(key='AIzaSyDcCiwrmpFpy0MR4k9cHj63o4MY7Q3YHJQ')

# Set the state you want to search for tourist attractions in
state = 'California'

# Define the search parameters for the Google Places API
places_result = gmaps.places(query='Tourist attractions in ' + state, type='tourist_attraction')

# Define an empty list to hold the results
results = []

# Extract the relevant information from the API response and append it to the results list
for place in places_result['results']:
    name = place['name']
    address = place['formatted_address']
    city = None
    state = None
    lat = place['geometry']['location']['lat']
    lng = place['geometry']['location']['lng']
    
    # Extract the city and state from the formatted address, if available
    for component in place['address_components']:
        if 'locality' in component['types']:
            city = component['long_name']
        elif 'administrative_area_level_1' in component['types']:
            state = component['short_name']
    
    results.append({'Name': name, 'Address': address, 'City': city, 'State': state, 'Lat': lat, 'Lng': lng})

# Convert the results list to a pandas DataFrame
df = pd.DataFrame(results, columns=['Name', 'Address', 'City', 'State', 'Lat', 'Lng'])

# Print the resulting DataFrame
print(df)


KeyError: 'address_components'

In [30]:
import googlemaps
import pandas as pd

# Replace API_KEY with your own API key obtained from the Google Cloud Console
gmaps = googlemaps.Client(key='AIzaSyDcCiwrmpFpy0MR4k9cHj63o4MY7Q3YHJQ')

# Set the state you want to search for tourist attractions in
state = 'California'

# Define the search parameters for the Google Places API
places_result = gmaps.places(query='Tourist attractions in ' + state, type='tourist_attraction')

# Define an empty list to hold the results
results = []

# Extract the relevant information from the API response and append it to the results list
for place in places_result['results']:
    name = place['name']
    address = place['formatted_address']
    city = None
    state = None
    lat = place['geometry']['location']['lat']
    lng = place['geometry']['location']['lng']
    
    # Extract the city and state from the formatted address, if available
    if 'address_components' in place:
        for component in place['address_components']:
            if 'locality' in component['types']:
                city = component['long_name']
            elif 'administrative_area_level_1' in component['types']:
                state = component['short_name']
    
    results.append({'Name': name, 'Address': address, 'City': city, 'State': state, 'Lat': lat, 'Lng': lng})

# Convert the results list to a pandas DataFrame
df = pd.DataFrame(results, columns=['Name', 'Address', 'City', 'State', 'Lat', 'Lng'])

# Print the resulting DataFrame
print(df)


                                  Name  \
0                      Disneyland Park   
1                Sequoia National Park   
2     Redwood National and State Parks   
3                       Yosemite Falls   
4     Disney California Adventure Park   
5               Yosemite National Park   
6                     The Mystery Spot   
7                          Balboa Park   
8          Universal Studios Hollywood   
9            Joshua Tree National Park   
10           Bodie State Historic Park   
11                       Hearst Castle   
12          Kings Canyon National Park   
13    Gilroy Gardens Family Theme Park   
14             Crescent Beach Overlook   
15          Death Valley National Park   
16                    Lighthouse Point   
17  Sequoia National Park's Tunnel Log   
18        Columbia State Historic Park   
19      Moaning Caverns Adventure Park   

                                              Address  City State        Lat  \
0   1313 Disneyland Dr, Anaheim, CA 9

In [25]:
df

Unnamed: 0,Name,Address,City,State,Lat,Lng
0,Disneyland Park,"1313 Disneyland Dr, Anaheim, CA 92802, United ...",,,33.812092,-117.918974
1,Sequoia National Park,"California, United States",,,36.486367,-118.565752
2,Redwood National and State Parks,"California, United States",,,41.213179,-124.004627
3,Yosemite Falls,"Yosemite Village, CA 95389, United States",,,37.756596,-119.596907
4,Disney California Adventure Park,"1313 Disneyland Dr, Anaheim, CA 92802, United ...",,,33.806112,-117.920859
5,Yosemite National Park,"California, United States",,,37.865101,-119.538329
6,The Mystery Spot,"465 Mystery Spot Rd, Santa Cruz, CA 95065, Uni...",,,37.016865,-122.002508
7,Balboa Park,"San Diego, CA, United States",,,32.734148,-117.144553
8,Universal Studios Hollywood,"100 Universal City Plaza, Universal City, CA 9...",,,34.138117,-118.353378
9,Joshua Tree National Park,"California, United States",,,33.873415,-115.900992


In [28]:
import googlemaps
import pandas as pd
import time

# Replace API_KEY with your own API key obtained from the Google Cloud Console
gmaps = googlemaps.Client(key='AIzaSyDcCiwrmpFpy0MR4k9cHj63o4MY7Q3YHJQ')

# Set the state you want to search for tourist attractions in
state = 'California'

# Define the search parameters for the Google Places API
places_result = gmaps.places(query='Tourist attractions in ' + state, type='tourist_attraction')

# Define an empty list to hold the results
results = []

# Extract the relevant information from the API response and append it to the results list
while True:
    for place in places_result['results']:
        name = place['name']
        address = place['formatted_address']
        city = None
        state = None
        lat = place['geometry']['location']['lat']
        lng = place['geometry']['location']['lng']

        # Extract the city and state from the formatted address, if available
        if 'address_components' in place:
            for component in place['address_components']:
                if 'locality' in component['types']:
                    city = component['long_name']
                elif 'administrative_area_level_1' in component['types']:
                    state = component['short_name']

        results.append({'Name': name, 'Address': address, 'City': city, 'State': state, 'Lat': lat, 'Lng': lng})
        
    # Check if there are more results to retrieve
    if 'next_page_token' in places_result:
        next_page_token = places_result['next_page_token']
        
        # Pause for a few seconds to give Google time to generate the next page of results
        time.sleep(2)
        
        # Make a new request using the next_page_token to retrieve the next page of results
        places_result = gmaps.places(query='Tourist attractions in ' + state, type='tourist_attraction', page_token=next_page_token)
    else:
        break

# Convert the results list to a pandas DataFrame
df = pd.DataFrame(results, columns=['Name', 'Address', 'City', 'State', 'Lat', 'Lng'])

# Print the resulting DataFrame
print(df)


TypeError: can only concatenate str (not "NoneType") to str

In [31]:
import googlemaps
import pandas as pd
import time

# Replace API_KEY with your own API key obtained from the Google Cloud Console
gmaps = googlemaps.Client(key='AIzaSyDcCiwrmpFpy0MR4k9cHj63o4MY7Q3YHJQ')

# Set the state you want to search for tourist attractions in
state = 'California'

# Define the search parameters for the Google Places API
places_result = gmaps.places(query='Tourist attractions in ' + state, type='tourist_attraction')

# Define an empty list to hold the results
results = []

# Extract the relevant information from the API response and append it to the results list
while True:
    for place in places_result['results']:
        name = place['name']
        address = place['formatted_address']
        city = None
        state = None
        lat = place['geometry']['location']['lat']
        lng = place['geometry']['location']['lng']

        # Extract the city and state from the formatted address, if available
        if 'address_components' in place:
            for component in place['address_components']:
                if 'locality' in component['types']:
                    city = component['long_name']
                elif 'administrative_area_level_1' in component['types']:
                    state = component['short_name']

        results.append({'Name': name, 'Address': address, 'City': city, 'State': state, 'Lat': lat, 'Lng': lng})
        
    # Check if there are more results to retrieve
    if next_page_token is not None:
        # Make a new request using the next_page_token to retrieve the next page of results
        places_result = gmaps.places(query='Tourist attractions in ' + state, type='tourist_attraction', page_token=next_page_token)
    else:
        break


# Convert the results list to a pandas DataFrame
df = pd.DataFrame(results, columns=['Name', 'Address', 'City', 'State', 'Lat', 'Lng'])

# Print the resulting DataFrame
print(df)


TypeError: can only concatenate str (not "NoneType") to str

In [38]:
pip install us

Collecting us
  Downloading us-2.0.2.tar.gz (14 kB)
Collecting jellyfish==0.6.1
  Downloading jellyfish-0.6.1.tar.gz (132 kB)
[K     |████████████████████████████████| 132 kB 6.7 MB/s eta 0:00:01
[?25hBuilding wheels for collected packages: us, jellyfish
  Building wheel for us (setup.py) ... [?25ldone
[?25h  Created wheel for us: filename=us-2.0.2-py3-none-any.whl size=11942 sha256=a97fe93941dd86988d45af92d2e4595c05d5fa090911e5d8523a3a074b503fc4
  Stored in directory: /Users/gokulnair/Library/Caches/pip/wheels/1a/93/5b/98d3861ec2c4a9d90b16324c6f8d7e4db03e6a830bc993adbb
  Building wheel for jellyfish (setup.py) ... [?25ldone
[?25h  Created wheel for jellyfish: filename=jellyfish-0.6.1-cp39-cp39-macosx_10_9_x86_64.whl size=22785 sha256=cac1bfc945e46567c8a36b52a2af4330698e2de36974c006fc00b09aa1c7b1e9
  Stored in directory: /Users/gokulnair/Library/Caches/pip/wheels/e6/7d/be/a937dbd1f988778a15011a563ac3a12917103bfc25ff6cb473
Successfully built us jellyfish
Installing collected packa

In [46]:
import requests
from geodata.us.counties import Counties

# Get the state FIPS code for California
ca_fips = '06'

# Make an API request to get the county population data for California
url = f'https://api.census.gov/data/2019/pep/population?get=POP&for=COUNTY:*&in=STATE:{ca_fips}'
response = requests.get(url)
data = response.json()

# Convert the population data to a dictionary mapping county FIPS codes to populations
populations = {d[2]: int(d[0]) for d in data[1:]}

# Load the county data and filter for California
ca_counties = Counties().filter(state_fips=ca_fips)

# Sort the counties by population and get the top 1000
top_counties = sorted(ca_counties, key=lambda c: populations.get(c.fips, 0), reverse=True)[:1000]

# Extract the county names and put them in a list
county_names = [c.name for c in top_counties]

print(county_names)


ModuleNotFoundError: No module named 'geodata.us'

In [48]:
import googlemaps
import pandas as pd
import time

# Replace API_KEY with your own API key obtained from the Google Cloud Console
gmaps = googlemaps.Client(key='AIzaSyDcCiwrmpFpy0MR4k9cHj63o4MY7Q3YHJQ')

# Define a list of cities in California

cities = ['Alameda', 'Alpine', 'Amador', 'Butte', 'Calaveras', 'Colusa', 'Contra Costa', 'Del Norte', 'El Dorado', 'Fresno', 'Glenn', 'Humboldt', 'Imperial', 'Inyo', 'Kern', 'Kings', 'Lake', 'Lassen', 'Los Angeles', 'Madera', 'Marin', 'Mariposa', 'Mendocino', 'Merced', 'Modoc', 'Mono', 'Monterey', 'Napa', 'Nevada', 'Orange', 'Placer', 'Plumas', 'Riverside', 'Sacramento', 'San Benito', 'San Bernardino', 'San Diego', 'San Francisco', 'San Joaquin', 'San Luis Obispo', 'San Mateo', 'Santa Barbara', 'Santa Clara', 'Santa Cruz', 'Shasta', 'Sierra', 'Siskiyou', 'Solano', 'Sonoma', 'Stanislaus', 'Sutter', 'Tehama', 'Trinity', 'Tulare', 'Tuolumne', 'Ventura', 'Yolo', 'Yuba']

# Define an empty list to hold the results
results = []

# Loop through each city and make a request for tourist attractions
for city in cities:
    # Define the search parameters for the Google Places API
    places_result = gmaps.places(query='Tourist attractions in ' + city + ', California', type='tourist_attraction')

    # Extract the relevant information from the API response and append it to the results list
    for place in places_result['results']:
        name = place['name']
        address = place['formatted_address']
        lat = place['geometry']['location']['lat']
        lng = place['geometry']['location']['lng']

        results.append({'County': city, 'Name': name, 'Address': address, 'Lat': lat, 'Lng': lng})

    # Check if there are more pages of results
    while 'next_page_token' in places_result:
        # Pause for 2 seconds to avoid hitting the API rate limit
        time.sleep(2)

        # Make a new request using the next_page_token to retrieve the next page of results
        next_page_token = places_result['next_page_token']
        places_result = gmaps.places(query='Tourist attractions in ' + city + ', California', type='tourist_attraction', page_token=next_page_token)

        # Extract the relevant information from the API response and append it to the results list
        for place in places_result['results']:
            name = place['name']
            address = place['formatted_address']
            lat = place['geometry']['location']['lat']
            lng = place['geometry']['location']['lng']

            results.append({'County': city, 'Name': name, 'Address': address, 'Lat': lat, 'Lng': lng})

# Convert the results list to a pandas DataFrame
df = pd.DataFrame(results, columns=['County', 'Name', 'Address', 'Lat', 'Lng'])

# Print the resulting DataFrame
print(df)


         City                                              Name  \
0     Alameda            Doug Siden Visitor Center at Crab Cove   
1     Alameda                            Pacific Pinball Museum   
2     Alameda                           Sand Castle Picnic Area   
3     Alameda                          Alameda Naval Air Museum   
4     Alameda                                    Alameda Museum   
...       ...                                               ...   
3313     Yuba  Freeman's Crossing / Oregon Creek Covered Bridge   
3314     Yuba                       Kelly Ridge Recreation Area   
3315     Yuba                 Cronan Ranch Regional Trails Park   
3316     Yuba                                    Riverbend Park   
3317     Yuba                              Robert L. Doyle Park   

                                                Address        Lat         Lng  
0      1252 McKay Ave, Alameda, CA 94501, United States  37.768918 -122.278299  
1     1510 Webster St, Alameda, C

In [49]:
df.drop_duplicates()

Unnamed: 0,City,Name,Address,Lat,Lng
0,Alameda,Doug Siden Visitor Center at Crab Cove,"1252 McKay Ave, Alameda, CA 94501, United States",37.768918,-122.278299
1,Alameda,Pacific Pinball Museum,"1510 Webster St, Alameda, CA 94501, United States",37.773798,-122.276636
2,Alameda,Sand Castle Picnic Area,"Biking/Hiking Trail, Alameda, CA 94501, United...",37.764646,-122.273355
3,Alameda,Alameda Naval Air Museum,"2151 Ferry Point, Alameda, CA 94501, United St...",37.781421,-122.299170
4,Alameda,Alameda Museum,"2324 Alameda Ave, Alameda, CA 94501, United St...",37.763542,-122.244161
...,...,...,...,...,...
3313,Yuba,Freeman's Crossing / Oregon Creek Covered Bridge,"4 California 4, Murphys, CA 95247, United States",39.396783,-121.082333
3314,Yuba,Kelly Ridge Recreation Area,"917 Kelly Ridge Rd, Oroville, CA 95966, United...",39.541832,-121.465614
3315,Yuba,Cronan Ranch Regional Trails Park,"Pilot Hill, CA 95664, United States",38.826544,-120.989127
3316,Yuba,Riverbend Park,"50 Montgomery St, Oroville, CA 95965, United S...",39.507037,-121.576640


In [50]:
df

Unnamed: 0,City,Name,Address,Lat,Lng
0,Alameda,Doug Siden Visitor Center at Crab Cove,"1252 McKay Ave, Alameda, CA 94501, United States",37.768918,-122.278299
1,Alameda,Pacific Pinball Museum,"1510 Webster St, Alameda, CA 94501, United States",37.773798,-122.276636
2,Alameda,Sand Castle Picnic Area,"Biking/Hiking Trail, Alameda, CA 94501, United...",37.764646,-122.273355
3,Alameda,Alameda Naval Air Museum,"2151 Ferry Point, Alameda, CA 94501, United St...",37.781421,-122.299170
4,Alameda,Alameda Museum,"2324 Alameda Ave, Alameda, CA 94501, United St...",37.763542,-122.244161
...,...,...,...,...,...
3313,Yuba,Freeman's Crossing / Oregon Creek Covered Bridge,"4 California 4, Murphys, CA 95247, United States",39.396783,-121.082333
3314,Yuba,Kelly Ridge Recreation Area,"917 Kelly Ridge Rd, Oroville, CA 95966, United...",39.541832,-121.465614
3315,Yuba,Cronan Ranch Regional Trails Park,"Pilot Hill, CA 95664, United States",38.826544,-120.989127
3316,Yuba,Riverbend Park,"50 Montgomery St, Oroville, CA 95965, United S...",39.507037,-121.576640


In [56]:
import googlemaps
import pandas as pd
import time

# Replace API_KEY with your own API key obtained from the Google Cloud Console
gmaps = googlemaps.Client(key='AIzaSyDcCiwrmpFpy0MR4k9cHj63o4MY7Q3YHJQ')

# Define a list of counties in California
counties = ['Alameda', 'Alpine', 'Amador', 'Butte', 'Calaveras', 'Colusa', 'Contra Costa', 'Del Norte', 'El Dorado', 'Fresno', 'Glenn', 'Humboldt', 'Imperial', 'Inyo', 'Kern', 'Kings', 'Lake', 'Lassen', 'Los Angeles', 'Madera', 'Marin', 'Mariposa', 'Mendocino', 'Merced', 'Modoc', 'Mono', 'Monterey', 'Napa', 'Nevada', 'Orange', 'Placer', 'Plumas', 'Riverside', 'Sacramento', 'San Benito', 'San Bernardino', 'San Diego', 'San Francisco', 'San Joaquin', 'San Luis Obispo', 'San Mateo', 'Santa Barbara', 'Santa Clara', 'Santa Cruz', 'Shasta', 'Sierra', 'Siskiyou', 'Solano', 'Sonoma', 'Stanislaus', 'Sutter', 'Tehama', 'Trinity', 'Tulare', 'Tuolumne', 'Ventura', 'Yolo', 'Yuba']

# Define an empty list to hold the results
results = []

# Loop through each county and make requests for tourist attractions, restaurants, and healthcare facilities
for county in counties:
    # Define the search parameters for the Google Places API
    places_result = gmaps.places(query='Tourist attractions in ' + county + ', California', type='tourist_attraction')
    
    # Extract the relevant information from the API response and append it to the results list
    for place in places_result['results']:
        name = place['name']
        address = place['formatted_address']
        lat = place['geometry']['location']['lat']
        lng = place['geometry']['location']['lng']
        
        results.append({'County': county, 'Name': name, 'Address': address, 'Lat': lat, 'Lng': lng, 'Type': 'tourist'})
    
    # Define the search parameters for restaurants
    places_result = gmaps.places(query='Restaurants in ' + county + ', California', type='restaurant')
    
    # Extract the relevant information from the API response and append it to the results list
    for place in places_result['results']:
        name = place['name']
        address = place['formatted_address']
        lat = place['geometry']['location']['lat']
        lng = place['geometry']['location']['lng']
        
        results.append({'County': county, 'Name': name, 'Address': address, 'Lat': lat, 'Lng': lng, 'Type': 'restaurant'})
    
    # Define the search parameters for healthcare facilities
    places_result = gmaps.places(query='Hospitals in ' + county + ', California', type='hospital')
    
    # Extract the relevant information from the API response and append it to the results list
    for place in places_result['results']:
        name = place['name']
        address = place['formatted_address']
        lat = place['geometry']['location']['lat']
        lng = place['geometry']['location']['lng']
        
        results.append({'County': county, 'Name': name,"Address":address,"Lat":lat,"Long":lng,'Type': 'heathcare'})
    
    # Search for nearby restaurants
    places_result = gmaps.places_nearby(location=(lat, lng), radius=5000, type='restaurant')

    # Extract the relevant information from the API response and append it to the results list
    for place in places_result['results']:
        name = place['name']
        address = place['vicinity']
        lat = place['geometry']['location']['lat']
        lng = place['geometry']['location']['lng']

        results.append({'County': county, 'Name': name, 'Address': address, 'Lat': lat, 'Lng': lng, 'Type': 'restaurant'})

    # Search for nearby healthcare facilities
    places_result = gmaps.places_nearby(location=(lat, lng), radius=5000, type='hospital')

    # Extract the relevant information from the API response and append it to the results list
    for place in places_result['results']:
        name = place['name']
        address = place['vicinity']
        lat = place['geometry']['location']['lat']
        lng = place['geometry']['location']['lng']

        results.append({'County': county, 'Name': name, 'Address': address, 'Lat': lat, 'Lng': lng, 'Type': 'healthcare'})

df = pd.DataFrame(results, columns=['County', 'Name', 'Address', 'Lat', 'Lng', 'Type'])

df



Unnamed: 0,County,Name,Address,Lat,Lng,Type
0,Alameda,Doug Siden Visitor Center at Crab Cove,"1252 McKay Ave, Alameda, CA 94501, United States",37.768918,-122.278299,tourist
1,Alameda,Pacific Pinball Museum,"1510 Webster St, Alameda, CA 94501, United States",37.773798,-122.276636,tourist
2,Alameda,Sand Castle Picnic Area,"Biking/Hiking Trail, Alameda, CA 94501, United...",37.764646,-122.273355,tourist
3,Alameda,Alameda Naval Air Museum,"2151 Ferry Point, Alameda, CA 94501, United St...",37.781421,-122.299170,tourist
4,Alameda,Alameda Museum,"2324 Alameda Ave, Alameda, CA 94501, United St...",37.763542,-122.244161,tourist
...,...,...,...,...,...,...
4654,Yuba,Sutter Surgical Hospital North Valley,"455 Plumas Boulevard, Yuba City",39.132047,-121.613376,healthcare
4655,Yuba,North Valley Behavioral Health,"1535 Plumas Street, Yuba City",39.153580,-121.617431,healthcare
4656,Yuba,Ampla Health Yuba City Pediatrics,"931 Market Street, Yuba City",39.144990,-121.613918,healthcare
4657,Yuba,Pain Management: Sutter Medical Care Center: S...,"460 Plumas Boulevard, Yuba City",39.132159,-121.614429,healthcare


In [57]:
df["State"] = "California"

In [61]:
df.to_csv("cal_df.csv")

TEXAS details

In [62]:
# Define a list of counties in Texas
counties = ['Anderson', 'Andrews', 'Angelina', 'Aransas', 'Archer', 'Armstrong', 'Atascosa', 'Austin', 'Bailey', 'Bandera', 'Bastrop', 'Baylor', 'Bee', 'Bell', 'Bexar', 'Blanco', 'Borden', 'Bosque', 'Bowie', 'Brazoria', 'Brazos', 'Brewster', 'Briscoe', 'Brooks', 'Brown', 'Burleson', 'Burnet', 'Caldwell', 'Calhoun', 'Callahan', 'Cameron', 'Camp', 'Carson', 'Cass', 'Castro', 'Chambers', 'Cherokee', 'Childress', 'Clay', 'Cochran', 'Coke', 'Coleman', 'Collin', 'Collingsworth', 'Colorado', 'Comal', 'Comanche', 'Concho', 'Cooke', 'Coryell', 'Cottle', 'Crane', 'Crockett', 'Crosby', 'Culberson', 'Dallam', 'Dallas', 'Dawson', 'Deaf Smith', 'Delta', 'Denton', 'DeWitt', 'Dickens', 'Dimmit', 'Donley', 'Duval', 'Eastland', 'Ector', 'Edwards', 'Ellis', 'El Paso', 'Erath', 'Falls', 'Fannin', 'Fayette', 'Fisher', 'Floyd', 'Foard', 'Fort Bend', 'Franklin', 'Freestone', 'Frio', 'Gaines', 'Galveston', 'Garza', 'Gillespie', 'Glasscock', 'Goliad', 'Gonzales', 'Gray', 'Grayson', 'Gregg', 'Grimes', 'Guadalupe', 'Hale', 'Hall', 'Hamilton', 'Hansford', 'Hardeman', 'Hardin', 'Harris', 'Harrison', 'Hartley', 'Haskell', 'Hays', 'Hemphill', 'Henderson', 'Hidalgo', 'Hill', 'Hockley', 'Hood', 'Hopkins', 'Houston', 'Howard', 'Hudspeth', 'Hunt', 'Hutchinson', 'Irion', 'Jack', 'Jackson', 'Jasper', 'Jeff Davis', 'Jefferson', 'Jim Hogg', 'Jim Wells', 'Johnson', 'Jones', 'Karnes', 'Kaufman', 'Kendall', 'Kenedy', 'Kent', 'Kerr', 'Kimble', 'King', 'Kinney', 'Kleberg', 'Knox', 'Lamar', 'Lamb', 'Lampasas', 'La Salle', 'Lavaca', 'Lee', 'Leon', 'Liberty', 'Limestone', 'Lipscomb', 'Live Oak', 'Llano', 'Loving', 'Lubbock', 'Lynn', 'McCulloch', 'McLennan', 'McMullen', 'Madison', 'Marion', 'Martin', 'Mason', 'Matagorda', 'Maverick', 'Medina', 'Menard', 'Midland', 'Milam', 'Mills', 'Mitchell', 'Montague', 'Montgomery', 'Moore', 'Morris', 'Motley', 'Nacogdoches', 'Navarro', 'Newton', 'Nolan']

# Define an empty list to hold the results
results = []

# Loop through each county and make requests for tourist attractions, restaurants, and healthcare facilities
for county in counties:
    # Define the search parameters for the Google Places API
    places_result = gmaps.places(query='Tourist attractions in ' + county + ', Texas', type='tourist_attraction')
    
    # Extract the relevant information from the API response and append it to the results list
    for place in places_result['results']:
        name = place['name']
        address = place['formatted_address']
        lat = place['geometry']['location']['lat']
        lng = place['geometry']['location']['lng']
        
        results.append({'County': county, 'Name': name, 'Address': address, 'Lat': lat, 'Lng': lng, 'Type': 'tourist'})
    
    # Define the search parameters for restaurants
    places_result = gmaps.places(query='Restaurants in ' + county + ', Texas', type='restaurant')
    
    # Extract the relevant information from the API response and append it to the results list
    for place in places_result['results']:
        name = place['name']
        address = place['formatted_address']
        lat = place['geometry']['location']['lat']
        lng = place['geometry']['location']['lng']
        
        results.append({'County': county, 'Name': name, 'Address': address, 'Lat': lat, 'Lng': lng, 'Type': 'restaurant'})
    
    # Define the search parameters for healthcare facilities
    places_result = gmaps.places(query='Hospitals in ' + county + ', Texas', type='hospital')
    
    # Extract the relevant information from the API response and append it to the results list
    for place in places_result['results']:
        name = place['name']
        address = place['formatted_address']
        lat = place['geometry']['location']['lat']
        lng = place['geometry']['location']['lng']
        
        results.append({'County': county, 'Name': name,"Address":address,"Lat":lat,"Long":lng,'Type': 'heathcare'})
    
    # Search for nearby restaurants
    places_result = gmaps.places_nearby(location=(lat, lng), radius=5000, type='restaurant')

    # Extract the relevant information from the API response and append it to the results list
    for place in places_result['results']:
        name = place['name']
        address = place['vicinity']
        lat = place['geometry']['location']['lat']
        lng = place['geometry']['location']['lng']

        results.append({'County': county, 'Name': name, 'Address': address, 'Lat': lat, 'Lng': lng, 'Type': 'restaurant'})

    # Search for nearby healthcare facilities
    places_result = gmaps.places_nearby(location=(lat, lng), radius=5000, type='hospital')

    # Extract the relevant information from the API response and append it to the results list
    for place in places_result['results']:
        name = place['name']
        address = place['vicinity']
        lat = place['geometry']['location']['lat']
        lng = place['geometry']['location']['lng']

        results.append({'County': county, 'Name': name, 'Address': address, 'Lat': lat, 'Lng': lng, 'Type': 'healthcare'})

t_df = pd.DataFrame(results, columns=['County', 'Name', 'Address', 'Lat', 'Lng', 'Type'])

t_df




Unnamed: 0,County,Name,Address,Lat,Lng,Type
0,Anderson,Sam Houston Statue & Visitor Center,"7600 TX-75, Huntsville, TX 77340, United States",30.661818,-95.510701,tourist
1,Anderson,Washington-on-the-Brazos State Historic Site,"23400 Park Rd 12, Washington, TX 77880, United...",30.325911,-96.155111,tourist
2,Anderson,Lake Somerville State Park,"14222 Park Rd 57, Somerville, TX 77879, United...",30.310915,-96.663271,tourist
3,Anderson,Sam Houston Memorial Museum,"1836 Sam Houston Ave, Huntsville, TX 77340, Un...",30.715585,-95.551969,tourist
4,Anderson,Natural Bridge Caverns,"26495 Natural Bridge Caverns Rd, San Antonio, ...",29.692410,-98.342728,tourist
...,...,...,...,...,...,...
13221,Nolan,DaVita The Woodlands At Home,"9301 Pinecroft Drive suite #130, Shenandoah",30.174010,-95.457223,healthcare
13222,Nolan,Licepros,"27351 Blueberry Hill Drive #37, Oak Ridge North",30.164976,-95.440988,healthcare
13223,Nolan,Sono Bello The Woodlands,"1450 Lake Robbins Drive #360, The Woodlands",30.161850,-95.456135,healthcare
13224,Nolan,Encompass Health Rehabilitation Hospital of Th...,"18550 Interstate 45, Conroe",30.185840,-95.454480,healthcare


In [70]:
t_df["City"] = "Texas"
t_df

Unnamed: 0,County,Name,Address,Lat,Lng,Type,City
0,Anderson,Sam Houston Statue & Visitor Center,"7600 TX-75, Huntsville, TX 77340, United States",30.661818,-95.510701,tourist,Texas
1,Anderson,Washington-on-the-Brazos State Historic Site,"23400 Park Rd 12, Washington, TX 77880, United...",30.325911,-96.155111,tourist,Texas
2,Anderson,Lake Somerville State Park,"14222 Park Rd 57, Somerville, TX 77879, United...",30.310915,-96.663271,tourist,Texas
3,Anderson,Sam Houston Memorial Museum,"1836 Sam Houston Ave, Huntsville, TX 77340, Un...",30.715585,-95.551969,tourist,Texas
4,Anderson,Natural Bridge Caverns,"26495 Natural Bridge Caverns Rd, San Antonio, ...",29.692410,-98.342728,tourist,Texas
...,...,...,...,...,...,...,...
13221,Nolan,DaVita The Woodlands At Home,"9301 Pinecroft Drive suite #130, Shenandoah",30.174010,-95.457223,healthcare,Texas
13222,Nolan,Licepros,"27351 Blueberry Hill Drive #37, Oak Ridge North",30.164976,-95.440988,healthcare,Texas
13223,Nolan,Sono Bello The Woodlands,"1450 Lake Robbins Drive #360, The Woodlands",30.161850,-95.456135,healthcare,Texas
13224,Nolan,Encompass Health Rehabilitation Hospital of Th...,"18550 Interstate 45, Conroe",30.185840,-95.454480,healthcare,Texas


In [71]:
t_df.to_csv("texas_df.csv")

PENNSYLVANIA DF

In [None]:
counties_pa = ['Adams', 'Allegheny', 'Armstrong', 'Beaver', 'Bedford', 'Berks', 'Blair', 'Bradford', 'Bucks', 'Butler', 'Cambria', 'Cameron', 'Carbon', 'Centre', 'Chester', 'Clarion', 'Clearfield', 'Clinton', 'Columbia', 'Crawford', 'Cumberland', 'Dauphin', 'Delaware', 'Elk', 'Erie', 'Fayette', 'Forest', 'Franklin', 'Fulton', 'Greene', 'Huntingdon', 'Indiana', 'Jefferson', 'Juniata', 'Lackawanna', 'Lancaster', 'Lawrence', 'Lebanon', 'Lehigh', 'Luzerne', 'Lycoming', 'McKean', 'Mercer', 'Mifflin', 'Monroe', 'Montgomery', 'Montour', 'Northampton', 'Northumberland', 'Perry', 'Philadelphia', 'Pike', 'Potter', 'Schuylkill', 'Snyder', 'Somerset', 'Sullivan', 'Susquehanna', 'Tioga', 'Union', 'Venango', 'Warren', 'Washington', 'Wayne', 'Westmoreland', 'Wyoming', 'York']

# Define a list of counties in Pennsylvania
counties = ['Anderson', 'Andrews', 'Angelina', 'Aransas', 'Archer', 'Armstrong', 'Atascosa', 'Austin', 'Bailey', 'Bandera', 'Bastrop', 'Baylor', 'Bee', 'Bell', 'Bexar', 'Blanco', 'Borden', 'Bosque', 'Bowie', 'Brazoria', 'Brazos', 'Brewster', 'Briscoe', 'Brooks', 'Brown', 'Burleson', 'Burnet', 'Caldwell', 'Calhoun', 'Callahan', 'Cameron', 'Camp', 'Carson', 'Cass', 'Castro', 'Chambers', 'Cherokee', 'Childress', 'Clay', 'Cochran', 'Coke', 'Coleman', 'Collin', 'Collingsworth', 'Colorado', 'Comal', 'Comanche', 'Concho', 'Cooke', 'Coryell', 'Cottle', 'Crane', 'Crockett', 'Crosby', 'Culberson', 'Dallam', 'Dallas', 'Dawson', 'Deaf Smith', 'Delta', 'Denton', 'DeWitt', 'Dickens', 'Dimmit', 'Donley', 'Duval', 'Eastland', 'Ector', 'Edwards', 'Ellis', 'El Paso', 'Erath', 'Falls', 'Fannin', 'Fayette', 'Fisher', 'Floyd', 'Foard', 'Fort Bend', 'Franklin', 'Freestone', 'Frio', 'Gaines', 'Galveston', 'Garza', 'Gillespie', 'Glasscock', 'Goliad', 'Gonzales', 'Gray', 'Grayson', 'Gregg', 'Grimes', 'Guadalupe', 'Hale', 'Hall', 'Hamilton', 'Hansford', 'Hardeman', 'Hardin', 'Harris', 'Harrison', 'Hartley', 'Haskell', 'Hays', 'Hemphill', 'Henderson', 'Hidalgo', 'Hill', 'Hockley', 'Hood', 'Hopkins', 'Houston', 'Howard', 'Hudspeth', 'Hunt', 'Hutchinson', 'Irion', 'Jack', 'Jackson', 'Jasper', 'Jeff Davis', 'Jefferson', 'Jim Hogg', 'Jim Wells', 'Johnson', 'Jones', 'Karnes', 'Kaufman', 'Kendall', 'Kenedy', 'Kent', 'Kerr', 'Kimble', 'King', 'Kinney', 'Kleberg', 'Knox', 'Lamar', 'Lamb', 'Lampasas', 'La Salle', 'Lavaca', 'Lee', 'Leon', 'Liberty', 'Limestone', 'Lipscomb', 'Live Oak', 'Llano', 'Loving', 'Lubbock', 'Lynn', 'McCulloch', 'McLennan', 'McMullen', 'Madison', 'Marion', 'Martin', 'Mason', 'Matagorda', 'Maverick', 'Medina', 'Menard', 'Midland', 'Milam', 'Mills', 'Mitchell', 'Montague', 'Montgomery', 'Moore', 'Morris', 'Motley', 'Nacogdoches', 'Navarro', 'Newton', 'Nolan']

# Define an empty list to hold the results
results = []

# Loop through each county and make requests for tourist attractions, restaurants, and healthcare facilities
for county in counties:
    # Define the search parameters for the Google Places API
    places_result = gmaps.places(query='Tourist attractions in ' + county + ', Pennsylvania', type='tourist_attraction')
    
    # Extract the relevant information from the API response and append it to the results list
    for place in places_result['results']:
        name = place['name']
        address = place['formatted_address']
        lat = place['geometry']['location']['lat']
        lng = place['geometry']['location']['lng']
        
        results.append({'County': county, 'Name': name, 'Address': address, 'Lat': lat, 'Lng': lng, 'Type': 'tourist'})
    
    # Define the search parameters for restaurants
    places_result = gmaps.places(query='Restaurants in ' + county + ', Pennsylvania', type='restaurant')
    
    # Extract the relevant information from the API response and append it to the results list
    for place in places_result['results']:
        name = place['name']
        address = place['formatted_address']
        lat = place['geometry']['location']['lat']
        lng = place['geometry']['location']['lng']
        
        results.append({'County': county, 'Name': name, 'Address': address, 'Lat': lat, 'Lng': lng, 'Type': 'restaurant'})
    
    # Define the search parameters for healthcare facilities
    places_result = gmaps.places(query='Hospitals in ' + county + ', Pennsylvania', type='hospital')
    
    # Extract the relevant information from the API response and append it to the results list
    for place in places_result['results']:
        name = place['name']
        address = place['formatted_address']
        lat = place['geometry']['location']['lat']
        lng = place['geometry']['location']['lng']
        
        results.append({'County': county, 'Name': name,"Address":address,"Lat":lat,"Long":lng,'Type': 'heathcare'})
    
    # Search for nearby restaurants
    places_result = gmaps.places_nearby(location=(lat, lng), radius=5000, type='restaurant')

    # Extract the relevant information from the API response and append it to the results list
    for place in places_result['results']:
        name = place['name']
        address = place['vicinity']
        lat = place['geometry']['location']['lat']
        lng = place['geometry']['location']['lng']

        results.append({'County': county, 'Name': name, 'Address': address, 'Lat': lat, 'Lng': lng, 'Type': 'restaurant'})

    # Search for nearby healthcare facilities
    places_result = gmaps.places_nearby(location=(lat, lng), radius=5000, type='hospital')

    # Extract the relevant information from the API response and append it to the results list
    for place in places_result['results']:
        name = place['name']
        address = place['vicinity']
        lat = place['geometry']['location']['lat']
        lng = place['geometry']['location']['lng']

        results.append({'County': county, 'Name': name, 'Address': address, 'Lat': lat, 'Lng': lng, 'Type': 'healthcare'})

p_df = pd.DataFrame(results, columns=['County', 'Name', 'Address', 'Lat', 'Lng', 'Type'])

p_df



