In [12]:
import requests
from bs4 import BeautifulSoup

import json


In [2]:
# Send a request to the website
response = requests.get('https://www.latlong.net/category/national-parks-236-42.html')

# Retrieve the HTML content
html = response.content

In [3]:
# Create a Beautiful Soup object
soup = BeautifulSoup(html, 'html.parser')

In [4]:
# Find all of the table rows
rows = soup.find_all('tr')

# Create an empty list to store the data
data = []

# Iterate through the rows
for row in rows:
    # Find all of the cells in the row
    cells = row.find_all('td')
    
    # If the row has cells, extract the data from the cells
    if cells:
        place_name = cells[0].text.strip()
        latitude = cells[1].text.strip()
        longitude = cells[2].text.strip()
        
        # Add the data to the list
        data.append({
            'Place Name': place_name,
            'Latitude': latitude,
            'Longitude': longitude
        })

# Print the data
print(data)


[{'Place Name': 'Capitol Reef National Park, UT, USA', 'Latitude': '38.089600', 'Longitude': '-111.149910'}, {'Place Name': 'Pinnacles National Park, CA, USA', 'Latitude': '36.491508', 'Longitude': '-121.197243'}, {'Place Name': 'Rocky Mountain National Park, CO, USA', 'Latitude': '40.343182', 'Longitude': '-105.688103'}, {'Place Name': 'Offshore Trap/Pot Waters Area, Western Atlantic Ocean, the US', 'Latitude': '38.000000', 'Longitude': '-82.000000'}, {'Place Name': 'Steller Sea Lion Protection Area, AL, the US', 'Latitude': '57.466667', 'Longitude': '-153.433334'}, {'Place Name': 'Pacific Remote Islands Marine National Monument, the US', 'Latitude': '16.736944', 'Longitude': '-169.523895'}, {'Place Name': 'Danville Conservation Area, New Florence, MO, USA', 'Latitude': '38.865097', 'Longitude': '-91.504852'}, {'Place Name': 'Sand Harbor State Park, Incline Village, NV, USA', 'Latitude': '39.198364', 'Longitude': '-119.930984'}, {'Place Name': 'White Sands National Park, NM, the US', 

In [5]:
# Create a dataframe from the data
df = pd.DataFrame(data)

# Print the resulting dataframe
print(df)

                                           Place Name   Latitude    Longitude
0                 Capitol Reef National Park, UT, USA  38.089600  -111.149910
1                    Pinnacles National Park, CA, USA  36.491508  -121.197243
2               Rocky Mountain National Park, CO, USA  40.343182  -105.688103
3   Offshore Trap/Pot Waters Area, Western Atlanti...  38.000000   -82.000000
4        Steller Sea Lion Protection Area, AL, the US  57.466667  -153.433334
..                                                ...        ...          ...
82                   Mt Hood National Forest, OR, USA  45.227173  -121.839455
83              Bryce Canyon National Park, Utah, USA  37.593048  -112.187332
84                Mammoth Cave National Park, KY, USA  37.183640   -86.159943
85          Redwood National and State Parks, CA, USA  41.213181  -124.004631
86                     Yellowstone National Park, USA  44.423691  -110.588516

[87 rows x 3 columns]


In [6]:


# Define a function to extract the place name
def extract_place_name(name):
    # Split the string by ', '
    parts = name.split(', ')
    
    # Return the first part (the place name)
    return parts[0]

# Apply the function to the 'Place Name' column
df['Place Name'] = df['Place Name'].apply(extract_place_name)

# Print the resulting dataframe
print(df)

                          Place Name   Latitude    Longitude
0         Capitol Reef National Park  38.089600  -111.149910
1            Pinnacles National Park  36.491508  -121.197243
2       Rocky Mountain National Park  40.343182  -105.688103
3      Offshore Trap/Pot Waters Area  38.000000   -82.000000
4   Steller Sea Lion Protection Area  57.466667  -153.433334
..                               ...        ...          ...
82           Mt Hood National Forest  45.227173  -121.839455
83        Bryce Canyon National Park  37.593048  -112.187332
84        Mammoth Cave National Park  37.183640   -86.159943
85  Redwood National and State Parks  41.213181  -124.004631
86         Yellowstone National Park  44.423691  -110.588516

[87 rows x 3 columns]


In [7]:
# Define a function to combine the 'Latitude' and 'Longitude' columns
def combine_coords(row):
    # Concatenate the 'Latitude' and 'Longitude' values with a comma separator
    coords = f"{row['Latitude']}, {row['Longitude']}"
    
    # Return the resulting string
    return coords

# Apply the function to each row in the dataframe
df['coords'] = df.apply(combine_coords, axis=1)

# Print the resulting dataframe
print(df)

                          Place Name   Latitude    Longitude  \
0         Capitol Reef National Park  38.089600  -111.149910   
1            Pinnacles National Park  36.491508  -121.197243   
2       Rocky Mountain National Park  40.343182  -105.688103   
3      Offshore Trap/Pot Waters Area  38.000000   -82.000000   
4   Steller Sea Lion Protection Area  57.466667  -153.433334   
..                               ...        ...          ...   
82           Mt Hood National Forest  45.227173  -121.839455   
83        Bryce Canyon National Park  37.593048  -112.187332   
84        Mammoth Cave National Park  37.183640   -86.159943   
85  Redwood National and State Parks  41.213181  -124.004631   
86         Yellowstone National Park  44.423691  -110.588516   

                    coords  
0   38.089600, -111.149910  
1   36.491508, -121.197243  
2   40.343182, -105.688103  
3    38.000000, -82.000000  
4   57.466667, -153.433334  
..                     ...  
82  45.227173, -121.839455  

In [8]:

# Drop the 'Latitude' and 'Longitude' columns
df2 = df.drop(columns=['Latitude', 'Longitude'])

# Print the resulting dataframe
print(df2)

                          Place Name                  coords
0         Capitol Reef National Park  38.089600, -111.149910
1            Pinnacles National Park  36.491508, -121.197243
2       Rocky Mountain National Park  40.343182, -105.688103
3      Offshore Trap/Pot Waters Area   38.000000, -82.000000
4   Steller Sea Lion Protection Area  57.466667, -153.433334
..                               ...                     ...
82           Mt Hood National Forest  45.227173, -121.839455
83        Bryce Canyon National Park  37.593048, -112.187332
84        Mammoth Cave National Park   37.183640, -86.159943
85  Redwood National and State Parks  41.213181, -124.004631
86         Yellowstone National Park  44.423691, -110.588516

[87 rows x 2 columns]


In [9]:
# Create an empty dataframe
df3 = pd.DataFrame(columns=['Place Name', 'coords'])

# Iterate over the rows of the data
for row in data:
    # Append the row to the dataframe
    df3 = df.append(row, ignore_index=True)

# Print the resulting dataframe
print(df3)

                          Place Name   Latitude    Longitude  \
0         Capitol Reef National Park  38.089600  -111.149910   
1            Pinnacles National Park  36.491508  -121.197243   
2       Rocky Mountain National Park  40.343182  -105.688103   
3      Offshore Trap/Pot Waters Area  38.000000   -82.000000   
4   Steller Sea Lion Protection Area  57.466667  -153.433334   
..                               ...        ...          ...   
83        Bryce Canyon National Park  37.593048  -112.187332   
84        Mammoth Cave National Park  37.183640   -86.159943   
85  Redwood National and State Parks  41.213181  -124.004631   
86         Yellowstone National Park  44.423691  -110.588516   
87    Yellowstone National Park, USA  44.423691  -110.588516   

                    coords  
0   38.089600, -111.149910  
1   36.491508, -121.197243  
2   40.343182, -105.688103  
3    38.000000, -82.000000  
4   57.466667, -153.433334  
..                     ...  
83  37.593048, -112.187332  

In [14]:
df4 = df3.dropna(subset=['coords'])

# Print the resulting dataframe
print(df4)

                          Place Name                  coords
0         Capitol Reef National Park  38.089600, -111.149910
1            Pinnacles National Park  36.491508, -121.197243
2       Rocky Mountain National Park  40.343182, -105.688103
3      Offshore Trap/Pot Waters Area   38.000000, -82.000000
4   Steller Sea Lion Protection Area  57.466667, -153.433334
..                               ...                     ...
82           Mt Hood National Forest  45.227173, -121.839455
83        Bryce Canyon National Park  37.593048, -112.187332
84        Mammoth Cave National Park   37.183640, -86.159943
85  Redwood National and State Parks  41.213181, -124.004631
86         Yellowstone National Park  44.423691, -110.588516

[87 rows x 2 columns]


In [15]:
df4.to_json(r'coords.json', orient='records')