## Data Wrangling - iNaturalist National Park Observations
### This notebook imports and cleans data from inaturalist and merges it with national park visitation data

### Import packages

In [1]:
import pandas as pd
from collections import defaultdict
from datetime import datetime
import matplotlib.pyplot as plt
import numpy as np

### Load the data

In [2]:
#Mammal observations from iNaturalist search "National Park" 
observations = pd.read_csv('Data/observations-141356.csv', parse_dates = [2])

### Inspect data and select columns

In [3]:
#drop unneccessary columns and rows with place_country_names that are obviously not United States
observation = observations.drop(['id', 'observed_on_string', 'time_observed_at', 'time_zone', 'quality_grade','description', 'place_town_name','species_guess','taxon_id' ], axis= 1)
observation = observation.drop(observation[observation['place_country_name'] == 'Argentina'].index)

In [4]:
observation.head(2)

Unnamed: 0,observed_on,place_guess,latitude,longitude,place_county_name,place_state_name,place_country_name,scientific_name,common_name,iconic_taxon_name
0,2008-08-08,"Yellowstone National Park, Park County, US-WY, US",44.811585,-110.481333,Park,Wyoming,United States,Bison bison,American Bison,Mammalia
1,2009-05-14,"Everglades National Park, Monroe County, US-FL...",25.153938,-80.85179,Monroe,Florida,United States,Trichechus manatus,West Indian Manatee,Mammalia


The national park name needs to be extracted from the place_guess string. I need to be able to join the animal observations to the national park visitation data

In [5]:
#Create a dataframe that has a column with the original place_guess string and a column with national park name extracted from place_guess
#Get a dataframe that is a single column containing a list of the place_guess name. Drop duplicates.
place_guess = observations.filter(['place_guess', 'place_state_name'], axis = 1)
place_guess1 = place_guess.drop_duplicates(ignore_index = True)

#Use the separating commas to separate the place_guess string using its commas. the first part is the national park name
#Join the df with the park name in its own column to the place_guess column 
place_guess3 = pd.DataFrame(place_guess1.place_guess.str.split(',', expand= True))
place_guess5 = place_guess1.join(place_guess3)
place_guess5.rename(columns= {0:'c0', 1 : 'c1', 2:'c2', 3: 'c3', 4:'c4', 5: 'c5', 6:'c6'}, inplace= True)

In [6]:
place_guess5.head()

Unnamed: 0,place_guess,place_state_name,c0,c1,c2,c3,c4,c5,c6
0,"Yellowstone National Park, Park County, US-WY, US",Wyoming,Yellowstone National Park,Park County,US-WY,US,,,
1,"Everglades National Park, Monroe County, US-FL...",Florida,Everglades National Park,Monroe County,US-FL,US,,,
2,"Kings Canyon/Sequoia National Park, NV, US",California,Kings Canyon/Sequoia National Park,NV,US,,,,
3,"Antietam Battlefield National Park, MD, US",Maryland,Antietam Battlefield National Park,MD,US,,,,
4,"Yellowstone National Park, US",Wyoming,Yellowstone National Park,US,,,,,


In [7]:
# use .loc method to create a new column. dataframe_name.loc[condition, new_column_name] = new_column_value
place_guess5.loc[place_guess5['c0'].str.contains('National Park'), 'park'] = place_guess5['c0']
place_guess5.head()

Unnamed: 0,place_guess,place_state_name,c0,c1,c2,c3,c4,c5,c6,park
0,"Yellowstone National Park, Park County, US-WY, US",Wyoming,Yellowstone National Park,Park County,US-WY,US,,,,Yellowstone National Park
1,"Everglades National Park, Monroe County, US-FL...",Florida,Everglades National Park,Monroe County,US-FL,US,,,,Everglades National Park
2,"Kings Canyon/Sequoia National Park, NV, US",California,Kings Canyon/Sequoia National Park,NV,US,,,,,Kings Canyon/Sequoia National Park
3,"Antietam Battlefield National Park, MD, US",Maryland,Antietam Battlefield National Park,MD,US,,,,,Antietam Battlefield National Park
4,"Yellowstone National Park, US",Wyoming,Yellowstone National Park,US,,,,,,Yellowstone National Park


In [8]:
#check for missing values in the park column
place_guess5.info()#park=431 non-null

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 737 entries, 0 to 736
Data columns (total 10 columns):
 #   Column            Non-Null Count  Dtype 
---  ------            --------------  ----- 
 0   place_guess       737 non-null    object
 1   place_state_name  734 non-null    object
 2   c0                737 non-null    object
 3   c1                737 non-null    object
 4   c2                736 non-null    object
 5   c3                698 non-null    object
 6   c4                105 non-null    object
 7   c5                23 non-null     object
 8   c6                1 non-null      object
 9   park              431 non-null    object
dtypes: object(10)
memory usage: 57.7+ KB


In [9]:
#see if National Park part of the name got separated into 'c1' column bc of comma separation
place_guess5['c1'].fillna('None', inplace = True)
place_guess5.loc[place_guess5['c1'].str.contains('National Park'), 'park'] = place_guess5['c1']#park column=566 non-null

In [10]:
#see if National Park part of the name got separated into 'c2' column bc of comma separation
place_guess5['c2'].fillna('None', inplace = True)
place_guess5.loc[place_guess5['c2'].str.contains('National Park'), 'park'] = place_guess5['c2']#park column = 570 non-null

In [11]:
#see if National Park part of the name got separated into 'c3' column bc of comma separation
place_guess5['c3'].fillna('None', inplace = True)
place_guess5.loc[place_guess5['c3'].str.contains('National Park'), 'park'] = place_guess5['c3']#park column info = 570 non-null

In [12]:
#see if National Park part of the name got separated into 'c4' column bc of comma separation
place_guess5['c4'].fillna('None', inplace = True)
place_guess5.loc[place_guess5['c4'].str.contains('National Park'), 'park']= place_guess5['c4']#park column = 570 non-null, stop
place_guess5.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 737 entries, 0 to 736
Data columns (total 10 columns):
 #   Column            Non-Null Count  Dtype 
---  ------            --------------  ----- 
 0   place_guess       737 non-null    object
 1   place_state_name  734 non-null    object
 2   c0                737 non-null    object
 3   c1                737 non-null    object
 4   c2                737 non-null    object
 5   c3                737 non-null    object
 6   c4                737 non-null    object
 7   c5                23 non-null     object
 8   c6                1 non-null      object
 9   park              570 non-null    object
dtypes: object(10)
memory usage: 57.7+ KB


In [13]:
#570 rows have a national park name in the 'park' column. scan through the place_guess column for rows that don't have 'National Park' in place guess
#looks like most of these aren't national parks 
print(place_guess5.loc[place_guess5['park'].isnull()].place_guess.unique())

['Temescal Gateway Park, Santa Monica Mountains National Recreation Area, Los Angeles County, US-CA, US'
 'Honaunau National Historical Park, Honaunau, Hawaii, US'
 'Marsh-Billings-Rockefeller National Historical Park, Windsor County, US-VT, US'
 'Malibu Creek, Malibu Creek State Park, Santa Monica Mountains National Recreation Area, Los Angeles County, US-CA, US'
 'Pepper Park, National City, California, US'
 'Chaco Culture National Historical Park, Farmington, New Mexico, US'
 'Montgomery County, Chesapeake and Ohio Canal National Historical Park, US-MD, US'
 'National Zoological Park, Washington D.C., DC, US'
 'National Mall and Memorial Parks, US-DC, District of Columbia County, US'
 'Jean Lafitte National Historical Park and Preserve, Lafitte, LA, US'
 'Redwood National and State Parks, Six Rivers Ntl Forest, CA, US'
 'San Pedro Valley County Park, San Mateo County, Golden Gate National Recreation Area, US-CA, US'
 'Crescent Lake, Olympic National park, Washington, US'
 'Cumberlan

In [14]:
#scan for issues should be about 63
print(place_guess5.park.unique(),len(place_guess5.park.unique()))

['Yellowstone National Park' 'Everglades National Park'
 'Kings Canyon/Sequoia National Park' 'Antietam Battlefield National Park'
 'Death Valley National Park' 'Grand Teton National Park'
 ' Yellowstone National Park' ' Mount Rainier National Park'
 'Redwood National Park' 'Channel Islands National Park'
 'Denali National Park' ' Grand Canyon National Park'
 'Mount Rainier National Park' 'Yosemite National Park'
 ' Rocky Mountain National Park' nan
 ' Sequoia and Kings Canyon National Parks'
 'Cuyahoga Valley National Park' ' Olympic National Park'
 ' North Cascades National Park' 'Glacier National Park'
 ' Glacier National Park' ' Theodore Roosevelt National Park'
 'Glacier Bay National Park' 'Shenandoah National Park'
 'Badlands National Park' 'Big Bend National Park'
 'Theodore Roosevelt National Park South Unit'
 ' Grand Teton National Park' ' Big Bend National Park'
 ' Yosemite National Park' 'Bryce Canyon National Park'
 'Kenai Fjords National Park' 'Joshua Tree National Park'
 

In [15]:
#df name changed bc of some deleted code
place_guess6 = place_guess5

In [16]:
place_guess6.head()

Unnamed: 0,place_guess,place_state_name,c0,c1,c2,c3,c4,c5,c6,park
0,"Yellowstone National Park, Park County, US-WY, US",Wyoming,Yellowstone National Park,Park County,US-WY,US,,,,Yellowstone National Park
1,"Everglades National Park, Monroe County, US-FL...",Florida,Everglades National Park,Monroe County,US-FL,US,,,,Everglades National Park
2,"Kings Canyon/Sequoia National Park, NV, US",California,Kings Canyon/Sequoia National Park,NV,US,,,,,Kings Canyon/Sequoia National Park
3,"Antietam Battlefield National Park, MD, US",Maryland,Antietam Battlefield National Park,MD,US,,,,,Antietam Battlefield National Park
4,"Yellowstone National Park, US",Wyoming,Yellowstone National Park,US,,,,,,Yellowstone National Park


In [17]:
place_guess6['park'].fillna('None', inplace = True)

In [18]:
#remove leading numbers or extra words
place_guess6['park'] = place_guess6.park.str.strip('-1234567890')
place_guess6['park'] = place_guess6.park.str.strip('–11179')
place_guess6['park'] = place_guess6.park.str.rstrip('& and Preserve')
place_guess6['park'] = place_guess6.park.str.rstrip('Service Complex')
place_guess6['park'] = place_guess6.park.str.rstrip('Pond')

In [19]:
#remove leading or trailing whitespaces
place_guess6['park'] = place_guess6.park.str.strip()

In [20]:
#create a function to rename typos 
def re_name(df, column, prob_name, new_name):
    """filter rows of column for the problem name and replace all matches with consistent corrected name
    Args:
        df: dataframe name
        column(str): name of column
        prob_name(str): part of the string that needs to be replaced
        new_name(str): corrected string
    Returns: df with correction"""
    df.loc[df[column].str.contains(prob_name), column] = new_name

        

In [21]:
re_name(df=place_guess6, column = 'park', prob_name = 'Theodore Roosevelt National Park', new_name = 'Theodore Roosevelt National Park')
re_name(df=place_guess6, column = 'park', prob_name = 'Grand Canyon National', new_name = 'Grand Canyon National')
re_name(df=place_guess6, column = 'park', prob_name = 'North Cascades National Park', new_name = 'North Cascades National Park')
re_name(df=place_guess6, column = 'park', prob_name = 'Saguaro National Park', new_name = 'Saguaro National Park')
re_name(df=place_guess6, column = 'park', prob_name = 'Rocky Mountain National Park', new_name = 'Rocky Mountain National Park')
re_name(df=place_guess6, column = 'park', prob_name = 'Everglades National Park', new_name = 'Everglades National Park')
re_name(df=place_guess6, column = 'park', prob_name = 'Glacier National Park', new_name = 'Glacier National Park')
re_name(df=place_guess6, column = 'park', prob_name = 'Sequoia National Park', new_name = 'Sequoia and Kings Canyon National Park')
re_name(df=place_guess6, column = 'park', prob_name = 'Kings Canyon National Park', new_name = 'Sequoia and Kings Canyon National Park')
re_name(df=place_guess6, column = 'park', prob_name = 'Great Sand Dunes National Park', new_name = 'Great Sand Dunes National Park')
re_name(df=place_guess6, column = 'park', prob_name = 'Jr. National Park', new_name = 'Yellowstone National Park')
re_name(df=place_guess6, column = 'park', prob_name = 'Haleakalā', new_name = 'Haleakala National Park')
re_name(df=place_guess6, column = 'park', prob_name = 'Hawai‘i', new_name = 'Hawaii Volcanoes National Park')
re_name(df=place_guess6, column = 'park', prob_name = 'Wrangell', new_name = 'Wrangell-St. Elias National Park')
re_name(df=place_guess6, column = 'park', prob_name = 'Kenai', new_name = 'Kenai Fjords National Park')
re_name(df=place_guess6, column = 'park', prob_name = 'Gunnison', new_name = 'Black Canyon of the Gunnison National Park')
re_name(df=place_guess6, column = 'park', prob_name = 'enali', new_name = 'Denali National Park')

In [22]:
#some of the non-national park names in the park column
problem_list = ['National Park','Jaime Benitez National Park','Wolf Trap National Park for the Performing Art','Wolf Trap Farm Park National Park',
                'Jr. National Park', 'N', 'US National Park R', 'Tierra del Fuego National Park', 'Piscataway National Park', 'Steptoe Butte National Park','Butte National Park', 'Antietam Battlefield National Park']

In [23]:
#what are the 'N' values? can a National Park name be easily identified?
place_guess6[place_guess6['park'] == 'N']

Unnamed: 0,place_guess,place_state_name,c0,c1,c2,c3,c4,c5,c6,park
22,"Temescal Gateway Park, Santa Monica Mountains ...",California,Temescal Gateway Park,Santa Monica Mountains National Recreation Area,Los Angeles County,US-CA,US,,,N
27,"Honaunau National Historical Park, Honaunau, H...",Hawaii,Honaunau National Historical Park,Honaunau,Hawaii,US,,,,N
45,Marsh-Billings-Rockefeller National Historical...,Vermont,Marsh-Billings-Rockefeller National Historical...,Windsor County,US-VT,US,,,,N
50,"Malibu Creek, Malibu Creek State Park, Santa M...",California,Malibu Creek,Malibu Creek State Park,Santa Monica Mountains National Recreation Area,Los Angeles County,US-CA,US,,N
53,"Pepper Park, National City, California, US",California,Pepper Park,National City,California,US,,,,N
...,...,...,...,...,...,...,...,...,...,...
727,Rosie the Riveter WWII Home Front National His...,California,Rosie the Riveter WWII Home Front National His...,Richmond,CA,US,,,,N
729,"Public, Montgomery County, Chesapeake and Ohio...",Maryland,Public,Montgomery County,Chesapeake and Ohio Canal National Historical...,US-MD,US,,,N
732,Appomattox Court House National Historical Par...,Virginia,Appomattox Court House National Historical Park,Appomattox,VA,US,,,,N
734,Palo Alto Battlefield National Historical Park...,Texas,Palo Alto Battlefield National Historical Park,Brownsville,TX,US,,,,N


In [24]:
#drop the rows that with 'park' column values in the problem list
place_guess7 = place_guess6
for thing in problem_list:
    place_guess7 = place_guess7.drop(place_guess7[place_guess7['park']== thing].index)

In [25]:
#about 63 parks so not all represented in the data
print(len(place_guess7.park.unique()))

58


In [26]:
#drop the extra columns
place_guess8 = place_guess7.drop(['c0', 'c1', 'c2', 'c3', 'c4', 'c5','c6'], axis = 1)
place_guess7.shape, place_guess8.shape

((561, 10), (561, 3))

In [27]:
#found this place_guess with 100 null observations in the merge below
#append place guess Redwood National and State Parks, Trinidad, CA, US    'Redwood' 'CA' 'Redwood National Park'
R_W = {'place_guess': 'Redwood National and State Parks, Trinidad, CA, US', 'place_state_name': 'California' , 'park': 'Redwood National Park'}
place_guess8 = place_guess8.append(R_W, ignore_index = True)

  place_guess8 = place_guess8.append(R_W, ignore_index = True)


In [28]:
place_guess8.tail()

Unnamed: 0,place_guess,place_state_name,park
557,"Death Valley National Park, Nye County, US-NV, US",Nevada,Death Valley National Park
558,"El Capitan, Yosemite National Park, CA, US",California,Yosemite National Park
559,"Death Valley National Park, CA, US",California,Death Valley National Park
560,"Glacier Point, Yosemite National Park, CA, US",California,Yosemite National Park
561,"Redwood National and State Parks, Trinidad, CA...",California,Redwood National Park


In [29]:
#need to add a state abbreviation column
state_abbr = pd.read_csv('Data/states_name_abbr.csv')
state_abbr.head()

Unnamed: 0,state,name
0,AK,Alaska
1,AL,Alabama
2,AR,Arkansas
3,AZ,Arizona
4,CA,California


In [30]:
#merge state_abbr on to place_guess this adds state abbreviation column
df = place_guess8.merge(state_abbr,left_on = 'place_state_name', right_on= 'name')
df.head()

Unnamed: 0,place_guess,place_state_name,park,state,name
0,"Yellowstone National Park, Park County, US-WY, US",Wyoming,Yellowstone National Park,WY,Wyoming
1,"Yellowstone National Park, US",Wyoming,Yellowstone National Park,WY,Wyoming
2,"Yellowstone National Park, US-WY, US",Wyoming,Yellowstone National Park,WY,Wyoming
3,"Grand Teton National Park, WY, US",Wyoming,Grand Teton National Park,WY,Wyoming
4,"Yellowstone National Park, WY, US",Wyoming,Yellowstone National Park,WY,Wyoming


In [31]:
# merging more park identifier columns preparing to merge dataframes
parks = pd.read_csv('Data/parks_unique.csv', index_col = 0)
parks.head()

Unnamed: 0,ParkName,State,Park,park
0,Acadia,ME,Acadia NP,Acadia National Park
1,Arches,UT,Arches NP,Arches National Park
2,Badlands,SD,Badlands NP,Badlands National Park
3,Big Bend,TX,Big Bend NP,Big Bend National Park
4,Biscayne,FL,Biscayne NP,Biscayne National Park


In [32]:
df2= pd.merge(df, parks, on = 'park', how = 'left')
df2.head()

Unnamed: 0,place_guess,place_state_name,park,state,name,ParkName,State,Park
0,"Yellowstone National Park, Park County, US-WY, US",Wyoming,Yellowstone National Park,WY,Wyoming,Yellowstone,WY,Yellowstone NP
1,"Yellowstone National Park, US",Wyoming,Yellowstone National Park,WY,Wyoming,Yellowstone,WY,Yellowstone NP
2,"Yellowstone National Park, US-WY, US",Wyoming,Yellowstone National Park,WY,Wyoming,Yellowstone,WY,Yellowstone NP
3,"Grand Teton National Park, WY, US",Wyoming,Grand Teton National Park,WY,Wyoming,Grand Teton,WY,Grand Teton NP
4,"Yellowstone National Park, WY, US",Wyoming,Yellowstone National Park,WY,Wyoming,Yellowstone,WY,Yellowstone NP


In [33]:
#finally merging all of the cleaned park identifying columns back to the the original animal observation df from inaturalist
df_merge = pd.merge(observation, df2, on=['place_guess', 'place_state_name'], how = 'left')
df_merge.head()

Unnamed: 0,observed_on,place_guess,latitude,longitude,place_county_name,place_state_name,place_country_name,scientific_name,common_name,iconic_taxon_name,park,state,name,ParkName,State,Park
0,2008-08-08,"Yellowstone National Park, Park County, US-WY, US",44.811585,-110.481333,Park,Wyoming,United States,Bison bison,American Bison,Mammalia,Yellowstone National Park,WY,Wyoming,Yellowstone,WY,Yellowstone NP
1,2009-05-14,"Everglades National Park, Monroe County, US-FL...",25.153938,-80.85179,Monroe,Florida,United States,Trichechus manatus,West Indian Manatee,Mammalia,Everglades National Park,FL,Florida,Everglades,FL,Everglades NP
2,2009-06-08,"Kings Canyon/Sequoia National Park, NV, US",36.489655,-118.779289,Tulare,California,United States,Odocoileus hemionus californicus,California Mule Deer,Mammalia,Sequoia and Kings Canyon National Park,CA,California,Sequoia and Kings Canyon,CA,Sequoia and Kings Canyon NP
3,2009-06-08,"Kings Canyon/Sequoia National Park, NV, US",36.520012,-118.795425,Tulare,California,United States,Otospermophilus beecheyi,California Ground Squirrel,Mammalia,Sequoia and Kings Canyon National Park,CA,California,Sequoia and Kings Canyon,CA,Sequoia and Kings Canyon NP
4,2010-04-11,"Kings Canyon/Sequoia National Park, NV, US",36.524426,-118.75148,Tulare,California,United States,Otospermophilus beecheyi,California Ground Squirrel,Mammalia,Sequoia and Kings Canyon National Park,CA,California,Sequoia and Kings Canyon,CA,Sequoia and Kings Canyon NP


In [34]:
#I think I need to get rid of the problem list observations to get rid of the 1000 entry discrepency
df_merge.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 16583 entries, 0 to 16582
Data columns (total 16 columns):
 #   Column              Non-Null Count  Dtype         
---  ------              --------------  -----         
 0   observed_on         16583 non-null  datetime64[ns]
 1   place_guess         16583 non-null  object        
 2   latitude            16582 non-null  float64       
 3   longitude           16582 non-null  float64       
 4   place_county_name   16552 non-null  object        
 5   place_state_name    16579 non-null  object        
 6   place_country_name  16567 non-null  object        
 7   scientific_name     16583 non-null  object        
 8   common_name         16564 non-null  object        
 9   iconic_taxon_name   16583 non-null  object        
 10  park                15874 non-null  object        
 11  state               15874 non-null  object        
 12  name                15874 non-null  object        
 13  ParkName            15284 non-null  object    

In [35]:
nul_df2 = df_merge[df_merge.park.isnull()]
nul_df2.place_guess.value_counts()

Targhee National Forest, Island Park, ID, US                                                       32
Marsh-Billings-Rockefeller National Historical Park, Woodstock, VT, US                             24
Santa Monica Mountains National Recreation Area, Newbury Park, CA, US                              24
San Pedro Valley County Park, San Mateo County, Golden Gate National Recreation Area, US-CA, US    20
Chesapeake and Ohio Canal National Historic Park, Washington, DC, US                               20
                                                                                                   ..
Ocmulgee National Monument Park, Macon, GA, US                                                      1
Virgin Islands National Park, Coral Bay, VI, US                                                     1
Channel Islands National Park, Channel Islands, Santa Barbara County, US-CA, US                     1
Fredericksburg & Spotsylvania National Military Park, Fredericksburg, VA, US      

In [36]:
#don't need any of the observations with unknown NP 
df_merge2 = df_merge.drop(df_merge[df_merge.park.isnull()].index, axis = 0)

In [37]:
#drop some of the unnecessary columns
df_merge2 = df_merge2.drop(['place_county_name', 'place_country_name', 'name', 'State'], axis = 1)
df_merge2.head()

Unnamed: 0,observed_on,place_guess,latitude,longitude,place_state_name,scientific_name,common_name,iconic_taxon_name,park,state,ParkName,Park
0,2008-08-08,"Yellowstone National Park, Park County, US-WY, US",44.811585,-110.481333,Wyoming,Bison bison,American Bison,Mammalia,Yellowstone National Park,WY,Yellowstone,Yellowstone NP
1,2009-05-14,"Everglades National Park, Monroe County, US-FL...",25.153938,-80.85179,Florida,Trichechus manatus,West Indian Manatee,Mammalia,Everglades National Park,FL,Everglades,Everglades NP
2,2009-06-08,"Kings Canyon/Sequoia National Park, NV, US",36.489655,-118.779289,California,Odocoileus hemionus californicus,California Mule Deer,Mammalia,Sequoia and Kings Canyon National Park,CA,Sequoia and Kings Canyon,Sequoia and Kings Canyon NP
3,2009-06-08,"Kings Canyon/Sequoia National Park, NV, US",36.520012,-118.795425,California,Otospermophilus beecheyi,California Ground Squirrel,Mammalia,Sequoia and Kings Canyon National Park,CA,Sequoia and Kings Canyon,Sequoia and Kings Canyon NP
4,2010-04-11,"Kings Canyon/Sequoia National Park, NV, US",36.524426,-118.75148,California,Otospermophilus beecheyi,California Ground Squirrel,Mammalia,Sequoia and Kings Canyon National Park,CA,Sequoia and Kings Canyon,Sequoia and Kings Canyon NP


In [134]:
#Save this as csv for EDA and join to park visitation data
#df_merge2.to_csv('Data/observations_park.csv')