# Data files:
* Kaggle data (df_2_kaggle.csv)
* activity data (parks_activity_data.json)
* data.world data (us-national-parks-dataset.json)

In [3]:
# Dependencies
import pandas as pd
import json
import csv
import pymongo
from pymongo import MongoClient

### Kaggle data (df_2_kaggle.csv)

In [11]:
#  Extract CSV file into dataframe
kaggle_df = pd.read_csv('Resources/df_2_kaggle.csv')
# kaggle_df.head()
# Data cleaning: remove first unnamed column
del kaggle_df['Unnamed: 0']
kaggle_df.head()

Unnamed: 0,Name,Image,Location,Date established as park[7][12],Area (2021)[13],Recreation visitors (2021)[11],Description
0,Acadia,,"Maine.mw-parser-output .geo-default,.mw-parser...","February 26, 1919","49,071.40 acres (198.6 km2)",4069098,Covering most of Mount Desert Island and other...
1,American Samoa,,American Samoa14°15′S 170°41′W﻿ / ﻿14.25°S 170...,"October 31, 1988","8,256.67 acres (33.4 km2)",8495,The southernmost national park is on three Sam...
2,Arches,,Utah38°41′N 109°34′W﻿ / ﻿38.68°N 109.57°W,"November 12, 1971","76,678.98 acres (310.3 km2)",1806865,"This site features more than 2,000 natural san..."
3,Badlands,,South Dakota43°45′N 102°30′W﻿ / ﻿43.75°N 102.50°W,"November 10, 1978","242,755.94 acres (982.4 km2)",1224226,"The Badlands are a collection of buttes, pinna..."
4,Big Bend,,Texas29°15′N 103°15′W﻿ / ﻿29.25°N 103.25°W,"June 12, 1944","801,163.21 acres (3,242.2 km2)",581220,Named for the prominent bend in the Rio Grande...


In [14]:
# Load data to MongoDB

# save as csv file
kaggle_df.to_csv('kaggle_df_2.csv')

# load data to mongo database
conn = 'mongodb://localhost:27017'
client = pymongo.MongoClient(conn)

# Define the 'NationalTreasure_db' database in Mongo
db = client.NationalTreasure_db
mongoClient = MongoClient() 
db = mongoClient.NationalTreasure_db
db.segment.drop()

# kaggle_df.columns
header = ['Name', 'Image', 'Location', 'Date established as park[7][12]',
       'Area (2021)[13]', 'Recreation visitors (2021)[11]', 'Description']
csvfile = open('kaggle_df_2.csv', 'r')
reader = csv.DictReader( csvfile )

for each in reader:
    row={}
    for field in header:
        row[field]=each[field]
        
    print (row)
    db.segment.insert(row)


{'Name': 'Acadia', 'Image': '', 'Location': 'Maine.mw-parser-output .geo-default,.mw-parser-output .geo-dms,.mw-parser-output .geo-dec{display:inline}.mw-parser-output .geo-nondefault,.mw-parser-output .geo-multi-punct{display:none}.mw-parser-output .longitude,.mw-parser-output .latitude{white-space:nowrap}44°21′N 68°13′W\ufeff / \ufeff44.35°N 68.21°W', 'Date established as park[7][12]': 'February 26, 1919', 'Area (2021)[13]': '49,071.40 acres (198.6\xa0km2)', 'Recreation visitors (2021)[11]': '4069098', 'Description': 'Covering most of Mount Desert Island and other coastal islands, Acadia features the tallest mountain on the Atlantic coast of the United States, granite peaks, ocean shoreline, woodlands, and lakes. There are freshwater, estuary, forest, and intertidal habitats.[14][15]'}
{'Name': 'American Samoa', 'Image': '', 'Location': 'American Samoa14°15′S 170°41′W\ufeff / \ufeff14.25°S 170.68°W', 'Date established as park[7][12]': 'October 31, 1988', 'Area (2021)[13]': '8,256.67 

  db.segment.insert(row)


### activity data (parks_activity_data.json)

In [15]:
# Read JSON file instead of API call
f = open('Resources/parks_activity_data.json')
results = json.load(f)
results

{'total': '40',
 'limit': '50',
 'start': '0',
 'data': [{'id': '09DF0950-D319-4557-A57E-04CD2F63FF42',
   'name': 'Arts and Culture',
   'parks': [{'states': 'ME',
     'parkCode': 'acad',
     'designation': 'National Park',
     'fullName': 'Acadia National Park',
     'url': 'https://www.nps.gov/acad/index.htm',
     'name': 'Acadia'},
    {'states': 'NY',
     'parkCode': 'afbg',
     'designation': 'National Monument',
     'fullName': 'African Burial Ground National Monument',
     'url': 'https://www.nps.gov/afbg/index.htm',
     'name': 'African Burial Ground'},
    {'states': 'NE',
     'parkCode': 'agfo',
     'designation': 'National Monument',
     'fullName': 'Agate Fossil Beds National Monument',
     'url': 'https://www.nps.gov/agfo/index.htm',
     'name': 'Agate Fossil Beds'},
    {'states': 'TX',
     'parkCode': 'alfl',
     'designation': 'National Monument',
     'fullName': 'Alibates Flint Quarries National Monument',
     'url': 'https://www.nps.gov/alfl/index.h

In [17]:
# Extract activity data from results (non-pandas way)

# create an empty list (will become df later)
data_list = []

# use a for loop to go through results data

for i in results['data']:
    #print(i)
    act_id = i['id']
    act_name = i['name']
    parks_list = [park_dict['fullName'] for park_dict in i['parks']]
    for park_dict in i['parks']:
        park_name = park_dict['fullName']        
        df2 = {
            'act_id': act_id,
            'act_name': act_name,
            'park_name': park_name
        }
        data_list.append(df2)

# convert into dataframe
activity_df = pd.DataFrame(data_list)
activity_df.head()

Unnamed: 0,act_id,act_name,park_name
0,09DF0950-D319-4557-A57E-04CD2F63FF42,Arts and Culture,Acadia National Park
1,09DF0950-D319-4557-A57E-04CD2F63FF42,Arts and Culture,African Burial Ground National Monument
2,09DF0950-D319-4557-A57E-04CD2F63FF42,Arts and Culture,Agate Fossil Beds National Monument
3,09DF0950-D319-4557-A57E-04CD2F63FF42,Arts and Culture,Alibates Flint Quarries National Monument
4,09DF0950-D319-4557-A57E-04CD2F63FF42,Arts and Culture,Ala Kahakai National Historic Trail


In [19]:
# Remove act_id column
activity_df = activity_df[["act_name","park_name"]]
activity_df.head()

Unnamed: 0,act_name,park_name
0,Arts and Culture,Acadia National Park
1,Arts and Culture,African Burial Ground National Monument
2,Arts and Culture,Agate Fossil Beds National Monument
3,Arts and Culture,Alibates Flint Quarries National Monument
4,Arts and Culture,Ala Kahakai National Historic Trail


In [21]:
# Reorganize data --> key: park_name; value: list of corresponding activities (act_name)
act_dict = activity_df.groupby('park_name')['act_name'].apply(list).to_dict()
# act_dict

act_new_list = []
for k in list(act_dict.keys()):
#     print(k)
    act_new_df = {
        'park_name':k,
        'act_name':act_dict[k]
    }
#     print(act_list[k])
    act_new_list.append(act_new_df)
    
activity_new_df = pd.DataFrame(act_new_list)
activity_new_df.head()

Unnamed: 0,park_name,act_name
0,**********National Mall**********,"[Arts and Culture, Boating, Food, Guided Tours..."
1,Abraham Lincoln Birthplace National Historical...,"[Astronomy, Food, Guided Tours, Junior Ranger ..."
2,Acadia National Park,"[Arts and Culture, Astronomy, Biking, Boating,..."
3,Adams National Historical Park,"[Guided Tours, Living History, Park Film, Shop..."
4,African American Civil War Memorial,[Guided Tours]


In [23]:
# load activity data to MongoDB

# save as csv file
activity_new_df.to_csv('activity_df.csv')

# load data to mongo database
# activity_new_df.columns
header = ['park_name', 'act_name']
csvfile = open('activity_df.csv', 'r')
reader = csv.DictReader( csvfile )

for each in reader:
    row={}
    for field in header:
        row[field]=each[field]
        
    print (row)
    db.segment.insert(row)

{'park_name': '**********National Mall**********', 'act_name': "['Arts and Culture', 'Boating', 'Food', 'Guided Tours', 'Shopping', 'Wildlife Watching']"}
{'park_name': 'Abraham Lincoln Birthplace National Historical Park', 'act_name': "['Astronomy', 'Food', 'Guided Tours', 'Junior Ranger Program', 'Museum Exhibits', 'Park Film', 'Shopping', 'Wildlife Watching']"}
{'park_name': 'Acadia National Park', 'act_name': "['Arts and Culture', 'Astronomy', 'Biking', 'Boating', 'Camping', 'Climbing', 'Compass and GPS', 'Fishing', 'Food', 'Guided Tours', 'Hands-On', 'Hiking', 'Horse Trekking', 'Ice Skating', 'Junior Ranger Program', 'Paddling', 'Park Film', 'Shopping', 'Skiing', 'Snow Play', 'Snowmobiling', 'Snowshoeing', 'Swimming', 'Wildlife Watching']"}
{'park_name': 'Adams National Historical Park', 'act_name': "['Guided Tours', 'Living History', 'Park Film', 'Shopping']"}
{'park_name': 'African American Civil War Memorial', 'act_name': "['Guided Tours']"}
{'park_name': 'African Burial Ground

  db.segment.insert(row)


### data.world data (us-national-parks-dataset.json)

In [24]:
#  Extract JSON file into DataFrame
dataworld_df = pd.read_json('Resources/us-national-parks-dataset.json')
dataworld_df.head()

Unnamed: 0,area,coordinates,date_established_readable,date_established_unix,description,image,nps_link,states,title,id,visitors,world_heritage_site
0,"{'acres': '49,057.36', 'square_km': '198.5'}","{'latitude': 44.35, 'longitude': -68.21}","February 26, 1919",-1604599200,Covering most of Mount Desert Island and other...,"{'url': 'acadia.jpg', 'attribution': 'PixelBay...",https://www.nps.gov/acad/index.htm,"[{'id': 'state_maine', 'title': 'Maine'}]",Acadia,park_acadia,3303393,False
1,"{'acres': '8,256.67', 'square_km': '33.4'}","{'latitude': -14.25, 'longitude': -170.68}","October 31, 1988",594280800,The southernmost National Park is on three Sam...,"{'url': 'american-samoa.jpg', 'attribution': '...",https://www.nps.gov/npsa/index.htm,"[{'id': 'state_american-samoa', 'title': 'Amer...",American Samoa,park_american-samoa,28892,False
2,"{'acres': '76,678.98', 'square_km': '310.3'}","{'latitude': 38.68, 'longitude': -109.57}","November 12, 1971",58773600,"This site features more than 2,000 natural san...","{'url': 'arches.jpg', 'attribution': 'PixelBay...",https://www.nps.gov/arch/index.htm,"[{'id': 'state_utah', 'title': 'Utah'}]",Arches,park_arches,1585718,False
3,"{'acres': '242,755.94', 'square_km': '982.4'}","{'latitude': 43.75, 'longitude': -102.5}","November 10, 1978",279525600,"The Badlands are a collection of buttes, pinna...","{'url': 'badlands.jpg', 'attribution': 'PixelB...",https://www.nps.gov/badl/index.htm,"[{'id': 'state_south-dakota', 'title': 'South ...",Badlands,park_badlands,996263,False
4,"{'acres': '801,163.21', 'square_km': '3,242.2'}","{'latitude': 29.25, 'longitude': -103.25}","June 12, 1944",-806439600,Named for the prominent bend in the Rio Grande...,"{'url': 'big-bend.jpg', 'attribution': 'PixelB...",https://www.nps.gov/bibe/index.htm,"[{'id': 'state_texas', 'title': 'Texas'}]",Big Bend,park_big-bend,388290,False


In [26]:
# load dataworld data to MongoDB

# save as csv file
dataworld_df.to_csv('dataworld_df.csv')

# load data to mongo database
# dataworld_df.columns
header = ['area', 'coordinates', 'date_established_readable',
       'date_established_unix', 'description', 'image', 'nps_link', 'states',
       'title', 'id', 'visitors', 'world_heritage_site']
csvfile = open('dataworld_df.csv', 'r')
reader = csv.DictReader( csvfile )

for each in reader:
    row={}
    for field in header:
        row[field]=each[field]
        
    print (row)
    db.segment.insert(row)

{'area': "{'acres': '49,057.36', 'square_km': '198.5'}", 'coordinates': "{'latitude': 44.35, 'longitude': -68.21}", 'date_established_readable': 'February 26, 1919', 'date_established_unix': '-1604599200', 'description': 'Covering most of Mount Desert Island and other coastal islands, Acadia features the tallest mountain on the Atlantic coast of the United States, granite peaks, ocean shoreline, woodlands, and lakes. There are freshwater, estuary, forest, and intertidal habitats.', 'image': "{'url': 'acadia.jpg', 'attribution': 'PixelBay/@Skeeze', 'attribution_url': 'https://pixabay.com/en/users/skeeze-272447/'}", 'nps_link': 'https://www.nps.gov/acad/index.htm', 'states': "[{'id': 'state_maine', 'title': 'Maine'}]", 'title': 'Acadia', 'id': 'park_acadia', 'visitors': '3,303,393', 'world_heritage_site': 'False'}
{'area': "{'acres': '8,256.67', 'square_km': '33.4'}", 'coordinates': "{'latitude': -14.25, 'longitude': -170.68}", 'date_established_readable': 'October 31, 1988', 'date_estab

  db.segment.insert(row)
