# **Project 2: Crowlugi Resort**
## By: Kathleen Pflugi and Zack Crowley


### Import dependencies and API keys

In [2]:
import pandas as pd
from sqlalchemy import create_engine, inspect 
import numpy as np
import requests
import json

# Google API Key
from config import gkey, weatherkey

# Import .csv Data

### First Store .csv data from worldcities and World Happiness Report data into DataFrames

In [3]:
# World Happiness Report data:
happiness_file = "Resources/WorldHappiness_2019.csv"
raw_happiness_data_df = pd.read_csv(happiness_file)
raw_happiness_data_df.head()

Unnamed: 0,Overall rank,Country or region,Score,GDP per capita,Social support,Healthy life expectancy,Freedom to make life choices,Generosity,Perceptions of corruption
0,1,Finland,7.769,1.34,1.587,0.986,0.596,0.153,0.393
1,2,Denmark,7.6,1.383,1.573,0.996,0.592,0.252,0.41
2,3,Norway,7.554,1.488,1.582,1.028,0.603,0.271,0.341
3,4,Iceland,7.494,1.38,1.624,1.026,0.591,0.354,0.118
4,5,Netherlands,7.488,1.396,1.522,0.999,0.557,0.322,0.298


In [9]:
# World Cities data, has 42905 cities:
cities_file = "Resources/worldcities_2022.csv"
raw_cities_data_df = pd.read_csv(cities_file)
# len(cities_data_df)
raw_cities_data_df.head()

Unnamed: 0,city,city_ascii,lat,lng,country,iso2,iso3,admin_name,capital,population,id
0,Tokyo,Tokyo,35.6839,139.7744,Japan,JP,JPN,Tōkyō,primary,39105000.0,1392685764
1,Jakarta,Jakarta,-6.2146,106.8451,Indonesia,ID,IDN,Jakarta,primary,35362000.0,1360771077
2,Delhi,Delhi,28.6667,77.2167,India,IN,IND,Delhi,admin,31870000.0,1356872604
3,Manila,Manila,14.6,120.9833,Philippines,PH,PHL,Manila,primary,23971000.0,1608618140
4,São Paulo,Sao Paulo,-23.5504,-46.6339,Brazil,BR,BRA,São Paulo,admin,22495000.0,1076532519


# Clean .csv Data

### Create new dataframes with selected columns from raw data

In [31]:
# Create a new df with cols we need from raw_happiness_data_df:
# list(happiness_data_df.columns)
happiness_data_df = raw_happiness_data_df[['Overall rank','Country or region','Score', 'Generosity', 'Perceptions of corruption']].copy()
happiness_data_df.head()

# Clean up column names:
happiness_data_df = happiness_data_df.rename(columns={'Overall rank': 'overall_rank', 'Country or region': 'country', 'Score':'score','Generosity':'generosity','Perceptions of corruption':'perceptions_of_corruption'})

# Set "country_ID" as index for the primary key later in the database for each table with country data:
happiness_data_df = happiness_data_df.rename_axis('country_ID').reset_index()
happiness_data_df.head()

Unnamed: 0,country_ID,overall_rank,country,score,generosity,perceptions_of_corruption
0,0,1,Finland,7.769,0.153,0.393
1,1,2,Denmark,7.6,0.252,0.41
2,2,3,Norway,7.554,0.271,0.341
3,3,4,Iceland,7.494,0.354,0.118
4,4,5,Netherlands,7.488,0.322,0.298


In [32]:
# Create a new df with cols we need from raw_cities_data_df but has all the rows (42905 cities):
# list(raw_cities_data_df.columns)
big_cities_data_df = raw_cities_data_df[['city', 'lat', 'lng', 'country', 'population']].copy()
big_cities_data_df.head()

Unnamed: 0,city,lat,lng,country,population
0,Tokyo,35.6839,139.7744,Japan,39105000.0
1,Jakarta,-6.2146,106.8451,Indonesia,35362000.0
2,Delhi,28.6667,77.2167,India,31870000.0
3,Manila,14.6,120.9833,Philippines,23971000.0
4,São Paulo,-23.5504,-46.6339,Brazil,22495000.0


### Filter data:

In [33]:
# Filter big_cities_data_df to only cities that are in the countries from the happiness_data_df
# First, filter happiness_data_df to get the top 40 countries by rank for that data:
top40_happiness_df = happiness_data_df.loc[(happiness_data_df["overall_rank"] <= 40)].copy()
# This will be one table in database:
top40_happiness_df
# Next, get a list of countries from top40_happiness_df:
top40_happ_cntry_list = list(top40_happiness_df["country"])

# # Then, filter big_cities_data_df by the col of "country" using the happiness_cntry_list inside isin():
cities_data_df = big_cities_data_df[big_cities_data_df["country"].isin(top40_happ_cntry_list)]
cities_data_df

Unnamed: 0,city,lat,lng,country,population
4,São Paulo,-23.5504,-46.6339,Brazil,22495000.0
8,Mexico City,19.4333,-99.1333,Mexico,21505000.0
12,New York,40.6943,-73.9249,United States,18713220.0
27,Los Angeles,34.1139,-118.4068,United States,12750807.0
28,Rio de Janeiro,-22.9083,-43.1964,Brazil,12486000.0
...,...,...,...,...,...
42832,Villa O’Higgins,-48.4669,-72.5930,Chile,250.0
42835,Al Qunfudhah,19.1264,41.0789,Saudi Arabia,157.0
42850,Cuya,-19.1597,-70.1794,Chile,20.0
42854,Chuquicamata,-22.3169,-68.9301,Chile,0.0


In [34]:
# Next, sort cities_data_df by country and population
top_cities_data_df = cities_data_df.sort_values(by=['country', 'population'], ascending=[True, False])
# Finally, groupby country and take the top 50 cities based on population(or less depending on how many cities within each country)
top_cities_data_df = top_cities_data_df.groupby(by=['country'], as_index=False, sort=False).head(50).reset_index(drop=True)
# Set "city_ID" as index for the primary key later in the database for each table with city data:
top_cities_data_df = top_cities_data_df.rename_axis('city_ID').reset_index()
top_cities_data_df

Unnamed: 0,city_ID,city,lat,lng,country,population
0,0,Sydney,-33.8650,151.2094,Australia,4840600.0
1,1,Melbourne,-37.8136,144.9631,Australia,4529500.0
2,2,Brisbane,-27.4678,153.0281,Australia,2360241.0
3,3,Perth,-31.9522,115.8589,Australia,2039200.0
4,4,Adelaide,-34.9275,138.6000,Australia,1295714.0
...,...,...,...,...,...,...
1518,1518,Joaquín Suárez,-34.7336,-56.0367,Uruguay,6570.0
1519,1519,Sauce,-34.6469,-56.0628,Uruguay,6132.0
1520,1520,Sarandí Grande,-33.7250,-56.3303,Uruguay,6130.0
1521,1521,Atlántida,-34.7701,-55.7613,Uruguay,5562.0


In [47]:
# Create a df which will be saved as a table to match city tables to country table:
# First, merge top_cities_data_df with top40_happiness_df:
country_df = pd.merge(top_cities_data_df, top40_happiness_df, on = "country")
# Drop all cols expect city_ID, city, and country_ID
country_df = country_df[['city_ID', 'city', 'country_ID']]
country_df

Unnamed: 0,city_ID,city,country_ID
0,0,Sydney,10
1,1,Melbourne,10
2,2,Brisbane,10
3,3,Perth,10
4,4,Adelaide,10
...,...,...,...
1518,1518,Joaquín Suárez,32
1519,1519,Sauce,32
1520,1520,Sarandí Grande,32
1521,1521,Atlántida,32


In [48]:
# Save country data to csv in Output Folder for a table in SQL to link happiness country table to city tables in SQL:
country_df.to_csv("Output/country.csv", index=False)

In [44]:
# Save top cities data to csv in Output Folder for API calls and for the "city table" in SQL:
top_cities_data_df.to_csv("Output/top_cities.csv", index=False)

In [45]:
# Save top 40 countries by rank from the World Happiness Report data to csv 
# in Output Folder for API calls and for the "country table" in NoSQL:
top40_happiness_df.to_csv("Output/happiness_top40.csv", index=False)

## Google Places API Call

### Airport API Call:

In [50]:
# First call to get the airport data:
# Airports- names, address and rating within radius of 50,000 m (about 31 miles):
# Set up a dataframe to save the API info into, this will include city_ID, city,lat,lng, country from the cities data:
airport_df = top_cities_data_df[['city_ID','city', 'lat', 'lng', 'country']].copy()

# Add the cols that we want to get from the API call:
airport_df["airport_name"] = ""
airport_df["airport_address"] = ""
airport_df["airport_rating"] = ""
# check df:
airport_df

Unnamed: 0,city_ID,city,lat,lng,country,airport_name,airport_address,airport_rating
0,0,Sydney,-33.8650,151.2094,Australia,,,
1,1,Melbourne,-37.8136,144.9631,Australia,,,
2,2,Brisbane,-27.4678,153.0281,Australia,,,
3,3,Perth,-31.9522,115.8589,Australia,,,
4,4,Adelaide,-34.9275,138.6000,Australia,,,
...,...,...,...,...,...,...,...,...
1518,1518,Joaquín Suárez,-34.7336,-56.0367,Uruguay,,,
1519,1519,Sauce,-34.6469,-56.0628,Uruguay,,,
1520,1520,Sarandí Grande,-33.7250,-56.3303,Uruguay,,,
1521,1521,Atlántida,-34.7701,-55.7613,Uruguay,,,


In [56]:
# Google Places API Call
# params dictionary to update each iteration 
params = {
    "radius": 50000,
    "types": "airport",
    "keyword": "international airport",
    "key": gkey
}

# Use the lat/lng we recovered to identify airports
for index, row in airport_df.iterrows():
    # get lat, lng from df
    lat = row["lat"]
    lng = row["lng"]

    # change location each iteration while leaving original params in place
    params["location"] = f"{lat},{lng}"

    # Use the search term: "International Airport" and our lat/lng
    base_url = "https://maps.googleapis.com/maps/api/place/nearbysearch/json"

    # make request and print url
    name_address = requests.get(base_url, params=params)
    
#     print the name_address url, avoid doing for public github repos in order to avoid exposing key
#     print(name_address.url)

    # convert to json
    name_address = name_address.json()
    # print(json.dumps(name_address, indent=4, sort_keys=True))

    # Since some data may be missing we incorporate a try-except to skip any that are missing a data point.
    try:
        airport_df.loc[index, "airport_name"] = name_address["results"][0]["name"]
        airport_df.loc[index, "airport_address"] = name_address["results"][0]["vicinity"]
        airport_df.loc[index, "airport_rating"] = name_address["results"][0]["rating"]
    except (KeyError, IndexError):
        print("Missing field/result... skipping.")

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self.obj[item] = s


Missing field/result... skipping.
Missing field/result... skipping.
Missing field/result... skipping.
Missing field/result... skipping.
Missing field/result... skipping.
Missing field/result... skipping.
Missing field/result... skipping.
Missing field/result... skipping.
Missing field/result... skipping.
Missing field/result... skipping.
Missing field/result... skipping.
Missing field/result... skipping.
Missing field/result... skipping.
Missing field/result... skipping.
Missing field/result... skipping.
Missing field/result... skipping.
Missing field/result... skipping.
Missing field/result... skipping.
Missing field/result... skipping.
Missing field/result... skipping.
Missing field/result... skipping.
Missing field/result... skipping.
Missing field/result... skipping.
Missing field/result... skipping.
Missing field/result... skipping.
Missing field/result... skipping.
Missing field/result... skipping.
Missing field/result... skipping.
Missing field/result... skipping.
Missing field/

In [21]:
# Save airport data to csv after API call:
airport_df.to_csv("Output/airport_cities.csv", index=False)

# Visualize to confirm airport data appears
# This has 139 missing values- we should ask if we should remove them now or if that would be after it is in the database?
airport_df

Unnamed: 0,column 1,column 2,column 3,column 4,column 5,column 6,column 7,column 8
0,city_ID,city,lat,lng,country,airport_name,airport_address,airport_rating
1,0,Sydney,-33.865,151.2094,Australia,Sydney Airport,Sydney,3.7
2,1,Melbourne,-37.8136,144.9631,Australia,Melbourne Airport,Melbourne Airport VIC 3045,3.8
3,2,Brisbane,-27.4678,153.0281,Australia,Brisbane Airport,"Airport Dr, Brisbane Airport",4.0
4,3,Perth,-31.9522,115.8589,Australia,Perth Airport,Perth Airport,3.6
...,...,...,...,...,...,...,...,...
1519,1518,Joaquín Suárez,-34.7336,-56.0367,Uruguay,Carrasco International Airport,"5X7M+5G8, Capitán Juan Antonio Artigas, Ciudad...",4.6
1520,1519,Sauce,-34.6469,-56.0628,Uruguay,Carrasco International Airport,"5X7M+5G8, Capitán Juan Antonio Artigas, Ciudad...",4.6
1521,1520,Sarandí Grande,-33.725,-56.3303,Uruguay,Centro de Aviacion Civil de Florida,"WR67+P3J, Florida",4.5
1522,1521,Atlántida,-34.7701,-55.7613,Uruguay,Carrasco International Airport,"5X7M+5G8, Capitán Juan Antonio Artigas, Ciudad...",4.6


### Beaches API call:

In [22]:
# First call to get the airport data:
# Airports- names, address and rating within radius of 50,000 m (about 31 miles):
# Set up a dataframe to save the API info into, this will include city_ID, city,lat,lng, country from the cities data:
beaches_df = top_cities_data_df[['city_ID','city', 'lat', 'lng', 'country']].copy()

# Add the cols that we want to get from the API call:
beaches_df["beach_name"] = ""
beaches_df["beach_address"] = ""
beaches_df["beach_rating"] = ""
beaches_df

beaches_df.head()

Unnamed: 0,city_ID,city,lat,lng,country,beach_name,beach_address,beach_rating
0,0,Sydney,-33.865,151.2094,Australia,,,
1,1,Melbourne,-37.8136,144.9631,Australia,,,
2,2,Brisbane,-27.4678,153.0281,Australia,,,
3,3,Perth,-31.9522,115.8589,Australia,,,
4,4,Adelaide,-34.9275,138.6,Australia,,,


In [71]:
# Google Places API Call for beaches data:
# params dictionary to update each iteration 
params = {
    "radius": 50000,
    "types": "tourist_attraction",
    "keyword": "beach",
    "key": gkey
}

# Use the lat/lng we recovered to identify airports
for index, row in beaches_df.iterrows():
    # get lat, lng from df
    lat = row["lat"]
    lng = row["lng"]

    # change location each iteration while leaving original params in place
    params["location"] = f"{lat},{lng}"

    # Use the search term: "International Airport" and our lat/lng
    base_url = "https://maps.googleapis.com/maps/api/place/nearbysearch/json"

    # make request and print url
    name_address = requests.get(base_url, params=params)
    
#     print the name_address url, avoid doing for public github repos in order to avoid exposing key
#     print(name_address.url)

    # convert to json
    name_address = name_address.json()

    # Since some data may be missing we incorporate a try-except to skip any that are missing a data point.
    try:
        beaches_df.loc[index, "beach_name"] = name_address["results"][0]["name"]
        beaches_df.loc[index, "beach_address"] = name_address["results"][0]["vicinity"]
        beaches_df.loc[index, "beach_rating"] = name_address["results"][0]["rating"]
    except (KeyError, IndexError):
        print("Missing field/result... skipping.")

Missing field/result... skipping.
Missing field/result... skipping.
Missing field/result... skipping.
Missing field/result... skipping.
Missing field/result... skipping.
Missing field/result... skipping.
Missing field/result... skipping.
Missing field/result... skipping.
Missing field/result... skipping.
Missing field/result... skipping.
Missing field/result... skipping.
Missing field/result... skipping.
Missing field/result... skipping.
Missing field/result... skipping.
Missing field/result... skipping.
Missing field/result... skipping.
Missing field/result... skipping.
Missing field/result... skipping.
Missing field/result... skipping.
Missing field/result... skipping.
Missing field/result... skipping.
Missing field/result... skipping.
Missing field/result... skipping.
Missing field/result... skipping.
Missing field/result... skipping.
Missing field/result... skipping.
Missing field/result... skipping.
Missing field/result... skipping.
Missing field/result... skipping.
Missing field/

In [26]:
# Save beaches data to csv after API call:
beaches_df.to_csv("Output/beaches_cities.csv", index=False)

# Visualize to confirm beaches data appears
beaches_df.head(10)

Unnamed: 0,city_ID,city,lat,lng,country,beach_name,beach_address,beach_rating
0,0,Sydney,-33.865,151.2094,Australia,,,
1,1,Melbourne,-37.8136,144.9631,Australia,,,
2,2,Brisbane,-27.4678,153.0281,Australia,,,
3,3,Perth,-31.9522,115.8589,Australia,,,
4,4,Adelaide,-34.9275,138.6,Australia,,,
5,5,Gold Coast,-28.0167,153.4,Australia,,,
6,6,Cranbourne,-38.0996,145.2834,Australia,,,
7,7,Canberra,-35.2931,149.1269,Australia,,,
8,8,Central Coast,-33.3,151.2,Australia,,,
9,9,Wollongong,-34.4331,150.8831,Australia,,,


## OpenWeather API Call

In [24]:
# Create a dataframe to save the data from the OpenWeather API call:
weather_df = top_cities_data_df[['city_ID','city', 'lat', 'lng', 'country']].copy()
# Create extra columns for data we want: "Min Temp", "Max Temp", "Humidity", "Cloudiness"
weather_df["Min Temp"] = ""
weather_df["Max Temp"] = ""
weather_df["Humidity"] = ""
weather_df["Cloudiness"] = ""

weather_df

Unnamed: 0,city_ID,city,lat,lng,country,population,Min Temp,Max Temp,Humidity,Cloudiness
801,801,Imqabba,35.8442,14.4669,Malta,3315.0,,,,
802,802,Xewkija,36.0331,14.2583,Malta,3300.0,,,,
803,803,Għajnsielem,36.0269,14.2903,Malta,3200.0,,,,
804,804,Iklin,35.9042,14.4544,Malta,3130.0,,,,
805,805,Lija,35.9014,14.4472,Malta,3070.0,,,,
...,...,...,...,...,...,...,...,...,...,...
1518,1518,Joaquín Suárez,-34.7336,-56.0367,Uruguay,6570.0,,,,
1519,1519,Sauce,-34.6469,-56.0628,Uruguay,6132.0,,,,
1520,1520,Sarandí Grande,-33.7250,-56.3303,Uruguay,6130.0,,,,
1521,1521,Atlántida,-34.7701,-55.7613,Uruguay,5562.0,,,,


In [None]:
# OpenWeather API call to get: "Min Temp", "Max Temp", "Humidity", "Cloudiness"
# Create base url to call API
base_url = 'https://api.openweathermap.org/data/2.5/weather?'
#changing units to fahrenheit
units = 'Imperial'

# For loop to go through and make the API call for each city in the df:
for index, row in weather_df.iterrows():
    # get lat, lng from weather_df dataframe
    lat = row["lat"]
    lng = row["lng"]

    # Creates url call:
    temp_response = requests.get(f'{base_url}lat={lat}&lon={lng}&appid={weatherkey}&units={units}')
    temp_json = temp_response.json()

    # Inserts the data from the API into weather_df:
    try:
        weather_df.loc[index, "Min Temp"] = temp_json["main"]["temp_min"]
        weather_df.loc[index, "Max Temp"] = temp_json["main"]["temp_max"]
        weather_df.loc[index, "Humidity"] = temp_json["main"]["humidity"]
        weather_df.loc[index, "Cloudiness"] = temp_json["clouds"]["all"]
    except (KeyError, IndexError):
        print("Missing field/result... skipping.")

In [25]:
# Save weather data to csv after API call:
weather_df.to_csv("Output/weather_data.csv", index=False)

# Visualize to confirm weather data appears
weather_df.head(10)

Unnamed: 0,city_ID,city,lat,lng,country,population,Min Temp,Max Temp,Humidity,Cloudiness
801,801,Imqabba,35.8442,14.4669,Malta,3315.0,,,,
802,802,Xewkija,36.0331,14.2583,Malta,3300.0,,,,
803,803,Għajnsielem,36.0269,14.2903,Malta,3200.0,,,,
804,804,Iklin,35.9042,14.4544,Malta,3130.0,,,,
805,805,Lija,35.9014,14.4472,Malta,3070.0,,,,
806,806,Kalkara,35.8892,14.5294,Malta,3014.0,,,,
807,807,Gudja,35.8483,14.5025,Malta,2997.0,,,,
808,808,Senglea,35.8878,14.5169,Malta,2784.0,,,,
809,809,Għargħur,35.9241,14.4534,Malta,2768.0,,,,
810,810,Qrendi,35.8342,14.4589,Malta,2752.0,,,,


# Read in Data After API Calls:

In [41]:
# Read in happiness data from csv:
happiness_file = "Output/happiness_top40.csv"
happiness_df = pd.read_csv(happiness_file)
happiness_df.head()

Unnamed: 0.1,Unnamed: 0,country_ID,overall_rank,country,score,generosity,perceptions_of_corruption
0,0,0,1,Finland,7.769,0.153,0.393
1,1,1,2,Denmark,7.6,0.252,0.41
2,2,2,3,Norway,7.554,0.271,0.341
3,3,3,4,Iceland,7.494,0.354,0.118
4,4,4,5,Netherlands,7.488,0.322,0.298


In [49]:
# Read in country list data from csv:
country_file = "Output/country.csv"
country_df = pd.read_csv(country_file)
country_df.head()

Unnamed: 0,city_ID,city,country_ID
0,0,Sydney,10
1,1,Melbourne,10
2,2,Brisbane,10
3,3,Perth,10
4,4,Adelaide,10


In [50]:
# Read in cities data from csv:
cities_file = "Output/top_cities.csv"
cities_df = pd.read_csv(cities_file)
cities_df.head()

Unnamed: 0,city_ID,city,lat,lng,country,population
0,0,Sydney,-33.865,151.2094,Australia,4840600.0
1,1,Melbourne,-37.8136,144.9631,Australia,4529500.0
2,2,Brisbane,-27.4678,153.0281,Australia,2360241.0
3,3,Perth,-31.9522,115.8589,Australia,2039200.0
4,4,Adelaide,-34.9275,138.6,Australia,1295714.0


In [51]:
# Read in airport data from csv:
airport_file = "Output/airport_cities.csv"
airport_df = pd.read_csv(airport_file)
airport_df.head()

Unnamed: 0,city_ID,city,lat,lng,country,airport_name,airport_address,airport_rating
0,0,Sydney,-33.865,151.2094,Australia,Sydney Airport,Sydney,3.7
1,1,Melbourne,-37.8136,144.9631,Australia,Melbourne Airport,Melbourne Airport VIC 3045,3.8
2,2,Brisbane,-27.4678,153.0281,Australia,Brisbane Airport,"Airport Dr, Brisbane Airport",4.0
3,3,Perth,-31.9522,115.8589,Australia,Perth Airport,Perth Airport,3.6
4,4,Adelaide,-34.9275,138.6,Australia,Adelaide Airport,"Adelaide Airport Terminal, Sir Richard William...",4.2


In [52]:
# Read in beaches data from csv:
beaches_file = "Output/beaches_cities.csv"
beaches_df = pd.read_csv(beaches_file)
beaches_df.head()

Unnamed: 0,city_ID,city,lat,lng,country,beach_name,beach_address,beach_rating
0,0,Sydney,-33.865,151.2094,Australia,Barrenjoey Lighthouse,"1199D Barrenjoey Rd, Palm Beach",4.7
1,1,Melbourne,-37.8136,144.9631,Australia,Brighton Bathing Boxes,"Esplanade, Brighton",4.4
2,2,Brisbane,-27.4678,153.0281,Australia,Streets Beach,"Stanley St Plaza, South Brisbane",4.7
3,3,Perth,-31.9522,115.8589,Australia,Parsa the Beach,Scarborough,0.0
4,4,Adelaide,-34.9275,138.6,Australia,Moana Beach,"404 Esplanade, Moana",4.7


In [53]:
# Read in weather data from csv:
weather_file = "Output/weather_data.csv"
weather_df = pd.read_csv(weather_file)
weather_df.head()

Unnamed: 0,city_ID,city,lat,lng,Min Temp,Max Temp,Humidity,Cloudiness
0,0,Sydney,-33.865,151.2094,65.5,70.61,59,20
1,1,Melbourne,-37.8136,144.9631,75.18,82.8,42,0
2,2,Brisbane,-27.4678,153.0281,74.84,85.35,45,20
3,3,Perth,-31.9522,115.8589,89.02,92.86,20,0
4,4,Adelaide,-34.9275,138.6,73.83,83.34,32,0


### So far 5 tables for the database:
- happiness_df
- cities_df
- airport_df
- beaches_df
- weather_df

### To DO:
- Set up dataframes to load into PostgreSQL
- Set up db and tables in PostgreSQL
- Make sure primary keys are in each table
- Load in data to PostgreSQL
- Create ERD
- Write up report

## SQL Work:
- First select the columns from each table that we want to load into the database:

In [55]:
# Cities df- clean up the columns, just the city-level data:
new_cities_df = cities_df[['city_ID', 'city', 'lat', 'lng', 'population']].copy()
new_cities_df

Unnamed: 0,city_ID,city,lat,lng,population
0,0,Sydney,-33.8650,151.2094,4840600.0
1,1,Melbourne,-37.8136,144.9631,4529500.0
2,2,Brisbane,-27.4678,153.0281,2360241.0
3,3,Perth,-31.9522,115.8589,2039200.0
4,4,Adelaide,-34.9275,138.6000,1295714.0
...,...,...,...,...,...
1518,1518,Joaquín Suárez,-34.7336,-56.0367,6570.0
1519,1519,Sauce,-34.6469,-56.0628,6132.0
1520,1520,Sarandí Grande,-33.7250,-56.3303,6130.0
1521,1521,Atlántida,-34.7701,-55.7613,5562.0


In [None]:
# Ready to load into postgreSQL:
happiness_df.head()

In [58]:
# Airport data:
new_airport_df = airport_df[['city_ID', 'airport_name', 'airport_address', 'airport_rating']].copy()
new_airport_df

Unnamed: 0,city_ID,airport_name,airport_address,airport_rating
0,0,Sydney Airport,Sydney,3.7
1,1,Melbourne Airport,Melbourne Airport VIC 3045,3.8
2,2,Brisbane Airport,"Airport Dr, Brisbane Airport",4.0
3,3,Perth Airport,Perth Airport,3.6
4,4,Adelaide Airport,"Adelaide Airport Terminal, Sir Richard William...",4.2
...,...,...,...,...
1518,1518,Carrasco International Airport,"5X7M+5G8, Capitán Juan Antonio Artigas, Ciudad...",4.6
1519,1519,Carrasco International Airport,"5X7M+5G8, Capitán Juan Antonio Artigas, Ciudad...",4.6
1520,1520,Centro de Aviacion Civil de Florida,"WR67+P3J, Florida",4.5
1521,1521,Carrasco International Airport,"5X7M+5G8, Capitán Juan Antonio Artigas, Ciudad...",4.6


In [60]:
# Beaches data:
new_beaches_df = beaches_df[['city_ID', 'beach_name', 'beach_address', 'beach_rating']].copy()
new_beaches_df

Unnamed: 0,city_ID,beach_name,beach_address,beach_rating
0,0,Barrenjoey Lighthouse,"1199D Barrenjoey Rd, Palm Beach",4.7
1,1,Brighton Bathing Boxes,"Esplanade, Brighton",4.4
2,2,Streets Beach,"Stanley St Plaza, South Brisbane",4.7
3,3,Parsa the Beach,Scarborough,0.0
4,4,Moana Beach,"404 Esplanade, Moana",4.7
...,...,...,...,...
1518,1518,Biarritz Beach,"6F4P+285, Biarritz",4.7
1519,1519,Biarritz Beach,"6F4P+285, Biarritz",4.7
1520,1520,,,
1521,1521,Biarritz Beach,"6F4P+285, Biarritz",4.7


In [64]:
# Weather data:
new_weather_df = weather_df[['city_ID', 'Min Temp', 'Max Temp', 'Humidity', 'Cloudiness']].copy()
new_weather_df

Unnamed: 0,city_ID,Min Temp,Max Temp,Humidity,Cloudiness
0,0,65.50,70.61,59,20
1,1,75.18,82.80,42,0
2,2,74.84,85.35,45,20
3,3,89.02,92.86,20,0
4,4,73.83,83.34,32,0
...,...,...,...,...,...
1518,1518,69.33,71.31,73,0
1519,1519,69.42,71.42,68,3
1520,1520,67.96,67.96,67,18
1521,1521,66.85,71.74,76,76


### We will now switch over to postgreSQL to create the db and tables for the data we have transformed here.
- See "table_schema_Crowlugi.sql" file for the creation of the db and tables
- Database will be named "resort_db"
  

### Connect to local database in postgreSQL

In [None]:
protocol = 'postgresql'
username = 'postgres'
password = 'postgres'
host = 'localhost'
port = 5433
database_name = 'resort_db'
rds_connection_string = f'{protocol}://{username}:{password}@{host}:{port}/{database_name}'
engine = create_engine(rds_connection_string)

### Check to see if the tables are in the db correctly:

In [None]:
# Check tables:
inspector = inspect(engine)
inspector.get_table_names() 

#### Use pandas to load the country_df into postgreSQL:

In [None]:
country_df.to_sql(name='country', con=engine, if_exists='replace', index=False)