In [1]:
#Import dependencies
import pandas as pd
import numpy as np
from sqlalchemy import create_engine

In [2]:
#Import csv file and read into dataframe
file = 'wine-reviews/sample_wine_data.csv'
wine_df = pd.read_csv(file, encoding="ISO-8859-1")
wine_df.head()

Unnamed: 0.1,Unnamed: 0,country,description,designation,points,price,province,region_1,region_2,taster_name,taster_twitter_handle,title,variety,winery
0,0,Italy,"Aromas include tropical fruit, broom, brimston...",VulkÃÂ Bianco,87,,Sicily & Sardinia,Etna,,Kerin OÃ¢â¬â¢Keefe,@kerinokeefe,Nicosia 2013 VulkÃÂ Bianco (Etna),White Blend,Nicosia
1,1,Portugal,"This is ripe and fruity, a wine that is smooth...",Avidagos,87,15.0,Douro,,,Roger Voss,@vossroger,Quinta dos Avidagos 2011 Avidagos Red (Douro),Portuguese Red,Quinta dos Avidagos
2,2,US,"Tart and snappy, the flavors of lime flesh and...",,87,14.0,Oregon,Willamette Valley,Willamette Valley,Paul Gregutt,@paulgwineÃÂ,Rainstorm 2013 Pinot Gris (Willamette Valley),Pinot Gris,Rainstorm
3,3,US,"Pineapple rind, lemon pith and orange blossom ...",Reserve Late Harvest,87,13.0,Michigan,Lake Michigan Shore,,Alexander Peartree,,St. Julian 2013 Reserve Late Harvest Riesling ...,Riesling,St. Julian
4,4,US,"Much like the regular bottling from 2012, this...",Vintner's Reserve Wild Child Block,87,65.0,Oregon,Willamette Valley,Willamette Valley,Paul Gregutt,@paulgwineÃÂ,Sweet Cheeks 2012 Vintner's Reserve Wild Child...,Pinot Noir,Sweet Cheeks


In [3]:
#Drop all null values
not_null_wine_df= wine_df.dropna(how='any')
not_null_wine_df.head()

Unnamed: 0.1,Unnamed: 0,country,description,designation,points,price,province,region_1,region_2,taster_name,taster_twitter_handle,title,variety,winery
4,4,US,"Much like the regular bottling from 2012, this...",Vintner's Reserve Wild Child Block,87,65.0,Oregon,Willamette Valley,Willamette Valley,Paul Gregutt,@paulgwineÃÂ,Sweet Cheeks 2012 Vintner's Reserve Wild Child...,Pinot Noir,Sweet Cheeks
10,10,US,"Soft, supple plum envelopes an oaky structure ...",Mountain CuvÃÂ©e,87,19.0,California,Napa Valley,Napa,Virginie Boone,@vboone,Kirkland Signature 2011 Mountain CuvÃÂ©e Cabe...,Cabernet Sauvignon,Kirkland Signature
23,23,US,This wine from the Geneseo district offers aro...,Signature Selection,87,22.0,California,Paso Robles,Central Coast,Matt Kettmann,@mattkettmann,Bianchi 2011 Signature Selection Merlot (Paso ...,Merlot,Bianchi
25,25,US,Oak and earth intermingle around robust aromas...,King Ridge Vineyard,87,69.0,California,Sonoma Coast,Sonoma,Virginie Boone,@vboone,Castello di Amorosa 2011 King Ridge Vineyard P...,Pinot Noir,Castello di Amorosa
35,35,US,As with many of the Erath 2010 vineyard design...,Hyland,86,50.0,Oregon,McMinnville,Willamette Valley,Paul Gregutt,@paulgwineÃÂ,Erath 2010 Hyland Pinot Noir (McMinnville),Pinot Noir,Erath


In [4]:
#Check column names
not_null_wine_df.count()

Unnamed: 0               24
country                  24
description              24
designation              24
points                   24
price                    24
province                 24
region_1                 24
region_2                 24
taster_name              24
taster_twitter_handle    24
title                    24
variety                  24
winery                   24
dtype: int64

In [5]:
#Drop extra columns in dataframe
cleaned_wine_df= not_null_wine_df.drop(['Unnamed: 0','description','region_2','taster_twitter_handle'], axis=1)
cleaned_wine_df.head()

Unnamed: 0,country,designation,points,price,province,region_1,taster_name,title,variety,winery
4,US,Vintner's Reserve Wild Child Block,87,65.0,Oregon,Willamette Valley,Paul Gregutt,Sweet Cheeks 2012 Vintner's Reserve Wild Child...,Pinot Noir,Sweet Cheeks
10,US,Mountain CuvÃÂ©e,87,19.0,California,Napa Valley,Virginie Boone,Kirkland Signature 2011 Mountain CuvÃÂ©e Cabe...,Cabernet Sauvignon,Kirkland Signature
23,US,Signature Selection,87,22.0,California,Paso Robles,Matt Kettmann,Bianchi 2011 Signature Selection Merlot (Paso ...,Merlot,Bianchi
25,US,King Ridge Vineyard,87,69.0,California,Sonoma Coast,Virginie Boone,Castello di Amorosa 2011 King Ridge Vineyard P...,Pinot Noir,Castello di Amorosa
35,US,Hyland,86,50.0,Oregon,McMinnville,Paul Gregutt,Erath 2010 Hyland Pinot Noir (McMinnville),Pinot Noir,Erath


In [6]:
#Double check consistency in price column to determine data type
cleaned_wine_df['price'].head(50)

4       65.0
10      19.0
23      22.0
25      69.0
35      50.0
60     100.0
62      25.0
64      26.0
67      46.0
71      40.0
73      75.0
74      55.0
75      75.0
78      25.0
84      24.0
87      55.0
90      23.0
92      55.0
94      22.0
99      75.0
108     26.0
111     85.0
114     18.0
116     25.0
Name: price, dtype: float64

In [7]:
#Connect to database
wine_connection_string = "postgres:Bruh1need@pwd@localhost:5432/wine_db"
engine = create_engine(f'postgresql://{wine_connection_string}')

In [8]:
#Check table names
engine.table_names()

['wine_table']

In [9]:
#Convert cleaned dataframe to database
cleaned_wine_df.to_sql(name='wine_table', con=engine, if_exists='append', index=False)

In [10]:
#Confirm data had been added into database
pd.read_sql_query('select * from wine_table', con=engine).head()

Unnamed: 0,country,designation,points,price,province,region_1,taster_name,title,variety,winery
0,US,Vintner's Reserve Wild Child Block,87,65.0,Oregon,Willamette Valley,Paul Gregutt,Sweet Cheeks 2012 Vintner's Reserve Wild Child...,Pinot Noir,Sweet Cheeks
1,US,Mountain CuvÃ©e,87,19.0,California,Napa Valley,Virginie Boone,Kirkland Signature 2011 Mountain CuvÃ©e Cabern...,Cabernet Sauvignon,Kirkland Signature
2,US,Signature Selection,87,22.0,California,Paso Robles,Matt Kettmann,Bianchi 2011 Signature Selection Merlot (Paso ...,Merlot,Bianchi
3,US,King Ridge Vineyard,87,69.0,California,Sonoma Coast,Virginie Boone,Castello di Amorosa 2011 King Ridge Vineyard P...,Pinot Noir,Castello di Amorosa
4,US,Hyland,86,50.0,Oregon,McMinnville,Paul Gregutt,Erath 2010 Hyland Pinot Noir (McMinnville),Pinot Noir,Erath
