In [175]:
# https://www.kaggle.com/selfishgene/historical-hourly-weather-data
# https://www.shanelynn.ie/summarising-aggregation-and-grouping-data-in-python-pandas/
# https://datatofish.com/create-database-python-using-sqlite3/
import pandas as pd
import sqlite3

In [176]:
####################################### Extract and Transform #############################################
# Raw inputs: the coffee-shop sales export and the hourly temperature table.
sales = pd.read_csv('morse.csv')
temperature = pd.read_csv("historical-hourly-weather-data/temperature.csv")

# Give the sales columns stable names (the frame must have exactly three columns,
# otherwise this assignment raises).
sales.columns = ['sale_date', 'item_name', 'net_quantity']

# In one step: keep only the part of sale_date before the first space
# (i.e. drop the time-of-day) and expose the row index as an explicit index_id column.
sales = sales.assign(
    sale_date=sales['sale_date'].str.split(' ', expand=True)[0],
    index_id=sales.index,
)

In [177]:
# Keep only the timestamp and the San Francisco readings. The explicit .copy()
# guarantees the column assignments below act on an independent frame rather
# than on a slice of the raw table.
temperature = temperature[['datetime','San Francisco']].copy()
temperature.columns = ['date','avg_sf_temperature']
# Convert Kelvin to Fahrenheit: F = 1.8 * (K - 273.15) + 32.
# (The zero of the Celsius scale is 273.15 K, not 273 K; using 273 biases
# every value by +0.27 F, which can flip the whole-degree rounding done later.)
temperature['avg_sf_temperature'] = 1.8*(temperature['avg_sf_temperature']-273.15)+32
# Remove the time-of-day: keep only the part before the first space
temperature['date'] = temperature['date'].str.split(' ',expand=True)[0]
# Reformat the date as mm/dd/yyyy (four-digit year)
temperature['date'] = pd.to_datetime(temperature['date']).dt.strftime('%m/%d/%Y')

In [178]:
# Collapse the readings to one row per date:
#   1. average avg_sf_temperature within each date,
#   2. round that average to a whole number of degrees,
#   3. expose the resulting row index as an explicit index_id column.
temperature = (
    temperature
    .groupby('date', as_index=False)
    .agg({"avg_sf_temperature": "mean"})
    .assign(
        avg_sf_temperature=lambda daily: daily['avg_sf_temperature'].round(),
        index_id=lambda daily: daily.index,
    )
)

In [179]:
# Preview the first three rows of the temperature frame as a sanity check
temperature.head(3)

Unnamed: 0,date,avg_sf_temperature,index_id
0,01/01/2013,44.0,0
1,01/01/2014,49.0,1
2,01/01/2015,48.0,2


In [180]:
# Preview the first three rows of the sales frame as a sanity check
sales.head(3)

Unnamed: 0,sale_date,item_name,net_quantity,index_id
0,04/22/2016,Latte,1,0
1,09/20/2016,Latte,1,1
2,12/27/2016,"Cappuccino, Unknown",1,2


In [150]:
# ####################################### Export .csv to MySQL(OracleSQL) #############################################
# temperature.to_csv('temperature.csv', index=False)
# sales.to_csv('sales.csv', index=False)

In [151]:
####################################### Load to SQLite #############################################
# Open the database; sqlite3.connect creates 'sales.db' if the file does not exist yet.
conn = sqlite3.connect('sales.db')
c = conn.cursor()
# Start from a clean slate so re-running the notebook does not append duplicate rows.
# IF EXISTS makes the drops a no-op on a brand-new database instead of raising
# sqlite3.OperationalError ("no such table").
c.execute('''DROP TABLE IF EXISTS sales_2016''')
c.execute('''DROP TABLE IF EXISTS temperature_master''')

<sqlite3.Cursor at 0x109e2e340>

In [152]:
# DDL for the two target tables. "if not exists" makes each statement a no-op
# when the table is already present.
# sales_2016: one row per sale record.
sales_2016_ddl = '''CREATE TABLE if not exists sales_2016
             (sale_date text, item_name text, net_quantity integer, index_id integer)'''
# temperature_master: one row per date with its temperature value.
temperature_master_ddl = '''CREATE TABLE if not exists temperature_master
             (date text, avg_sf_temperature integer, index_id integer)'''

# Run both statements on the shared cursor.
c.execute(sales_2016_ddl)
c.execute(temperature_master_ddl)

<sqlite3.Cursor at 0x109e2e340>

In [153]:
# Commit any pending transaction on the connection so that the changes made
# through the cursor so far are persisted to the database file
conn.commit()

In [154]:
# Write each DataFrame into its SQLite table.
#   if_exists='append' -> insert into the existing table rather than replacing it
#   index=False        -> do not write the DataFrame index as an extra column
sales.to_sql(
    name='sales_2016',
    con=conn,
    if_exists='append',
    index=False,
)
temperature.to_sql(
    name='temperature_master',
    con=conn,
    if_exists='append',
    index=False,
)