# Data Loading and Database Connection for ML Forecasting

This notebook focuses on forecasting daily coffee shop sales by combining
historical sales data with external weather information stored in a
Dockerized MySQL database.

In [2]:
import os
from urllib.parse import quote

import pandas as pd
from dotenv import load_dotenv
from sqlalchemy import create_engine

# Load MYSQL_* settings from a local .env file (if one exists) into the process
# environment, so credentials do not have to be written into the notebook.
load_dotenv()

DB_NAME = os.getenv("MYSQL_DATABASE")
DB_USER = os.getenv("MYSQL_USER")
DB_PASSWORD = os.getenv("MYSQL_PASSWORD")
DB_HOST = "127.0.0.1"
DB_PORT = 3306

# Fail fast: an unset variable would otherwise be formatted into the URL as the
# literal text "None". create_engine() does not connect eagerly, so the mistake
# would only show up later, at query time, as a misleading connection error.
_missing_settings = [
    env_name
    for env_name, value in (
        ("MYSQL_DATABASE", DB_NAME),
        ("MYSQL_USER", DB_USER),
        ("MYSQL_PASSWORD", DB_PASSWORD),
    )
    if value is None
]
if _missing_settings:
    raise RuntimeError(
        "Missing required environment variable(s): "
        + ", ".join(_missing_settings)
        + ". Define them in the environment or in a .env file."
    )

# Percent-encode the credentials so characters such as "@", ":" or "/" cannot
# be mistaken for URL delimiters. safe="" is required because quote() leaves
# "/" unescaped by default.
engine = create_engine(
    f"mysql+pymysql://{quote(DB_USER, safe='')}:{quote(DB_PASSWORD, safe='')}"
    f"@{DB_HOST}:{DB_PORT}/{DB_NAME}"
)


##### Loading Weather and Sales Data from the Dockerized MySQL Database


In [3]:
# Read both source tables in full through the shared SQLAlchemy engine; each
# query result is returned as its own DataFrame.
coffee_sales = pd.read_sql(sql="SELECT * FROM coffee_sales", con=engine)
weather_data = pd.read_sql(sql="SELECT * FROM weather", con=engine)

In [4]:
# Preview the first five sales rows to sanity-check the load.
coffee_sales.head(n=5)

Unnamed: 0,transaction_id,transaction_date,transaction_time,transaction_qty,store_id,store_location,product_id,unit_price,product_category,product_type,product_detail,total_sales,month,hour,weekday,year_num,month_num,day_num
0,1,2023-01-01,0 days 07:06:11,2,5,Lower Manhattan,32,3.0,Coffee,Gourmet brewed coffee,Ethiopia Rg,6.0,2023-01,7,Sunday,2023,1,1
1,2,2023-01-01,0 days 07:08:56,2,5,Lower Manhattan,57,3.1,Tea,Brewed Chai tea,Spicy Eye Opener Chai Lg,6.2,2023-01,7,Sunday,2023,1,1
2,3,2023-01-01,0 days 07:14:04,2,5,Lower Manhattan,59,4.5,Drinking Chocolate,Hot chocolate,Dark chocolate Lg,9.0,2023-01,7,Sunday,2023,1,1
3,4,2023-01-01,0 days 07:20:24,1,5,Lower Manhattan,22,2.0,Coffee,Drip coffee,Our Old Time Diner Blend Sm,2.0,2023-01,7,Sunday,2023,1,1
4,5,2023-01-01,0 days 07:22:41,2,5,Lower Manhattan,57,3.1,Tea,Brewed Chai tea,Spicy Eye Opener Chai Lg,6.2,2023-01,7,Sunday,2023,1,1


In [5]:
# Preview the first five weather rows to sanity-check the load.
weather_data.head(n=5)

Unnamed: 0,date,temperature_2m_mean
0,2023-01-01,8.78542
1,2023-01-02,7.98125
2,2023-01-03,9.69167
3,2023-01-04,13.0813
4,2023-01-05,9.9125


In [6]:
# Rebind rather than mutate in place, and tolerate the column already being
# absent, so re-running this cell does not raise KeyError.
# NOTE(review): transaction_time is presumably redundant with the `hour`
# column shown in the preview — confirm before relying on that.
coffee_sales = coffee_sales.drop(columns=["transaction_time"], errors="ignore")
