In [4]:
import pandas as pd

# Read the sales workbook from the working directory into a DataFrame.
workbook_path = "Coffe_sales.xlsx"
df = pd.read_excel(workbook_path)

# Confirm the load and preview the leading rows.
preview = df.head()
print("✅ Data Loaded Successfully")
print(preview)

# Summarize column names, dtypes and non-null counts.
df.info()


✅ Data Loaded Successfully
        date                datetime  hour_of_day cash_type  \
0 2024-03-01 2024-03-01 10:15:50.520           10      card   
1 2024-03-01 2024-03-01 12:19:22.539           12      card   
2 2024-03-01 2024-03-01 12:20:18.089           12      card   
3 2024-03-01 2024-03-01 13:46:33.006           13      card   
4 2024-03-01 2024-03-01 13:48:14.626           13      card   

                  card  money    coffee_name Time_of_Day Weekday Month_name  \
0  ANON-0000-0000-0001   38.7          Latte     Morning     Fri        Mar   
1  ANON-0000-0000-0002   38.7  Hot Chocolate   Afternoon     Fri        Mar   
2  ANON-0000-0000-0002   38.7  Hot Chocolate   Afternoon     Fri        Mar   
3  ANON-0000-0000-0003   28.9      Americano   Afternoon     Fri        Mar   
4  ANON-0000-0000-0004   38.7          Latte   Afternoon     Fri        Mar   

   Weekdaysort  Monthsort  
0            5          3  
1            5          3  
2            5          3  
3      

In [5]:
# Clean and standardize the loaded sales data.
#
# Every column is normalized *before* the missing-value filter runs, so values
# that only become missing during cleaning (whitespace-only coffee names,
# non-string names that `.str.strip()` turns into NaN, amounts that cannot be
# parsed as numbers) are dropped as well instead of surviving the cleanup or
# raising part-way through the cell and leaving `df` half-converted.
df = (
    df.assign(
        # Convert date columns to datetime.
        date=lambda frame: pd.to_datetime(frame['date']),
        datetime=lambda frame: pd.to_datetime(frame['datetime']),
        # Ensure a float amount; unparseable entries become NaN and are
        # removed by the dropna step below.
        money=lambda frame: pd.to_numeric(frame['money'], errors='coerce').astype(float),
        # Trim surrounding whitespace; names left empty count as missing.
        coffee_name=lambda frame: frame['coffee_name'].str.strip().replace('', pd.NA),
    )
    # Handle missing values in the two columns the later summaries rely on.
    .dropna(subset=['money', 'coffee_name'])
    # Renumber rows so the index has no gaps where records were dropped.
    .reset_index(drop=True)
)

print("✅ Data cleaned and standardized!")
df.info()


✅ Data cleaned and standardized!
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 3636 entries, 0 to 3635
Data columns (total 12 columns):
 #   Column       Non-Null Count  Dtype         
---  ------       --------------  -----         
 0   date         3636 non-null   datetime64[ns]
 1   datetime     3636 non-null   datetime64[ns]
 2   hour_of_day  3636 non-null   int64         
 3   cash_type    3636 non-null   object        
 4   card         3547 non-null   object        
 5   money        3636 non-null   float64       
 6   coffee_name  3636 non-null   object        
 7   Time_of_Day  3636 non-null   object        
 8   Weekday      3636 non-null   object        
 9   Month_name   3636 non-null   object        
 10  Weekdaysort  3636 non-null   int64         
 11  Monthsort    3636 non-null   int64         
dtypes: datetime64[ns](2), float64(1), int64(3), object(6)
memory usage: 341.0+ KB


In [6]:
def _revenue_by(column):
    """Sum the `money` column of `df` for each distinct value of `column`.

    Returns a two-column DataFrame (`column`, `money`) with a fresh integer
    index, one row per group.
    """
    grouped_totals = df.groupby(column)['money'].sum()
    return grouped_totals.reset_index()


# Revenue for each calendar date.
daily_sales = _revenue_by('date')

# Revenue for each coffee, highest-earning first.
coffee_sales = _revenue_by('coffee_name').sort_values(by='money', ascending=False)

# Revenue split by payment method.
payment_types = _revenue_by('cash_type')


In [7]:
# Write each summary table to its own CSV file in the working directory,
# omitting the DataFrame index from the output.
summary_exports = {
    "daily_sales_summary.csv": daily_sales,
    "coffee_sales_summary.csv": coffee_sales,
    "payment_breakdown.csv": payment_types,
}
for csv_name, summary_table in summary_exports.items():
    summary_table.to_csv(csv_name, index=False)

print("✅ All parsed summaries saved successfully!")


✅ All parsed summaries saved successfully!


In [8]:
# Headline figures for the report.
# `coffee_sales` is sorted by summed `money` in descending order, so its first
# five rows are the five highest-revenue coffees.
top_coffees = coffee_sales.head(5)
days_recorded = df['date'].nunique()
total_revenue = round(df['money'].sum(), 2)

print("☕ Top 5 Best-Selling Coffee Types:")
print(top_coffees)

print("\n💳 Payment Breakdown:")
print(payment_types)

print("\n📆 Total Days Recorded:", days_recorded)
print("💰 Total Revenue: $", total_revenue)


☕ Top 5 Best-Selling Coffee Types:
           coffee_name     money
7                Latte  27866.30
1  Americano with Milk  25269.12
2           Cappuccino  18034.14
0            Americano  15062.26
6        Hot Chocolate  10172.46

💳 Payment Breakdown:
  cash_type      money
0      card  112245.58
1      cash    3186.00

📆 Total Days Recorded: 381
💰 Total Revenue: $ 115431.58
