<a href="https://colab.research.google.com/github/vimal-122002/upliance.ai-Assignment/blob/main/code.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

# Load Datasets
def load_datasets(data_dir='.'):
    """Load the three CSV files used by the analysis.

    Args:
        data_dir: Directory containing 'UserDetails.csv', 'OrderDetails.csv'
            and 'CookingSessions.csv'. Defaults to the current working
            directory, which matches the previous hard-coded behaviour.

    Returns:
        Tuple ``(user_details, order_details, cooking_sessions)`` of
        DataFrames, one per file, in that order.

    Raises:
        FileNotFoundError: If any of the three files is missing from
            ``data_dir`` (raised by ``pd.read_csv``).
    """
    # Local import keeps this function self-contained; the file's top-level
    # imports do not include pathlib.
    from pathlib import Path

    base = Path(data_dir)
    user_details = pd.read_csv(base / 'UserDetails.csv')
    order_details = pd.read_csv(base / 'OrderDetails.csv')
    cooking_sessions = pd.read_csv(base / 'CookingSessions.csv')
    return user_details, order_details, cooking_sessions

# Data Cleaning and Preprocessing
def clean_data(user_details, order_details, cooking_sessions):
    """Parse the date columns and keep only completed orders.

    The inputs are copied first, so the DataFrames passed in by the caller
    are never modified; use the returned frames.

    Args:
        user_details: DataFrame with a 'Registration Date' column.
        order_details: DataFrame with 'Order Date' and 'Order Status' columns.
        cooking_sessions: DataFrame with 'Session Start' and 'Session End'
            columns.

    Returns:
        Tuple ``(user_details, order_details, cooking_sessions)`` where the
        date columns listed above have been converted with
        ``pd.to_datetime`` and ``order_details`` contains only rows whose
        'Order Status' equals 'Completed'.
    """
    # Copy up front: assigning converted columns on the originals would
    # silently change the caller's data.
    user_details = user_details.copy()
    order_details = order_details.copy()
    cooking_sessions = cooking_sessions.copy()

    # Convert date columns to datetime
    order_details['Order Date'] = pd.to_datetime(order_details['Order Date'])
    cooking_sessions['Session Start'] = pd.to_datetime(cooking_sessions['Session Start'])
    cooking_sessions['Session End'] = pd.to_datetime(cooking_sessions['Session End'])
    user_details['Registration Date'] = pd.to_datetime(user_details['Registration Date'])

    # Keep completed orders only (this drops canceled and any other status).
    # The explicit copy makes the result an independent frame, so later column
    # assignments on it do not trigger chained-assignment warnings.
    is_completed = order_details['Order Status'] == 'Completed'
    order_details = order_details.loc[is_completed].copy()

    return user_details, order_details, cooking_sessions

# Merge Datasets
def merge_datasets(user_details, order_details, cooking_sessions):
    """Attach user attributes to every order and every cooking session.

    Both joins use pandas' default (inner) merge on 'User ID', so rows whose
    user is absent from ``user_details`` are dropped.

    Args:
        user_details: DataFrame keyed by 'User ID'.
        order_details: DataFrame of orders with a 'User ID' column.
        cooking_sessions: DataFrame of sessions with a 'User ID' column.

    Returns:
        Tuple ``(merged_order_user, merged_session_user)``.
    """
    join_key = 'User ID'

    # Orders enriched with the user's columns.
    orders_with_users = order_details.merge(user_details, on=join_key)

    # Cooking sessions enriched with the user's columns.
    sessions_with_users = cooking_sessions.merge(user_details, on=join_key)

    return orders_with_users, sessions_with_users

# Analysis Functions
def analyze_dish_popularity(order_details):
    """Count orders and total revenue per dish, and chart both.

    Args:
        order_details: DataFrame with 'Dish Name' and 'Amount (USD)' columns.

    Returns:
        Tuple ``(dish_popularity, dish_revenue)``: the number of orders per
        dish (from ``value_counts``) and the summed 'Amount (USD)' per dish.

    Side effects:
        Saves a two-panel bar chart to 'dish_analysis.png' in the current
        working directory and closes the figure afterwards.
    """
    dish_popularity = order_details['Dish Name'].value_counts()
    grouped_by_dish = order_details.groupby('Dish Name')
    dish_revenue = grouped_by_dish['Amount (USD)'].sum()

    # One entry per subplot: (data, title, y-axis label).
    panels = (
        (dish_popularity, 'Dish Popularity', 'Number of Orders'),
        (dish_revenue, 'Dish Revenue', 'Total Revenue (USD)'),
    )

    plt.figure(figsize=(15, 6))
    for slot, (series, heading, y_label) in enumerate(panels, start=1):
        plt.subplot(1, 2, slot)
        series.plot(kind='bar')
        plt.title(heading, fontsize=12)
        plt.xlabel('Dish Name')
        plt.ylabel(y_label)
        plt.xticks(rotation=45, ha='right')

    plt.tight_layout()
    plt.savefig('dish_analysis.png')
    plt.close()

    return dish_popularity, dish_revenue

def analyze_meal_times(merged_order_user):
    """Count orders per 'Time of Day' value and draw them as a pie chart.

    Args:
        merged_order_user: DataFrame with a 'Time of Day' column.

    Returns:
        Series of order counts indexed by 'Time of Day' value.

    Side effects:
        Saves the pie chart to 'meal_time_distribution.png' in the current
        working directory and closes the figure afterwards.
    """
    time_of_day = merged_order_user['Time of Day']
    counts = time_of_day.value_counts()
    slice_labels = counts.index

    plt.figure(figsize=(10, 6))
    plt.pie(
        counts,
        labels=slice_labels,
        autopct='%1.1f%%',
        startangle=90,
    )
    plt.title('Order Distribution by Meal Time', fontsize=12)
    plt.axis('equal')
    plt.savefig('meal_time_distribution.png')
    plt.close()

    return counts

def analyze_cooking_session_performance(merged_session_user):
    """Average session duration and rating per dish, with a chart of each.

    Args:
        merged_session_user: DataFrame with 'Dish Name', 'Duration (mins)'
            and 'Session Rating' columns.

    Returns:
        Tuple ``(avg_session_duration, avg_session_rating)``: the per-dish
        mean of 'Duration (mins)' and of 'Session Rating'.

    Side effects:
        Saves a two-panel bar chart to 'cooking_session_performance.png' in
        the current working directory and closes the figure afterwards.
    """
    mean_duration = merged_session_user.groupby('Dish Name')['Duration (mins)'].mean()
    mean_rating = merged_session_user.groupby('Dish Name')['Session Rating'].mean()

    # One entry per subplot: (data, title, y-axis label).
    panels = (
        (mean_duration, 'Average Session Duration by Dish', 'Average Duration (mins)'),
        (mean_rating, 'Average Session Rating by Dish', 'Average Rating'),
    )

    plt.figure(figsize=(15, 6))
    for slot, (series, heading, y_label) in enumerate(panels, start=1):
        plt.subplot(1, 2, slot)
        series.plot(kind='bar')
        plt.title(heading, fontsize=12)
        plt.xlabel('Dish Name')
        plt.ylabel(y_label)
        plt.xticks(rotation=45, ha='right')

    plt.tight_layout()
    plt.savefig('cooking_session_performance.png')
    plt.close()

    return mean_duration, mean_rating

def demographic_analysis(merged_order_user):
    """Analyze how order value and meal preference vary by age group.

    Ages are bucketed into '18-25', '26-35', '36-45' and '45+' using the
    upper bounds 25, 35 and 45 (inclusive). Rows with a missing 'Age' get no
    age group and are therefore left out of both aggregates; previously a
    missing age failed every ``<=`` comparison and was counted as '45+'.

    Args:
        merged_order_user: DataFrame with 'Age', 'Amount (USD)' and
            'Meal Type' columns.

    Returns:
        Tuple ``(avg_order_by_age, favorite_meals_by_age)``: mean
        'Amount (USD)' per age group, and the most frequent 'Meal Type' per
        age group ('No Data' when a group has no non-null meal types).

    Side effects:
        Adds (or overwrites) an 'Age Group' column on ``merged_order_user``,
        as the original implementation did. Saves a bar chart to
        'demographic_order_analysis.png' and closes the figure afterwards.
    """
    # pd.cut bins are right-closed by default, so these edges reproduce the
    # thresholds age <= 25 / <= 35 / <= 45 / otherwise. NaN ages stay NaN.
    age_bin_edges = [-np.inf, 25, 35, 45, np.inf]
    age_bin_labels = ['18-25', '26-35', '36-45', '45+']
    merged_order_user['Age Group'] = pd.cut(
        merged_order_user['Age'], bins=age_bin_edges, labels=age_bin_labels
    )

    # observed=True limits the result to age groups that actually occur,
    # instead of emitting empty rows for unused categories.
    by_age_group = merged_order_user.groupby('Age Group', observed=True)

    # Compute average order by age group
    avg_order_by_age = by_age_group['Amount (USD)'].mean()

    # Compute favorite meals by age group with safe fallback
    def get_top_meal(group):
        meal_counts = group.value_counts()
        return meal_counts.index[0] if len(meal_counts) > 0 else 'No Data'

    favorite_meals_by_age = by_age_group['Meal Type'].agg(get_top_meal)

    # Visualization
    plt.figure(figsize=(10, 6))
    avg_order_by_age.plot(kind='bar')
    plt.title('Average Order Amount by Age Group', fontsize=12)
    plt.xlabel('Age Group')
    plt.ylabel('Average Order Amount (USD)')
    plt.xticks(rotation=0)
    plt.tight_layout()
    plt.savefig('demographic_order_analysis.png')
    plt.close()

    return avg_order_by_age, favorite_meals_by_age

def generate_report(dish_popularity, dish_revenue, meal_time_distribution,
                    avg_session_duration, avg_session_rating,
                    avg_order_by_age, favorite_meals_by_age):
    """Generate a plain-text report of the findings and save it to disk.

    The report is assembled line by line with no leading indentation. The
    previous indented f-string put four spaces in front of every heading and
    the first line of each table, while the remaining table rows started at
    column 0, so the tables were misaligned.

    Args:
        dish_popularity: Series of order counts per dish; the first five
            rows are shown.
        dish_revenue: Series of revenue per dish; the five largest values
            are shown.
        meal_time_distribution: Series of order counts per time of day.
        avg_session_duration: Series of mean session duration per dish.
        avg_session_rating: Series of mean session rating per dish.
        avg_order_by_age: Series of mean order amount per age group.
        favorite_meals_by_age: Series of the favourite meal per age group.

    Returns:
        The report text, which is also written to 'analysis_report.txt' in
        the current working directory.
    """
    report_lines = [
        "Food Order Analysis Report",
        "=========================",
        "",
        "1. Dish Popularity",
        "-----------------",
        "Most Popular Dishes:",
        str(dish_popularity.head()),
        "",
        "Top Revenue Generating Dishes:",
        str(dish_revenue.sort_values(ascending=False).head()),
        "",
        "2. Meal Time Distribution",
        "-------------------------",
        str(meal_time_distribution),
        "",
        "3. Cooking Session Performance",
        "------------------------------",
        "Average Session Duration by Dish:",
        str(avg_session_duration),
        "",
        "Average Session Rating by Dish:",
        str(avg_session_rating),
        "",
        "4. Demographic Analysis",
        "-----------------------",
        "Average Order Amount by Age Group:",
        str(avg_order_by_age),
        "",
        "Favorite Meals by Age Group:",
        str(favorite_meals_by_age),
        "",
        "Business Recommendations",
        "-----------------------",
        "1. Focus marketing efforts on top-performing dishes",
        "2. Consider expanding meal options for different age groups",
        "3. Optimize cooking session durations for popular dishes",
        "4. Develop targeted promotions based on meal time preferences",
    ]
    report = "\n".join(report_lines) + "\n"

    # Explicit encoding: dish names may contain non-ASCII characters and the
    # platform default encoding is not guaranteed to represent them.
    with open('analysis_report.txt', 'w', encoding='utf-8') as f:
        f.write(report)

    return report

def main():
    """Run the whole pipeline: load, clean, merge, analyze, report, print.

    Reads the three CSV files via ``load_datasets``, runs each analysis
    (every analysis saves its own chart and closes the figure), writes the
    text report via ``generate_report`` and prints the key results and the
    business recommendations to the console.
    """
    # Load Datasets
    user_details, order_details, cooking_sessions = load_datasets()

    # Clean Data
    user_details, order_details, cooking_sessions = clean_data(
        user_details, order_details, cooking_sessions
    )

    # Merge Datasets
    merged_order_user, merged_session_user = merge_datasets(
        user_details, order_details, cooking_sessions
    )

    # Perform Analyses
    dish_popularity, dish_revenue = analyze_dish_popularity(order_details)
    meal_time_distribution = analyze_meal_times(merged_order_user)
    avg_session_duration, avg_session_rating = analyze_cooking_session_performance(merged_session_user)
    avg_order_by_age, favorite_meals_by_age = demographic_analysis(merged_order_user)

    # Generate Report (written to disk; the returned text is not needed here)
    generate_report(dish_popularity, dish_revenue, meal_time_distribution,
                    avg_session_duration, avg_session_rating,
                    avg_order_by_age, favorite_meals_by_age)

    # Display key results in the console. Headings after the first carry a
    # leading newline so the sections are separated by a blank line.
    console_sections = (
        ("Dish Popularity:", dish_popularity.head()),
        ("\nDish Revenue:", dish_revenue.sort_values(ascending=False).head()),
        ("\nMeal Time Distribution:", meal_time_distribution),
        ("\nAverage Session Duration by Dish:", avg_session_duration),
        ("\nAverage Session Rating by Dish:", avg_session_rating),
        ("\nAverage Order Amount by Age Group:", avg_order_by_age),
        ("\nFavorite Meals by Age Group:", favorite_meals_by_age),
    )
    for heading, result in console_sections:
        print(heading)
        print(result)

    # Print the business recommendations
    print("\nBusiness Recommendations:")
    print("""
    1. Focus marketing efforts on top-performing dishes
    2. Consider expanding meal options for different age groups
    3. Optimize cooking session durations for popular dishes
    4. Develop targeted promotions based on meal time preferences
    """)

    # No plt.show() here: every analysis function saves its figure and then
    # closes it, so no figure is left open to display. The charts are
    # available as the saved PNG files instead.

    print("Analysis complete. Check the generated images and report.")

# Run the analysis pipeline only when this file is executed directly,
# not when it is imported as a module.
if __name__ == "__main__":
    main()

Dish Popularity:
Dish Name
Spaghetti          4
Caesar Salad       3
Grilled Chicken    3
Pancakes           2
Oatmeal            1
Name: count, dtype: int64

Dish Revenue:
Dish Name
Spaghetti          55.5
Grilled Chicken    38.5
Caesar Salad       28.0
Pancakes           16.5
Veggie Burger      11.0
Name: Amount (USD), dtype: float64

Meal Time Distribution:
Time of Day
Night      7
Day        4
Morning    3
Name: count, dtype: int64

Average Session Duration by Dish:
Dish Name
Caesar Salad       21.666667
Grilled Chicken    42.500000
Oatmeal            10.000000
Pancakes           30.000000
Spaghetti          35.000000
Veggie Burger      20.000000
Name: Duration (mins), dtype: float64

Average Session Rating by Dish:
Dish Name
Caesar Salad       4.366667
Grilled Chicken    4.775000
Oatmeal            4.100000
Pancakes           4.400000
Spaghetti          4.625000
Veggie Burger      4.350000
Name: Session Rating, dtype: float64

Average Order Amount by Age Group:
Age Group
18-25    