# Exercises Database Exploration

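This notebook provides a comprehensive analysis of the WorkoutBuddy exercises database table. It currently runs on a small in-memory sample dataset built for demonstration rather than on a live database connection.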

In [None]:
# Standard library
import warnings

# Third-party: numerics, tabular data and plotting
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns

# Suppress all Python warnings from this point on; the filter is global to
# the kernel session, not limited to this cell.
warnings.filterwarnings('ignore')

print("✅ Libraries imported successfully")

## Data Loading

In [None]:
# Build an in-memory sample of the exercises table for demonstration.
#
# The 'difficulty' and 'mets' columns are randomly generated. A dedicated,
# seeded generator keeps them (and every statistic, chart and recommendation
# derived from them) identical across Restart Kernel -> Run All.
SAMPLE_DATA_SEED = 42
sample_rng = np.random.default_rng(SAMPLE_DATA_SEED)

exercises_df = pd.DataFrame({
    'id': range(1, 21),
    'name': [
        'Barbell Squat', 'Deadlift', 'Bench Press', 'Pull-ups', 'Push-ups',
        'Overhead Press', 'Bent-over Rows', 'Lunges', 'Plank', 'Burpees',
        'Running', 'Cycling', 'Swimming', 'Jump Rope', 'Mountain Climbers',
        'Dumbbell Curls', 'Tricep Dips', 'Leg Press', 'Lat Pulldown', 'Crunches'
    ],
    'primary_muscle': [
        'LEGS', 'BACK', 'CHEST', 'BACK', 'CHEST', 'SHOULDERS', 'BACK', 'LEGS',
        'CORE', 'FULL_BODY', 'CARDIO', 'CARDIO', 'CARDIO', 'CARDIO', 'CORE',
        'BICEPS', 'TRICEPS', 'LEGS', 'BACK', 'CORE'
    ],
    'equipment': [
        'BARBELL', 'BARBELL', 'BARBELL', 'NONE', 'NONE', 'BARBELL', 'BARBELL',
        'NONE', 'NONE', 'NONE', 'NONE', 'CARDIO_MACHINE', 'NONE', 'NONE', 'NONE',
        'DUMBBELL', 'NONE', 'MACHINE', 'MACHINE', 'NONE'
    ],
    'exercise_type': [
        'STRENGTH', 'STRENGTH', 'STRENGTH', 'STRENGTH', 'STRENGTH', 'STRENGTH',
        'STRENGTH', 'STRENGTH', 'STRENGTH', 'STRENGTH', 'CARDIO', 'CARDIO',
        'CARDIO', 'CARDIO', 'STRENGTH', 'STRENGTH', 'STRENGTH', 'STRENGTH',
        'STRENGTH', 'STRENGTH'
    ],
    # Integer levels 1..5 (the upper bound 6 is exclusive), one per exercise.
    'difficulty': sample_rng.integers(1, 6, 20),
    # Floats drawn uniformly from [2.0, 8.0), one per exercise.
    'mets': sample_rng.uniform(2.0, 8.0, 20)
})

print(f"✅ Loaded {len(exercises_df)} exercises")
exercises_df.head()

## Basic Statistics

In [None]:
# Structural overview of the frame: size, column names, dtypes and null counts.
row_total = len(exercises_df)
column_names = list(exercises_df.columns)
null_counts = exercises_df.isnull().sum()

print("📊 EXERCISES DATABASE OVERVIEW")
print("=" * 50)
print(f"Total exercises: {row_total}")
print(f"Columns: {column_names}")

print("\nData types:")
print(exercises_df.dtypes)

print("\nMissing values:")
print(null_counts)

## Exercise Categorization Analysis

In [None]:
# Tally every categorical column up front. These Series are also read by
# later cells, so their names must stay as they are.
muscle_counts = exercises_df['primary_muscle'].value_counts()
equipment_counts = exercises_df['equipment'].value_counts()
exercise_type_counts = exercises_df['exercise_type'].value_counts()
# Order difficulty bars by level (index) rather than by frequency.
difficulty_counts = exercises_df['difficulty'].value_counts().sort_index()

# 2x2 grid: pie charts in the left column, bar charts in the right column.
fig, axes = plt.subplots(nrows=2, ncols=2, figsize=(15, 12))
fig.suptitle('Exercise Categorization Analysis', fontsize=16, fontweight='bold')

# Left column: share of exercises per muscle group (top) and per type (bottom).
for pie_ax, pie_counts, pie_title in (
    (axes[0, 0], muscle_counts, 'Primary Muscle Groups Distribution'),
    (axes[1, 0], exercise_type_counts, 'Exercise Types Distribution'),
):
    pie_ax.pie(pie_counts.values, labels=pie_counts.index, autopct='%1.1f%%')
    pie_ax.set_title(pie_title)

# Top-right: number of exercises per equipment type.
equipment_ax = axes[0, 1]
equipment_ax.bar(equipment_counts.index, equipment_counts.values, color='skyblue')
equipment_ax.set_title('Equipment Types Distribution')
# Rotate the category labels so long names do not overlap.
equipment_ax.tick_params(axis='x', rotation=45)

# Bottom-right: number of exercises per difficulty level.
difficulty_ax = axes[1, 1]
difficulty_ax.bar(difficulty_counts.index, difficulty_counts.values, color='lightcoral')
difficulty_ax.set(
    title='Difficulty Levels Distribution',
    xlabel='Difficulty Level (1-5)',
    ylabel='Number of Exercises',
)

plt.tight_layout()
plt.show()

## Summary Statistics

In [None]:
# Text summary of the categorical tallies plus difficulty / METS statistics.
# The three category breakdowns share one format, so they are driven from a
# single table instead of three copy-pasted loops.
total_exercises = len(exercises_df)

print("📈 CATEGORIZATION SUMMARY")
print("=" * 40)

category_breakdowns = (
    ("Primary Muscle Groups", muscle_counts),
    ("Equipment Types", equipment_counts),
    ("Exercise Types", exercise_type_counts),
)
for heading, counts in category_breakdowns:
    print(f"\n{heading} ({len(counts)} categories):")
    for label, count in counts.items():
        # Share of the whole table, as a percentage with one decimal place.
        print(f"  {label}: {count} exercises ({count/total_exercises*100:.1f}%)")

# Look each numeric column up once instead of once per statistic.
difficulty = exercises_df['difficulty']
mets = exercises_df['mets']

print("\n📊 DIFFICULTY ANALYSIS:")
print(f"  Mean difficulty: {difficulty.mean():.2f}")
print(f"  Median difficulty: {difficulty.median():.2f}")
print(f"  Range: {difficulty.min()} - {difficulty.max()}")

print("\n🔥 METS ANALYSIS:")
print(f"  Mean METS: {mets.mean():.2f}")
print(f"  Median METS: {mets.median():.2f}")
print(f"  Range: {mets.min():.2f} - {mets.max():.2f}")

## Equipment vs Muscle Group Analysis

In [None]:
# Cross-tabulate equipment (rows) against primary muscle group (columns);
# each cell holds the number of exercises with that pairing.
equipment_muscle_cross = pd.crosstab(exercises_df['equipment'], exercises_df['primary_muscle'])

plt.figure(figsize=(12, 8))
sns.heatmap(equipment_muscle_cross, annot=True, fmt='d', cmap='Blues', cbar_kws={'label': 'Number of Exercises'})
plt.title('Equipment vs Primary Muscle Group Distribution', fontsize=14, fontweight='bold')
plt.xlabel('Primary Muscle Group')
plt.ylabel('Equipment Type')
plt.xticks(rotation=45)
plt.yticks(rotation=0)
plt.tight_layout()
plt.show()

print("\n🔍 EQUIPMENT-MUSCLE GROUP INSIGHTS")
print("=" * 40)

# Collect every pairing that occurs at least once...
nonzero_combos = [
    (equipment, muscle, count)
    for equipment, row in equipment_muscle_cross.iterrows()
    for muscle, count in row.items()
    if count > 0
]

# ...and list them from most to least frequent, so the output matches its
# "Most common" heading. sorted() is stable, so pairings with equal counts
# keep their crosstab (row, then column) order.
print("\nMost common equipment-muscle combinations:")
for equipment, muscle, count in sorted(nonzero_combos, key=lambda combo: combo[2], reverse=True):
    print(f"  {equipment} + {muscle}: {count} exercises")

## Sample Exercise Recommendations

In [None]:
# Print small random samples of exercises for a few illustrative categories.
# Sampling is seeded so the recommendations are identical on every re-run.
RECOMMENDATION_SEED = 42
MAX_RECOMMENDATIONS = 5


def _sample_up_to(frame, limit=MAX_RECOMMENDATIONS, seed=RECOMMENDATION_SEED):
    """Return at most `limit` randomly chosen rows of `frame`.

    The sample size is capped at the number of available rows, so a category
    with fewer than `limit` matches (including none) does not raise. `seed`
    is passed to `DataFrame.sample` as `random_state`.
    """
    return frame.sample(min(limit, len(frame)), random_state=seed)


print("🎯 SAMPLE EXERCISE RECOMMENDATIONS")
print("=" * 40)

# Beginner exercises (difficulty 1-2)
beginner_exercises = _sample_up_to(exercises_df[exercises_df['difficulty'] <= 2])
print("\n🏃‍♂️ BEGINNER EXERCISES (Difficulty 1-2):")
for _, exercise in beginner_exercises.iterrows():
    print(f"  • {exercise['name']} ({exercise['primary_muscle']}, {exercise['equipment']}, {exercise['difficulty']}/5)")

# Advanced exercises (difficulty 4-5)
advanced_exercises = _sample_up_to(exercises_df[exercises_df['difficulty'] >= 4])
print("\n💪 ADVANCED EXERCISES (Difficulty 4-5):")
for _, exercise in advanced_exercises.iterrows():
    print(f"  • {exercise['name']} ({exercise['primary_muscle']}, {exercise['equipment']}, {exercise['difficulty']}/5)")

# High METS exercises: rows at or above the 80th percentile of 'mets',
# regardless of exercise type. The threshold is computed once.
mets_threshold = exercises_df['mets'].quantile(0.8)
high_mets_exercises = _sample_up_to(exercises_df[exercises_df['mets'] >= mets_threshold])
print("\n🔥 HIGH INTENSITY EXERCISES (Top 20% METS):")
for _, exercise in high_mets_exercises.iterrows():
    print(f"  • {exercise['name']} ({exercise['exercise_type']}, {exercise['mets']:.1f} METS)")

# Equipment-free exercises
no_equipment = _sample_up_to(exercises_df[exercises_df['equipment'] == 'NONE'])
print("\n🏠 EQUIPMENT-FREE EXERCISES:")
for _, exercise in no_equipment.iterrows():
    print(f"  • {exercise['name']} ({exercise['primary_muscle']}, {exercise['difficulty']}/5)")

## Summary

In [None]:
# Closing report: headline counts, key findings and suggested follow-up work.
exercise_total = len(exercises_df)
muscle_group_total = len(exercises_df['primary_muscle'].unique())
equipment_total = len(exercises_df['equipment'].unique())
exercise_type_total = len(exercises_df['exercise_type'].unique())

# Take the first entry of each tally as its "most common" category.
top_muscle, top_muscle_count = muscle_counts.index[0], muscle_counts.iloc[0]
top_equipment, top_equipment_count = equipment_counts.index[0], equipment_counts.iloc[0]

mean_difficulty = exercises_df['difficulty'].mean()
mean_mets = exercises_df['mets'].mean()

print("📋 SUMMARY AND NEXT STEPS")
print("=" * 35)

print("\n✅ ANALYSIS COMPLETED:")
print(f"  • Analyzed {exercise_total} exercises")
print(f"  • Covered {muscle_group_total} muscle groups")
print(f"  • Included {equipment_total} equipment types")
print(f"  • Spanned {exercise_type_total} exercise types")

print("\n🎯 KEY FINDINGS:")
print(f"  • Most common muscle group: {top_muscle} ({top_muscle_count} exercises)")
print(f"  • Most common equipment: {top_equipment} ({top_equipment_count} exercises)")
print(f"  • Average difficulty: {mean_difficulty:.2f}/5")
print(f"  • Average METS: {mean_mets:.2f}")

print("\n🚀 RECOMMENDED NEXT STEPS:")
next_step_lines = (
    "  1. Add more exercises for underrepresented muscle groups",
    "  2. Increase video content coverage",
    "  3. Enhance exercise descriptions and tips",
    "  4. Consider adding more beginner-friendly exercises",
    "  5. Implement exercise recommendation algorithms",
    "  6. Create workout templates based on exercise categories",
)
for step_line in next_step_lines:
    print(step_line)

print("\n🎉 Analysis complete! The exercises database is ready for use in the WorkoutBuddy application.")