# In [None]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import mean_absolute_error, r2_score
import matplotlib.pyplot as plt
import seaborn as sns
import warnings
warnings.filterwarnings('ignore')

# Startup banner: title line followed by a 50-character rule.
print("🏏 IPL Auction Price Prediction System", "=" * 50, sep="\n")

def load_dataset(path=None):
    """Load the auction CSV and check that every column the model needs exists.

    Args:
        path: Optional path of the CSV file to read. When omitted,
            'ipl_auction.csv' is tried first and 'ipl_auction_dataset.csv'
            is accepted as an alternative name.

    Returns:
        The loaded DataFrame, or None when no candidate file exists, the file
        cannot be read, or a required column is missing. Every failure is
        reported on stdout instead of being raised.
    """
    if path is not None:
        candidates = [path]
    else:
        candidates = ['ipl_auction.csv', 'ipl_auction_dataset.csv']

    required_columns = ['Age', 'Experience', 'Matches', 'Runs', 'Wickets',
                        'Strike_Rate', 'Economy_Rate', 'Role', 'Country', 'Price_Crores']

    try:
        df = None
        for candidate in candidates:
            try:
                df = pd.read_csv(candidate)
            except FileNotFoundError:
                # Fall through to the next candidate name, if any.
                continue
            break

        if df is None:
            # Name the files that were actually looked for, so the message
            # always matches what the loader tried to open.
            tried = " or ".join(f"'{name}'" for name in candidates)
            print(f"❌ Error: {tried} file not found!")
            print("📋 Please upload a CSV file with the following columns:")
            print("   - Player, Role, Country, Age, Experience, Matches")
            print("   - Runs, Wickets, Strike_Rate, Economy_Rate, Price_Crores")
            return None

        print(f"✅ Dataset loaded successfully with {len(df)} players")

        print("\n📊 Dataset Overview:")
        print(df.head())
        print(f"\nDataset shape: {df.shape}")
        print(f"Columns: {list(df.columns)}")

        missing_columns = [col for col in required_columns if col not in df.columns]
        if missing_columns:
            print(f"❌ Missing required columns: {missing_columns}")
            return None

        return df

    except Exception as e:
        # Best-effort loader for an interactive script: report and carry on.
        print(f"❌ Error loading dataset: {e}")
        return None

def train_model(df):
    """Fit the price model and the two categorical encoders on ``df``.

    'Role' and 'Country' are label-encoded, the data is split 80/20 with a
    fixed seed, a 100-tree random forest is fitted on the larger part, and
    MAE and R² on the smaller part are printed.

    Returns:
        ``(model, le_role, le_country, features)`` on success, where
        ``features`` is the list of column names fed to the model.
        ``(None, None, None, None)`` if anything goes wrong; the error is
        printed rather than raised.
    """
    try:
        role_encoder, country_encoder = LabelEncoder(), LabelEncoder()

        # Work on a copy carrying the two encoded columns; df is untouched.
        encoded = df.assign(
            Role_Encoded=role_encoder.fit_transform(df['Role']),
            Country_Encoded=country_encoder.fit_transform(df['Country']),
        )

        feature_names = ['Age', 'Experience', 'Matches', 'Runs', 'Wickets',
                         'Strike_Rate', 'Economy_Rate', 'Role_Encoded', 'Country_Encoded']

        X_train, X_test, y_train, y_test = train_test_split(
            encoded[feature_names],
            encoded['Price_Crores'],
            test_size=0.2,
            random_state=42,
        )

        forest = RandomForestRegressor(n_estimators=100, random_state=42)
        forest.fit(X_train, y_train)

        # Score on the held-out rows only.
        held_out_pred = forest.predict(X_test)
        mae = mean_absolute_error(y_test, held_out_pred)
        r2 = r2_score(y_test, held_out_pred)

        print("✅ Model trained successfully!")
        print(f"📊 Mean Absolute Error: {mae:.2f} Crores")
        print(f"📊 R² Score: {r2:.3f}")

        return forest, role_encoder, country_encoder, feature_names

    except Exception as e:
        print(f"❌ Error training model: {e}")
        return None, None, None, None

def predict_csv_players(df, model, le_role, le_country, features):
    """Predict a price for every row of ``df`` and print a comparison report.

    Args:
        df: Player table containing 'Role', 'Country', 'Price_Crores' and the
            numeric columns named in ``features``.
        model: Fitted estimator exposing ``predict``.
        le_role: Fitted encoder used to transform the 'Role' column.
        le_country: Fitted encoder used to transform the 'Country' column.
        features: Column names, in order, passed to ``model.predict``.

    Returns:
        A copy of ``df`` with 'Predicted_Price' (rounded to 2 decimals) and
        'Price_Difference' (predicted minus actual) added, or None if any
        step fails; the error is printed rather than raised.
    """
    try:
        encoded = df.assign(
            Role_Encoded=le_role.transform(df['Role']),
            Country_Encoded=le_country.transform(df['Country']),
        )
        raw_predictions = model.predict(encoded[features])

        results = df.copy()
        results['Predicted_Price'] = [round(value, 2) for value in raw_predictions]
        results['Price_Difference'] = results['Predicted_Price'] - results['Price_Crores']

        print("\n📋 PREDICTION RESULTS:")
        print("="*100)

        # Identify rows by player name when available, otherwise by role.
        has_player = 'Player' in results.columns
        label_col = 'Player' if has_player else 'Role'
        leading_cols = ['Player', 'Role', 'Country'] if has_player else ['Role', 'Country', 'Age']
        display_cols = leading_cols + ['Price_Crores', 'Predicted_Price', 'Price_Difference']

        print(results[display_cols].to_string(index=False))

        abs_error = results['Price_Difference'].abs()
        mae = np.mean(np.abs(results['Price_Difference']))
        print("\n📊 Prediction Statistics:")
        print(f"   Mean Absolute Error: {mae:.2f} Crores")

        best_label = results.loc[abs_error.idxmin(), label_col]
        print(f"   Best Prediction (lowest error): {best_label}")
        worst_label = results.loc[abs_error.idxmax(), label_col]
        print(f"   Worst Prediction (highest error): {worst_label}")

        create_visualization(results)

        return results

    except Exception as e:
        print(f"❌ Error making predictions: {e}")
        return None

def predict_custom_player(model, le_role, le_country):
    """Prompt for one player's details on stdin and print the predicted price.

    Args:
        model: Fitted estimator exposing ``predict``.
        le_role: Fitted encoder for roles; its ``classes_`` are shown as options.
        le_country: Fitted encoder for countries; its ``classes_`` are shown
            as options.

    Returns:
        The predicted price, or None when a numeric field cannot be parsed,
        the role or country is not known to its encoder, or any other error
        occurs. Problems are printed rather than raised.
    """
    print("\n🏏 Enter Player Details:")
    print("-" * 30)

    # Prompts are asked in this order; each answer is parsed with its caster.
    numeric_prompts = (
        ("Age: ", int),
        ("Experience (years): ", int),
        ("Total Matches: ", int),
        ("Total Runs: ", int),
        ("Total Wickets: ", int),
        ("Strike Rate: ", float),
        ("Economy Rate: ", float),
    )

    try:
        (age, experience, matches, runs, wickets,
         strike_rate, economy_rate) = [cast(input(prompt)) for prompt, cast in numeric_prompts]

        print(f"\nRole options: {list(le_role.classes_)}")
        role = input("Role: ").strip()

        print(f"Country options: {list(le_country.classes_)}")
        country = input("Country: ").strip()

        # Both answers are collected before either is validated.
        try:
            role_encoded = le_role.transform([role])[0]
        except ValueError:
            print(f"⚠️ Unknown role '{role}'. Available roles: {list(le_role.classes_)}")
            return None

        try:
            country_encoded = le_country.transform([country])[0]
        except ValueError:
            print(f"⚠️ Unknown country '{country}'. Available countries: {list(le_country.classes_)}")
            return None

        # Single-row input; value order mirrors the prompts, then the two codes.
        features_array = np.array([[age, experience, matches, runs, wickets,
                                    strike_rate, economy_rate, role_encoded, country_encoded]])

        predicted_price = model.predict(features_array)[0]

        rule = "=" * 60
        summary = [
            "\n" + rule,
            "🎯 PREDICTION RESULT",
            rule,
            "👤 Player Details:",
            f"   Age: {age} years",
            f"   Experience: {experience} years",
            f"   Role: {role}",
            f"   Country: {country}",
            f"   Matches: {matches}",
            f"   Runs: {runs}",
            f"   Wickets: {wickets}",
            f"   Strike Rate: {strike_rate}",
            f"   Economy Rate: {economy_rate}",
            "-" * 60,
        ]
        print("\n".join(summary))
        print(f"💰 Predicted Auction Price: ₹{predicted_price:.2f} Crores")
        print(rule)

        return predicted_price

    except ValueError as e:
        print(f"❌ Error: Please enter valid numeric values. {e}")
        return None
    except Exception as e:
        print(f"❌ Error: {e}")
        return None

def create_visualization(results):
    """Show a three-panel figure summarising prediction quality.

    Panels, left to right: actual vs predicted scatter with a y = x reference
    line; per-row prediction error bars (green when predicted >= actual, red
    otherwise); box plot of actual prices grouped by role.

    Args:
        results: DataFrame with 'Price_Crores', 'Predicted_Price',
            'Price_Difference' and 'Role' columns.

    Returns:
        None. Plotting is best-effort: any failure is printed as a warning
        instead of being raised.
    """
    try:
        plt.figure(figsize=(15, 5))

        # Panel 1: actual vs predicted, with the perfect-prediction diagonal.
        plt.subplot(1, 3, 1)
        plt.scatter(results['Price_Crores'], results['Predicted_Price'],
                    alpha=0.7, color='blue', s=60)

        min_price = min(results['Price_Crores'].min(), results['Predicted_Price'].min())
        max_price = max(results['Price_Crores'].max(), results['Predicted_Price'].max())
        plt.plot([min_price, max_price], [min_price, max_price], 'r--', alpha=0.8)

        plt.xlabel('Actual Price (Crores)')
        plt.ylabel('Predicted Price (Crores)')
        plt.title('Actual vs Predicted Prices')
        plt.grid(True, alpha=0.3)

        # Panel 2: signed error per row.
        plt.subplot(1, 3, 2)
        colors = ['green' if x >= 0 else 'red' for x in results['Price_Difference']]
        plt.bar(range(len(results)), results['Price_Difference'], color=colors, alpha=0.7)
        plt.axhline(y=0, color='black', linestyle='-', alpha=0.3)
        plt.xlabel('Player Index')
        plt.ylabel('Price Difference (Predicted - Actual)')
        plt.title('Prediction Errors')
        plt.grid(True, alpha=0.3)

        # Panel 3: actual price distribution per role.
        plt.subplot(1, 3, 3)
        roles = results['Role'].unique()
        prices_by_role = [results.loc[results['Role'] == role, 'Price_Crores'].tolist()
                          for role in roles]
        plt.boxplot(prices_by_role)
        # Label the boxes through xticks rather than boxplot's `labels`
        # keyword, which newer Matplotlib releases deprecate; boxes are drawn
        # at positions 1..N by default.
        plt.xticks(range(1, len(roles) + 1), roles, rotation=45)
        plt.xlabel('Role')
        plt.ylabel('Price (Crores)')
        plt.title('Price Distribution by Role')
        plt.grid(True, alpha=0.3)

        plt.tight_layout()
        plt.show()

    except Exception as e:
        print(f"⚠️ Could not create visualization: {e}")

def main():
    """Run the interactive predictor: load data, train, then serve the menu.

    Returns early (after printing why) when the dataset cannot be loaded or
    the model cannot be trained. Otherwise loops on a three-option menu until
    the user chooses to exit.
    """
    print("🚀 Initializing IPL Auction Price Prediction System...")

    df = load_dataset()
    if df is None:
        # Name the file the loader reads by default.
        print("❌ Cannot proceed without dataset. Please upload 'ipl_auction.csv'")
        return

    print("\n🤖 Training machine learning model...")
    model, le_role, le_country, features = train_model(df)

    if model is None:
        print("❌ Cannot proceed without trained model.")
        return

    while True:
        print("\n" + "="*50)
        print("🏏 IPL AUCTION PRICE PREDICTOR")
        print("="*50)
        print("1. 📈 Predict prices using CSV file")
        print("2. 👤 Predict price for custom player")
        print("3. ❌ Exit")
        print("="*50)

        choice = input("Enter your choice (1-3): ").strip()

        if choice == '1':
            print("\n🔍 Predicting prices for all players in CSV...")
            # The helper prints its own report; the return value is not needed here.
            predict_csv_players(df, model, le_role, le_country, features)

        elif choice == '2':
            print("\n👤 Custom Player Price Prediction")
            predict_custom_player(model, le_role, le_country)

        elif choice == '3':
            print("\n👋 Thank you for using IPL Auction Price Predictor!")
            print("🏏 Good luck with your auction predictions!")
            break

        else:
            print("❌ Invalid choice. Please enter 1, 2, or 3.")

def show_instructions():
    """Print usage instructions: the expected CSV file name and its columns."""
    rule = "=" * 70
    print("\n" + rule)
    print("📋 INSTRUCTIONS TO USE THIS SYSTEM")
    print(rule)
    # The file name must match the one the dataset loader reads by default.
    print("1. 📁 Upload your CSV file named 'ipl_auction.csv'")
    print("2. 📊 Your CSV file should have these columns:")
    print("   - Player, Role, Country, Age, Experience, Matches")
    print("   - Runs, Wickets, Strike_Rate, Economy_Rate, Price_Crores")
    print("3. 🚀 Run this code and select your option")
    print(rule)

# Script entry point: print the usage instructions, then start the
# interactive menu. Skipped when the module is imported.
if __name__ == "__main__":
    show_instructions()
    main()