# In [None]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import mean_squared_error, r2_score
import joblib
import warnings
warnings.filterwarnings('ignore')

class IPLScorePredictor:
    """Random-forest model that projects a final innings score from a match state.

    Typical use: ``load_data`` -> ``preprocess_data`` -> ``train_model``, then
    ``predict_score`` for one match state or ``predict_from_csv`` for a batch.
    """

    # Limits baked into the derived-feature formulas below.
    TOTAL_BALLS = 120
    TOTAL_WICKETS = 10
    BALLS_PER_OVER = 6

    # Column holding the regression target in the training data.
    TARGET_COLUMN = 'final_score'

    # Raw input columns, in the same order as predict_score's parameters.
    REQUIRED_COLUMNS = (
        'batting_team', 'bowling_team', 'venue',
        'runs_scored', 'wickets_lost', 'overs_completed',
    )

    def __init__(self):
        """Create an untrained model and unfitted label encoders."""
        self.model = RandomForestRegressor(n_estimators=100, random_state=42)
        self.le_batting_team = LabelEncoder()
        self.le_bowling_team = LabelEncoder()
        self.le_venue = LabelEncoder()
        self.feature_columns = []

    @staticmethod
    def _derived_features(runs_scored, wickets_lost, overs_completed):
        """Return ``(balls_left, wickets_left, current_run_rate)``.

        Works on scalars and on pandas Series alike, so training and
        inference always use the same formulas.
        """
        # NOTE(review): overs_completed is treated as a decimal number of
        # overs. If the data uses cricket "overs.balls" notation (10.3 meaning
        # 10 overs and 3 balls), balls_left is slightly off - confirm against
        # the dataset.
        balls_left = IPLScorePredictor.TOTAL_BALLS - (
            overs_completed * IPLScorePredictor.BALLS_PER_OVER
        )
        wickets_left = IPLScorePredictor.TOTAL_WICKETS - wickets_lost
        # The +0.1 keeps the division defined when no overs have been bowled.
        current_run_rate = runs_scored / (overs_completed + 0.1)
        return balls_left, wickets_left, current_run_rate

    @classmethod
    def _coerce_state(cls, runs_scored, wickets_lost, overs_completed):
        """Return the three numeric inputs as floats.

        Raises:
            ValueError: if a value is non-numeric, NaN/infinite, or outside
                the range the feature formulas assume.
        """
        try:
            runs = float(runs_scored)
            wickets = float(wickets_lost)
            overs = float(overs_completed)
        except (TypeError, ValueError):
            raise ValueError(
                "runs_scored, wickets_lost and overs_completed must be numeric."
            ) from None

        if not (np.isfinite(runs) and np.isfinite(wickets) and np.isfinite(overs)):
            raise ValueError(
                "runs_scored, wickets_lost and overs_completed must not be missing."
            )

        max_overs = cls.TOTAL_BALLS / cls.BALLS_PER_OVER
        in_range = (
            runs >= 0
            and 0 <= wickets <= cls.TOTAL_WICKETS
            and 0 <= overs <= max_overs
        )
        if not in_range:
            raise ValueError(
                f"expected runs_scored >= 0, 0 <= wickets_lost <= {cls.TOTAL_WICKETS} "
                f"and 0 <= overs_completed <= {max_overs:g}."
            )
        return runs, wickets, overs

    def load_data(self, csv_file):
        """Read the training CSV into ``self.df`` and return it."""
        print(f"Loading IPL data from {csv_file}...")
        self.df = pd.read_csv(csv_file)
        print(f"Dataset shape: {self.df.shape}")
        return self.df

    def preprocess_data(self):
        """Drop incomplete rows, add derived and encoded columns, return ``self.df``.

        Also fits the three label encoders and sets ``self.feature_columns``.
        """
        print("Preprocessing data...")
        self.df.dropna(subset=list(self.REQUIRED_COLUMNS), inplace=True)

        balls_left, wickets_left, run_rate = self._derived_features(
            self.df['runs_scored'], self.df['wickets_lost'], self.df['overs_completed']
        )
        self.df['balls_left'] = balls_left
        self.df['wickets_left'] = wickets_left
        self.df['current_run_rate'] = run_rate

        self.df['batting_team_encoded'] = self.le_batting_team.fit_transform(self.df['batting_team'])
        self.df['bowling_team_encoded'] = self.le_bowling_team.fit_transform(self.df['bowling_team'])
        self.df['venue_encoded'] = self.le_venue.fit_transform(self.df['venue'])

        self.feature_columns = [
            'batting_team_encoded', 'bowling_team_encoded', 'venue_encoded',
            'runs_scored', 'wickets_lost', 'overs_completed', 'balls_left',
            'wickets_left', 'current_run_rate'
        ]
        return self.df

    def train_model(self):
        """Fit the model on an 80/20 split and print hold-out metrics.

        Returns:
            dict with keys ``'mse'``, ``'rmse'`` and ``'r2'`` for the test split.
        """
        print("Training model...")
        # Rows without a target cannot be used for fitting; the estimator
        # would reject NaN in y.
        training_rows = self.df.dropna(subset=[self.TARGET_COLUMN])
        X = training_rows[self.feature_columns]
        y = training_rows[self.TARGET_COLUMN]

        X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
        self.model.fit(X_train, y_train)

        y_pred = self.model.predict(X_test)
        mse = mean_squared_error(y_test, y_pred)
        rmse = np.sqrt(mse)
        r2 = r2_score(y_test, y_pred)

        print(f"Model Performance → MSE: {mse:.2f} | RMSE: {rmse:.2f} | R²: {r2:.3f}")
        return {'mse': mse, 'rmse': rmse, 'r2': r2}

    def predict_score(self, batting_team, bowling_team, venue, runs_scored, wickets_lost, overs_completed):
        """Predict the final score for one match state.

        Returns:
            The prediction truncated to ``int``, or a string starting with
            ``"Error:"`` when the numeric inputs are invalid or a team/venue
            is unknown to the fitted encoders.
        """
        try:
            runs, wickets, overs = self._coerce_state(runs_scored, wickets_lost, overs_completed)
        except ValueError as exc:
            return f"Error: {exc}"

        balls_left, wickets_left, current_run_rate = self._derived_features(runs, wickets, overs)

        try:
            batting_encoded = self.le_batting_team.transform([batting_team])[0]
            bowling_encoded = self.le_bowling_team.transform([bowling_team])[0]
            venue_encoded = self.le_venue.transform([venue])[0]
        except ValueError:
            return "Error: Unknown team or venue. Please use valid names."

        # Build a named one-row frame: the model was fitted on a DataFrame, so
        # predicting from a bare array would trigger a feature-name warning.
        row = {
            'batting_team_encoded': batting_encoded,
            'bowling_team_encoded': bowling_encoded,
            'venue_encoded': venue_encoded,
            'runs_scored': runs,
            'wickets_lost': wickets,
            'overs_completed': overs,
            'balls_left': balls_left,
            'wickets_left': wickets_left,
            'current_run_rate': current_run_rate,
        }
        features = pd.DataFrame([row], columns=self.feature_columns)

        predicted_score = self.model.predict(features)[0]
        return int(predicted_score)

    def predict_from_csv(self, input_csv_file, output_csv_file=None):
        """Predict a score for every row of a CSV file.

        Args:
            input_csv_file: path (or file-like object) readable by ``pd.read_csv``.
            output_csv_file: optional path; when given, results are written there.

        Returns:
            The input frame with a ``predicted_final_score`` column added, or
            ``None`` when the file is missing or lacks a required column.
            Rows that cannot be predicted carry an ``"Error: ..."`` string.
        """
        try:
            pred_df = pd.read_csv(input_csv_file)
        except FileNotFoundError:
            print(f"Error: File {input_csv_file} not found!")
            return None

        required_cols = list(self.REQUIRED_COLUMNS)
        if not all(col in pred_df.columns for col in required_cols):
            print(f"Error: CSV must contain columns: {required_cols}")
            return None

        # REQUIRED_COLUMNS is ordered like predict_score's parameters, so each
        # plain tuple can be unpacked positionally.
        pred_df['predicted_final_score'] = [
            self.predict_score(*state)
            for state in pred_df[required_cols].itertuples(index=False, name=None)
        ]

        if output_csv_file:
            pred_df.to_csv(output_csv_file, index=False)
            print(f"Predictions saved to {output_csv_file}")

        return pred_df

    def get_teams_and_venues(self):
        """Return ``(batting teams, venues)`` as lists of unique values in ``self.df``."""
        return list(self.df['batting_team'].unique()), list(self.df['venue'].unique())

# Interactive Manual Prediction
def interactive_prediction(predictor, teams, venues):
    """Prompt for match states in a loop and print the predicted final score.

    Args:
        predictor: object exposing ``predict_score(batting_team, bowling_team,
            venue, runs_scored, wickets_lost, overs_completed)``.
        teams: team names shown to the user as a hint.
        venues: venue names shown to the user as a hint.

    The loop ends when the user types ``quit`` at the batting-team prompt.
    """
    print("\nAvailable Teams:", ", ".join(map(str, teams)))
    print("Available Venues:", ", ".join(map(str, venues)))
    while True:
        batting_team = input("\nEnter Batting Team (or 'quit'): ")
        if batting_team.strip().lower() == "quit":
            break
        bowling_team = input("Enter Bowling Team: ")
        venue = input("Enter Venue: ")
        # A typo here used to raise ValueError and terminate the program;
        # re-prompt instead.
        runs_scored = _prompt_number("Enter Runs Scored: ", int)
        wickets_lost = _prompt_number("Enter Wickets Lost: ", int)
        overs_completed = _prompt_number("Enter Overs Completed: ", float)

        score = predictor.predict_score(batting_team, bowling_team, venue, runs_scored, wickets_lost, overs_completed)
        print(f"Predicted Final Score: {score}")


def _prompt_number(prompt, cast):
    """Keep asking with *prompt* until ``cast`` accepts the reply; return the cast value."""
    while True:
        reply = input(prompt)
        try:
            return cast(reply)
        except ValueError:
            print(f"Invalid number: {reply!r}. Please try again.")

# CSV Prediction Mode
def csv_prediction_mode(predictor):
    """Ask for an input CSV (and optional output file), run batch prediction, print the result.

    An empty reply to the output prompt means "do not save"; nothing further
    is printed when the predictor returns ``None``.
    """
    source_path = input("\nEnter CSV File Path: ")
    target_path = input("Enter Output File Name (optional): ")
    if not target_path:
        target_path = None

    frame = predictor.predict_from_csv(source_path, target_path)
    if frame is None:
        return

    print("\nPredictions:")
    print(frame)

def main(data_file='ipl_score_pre.csv'):
    """Train a predictor on *data_file* and run the interactive text menu.

    Args:
        data_file: path of the training CSV; defaults to the file name the
            script originally hard-coded.

    Returns early with an error message if the training file does not exist.
    """
    predictor = IPLScorePredictor()
    try:
        predictor.load_data(data_file)
    except FileNotFoundError:
        # Report a missing dataset instead of ending in a traceback.
        print(f"Error: File {data_file} not found!")
        return
    predictor.preprocess_data()
    predictor.train_model()
    teams, venues = predictor.get_teams_and_venues()

    while True:
        print("\n" + "="*50)
        print("IPL SCORE PREDICTOR")
        print("="*50)
        print("1. Manual Custom Prediction")
        print("2. CSV File Prediction")
        print("3. Show Teams & Venues")
        print("4. Exit")
        # Tolerate stray spaces around the menu number.
        choice = input("Choose Option: ").strip()

        if choice == '1':
            interactive_prediction(predictor, teams, venues)
        elif choice == '2':
            csv_prediction_mode(predictor)
        elif choice == '3':
            print("\nTeams:", teams)
            print("Venues:", venues)
        elif choice == '4':
            break
        else:
            print("Invalid choice.")

# Start the interactive menu only when this file is run directly, not when it is imported.
if __name__ == "__main__":
    main()