In [None]:
### First, import the libraries needed for data preprocessing, model training, and evaluation
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score

# Read the raw match records from the local CSV export.
file_path = r"C:\Users\only1\Downloads\matches.csv"
matches_data = pd.read_csv(file_path)

# Discard the columns that are not used as model inputs or targets.
columns_to_drop = [
    'id', 'date', 'player_of_match', 'result', 'result_margin',
    'target_runs', 'target_overs', 'super_over', 'method',
    'umpire1', 'umpire2',
]
matches_data_cleaned = matches_data.drop(columns_to_drop, axis='columns')

# Replace missing city names with a placeholder label.
matches_data_cleaned['city'] = matches_data_cleaned['city'].fillna('Unknown')

# Integer-encode every categorical column. One fitted encoder is kept per
# column so the same mapping can be reused to encode inputs and decode
# predictions later on.
categorical_columns = (
    'season', 'city', 'venue', 'team1', 'team2',
    'toss_winner', 'toss_decision', 'winner', 'match_type',
)
label_encoders = {col_name: LabelEncoder() for col_name in categorical_columns}
for col_name, encoder in label_encoders.items():
    matches_data_cleaned[col_name] = encoder.fit_transform(matches_data_cleaned[col_name])

# Toss-winner model: the encoded toss winner is the target, and every other
# column except the match winner is used as a feature.
y_toss = matches_data_cleaned['toss_winner']
X_toss = matches_data_cleaned.drop(['toss_winner', 'winner'], axis='columns')

# Hold out 20% of the matches for evaluation, with a fixed seed for the split.
X_train_toss, X_test_toss, y_train_toss, y_test_toss = train_test_split(
    X_toss,
    y_toss,
    test_size=0.2,
    random_state=42,
)

# fit() returns the estimator, so construction and training are chained.
toss_model = RandomForestClassifier(n_estimators=100, random_state=42).fit(
    X_train_toss, y_train_toss
)

# Report accuracy on the held-out matches.
y_pred_toss = toss_model.predict(X_test_toss)
toss_accuracy = accuracy_score(y_test_toss, y_pred_toss)
print(f'Toss Winner Prediction Accuracy: {toss_accuracy * 100:.2f}%')

# Train Match Winner Prediction Model
# Only 'city' has its missing values filled before encoding, so a match with
# no recorded winner (if any) is given a class of its own by the 'winner'
# encoder. Leave such rows out so the model is neither trained nor scored on
# "no winner" as though it were a team.
winner_classes = label_encoders['winner'].classes_
has_winner = pd.notna(winner_classes)[matches_data_cleaned['winner'].to_numpy()]
matches_with_winner = matches_data_cleaned.loc[has_winner]

# Features are every remaining column (including the encoded toss winner);
# the target is the encoded match winner.
X_match = matches_with_winner.drop(columns=['winner'])
y_match = matches_with_winner['winner']
X_train_match, X_test_match, y_train_match, y_test_match = train_test_split(
    X_match, y_match, test_size=0.2, random_state=42
)

match_model = RandomForestClassifier(n_estimators=100, random_state=42)
match_model.fit(X_train_match, y_train_match)

# Report accuracy on the held-out 20% of matches.
y_pred_match = match_model.predict(X_test_match)
match_accuracy = accuracy_score(y_test_match, y_pred_match)
print(f'Match Winner Prediction Accuracy: {match_accuracy * 100:.2f}%')

# Register '2025' as an extra season label so the season encoder can
# transform it. The label is appended only when the fitted classes do not
# already contain it.
season_encoder = label_encoders['season']
season_already_known = '2025' in season_encoder.classes_
if not season_already_known:
    season_encoder.classes_ = np.append(season_encoder.classes_, '2025')
# Predict IPL 2025 Toss & Match Winners (Simulating a Random Match)
def _pick_between_teams(model, encoder, features, teams):
    """Return the encoded label the model favours among ``teams``.

    Each team's code is looked up in ``encoder.classes_`` and its class
    probability is read from ``model.predict_proba(features)``. The code with
    the highest probability is returned (ties go to the earlier team). Teams
    that the encoder or the model has no class for are skipped; if none of
    the teams can be scored, the model's unrestricted ``predict`` result is
    returned instead.
    """
    code_by_label = {label: code for code, label in enumerate(encoder.classes_)}
    column_by_code = {code: col for col, code in enumerate(model.classes_)}
    probabilities = model.predict_proba(features)[0]

    scored = []
    for team in teams:
        code = code_by_label.get(team)
        if code in column_by_code:
            scored.append((probabilities[column_by_code[code]], code))

    if not scored:
        return model.predict(features)[0]
    return max(scored, key=lambda pair: pair[0])[1]


def predict_ipl_2025(team1, team2, venue, city, season,
                     toss_decision=None, match_type=None):
    """Predict the toss winner and match winner for one fixture.

    The inputs are encoded with the module-level ``label_encoders`` and fed to
    ``toss_model``; the chosen toss winner is then added as a feature for
    ``match_model``. Both predictions are restricted to ``team1`` and
    ``team2``: the team with the higher predicted probability is returned, so
    a side that is not playing in the fixture is not reported as the winner.

    Args:
        team1: Name of the first team, as it appears in the training data.
        team2: Name of the second team, as it appears in the training data.
        venue: Venue name, as it appears in the training data.
        city: City name, as it appears in the training data.
        season: Season label known to the season encoder.
        toss_decision: Optional toss decision label. When omitted, encoded
            value 0 is used, as in the original implementation.
        match_type: Optional match type label. When omitted, encoded value 0
            is used, as in the original implementation.

    Returns:
        A ``(toss_winner, match_winner)`` tuple of decoded team names.

    Raises:
        ValueError: Raised by ``LabelEncoder.transform`` when an input label
            was not seen by the corresponding encoder.
        KeyError: If a model was trained on a feature column that is not
            supplied here.
    """
    encoded_team1 = label_encoders['team1'].transform([team1])[0]
    encoded_team2 = label_encoders['team2'].transform([team2])[0]
    encoded_venue = label_encoders['venue'].transform([venue])[0]
    encoded_city = label_encoders['city'].transform([city])[0]
    encoded_season = label_encoders['season'].transform([season])[0]

    # Keep the historical placeholder code 0 unless the caller names a value.
    encoded_toss_decision = 0
    if toss_decision is not None:
        encoded_toss_decision = label_encoders['toss_decision'].transform([toss_decision])[0]
    encoded_match_type = 0
    if match_type is not None:
        encoded_match_type = label_encoders['match_type'].transform([match_type])[0]

    # Ensure all features are present and in the order used during training.
    feature_order_toss = X_toss.columns.tolist()
    input_dict_toss = {
        'season': encoded_season,
        'city': encoded_city,
        'venue': encoded_venue,
        'team1': encoded_team1,
        'team2': encoded_team2,
        'toss_decision': encoded_toss_decision,
        'match_type': encoded_match_type,
    }
    input_data_toss = pd.DataFrame([{col: input_dict_toss[col] for col in feature_order_toss}])

    # Only one of the two participating teams can win the toss.
    toss_winner_encoded = _pick_between_teams(
        toss_model, label_encoders['toss_winner'], input_data_toss, (team1, team2)
    )
    toss_winner = label_encoders['toss_winner'].inverse_transform([toss_winner_encoded])[0]

    # For match winner prediction, include the toss winner as a feature.
    feature_order_match = X_match.columns.tolist()
    input_dict_match = {
        **input_dict_toss,
        'toss_winner': toss_winner_encoded,
    }
    input_data_match = pd.DataFrame([{col: input_dict_match[col] for col in feature_order_match}])

    # Likewise, only one of the two participating teams can win the match.
    match_winner_encoded = _pick_between_teams(
        match_model, label_encoders['winner'], input_data_match, (team1, team2)
    )
    match_winner = label_encoders['winner'].inverse_transform([match_winner_encoded])[0]

    return toss_winner, match_winner

# Example fixture: the two sides, the ground, its city and the season label.
team1, team2 = 'Punjab Kings', 'Lucknow Super Giants'
venue, city = 'Sawai Mansingh Stadium', 'Jaipur'
season = '2025'

# Run the prediction for this fixture and print the returned team names.
toss_winner, match_winner = predict_ipl_2025(
    team1=team1, team2=team2, venue=venue, city=city, season=season
)
print(f'Predicted Toss Winner: {toss_winner}')
print(f'Predicted Match Winner: {match_winner}')


Toss Winner Prediction Accuracy: 55.71%
Match Winner Prediction Accuracy: 46.12%
Predicted Toss Winner: Mumbai Indians
Predicted Match Winner: Rajasthan Royals
