# In [None]:
try:
    import pandas as pd
    import numpy as np
    from sklearn.model_selection import train_test_split
    from sklearn.ensemble import RandomForestRegressor
    from sklearn.preprocessing import LabelEncoder
    from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
except ImportError:
    import subprocess
    import sys
    subprocess.check_call([sys.executable, "-m", "pip", "install", "pandas", "scikit-learn"])
    import pandas as pd
    import numpy as np
    from sklearn.model_selection import train_test_split
    from sklearn.ensemble import RandomForestRegressor
    from sklearn.preprocessing import LabelEncoder
    from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score

import warnings
# Suppress every warning for the rest of the session so the interactive
# console output is not interleaved with library warnings.
warnings.filterwarnings('ignore')

# Module-level state shared by the prediction functions below. All three are
# assigned by load_and_train_model().
# Regressor used for predictions; stays None until load_and_train_model() runs.
model = None
# Maps a categorical column name to the LabelEncoder fitted on that column.
encoders = {}
# Feature column names, in the order the model was trained on.
feature_names = []

def safe_int_input(prompt, default=0):
    """Prompt repeatedly until the user supplies a whole number.

    Args:
        prompt: Text passed to ``input``.
        default: Value returned when the user submits an empty line, or when
            standard input is exhausted (EOF) and no answer can ever arrive.

    Returns:
        The parsed ``int``, or ``default`` in the two cases described above.
    """
    while True:
        try:
            raw = input(prompt).strip()
        except EOFError:
            # Mirrors safe_input(): a closed stdin yields the default instead
            # of crashing the whole session.
            return default
        if not raw:
            return default
        try:
            return int(raw)
        except ValueError:
            print("Please enter a valid number!")

def safe_input(prompt, default=""):
    """Read one line of text from the user, falling back to ``default``.

    Args:
        prompt: Text passed to ``input``.
        default: Value returned when the answer is empty (after stripping
            surrounding whitespace) or when reading fails.

    Returns:
        The stripped answer, or ``default``.

    Raises:
        KeyboardInterrupt: Propagated so that Ctrl+C / a kernel interrupt can
            still stop the program instead of being read as an empty answer.
    """
    try:
        answer = input(prompt).strip()
    except Exception:
        # Deliberate best effort: any read failure (EOF, no usable stdin, ...)
        # yields the default. ``Exception`` excludes KeyboardInterrupt and
        # SystemExit, which must not be swallowed.
        return default
    return answer or default

def load_and_train_model():
    """Load ``car_data.csv``, train the cost regressor and publish it globally.

    The categorical columns that are present are label-encoded, three
    engineered features (``Age_KM_Ratio``, ``KM_Per_Year``,
    ``Engine_Age_Factor``) are added, and a ``RandomForestRegressor`` is fitted
    on an 80/20 train/test split. MAE, RMSE and R2 on the held-out split are
    printed.

    The module globals ``model``, ``encoders`` and ``feature_names`` are only
    replaced once training has succeeded, so a failed call leaves any
    previously trained state untouched.

    Returns:
        bool: ``True`` when the model was trained; ``False`` when the dataset
        file is missing, cannot be parsed, has no rows, or lacks one of the
        columns needed for feature engineering / the target.
    """
    global model, encoders, feature_names

    print("Loading dataset and training model...")

    try:
        df = pd.read_csv("car_data.csv")
    except FileNotFoundError:
        print("Error: car_data.csv not found! Please ensure the file is in the same directory.")
        return False
    except (pd.errors.EmptyDataError, pd.errors.ParserError) as exc:
        print(f"Error: car_data.csv could not be read: {exc}")
        return False
    print(f"Dataset loaded: {df.shape[0]} rows, {df.shape[1]} columns")

    # These columns are indexed unconditionally below; report their absence
    # instead of letting a raw KeyError escape to the caller.
    required_cols = ["Car_Age", "Kilometers_Driven", "Engine_Capacity", "Maintenance_Cost"]
    missing_cols = [col for col in required_cols if col not in df.columns]
    if missing_cols:
        print(f"Error: car_data.csv is missing required column(s): {', '.join(missing_cols)}")
        return False
    if df.empty:
        print("Error: car_data.csv contains no data rows.")
        return False

    label_cols = ["Car_Brand_Model", "Fuel_Type", "Transmission", "Service_History",
                  "Accident_History", "Driving_Style", "Tire_Condition",
                  "Battery_Condition", "Insurance"]

    df_encoded = df.copy()
    fitted_encoders = {}
    for col in label_cols:
        if col in df_encoded.columns:
            encoder = LabelEncoder()
            df_encoded[col] = encoder.fit_transform(df_encoded[col])
            fitted_encoders[col] = encoder

    # Engineered features; the prediction functions recompute the same
    # formulas on their inputs, so keep them in sync.
    df_encoded['Age_KM_Ratio'] = df_encoded['Car_Age'] / (df_encoded['Kilometers_Driven'] / 10000 + 1)
    df_encoded['KM_Per_Year'] = df_encoded['Kilometers_Driven'] / (df_encoded['Car_Age'] + 1)
    df_encoded['Engine_Age_Factor'] = df_encoded['Engine_Capacity'] * df_encoded['Car_Age'] / 1000

    X = df_encoded.drop("Maintenance_Cost", axis=1)
    y = df_encoded["Maintenance_Cost"]

    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

    regressor = RandomForestRegressor(n_estimators=150, max_depth=15, min_samples_split=5,
                                      min_samples_leaf=2, random_state=42)
    regressor.fit(X_train, y_train)

    y_pred = regressor.predict(X_test)
    mae = mean_absolute_error(y_test, y_pred)
    rmse = np.sqrt(mean_squared_error(y_test, y_pred))
    r2 = r2_score(y_test, y_pred)

    print("Model Performance:")
    print(f"   MAE: Rs.{mae:.2f}")
    print(f"   RMSE: Rs.{rmse:.2f}")
    print(f"   R2 Score: {r2:.3f}")

    # Publish the new state only after everything above succeeded.
    model = regressor
    encoders = fitted_encoders
    feature_names = X.columns.tolist()

    return True

def _decode_category(column, row_dict):
    """Return the original label for ``row_dict[column]``, or ``"Unknown"``.

    The stored code is cast to ``int`` first: rows taken from a DataFrame
    arrive as floats (e.g. ``2.0``), which the encoder cannot use as an index.
    A missing key is treated as code 0.
    """
    encoder = encoders.get(column)
    if encoder is None:
        return "Unknown"
    try:
        code = int(row_dict.get(column, 0))
        return encoder.inverse_transform([code])[0]
    except (TypeError, ValueError, IndexError, OverflowError):
        # NaN / non-numeric / out-of-range codes: this one field is unknown,
        # the other fields are still decoded.
        return "Unknown"


def enhanced_recommend_parts(row_dict):
    """Build rule-based service recommendations for one encoded car record.

    Args:
        row_dict: Mapping with the numeric ``Car_Age`` and
            ``Kilometers_Driven`` values and the label-encoded
            ``Service_History``, ``Accident_History``, ``Tire_Condition``,
            ``Battery_Condition`` and ``Driving_Style`` codes. Missing keys
            default to 0.

    Returns:
        tuple: ``(recommendations, total_estimated_cost)`` where
        ``recommendations`` is a list of description strings and
        ``total_estimated_cost`` is the sum of their costs in rupees. When no
        rule fires, a single "Regular Maintenance" entry costing 2000 is
        returned.
    """
    car_age = row_dict.get("Car_Age", 0)
    km_driven = row_dict.get("Kilometers_Driven", 0)

    service_history = _decode_category("Service_History", row_dict)
    accident_history = _decode_category("Accident_History", row_dict)
    tire_condition = _decode_category("Tire_Condition", row_dict)
    battery_condition = _decode_category("Battery_Condition", row_dict)
    driving_style = _decode_category("Driving_Style", row_dict)

    # (description, cost) pairs, appended in the order they are reported.
    items = []

    if car_age > 10:
        items.append(("Major Service Overhaul (Rs.15,000)", 15000))
    elif car_age > 7:
        items.append(("Comprehensive Check-up (Rs.5,000)", 5000))
    elif car_age > 5:
        items.append(("Standard Service (Rs.3,000)", 3000))

    if tire_condition == "Bad":
        items.append(("Replace All Tires (Rs.25,000)", 25000))
    elif tire_condition == "Average":
        items.append(("Tire Rotation & Alignment (Rs.2,000)", 2000))

    if battery_condition == "Bad":
        items.append(("Replace Battery (Rs.5,000)", 5000))
    elif battery_condition == "Average":
        items.append(("Battery Health Check (Rs.500)", 500))

    if service_history == "Irregular":
        items.append(("Complete Service Package (Rs.8,000)", 8000))

    if accident_history == "Yes":
        items.append(("Body & Suspension Inspection (Rs.3,000)", 3000))

    if km_driven > 150000:
        items.append(("Engine Overhaul (Rs.50,000)", 50000))
    elif km_driven > 100000:
        items.append(("Timing Belt Replacement (Rs.8,000)", 8000))
    elif km_driven > 50000:
        items.append(("Brake Pad Replacement (Rs.4,000)", 4000))

    if driving_style == "Highway" and car_age > 5:
        items.append(("AC Service (Rs.3,000)", 3000))

    if not items:
        items.append(("Regular Maintenance (Rs.2,000)", 2000))

    recommendations = [description for description, _ in items]
    total_estimated_cost = sum(cost for _, cost in items)
    return recommendations, total_estimated_cost

def _prompt_non_negative_int(prompt):
    """Ask for a whole number via ``safe_int_input`` until it is zero or greater."""
    while True:
        value = safe_int_input(prompt)
        if value >= 0:
            return value
        print("Please enter a non-negative number!")


def _encode_category(col, value):
    """Encode ``value`` for ``col`` with its fitted encoder, ignoring letter case.

    An exact match against the training labels wins; otherwise the labels are
    compared case-insensitively. When nothing matches a warning is printed and
    code 0 is returned.
    """
    encoder = encoders[col]
    labels = list(encoder.classes_)
    if value in labels:
        canonical = value
    else:
        by_lower = {str(label).lower(): label for label in labels}
        canonical = by_lower.get(value.lower())
    if canonical is None:
        print(f"Warning: '{value}' not recognized for {col}, using default value")
        return 0
    return encoder.transform([canonical])[0]


def manual_prediction():
    """Interactively collect one car's details and print its cost estimate.

    Prompts for every model input, encodes the categorical answers with the
    fitted encoders, recomputes the engineered features used in training,
    predicts the maintenance cost (reduced by 30% when the insurance answer is
    "yes" in any letter case), and prints the prediction together with the
    rule-based recommendations from ``enhanced_recommend_parts`` and a
    low / moderate / high cost category.

    Age, engine capacity and mileage are re-prompted until non-negative:
    negative values are meaningless and could make the feature denominators
    below zero out (age -1, or mileage -10000).

    Returns:
        None. All results are printed.
    """
    print("\n" + "="*60)
    print("MANUAL PREDICTION MODE")
    print("="*60)

    print("\nPlease provide the following information:")
    print(f"Available Car Models: {', '.join(encoders['Car_Brand_Model'].classes_[:10])}... (and more)")
    print(f"Fuel Types: {', '.join(encoders['Fuel_Type'].classes_)}")
    print(f"Transmission: {', '.join(encoders['Transmission'].classes_)}")
    print("Conditions: Good, Average, Bad")
    print("Yes/No options: Yes, No")
    print("Service History: Regular, Irregular")
    print("Driving Style: City, Highway, Mixed")

    car_brand = safe_input("\nCar Brand & Model (e.g., Maruti_Swift): ")
    car_age = _prompt_non_negative_int("Car Age (years): ")
    fuel_type = safe_input("Fuel Type (Petrol/Diesel/Electric/Hybrid): ")
    engine_cc = _prompt_non_negative_int("Engine Capacity (cc): ")
    km = _prompt_non_negative_int("Kilometers Driven: ")
    transmission = safe_input("Transmission (Manual/Automatic): ")
    service = safe_input("Service History (Regular/Irregular): ")
    accident = safe_input("Accident History (Yes/No): ")
    driving = safe_input("Driving Style (City/Highway/Mixed): ")
    tire = safe_input("Tire Condition (Good/Average/Bad): ")
    battery = safe_input("Battery Condition (Good/Average/Bad): ")
    insurance = safe_input("Insurance (Yes/No): ")

    categorical_answers = [
        ("Car_Brand_Model", car_brand), ("Fuel_Type", fuel_type),
        ("Transmission", transmission), ("Service_History", service),
        ("Accident_History", accident), ("Driving_Style", driving),
        ("Tire_Condition", tire), ("Battery_Condition", battery),
        ("Insurance", insurance),
    ]
    input_data = {col: _encode_category(col, answer) for col, answer in categorical_answers}

    input_data["Car_Age"] = car_age
    input_data["Engine_Capacity"] = engine_cc
    input_data["Kilometers_Driven"] = km

    # Same engineered features as in training; denominators are >= 1 because
    # age and mileage are non-negative.
    input_data['Age_KM_Ratio'] = car_age / (km / 10000 + 1)
    input_data['KM_Per_Year'] = km / (car_age + 1)
    input_data['Engine_Age_Factor'] = engine_cc * car_age / 1000

    # Align the single row with the training column order.
    features = pd.DataFrame([input_data]).reindex(columns=feature_names, fill_value=0)
    predicted_cost = model.predict(features)[0]

    discount_applied = insurance.lower() == "yes"
    if discount_applied:
        predicted_cost *= 0.7

    recommendations, estimated_parts_cost = enhanced_recommend_parts(input_data)

    print("\n" + "="*60)
    print("PREDICTION RESULTS")
    print("="*60)

    print(f"Predicted Maintenance Cost: Rs.{predicted_cost:,.2f}")
    if discount_applied:
        print("Insurance discount applied (30% off)")

    print("\nAdditional Parts/Services Recommended:")
    for rec in recommendations:
        print(f"   - {rec}")

    print(f"\nEstimated Total Additional Cost: Rs.{estimated_parts_cost:,.2f}")

    total_cost = predicted_cost + estimated_parts_cost
    print(f"Total Estimated Cost: Rs.{total_cost:,.2f}")

    if total_cost < 15000:
        print("Cost Category: LOW COST")
    elif total_cost < 35000:
        print("Cost Category: MODERATE COST")
    else:
        print("Cost Category: HIGH COST")

def csv_prediction():
    """Predict maintenance costs for every row of a user-supplied CSV file.

    Prompts for a file path, encodes the categorical columns with the fitted
    encoders (unrecognized labels fall back to code 0 with a warning),
    recomputes the engineered features used in training, predicts a cost per
    row (reduced by 30% when the row's ``Insurance`` value is "yes" in any
    letter case), attaches rule-based recommendations, prints a summary and
    writes everything to ``Predicted_Maintenance_Costs.csv``.

    The function prints an error and returns early when the file cannot be
    read, has no data rows, or lacks the numeric columns needed for feature
    engineering.

    Returns:
        None. Results are printed and written to disk.
    """
    print("\n" + "="*60)
    print("CSV FILE PREDICTION MODE")
    print("="*60)

    file_path = safe_input("Enter CSV file path: ")

    try:
        input_df = pd.read_csv(file_path)
        print(f"File loaded successfully: {input_df.shape[0]} rows")
    except FileNotFoundError:
        print("File not found! Please check the path and try again.")
        return
    except Exception as e:
        print(f"Error reading file: {e}")
        return

    # The feature formulas below index these columns unconditionally.
    required_cols = ["Car_Age", "Kilometers_Driven", "Engine_Capacity"]
    missing_cols = [col for col in required_cols if col not in input_df.columns]
    if missing_cols:
        print(f"Error: input file is missing required column(s): {', '.join(missing_cols)}")
        return
    if input_df.empty:
        print("Error: input file contains no data rows.")
        return

    original_df = input_df.copy()
    original_df['Row_ID'] = range(1, len(original_df) + 1)

    df_encoded = input_df.copy()
    label_cols = ["Car_Brand_Model", "Fuel_Type", "Transmission", "Service_History",
                  "Accident_History", "Driving_Style", "Tire_Condition",
                  "Battery_Condition", "Insurance"]

    for col in label_cols:
        encoder = encoders.get(col)
        if col not in df_encoded.columns or encoder is None:
            continue
        # A LabelEncoder code is the label's position in classes_, so a plain
        # lookup table encodes the whole column in one pass.
        code_by_label = {label: code for code, label in enumerate(encoder.classes_)}
        codes = df_encoded[col].map(code_by_label)
        if codes.isna().any():
            print(f"Warning: Some values in {col} not recognized, using default values")
            codes = codes.fillna(0)
        df_encoded[col] = codes.astype(int)

    df_encoded['Age_KM_Ratio'] = df_encoded['Car_Age'] / (df_encoded['Kilometers_Driven'] / 10000 + 1)
    df_encoded['KM_Per_Year'] = df_encoded['Kilometers_Driven'] / (df_encoded['Car_Age'] + 1)
    df_encoded['Engine_Age_Factor'] = df_encoded['Engine_Capacity'] * df_encoded['Car_Age'] / 1000

    df_encoded = df_encoded.reindex(columns=feature_names, fill_value=0)
    predictions = model.predict(df_encoded)

    if "Insurance" in original_df.columns:
        insurance_values = original_df["Insurance"].tolist()
    else:
        insurance_values = ["No"] * len(original_df)

    # Per-column dtypes are kept here, so category codes stay integers; a
    # row Series from .iloc would upcast them to float.
    feature_rows = df_encoded.to_dict("records")

    final_costs = []
    all_recommendations = []

    for predicted, insurance_value, row_dict in zip(predictions, insurance_values, feature_rows):
        cost = predicted
        # str() guards against NaN or numeric cells, which have no .lower().
        if str(insurance_value).strip().lower() == "yes":
            cost *= 0.7
        final_costs.append(cost)

        recommendations, _ = enhanced_recommend_parts(row_dict)
        all_recommendations.append(" | ".join(recommendations))

    original_df["Predicted_Cost"] = [f"Rs.{cost:,.2f}" for cost in final_costs]
    original_df["Recommended_Services"] = all_recommendations

    print("\nPREDICTION SUMMARY")
    print("=" * 40)
    print(f"Total cars processed: {len(original_df)}")
    print(f"Average predicted cost: Rs.{np.mean(final_costs):,.2f}")
    print(f"Lowest cost: Rs.{min(final_costs):,.2f}")
    print(f"Highest cost: Rs.{max(final_costs):,.2f}")

    print("\nFirst 5 Results Preview:")
    print("-" * 100)
    display_cols = ['Row_ID', 'Car_Brand_Model', 'Car_Age', 'Kilometers_Driven', 'Predicted_Cost']
    # Car_Brand_Model is optional in the input; only preview columns that exist.
    display_cols = [col for col in display_cols if col in original_df.columns]
    print(original_df[display_cols].head().to_string(index=False))

    output_file = "Predicted_Maintenance_Costs.csv"
    original_df.to_csv(output_file, index=False)
    print(f"\nComplete results saved to: {output_file}")

    low_cost = sum(1 for cost in final_costs if cost < 15000)
    moderate_cost = sum(1 for cost in final_costs if 15000 <= cost < 35000)
    high_cost = sum(1 for cost in final_costs if cost >= 35000)

    print("\nCost Distribution:")
    print(f"   Low Cost (<Rs.15K): {low_cost} cars")
    print(f"   Moderate Cost (Rs.15K-35K): {moderate_cost} cars")
    print(f"   High Cost (>Rs.35K): {high_cost} cars")

def main():
    """Run the interactive menu of the maintenance cost predictor.

    Prints the banner, trains the model through ``load_and_train_model`` (and
    returns immediately if that reports failure), then loops over the menu:
    option 1 runs ``manual_prediction``, option 2 runs ``csv_prediction`` and
    option 3 exits. After a prediction the user is asked whether to continue;
    any answer other than "y" (case-insensitive) ends the session.
    """
    banner_rule = "=" * 60
    print(banner_rule)
    print("    ENHANCED CAR MAINTENANCE COST PREDICTOR")
    print(banner_rule)

    if not load_and_train_model():
        return

    print("\nModel trained successfully!")

    def say_goodbye():
        # Shared farewell for both ways of leaving the loop.
        print("\nThank you for using the Car Maintenance Cost Predictor!")
        print("Drive safe!")

    actions = {"1": manual_prediction, "2": csv_prediction}
    menu_rule = "=" * 40

    while True:
        print("\n" + menu_rule)
        print("PREDICTION OPTIONS")
        print(menu_rule)
        print("1. Manual Prediction")
        print("2. CSV File Prediction")
        print("3. Exit")

        choice = safe_input("\nEnter your choice (1/2/3): ")

        if choice == "3":
            say_goodbye()
            break

        action = actions.get(choice)
        if action is None:
            print("Invalid choice! Please enter 1, 2, or 3.")
            continue

        action()

        answer = safe_input("\nWould you like to make another prediction? (y/n): ").lower().strip()
        if answer != 'y':
            say_goodbye()
            break

# Start the interactive predictor only when this file is executed directly,
# not when it is imported as a module.
if __name__ == "__main__":
    main()