In [None]:
!pip install pandas numpy scikit-learn matplotlib seaborn

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
import warnings
# NOTE(review): this silences every warning for the whole process, which also
# hides deprecation notices from the imported libraries -- consider narrowing
# the filter to the specific categories that are actually noisy.
warnings.filterwarnings('ignore')

# Global plotting defaults for matplotlib/seaborn. No plotting call appears in
# the visible part of this file, so these only matter to code outside it.
# NOTE(review): the 'seaborn-v0_8' style name presumably needs a recent
# Matplotlib release -- confirm against the pinned version.
plt.style.use('seaborn-v0_8')
sns.set_palette("husl")

class SmartTripCostPredictor:
    """Estimate the cost of a trip with a random-forest regressor.

    The categorical inputs (``transport_mode``, ``season``, ``day_type``) are
    label-encoded, all features are standardised with a ``StandardScaler`` and
    the result is fed to a ``RandomForestRegressor``.  Call
    :meth:`train_model` before using any of the ``predict_*`` methods.
    """

    # Raw categorical columns; each one gets a ``<name>_encoded`` companion.
    _CATEGORICAL_COLUMNS = ('transport_mode', 'season', 'day_type')

    # Column order of the frame returned by ``create_sample_dataset``.
    _DATASET_COLUMNS = ['distance', 'duration_hours', 'transport_mode',
                        'fuel_price', 'season', 'day_type', 'num_persons',
                        'cost']

    # Category vocabularies used by the synthetic data generator.
    _TRANSPORT_MODES = ['Car', 'Bus', 'Train', 'Flight', 'Bike', 'Taxi', 'Metro']
    _SEASONS = ['Spring', 'Summer', 'Fall', 'Winter']
    _DAY_TYPES = ['Weekday', 'Weekend', 'Holiday']

    # (low, high) bounds of the uniform draw for trip distance, per mode.
    _DISTANCE_RANGE = {
        'Flight': (200, 3000),
        'Train': (50, 1500),
        'Bus': (10, 800),
        'Car': (5, 1000),
        'Taxi': (2, 100),
        'Metro': (1, 50),
        'Bike': (1, 30),
    }

    # (low, high) bounds of the uniform draw for average speed, per mode;
    # duration is distance divided by the drawn speed.
    _SPEED_RANGE = {
        'Flight': (600, 900),
        'Train': (60, 120),
        'Bus': (40, 80),
        'Car': (50, 100),
        'Metro': (30, 60),
        'Taxi': (25, 60),
        'Bike': (15, 25),
    }

    # Modes whose base cost is simply distance times a uniformly drawn rate.
    _RATE_PER_DISTANCE = {
        'Flight': (6, 12),
        'Train': (2, 5),
        'Bus': (1.5, 4),
    }

    _SEASON_MULTIPLIER = {'Spring': 1.0, 'Summer': 1.15, 'Fall': 1.05, 'Winter': 1.1}
    _DAY_MULTIPLIER = {'Weekday': 1.0, 'Weekend': 1.2, 'Holiday': 1.3}

    def __init__(self):
        self.model = RandomForestRegressor(n_estimators=100, random_state=42)
        # One fitted LabelEncoder per categorical column, keyed by column name.
        self.label_encoders = {}
        self.scaler = StandardScaler()
        # Exact column order the scaler and the model are trained on.
        self.feature_columns = ['distance', 'duration_hours', 'transport_mode_encoded',
                                'fuel_price', 'season_encoded', 'day_type_encoded', 'num_persons']

    @classmethod
    def _draw_base_cost(cls, rng, transport_mode, distance, fuel_price):
        """Draw the single-person, pre-multiplier cost for one synthetic trip."""
        if transport_mode in cls._RATE_PER_DISTANCE:
            return distance * rng.uniform(*cls._RATE_PER_DISTANCE[transport_mode])
        if transport_mode == 'Car':
            return distance * fuel_price * rng.uniform(0.08, 0.12)
        if transport_mode == 'Taxi':
            # Per-distance rate plus a flat component.
            return distance * rng.uniform(20, 40) + rng.uniform(50, 150)
        if transport_mode == 'Metro':
            # Distance part is capped at 50 before the flat component is added.
            return min(distance * 2, 50) + rng.uniform(10, 30)
        # Remaining mode ('Bike'): cost does not depend on distance.
        return rng.uniform(0, 20)

    def create_sample_dataset(self, n_samples=1000, seed=42):
        """Generate a reproducible synthetic trip dataset.

        Args:
            n_samples: Number of rows to generate.
            seed: Seed of the private random generator.  The default
                reproduces the data the method has always produced.

        Returns:
            A ``pandas.DataFrame`` with the columns ``distance``,
            ``duration_hours``, ``transport_mode``, ``fuel_price``,
            ``season``, ``day_type``, ``num_persons`` and ``cost``.  The
            columns are present even when ``n_samples`` is 0.
        """
        # A private legacy RandomState yields the same stream that
        # ``np.random.seed(seed)`` followed by module-level ``np.random.*``
        # calls would, but leaves the process-global generator untouched.
        rng = np.random.RandomState(seed)

        rows = []
        for _ in range(n_samples):
            # NOTE: the order of the draws below defines the generated data;
            # reordering them changes every row.
            transport_mode = rng.choice(self._TRANSPORT_MODES)
            season = rng.choice(self._SEASONS)
            day_type = rng.choice(self._DAY_TYPES)
            num_persons = rng.randint(1, 6)  # upper bound exclusive -> 1..5

            distance = rng.uniform(*self._DISTANCE_RANGE[transport_mode])
            duration_hours = distance / rng.uniform(*self._SPEED_RANGE[transport_mode])
            fuel_price = rng.uniform(90, 120)
            base_cost = self._draw_base_cost(rng, transport_mode, distance, fuel_price)

            total_cost = (base_cost
                          * self._SEASON_MULTIPLIER[season]
                          * self._DAY_MULTIPLIER[day_type])
            total_cost *= num_persons
            total_cost *= rng.uniform(0.9, 1.1)  # +/-10 % noise

            rows.append({
                'distance': round(distance, 2),
                'duration_hours': round(duration_hours, 2),
                'transport_mode': transport_mode,
                'fuel_price': round(fuel_price, 2),
                'season': season,
                'day_type': day_type,
                'num_persons': num_persons,
                'cost': round(total_cost, 2),
            })

        # Explicit columns keep the schema stable for an empty result.
        return pd.DataFrame(rows, columns=self._DATASET_COLUMNS)

    def prepare_data(self, df):
        """Return a copy of ``df`` with ``<column>_encoded`` columns added.

        A column that has no encoder yet gets a new ``LabelEncoder`` fitted on
        the values in ``df``; a column that already has one is transformed
        with the stored encoder.  ``df`` itself is not modified.

        Args:
            df: Frame containing ``transport_mode``, ``season`` and
                ``day_type`` columns.

        Returns:
            The copied frame including the encoded columns.
        """
        df_processed = df.copy()

        for col in self._CATEGORICAL_COLUMNS:
            if col not in self.label_encoders:
                self.label_encoders[col] = LabelEncoder()
                df_processed[col + '_encoded'] = self.label_encoders[col].fit_transform(df_processed[col])
            else:
                df_processed[col + '_encoded'] = self.label_encoders[col].transform(df_processed[col])

        return df_processed

    def train_model(self, df):
        """Fit encoders, scaler and model on ``df`` and print test metrics.

        The data is split 80/20 with a fixed random state; MAE, RMSE and R²
        on the held-out part are printed.

        Args:
            df: Training frame with the raw feature columns and ``cost``.

        Returns:
            ``(X_test, y_test, y_pred)`` where ``X_test`` is the *unscaled*
            held-out feature frame.
        """
        # Always refit the encoders on the training data.  Encoders left over
        # from an earlier training run (or fitted by a prediction call made
        # before training) would otherwise be reused and could reject or
        # mis-encode the categories present in ``df``.
        self.label_encoders = {}

        df_processed = self.prepare_data(df)
        X = df_processed[self.feature_columns]
        y = df_processed['cost']

        X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
        # The scaler is fitted on the training split only.
        X_train_scaled = self.scaler.fit_transform(X_train)
        X_test_scaled = self.scaler.transform(X_test)

        self.model.fit(X_train_scaled, y_train)
        y_pred = self.model.predict(X_test_scaled)

        print("Model Performance:")
        print(f"Mean Absolute Error: ₹{mean_absolute_error(y_test, y_pred):.2f}")
        print(f"Root Mean Square Error: ₹{np.sqrt(mean_squared_error(y_test, y_pred)):.2f}")
        print(f"R² Score: {r2_score(y_test, y_pred):.3f}")

        return X_test, y_test, y_pred

    def predict_custom(self, distance, duration_hours, transport_mode, fuel_price, season, day_type, num_persons):
        """Predict the cost of a single trip described by scalar values.

        ``train_model`` is expected to have been called first so that the
        encoders, scaler and model are fitted.

        Returns:
            The model's prediction for the one-row input.
        """
        # Only the feature columns are needed; no target column is required.
        input_data = pd.DataFrame({
            'distance': [distance],
            'duration_hours': [duration_hours],
            'transport_mode': [transport_mode],
            'fuel_price': [fuel_price],
            'season': [season],
            'day_type': [day_type],
            'num_persons': [num_persons],
        })
        input_processed = self.prepare_data(input_data)
        X_input = input_processed[self.feature_columns]
        X_input_scaled = self.scaler.transform(X_input)
        return self.model.predict(X_input_scaled)[0]

    def predict_from_csv(self, csv_file_path):
        """Predict costs for every row of a CSV file.

        ``train_model`` is expected to have been called first.

        Args:
            csv_file_path: Path of a CSV file holding the raw feature columns.

        Returns:
            The frame read from the file with a ``predicted_cost`` column
            added.
        """
        df_new = pd.read_csv(csv_file_path)
        df_processed = self.prepare_data(df_new)
        X_new = df_processed[self.feature_columns]
        X_new_scaled = self.scaler.transform(X_new)
        df_new['predicted_cost'] = self.model.predict(X_new_scaled)
        return df_new

def _handle_custom_prediction(predictor):
    """Prompt for one trip on stdin and print its predicted cost."""
    try:
        # Dict displays evaluate in source order, so the prompts appear in
        # the order written here.
        trip = {
            'distance': float(input("Enter distance (km): ")),
            'duration_hours': float(input("Enter duration (hours): ")),
            'transport_mode': input("Enter transport mode: ").strip().title(),
            'fuel_price': float(input("Enter fuel price (₹/liter): ")),
            'season': input("Enter season: ").strip().title(),
            'day_type': input("Enter day type: ").strip().title(),
            'num_persons': int(input("Enter number of persons: ")),
        }
        predicted_cost = predictor.predict_custom(**trip)
        print(f"🎯 Predicted Trip Cost: ₹{predicted_cost:.2f}")
    except Exception as e:
        # Interactive boundary: report bad input or prediction errors and
        # return to the menu.
        print(f"Error: {e}")


def _handle_csv_prediction(predictor):
    """Predict costs for a CSV file, writing a sample input if none is given."""
    csv_path = input("Enter CSV file path (or press Enter for sample): ").strip()
    if not csv_path:
        csv_path = 'sample_prediction_input.csv'
        pd.DataFrame({
            'distance': [25.5, 150.0, 500.0],
            'duration_hours': [0.5, 2.5, 6.0],
            'transport_mode': ['Taxi', 'Bus', 'Flight'],
            'fuel_price': [100, 95, 110],
            'season': ['Summer', 'Fall', 'Winter'],
            'day_type': ['Weekday', 'Weekend', 'Holiday'],
            'num_persons': [2, 3, 1]
        }).to_csv(csv_path, index=False)
        print("Sample CSV created: sample_prediction_input.csv")

    try:
        results_df = predictor.predict_from_csv(csv_path)
        summary_columns = ['distance', 'transport_mode', 'num_persons', 'predicted_cost']
        print(results_df[summary_columns])
        results_df.to_csv('trip_cost_predictions.csv', index=False)
        print("✅ Results saved as 'trip_cost_predictions.csv'")
    except Exception as e:
        print(f"Error reading CSV: {e}")


def main():
    """Build and train the predictor, then serve the interactive menu."""
    predictor = SmartTripCostPredictor()
    print("Creating sample dataset...")
    df = predictor.create_sample_dataset(1500)
    df.to_csv('trip_cost_dataset.csv', index=False)
    print("✅ Sample dataset saved as 'trip_cost_dataset.csv'")

    predictor.train_model(df)

    menu_lines = (
        "\nChoose an option:",
        "1. Custom Cost Prediction",
        "2. CSV File Prediction",
        "3. Exit",
    )
    handlers = {
        '1': _handle_custom_prediction,
        '2': _handle_csv_prediction,
    }

    while True:
        for line in menu_lines:
            print(line)

        choice = input("Enter choice (1-3): ").strip()

        if choice == '3':
            print("Thank you for using Smart Trip Cost Predictor! 🚗✨")
            break

        handler = handlers.get(choice)
        if handler is None:
            print("Invalid choice. Please try again.")
            continue
        handler(predictor)

# Start the interactive session only when the file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()
