<a href="https://colab.research.google.com/github/yashika-08/Data-Generation-using-Modelling-and-Simulation-for-Machine-Learning/blob/main/SimpyDataGeneration.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
!pip install simpy

In [None]:
import simpy
import random
import numpy as np
import pandas as pd

In [None]:
def customer(env, server, service_rate, wait_times):
    """Generator describing one customer: queue for the server, then be served.

    Args:
        env: Simulation environment; its ``now`` attribute is read for
            timestamps and ``timeout`` is used to model the service duration.
        server: Resource whose ``request()`` is used as a context manager.
        service_rate: Rate handed to ``random.expovariate`` to draw the
            service duration.
        wait_times: List that receives this customer's queueing delay
            (clock value when the request completes minus clock value on
            entry). Mutated in place.

    Yields:
        First the resource request, then the service timeout event.
    """
    queued_at = env.now
    with server.request() as slot:
        # Suspend here until the request on the server has been fulfilled.
        yield slot
        waited = env.now - queued_at
        wait_times.append(waited)
        # Occupy the server for an exponentially distributed duration; the
        # request is released when the ``with`` block exits.
        yield env.timeout(random.expovariate(service_rate))

def run_simulation(arrival_rate, service_rate, sim_time):
    """Simulate a single-server queue and return the mean customer wait.

    A fresh ``simpy.Environment`` is created with one ``simpy.Resource`` of
    capacity 1. An arrival process repeatedly waits an exponentially
    distributed gap and then starts a new ``customer`` process, which records
    its queueing delay once its request on the server is fulfilled.

    Args:
        arrival_rate: Rate passed to ``random.expovariate`` for the gaps
            between consecutive arrivals.
        service_rate: Rate forwarded to ``customer`` for service durations.
        sim_time: Simulated clock value at which ``env.run`` stops.

    Returns:
        The mean of the recorded waits as computed by ``np.mean``. If no
        customer started service before ``sim_time`` the result is NaN.

    Notes:
        * All randomness comes from the global ``random`` module, so call
          ``random.seed`` beforehand if reproducible results are needed.
        * Only customers whose request was fulfilled contribute a wait;
          customers still queued when the run stops are not counted.
    """
    env = simpy.Environment()
    server = simpy.Resource(env, capacity=1)
    wait_times = []

    def arrivals(env):
        """Start a new customer process after each random inter-arrival gap."""
        while True:
            yield env.timeout(random.expovariate(arrival_rate))
            env.process(customer(env, server, service_rate, wait_times))

    env.process(arrivals(env))
    env.run(until=sim_time)

    # np.mean([]) would also give NaN but emits a RuntimeWarning; return the
    # same value (and type) explicitly so an empty run is handled on purpose.
    if not wait_times:
        return np.float64(np.nan)
    return np.mean(wait_times)

In [None]:
# Seed BOTH random number generators: NumPy drives the parameter sampling
# below, while the queue simulation draws its inter-arrival and service times
# from the standard-library `random` module. Seeding only NumPy would leave
# the simulated wait times different on every run.
SEED = 42
np.random.seed(SEED)
random.seed(SEED)

num_simulations = 1000

# One (arrival rate, service rate, simulated duration) triple per run.
arrival_rates = np.random.uniform(1, 10, num_simulations)
service_rates = np.random.uniform(2, 15, num_simulations)
sim_times = np.random.uniform(50, 200, num_simulations)

In [None]:
# Run one simulation per sampled parameter triple, in sampling order, and keep
# the three inputs next to the measured mean wait.
data = [
    [rate_in, rate_out, horizon, run_simulation(rate_in, rate_out, horizon)]
    for rate_in, rate_out, horizon in zip(arrival_rates, service_rates, sim_times)
]

dataset_columns = [
    "arrival_rate",
    "service_rate",
    "simulation_time",
    "avg_wait_time",
]
df = pd.DataFrame(data, columns=dataset_columns)

# Preview the first rows of the generated dataset.
df.head()

In [None]:
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.linear_model import LinearRegression
from sklearn.tree import DecisionTreeRegressor
from sklearn.ensemble import RandomForestRegressor
from sklearn.svm import SVR
from sklearn.neighbors import KNeighborsRegressor
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler

# Features are every column except the simulated mean wait, which is the target.
X = df.drop("avg_wait_time", axis=1)
y = df["avg_wait_time"]

# Hold out 20% of the runs for evaluation; fixed seed keeps the split stable.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

models = {
    "Linear Regression": LinearRegression(),
    # Tree-based models are randomized; pin the seed so scores are repeatable.
    "Decision Tree": DecisionTreeRegressor(random_state=42),
    "Random Forest": RandomForestRegressor(random_state=42),
    # SVR and KNN depend on feature scale, and the columns span different
    # ranges (rates vs. simulated duration), so standardize inside a pipeline.
    # The scaler is fit on the training split only.
    "SVR": make_pipeline(StandardScaler(), SVR()),
    "KNN": make_pipeline(StandardScaler(), KNeighborsRegressor()),
}

results = []

# Fit each model on the training split and score it on the held-out split.
for name, model in models.items():
    model.fit(X_train, y_train)
    preds = model.predict(X_test)
    results.append([
        name,
        mean_squared_error(y_test, preds),
        r2_score(y_test, preds)
    ])

results_df = pd.DataFrame(results, columns=["Model", "MSE", "R2"])
results_df

In [None]:
import matplotlib.pyplot as plt

# Bar chart of the R-squared score of each model in results_df.
plt.figure()
plt.bar(results_df["Model"], results_df["R2"])
plt.xticks(rotation=45)
plt.ylabel("R\u00b2")
# "\u00b2" is the superscript-two character; written as an escape so the title
# cannot be mangled by a wrong file encoding (it previously rendered as "RÂ²").
plt.title("Model Comparison using R\u00b2 Score")
plt.show()