In [3]:
import random
import numpy as np
from deap import algorithms, base, creator, tools
import pandas as pd

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import mean_squared_error


In [4]:
# Load the dataset
X_train = pd.read_csv("D:/WORK/test/major_proj_karuna/bikerental/data/train/X_train.csv")
y_train=pd.read_csv("D:/WORK/test/major_proj_karuna/bikerental/data/train/y_train.csv")


In [5]:
# Preprocess the data
le = LabelEncoder()
X_train['VehiclesTitle'] = le.fit_transform(X_train['VehiclesTitle'])
X_train['FullName'] = le.fit_transform(X_train['FullName'])

In [6]:
# Define the fitness function
def fitness(individual):
    # Calculate the predicted revenue using the individual's genes
    predicted_revenue = individual.dot(X_train.T)
    
    # Calculate the mean squared error between the predicted revenue and the actual revenue
    mse = mean_squared_error(y_train, predicted_revenue)
    
    # The fitness function is the negative mean squared error
    return -mse,


In [7]:
# Define the fitness function (replace with your actual logic)
def evaluate(individual):
    score = 0
    for i in range(len(individual)):
        if individual[i] == 1:
            # Calculate the score based on the user's preferences (price, vehicle type, etc.)
            score += X_train.iloc[i]['PricePerDay'] * X_train.iloc[i]['Days']
    return (score,)  # Return a tuple with the fitness value


In [8]:
# Create the individual class
creator.create("FitnessMax", base.Fitness, weights=(1.0,))
creator.create("Individual", list, fitness=creator.FitnessMax)

# Create the population
toolbox = base.Toolbox()
toolbox.register("attr_bool", np.random.randint, 0, 1)
toolbox.register("individual", tools.initRepeat, creator.Individual, toolbox.attr_bool, n=X_train.shape[1])
toolbox.register("population", tools.initRepeat, list, toolbox.individual)

# Create the genetic operators
toolbox.register("mate", tools.cxTwoPoint)
toolbox.register("mutate", tools.mutFlipBit, indpb=0.05)
toolbox.register("select", tools.selTournament, tournsize=3)

In [9]:
# Initialize the population
population = []
for _ in range(50):
    individual = set()
    while len(individual) < 5:
        bike_scooter = random.choice(X_train.index)
        individual.add(bike_scooter)
    population.append(individual)

In [10]:
population = tools.initRepeat(container=list, n=50, func=lambda: tools.initRepeat(list, n=len(X_train), func=toolbox.attr_bool))



In [11]:
# Define the genetic operators
creator.create("FitnessMax", base.Fitness, weights=(1.0,))
creator.create("Individual", list, fitness=creator.FitnessMax)


toolbox = base.Toolbox()
toolbox.register("attr_bool", random.randint, 0, 1)
toolbox.register("individual", tools.initRepeat, creator.Individual, toolbox.attr_bool, n=len(X_train))
toolbox.register("population", tools.initRepeat, list, toolbox.individual)
toolbox.register("evaluate", evaluate)
toolbox.register("mate", tools.cxTwoPoint)
toolbox.register("mutate", tools.mutFlipBit, indpb=0.05)
toolbox.register("select", tools.selTournament, tournsize=3)



In [12]:
population = [creator.Individual(tools.initRepeat(list, n=len(X_train), func=toolbox.attr_bool))
             for _ in range(50)]


In [13]:
# Run the genetic algorithm
hof = tools.HallOfFame(1)
stats = tools.Statistics(lambda ind: ind.fitness.values)
stats.register("avg", np.mean)
stats.register("min", np.min)
stats.register("max", np.max)


In [14]:
population, logbook = algorithms.eaSimple(population, toolbox, cxpb=0.7, mutpb=0.2, ngen=100, stats=stats, halloffame=hof, verbose=True)

gen	nevals	avg     	min     	max    
0  	50    	-4.51622	-14.1515	7.96419
1  	42    	-0.67497	-11.032 	7.34326
2  	41    	2.68369 	-3.13358	10.5733
3  	40    	5.42979 	-2.47231	12.0239
4  	33    	7.38364 	1.69703 	12.0239
5  	41    	9.4032  	3.73258 	13.2044
6  	43    	10.8705 	5.06214 	13.3798
7  	38    	12.0696 	5.5559  	15.7039
8  	38    	13.4934 	9.32357 	16.9133
9  	31    	14.3139 	5.64097 	17.2237
10 	33    	15.0308 	9.32357 	18.5068
11 	39    	16.0402 	12.0713 	19.3803
12 	40    	16.5887 	11.2416 	20.1802
13 	39    	17.3721 	14.034  	21.0282
14 	30    	18.1926 	11.4179 	21.6167
15 	38    	18.7865 	11.5819 	22.0991
16 	39    	19.7752 	14.6155 	22.0991
17 	38    	20.8171 	12.6203 	23.4585
18 	37    	21.5206 	15.7039 	23.7689
19 	38    	21.9168 	18.1481 	23.7803
20 	31    	22.6002 	19.0225 	23.7689
21 	40    	22.4858 	14.8119 	24.3425
22 	44    	22.5665 	15.0242 	25.0152
23 	36    	23.3812 	17.8815 	25.0152
24 	41    	23.6425 	18.3937 	25.1502
25 	43    	24.1155 	20.3047 	25.2748
2

In [15]:
# Return the best individual
best_individual = hof[0]
recommendation = []
for i in range(len(best_individual)):
    if best_individual[i] == 1:
        recommendation.append(X_train.iloc[i])

In [16]:
def format_recommendations(recommendations):

        print(f"\n\nTop 5 Recommendations:\n")
        formatted_output = ""
        for rec in recommendations[:5]:
            
            formatted_output += "\n"
            formatted_output += f"VehicleId: {rec['VehicleId']}\n"
            formatted_output += f"VehiclesTitle: {rec['VehiclesTitle']}\n"
            formatted_output += f"PricePerDay(Npr): {rec['PricePerDay']:.2f}\n"
            formatted_output += f"id: {rec['id_y']}\n"

        return formatted_output

In [17]:
# Return the best individual
best_individual = hof[0]
recommendation = []
for i in range(len(best_individual)):
    if best_individual[i] == 1:
        recommendation.append(X_train.iloc[i])

print(f"Best individual:\n {best_individual}\n\n")
print(format_recommendations(recommendation))


Best individual:
 [0, 1, 0, 0, 0, 1, 1, 0, 1, 0, 0, 1, 1, 1, 0, 0, 0, 1, 1, 1, 0, 0, 1, 0, 1, 0, 0, 0, 1, 0, 1, 1, 1, 0, 1, 0, 1, 1, 1, 0, 0, 0, 1, 0, 1, 1, 0, 0, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 0, 0, 0, 1, 1, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 1, 0]




Top 5 Recommendations:


VehicleId: 32
VehiclesTitle: 69
PricePerDay(Npr): 0.77
id: 6

VehicleId: 14
VehiclesTitle: 5
PricePerDay(Npr): 1.41
id: 15

VehicleId: 28
VehiclesTitle: 27
PricePerDay(Npr): 1.43
id: 20

VehicleId: 5
VehiclesTitle: 0
PricePerDay(Npr): 1.54
id: 9

VehicleId: 35
VehiclesTitle: 32
PricePerDay(Npr): 0.14
id: 46



In [18]:
import random
import numpy as np
from deap import algorithms, base, creator, tools
import pandas as pd

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import mean_squared_error


In [19]:
import random
import numpy as np
from deap import algorithms, base, creator, tools
import pandas as pd

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import mean_squared_error

# Load the dataset
X_train = pd.read_csv("D:/WORK/test/major_proj_karuna/bikerental/data/test/X_test.csv")
y_train=pd.read_csv("D:/WORK/test/major_proj_karuna/bikerental/data/test/y_test.csv")

# Preprocess the data
le = LabelEncoder()
X_train['VehiclesTitle'] = le.fit_transform(X_train['VehiclesTitle'])
X_train['FullName'] = le.fit_transform(X_train['FullName'])

# Define the fitness function
def fitness(individual):
    # Calculate the predicted revenue using the individual's genes
    predicted_revenue = individual.dot(X_train.T)
    
    # Calculate the mean squared error between the predicted revenue and the actual revenue
    mse = mean_squared_error(y_train, predicted_revenue)
    
    # The fitness function is the negative mean squared error
    return -mse,


# Define the fitness function (replace with your actual logic)
def evaluate(individual):
    score = 0
    for i in range(len(individual)):
        if individual[i] == 1:
            # Calculate the score based on the user's preferences (price, vehicle type, etc.)
            score += X_train.iloc[i]['PricePerDay'] * X_train.iloc[i]['Days']
    return (score,)  # Return a tuple with the fitness value


# Create the individual class
creator.create("FitnessMax", base.Fitness, weights=(1.0,))
creator.create("Individual", list, fitness=creator.FitnessMax)

# Create the population
toolbox = base.Toolbox()
toolbox.register("attr_bool", np.random.randint, 0, 1)
toolbox.register("individual", tools.initRepeat, creator.Individual, toolbox.attr_bool, n=X_train.shape[1])
toolbox.register("population", tools.initRepeat, list, toolbox.individual)

# Create the genetic operators
toolbox.register("mate", tools.cxTwoPoint)
toolbox.register("mutate", tools.mutFlipBit, indpb=0.05)
toolbox.register("select", tools.selTournament, tournsize=3)

# Initialize the population
population = []
for _ in range(50):
    individual = set()
    while len(individual) < 5:
        bike_scooter = random.choice(X_train.index)
        individual.add(bike_scooter)
    population.append(individual)

population = tools.initRepeat(container=list, n=50, func=lambda: tools.initRepeat(list, n=len(X_train), func=toolbox.attr_bool))

# Define the genetic operators
creator.create("FitnessMax", base.Fitness, weights=(1.0,))
creator.create("Individual", list, fitness=creator.FitnessMax)


toolbox = base.Toolbox()
toolbox.register("attr_bool", random.randint, 0, 1)
toolbox.register("individual", tools.initRepeat, creator.Individual, toolbox.attr_bool, n=len(X_train))
toolbox.register("population", tools.initRepeat, list, toolbox.individual)
toolbox.register("evaluate", evaluate)
toolbox.register("mate", tools.cxTwoPoint)
toolbox.register("mutate", tools.mutFlipBit, indpb=0.05)
toolbox.register("select", tools.selTournament, tournsize=3)


population = [creator.Individual(tools.initRepeat(list, n=len(X_train), func=toolbox.attr_bool))
             for _ in range(50)]

# Run the genetic algorithm
hof = tools.HallOfFame(1)
stats = tools.Statistics(lambda ind: ind.fitness.values)
stats.register("avg", np.mean)
stats.register("min", np.min)
stats.register("max", np.max)

population, logbook = algorithms.eaSimple(population, toolbox, cxpb=0.7, mutpb=0.2, ngen=100, stats=stats, halloffame=hof, verbose=True)

# Return the best individual
best_individual = hof[0]
recommendation = []
for i in range(len(best_individual)):
    if best_individual[i] == 1:
        recommendation.append(X_train.iloc[i])







gen	nevals	avg   	min     	max    
0  	50    	4.1766	-1.29097	9.16307
1  	40    	5.83797	3.06868 	9.16307
2  	38    	7.18517	4.36141 	10.1532
3  	44    	8.2083 	5.54538 	10.811 
4  	37    	9.29277	5.85584 	11.8415
5  	33    	9.91448	6.97404 	11.8415




6  	33    	10.5871	6.80653 	11.8415
7  	41    	11.2227	9.16307 	11.8415
8  	41    	11.5669	9.09379 	11.9511
9  	31    	11.7115	10.2155 	11.9915
10 	41    	11.6562	10.1462 	11.9915
11 	43    	11.7366	10.0839 	12.13  
12 	40    	11.6585	8.4983  	12.13  
13 	38    	11.9123	10.6829 	12.28  
14 	38    	11.8715	9.26568 	12.28  
15 	38    	11.7464	9.00258 	12.28  
16 	39    	11.9426	8.26062 	12.28  
17 	31    	12.0347	10.2593 	12.28  
18 	39    	12.0967	9.47354 	12.28  
19 	41    	12.1708	9.32357 	12.28  
20 	40    	12.1772	10.6794 	12.28  
21 	38    	11.9676	9.11221 	12.28  
22 	36    	12.0304	8.6737  	12.28  
23 	30    	12.1793	9.29112 	12.28  
24 	34    	12.1331	9.49546 	12.28  
25 	37    	12.1932	10.6321 	12.28  
26 	36    	12.1203	9.64193 	12.28  
27 	45    	12.0682	9.25779 	12.28  
28 	32    	12.0906	9.78049 	12.28  
29 	37    	12.2   	9.60509 	12.28  
30 	42    	12.0553	8.76491 	12.28  
31 	42    	12.061 	9.51388 	12.28  
32 	35    	12.2494	10.939  	12.28  
33 	43    	12.1039	9.60158 	

In [20]:
import pickle

# Save the population as a pickle file
with open('population.pkl', 'wb') as f:
    pickle.dump(population, f)

In [21]:
# Return the best individual
best_individual = hof[0]
recommendation = []
for i in range(len(best_individual)):
    if best_individual[i] == 1:
        recommendation.append(X_train.iloc[i])

print(f"Best individual:\n {best_individual}\n\n")
print(format_recommendations(recommendation))

Best individual:
 [0, 1, 0, 1, 0, 1, 1, 0, 0, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 1]




Top 5 Recommendations:


VehicleId: 26
VehiclesTitle: 12
PricePerDay(Npr): 1.65
id: 51

VehicleId: 33
VehiclesTitle: 10
PricePerDay(Npr): 1.10
id: 44

VehicleId: 4
VehiclesTitle: 8
PricePerDay(Npr): 0.33
id: 39

VehicleId: 18
VehiclesTitle: 4
PricePerDay(Npr): 1.36
id: 22

VehicleId: 1
VehiclesTitle: 0
PricePerDay(Npr): 0.38
id: 1



In [22]:
import random
import numpy as np
import pandas as pd
from deap import algorithms, base, creator, tools

def recommend_bikes(csv_file):
    # Load the dataset
    X_train = pd.read_csv(csv_file)

    # Preprocess the data
    le = LabelEncoder()
    X_train['VehiclesTitle'] = le.fit_transform(X_train['VehiclesTitle'])
    X_train['FullName'] = le.fit_transform(X_train['FullName'])

    # Define the fitness function
    def fitness(individual):
        # Calculate the predicted revenue using the individual's genes
        predicted_revenue = individual.dot(X_train.T)

        # Calculate the mean squared error between the predicted revenue and the actual revenue
        mse = mean_squared_error(y_train, predicted_revenue)

        # The fitness function is the negative mean squared error
        return -mse,

    # Define the fitness function (replace with your actual logic)
    def evaluate(individual):
        score = 0
        for i in range(len(individual)):
            if individual[i] == 1:
                # Calculate the score based on the user's preferences (price, vehicle type, etc.)
                score += X_train.iloc[i]['PricePerDay'] * X_train.iloc[i]['Days']
        return (score,)  # Return a tuple with the fitness value

    # Create the individual class
    creator.create("FitnessMax", base.Fitness, weights=(1.0,))
    creator.create("Individual", list, fitness=creator.FitnessMax)

    # Create the population
    toolbox = base.Toolbox()
    toolbox.register("attr_bool", np.random.randint, 0, 1)
    toolbox.register("individual", tools.initRepeat, creator.Individual, toolbox.attr_bool, n=X_train.shape[1])
    toolbox.register("population", tools.initRepeat, list, toolbox.individual)

    # Create the genetic operators
    toolbox.register("mate", tools.cxTwoPoint)
    toolbox.register("mutate", tools.mutFlipBit, indpb=0.05)
    toolbox.register("select", tools.selTournament, tournsize=3)

    # Initialize the population
    population = [creator.Individual(tools.initRepeat(list, n=len(X_train), func=toolbox.attr_bool)) for _ in range(50)]

    # Run the genetic algorithm
    hof = tools.HallOfFame(1)
    stats = tools.Statistics(lambda ind: ind.fitness.values)
    stats.register("avg", np.mean)
    stats.register("min", np.min)
    stats.register("max", np.max)

    population, logbook = algorithms.eaSimple(population, toolbox, cxpb=0.7, mutpb=0.2, ngen=100, stats=stats, halloffame=hof, verbose=False)

    # Return the best individual
    best_individual = hof[0]
    recommendations = []
    for i in range(len(best_individual)):
        if best_individual[i] == 1:
            recommendations.append(X_train.iloc[i].to_dict())
    return recommendations

In [23]:
recommend_bikes("D:/WORK/test/major_proj_karuna/bikerental/data/test/X_test.csv")



AttributeError: 'Toolbox' object has no attribute 'evaluate'