In [1]:
import pandas as pd
import pymysql
import random


def preprocess_data(host='127.0.0.1', port=3306, user='root', password='', db='bikerental'):
    """
    Connects to a MySQL database, retrieves vehicle data,
    preprocesses it, and returns a pandas DataFrame.

    Parameters
    ----------
    host, port, user, password, db :
        Connection settings forwarded to ``pymysql.connect``. The defaults
        reproduce the previously hard-coded local connection.

    Returns
    -------
    pandas.DataFrame
        One row per record of ``tblvehicles`` with the selected columns.
        Null values are filled and the feature-flag columns are cast to int.

    Raises
    ------
    Whatever ``pymysql`` raises on connection or query failure; the
    connection is closed before the exception propagates.
    """
    # Single source of truth for both the SELECT list and the DataFrame
    # header, so the two can never drift apart.
    columns = [
        'id', 'VehiclesTitle', 'VehiclesBrand', 'PricePerDay', 'FuelType', 'ModelYear', 'SeatingCapacity',
        'AirConditioner', 'PowerDoorLocks', 'AntiLockBrakingSystem', 'BrakeAssist', 'PowerSteering',
        'DriverAirbag', 'PassengerAirbag', 'PowerWindows', 'CDPlayer', 'CentralLocking', 'CrashSensor',
        'LeatherSeats']

    # Columns that are converted to integers after null handling.
    flag_columns = [
        'AirConditioner', 'PowerDoorLocks', 'AntiLockBrakingSystem', 'BrakeAssist', 'PowerSteering',
        'DriverAirbag', 'PassengerAirbag', 'PowerWindows', 'CDPlayer', 'CentralLocking', 'CrashSensor',
        'LeatherSeats']

    # Connect to the MySQL database
    connection = pymysql.connect(host=host,
                                 port=port,
                                 user=user,
                                 password=password,
                                 db=db)
    try:
        # The cursor is closed when the with-block exits, even on error.
        with connection.cursor() as cursor:
            cursor.execute('SELECT ' + ', '.join(columns) + ' FROM tblvehicles')
            # Fetch all the rows as a sequence of tuples
            rows = cursor.fetchall()
    finally:
        # Always release the connection, including when the query fails.
        connection.close()

    # Create a pandas DataFrame from the fetched rows
    df = pd.DataFrame(list(rows), columns=columns)

    # Replace null values with a random choice (0 or 1).
    # NOTE(review): one value is drawn per call and applied to every null in
    # every column (including text and price columns), so the output is
    # non-deterministic -- confirm this imputation is really intended.
    df = df.fillna(random.choice([0, 1]))

    # Convert the feature-flag columns to integers
    df = df.astype({column: int for column in flag_columns})

    # Return the preprocessed DataFrame
    return df


In [5]:
import random
import numpy as np
from deap import base, creator, tools, algorithms

def recommendation_function(preprocessed_data, seed=None):
    """
    Select vehicles with a genetic algorithm (DEAP ``eaSimple``).

    Each individual is a list of 0/1 genes, one per row of
    ``preprocessed_data``; its fitness is the sum of ``PricePerDay`` over the
    rows whose gene is 1, and the algorithm maximises that sum.

    Parameters
    ----------
    preprocessed_data : pandas.DataFrame
        Must contain a ``PricePerDay`` column. Rows are addressed by position,
        so any index is accepted.
    seed : int, optional
        When given, seeds the ``random`` module before the run so results are
        reproducible. ``None`` (default) leaves the global RNG state untouched.

    Returns
    -------
    list of pandas.Series
        The rows selected by the best individual found, in row order. Returns
        an empty list when ``preprocessed_data`` has no rows.
    """
    # With zero rows every individual would be empty and crossover cannot
    # pick cut points, so short-circuit instead of crashing.
    if len(preprocessed_data) == 0:
        return []

    if seed is not None:
        random.seed(seed)

    # Read the prices once; the fitness function is called thousands of times.
    prices = preprocessed_data['PricePerDay'].tolist()

    # Define the fitness function
    def evaluate(individual):
        # DEAP expects fitness values as a tuple, hence the trailing comma.
        return (sum(price for gene, price in zip(individual, prices) if gene == 1),)

    # Create the individual class and population.
    # creator.create registers classes globally; guard it so that calling this
    # function again (or re-running the cell) does not redefine them and warn.
    if not hasattr(creator, "FitnessMax"):
        creator.create("FitnessMax", base.Fitness, weights=(1.0,))
    if not hasattr(creator, "Individual"):
        creator.create("Individual", list, fitness=creator.FitnessMax)
    toolbox = base.Toolbox()
    toolbox.register("attr_bool", random.randint, 0, 1)
    toolbox.register("individual", tools.initRepeat, creator.Individual, toolbox.attr_bool, n=len(preprocessed_data))
    toolbox.register("population", tools.initRepeat, list, toolbox.individual)
    toolbox.register("evaluate", evaluate)
    toolbox.register("mate", tools.cxTwoPoint)
    toolbox.register("mutate", tools.mutFlipBit, indpb=0.05)
    toolbox.register("select", tools.selTournament, tournsize=3)

    # Initialize population
    population = toolbox.population(n=50)

    # Run the genetic algorithm, tracking the single best individual seen
    hof = tools.HallOfFame(1)
    stats = tools.Statistics(lambda ind: ind.fitness.values)
    stats.register("avg", np.mean)
    stats.register("min", np.min)
    stats.register("max", np.max)
    population, logbook = algorithms.eaSimple(population, toolbox, cxpb=0.7, mutpb=0.2, ngen=100, stats=stats, halloffame=hof, verbose=True)

    # Map the best individual's genes back to the rows they select
    best_individual = hof[0]
    recommendation = [preprocessed_data.iloc[i] for i, val in enumerate(best_individual) if val == 1]

    # Return every selected row; callers decide how many to display
    return recommendation


In [6]:
def format_recommendations(recommendations):
    """
    Print a plain-text listing of at most the first five recommendations.

    Each entry of ``recommendations`` must support key lookup for ``id``,
    ``VehiclesTitle``, ``PricePerDay`` and the feature flags shown below.
    A flag is rendered as ``Yes`` when it equals 1 and ``No`` otherwise.
    The text is written to stdout; the function itself returns ``None``.
    """
    def yes_no(flag):
        return 'Yes' if flag == 1 else 'No'

    # The heading is emitted before any entry is read.
    print("\n\nTop 5 Recommendations:\n")

    sections = []
    for vehicle in recommendations[:5]:
        sections.append(
            "\n"
            f"VehicleId: {vehicle['id']}\n"
            f"VehiclesTitle: {vehicle['VehiclesTitle']}\n"
            f"PricePerDay(Npr): {vehicle['PricePerDay']:.2f}\n"
            f"AirConditioner: {yes_no(vehicle['AirConditioner'])}\n"
            f"AntiLockBrakingSystem: {yes_no(vehicle['AntiLockBrakingSystem'])}\n"
            f"DriverAirbag: {yes_no(vehicle['DriverAirbag'])}\n"
            f"PassengerAirbag: {yes_no(vehicle['PassengerAirbag'])}\n"
            f"LeatherSeats: {yes_no(vehicle['LeatherSeats'])}\n"
        )

    print("".join(sections))
    return None




In [7]:
def get_recommendation():
    """Load the vehicle data and return the recommender's result for it."""
    vehicle_frame = preprocess_data()
    return recommendation_function(vehicle_frame)
    

In [8]:
# Run the full pipeline; the genetic algorithm prints its per-generation statistics.
recommendations = get_recommendation()

gen	nevals	avg   	min	max   
0  	50    	342677	563	697748
1  	43    	530184	5636	697748
2  	37    	662070	346204	697748
3  	36    	697245	691549	697748
4  	37    	683743	351840	697748
5  	43    	697669	696889	697748
6  	35    	690841	352403	697748
7  	40    	697748	697748	697748
8  	30    	690795	352403	697748
9  	38    	697748	697748	697748
10 	34    	690728	352403	697748
11 	37    	697618	692112	697748
12 	40    	690841	352403	697748
13 	43    	697748	697748	697748
14 	36    	690694	351544	697748
15 	34    	690711	352403	697748
16 	36    	697607	692112	697748
17 	40    	690728	346767	697748
18 	44    	683680	351840	697748
19 	39    	690830	352403	697748
20 	41    	677027	7058  	697748
21 	31    	683934	7058  	697748
22 	42    	690841	352403	697748
23 	35    	697731	696889	697748
24 	39    	676661	346767	697748
25 	38    	690830	352403	697748
26 	38    	690451	345908	697748
27 	38    	697708	696889	697748
28 	39    	683821	346767	697748
29 	44    	690841	352403	697748
30 	45    	69084

In [9]:
# Print the first five recommendations as a plain-text listing.
format_recommendations(recommendations)



Top 5 Recommendations:


VehicleId: 1
VehiclesTitle: SS400
PricePerDay(Npr): 345345.00
AirConditioner: Yes
AntiLockBrakingSystem: Yes
DriverAirbag: Yes
PassengerAirbag: Yes
LeatherSeats: Yes

VehicleId: 2
VehiclesTitle: RS200
PricePerDay(Npr): 859.00
AirConditioner: Yes
AntiLockBrakingSystem: Yes
DriverAirbag: Yes
PassengerAirbag: Yes
LeatherSeats: Yes

VehicleId: 3
VehiclesTitle: R1
PricePerDay(Npr): 563.00
AirConditioner: Yes
AntiLockBrakingSystem: Yes
DriverAirbag: Yes
PassengerAirbag: Yes
LeatherSeats: Yes

VehicleId: 4
VehiclesTitle: Duke390
PricePerDay(Npr): 5636.00
AirConditioner: Yes
AntiLockBrakingSystem: Yes
DriverAirbag: Yes
PassengerAirbag: Yes
LeatherSeats: Yes

VehicleId: 5
VehiclesTitle: R1
PricePerDay(Npr): 345345.00
AirConditioner: Yes
AntiLockBrakingSystem: Yes
DriverAirbag: Yes
PassengerAirbag: Yes
LeatherSeats: Yes



In [6]:
import json

def format_recommendations(recommendations):
    """
    Convert at most the first five recommendations into plain dicts.

    Each entry of ``recommendations`` must support key lookup for ``id``,
    ``VehiclesTitle``, ``PricePerDay`` and the five feature flags listed
    below. The id is coerced to ``int``, the price to a ``float`` rounded to
    two decimals, and each flag to ``'Yes'`` (equals 1) or ``'No'``.

    Returns a list of dicts built only from built-in types, suitable for
    ``json.dumps``.
    """
    flag_fields = (
        "AirConditioner",
        "AntiLockBrakingSystem",
        "DriverAirbag",
        "PassengerAirbag",
        "LeatherSeats",
    )

    def to_record(vehicle):
        record = {
            "VehicleId": int(vehicle['id']),
            "VehiclesTitle": vehicle['VehiclesTitle'],
            "PricePerDay(Npr)": round(float(vehicle['PricePerDay']), 2),
        }
        for field in flag_fields:
            record[field] = 'Yes' if vehicle[field] == 1 else 'No'
        return record

    return [to_record(vehicle) for vehicle in recommendations[:5]]

def get_recommendation():
    """Fetch and preprocess the vehicle table, then run the recommender on it."""
    return recommendation_function(preprocess_data())

# Run the full pipeline (database load, then genetic algorithm) to get recommendations
recommendations = get_recommendation()

# Convert at most the first five recommendations to plain dicts and print them as one JSON array
formatted_recommendations = format_recommendations(recommendations)
print(json.dumps(formatted_recommendations))


gen	nevals	avg   	min	max   
0  	50    	356101	0  	697748
1  	43    	557744	1422	697748
2  	39    	689648	351544	697748
3  	38    	697584	696889	697748
4  	40    	697731	696889	697748
5  	36    	677005	352403	697748
6  	35    	697613	691549	697748
7  	40    	690700	352403	697748
8  	32    	697748	697748	697748
9  	35    	697624	692112	697748
10 	43    	697618	692112	697748
11 	38    	697737	697185	697748
12 	30    	697748	697748	697748
13 	41    	690717	352403	697748
14 	36    	677016	352403	697748
15 	31    	690711	352403	697748
16 	43    	697737	697185	697748
17 	39    	690795	352403	697748
18 	33    	690824	352403	697748
19 	35    	676892	346767	697748
20 	41    	697731	696889	697748
21 	43    	690830	352403	697748
22 	43    	690694	351544	697748
23 	38    	690841	352403	697748
24 	40    	677016	352403	697748
25 	36    	683821	346767	697748
26 	33    	697748	697748	697748
27 	39    	697601	692112	697748
28 	34    	690813	351544	697748
29 	40    	697720	696889	697748
30 	32    	69773

