In [234]:
import pandas as pd
import numpy as np
import random

def generate_item_df(num_items = 20, weight_range = (0, 10), value_range = (0, 1)):
    weights = np.random.uniform(*weight_range, num_items)
    values = np.random.uniform(*value_range, num_items)
    return pd.DataFrame({
        'Name': [str(i) for i in range(num_items)],
        'Weight': weights,
        'Value': values
    })

df = generate_item_df()
print(df.head(4))

  Name    Weight     Value
0    0  3.331509  0.150684
1    1  4.279698  0.071599
2    2  7.019322  0.955734
3    3  7.887609  0.511730


In [235]:
def genetic_algorithm(df, max_weight=30, population_size=100, num_generations=50, mutation_rate=0.2):
    weights = df["Weight"].tolist()
    values = df["Value"].tolist()
    n = len(values)
    
    # Normalise mutations, so that longer lists dont become more mutated.
    mutation_rate /= n

    average_weight = sum(weights) / n
    expected_items = max_weight / average_weight
    bit_flip_probability = expected_items / n

    # Initialise the population so that each individual has roughly the correct weight
    population = [[1 if random.random() < bit_flip_probability else 0 for _ in range(n)]
                  for _ in range(population_size)]
    
    def get_total_value(individual):
        return sum(value if individual[i] else 0 for i, value in enumerate(values))
    
    def get_total_weight(individual):
        return sum(weight if individual[i] else 0 for i, weight in enumerate(weights))

    def fitness(individual):
        total_weight = get_total_weight(individual)
        total_value = get_total_value(individual)
        if total_weight > max_weight:
            return 0
        return total_value

    def tournament_selection(population, k=3):
        return max(random.sample(population, k), key=fitness)

    def crossover(parent1, parent2):
        crossover_point = random.randint(1, len(parent1) - 1)
        return parent1[:crossover_point] + parent2[crossover_point:], parent2[:crossover_point] + parent1[crossover_point:]

    def mutate(individual):
        for i in range(len(individual)):
            if random.random() < mutation_rate:
                individual[i] = 1 - individual[i] # Flip bit
        return individual

    for _ in range(num_generations):
        new_population = []
        for _ in range(population_size // 2):
            parent1 = tournament_selection(population)
            parent2 = tournament_selection(population)
            offspring1, offspring2 = crossover(parent1, parent2)
            new_population.append(mutate(offspring1))
            new_population.append(mutate(offspring2))
        population = new_population

    best = max(population, key=fitness)
    return df[[bool(bit) for bit in best]].copy(), get_total_value(best), get_total_weight(best)


In [236]:
def show_genetic_algorithm(df, ga_func=genetic_algorithm, n_runs=4,
                           population_size=100, num_generations=50, mutation_rate=0.2):
    for _ in range(n_runs):
        items, value, weight = ga_func(df, population_size=population_size, num_generations=num_generations, mutation_rate=mutation_rate)
        print(f"Items: {', '.join(items['Name'].tolist())}")
        print(f"Total weight: {weight:.2f}")
        print(f"Total value: {value:.2f}")
        print()


In [237]:
# Test with the original 20 items
show_genetic_algorithm(df)
%timeit genetic_algorithm(df)

Items: 2, 4, 6, 10, 13, 14, 18
Total weight: 29.87
Total value: 4.90

Items: 2, 6, 7, 10, 13, 14, 15, 18
Total weight: 28.67
Total value: 4.91

Items: 2, 8, 10, 13, 14, 15, 18
Total weight: 29.18
Total value: 4.69

Items: 2, 4, 6, 10, 13, 14, 18
Total weight: 29.87
Total value: 4.90

65.8 ms ± 1.1 ms per loop (mean ± std. dev. of 7 runs, 10 loops each)


In [238]:
# Test with 1000 items - slow
df_1000 = generate_item_df(1000)
show_genetic_algorithm(df_1000)
timeit_1000 = %timeit -o genetic_algorithm(df_1000)

Items: 50, 79, 88, 107, 143, 184, 257, 259, 329, 453, 473, 476, 530, 549, 621, 678, 684, 739, 766, 847, 889, 902, 998
Total weight: 29.87
Total value: 16.74

Items: 38, 47, 110, 145, 178, 179, 192, 204, 259, 282, 358, 359, 395, 422, 476, 496, 506, 627, 645, 652, 684, 686, 740, 765, 782, 895, 910, 915, 947, 949, 950, 967
Total weight: 30.00
Total value: 23.62

Items: 5, 79, 80, 155, 181, 190, 204, 259, 285, 328, 358, 402, 473, 476, 524, 530, 603, 638, 645, 671, 740, 794, 864, 902, 932, 977
Total weight: 30.00
Total value: 18.65

Items: 143, 151, 220, 222, 225, 296, 311, 394, 447, 473, 476, 556, 634, 652, 657, 664, 686, 740, 755, 902, 932, 961, 979
Total weight: 29.73
Total value: 16.89

2.28 s ± 32 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)


In [239]:
# Rewrite using numpy
def genetic_algorithm_np(df, max_weight=30, population_size=100, num_generations=50, mutation_rate=0.2):
    weights = df["Weight"].to_numpy()
    values = df["Value"].to_numpy()
    n = len(values)
    
    # Normalise mutations, so that longer lists dont become more mutated.
    mutation_rate /= n

    average_weight = np.mean(weights)
    expected_items = max_weight / average_weight
    bit_flip_probability = expected_items / n

    # Initialise the population so that each individual has roughly the correct weight
    population = np.random.rand(population_size, n) < bit_flip_probability
    
    def fitness(individual):
        total_weight = np.dot(weights, individual)
        total_value = np.dot(values, individual)
        return total_value if total_weight <= max_weight else 0

    def tournament_selection(population, k=3):
        return max(random.sample(list(population), k), key=fitness)

    def crossover(parent1, parent2):
        crossover_point = random.randint(1, n - 1)
        offspring1 = np.concatenate([parent1[:crossover_point], parent2[crossover_point:]])
        offspring2 = np.concatenate([parent2[:crossover_point], parent1[crossover_point:]])
        return offspring1, offspring2

    def mutate(individual):
        mutation_mask = np.random.rand(n) < mutation_rate
        individual[mutation_mask] = 1 - individual[mutation_mask] # Flip bit
        return individual

    for _ in range(num_generations):
        new_population = []
        for _ in range(population_size // 2):
            parent1 = tournament_selection(population)
            parent2 = tournament_selection(population)
            offspring1, offspring2 = crossover(parent1, parent2)
            new_population.append(mutate(offspring1))
            new_population.append(mutate(offspring2))
        population = np.array(new_population)

    best = max(population, key=fitness)
    best_items = df.iloc[best.astype(bool)].copy()
    return best_items, np.dot(values, best), np.dot(weights, best)

In [240]:
# Test numpy version - 10x faster
show_genetic_algorithm(df_1000, ga_func=genetic_algorithm_np)
timeit_1000_fast = %timeit -o genetic_algorithm_np(df_1000)
print(f"{timeit_1000.best/timeit_1000_fast.best:.2f}x faster")

Items: 30, 44, 50, 80, 143, 145, 179, 284, 330, 351, 409, 422, 437, 450, 466, 603, 619, 722, 724, 789, 878, 910, 911, 932, 940, 979, 998
Total weight: 29.93
Total value: 20.64

Items: 38, 73, 80, 109, 143, 178, 204, 288, 329, 447, 496, 530, 634, 674, 686, 744, 748, 753, 783, 899, 902, 904, 915, 940, 950
Total weight: 29.74
Total value: 18.96

Items: 30, 44, 107, 109, 123, 192, 202, 259, 323, 329, 409, 443, 500, 618, 627, 652, 658, 674, 748, 765, 782, 786, 813, 924, 948, 998
Total weight: 29.97
Total value: 18.78

Items: 49, 107, 133, 140, 178, 204, 305, 358, 361, 383, 444, 473, 549, 551, 574, 632, 686, 741, 766, 793, 815, 878, 902, 921, 947
Total weight: 29.89
Total value: 18.00

232 ms ± 15.2 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)
10.32x faster
