In [308]:
import pandas as pd
import numpy as np
import random

def generate_item_df(num_items = 20, weight_range = (0, 10), value_range = (1, 5 + 1)):
    weights = np.round(np.random.uniform(*weight_range, num_items), 1)
    values = np.random.randint(*value_range, num_items)
    return pd.DataFrame({
        'Name': [str(i) for i in range(num_items)],
        'Weight': weights,
        'Value': values
    })

df = generate_item_df()
print(df.head(4))

  Name  Weight  Value
0    0     4.3      1
1    1     8.6      1
2    2     1.9      4
3    3     2.9      1


In [309]:
def genetic_algorithm(df, max_weight=30, population_size=100, num_generations=50, mutation_rate=0.5):
    weights = df["Weight"].tolist()
    values = df["Value"].tolist()
    n = len(values)
    
    # Normalise mutations, so that longer lists dont become more mutated.
    mutation_rate /= n

    average_weight = sum(weights) / n
    expected_items = max_weight / average_weight
    bit_flip_probability = expected_items / n

    # Initialise the population so that each individual has roughly the correct weight
    population = [[1 if random.random() < bit_flip_probability else 0 for _ in range(n)]
                  for _ in range(population_size)]
    
    def get_total_value(individual):
        return sum(value if individual[i] else 0 for i, value in enumerate(values))
    
    def get_total_weight(individual):
        return sum(weight if individual[i] else 0 for i, weight in enumerate(weights))

    def fitness(individual):
        total_weight = get_total_weight(individual)
        total_value = get_total_value(individual)
        if total_weight > max_weight:
            return 0
        return total_value

    def tournament_selection(population, k=3):
        return max(random.sample(population, k), key=fitness)

    def crossover(parent1, parent2):
        crossover_point = random.randint(1, len(parent1) - 1)
        return parent1[:crossover_point] + parent2[crossover_point:], parent2[:crossover_point] + parent1[crossover_point:]

    def mutate(individual):
        for i in range(len(individual)):
            if random.random() < mutation_rate:
                individual[i] = 1 - individual[i] # Flip bit
        return individual

    for _ in range(num_generations):
        new_population = []
        for _ in range(population_size // 2):
            parent1 = tournament_selection(population)
            parent2 = tournament_selection(population)
            offspring1, offspring2 = crossover(parent1, parent2)
            new_population.append(mutate(offspring1))
            new_population.append(mutate(offspring2))
        population = new_population

    best = max(population, key=fitness)
    return df[[bool(bit) for bit in best]].copy(), get_total_value(best), get_total_weight(best)


In [310]:
def show_genetic_algorithm(df, ga_func=genetic_algorithm, n_runs=4,
                           population_size=100, num_generations=50, mutation_rate=0.2):
    for _ in range(n_runs):
        items, value, weight = ga_func(df, population_size=population_size, num_generations=num_generations, mutation_rate=mutation_rate)
        print(f"Items: {', '.join(items['Name'].tolist())}")
        print(f"Total weight: {weight:.1f}")
        print(f"Total value: {value}")
        print()


In [311]:
# Test with the original 20 items
show_genetic_algorithm(df)
%timeit genetic_algorithm(df)

Items: 2, 4, 7, 9, 10, 11, 14, 16, 18, 19
Total weight: 29.3
Total value: 32

Items: 2, 4, 5, 7, 9, 11, 12, 14, 16, 19
Total weight: 29.7
Total value: 32

Items: 2, 4, 5, 9, 10, 11, 14, 16, 19
Total weight: 28.6
Total value: 33

Items: 2, 4, 6, 7, 9, 10, 11, 14, 16, 19
Total weight: 28.2
Total value: 32

69.8 ms ± 4.21 ms per loop (mean ± std. dev. of 7 runs, 10 loops each)


In [312]:
# Test with 1000 items - slow
df_1000 = generate_item_df(1000)
show_genetic_algorithm(df_1000)
timeit_1000 = %timeit -o genetic_algorithm(df_1000)

Items: 25, 37, 43, 108, 160, 172, 236, 239, 259, 286, 360, 365, 427, 539, 547, 569, 576, 650, 688, 728, 767, 781, 817, 819, 835, 926, 971, 994
Total weight: 29.6
Total value: 113

Items: 108, 145, 160, 219, 232, 272, 277, 286, 300, 327, 345, 374, 386, 395, 410, 577, 632, 710, 723, 781, 835, 911, 931, 979
Total weight: 29.1
Total value: 100

Items: 25, 63, 90, 154, 257, 277, 362, 386, 395, 417, 421, 481, 483, 500, 513, 569, 680, 705, 730, 748, 776, 781, 812, 813, 850, 911
Total weight: 29.9
Total value: 105

Items: 25, 63, 83, 90, 107, 108, 172, 196, 221, 257, 259, 277, 363, 378, 421, 468, 500, 513, 575, 776, 806, 836, 842, 853, 931, 994, 998
Total weight: 30.0
Total value: 105

2.22 s ± 39.5 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)


In [313]:
# Rewrite using numpy
def genetic_algorithm_np(df, max_weight=30, population_size=100, num_generations=50, mutation_rate=0.5):
    weights = df["Weight"].to_numpy()
    values = df["Value"].to_numpy()
    n = len(values)
    
    # Normalise mutations, so that longer lists dont become more mutated.
    mutation_rate /= n

    average_weight = np.mean(weights)
    expected_items = max_weight / average_weight
    bit_flip_probability = expected_items / n

    # Initialise the population so that each individual has roughly the correct weight
    population = np.random.rand(population_size, n) < bit_flip_probability
    
    def fitness(individual):
        total_weight = np.dot(weights, individual)
        total_value = np.dot(values, individual)
        return total_value if total_weight <= max_weight else 0

    def tournament_selection(population, k=3):
        return max(random.sample(list(population), k), key=fitness)

    def crossover(parent1, parent2):
        crossover_point = random.randint(1, n - 1)
        offspring1 = np.concatenate([parent1[:crossover_point], parent2[crossover_point:]])
        offspring2 = np.concatenate([parent2[:crossover_point], parent1[crossover_point:]])
        return offspring1, offspring2

    def mutate(individual):
        mutation_mask = np.random.rand(n) < mutation_rate
        individual[mutation_mask] = 1 - individual[mutation_mask] # Flip bit
        return individual

    for _ in range(num_generations):
        new_population = []
        for _ in range(population_size // 2):
            parent1 = tournament_selection(population)
            parent2 = tournament_selection(population)
            offspring1, offspring2 = crossover(parent1, parent2)
            new_population.append(mutate(offspring1))
            new_population.append(mutate(offspring2))
        population = np.array(new_population)

    best = max(population, key=fitness)
    best_items = df.iloc[best.astype(bool)].copy()
    return best_items, np.dot(values, best), np.dot(weights, best)

In [314]:
# Test numpy version - 10x faster
show_genetic_algorithm(df_1000, ga_func=genetic_algorithm_np)
timeit_1000_fast = %timeit -o genetic_algorithm_np(df_1000)
print(f"{timeit_1000.best/timeit_1000_fast.best:.2f}x faster")

Items: 25, 100, 160, 172, 221, 300, 327, 362, 373, 500, 537, 628, 641, 710, 714, 728, 741, 764, 767, 836, 911, 914, 927, 971
Total weight: 30.0
Total value: 99

Items: 25, 31, 59, 192, 221, 287, 345, 395, 396, 492, 500, 513, 582, 635, 657, 707, 710, 761, 767, 813, 817, 819, 956, 998
Total weight: 30.0
Total value: 88

Items: 84, 90, 97, 143, 154, 184, 225, 259, 339, 360, 428, 513, 575, 635, 765, 781, 787, 822, 931
Total weight: 30.0
Total value: 69

Items: 25, 31, 61, 107, 145, 257, 286, 310, 381, 484, 500, 531, 569, 575, 728, 791, 833, 835, 868, 927, 994
Total weight: 30.0
Total value: 81

266 ms ± 10.4 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)
8.41x faster


In [317]:
# Try different mutation rates
n_runs = 10
for mutation_rate in (0, 0.001, 0.005, 0.01, 0.05, 0.1, 0.5, 1, 5, 10, 50, 100):
    _, values, weights = list(zip(*[genetic_algorithm_np(df_1000, mutation_rate=mutation_rate) for _ in range(n_runs)]))
    print(f"Mutation rate: {mutation_rate}".ljust(22) + 
        f"Max weight: {max(weights):.1f}  "
        f"Average value: {sum(values)/n_runs:.1f}")

Mutation rate: 0      Max weight: 29.8  Average value: 55.3
Mutation rate: 0.001  Max weight: 29.8  Average value: 58.1
Mutation rate: 0.005  Max weight: 30.0  Average value: 57.2
Mutation rate: 0.01   Max weight: 30.0  Average value: 59.9
Mutation rate: 0.05   Max weight: 30.0  Average value: 66.5
Mutation rate: 0.1    Max weight: 30.0  Average value: 78.6
Mutation rate: 0.5    Max weight: 30.0  Average value: 104.8
Mutation rate: 1      Max weight: 29.7  Average value: 93.5
Mutation rate: 5      Max weight: 1167.8  Average value: 582.4
Mutation rate: 10     Max weight: 1751.1  Average value: 914.9
Mutation rate: 50     Max weight: 2593.4  Average value: 1453.8
Mutation rate: 100    Max weight: 2701.4  Average value: 1463.9


- It looks like somewhere between the mutation rates of 1 and 5, every member of the population becomes overweight.
- 0.5 - 1 looks optimal (for n = 1000 at least).