In [180]:
import pandas as pd
import numpy as np
import random

def generate_item_df(num_items = 20, weight_range = (0, 10), value_range = (0, 1)):
    weights = np.random.uniform(*weight_range, num_items)
    values = np.random.uniform(*value_range, num_items)
    return pd.DataFrame({
        'Name': [str(i) for i in range(num_items)],
        'Weight': weights,
        'Value': values
    })

df = generate_item_df()
print(df.head(4))

  Name    Weight     Value
0    0  4.447822  0.435130
1    1  2.149656  0.072205
2    2  9.080222  0.711223
3    3  9.307794  0.472355


In [181]:
def genetic_algorithm(df, max_weight=30, population_size=100, num_generations=50, mutation_rate=0.2):
    weights = df["Weight"].tolist()
    values = df["Value"].tolist()
    n = len(values)
    
    # Normalaise mutations, so that longer lists dont become more mutated.
    mutation_rate /= n

    average_weight = sum(weights) / n
    expected_items = max_weight / average_weight
    bit_flip_probability = expected_items / n

    # Initialise the population so that each individual has roughly the correct weight
    population = [[1 if random.random() < bit_flip_probability else 0 for _ in range(n)]
                  for _ in range(population_size)]
    
    def get_total_value(individual):
        return sum(value if individual[i] else 0 for i, value in enumerate(values))
    
    def get_total_weight(individual):
        return sum(weight if individual[i] else 0 for i, weight in enumerate(weights))

    def fitness(individual):
        total_weight = get_total_weight(individual)
        total_value = get_total_value(individual)
        if total_weight > max_weight:
            return 0
        return total_value

    def tournament_selection(population, k=3):
        return max(random.sample(population, k), key=fitness)

    def crossover(parent1, parent2):
        crossover_point = random.randint(1, len(parent1) - 1)
        return parent1[:crossover_point] + parent2[crossover_point:], parent2[:crossover_point] + parent1[crossover_point:]

    def mutate(individual):
        for i in range(len(individual)):
            if random.random() < mutation_rate:
                individual[i] = 1 - individual[i] # Flip bit
        return individual

    for _ in range(num_generations):
        new_population = []
        for _ in range(population_size // 2):
            parent1 = tournament_selection(population)
            parent2 = tournament_selection(population)
            offspring1, offspring2 = crossover(parent1, parent2)
            new_population.append(mutate(offspring1))
            new_population.append(mutate(offspring2))
        population = new_population

    best = max(population, key=fitness)
    return [df.iloc[i] for i, bit in enumerate(best) if bit], get_total_value(best), get_total_weight(best)


In [182]:
def show_genetic_algorithm(df, n_runs=4,
                           population_size=100, num_generations=50, mutation_rate=0.2):
    for _ in range(n_runs):
        items, value, weight = genetic_algorithm(df, population_size=population_size, num_generations=num_generations, mutation_rate=mutation_rate)
        print(f"Items: {', '.join(item['Name'] for item in items)}")
        print(f"Total weight: {weight:.2f}")
        print(f"Total value: {value:.2f}")
        print()


In [183]:
# Test with the original 20 items
show_genetic_algorithm(df)
%timeit genetic_algorithm(df)

Items: 0, 4, 6, 8, 9, 10, 13, 14, 15, 18
Total weight: 28.49
Total value: 5.44

Items: 4, 6, 8, 9, 10, 12, 13, 14, 15, 18
Total weight: 29.99
Total value: 5.76

Items: 0, 4, 8, 9, 10, 12, 13, 15, 18
Total weight: 28.90
Total value: 5.87

Items: 0, 4, 8, 9, 10, 12, 13, 15, 18
Total weight: 28.90
Total value: 5.87

70.6 ms ± 2.28 ms per loop (mean ± std. dev. of 7 runs, 10 loops each)


In [185]:
# Test with 1000 items - slow
df_1000 = generate_item_df(1000)
show_genetic_algorithm(df_1000)
%timeit genetic_algorithm(df_1000)

Items: 39, 49, 147, 181, 209, 255, 286, 344, 375, 431, 455, 462, 506, 611, 631, 641, 681, 688, 742, 775, 812, 866, 876, 932, 970
Total weight: 29.93
Total value: 18.36

Items: 29, 68, 138, 166, 175, 288, 294, 308, 344, 365, 395, 432, 526, 565, 734, 737, 742, 825, 835, 854, 871, 927
Total weight: 29.81
Total value: 17.75

Items: 95, 116, 123, 147, 166, 193, 227, 259, 275, 299, 384, 455, 616, 649, 705, 748, 775, 793, 835, 838, 876, 889, 911, 927
Total weight: 29.17
Total value: 17.73

Items: 68, 146, 147, 166, 168, 175, 182, 236, 256, 306, 344, 350, 431, 451, 460, 526, 580, 681, 705, 754, 793, 866, 922, 924, 948
Total weight: 29.99
Total value: 17.80

2.25 s ± 91.4 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)
