In [1]:
!pip install pulp
from pulp import *
import numpy as np
import pandas as pd
import time

In [2]:
file = '0_100000'

In [3]:
data = pd.read_csv('data/'+ file + '.csv')
capacity = int(file.split('_')[1])

In [4]:
data.head()

Unnamed: 0,key,value,weight
0,0,90000,90001
1,1,89750,89751
2,2,10001,10002
3,3,89500,89501
4,4,10252,10254


In [5]:
print(capacity)

100000


In [7]:
def knapsack_mip(items, capacity):
    
    start_time = time.time()
    
    weights = items.weight
    values = items.value
    items_list = items.index.tolist()

    m = LpProblem("KnapsackProblem", LpMaximize)

    # Variables
    x = LpVariable.dicts('x', items_list, lowBound=0, upBound=1, cat='binary')

    # Objective
    m += lpSum([values[i] * x[i] for i in items_list])

    # Constraints
    m += LpAffineExpression(list(map(tuple,(zip(x.values(), weights))))) <= capacity

    m.solve(PULP_CBC_CMD(msg=False))
    
    taken = [var.varValue for var in m.variables()]
    
    duration = time.time() - start_time
    
    return (m.objective.value(), duration)

In [8]:
def knapsack_heuristic(items, capacity):
    
    start_time = time.time()
   
    items['density'] = items['value'] / items['weight']

    value = 0
    weight = 0
    taken = [0]*len(items)

    if np.std(items.density) > 0.1:
        items = items.sort_values('density', ascending=False)
    else:
        items = items.sort_values('weight', ascending=True)

    for i, item in items.iterrows():
        if weight + item.weight <= capacity:
            taken[i] = 1
            value += item.value
            weight += item.weight
               
    duration = time.time() - start_time
            
    return (value, duration)

In [9]:
results = {}
results['Heuristik'] = knapsack_heuristic(data, capacity)
results['MIP'] = knapsack_mip(data, capacity)
pd.DataFrame(results)

Unnamed: 0,Heuristik,MIP
0,99045.0,99998.888642
1,0.008618,0.026907


In [10]:
percent = 0.9
items = data.sort_values('value', ascending=False).iloc[:int(len(data)*percent)]

In [11]:
results = {}
results['Heuristik'] = knapsack_heuristic(items, capacity)
results['MIP'] = knapsack_mip(items, capacity)
pd.DataFrame(results)

Unnamed: 0,Heuristik,MIP
0,99084.0,99998.888642
1,0.008598,0.026392
