In [25]:
from gurobipy import *
import numpy as np
import pandas as pd
import csv

In [26]:
#Store all the variable names
with open('AirbnbTrain.csv') as csvFile:
    reader = csv.reader(csvFile)
    variables = next(reader)
    
#Load training data    
df_test = pd.read_csv("AirbnbTest.csv")
df_train = pd.read_csv("AirbnbTrain.csv")

In [27]:
df_test.head(3)

Unnamed: 0,latitude,longitude,Entire home,accommodates,bathrooms,bedrooms,beds,cleaning_fee,minimum_nights,number_of_reviews,review_scores_rating,instant_bookable,price
0,34.100604,-118.341787,0,2,1.0,1,1,40,1,261,96,1,100
1,34.100607,-118.350583,1,8,2.0,2,2,100,2,10,98,0,300
2,34.10061,-118.347617,1,2,1.0,1,1,80,2,1,100,1,125


In [28]:
df_train.head(3)

Unnamed: 0,latitude,longitude,Entire home,accommodates,bathrooms,bedrooms,beds,cleaning_fee,minimum_nights,number_of_reviews,review_scores_rating,instant_bookable,price
0,34.103701,-118.332241,1,13,2.0,3,2.0,150,2,1,100,1,350
1,34.099484,-118.331645,1,8,2.0,2,4.0,150,1,11,96,1,190
2,34.104321,-118.329662,1,4,1.0,0,1.0,55,1,1,80,0,85


In [29]:
X_variable = variables[:-1]

X_train = df_train.iloc[:, :-1]
y_train = df_train.iloc[:,-1]
X_test = df_test.iloc[:, :-1]
y_test = df_test.iloc[:,-1]

In [30]:
mod = Model()

#Define Decision Variables
beta = mod.addVars(X_train.shape[1])

z = mod.addVars(X_train.shape[0])

#Add constraints
mod.addConstrs((z[i] >= y_train[i] - (sum(beta[j] * X_train.iloc[i,j] for j in range(X_train.shape[1])))) for i in range(X_train.shape[0]))

mod.addConstrs((z[i] >= sum(beta[j] * X_train.iloc[i,j] for j in range(X_train.shape[1])) - y_train[i]) for i in range(X_train.shape[0]))

# Create the objective function, and set it to be minimized
mod.setObjective((1/X_train.shape[0])*sum(z[i] for i in range(X_train.shape[0])),GRB.MINIMIZE) 

In [31]:
################## MODEL II ##################
mod = Model()

#Define Decision Variables
beta = mod.addVars(X_train.shape[1])

z = mod.addVars(X_train.shape[0])

w = mod.addVars(X_train.shape[1], vtype = GRB.BINARY)

#Add constraints
mod.addConstrs((z[i]>= y_train[i] - sum(beta[j]*X_train.iloc[i,j]* w[j] for j in range(X_train.shape[1]))) for i in range(X_train.shape[0]))

mod.addConstrs((z[i] >= sum(beta[j] * X_train.iloc[i,j] * w[j]  for j in range(X_train.shape[1])) - y_train[i]) for i in range(X_train.shape[0]))

# Create the objective function, and set it to be minimized
mod.setObjective(1/X_train.shape[0]*sum(z[i] for i in range (X_train.shape[0])),GRB.MINIMIZE) 

mod.update()
mod.optimize()

Gurobi Optimizer version 9.1.0 build v9.1.0rc0 (mac64)
Thread count: 4 physical cores, 8 logical processors, using up to 8 threads
Optimize a model with 0 rows, 1724 columns and 0 nonzeros
Model fingerprint: 0x8f3fdf44
Model has 3400 quadratic constraints
Variable types: 1712 continuous, 12 integer (12 binary)
Coefficient statistics:
  Matrix range     [0e+00, 0e+00]
  QMatrix range    [5e-01, 5e+02]
  QLMatrix range   [1e+00, 1e+00]
  Objective range  [6e-04, 6e-04]
  Bounds range     [1e+00, 1e+00]
  RHS range        [0e+00, 0e+00]
  QRHS range       [1e+01, 2e+03]
Presolve time: 0.15s
Presolved: 3424 rows, 1760 columns, 41428 nonzeros
Presolved model has 24 SOS constraint(s)
Variable types: 1736 continuous, 24 integer (24 binary)

Root relaxation: objective 3.644463e+01, 1419 iterations, 0.15 seconds

    Nodes    |    Current Node    |     Objective Bounds      |     Work
 Expl Unexpl |  Obj  Depth IntInf | Incumbent    BestBd   Gap | It/Node Time

     0     0   36.44463    0    9

In [32]:
if mod.status == GRB.OPTIMAL:
    print("Solved to optimality")
    
beta_opt = [beta[j].x for j in range(X_train.shape[1])]

for j in range(X_train.shape[1]):
    print(f'{X_variable[j]}',beta_opt[j])

opt_val = mod.objval
print("Train Error for Model I is:",opt_val)

Solved to optimality
latitude 319.992934368494
longitude 92.59248291518554
Entire home 36.88350246775536
accommodates 9.921904281257916
bathrooms 30.885153450264202
bedrooms 20.081468198281296
beds 0.0
cleaning_fee 0.3135225045261489
minimum_nights 0.0
number_of_reviews 0.0
review_scores_rating 0.26080203976446925
instant_bookable 5.058599783436263
Train Error for Model I is: 36.444630420827046


In [33]:
SAE = 1 / X_test.shape[0] * sum(abs(y_test[i] - sum(beta_opt[j] * X_test.iloc[i,j] for j in range(X_test.shape[1]))) for i in range(X_test.shape[0]))
print("Prediction Error for Model I is:", SAE)

Prediction Error for Model I is: 35.62719512642499


In [34]:
################## MODEL II ##################
#Add the binary constraint to only use 3 variables
mod.addConstr(sum (w[j] for j in range(X_train.shape[1])) <= 3)
mod.update()
mod.optimize()

Gurobi Optimizer version 9.1.0 build v9.1.0rc0 (mac64)
Thread count: 4 physical cores, 8 logical processors, using up to 8 threads
Optimize a model with 1 rows, 1724 columns and 12 nonzeros
Model fingerprint: 0x33472bcf
Model has 3400 quadratic constraints
Variable types: 1712 continuous, 12 integer (12 binary)
Coefficient statistics:
  Matrix range     [1e+00, 1e+00]
  QMatrix range    [5e-01, 5e+02]
  QLMatrix range   [1e+00, 1e+00]
  Objective range  [6e-04, 6e-04]
  Bounds range     [1e+00, 1e+00]
  RHS range        [3e+00, 3e+00]
  QRHS range       [1e+01, 2e+03]

MIP start from previous solve did not produce a new incumbent solution
MIP start from previous solve violates constraint R0 by 9.000000000

Found heuristic solution: objective 2.000000e+09
Presolve added 822 rows and 409 columns
Presolve time: 0.14s
Presolved: 3421 rows, 2169 columns, 32974 nonzeros
Presolved model has 24 SOS constraint(s)
Variable types: 2145 continuous, 24 integer (24 binary)

Root relaxation: objectiv

In [35]:
if mod.status == GRB.OPTIMAL:
    print("Solved to optimality")
    
beta_opt = [beta[j].x for j in range(X_train.shape[1])]

for j in range(X_train.shape[1]):
    if beta_opt[j] > 0:
        print(f'{X_variable[j]}',beta_opt[j])

opt_val = mod.objval
print("Train Error for Model II is:",opt_val)

Solved to optimality
Entire home 52.0
accommodates 14.0
bedrooms 32.0
Train Error for Model II is: 38.33882352941181


In [36]:
SAE = 1 / X_test.shape[0] * sum(abs(y_test[i] - sum(beta_opt[j] * X_test.iloc[i,j] for j in range(X_test.shape[1]))) for i in range(X_test.shape[0]))
print("Prediction Error for Model II is:", SAE)

Prediction Error for Model II is: 37.73676680972818


In [37]:
################## MODEL III ##################
#Add the constraint to make sure beds is used in the model
mod.addConstr(w[X_variable.index('beds')] == 1)
mod.update()
mod.optimize()

Gurobi Optimizer version 9.1.0 build v9.1.0rc0 (mac64)
Thread count: 4 physical cores, 8 logical processors, using up to 8 threads
Optimize a model with 2 rows, 1724 columns and 13 nonzeros
Model fingerprint: 0x0545107f
Model has 3400 quadratic constraints
Variable types: 1712 continuous, 12 integer (12 binary)
Coefficient statistics:
  Matrix range     [1e+00, 1e+00]
  QMatrix range    [5e-01, 5e+02]
  QLMatrix range   [1e+00, 1e+00]
  Objective range  [6e-04, 6e-04]
  Bounds range     [1e+00, 1e+00]
  RHS range        [1e+00, 3e+00]
  QRHS range       [1e+01, 2e+03]

MIP start from previous solve did not produce a new incumbent solution
MIP start from previous solve violates constraint R1 by 1.000000000

Presolve added 825 rows and 410 columns
Presolve time: 0.13s
Presolved: 3421 rows, 2167 columns, 32954 nonzeros
Presolved model has 22 SOS constraint(s)
Variable types: 2145 continuous, 22 integer (22 binary)
Found heuristic solution: objective 60.5442647

Root relaxation: objective 

In [38]:
if mod.status == GRB.OPTIMAL:
    print("Solved to optimality")
    
beta_opt = [beta[j].x for j in range(X_train.shape[1])]

for j in range(X_train.shape[1]):
    if beta_opt[j] > 0:
        print(f'{X_variable[j]}',beta_opt[j])

opt_val = mod.objval
print("Train Error for Model III is:",opt_val)

Solved to optimality
Entire home 67.875
bedrooms 47.37499999999999
beds 12.125000000000002
Train Error for Model III is: 40.08014705882356


In [39]:
SAE = 1 / X_test.shape[0] * sum(abs(y_test[i] - sum(beta_opt[j] * X_test.iloc[i,j] for j in range(X_test.shape[1]))) for i in range(X_test.shape[0]))
print("Prediction Error for Model III is:", SAE)

Prediction Error for Model III is: 38.59960658082976
