# Build a model for predicting rents in Barcelona using the Airbnb dataset.

In [1]:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_absolute_error
from sklearn.metrics import r2_score
from sklearn.preprocessing import OneHotEncoder
import random
import pickle

# File name associated with the rent model. Presumably the path the trained
# model is pickled to / loaded from -- its use is not visible in this cell,
# so confirm against the code that saves or loads the model.
model_name = "rent_model.sav"

### Preprocess data by one-hot encoding categorical features

In [3]:
# Load the raw listings and discard the columns that are not used as model
# features in this notebook (a missing column raises KeyError, as before).
orig_data = pd.read_csv('barca_airbnb.csv').drop(
    columns=['reviews', 'overall_satisfaction', 'latitude', 'longitude']
)

# Every object-dtype column is treated as a categorical feature.
categorical_feature_mask = orig_data.dtypes == object
categorical_features = orig_data.columns[categorical_feature_mask].tolist()

# Request dense output from the encoder. The keyword was renamed from
# `sparse` to `sparse_output` in scikit-learn 1.2 and the old spelling was
# removed in 1.4, so try the current name first and fall back for older
# installations. Categories unseen during fit encode as all zeros.
try:
    one_hot_encoder = OneHotEncoder(
        handle_unknown='ignore', sparse_output=False
    )
except TypeError:
    # scikit-learn < 1.2 only understands the legacy keyword.
    one_hot_encoder = OneHotEncoder(handle_unknown='ignore', sparse=False)
one_hot_encoder.fit(orig_data[categorical_features])

one_hot_categorical_data = pd.DataFrame(
    data=one_hot_encoder.transform(orig_data[categorical_features]),
    columns=one_hot_encoder.get_feature_names_out(
        input_features=categorical_features
    ),
    # Reuse the source index so the column-wise concat below stays
    # row-aligned instead of relying on two default indexes matching.
    index=orig_data.index,
)

# Swap the raw categorical columns for their one-hot encoded counterparts.
data = pd.concat([orig_data, one_hot_categorical_data], axis=1)
data = data.drop(columns=categorical_features)

# Separate the regression target (price) from the feature matrix.
labels = data['price']
data = data.drop(columns='price')

# Fixed seed so the train/test partition -- and every metric computed from
# it -- is reproducible across kernel restarts.
SPLIT_SEED = 42
train_data, test_data, train_labels, test_labels = train_test_split(
    data, labels, test_size=0.25, random_state=SPLIT_SEED
)

test_data

Unnamed: 0,accommodates,bedrooms,latitude,longitude,room_type_Entire home/apt,room_type_Private room,room_type_Shared room,neighborhood_Ciutat Vella,neighborhood_Eixample,neighborhood_Gràcia,neighborhood_Horta-Guinardó,neighborhood_Les Corts,neighborhood_Nou Barris,neighborhood_Sant Andreu,neighborhood_Sant Martí,neighborhood_Sants-Montjuïc,neighborhood_Sarrià-Sant Gervasi
6213,4,1.0,41.422419,2.152964,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0
7810,2,1.0,41.381300,2.153719,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
7369,2,1.0,41.404691,2.203036,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0
2919,2,1.0,41.408874,2.208603,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0
2307,4,2.0,41.401314,2.174983,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
731,9,4.0,41.400325,2.138974,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0
8317,2,1.0,41.390143,2.144089,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1386,4,2.0,41.393607,2.164562,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
5190,2,1.0,41.411746,2.172186,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0
