In [None]:
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
@author: valentinakomarova
"""

import csv
import numpy as np
import pandas as pd
from sklearn.neural_network import MLPRegressor
from sklearn import model_selection
from sklearn import metrics
from sklearn import linear_model
from sklearn.preprocessing import OneHotEncoder
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.impute import SimpleImputer
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import MinMaxScaler

In [None]:
# Read the dataset from 'clean_Airbnb_data.csv' (path is relative to the working directory)
df = pd.read_csv(filepath_or_buffer='clean_Airbnb_data.csv')

# Preview the first five rows
df.head(n=5)

In [None]:
# Descriptive statistics of the loaded frame
summary_stats = df.describe()
summary_stats

In [None]:
# Correlation table - between the variables.
# Only numeric/boolean columns are correlated: the frame also holds
# non-numeric columns (they are one-hot encoded further down), and
# DataFrame.corr() raises on those in pandas >= 2.0, where numeric_only
# defaults to False. Selecting the columns explicitly works on old and new pandas.
numeric_df = df.select_dtypes(include=['number', 'bool'])
cor = numeric_df.corr()

# Heatmap of the pairwise correlations, annotated with the coefficient values
sns.heatmap(cor, annot=True, cmap=plt.cm.Reds)

# Correlation table - with target variable 'price':
# absolute correlation of every column with 'price', strongest first
corr_Price = cor['price'].abs().sort_values(ascending=False)
print(corr_Price)

In [None]:
# Drop every row that still contains a missing value,
# then show the summary statistics of what remains
df = df.dropna()
df.describe()

In [None]:
# Prepare the dataset for the machine learning pipeline

# Features are every column except the target; the target is 'price'
X = df.drop(columns=['price'])
y = df['price'].values

# Hold out 30% of the rows for testing; the fixed seed keeps the split reproducible
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

# Split the TRAINING features by dtype.
# Both views are taken from X_train so that they describe the same rows and
# columns; taking the categorical view from `df` would also pull in the test
# rows (and the target column, were it non-numeric).
X_train_num = X_train.select_dtypes(include=np.number)
X_train_cat = X_train.select_dtypes(exclude=['number'])

In [None]:
# Preview the first five rows of the numeric training features
X_train_num.head(n=5)

In [None]:
# Label-encode the categorical features

# Stand-alone encoder instance (not used by the statement below)
labelencoder = LabelEncoder()

# Replace every categorical column with integer codes, fitting the encoder
# column by column.
# NOTE(review): the pipeline cell that follows only reads the column NAMES of
# X_train_cat and fits on X_train, so these integer codes do not reach the models.
X_train_cat = X_train_cat.apply(lambda column: LabelEncoder().fit_transform(column))

In [None]:
# Column names of the two feature groups
num_attribs = list(X_train_num)
cat_attribs = list(X_train_cat)

# Numeric branch: fill any missing value with the column median, then rescale
# each column to the [0, 1] range (MinMaxScaler default).
num_pipeline = Pipeline([
    ("imputer", SimpleImputer(strategy="median")),
    ("scaler", MinMaxScaler()),
])

# Full pipeline: numerical + categorical.
# Both branches must be listed: ColumnTransformer drops every column that is
# not assigned to a transformer (remainder='drop' is the default), so a
# categorical-only transformer would silently discard all numeric features.
full_pipeline = ColumnTransformer([
    ("num", num_pipeline, num_attribs),
    ("cat", OneHotEncoder(handle_unknown='ignore'), cat_attribs),
])

# Fit the preprocessing on the train set only, and transform it
X_train_prep = full_pipeline.fit_transform(X_train)

In [None]:
# Apply the already-fitted preprocessing to the test set (transform only, no refit)
X_test_prep = full_pipeline.transform(X=X_test)

In [None]:
# Linear model

# Build the linear regression and fit it on the prepared training features
# (fit() returns the estimator itself)
lin_regr = linear_model.LinearRegression().fit(X_train_prep, y_train)

# Predict prices for the prepared test features
y_pred = lin_regr.predict(X_test_prep)

# Evaluate the predictions against the held-out targets
linear_mse = metrics.mean_squared_error(y_true=y_test, y_pred=y_pred)

print("Mean squared error: %.2f" % linear_mse)
print("R squared: %.2f" % metrics.r2_score(y_true=y_test, y_pred=y_pred))

In [None]:
# Neural network models

def _fit_and_report_mlp(label, hidden_layer_sizes, X_fit, y_fit, X_eval, y_eval):
    """Fit an MLPRegressor, print its evaluation metrics and return it.

    Parameters
    ----------
    label : str
        Prefix used in the printed metric lines (e.g. "model_nn1").
    hidden_layer_sizes : tuple of int
        Number of units in each hidden layer, one entry per layer.
    X_fit, y_fit
        Features and targets the network is fitted on.
    X_eval, y_eval
        Features and targets the fitted network is scored on.

    Returns
    -------
    tuple
        ``(model, predictions)``: the fitted regressor and its predictions
        for ``X_eval``.
    """
    model = MLPRegressor(hidden_layer_sizes=hidden_layer_sizes, random_state=1, max_iter=2000)
    model = model.fit(X_fit, y_fit)
    predictions = model.predict(X_eval)
    print(label + " MSE:", metrics.mean_squared_error(y_eval, predictions))
    print(label + " R Squared:", metrics.r2_score(y_eval, predictions))
    return model, predictions


# One hidden layer of 10 units. Written as a real one-element tuple: `(10)`
# is just the integer 10, which only works because scikit-learn also accepts
# a bare int here.
model_nn1, predicted = _fit_and_report_mlp(
    "model_nn1", (10,), X_train_prep, y_train, X_test_prep, y_test)

# Two hidden layers of 10 units each
model_nn2, predicted = _fit_and_report_mlp(
    "model_nn2", (10, 10), X_train_prep, y_train, X_test_prep, y_test)

# Three hidden layers of 10 units each
model_nn3, predicted = _fit_and_report_mlp(
    "model_nn3", (10, 10, 10), X_train_prep, y_train, X_test_prep, y_test)

