# Data Preprocessing

In [1]:
# Importing the libraries

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

import warnings

warnings.filterwarnings('ignore')

## Load the dataset

In [2]:
df = pd.read_csv('games-regression-dataset.csv')

## Setting data types

In [3]:
# drop Primary Genre
df.drop(['Primary Genre', 'ID', 'URL', 'Icon URL'], axis=1, inplace=True)

df['Original Release Date'] = pd.to_datetime(df['Original Release Date'], format='%d/%m/%Y')
df['Current Version Release Date'] = pd.to_datetime(df['Current Version Release Date'], format='%d/%m/%Y')


## Data Exploration

In [None]:
df.head()

In [None]:
df.info()

In [None]:
df.isnull().sum()

In [None]:
df['Genres'] = df['Genres'].astype(str)
df['Genres'] = df['Genres'].str.strip('[]').str.replace("'", "").str.split(", ")

genre_counts = df.explode('Genres').groupby('Genres').size().sort_values(ascending=False)
genre_counts


In [None]:
df['Developer'].value_counts()

In [None]:
df['Developer'].unique().size

In [None]:
df['Languages'] = df['Languages'].astype(str)

df['Languages'] = df['Languages'].str.strip('[]').str.replace("'", "").str.split(", ")

langs_counts = df.explode('Languages').groupby('Languages').size().sort_values(ascending=False)
langs_counts

## Developer preprocessing

In [4]:
# Convert to string
df['Developer'] = df['Developer'].astype(str)
df['Developer'] = df['Developer'].str.replace("'", "").str.strip('[]')

dev_counts = df['Developer'].value_counts()
other = dev_counts[dev_counts < 5].index
df['Developer'] = df['Developer'].replace(other, 'Other')

dev_df = df[['Developer', 'Average User Rating']].groupby('Developer').mean()
dev_df['Count'] = df['Developer'].value_counts()

dev_df = dev_df.sort_values(by='Count', ascending=False)
dev_df

Unnamed: 0_level_0,Average User Rating,Count
Developer,Unnamed: 1_level_1,Unnamed: 2_level_1
Other,4.029807,3791
Tapps Tecnologia da Informa\xe7\xe3o Ltda.,4.421875,96
Detention Apps,4.453125,32
HexWar Games Ltd,3.362069,29
EASY Inc.,3.962963,27
...,...,...
Eric Snider,3.900000,5
"RoboNacho Systems, LLC",4.000000,5
Ember Entertainment,4.400000,5
SH Limited,4.200000,5


In [5]:
# Replace the developer names with the average user rating from dev_df
df['Developer'] = df['Developer'].replace(dev_df.index, dev_df['Average User Rating'])
df.head()

Unnamed: 0,Name,Subtitle,User Rating Count,Price,In-app Purchases,Description,Developer,Age Rating,Languages,Size,Genres,Original Release Date,Current Version Release Date,Average User Rating
0,HEIR OF LIGHT,Dark Fantasy RPG,982,0.0,"29.99, 19.99, 9.99, 29.99, 29.99, 8.99, 4.99, ...","A Dark Fantasy, Collectible RPG\n\nDarkness ha...",4.029807,12+,"EN, FR, DE, JA, KO, ZH, ES, TH, ZH, VI",894489600,"Games, Role Playing, Strategy",2018-03-06,2019-07-31,4.0
1,Endgame:Eurasia,,19,0.0,,"""This interactive experience is an exploration...",4.029807,12+,EN,116407296,"Games, Simulation, Strategy, News",2013-03-21,2017-06-28,3.5
2,Free Solitaire+,,14,0.0,,Same Solitaire game with classic Solitaire run...,4.029807,4+,"EN, ZH",50647040,"Games, Strategy, Entertainment, Card",2013-04-04,2015-04-21,4.5
3,Draft Trainer,,88,1.99,,** Discounted for a limited time **\n\nEver wo...,4.029807,9+,EN,28120064,"Games, Utilities, Card, Strategy",2011-05-26,2019-07-23,3.5
4,Rogue Knight: Infested Lands,Tactical roguelike w/ stealth,13,3.99,,Fight or sneak your way through hordes of mons...,4.029807,12+,EN,39915520,"Games, Role Playing, Strategy",2017-05-19,2019-02-06,4.5


## Genres preprocessing

In [6]:
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.decomposition import PCA

# Convert the genres column to a list of strings
df['Genres'] = df['Genres'].astype(str)
df['Genres'] = df['Genres'].str.strip('[]').str.replace("'", "").str.split(", ")

# drop Games, Strategy, Entertainment from the Genres column
df['Genres'] = df['Genres'].apply(lambda x: [genre for genre in x if genre not in ['Games', 'Strategy', 'Entertainment']])

# Join the list of genres into a single string
genres = df['Genres'].apply(lambda x: ' '.join(x))

# Create a count Vectorizer and fit it to the genres
count_vec = CountVectorizer()
bow_genres = count_vec.fit_transform(genres)

# Apply principal component analysis to reduce the dimensionality
pca = PCA(n_components=10)
pca_genres = pca.fit_transform(bow_genres.toarray())

# Add the PCA-transformed genres to the original dataframe
for i in range(10):
    df[f'Genre_PCA_{i}'] = pca_genres[:, i]

# Drop the original column
df = df.drop(['Genres'], axis=1)

## Languages preprocessing

In [7]:
# Convert the genres column to a list of strings
df['Languages'] = df['Languages'].astype(str)
df['Languages'] = df['Languages'].str.strip('[]').str.replace("'", "").str.split(", ")

# drop Games, Strategy, Entertainment from the Genres column
df['Languages'] = df['Languages'].apply(lambda x: [lang for lang in x if lang not in ['En']])

# Join the list of genres into a single string
languages = df['Languages'].apply(lambda x: ' '.join(x))

# Create a count Vectorizer and fit it to the genres
count_vec = CountVectorizer()
bow_languages = count_vec.fit_transform(languages)

# Apply principal component analysis to reduce the dimensionality
pca = PCA(n_components=10)
pca_languages = pca.fit_transform(bow_languages.toarray())

# Add the PCA-transformed genres to the original dataframe
for i in range(10):
    df[f'Languages_PCA_{i}'] = pca_languages[:, i]

# Drop the original column
df = df.drop(['Languages'], axis=1)

## In-app Purchases preprocessing

In [8]:
# Free apps might skew the in-app purchases column,
# so we might split the dataset into free and paid apps

df['In-app Purchases'] = df['In-app Purchases'].astype(str)
df['In-app Purchases'] = df['In-app Purchases'].str.strip('[]').str.replace("'", "").str.split(", ")

In [9]:
# Convert to float
df['In-app Purchases'] = df['In-app Purchases'].apply(lambda x: [float(i) for i in x])

# Get the lowest, highest and average purchase
df['Lowest Purchase'] = df['In-app Purchases'].apply(lambda x: min(x) if len(x) > 0 else 0)
df['Highest Purchase'] = df['In-app Purchases'].apply(lambda x: max(x) if len(x) > 0 else 0)
df['Average Purchase'] = df['In-app Purchases'].apply(lambda x: np.mean(x) if len(x) > 0 else 0)


In [10]:
# Drop the original column
df = df.drop(['In-app Purchases'], axis=1)

df['Lowest Purchase'] = df['Lowest Purchase'].fillna(0)
df['Highest Purchase'] = df['Highest Purchase'].fillna(0)
df['Average Purchase'] = df['Average Purchase'].fillna(0)
df.head()

Unnamed: 0,Name,Subtitle,User Rating Count,Price,Description,Developer,Age Rating,Size,Original Release Date,Current Version Release Date,...,Languages_PCA_3,Languages_PCA_4,Languages_PCA_5,Languages_PCA_6,Languages_PCA_7,Languages_PCA_8,Languages_PCA_9,Lowest Purchase,Highest Purchase,Average Purchase
0,HEIR OF LIGHT,Dark Fantasy RPG,982,0.0,"A Dark Fantasy, Collectible RPG\n\nDarkness ha...",4.029807,12+,894489600,2018-03-06,2019-07-31,...,0.362915,-0.805313,-0.014666,-0.823012,0.121173,0.439999,0.262634,4.99,29.99,19.24
1,Endgame:Eurasia,,19,0.0,"""This interactive experience is an exploration...",4.029807,12+,116407296,2013-03-21,2017-06-28,...,-0.018229,0.001665,0.017068,0.008857,0.010479,-0.003501,-0.003242,0.0,0.0,0.0
2,Free Solitaire+,,14,0.0,Same Solitaire game with classic Solitaire run...,4.029807,4+,50647040,2013-04-04,2015-04-21,...,0.198497,0.282549,0.028089,-0.017963,0.014262,-0.034115,-0.002305,0.0,0.0,0.0
3,Draft Trainer,,88,1.99,** Discounted for a limited time **\n\nEver wo...,4.029807,9+,28120064,2011-05-26,2019-07-23,...,-0.018229,0.001665,0.017068,0.008857,0.010479,-0.003501,-0.003242,0.0,0.0,0.0
4,Rogue Knight: Infested Lands,Tactical roguelike w/ stealth,13,3.99,Fight or sneak your way through hordes of mons...,4.029807,12+,39915520,2017-05-19,2019-02-06,...,-0.018229,0.001665,0.017068,0.008857,0.010479,-0.003501,-0.003242,0.0,0.0,0.0


## Age Rating preprocessing

In [11]:
# Convert to string
df['Age Rating'] = df['Age Rating'].astype(str)

# Remove the + sign
df['Age Rating'] = df['Age Rating'].str.replace('+', '')

# Convert to int
df['Age Rating'] = df['Age Rating'].astype(float)

## Dates preprocessing

In [12]:
# Convert the datetime to ordinal
df['Original Release Date'] = df['Original Release Date'].apply(lambda x: x.toordinal())
df['Current Version Release Date'] = df['Current Version Release Date'].apply(lambda x: x.toordinal())

df.head()


Unnamed: 0,Name,Subtitle,User Rating Count,Price,Description,Developer,Age Rating,Size,Original Release Date,Current Version Release Date,...,Languages_PCA_3,Languages_PCA_4,Languages_PCA_5,Languages_PCA_6,Languages_PCA_7,Languages_PCA_8,Languages_PCA_9,Lowest Purchase,Highest Purchase,Average Purchase
0,HEIR OF LIGHT,Dark Fantasy RPG,982,0.0,"A Dark Fantasy, Collectible RPG\n\nDarkness ha...",4.029807,12.0,894489600,736759,737271,...,0.362915,-0.805313,-0.014666,-0.823012,0.121173,0.439999,0.262634,4.99,29.99,19.24
1,Endgame:Eurasia,,19,0.0,"""This interactive experience is an exploration...",4.029807,12.0,116407296,734948,736508,...,-0.018229,0.001665,0.017068,0.008857,0.010479,-0.003501,-0.003242,0.0,0.0,0.0
2,Free Solitaire+,,14,0.0,Same Solitaire game with classic Solitaire run...,4.029807,4.0,50647040,734962,735709,...,0.198497,0.282549,0.028089,-0.017963,0.014262,-0.034115,-0.002305,0.0,0.0,0.0
3,Draft Trainer,,88,1.99,** Discounted for a limited time **\n\nEver wo...,4.029807,9.0,28120064,734283,737263,...,-0.018229,0.001665,0.017068,0.008857,0.010479,-0.003501,-0.003242,0.0,0.0,0.0
4,Rogue Knight: Infested Lands,Tactical roguelike w/ stealth,13,3.99,Fight or sneak your way through hordes of mons...,4.029807,12.0,39915520,736468,737096,...,-0.018229,0.001665,0.017068,0.008857,0.010479,-0.003501,-0.003242,0.0,0.0,0.0


## NLP preprocessing

In [15]:
import nltk
nltk.download('stopwords')
nltk.download('wordnet')
nltk.download('omw-1.4')
from nltk.corpus import stopwords
from textblob import TextBlob
import re

def preprocess_nlp(col):
    df[col] = df[col].astype(str)

    # Remove URLs and email addresses
    df[col] = df[col].apply(lambda x: re.sub(r'http\S+|www.\S+|\S+@\S+', '', x))

    # Remove the punctuation, numbers, and convert to lowercase
    df[col] = df[col].apply(lambda x: " ".join(re.findall(r'\w+', x.lower())))

    # Remove the stopwords
    stop = stopwords.words('english')
    df[col] = df[col].apply(lambda x: " ".join(x for x in x.split() if x not in stop))

    # Stemming
    st = nltk.PorterStemmer()
    df[col] = df[col].apply(lambda x: " ".join([st.stem(word) for word in x.split()]))

    # Lemmatization
    lem = nltk.WordNetLemmatizer()
    df[col] = df[col].apply(lambda x: " ".join([lem.lemmatize(word) for word in x.split()]))

    # Remove the frequent and rare words
    freq = pd.Series(' '.join(df[col]).split()).value_counts()
    common_freq = list(freq[:10].index)
    rare_freq = list(freq[-10:].index)
    df[col] = df[col].apply(lambda x: " ".join(x for x in x.split() if x not in common_freq+rare_freq))

    # Remove the whitespaces
    df[col] = df[col].apply(lambda x: " ".join(x.strip() for x in x.split()))

    # Replace NaN values with empty string
    df[col] = df[col].fillna('')

    # Convert text data to bag-of-words representation
    vectorizer = CountVectorizer()
    BoW = vectorizer.fit_transform(df[col])

    # Apply principal component analysis to reduce the dimensionality
    pca_ = PCA(n_components=10)
    pca_col = pca_.fit_transform(BoW.toarray())

    # Add the PCA-transformed genres to the original dataframe
    for feat in range(10):
        df[f'{col}_PCA_{feat}'] = pca_col[:, feat]



[nltk_data] Downloading package stopwords to
[nltk_data]     C:\Users\Yusuf\AppData\Roaming\nltk_data...
[nltk_data]   Package stopwords is already up-to-date!
[nltk_data] Downloading package wordnet to
[nltk_data]     C:\Users\Yusuf\AppData\Roaming\nltk_data...
[nltk_data]   Package wordnet is already up-to-date!
[nltk_data] Downloading package omw-1.4 to
[nltk_data]     C:\Users\Yusuf\AppData\Roaming\nltk_data...
[nltk_data]   Package omw-1.4 is already up-to-date!


In [16]:
preprocess_nlp('Description')
preprocess_nlp('Subtitle')
preprocess_nlp('Name')

df = df.drop(['Description', 'Subtitle', 'Name'], axis=1)

df.head()

Unnamed: 0,User Rating Count,Price,Developer,Age Rating,Size,Original Release Date,Current Version Release Date,Average User Rating,Genre_PCA_0,Genre_PCA_1,...,Name_PCA_0,Name_PCA_1,Name_PCA_2,Name_PCA_3,Name_PCA_4,Name_PCA_5,Name_PCA_6,Name_PCA_7,Name_PCA_8,Name_PCA_9
0,982,0.0,4.029807,12.0,894489600,736759,737271,4.0,1.262014,0.298783,...,-0.016259,-0.003904,-0.038339,-0.021536,-0.032058,-0.033364,-0.027534,-0.016501,-0.009215,-0.018201
1,19,0.0,4.029807,12.0,116407296,734948,736508,3.5,-0.128832,-0.524941,...,-0.014634,-0.003287,-0.034084,-0.019752,-0.027978,-0.028768,-0.02451,-0.014035,-0.00681,-0.013536
2,14,0.0,4.029807,4.0,50647040,734962,735709,4.5,-0.044157,-0.091367,...,-0.034241,-0.012349,-0.073516,-0.065253,-0.088736,-0.116033,-0.111314,-0.026199,-0.300193,-0.171169
3,88,1.99,4.029807,9.0,28120064,734283,737263,3.5,-0.038748,-0.099684,...,-0.016055,-0.003782,-0.035287,-0.025109,-0.021007,-0.027572,-0.025729,-0.013225,-0.010538,-0.012107
4,13,3.99,4.029807,12.0,39915520,736468,737096,4.5,1.262014,0.298783,...,-0.021382,-0.01136,-0.030838,-0.018968,-0.02767,-0.03269,-0.022544,0.008284,0.020233,-0.017075



## Icon preprocessing

### Download the icons

In [None]:
# Convert to string
df['Icon URL'] = df['Icon URL'].astype(str)

import requests
import os
import shutil


def download_image(url, filename):
    r = requests.get(url, stream=True)
    if r.status_code == 200:
        with open(filename, 'wb') as f:
            r.raw.decode_content = True
            shutil.copyfileobj(r.raw, f)


# Create a folder to store the images
if not os.path.exists('icons'):
    os.makedirs('icons')

# Download the images
for i, row in df.iterrows():
    download_image(row['Icon URL'], f'icons/{i}.png')



### Extract features from the icons

In [17]:
import cv2
import numpy as np

def preprocess_icon(img_path):
    # Load the game icon image
    img = cv2.imread(img_path)
    img = cv2.resize(img, (100, 100))

    # Extract color features using color histograms
    colors = ('b', 'g', 'r')
    color_features = []
    for k, col in enumerate(colors):
        hist = cv2.calcHist([img], [k], None, [256], [0, 256])
        color_features.append(hist)

    # Reshape the color features to have a single dimension
    color_features = np.concatenate(color_features).ravel()

    # Extract shape features using edge detection
    gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    edges = cv2.Canny(gray, 100, 200)
    edge_features = np.array(edges).flatten()

    # Combine the color and shape features into a single feature vector
    feature_vector = np.concatenate((color_features, edge_features))

    # Normalize the feature vector to have unit length
    normalized_feature_vector = feature_vector / np.linalg.norm(feature_vector)

    return normalized_feature_vector

# Create a list to store the feature vectors
icon_features = []

# Iterate over the images and extract the features
for i, row in df.iterrows():
    icon_features.append(preprocess_icon(f'icons/{i}.png'))

# Apply PCA to reduce the number of features
pca = PCA(n_components=10)
icon_features_pca = pca.fit_transform(icon_features)

icon_features_df = pd.DataFrame(icon_features_pca, columns=[f'icon_{i}' for i in range(icon_features_pca.shape[1])])
icon_features_df.head()

Unnamed: 0,icon_0,icon_1,icon_2,icon_3,icon_4,icon_5,icon_6,icon_7,icon_8,icon_9
0,-0.124898,0.001234,-0.015389,-0.06283,0.003135,-0.068292,-0.029305,-0.006171,-0.030016,0.035971
1,-0.02642,0.157912,-0.021355,-0.027148,-0.021729,0.046399,-0.17212,-0.043609,0.101468,0.040901
2,0.127909,-0.050887,-0.175489,0.169673,-0.055064,0.003982,-0.002101,-0.03547,0.017367,-0.062924
3,0.205941,0.651786,-0.071989,-0.042054,-0.058608,0.04088,-0.087128,-0.020634,0.080747,0.022682
4,-0.133586,-0.035073,-0.025085,0.059569,-0.021883,-0.0006,-0.130829,-0.0405,0.093091,0.002537


### Add the icon features to the dataset

In [18]:
# Concatenate the icon features with the other features
df = pd.concat([df, icon_features_df], axis=1)

# Save the updated dataset
df.to_csv('games_with_icon_features.csv', index=False)

In [19]:
df.head()

Unnamed: 0,User Rating Count,Price,Developer,Age Rating,Size,Original Release Date,Current Version Release Date,Average User Rating,Genre_PCA_0,Genre_PCA_1,...,icon_0,icon_1,icon_2,icon_3,icon_4,icon_5,icon_6,icon_7,icon_8,icon_9
0,982,0.0,4.029807,12.0,894489600,736759,737271,4.0,1.262014,0.298783,...,-0.124898,0.001234,-0.015389,-0.06283,0.003135,-0.068292,-0.029305,-0.006171,-0.030016,0.035971
1,19,0.0,4.029807,12.0,116407296,734948,736508,3.5,-0.128832,-0.524941,...,-0.02642,0.157912,-0.021355,-0.027148,-0.021729,0.046399,-0.17212,-0.043609,0.101468,0.040901
2,14,0.0,4.029807,4.0,50647040,734962,735709,4.5,-0.044157,-0.091367,...,0.127909,-0.050887,-0.175489,0.169673,-0.055064,0.003982,-0.002101,-0.03547,0.017367,-0.062924
3,88,1.99,4.029807,9.0,28120064,734283,737263,3.5,-0.038748,-0.099684,...,0.205941,0.651786,-0.071989,-0.042054,-0.058608,0.04088,-0.087128,-0.020634,0.080747,0.022682
4,13,3.99,4.029807,12.0,39915520,736468,737096,4.5,1.262014,0.298783,...,-0.133586,-0.035073,-0.025085,0.059569,-0.021883,-0.0006,-0.130829,-0.0405,0.093091,0.002537


## Run the model

In [2]:
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression, Ridge, Lasso
from sklearn.linear_model import ElasticNet
from sklearn.preprocessing import PolynomialFeatures
from sklearn.metrics import mean_squared_error, r2_score

# Load the dataset
df = pd.read_csv('games_with_icon_features.csv')

# Split the dataset into training and testing sets
y = df['Average User Rating']
X = df.drop(['Average User Rating'], axis=1)

# Feature selection
from sklearn.feature_selection import SelectKBest, f_regression
selector = SelectKBest(f_regression, k=10)
X = selector.fit_transform(X, y)

# Scale the features
from sklearn.preprocessing import StandardScaler
scaler = StandardScaler()
X = scaler.fit_transform(X)

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)


In [6]:
# Create a linear regression model
model = LinearRegression()


# Train the model
model.fit(X_train, y_train)

# Save the model
import pickle
pickle.dump(model, open('models/LR_model.pkl', 'wb'))

# Make predictions
y_pred = model.predict(X_test)

# Evaluate the model
print('Mean squared error: %.2f' % mean_squared_error(y_test, y_pred))
print('Coefficient of determination: %.2f' % r2_score(y_test, y_pred))

X = df.drop(['Average User Rating'], axis=1)

# print the features weights
for i in range(len(model.coef_)):
    print(f'{X.columns[i]}: {model.coef_[i]}')


Mean squared error: 0.49
Coefficient of determination: 0.15
User Rating Count: 0.21390635839350386
Price: 0.0951945546362111
Developer: 0.09581508634729094
Age Rating: -0.029473314637886168
Size: 0.10555807971946429
Original Release Date: -0.11398791014318865
Current Version Release Date: 0.035962559789323806
Genre_PCA_0: 0.006827945430988695
Genre_PCA_1: 0.04795810859462081
Genre_PCA_2: -0.04339401005585522


In [22]:
# Create a ridge regression model
model = Ridge(alpha=0.5)

# Train the model
model.fit(X_train, y_train)

# Save the model
pickle.dump(model, open('models/Ridge_model.pkl', 'wb'))

# Make predictions
y_pred = model.predict(X_test)

# Evaluate the model
print('Mean squared error: %.2f' % mean_squared_error(y_test, y_pred))
print('Coefficient of determination: %.2f' % r2_score(y_test, y_pred))


Mean squared error: 0.49
Coefficient of determination: 0.14


In [23]:
# Create a lasso regression model
model = Lasso(alpha=0.5)

# Train the model
model.fit(X_train, y_train)

# Save the model
pickle.dump(model, open('models/Lasso_model.pkl', 'wb'))

# Make predictions
y_pred = model.predict(X_test)

# Evaluate the model
print('Mean squared error: %.2f' % mean_squared_error(y_test, y_pred))
print('Coefficient of determination: %.2f' % r2_score(y_test, y_pred))


Mean squared error: 0.53
Coefficient of determination: 0.08


In [24]:
# Create an elastic net regression model
model = ElasticNet(alpha=0.5)

# Train the model
model.fit(X_train, y_train)

# Save the model
pickle.dump(model, open('models/ElasticNet_model.pkl', 'wb'))

# Make predictions
y_pred = model.predict(X_test)

# Evaluate the model
print('Mean squared error: %.2f' % mean_squared_error(y_test, y_pred))
print('Coefficient of determination: %.2f' % r2_score(y_test, y_pred))


Mean squared error: 0.53
Coefficient of determination: 0.08


In [26]:
# Create a polynomial regression model
poly = PolynomialFeatures(degree=3)

X_train_poly = poly.fit_transform(X_train)
X_test_poly = poly.fit_transform(X_test)

# Train the model
model = LinearRegression()
model.fit(X_train_poly, y_train)

# Save the model
pickle.dump(model, open('models/Polynomial_model.pkl', 'wb'))

# Make predictions
y_pred = model.predict(X_test_poly)

# Evaluate the model
print('Mean squared error: %.2f' % mean_squared_error(y_test, y_pred))
print('Coefficient of determination: %.2f' % r2_score(y_test, y_pred))


Mean squared error: 860.01
Coefficient of determination: -1497.34
