In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.neural_network import MLPClassifier
from sklearn.model_selection import GridSearchCV
%matplotlib inline

In [2]:
artworks = pd.read_csv('https://media.githubusercontent.com/media/MuseumofModernArt/collection/master/Artworks.csv')

In [3]:
# Select Columns.
artworks = artworks[['Artist', 'Nationality', 'Gender', 'Date', 'Department',
                    'DateAcquired', 'URL', 'ThumbnailURL', 'Height (cm)', 'Width (cm)']]

# Convert URL's to booleans.
artworks['URL'] = artworks['URL'].notnull()
artworks['ThumbnailURL'] = artworks['ThumbnailURL'].notnull()

# Drop films and some other tricky rows.
artworks = artworks[artworks['Department']!='Film']
artworks = artworks[artworks['Department']!='Media and Performance Art']
artworks = artworks[artworks['Department']!='Fluxus Collection']

# Drop missing data.
artworks = artworks.dropna()

In [4]:
artworks.head()

Unnamed: 0,Artist,Nationality,Gender,Date,Department,DateAcquired,URL,ThumbnailURL,Height (cm),Width (cm)
0,Otto Wagner,(Austrian),(Male),1896,Architecture & Design,1996-04-09,True,True,48.6,168.9
1,Christian de Portzamparc,(French),(Male),1987,Architecture & Design,1995-01-17,True,True,40.6401,29.8451
2,Emil Hoppe,(Austrian),(Male),1903,Architecture & Design,1997-01-15,True,True,34.3,31.8
3,Bernard Tschumi,(),(Male),1980,Architecture & Design,1995-01-17,True,True,50.8,50.8
4,Emil Hoppe,(Austrian),(Male),1903,Architecture & Design,1997-01-15,True,True,38.4,19.1


In [5]:
artworks.shape

(108413, 10)

In [6]:
artworks['DateAcquired'] = pd.to_datetime(artworks.DateAcquired)
artworks['YearAcquired'] = artworks.DateAcquired.dt.year
artworks['YearAcquired'].dtype

dtype('int64')

In [7]:
# Remove multiple nationalities, genders, and artists.
artworks.loc[artworks['Gender'].str.contains('\) \('), 'Gender'] = '\(multiple_persons\)'
artworks.loc[artworks['Nationality'].str.contains('\) \('), 'Nationality'] = '\(multiple_nationalities\)'
artworks.loc[artworks['Artist'].str.contains(','), 'Artist'] = 'Multiple_Artists'

# Convert dates to start date, cutting down number of distinct examples.
artworks['Date'] = pd.Series(artworks.Date.str.extract(
    '([0-9]{4})', expand=False))[:-1]

# Final column drops and NA drop.
X = artworks.drop(['Department', 'DateAcquired', 'Artist', 'Nationality', 'Date'], 1)

# Create dummies separately.
artists = pd.get_dummies(artworks.Artist)
nationalities = pd.get_dummies(artworks.Nationality)
dates = pd.get_dummies(artworks.Date)

# Concat with other variables, but artists slows this wayyyyy down so we'll keep it out for now
X = pd.get_dummies(X, sparse=True)
X = pd.concat([X, nationalities, dates], axis=1)

Y = artworks.Department

## Trying different hidden layer sizes

In [8]:
mlp = MLPClassifier(hidden_layer_sizes=(10,))
mlp.fit(X, Y)
mlp.score(X,Y)

0.7269146689050207

In [9]:
mlp = MLPClassifier(hidden_layer_sizes=(1000,), max_iter=10000)
mlp.fit(X, Y)
mlp.score(X,Y)

0.7829872801232325

From above two classifiers it's clear that more number of perceptrons clearly increases the accuracy.

In [10]:
mlp = MLPClassifier(hidden_layer_sizes=(10, 20, 10, 20, 10, 20, 10, 20), max_iter=10000)
mlp.fit(X, Y)
mlp.score(X,Y)

0.7689852692942728

More layers and less perceptrons is much better than less layers and more perceptrons as it may prevent overfitting issue.

In [11]:
mlp = MLPClassifier(hidden_layer_sizes=(10,20,30), max_iter=10000)
mlp.fit(X, Y)
mlp.score(X,Y)

0.7807458515122725

In [12]:
mlp = MLPClassifier(hidden_layer_sizes=(30, 20, 10), max_iter=10000)
mlp.fit(X, Y)
mlp.score(X,Y)

0.7971368747290454

Order of size of perceptrons in model's layers doesn't make much difference in it's accuracy.

In [13]:
mlp = MLPClassifier(hidden_layer_sizes=(50, 40, 30, 20, 10), max_iter=10000)
mlp.fit(X, Y)
mlp.score(X,Y)

0.8194035770617915

## Changing activation

Now activation should be chosen according to the type of data. So, we can't generalise any assumptions taken from this particular data for every MLP model.

In [14]:
mlp = MLPClassifier(hidden_layer_sizes=(30, 20, 10), max_iter=10000, activation="identity")
mlp.fit(X, Y)
mlp.score(X,Y)

0.6970381780782747

In [15]:
mlp = MLPClassifier(hidden_layer_sizes=(30, 20, 10), max_iter=10000, activation="logistic")
mlp.fit(X, Y)
mlp.score(X,Y)

0.7611079852047263

In [16]:
mlp = MLPClassifier(hidden_layer_sizes=(30, 20, 10), max_iter=10000, activation="tanh")
mlp.fit(X, Y)
mlp.score(X,Y)

0.7513766799184599

## Changing solver

In [17]:
mlp = MLPClassifier(hidden_layer_sizes=(30, 20, 10), max_iter=10000, solver="lbfgs")
mlp.fit(X, Y)
mlp.score(X,Y)

0.6225360427255034

In [18]:
mlp = MLPClassifier(hidden_layer_sizes=(30, 20, 10), max_iter=10000, solver="sgd")
mlp.fit(X, Y)
mlp.score(X,Y)

0.6225360427255034

## Changing learning rate

In [19]:
mlp = MLPClassifier(hidden_layer_sizes=(30, 20, 10), max_iter=10000, learning_rate="invscaling")
mlp.fit(X, Y)
mlp.score(X,Y)

0.8043131358785386

In [20]:
mlp = MLPClassifier(hidden_layer_sizes=(30, 20, 10), max_iter=10000, learning_rate="adaptive")
mlp.fit(X, Y)
mlp.score(X,Y)

0.7872303137077656

Learning rate looks most important as it's changing accuracy by high margin. Also learning rate is used to set speed of model so it might affect time taken to complete model.

# Conclusion

In conclusion, we should consider using more layers with suitable number of perceptrons in each layer. Moreover, learning rate and activation are two hyperparameters which affects our model score most.
