1. Import Data Set

In [20]:
import pandas as pd

# Load 'music.csv' (path is relative to the current working directory) into a DataFrame.
music_data = pd.read_csv(filepath_or_buffer='music.csv')

# Leaving the frame as the last expression makes Jupyter render it as a table.
music_data

Unnamed: 0,age,gender,genre
0,20,1,HipHop
1,23,1,HipHop
2,25,1,HipHop
3,26,1,Jazz
4,29,1,Jazz
5,30,1,Jazz
6,31,1,Classical
7,33,1,Classical
8,37,1,Classical
9,20,0,Dance


2. Preparing/Cleaning the Data

In [21]:
# Output set: only the 'genre' column, i.e. the value the model should predict.
y = music_data['genre']

# Input set: every remaining column, obtained by dropping 'genre' from the frame.
X = music_data.drop(labels='genre', axis='columns')

3. Learning and Predicting

We are going to use the 'Decision Tree' algorithm.

In [23]:
from sklearn.tree import DecisionTreeClassifier

# fit() takes 2 sets: the input set (X) first, then the output set (y).
model = DecisionTreeClassifier()
model.fit(X, y)

# (21 year old, male), (22 year old, female) -> 2 predictions at the same time.
# The samples are wrapped in a DataFrame that reuses X's column names: the model
# was fitted on a DataFrame, and scikit-learn warns ("X does not have valid
# feature names") when predict() is then handed a bare list of lists.
samples = pd.DataFrame([[21, 1], [22, 0]], columns=X.columns)
predictions = model.predict(samples)
predictions



array(['HipHop', 'Dance'], dtype=object)

4. Measure Accuracy of a Model

In [363]:
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score

# Fixed seed so the random split, the fitted tree and therefore the score are
# identical on every run (Restart Kernel -> Run All gives the same number).
SEED = 42
# Share of the rows held out for testing; the remaining rows are used for training.
# A smaller value (e.g. 0.2) leaves more rows for training.
TEST_SIZE = 0.6

# train_test_split randomly picks the rows that go into the test set,
# allocating TEST_SIZE * 100 % of our data to testing.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=TEST_SIZE, random_state=SEED
)

model2 = DecisionTreeClassifier(random_state=SEED)
model2.fit(X_train, y_train)
predictions2 = model2.predict(X_test)

# Fraction of test rows whose predicted genre equals the true genre.
score = accuracy_score(y_test, predictions2)
score

0.2727272727272727

5. Persisting Models

Store already trained models so we do not have to train them again

In [368]:
# joblib is imported as a standalone package here; the older
# `from sklearn.externals import joblib` import raised an error.
from pathlib import Path

import joblib

MODEL_PATH = Path('music-recommender.joblib')

if MODEL_PATH.exists():
    # Reuse the persisted model so we do not have to train it again.
    # NOTE: joblib.load unpickles the file and can therefore execute arbitrary
    # code - only load model files that you created yourself or fully trust.
    model3 = joblib.load(MODEL_PATH)
else:
    # No persisted model yet (e.g. fresh checkout): train one and store it,
    # so this cell works on a clean run instead of failing on a missing file.
    music_data3 = pd.read_csv('music.csv')
    X3 = music_data3.drop(columns=['genre'])
    y3 = music_data3['genre']

    model3 = DecisionTreeClassifier()
    model3.fit(X3, y3)

    joblib.dump(model3, MODEL_PATH)  # creates a joblib file containing our trained model

predictions3 = model3.predict([[37, 1]])
predictions3



array(['Classical'], dtype=object)

6. Visualizing a Decision Tree

To see how our model makes predictions

After running the `tree.export_graphviz(...)` cell below, open the generated `music-recommender.dot` file,
click '...' in the right corner of the editor, then select 'Open Preview to the Side'.

In [None]:
from sklearn import tree

# Re-load the data and train a fresh model so this cell is self-contained.
music_data4 = pd.read_csv('music.csv')
X4 = music_data4.drop(columns=['genre'])
y4 = music_data4['genre']

model4 = DecisionTreeClassifier()
model4.fit(X4, y4)

# Write the fitted tree to a Graphviz .dot file.
# feature_names / class_names are taken from the data and model used in THIS
# cell (X4 / model4) rather than from variables left over from earlier cells:
# model4.classes_ holds the class labels in the order the classifier itself
# uses, which is the order export_graphviz expects for class_names.
tree.export_graphviz(model4, out_file='music-recommender.dot',
                     feature_names=list(X4.columns),
                     class_names=list(model4.classes_),
                     label='all', rounded=True,
                     filled=True)