In [None]:
A simple example of how to train an ML model on your dataset and use it to make predictions.

In [47]:
import pandas as pd
from sklearn.tree import DecisionTreeClassifier

# Load the dataset. 'genre' is the label we want to predict; every other
# column is treated as a model input.
music_data = pd.read_csv('music.csv')
y = music_data['genre']                    # output set (labels)
X = music_data.drop(columns=['genre'])     # input set (features)

# Use a ready-made estimator (here a decision tree) instead of writing our own
# prediction model. fit() takes the input set and the output set and returns
# the fitted estimator, so construction and training can be chained.
# X.values hands the model a plain NumPy array, the same array-like form as
# the nested list passed to predict() below.
model = DecisionTreeClassifier().fit(X.values, y)

# Ask the model for the genre of two new listeners. Per the original notes,
# [21, 1] is a 21-year-old male and [22, 0] is a 22-year-old female.
predictions = model.predict([
    [21, 1],
    [22, 0],
])
predictions

array(['HipHop', 'Dance'], dtype=object)

In [None]:
How to test the accuracy of your trained model

In [40]:
import pandas as pd
from sklearn.tree import DecisionTreeClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score

# Load the dataset and separate inputs from the label column.
music_data = pd.read_csv('music.csv')
X = music_data.drop('genre', axis='columns')   # input set: everything except 'genre'
y = music_data['genre']                        # output set: the label to predict

# Split the data into a training part and a testing part. test_size=0.2 keeps
# 20% of the rows aside for testing and leaves 80% for training.
#   X_train / y_train -> inputs and labels the model learns from
#   X_test  / y_test  -> inputs and labels held back to check the model
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)

# Train a ready-made decision tree on the training part only.
model = DecisionTreeClassifier().fit(X_train, y_train)

# Predict labels for the held-out inputs, then compare them with the true
# labels (y_test) that correspond to those same rows.
predictions = model.predict(X_test)
score = accuracy_score(y_test, predictions)

# Note: the score can differ on every run, because train_test_split() draws a
# different random train/test partition each time it is called.
score


1.0

In [None]:
# Persisting Models: How to save our trained model so we don't have to retrain it every time we need to make a new prediction. This is extremely important for large datasets.


In [48]:
import pandas as pd
from sklearn.tree import DecisionTreeClassifier
import joblib #used for saving and loading models

# Load the dataset and separate inputs from the label column.
music_data = pd.read_csv('music.csv')
y = music_data['genre']                    # output set (labels)
X = music_data.drop(columns=['genre'])     # input set (features)

# Train once on the full dataset. fit() returns the fitted estimator.
model = DecisionTreeClassifier().fit(X.values, y)

# Write the trained model to disk so it can be reloaded later without
# retraining. No prediction is made in this cell. dump() returns the list of
# file names it wrote, which is what the cell displays.
joblib.dump(model, 'Music-recommender.joblib')

['Music-recommender.joblib']

In [None]:
Now that our model has been saved, we can simply load it instead of training it again.

In [49]:
import pandas as pd
from sklearn.tree import DecisionTreeClassifier
import joblib #used for saving and loading models

# No CSV loading, X/y split, or model.fit() call is needed here: the trained
# model is restored directly from the file written by joblib.dump().
# Note: joblib files are pickle-based, so only load files you trust.
model = joblib.load('Music-recommender.joblib')

# Same two sample inputs as in the training example: [21, 1] and [22, 0].
predictions = model.predict([
    [21, 1],
    [22, 0],
])
predictions

array(['HipHop', 'Dance'], dtype=object)

In [None]:
Visualising Decision Trees: How Decision Trees Work

In [52]:
import pandas as pd
from sklearn.tree import DecisionTreeClassifier
from sklearn import tree

# Load the dataset and separate inputs from the label column.
music_data = pd.read_csv('music.csv')
X = music_data.drop(columns=['genre'])   # input set: every column except 'genre'
y = music_data['genre']                  # output set: the label to predict

# Use a ready-made estimator (a decision tree) rather than writing our own
# prediction model; fit() takes the input set and the output set.
model = DecisionTreeClassifier()
model.fit(X, y)

# Export the fitted tree as a Graphviz .dot file.
#   feature_names - taken from the columns the model was actually trained on,
#                   so node labels cannot drift out of sync with the CSV
#                   (a hard-coded list would mislabel nodes if a column were
#                   renamed, reordered or added).
#   class_names   - the unique genres in ascending order, which is the order
#                   export_graphviz expects for class names.
#   label='all'   - show informative labels on every node.
#   rounded/filled - draw rounded boxes and colour nodes by majority class.
tree.export_graphviz(
    model,
    out_file='music-recommended.dot',
    feature_names=list(X.columns),
    class_names=sorted(y.unique()),
    label='all',
    rounded=True,
    filled=True,
)

# To turn the .dot file into an image, install Graphviz and run the following
# in a terminal (for example the VS Code terminal):
#   dot -Tpng music-recommended.dot -o output.png
# output.png will then contain a visualisation of the decision tree.
