# Machine Learning in Python


**YouTube video:** https://www.youtube.com/watch?v=7eh4d6sabA0&t=1375s

**Pandas library:** https://pandas.pydata.org/pandas-docs/stable/user_guide/10min.html

<!--"CTRL /" comment in Markdown in Jupyter -->

In [10]:
# importing libraries
import pandas as pd
import numpy as np

In [18]:
# Load music.csv (looked up relative to the working directory) into a
# DataFrame, then preview only its first three rows.
music_data = pd.read_csv('music.csv')
music_data.head(n=3)

Unnamed: 0,age,gender,genre
0,20,1,HipHop
1,23,1,HipHop
2,25,1,HipHop


In [12]:
# drop(index=0) returns a copy of the table without the row labelled 0.
# The result is not assigned, so music_data itself is left unchanged.
music_data.drop(index=0).head(n=3)  # head(n=3) limits the preview to 3 rows

Unnamed: 0,age,gender,genre
1,23,1,HipHop
2,25,1,HipHop
3,26,1,Jazz


In [13]:
# drop(index=1) returns a copy of the table without the row labelled 1.
# Row 0 is present again because the previous drop was never stored.
music_data.drop(index=1).head(n=3)  # head(n=3) limits the preview to 3 rows

Unnamed: 0,age,gender,genre
0,20,1,HipHop
2,25,1,HipHop
3,26,1,Jazz


In [14]:
# X is a copy of the table without the 'genre' column
# (music_data itself still contains 'genre').
X = music_data.drop('genre', axis=1)
X.head(n=3)

Unnamed: 0,age,gender
0,20,1
1,23,1
2,25,1


In [15]:
# Y is the 'genre' column taken on its own, as a pandas Series
Y = music_data.loc[:, 'genre']
Y

0        HipHop
1        HipHop
2        HipHop
3          Jazz
4          Jazz
5          Jazz
6     Classical
7     Classical
8     Classical
9         Dance
10        Dance
11        Dance
12     Acoustic
13     Acoustic
14     Acoustic
15    Classical
16    Classical
17    Classical
Name: genre, dtype: object

## Decision Tree Model - ML

In [32]:
# we are going to run a "decision tree" algorithm
import pandas as pd
from sklearn.tree import DecisionTreeClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score

# Load the data set from the CSV file
music_data = pd.read_csv('music.csv')

# Separate the table into inputs and expected outputs:
#   Y -> the 'genre' column only (what the model must predict)
#   X -> every column except 'genre' (what the model learns from)
Y = music_data['genre']
X = music_data.drop('genre', axis=1)

# Hold back 20% of the rows for testing; the rest is used for training.
# No random_state is passed, so the split (and the score) can change per run.
X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.2)

# Create the decision tree and fit it on the training rows
# (fit returns the fitted estimator itself)
model = DecisionTreeClassifier().fit(X_train, Y_train)

# Predict the genre of the held-back test rows
predictions = model.predict(X_test)

# Accuracy of the model: predictions compared against the true test labels
score = accuracy_score(Y_test, predictions)

# Display the score as the cell output
score

0.25

## Decision Tree Model - ML with joblib option (Dump model)

In [41]:
# Importing necessary libraries
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier
import joblib

# Read the data set
music_data = pd.read_csv('music.csv')

# Target (Y): the 'genre' column; features (X): all remaining columns
Y = music_data['genre']
X = music_data.drop('genre', axis=1)

# Random 80% / 20% train/test split (the test part is not used in this cell)
X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.2)

# Fit a decision tree on the training rows
model = DecisionTreeClassifier().fit(X_train, Y_train)

# Persist the fitted model to disk so it can be reloaded without retraining.
# The value returned by joblib.dump is displayed as the cell output.
joblib.dump(model, 'music-recommender.joblib')


['music-recommender.joblib']

## Decision Tree Model - ML (Load saved model)

In [43]:
# Importing necessary libraries
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier
import joblib

# Loading the music data (not needed for the prediction below; kept so that
# music_data is defined when this cell is run on its own)
music_data = pd.read_csv('music.csv')

# No splitting or training in this cell: the fitted model is read back from
# 'music-recommender.joblib', the file written earlier with joblib.dump.

# Loading the trained model from the file using joblib.
# The result must be assigned: joblib.load returns the stored object and does
# not define any variable by itself. Without the assignment, `model` would be
# undefined on a fresh kernel or silently refer to a model left over from a
# previously executed cell.
model = joblib.load('music-recommender.joblib')

# Predicting the genre for a single sample, given as one row of feature values
predictions = model.predict([[21, 1]])
predictions






array(['HipHop'], dtype=object)

## Decision Trees in visual format

In [45]:
# Importing necessary libraries
import pandas as pd
from sklearn.tree import DecisionTreeClassifier, export_graphviz
from sklearn.model_selection import train_test_split

# Read the data set and separate the target (Y) from the inputs (X)
music_data = pd.read_csv('music.csv')
Y = music_data['genre']
X = music_data.drop('genre', axis=1)

# No train/test split in this cell: the tree is fitted on every row
model = DecisionTreeClassifier().fit(X, Y)

# Write the fitted tree in Graphviz DOT format to 'music-recommender.dot'
export_graphviz(
    model,
    out_file='music-recommender.dot',
    feature_names=['age', 'gender'],
    class_names=sorted(Y.unique()),  # distinct genre labels, sorted
    label='all',
    rounded=True,
    filled=True,
)
