# ML

In [1]:
import pandas as pd

In [2]:
# Load the video-game sales CSV from the notebook's working directory
# and preview the first rows to check that the columns parsed as expected.
df = pd.read_csv("./vgsales.csv")
df.head()

Unnamed: 0,Rank,Name,Platform,Year,Genre,Publisher,NA_Sales,EU_Sales,JP_Sales,Other_Sales,Global_Sales
0,1,Wii Sports,Wii,2006.0,Sports,Nintendo,41.49,29.02,3.77,8.46,82.74
1,2,Super Mario Bros.,NES,1985.0,Platform,Nintendo,29.08,3.58,6.81,0.77,40.24
2,3,Mario Kart Wii,Wii,2008.0,Racing,Nintendo,15.85,12.88,3.79,3.31,35.82
3,4,Wii Sports Resort,Wii,2009.0,Sports,Nintendo,15.75,11.01,3.28,2.96,33.0
4,5,Pokemon Red/Pokemon Blue,GB,1996.0,Role-Playing,Nintendo,11.27,8.89,10.22,1.0,31.37


In [3]:
# (rows, columns) of the loaded sales table.
df.shape

(16598, 11)

In [4]:
# Summary statistics for the numeric columns. In the recorded output the Year
# count (16327) is lower than the row count (16598), i.e. some years are missing.
df.describe()

Unnamed: 0,Rank,Year,NA_Sales,EU_Sales,JP_Sales,Other_Sales,Global_Sales
count,16598.0,16327.0,16598.0,16598.0,16598.0,16598.0,16598.0
mean,8300.605254,2006.406443,0.264667,0.146652,0.077782,0.048063,0.537441
std,4791.853933,5.828981,0.816683,0.505351,0.309291,0.188588,1.555028
min,1.0,1980.0,0.0,0.0,0.0,0.0,0.01
25%,4151.25,2003.0,0.0,0.0,0.0,0.0,0.06
50%,8300.5,2007.0,0.08,0.02,0.0,0.01,0.17
75%,12449.75,2010.0,0.24,0.11,0.04,0.04,0.47
max,16600.0,2020.0,41.49,29.02,10.22,10.57,82.74


In [5]:
# View the table as a NumPy array. `to_numpy()` is the accessor pandas
# recommends over the legacy `.values` attribute; for this mixed-type frame
# the result is the same object-dtype 2-D array.
df.to_numpy()

array([[1, 'Wii Sports', 'Wii', ..., 3.77, 8.46, 82.74],
       [2, 'Super Mario Bros.', 'NES', ..., 6.81, 0.77, 40.24],
       [3, 'Mario Kart Wii', 'Wii', ..., 3.79, 3.31, 35.82],
       ...,
       [16598, 'SCORE International Baja 1000: The Official Game', 'PS2',
        ..., 0.0, 0.0, 0.01],
       [16599, 'Know How 2', 'DS', ..., 0.0, 0.0, 0.01],
       [16600, 'Spirits & Spells', 'GBA', ..., 0.0, 0.0, 0.01]],
      dtype=object)

## Jupyter Shortcuts
H - Show all shortcuts


# ML Project - Music Prediction

In [6]:
# Load the music-preference dataset (columns: age, gender, genre) and
# preview the first rows.
music_data = pd.read_csv("./music.csv")
music_data.head()

Unnamed: 0,age,gender,genre
0,20,1,HipHop
1,23,1,HipHop
2,25,1,HipHop
3,26,1,Jazz
4,29,1,Jazz


In [7]:
# Target: the genre label. Features: every remaining column.
y = music_data.loc[:, "genre"]
X = music_data.drop("genre", axis="columns")

# Sanity-check that features and target have the same number of rows.
print(f"{X.shape} {y.shape}")

(18, 2) (18,)


In [8]:
from sklearn.tree import DecisionTreeClassifier

# Fit a decision tree on the full dataset.
# The seed is fixed because the tree builder permutes features when searching
# for splits; without it, equally good splits can be chosen differently on
# each run and Restart & Run All would not be reproducible.
model = DecisionTreeClassifier(random_state=42)
model.fit(X, y)

In [14]:
# Predict genres for three new (age, gender) rows.
# The model was fitted on a DataFrame, so the inputs are wrapped in a
# DataFrame with the same column names: this avoids scikit-learn's
# "X does not have valid feature names" warning and makes the column
# order explicit instead of positional.
new_listeners = pd.DataFrame([[21, 1], [29, 0], [22, 0]], columns=X.columns)
model.predict(new_listeners)

array(['HipHop', 'Acoustic', 'Dance'], dtype=object)

## Calculating accuracy

In [19]:
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score

# Reload the data and rebuild features/target so this section starts from a
# known state regardless of what earlier cells did to X and y.
music_data = pd.read_csv("./music.csv")
X = music_data.drop(columns=["genre"])
y = music_data["genre"]

# Hold out 20% of the rows for evaluation; the seed makes the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

print(X.shape, y.shape)

# Seed the classifier as well: a repeatable split alone is not enough, because
# tree construction itself is randomised when candidate splits tie.
model = DecisionTreeClassifier(random_state=42)
model.fit(X_train, y_train)

# Compare predictions on the held-out rows with the true labels.
# NOTE: with only 18 rows the test set has 4 samples, so the score below is a
# smoke test rather than a reliable accuracy estimate.
predictions = model.predict(X_test)
print(f"{predictions = }")
print(f"{y_test.to_list() = }")
print(accuracy_score(y_test, predictions))

(18, 2) (18,)
predictions = array(['HipHop', 'HipHop', 'Classical', 'Jazz'], dtype=object)
y_test.to_list() = ['HipHop', 'HipHop', 'Classical', 'Jazz']
1.0


## Model Persistence

In [21]:
from joblib import dump, load

# Persist the trained model to a file in the working directory so it can be
# reused without retraining. `dump` returns the list of files it wrote.
dump(model, "music-recommender.joblib")

['music-recommender.joblib']

In [22]:
# Reload the persisted model from disk and check that it reproduces the
# predictions of the in-memory model on the held-out rows.
model2 = load("music-recommender.joblib")
model2.predict(X_test)

array(['HipHop', 'HipHop', 'Classical', 'Jazz'], dtype=object)

## Visualising Tree

In [23]:
from sklearn import tree

# Export the fitted tree to a Graphviz .dot file for visualisation.
# Feature and class labels are taken from what the model was actually trained
# on, not from hard-coded names or the full target column: `model` was fitted
# on the training split, and if that split happens to miss a genre, labels
# built from the full `y` would no longer line up with `model.classes_`.
tree.export_graphviz(
    model,
    out_file="music-recommender.dot",
    feature_names=list(X_train.columns),
    class_names=list(model.classes_),
    label="all",
    rounded=True,
    filled=True,
)