In [60]:
import pandas as pd
from sklearn.tree import DecisionTreeClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from sklearn.preprocessing import OneHotEncoder
from sklearn import tree


# Load the music-preference records; the relative path is resolved against
# the kernel's current working directory.
MUSIC_DATA_CSV = 'music_data.csv'
music_data = pd.read_csv(MUSIC_DATA_CSV)

In [61]:
# Show the loaded frame as the cell output to eyeball the Age / Gender / Genre columns.
music_data

Unnamed: 0,Age,Gender,Genre
0,10,Male,Pop
1,10,Non-Binary,Pop
2,10,Male,Pop
3,10,Male,Pop
4,10,Non-Binary,Pop
...,...,...,...
9995,80,Non-Binary,Rock
9996,80,Male,Rock
9997,80,Female,Rock
9998,80,Female,Rock


In [62]:
# One-hot encode the categorical 'Gender' column so the tree receives numeric inputs.
# sparse_output=False makes the encoder return a dense array that can be wrapped
# in a DataFrame directly.
encoder = OneHotEncoder(sparse_output=False)
gender_encoded = encoder.fit_transform(music_data[['Gender']])

# Wrap the encoded array in a DataFrame with one named column per gender category.
# Reusing music_data's index keeps every encoded row attached to its source row;
# with a default RangeIndex, the column-wise concat that builds X would silently
# misalign (and introduce NaN rows) if music_data were ever filtered or re-indexed.
gender_encoded_df = pd.DataFrame(
    gender_encoded,
    columns=encoder.get_feature_names_out(['Gender']),
    index=music_data.index,
)

gender_encoded_df

Unnamed: 0,Gender_Female,Gender_Male,Gender_Non-Binary
0,0.0,1.0,0.0
1,0.0,0.0,1.0
2,0.0,1.0,0.0
3,0.0,1.0,0.0
4,0.0,0.0,1.0
...,...,...,...
9995,0.0,0.0,1.0
9996,0.0,1.0,0.0
9997,1.0,0.0,0.0
9998,1.0,0.0,0.0


In [63]:
# Features (X): the 'Age' column placed side by side with the one-hot gender columns.
feature_frames = [music_data[['Age']], gender_encoded_df]
X = pd.concat(feature_frames, axis=1)

# Labels (y): the genre recorded for each row.
y = music_data['Genre']


In [64]:
# Display the assembled feature matrix to confirm it holds Age plus the encoded gender columns.
X

Unnamed: 0,Age,Gender_Female,Gender_Male,Gender_Non-Binary
0,10,0.0,1.0,0.0
1,10,0.0,0.0,1.0
2,10,0.0,1.0,0.0
3,10,0.0,1.0,0.0
4,10,0.0,0.0,1.0
...,...,...,...,...
9995,80,0.0,0.0,1.0
9996,80,0.0,1.0,0.0
9997,80,1.0,0.0,0.0
9998,80,1.0,0.0,0.0


In [71]:
# One seed shared by the split and the tree, so a fresh "Restart & Run All"
# reproduces the same model and the same exported graph.
RANDOM_STATE = 42

# Hold out 20% of the rows for testing; the remaining 80% are used for training.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=RANDOM_STATE)

# Initialize the DecisionTreeClassifier model. The estimator is seeded because
# scikit-learn permutes candidate features at each split, so an unseeded tree
# can differ between runs.
model = DecisionTreeClassifier(random_state=RANDOM_STATE)

# Train the model on the training data - input, output
model.fit(X_train, y_train)

# Write the fitted tree to a Graphviz .dot file for visual inspection.
# Feature names are read from the training frame and class names from the
# fitted model, so the labels always match what the tree was actually trained
# on (a hand-written list, or the labels of the full dataset, could drift from
# the columns / classes the model saw).
tree.export_graphviz(
    model,
    out_file='music_recommender.dot',
    feature_names=list(X_train.columns),
    class_names=[str(label) for label in model.classes_],
    label='all',
    rounded=True,
    filled=True)

In [66]:
# Predict a genre for every held-out row using the fitted tree.
predictions = model.predict(X_test)

# Fraction of held-out rows whose predicted genre equals the true one;
# leaving it as the last expression makes it the cell's output.
accuracy = accuracy_score(y_true=y_test, y_pred=predictions)
accuracy

1.0