In [115]:
import pandas as pd
from sklearn.tree import DecisionTreeClassifier  # Import DecisionTreeClassifier from scikit-learn library
from sklearn.model_selection import train_test_split  # Import train_test_split for splitting data
from sklearn.metrics import accuracy_score  # Import accuracy_score for evaluating model performance
import joblib  # Import joblib for model persistence

music_data = pd.read_csv('music.csv')

# Prepare input (X) and output (y) data
X = music_data.drop(columns=['genre'])  # Exclude 'genre' column from input data
y = music_data['genre']  # Use 'genre' column as output data (target)

# Set a fixed random seed for reproducible results
random_seed = 42  # Choose any integer seed value for consistency

# Split the data into training and testing sets with a fixed random seed (without a fixd seed, the train/test sets will change every time since it's randomly generated)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=random_seed)

# Initialize Decision Tree classifier model
model = DecisionTreeClassifier()

# Train the model using the training data
model.fit(X_train, y_train)

# Make predictions on the test data
predictions = model.predict(X_test)

# Evaluate the model accuracy by comparing predictions to actual y_test values
score = accuracy_score(y_test, predictions)  # Calculate how accurate the predictions are to y_test
score

# Persist the trained model by saving it to a .joblib file
# joblib.dump(model, 'music-recommender.joblib')

['music-recommender.joblib']

In [4]:
import pandas as pd
from sklearn.tree import DecisionTreeClassifier  # Import DecisionTreeClassifier from scikit-learn library
from sklearn.model_selection import train_test_split  # Import train_test_split for splitting data
from sklearn.metrics import accuracy_score  # Import accuracy_score for evaluating model performance
import joblib  # Import joblib for model persistence

# Load the persisted model to make predictions
model = joblib.load('music-recommender.joblib')

# Input new data points to predict their genre
predictions = model.predict([[20, 0], [28, 1]])

# Display the predictions based on the input data
predictions



array(['Dance', 'Jazz'], dtype=object)

In [118]:
import pandas as pd
from sklearn.tree import DecisionTreeClassifier  
from sklearn.model_selection import train_test_split  
from sklearn.metrics import accuracy_score  
from sklearn import tree

music_data = pd.read_csv('music.csv')
X = music_data.drop(columns=['genre'])
y = music_data['genre'] 

random_seed = 42
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=random_seed)

model = DecisionTreeClassifier()
model.fit(X_train, y_train)

tree.export_graphviz(model, 
                     out_file='music-recommender.dot', 
                     feature_names=['age', 'gender'], 
                     class_names=sorted(y.unique()), 
                     label='all',
                     rounded=True, 
                     filled=True)