In [None]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeRegressor, plot_tree
from sklearn.preprocessing import OneHotEncoder
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from sklearn.metrics import mean_squared_error
import matplotlib.pyplot as plt

# Train and evaluate a decision-tree regressor that predicts a course's rating
# from its category, platform and numeric attributes, then plot the fitted tree.

# Path to the dataset file
file_path = '/content/dataset mla.csv'

# Load the dataset
data = pd.read_csv(file_path)

# Select features and target variable
categorical_features = ['Category', 'Platform']
features = ['Category', 'Duration (hours)', 'Enrolled_Students',
            'Completion_Rate (%)', 'Platform', 'Price ($)']
target = 'Rating (out of 5)'

X = data[features]
y = data[target]

# Split the raw rows BEFORE fitting any preprocessing, so that nothing learned
# from the held-out rows (here: the category vocabulary) ends up in the
# transformer used for training.
X_train_raw, X_test_raw, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=0
)

# One-hot encoding for categorical features; the remaining (numeric) columns
# are passed through unchanged and appended after the encoded columns.
# handle_unknown='ignore' encodes a category that was not present in the
# training rows as all zeros instead of raising at transform time.
preprocessor = ColumnTransformer(
    transformers=[
        ('cat', OneHotEncoder(handle_unknown='ignore'), categorical_features)
    ],
    remainder='passthrough'
)

# Fit the encoder on the training rows only, then apply it to the test rows
X_train = preprocessor.fit_transform(X_train_raw)
X_test = preprocessor.transform(X_test_raw)

# Encoded version of the full feature table, kept under its original name for
# any later code that expects it (transform only, no refit).
X_transformed = preprocessor.transform(X)

# Decision tree model
decision_tree = DecisionTreeRegressor(random_state=0)

# Train the model
decision_tree.fit(X_train, y_train)

# Make predictions
y_pred = decision_tree.predict(X_test)

# Evaluate the model
mse = mean_squared_error(y_test, y_pred)
rmse = mse ** 0.5

print(f'Root Mean Squared Error: {rmse:.2f}')

# Visualize the decision tree
# NOTE(review): the tree has no depth limit, so the full plot may be too dense
# to read at this figure size; consider passing max_depth to plot_tree to draw
# only the top levels.
plt.figure(figsize=(20, 10))
plot_tree(
    decision_tree,
    feature_names=preprocessor.get_feature_names_out(),
    filled=True,
    fontsize=10,
)
plt.title("Decision Tree Visualization for Course Rating Prediction")
plt.show()


Root Mean Squared Error: 0.84
