## Example-1

### 1. Import the Libraries

In [None]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report
from sklearn import tree
import matplotlib.pyplot as plt

### 2. Load the Dataset

In [None]:
# Read the training split and the test split from their CSV files
df_train, df_test = (
    pd.read_csv(csv_path)
    for csv_path in ("buy_comp_train_data.csv", "buy_comp_test_data.csv")
)

In [None]:
# Preview the first five rows of the training data
df_train.head(n=5)

In [None]:
# Preview the first five rows of the test data
df_test.head(n=5)

### 3. One-Hot Encoding and Restoring Target Variable

In [None]:
# One-hot encode the categorical feature columns (everything except the target)
df_encoded_train = pd.get_dummies(df_train.drop(columns='Buy Comp'))

# Encode the test features the same way, then force them onto the training
# column layout: dummy columns missing from the test data are filled with 0
df_encoded_test = (
    pd.get_dummies(df_test.drop(columns='Buy Comp'))
    .reindex(columns=df_encoded_train.columns, fill_value=0)
)

# Re-attach the untouched target column to both encoded frames
df_encoded_train = df_encoded_train.assign(**{'Buy Comp': df_train['Buy Comp']})
df_encoded_test = df_encoded_test.assign(**{'Buy Comp': df_test['Buy Comp']})

### 4. Splitting Features and Target for Training and Testing

In [None]:
# Target labels and feature matrix for training
y_train = df_encoded_train['Buy Comp']
X_train = df_encoded_train.drop(columns='Buy Comp')

# The encoded test frame also carries the 'Buy Comp' column, so remove it
# to leave only the feature columns the model will predict from
X_test = df_encoded_test.drop(columns='Buy Comp')

### 5. Train the Decision Tree Classifier

In [None]:
# Fit an entropy-based decision tree; the fixed seed makes the fit repeatable
clf = DecisionTreeClassifier(random_state=42, criterion='entropy')
clf.fit(X=X_train, y=y_train)

### 6. Visualize the Tree

In [None]:
# Draw the fitted tree.
# - feature_names is converted to a plain list, which every scikit-learn
#   release accepts (some releases reject a pandas Index for this parameter).
# - class_names is read from the fitted estimator so the names are always in
#   the same order as clf.classes_, whatever spelling the CSV labels use.
plt.figure(figsize=(12, 8))
tree.plot_tree(
    clf,
    feature_names=list(X_train.columns),
    class_names=[str(label) for label in clf.classes_],
    filled=True,
)
plt.show()

### 7. Predict the test set results

In [None]:
# Run the fitted classifier on the test features and print the predicted labels
y_pred = clf.predict(X=X_test)
print(y_pred)

## Example-2

### 1. Import the Libraries

In [None]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report
from sklearn import tree
import matplotlib.pyplot as plt

### 2. Load the Datasets

In [None]:
# Read the Play Tennis dataset, report its row count, and preview the first rows
df = pd.read_csv("play_tennis_full.csv")
print(df.shape[0])
df.head(n=5)

### 3. One-hot encoding the features and binary encoding the target variable

In [None]:
# One-hot encode the categorical features, then attach the target as 1 (Yes) / 0 (No)
df_encoded = pd.get_dummies(df.drop(columns='PlayTennis')).assign(
    PlayTennis=df['PlayTennis'].map({'Yes': 1, 'No': 0})
)

# Separate the target column from the feature columns
y = df_encoded['PlayTennis']
X = df_encoded.drop(columns='PlayTennis')

### 4. Splitting the data into test and train sets

In [None]:
# Hold out 30% of the rows for testing; the fixed seed keeps the split repeatable
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    random_state=42,
    test_size=0.3,
)

In [None]:
# Display the held-out feature rows produced by the train/test split
X_test

### 5. Using the built-in function for decision tree classifier

In [None]:
# Fit an entropy-based decision tree on the training split (seeded for repeatability)
clf = DecisionTreeClassifier(random_state=42, criterion='entropy')
clf.fit(X=X_train, y=y_train)

In [None]:
# Draw the fitted tree. feature_names is converted to a plain list, which every
# scikit-learn release accepts (some releases reject a pandas Index here).
# The target was encoded as 0 = No, 1 = Yes, so class_names follows that order.
plt.figure(figsize=(12, 8))
tree.plot_tree(
    clf,
    feature_names=list(X.columns),
    class_names=['No', 'Yes'],
    filled=True,
)
plt.show()

### 6. Model Prediction

In [None]:
# Predict a 0/1 label for every row of the held-out features and display them
y_pred = clf.predict(X=X_test)
y_pred

### 7. Model Evaluation

In [None]:
import seaborn as sns
import numpy as np

# Compute the confusion matrix with an explicit label order (0 = No, 1 = Yes).
# Passing labels keeps the matrix 2x2 even if one class is absent from the
# small test split, and fixes which row/column belongs to which class.
conf_matrix = confusion_matrix(y_test, y_pred, labels=[0, 1])

# Create a heatmap for the confusion matrix with borders.
# Tick labels must follow the matrix order: index 0 -> 'No', index 1 -> 'Yes'.
plt.figure(figsize=(6, 4))
sns.heatmap(conf_matrix, annot=True, cmap="Blues", fmt='g',
            xticklabels=['No', 'Yes'],
            yticklabels=['No', 'Yes'],
            linewidths=0.8, linecolor='black')

# Add titles and labels
plt.title('Confusion Matrix with Borders')
plt.xlabel('Predicted Labels')
plt.ylabel('True Labels')

# Show the plot
plt.show()

In [None]:
# Evaluate the model: overall accuracy on the held-out test set.
# (The per-class classification report is printed in the following cell.)
print("Accuracy:", accuracy_score(y_test, y_pred))

In [None]:
# Per-class precision, recall and F1 for the test-set predictions
print("\nClassification Report:\n", classification_report(y_true=y_test, y_pred=y_pred))

## Example-3

### 1. Import the Libraries

In [None]:
# Imports for Example 3. pandas and seaborn are imported here as well because
# the cells below call pd.DataFrame / pd.get_dummies and sns.heatmap.
from sklearn.datasets import load_iris
from sklearn.tree import DecisionTreeClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report
from sklearn import tree
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np
import pandas as pd

# Load the Iris sample dataset that ships with scikit-learn
iris = load_iris()

### 2. Load the Datasets

In [None]:
# Build a DataFrame from the iris feature matrix and append the numeric class
# labels as a 'target' column, then report the row count and preview the data
df = pd.DataFrame(data=iris.data, columns=iris.feature_names).assign(target=iris.target)
print(df.shape[0])
df.head(n=5)

### 3. Mapping the target variable to class names

In [None]:
# Pass the features through get_dummies and replace the numeric target codes
# with their species names
df_encoded = pd.get_dummies(df.drop(columns='target')).assign(
    target=df['target'].map({0: 'Setosa', 1: 'Versicolor', 2: 'Virginica'})
)

# Separate the target column from the feature columns
y = df_encoded['target']
X = df_encoded.drop(columns='target')

### 4. Splitting the Datasets into Train and Test set

In [None]:
# Hold out 20% of the rows for testing; the fixed seed keeps the split repeatable
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    random_state=42,
    test_size=0.20,
)

### 5. Train the Classifier

In [None]:
# Train the Decision Tree Classifier
clf = DecisionTreeClassifier(criterion='entropy', random_state=42)
clf.fit(X_train, y_train)

### 6. Predict and evaluate the model

In [None]:
# Predict the test set results
y_pred = clf.predict(X_test)
y_pred

### 7. Evaluate the Model

In [None]:
# Fix the class order explicitly so the matrix rows/columns and the heatmap
# tick labels are guaranteed to agree, instead of relying on the implicit
# sorted order of the labels found in y_test / y_pred.
class_labels = ['Setosa', 'Versicolor', 'Virginica']

# Compute the confusion matrix (rows = true class, columns = predicted class)
conf_matrix = confusion_matrix(y_test, y_pred, labels=class_labels)

# Create a heatmap for the confusion matrix with borders
plt.figure(figsize=(8, 6))
sns.heatmap(conf_matrix, annot=True, cmap="Blues", fmt='g',
            xticklabels=class_labels,
            yticklabels=class_labels,
            linewidths=0.8, linecolor='black')

# Add titles and labels
plt.title('Confusion Matrix with Borders')
plt.xlabel('Predicted Labels')
plt.ylabel('True Labels')

# Show the plot
plt.show()


In [None]:
# Overall fraction of test rows whose predicted species matches the true one
print("Accuracy:", accuracy_score(y_true=y_test, y_pred=y_pred))

In [None]:
# Per-class precision, recall and F1 for the test-set predictions
print("\nClassification Report:\n", classification_report(y_true=y_test, y_pred=y_pred))

### 8. Visualizing the Decision Tree

In [None]:
# Draw the fitted tree. feature_names is converted to a plain list, which every
# scikit-learn release accepts (some releases reject a pandas Index here).
# class_names is listed in alphabetical order, which is the order of the
# species-name labels the classifier was trained on.
plt.figure(figsize=(10, 8))
tree.plot_tree(
    clf,
    feature_names=list(X.columns),
    class_names=['Setosa', 'Versicolor', 'Virginica'],
    filled=True,
)
plt.show()

## Example-4

In [None]:
# Read the house dataset, report its row count, and preview the first rows
df = pd.read_csv("House_Data.csv")
print(df.shape[0])
df.head(n=5)

In [None]:
# Drop the identifier column. errors='ignore' makes this cell safe to re-run:
# without it, a second execution raises KeyError because 'Id' is already gone
# from the reassigned df.
df = df.drop(columns='Id', errors='ignore')
df.head()