In [6]:
import pandas as pd
from sklearn.datasets import load_iris

# Load the Iris dataset
iris = load_iris()

# Create a DataFrame from the feature data and target labels
iris_df = pd.DataFrame(data=iris.data, columns=iris.feature_names)
iris_df['target'] = iris.target

# Display some basic information about the dataset
print(f"Classes: {iris.target_names}")
print(f"Samples per class: {iris.target.shape[0]}")
print(f"Samples total: {len(iris_df)}")
print(f"Dimensionality: {iris.data.shape[1]}")
print(f"Features: {iris.feature_names}")

# Display the entire DataFrame
iris_df


Classes: ['setosa' 'versicolor' 'virginica']
Samples per class: 150
Samples total: 150
Dimensionality: 4
Features: ['sepal length (cm)', 'sepal width (cm)', 'petal length (cm)', 'petal width (cm)']


Unnamed: 0,sepal length (cm),sepal width (cm),petal length (cm),petal width (cm),target
0,5.1,3.5,1.4,0.2,0
1,4.9,3.0,1.4,0.2,0
2,4.7,3.2,1.3,0.2,0
3,4.6,3.1,1.5,0.2,0
4,5.0,3.6,1.4,0.2,0
...,...,...,...,...,...
145,6.7,3.0,5.2,2.3,2
146,6.3,2.5,5.0,1.9,2
147,6.5,3.0,5.2,2.0,2
148,6.2,3.4,5.4,2.3,2


In [7]:
from sklearn.datasets import load_iris
from sklearn.tree import DecisionTreeClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score

#import necessary libraries


In [8]:
iris = load_iris()
X = iris.data
y = iris.target

#load iris dataset

In [9]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

#split data set into training set and testing set

In [32]:
# Create the Decision Tree Classifier with pruning

clf = DecisionTreeClassifier(max_depth=None, random_state=0)

In [33]:
clf.fit(X_train, y_train)

#fit classifier into training data

In [34]:
y_pred = clf.predict(X_test)

#make predictions on test data

In [35]:
accuracy = accuracy_score(y_test, y_pred)
print(f"Accuracy: {accuracy:.2f}")

#evaluate performance of DT model

Accuracy: 1.00


In [47]:
import pandas as pd
from sklearn.datasets import load_iris
from sklearn.tree import DecisionTreeClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from sklearn.model_selection import GridSearchCV

# Load the Iris dataset
iris = load_iris()
X = iris.data
y = iris.target

# Split the data into a training set (80%) and a testing set (20%)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Define the hyperparameters and their possible values
param_grid = {
    'max_depth': [3, 4, 5, 6],
    'min_samples_split': [2, 5, 10],
    'min_samples_leaf': [1, 2, 4]
}

# Create the Decision Tree Classifier
clf = DecisionTreeClassifier(random_state=0)

# Create a GridSearchCV object with cross-validation (e.g., 5-fold cross-validation)
grid_search = GridSearchCV(clf, param_grid, cv=5, scoring='accuracy')

# Fit the model to the training data using Grid Search for hyperparameter tuning
grid_search.fit(X_train, y_train)

# Get the best hyperparameters and the best estimator (model)
best_params = grid_search.best_params_
best_estimator = grid_search.best_estimator_

# Print the best hyperparameters
print(f"Best Hyperparameters: {best_params}")

# Use the best model to make predictions on the test data
y_pred = best_estimator.predict(X_test)

# Evaluate the model's accuracy on the test data
accuracy = accuracy_score(y_test, y_pred)
print(f"Accuracy: {accuracy:.2f}")


Best Hyperparameters: {'max_depth': 3, 'min_samples_leaf': 4, 'min_samples_split': 2}
Accuracy: 1.00


In [2]:
from sklearn.datasets import load_wine

# Load the Wine dataset
wine = load_wine()

# Access the data and target variables
X = wine.data  # Features (attributes)
y = wine.target  # Target labels

In [5]:
import pandas as pd
from sklearn.datasets import load_wine

# Load the Wine dataset
wine = load_wine()

# Create a DataFrame from the feature data and target labels
wine_df = pd.DataFrame(data=wine.data, columns=wine.feature_names)
wine_df['target'] = wine.target

# Display some basic information about the dataset
print(f"Classes: {wine.target_names}")
print(f"Samples per class: {wine.target.shape[0]}")
print(f"Samples total: {len(wine_df)}")
print(f"Dimensionality: {wine.data.shape[1]}")
print(f"Features: {wine.feature_names}")

# Display the entire DataFrame
wine_df


Classes: ['class_0' 'class_1' 'class_2']
Samples per class: 178
Samples total: 178
Dimensionality: 13
Features: ['alcohol', 'malic_acid', 'ash', 'alcalinity_of_ash', 'magnesium', 'total_phenols', 'flavanoids', 'nonflavanoid_phenols', 'proanthocyanins', 'color_intensity', 'hue', 'od280/od315_of_diluted_wines', 'proline']


Unnamed: 0,alcohol,malic_acid,ash,alcalinity_of_ash,magnesium,total_phenols,flavanoids,nonflavanoid_phenols,proanthocyanins,color_intensity,hue,od280/od315_of_diluted_wines,proline,target
0,14.23,1.71,2.43,15.6,127.0,2.80,3.06,0.28,2.29,5.64,1.04,3.92,1065.0,0
1,13.20,1.78,2.14,11.2,100.0,2.65,2.76,0.26,1.28,4.38,1.05,3.40,1050.0,0
2,13.16,2.36,2.67,18.6,101.0,2.80,3.24,0.30,2.81,5.68,1.03,3.17,1185.0,0
3,14.37,1.95,2.50,16.8,113.0,3.85,3.49,0.24,2.18,7.80,0.86,3.45,1480.0,0
4,13.24,2.59,2.87,21.0,118.0,2.80,2.69,0.39,1.82,4.32,1.04,2.93,735.0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
173,13.71,5.65,2.45,20.5,95.0,1.68,0.61,0.52,1.06,7.70,0.64,1.74,740.0,2
174,13.40,3.91,2.48,23.0,102.0,1.80,0.75,0.43,1.41,7.30,0.70,1.56,750.0,2
175,13.27,4.28,2.26,20.0,120.0,1.59,0.69,0.43,1.35,10.20,0.59,1.56,835.0,2
176,13.17,2.59,2.37,20.0,120.0,1.65,0.68,0.53,1.46,9.30,0.60,1.62,840.0,2


In [45]:
from sklearn.datasets import load_wine
from sklearn.tree import DecisionTreeClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from sklearn.model_selection import GridSearchCV

#import necessary libraries

In [38]:
wine = load_wine()
X = wine.data
y = wine.target

#load wine dataset

In [39]:
# Split the data into a training set (80%) and a testing set (20%)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [40]:
clf = DecisionTreeClassifier(max_depth=3, random_state=42)
# Create the Decision Tree Classifier with pruning

In [42]:
clf.fit(X_train, y_train)

#train the model by fitting the classifier into training data

In [43]:
y_pred = clf.predict(X_test)
#Evaluate the model and use the trained model to make predictions on the test data

In [44]:
accuracy = accuracy_score(y_test, y_pred)
print(f"Accuracy: {accuracy:.2f}")

#evaluate performance of DT model

Accuracy: 0.94


In [46]:
from sklearn.datasets import load_wine
from sklearn.tree import DecisionTreeClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from sklearn.model_selection import GridSearchCV

# Load the Wine dataset
wine = load_wine()
X = wine.data
y = wine.target

# Split the data into a training set (80%) and a testing set (20%)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Define the hyperparameters and their possible values
param_grid = {
    'max_depth': [3, 4, 5, 6],
    'min_samples_split': [2, 5, 10],
    'min_samples_leaf': [1, 2, 4]
}

# Create the Decision Tree Classifier
clf = DecisionTreeClassifier(random_state=42)

# Create a GridSearchCV object with cross-validation (e.g., 5-fold cross-validation)
grid_search = GridSearchCV(clf, param_grid, cv=5, scoring='accuracy')

# Fit the model to the training data using Grid Search for hyperparameter tuning
grid_search.fit(X_train, y_train)

# Get the best hyperparameters and the best estimator (model)
best_params = grid_search.best_params_
best_estimator = grid_search.best_estimator_

# Print the best hyperparameters
print(f"Best Hyperparameters: {best_params}")

# Use the best model to make predictions on the test data
y_pred = best_estimator.predict(X_test)

# Evaluate the model's accuracy on the test data
accuracy = accuracy_score(y_test, y_pred)
print(f"Accuracy: {accuracy:.2f}")

Best Hyperparameters: {'max_depth': 3, 'min_samples_leaf': 1, 'min_samples_split': 2}
Accuracy: 0.94


In [48]:
# Import necessary libraries
import matplotlib.pyplot as plt
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report

# ... (Your code for model training and evaluation)

# Calculate accuracy
accuracy = accuracy_score(y_test, y_pred)
print(f"Accuracy: {accuracy:.2f}")

# Create a confusion matrix
cm = confusion_matrix(y_test, y_pred)
print("Confusion Matrix:")
print(cm)

# Generate a classification report
report = classification_report(y_test, y_pred)
print("Classification Report:")
print(report)

# Visualize the confusion matrix (optional)
plt.imshow(cm, cmap=plt.cm.Blues)
plt.title("Confusion Matrix")
plt.colorbar()
plt.xlabel("Predicted Labels")
plt.ylabel("True Labels")
plt.show()


ModuleNotFoundError: No module named 'matplotlib'