In [1]:
# Import necessary libraries
import numpy as np
import pandas as pd
from sklearn.datasets import load_iris
from sklearn.feature_selection import mutual_info_classif
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score

# Load Iris dataset
data = load_iris()
X = pd.DataFrame(data.data, columns=data.feature_names)
y = data.target

# Display the first few rows of the dataset
print("Features:")
print(X.head())
print("\nLabels:")
print(y[:5])

# Compute Mutual Information between each feature and the target variable
mi = mutual_info_classif(X, y)

# Create a DataFrame to visualize mutual information scores
mi_df = pd.DataFrame({
    'Feature': data.feature_names,
    'Mutual Information': mi
}).sort_values(by='Mutual Information', ascending=False)

print("\nMutual Information Scores for Features:")
print(mi_df)

# Select features with high MI for modeling
# In this case, let's take top 2 features for simplicity
top_features = mi_df['Feature'].iloc[:2].values
X_selected = X[top_features]

# Split the dataset
X_train, X_test, y_train, y_test = train_test_split(X_selected, y, test_size=0.3, random_state=42)

# Train a simple Random Forest classifier on selected features
clf = RandomForestClassifier(random_state=42)
clf.fit(X_train, y_train)

# Make predictions
y_pred = clf.predict(X_test)

# Evaluate the model
accuracy = accuracy_score(y_test, y_pred)
print(f"\nAccuracy with top 2 selected features: {accuracy:.2f}")


Features:
   sepal length (cm)  sepal width (cm)  petal length (cm)  petal width (cm)
0                5.1               3.5                1.4               0.2
1                4.9               3.0                1.4               0.2
2                4.7               3.2                1.3               0.2
3                4.6               3.1                1.5               0.2
4                5.0               3.6                1.4               0.2

Labels:
[0 0 0 0 0]

Mutual Information Scores for Features:
             Feature  Mutual Information
3   petal width (cm)            0.989486
2  petal length (cm)            0.986095
0  sepal length (cm)            0.496751
1   sepal width (cm)            0.281338

Accuracy with top 2 selected features: 1.00
