In [1]:
# Install the app's runtime dependencies into the kernel's own environment.
# `%pip` (not bare `pip`) guarantees the packages land where this kernel imports from.
# seaborn/matplotlib/pandas/numpy are imported directly by the app, so they are listed explicitly.
%pip install streamlit scikit-learn pyswarm==0.6 seaborn matplotlib pandas numpy


Collecting pyswarm
  Downloading pyswarm-0.6.tar.gz (4.3 kB)
  Preparing metadata (setup.py): started
  Preparing metadata (setup.py): finished with status 'done'
Building wheels for collected packages: pyswarm
  Building wheel for pyswarm (setup.py): started
  Building wheel for pyswarm (setup.py): finished with status 'done'
  Created wheel for pyswarm: filename=pyswarm-0.6-py3-none-any.whl size=4486 sha256=247d3262c9cd9f1ff1dccc1541feec2fd2d202c8b5cbc1bef568266805850da9
  Stored in directory: c:\users\kiit\appdata\local\pip\cache\wheels\93\15\89\3970ef96abd6123028010a90f007c4e6a2bed700db0aa2d36a
Successfully built pyswarm
Installing collected packages: pyswarm
Successfully installed pyswarm-0.6
Note: you may need to restart the kernel to use updated packages.


In [2]:
# Redundant with the first install cell; pinned so a re-run cannot pull a different pyswarm release.
%pip install pyswarm==0.6


Note: you may need to restart the kernel to use updated packages.


In [3]:
%%writefile Feature_Selection_app.py
import streamlit as st
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
from io import StringIO

from sklearn.model_selection import train_test_split, cross_val_score
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report
from pyswarm import pso
from sklearn import datasets

# ------------------------------------------------------------------
# Custom CSS Styling (harmonious color palette, improved fonts and borders)
# ------------------------------------------------------------------
# Stylesheet injected into the page as raw HTML; kept in a named constant so the
# markup is separated from the call that renders it.
_CUSTOM_CSS = """
    <style>
    @import url('https://fonts.googleapis.com/css2?family=Roboto:wght@400;500;700&display=swap');

    .main .block-container {
        padding-top: 2rem;
        padding-bottom: 2rem;
        background-color: #FAF0E6; /* Light warm background */
        font-family: 'Roboto', sans-serif;
        font-size: 1.1rem;
    }
    
    h1 {
        color: #483D8B; /* Dark Slate Blue */
        font-size: 3rem;
        text-align: center;
        font-weight: 700;
        margin-bottom: 1rem;
    }
    
    h2, h3, h4 {
        color: #483D8B;
        font-weight: 500;
        border-bottom: 2px solid #4682B4;
        padding-bottom: 0.3rem;
        margin-bottom: 1rem;
    }
    
    .stButton>button {
        background-color: #4682B4; /* SteelBlue */
        color: white;
        font-size: 1rem;
        font-weight: bold;
        border: none;
        border-radius: 5px;
        padding: 0.5rem 1rem;
    }
    
    .sidebar .sidebar-content {
        background: linear-gradient(135deg, #87CEEB, #4682B4);
        color: white;
        font-size: 1rem;
    }
    
    .stSelectbox>div>div>div>input {
        background-color: #F8F8FF;
        color: #333;
    }
    
    img {
        border-radius: 10px;
        border: 2px solid #4682B4;
    }
    
    .section-container {
        border: 1px solid #D3D3D3;
        border-radius: 10px;
        padding: 1rem;
        background-color: #FFFFFF;
        margin-bottom: 1.5rem;
    }
    </style>
    """

# unsafe_allow_html is required for the <style> tag to be emitted verbatim.
st.markdown(_CUSTOM_CSS, unsafe_allow_html=True)

# ------------------------------------------------------------------
# PSO Feature Selection
# ------------------------------------------------------------------
def run_pso(X_train, X_test, y_train, y_test, swarm_size, max_iter):
    """Select a feature subset of ``X_train`` with Particle Swarm Optimization.

    Each particle is a vector in ``[0, 1]`` with one coordinate per column of
    ``X_train``; a column counts as selected when its coordinate is above 0.5.
    The cost of a particle is ``1 - mean accuracy`` of a 5-fold cross-validated
    random forest (50 trees, depth 5, ``random_state=42``) trained on the
    selected columns only. A particle that selects nothing gets cost 1.

    Parameters
    ----------
    X_train : pandas.DataFrame
        Training features; columns are addressed by position.
    X_test, y_test
        Not used by the search. Kept in the signature so existing callers
        keep working.
    y_train : array-like
        Training labels aligned positionally with ``X_train``.
    swarm_size : int
        Number of particles, forwarded to ``pso`` as ``swarmsize``.
    max_iter : int
        Iteration budget, forwarded to ``pso`` as ``maxiter``.

    Returns
    -------
    list of int
        Positional indices of the selected columns. Never empty as long as
        ``X_train`` has at least one column: if no coordinate of the best
        particle exceeds 0.5, the single highest-valued coordinate is used.
    """
    num_features = X_train.shape[1]
    if num_features == 0:
        # Nothing to search over; avoid handing the optimizer empty bounds.
        return []

    # Many particles collapse onto the same binary mask, and the cost of a
    # mask is deterministic (fixed random_state, unshuffled CV), so each
    # distinct mask is cross-validated only once.
    cost_by_mask = {}

    def fitness_function(selected_features):
        selected_indices = tuple(i for i, val in enumerate(selected_features) if val > 0.5)
        if not selected_indices:
            return 1  # High error if no features selected

        if selected_indices not in cost_by_mask:
            X_train_sel = X_train.iloc[:, list(selected_indices)]
            model = RandomForestClassifier(n_estimators=50, max_depth=5, random_state=42)
            scores = cross_val_score(model, X_train_sel, y_train, cv=5, scoring='accuracy')
            cost_by_mask[selected_indices] = 1 - scores.mean()
        return cost_by_mask[selected_indices]

    lb = [0] * num_features
    ub = [1] * num_features
    best_solution, _ = pso(fitness_function, lb, ub, swarmsize=swarm_size, maxiter=max_iter)

    selected_features = [i for i, val in enumerate(best_solution) if val > 0.5]
    if not selected_features:
        # An empty selection would make every downstream fit fail on a
        # zero-column frame; keep the feature the swarm rated highest instead.
        selected_features = [int(np.argmax(best_solution))]
    return selected_features

# ------------------------------------------------------------------
# Utility Functions
# ------------------------------------------------------------------
def encode_and_scale(X_train, X_test):
    """Integer-encode non-numeric columns and standardize every column.

    Categories are learned from ``X_train`` only: the distinct values of a
    column (compared as strings) are sorted and numbered from 0. A test value
    that never occurred in training is treated as the label ``'Other'``; it
    gets the code of ``'Other'`` if that label exists in training, otherwise
    the next unused code. The scaler is likewise fitted on ``X_train`` only.

    The inputs are not modified; encoding is done on copies.

    Parameters
    ----------
    X_train, X_test : pandas.DataFrame
        Feature frames with the same columns.

    Returns
    -------
    (pandas.DataFrame, pandas.DataFrame)
        Scaled train and test frames with the original column names and a
        fresh positional index.
    """
    # Work on copies: the callers' frames are typically slices of a larger
    # frame, and writing into them mutates data the caller still holds.
    X_train = X_train.copy()
    X_test = X_test.copy()

    # Everything that is not numeric/bool must be encoded before scaling
    # (object, string and categorical columns alike).
    categorical_cols = X_train.select_dtypes(exclude=["number", "bool"]).columns
    for col in categorical_cols:
        # Comparing as strings keeps mixed-type columns sortable.
        train_values = X_train[col].astype(str)
        test_values = X_test[col].astype(str)

        classes = sorted(set(train_values))
        codes = {label: code for code, label in enumerate(classes)}
        other_code = codes.get('Other', len(classes))

        X_train[col] = train_values.map(codes).astype("int64")
        X_test[col] = test_values.map(lambda v: codes.get(v, other_code)).astype("int64")

    scaler = StandardScaler()
    X_train_scaled = pd.DataFrame(scaler.fit_transform(X_train), columns=X_train.columns)
    X_test_scaled = pd.DataFrame(scaler.transform(X_test), columns=X_test.columns)
    return X_train_scaled, X_test_scaled

def load_sample_digits():
    """Return the scikit-learn digits dataset as a single DataFrame.

    Feature columns are named ``pixel_0`` .. ``pixel_{n-1}`` in the order they
    appear in the dataset's ``data`` matrix; the labels are appended as a
    final ``target`` column.
    """
    digits = datasets.load_digits()
    pixel_count = digits.data.shape[1]
    pixel_columns = ["pixel_" + str(idx) for idx in range(pixel_count)]

    frame = pd.DataFrame(digits.data, columns=pixel_columns)
    frame['target'] = digits.target
    return frame

# ------------------------------------------------------------------
# Sidebar Navigation
# ------------------------------------------------------------------
# Pages offered in the sidebar; the page-dispatch chain further down compares
# `page` against these exact labels.
nav_pages = ["Welcome", "Sample Datasets", "Instructions", "Feature Selection"]

st.sidebar.title("Navigation")
page = st.sidebar.radio("Go to", nav_pages, index=0)

# ------------------------------------------------------------------
# Welcome Page
# ------------------------------------------------------------------
def _fit_and_score(model, X_tr, X_te, y_tr, y_te):
    """Fit ``model`` on the training split and score it on the test split.

    Returns a tuple ``(accuracy, confusion_matrix, classification_report_dict)``.
    """
    model.fit(X_tr, y_tr)
    y_pred = model.predict(X_te)
    return (
        accuracy_score(y_te, y_pred),
        confusion_matrix(y_te, y_pred),
        classification_report(y_te, y_pred, output_dict=True),
    )


def _show_confusion_matrix(title, matrix):
    """Render ``matrix`` as a heatmap under ``title`` and release the figure."""
    st.subheader(title)
    fig, ax = plt.subplots()
    sns.heatmap(matrix, annot=True, cmap="Blues", fmt="d", ax=ax)
    st.pyplot(fig)
    # pyplot keeps every figure it creates until it is closed explicitly;
    # without this each run of the page would leave its figures in memory.
    plt.close(fig)


if page == "Welcome":
    st.title("Feature Selection using PSO 🚀")
    st.markdown("""
    <div class="section-container">
    Welcome to our <b>Feature Selection Web Application</b>! 😊<br><br>
    This tool leverages <b>Particle Swarm Optimization (PSO)</b> to determine the most informative subset 
    of features from your dataset. By eliminating noisy and redundant variables, our app aims to simplify 
    your predictive model and improve its generalization. Detailed evaluations including accuracy, precision, 
    recall, and F1-score will help you understand the impact of feature selection.
    </div>
    """, unsafe_allow_html=True)

# ------------------------------------------------------------------
# Sample Datasets Page
# ------------------------------------------------------------------
elif page == "Sample Datasets":
    st.title("Sample Datasets 📂")
    st.markdown("Download this sample dataset to test the app.")
    st.subheader("Digits Dataset")
    digits_df = load_sample_digits()
    csv_digits = digits_df.to_csv(index=False)
    st.download_button(label="Download Digits CSV", data=csv_digits, file_name="digits.csv", mime="text/csv")

# ------------------------------------------------------------------
# Instructions Page
# ------------------------------------------------------------------
elif page == "Instructions":
    st.title("How to Use This App 📝")
    st.markdown("""
    <div class="section-container">
    <strong>1. Upload a Dataset:</strong> Use your own CSV file or the sample Digits dataset provided.<br><br>
    <strong>2. (Optional) Drop Columns:</strong> Remove any columns that you suspect might be leaky or irrelevant (like IDs or timestamps).<br><br>
    <strong>3. Select the Target Variable:</strong> Choose the column you want to predict.<br><br>
    <strong>4. Set PSO Parameters:</strong> Use the sliders to choose the swarm size and number of iterations.<br><br>
    <strong>5. Run Feature Selection:</strong> PSO will optimize the feature subset.<br><br>
    <strong>6. Evaluate the Model:</strong> Compare performance metrics (accuracy, confusion matrix, classification report) 
    between the full and the selected feature sets.
    </div>
    """, unsafe_allow_html=True)

# ------------------------------------------------------------------
# Feature Selection Page
# ------------------------------------------------------------------
elif page == "Feature Selection":
    st.title("Feature Selection using PSO 🔍")

    uploaded_file = st.file_uploader("Upload your dataset (CSV)", type=["csv"])
    if uploaded_file is not None:
        try:
            df = pd.read_csv(uploaded_file)
        except Exception as e:
            # Surface the parser's message so the user can see what is wrong with the file.
            st.error(f"Error reading CSV file. Please check the file format. Details: {e}")
            st.stop()

        # Data cleaning: replace '.' with NaN and drop rows with missing values.
        df.replace('.', np.nan, inplace=True)
        df.dropna(inplace=True)
        if df.empty:
            # Every later step (preview, split, training) needs at least some rows.
            st.error("No rows remain after removing rows with missing values.")
            st.stop()
        st.write("### Dataset Preview")
        st.write(df.head())

        # Optional: Allow user to drop unwanted columns
        st.markdown("### Drop Columns (Optional) 🗑️")
        columns_to_drop = st.multiselect("Select columns to drop if they are leaky or irrelevant:", options=df.columns)
        if columns_to_drop:
            df.drop(columns=columns_to_drop, inplace=True)
            st.write("Updated dataset preview:")
            st.write(df.head())

        # Target variable selection
        st.markdown("### Target Variable Selection 🎯")
        target_column = st.selectbox("Select Target Variable", df.columns)
        # `is not None` rather than truthiness: a falsy column label is still a valid choice.
        if target_column is not None:
            # Let user set PSO parameters via sliders
            st.markdown("### Set PSO Parameters ⚙️")
            swarm_size = st.slider("Swarm Size", min_value=5, max_value=50, value=10, step=1)
            max_iter = st.slider("Max Iterations", min_value=5, max_value=50, value=10, step=1)

            if st.button("Confirm and Run Feature Selection"):
                X = df.drop(columns=[target_column])
                y = df[target_column]
                if X.shape[1] == 0:
                    st.error("No feature columns remain. Keep at least one column besides the target.")
                    st.stop()
                if y.dtype == 'O':
                    y = LabelEncoder().fit_transform(y)

                try:
                    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
                    X_train, X_test = encode_and_scale(X_train, X_test)

                    st.write("### Running PSO for Feature Selection...")
                    selected_indices = run_pso(X_train, X_test, y_train, y_test, swarm_size, max_iter)
                    if len(selected_indices) == 0:
                        # Fitting on a zero-column frame would fail; compare against the full set instead.
                        st.warning("PSO did not select any feature; using all features instead.")
                        selected_indices = list(range(X_train.shape[1]))
                    st.write("**Selected Features:**", list(X_train.columns[selected_indices]))

                    model = RandomForestClassifier(n_estimators=50, max_depth=5, random_state=42)

                    # Evaluate selected features
                    accuracy_selected, sel_conf_matrix, sel_report = _fit_and_score(
                        model,
                        X_train.iloc[:, selected_indices],
                        X_test.iloc[:, selected_indices],
                        y_train,
                        y_test,
                    )

                    # Evaluate all features
                    accuracy_all, all_conf_matrix, all_report = _fit_and_score(
                        model, X_train, X_test, y_train, y_test
                    )
                except ValueError as exc:
                    # scikit-learn reports unusable data (e.g. too few samples to split,
                    # a non-categorical target) as ValueError; show it instead of a traceback.
                    st.error(f"Feature selection could not be run on this data: {exc}")
                    st.stop()

                st.write("## Model Evaluation")
                col1, col2 = st.columns(2)
                with col1:
                    st.markdown("<h3 style='color:#483D8B;'>Accuracy (All Features)</h3>", unsafe_allow_html=True)
                    st.write(accuracy_all)
                with col2:
                    st.markdown("<h3 style='color:#483D8B;'>Accuracy (Selected Features)</h3>", unsafe_allow_html=True)
                    st.write(accuracy_selected)

                st.markdown("""
                <div style="background-color:#F5DEB3; padding:1rem; border-radius:10px;">
                <strong>Note:</strong> Even if overall accuracy dips slightly, improvements in precision, recall, 
                or F1-score for certain classes may indicate that the selected subset is more informative. 😊
                </div>
                """, unsafe_allow_html=True)

                _show_confusion_matrix("Confusion Matrix (All Features)", all_conf_matrix)
                _show_confusion_matrix("Confusion Matrix (Selected Features)", sel_conf_matrix)

                st.subheader("Classification Report (All Features)")
                st.dataframe(pd.DataFrame(all_report).transpose())

                st.subheader("Classification Report (Selected Features)")
                st.dataframe(pd.DataFrame(sel_report).transpose())
        else:
            st.info("Please select a target variable before running feature selection.")


Writing Feature_Selection_app.py
