# In [None]:
import re

import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns
import streamlit as st
from sklearn.ensemble import RandomForestClassifier
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import (
    confusion_matrix,
    classification_report,
    accuracy_score,
)
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.tree import DecisionTreeClassifier

# Function to preprocess text
def preprocess_text(text):
    """Normalize a raw document into a lowercase, space-separated token string.

    The text is lowercased and split into runs of word characters, which
    drops punctuation and collapses any whitespace. The tokens are then
    re-joined with single spaces so the result can be fed directly to a
    text vectorizer.

    Args:
        text: The raw document. ``None`` and float NaN (how pandas represents
            an empty CSV cell) are treated as an empty document; any other
            non-string value is converted with ``str()``.

    Returns:
        The normalized text, or ``""`` when the input is missing or contains
        no word characters.
    """
    # NaN is the only float that is not equal to itself.
    is_nan = isinstance(text, float) and text != text
    if text is None or is_nan:
        return ""

    tokens = re.findall(r"\w+", str(text).lower())
    # TODO: add stop-word removal / lemmatization here if required; both need
    # an NLP dependency that this file does not currently import.
    return " ".join(tokens)

# Function to create word clouds
def create_word_clouds(df, text_column_name):
    """Placeholder for per-class word cloud rendering.

    Currently a no-op: neither argument is read and nothing is drawn.

    Args:
        df: DataFrame holding the documents (unused for now).
        text_column_name: Name of the column in ``df`` that contains the
            text to visualize (unused for now).

    Returns:
        None.
    """
    # TODO: your word cloud generation code here.
    return None

# Function to evaluate and display metrics
def evaluate(model):
    """Placeholder for computing and displaying a model's metrics.

    Currently a no-op: ``model`` is not inspected and nothing is displayed.
    Note that only the model is passed in, so an implementation will also
    need access to held-out data before it can compute any score.

    Args:
        model: The fitted estimator to evaluate (unused for now).

    Returns:
        None.
    """
    # TODO: your evaluation code here.
    return None

# Streamlit app
def main(data_path='/content/bbc-text1.csv'):
    """Run the text-classification Streamlit page end to end.

    Loads the dataset, plots the class distribution, preprocesses the text,
    renders the word-cloud section, encodes the labels, builds TF-IDF
    features and then trains and evaluates three classifiers (logistic
    regression, decision tree, random forest).

    Args:
        data_path: Path (or anything ``pandas.read_csv`` accepts) of the CSV
            to load. The file must provide a ``text`` column with the
            documents and a ``category`` column with the labels. Defaults to
            the location that was previously hard-coded.
    """
    st.title("Text Classification Streamlit App")

    # Load data
    df = pd.read_csv(data_path)

    # Visualize distribution of encoded labels
    st.subheader("Distribution of Encoded Labels")
    # Draw on an explicit figure: calling st.pyplot() with no argument relies
    # on Matplotlib's global figure, which is deprecated in Streamlit and is
    # not thread-safe.
    fig, ax = plt.subplots()
    sns.countplot(x='category', data=df, ax=ax)
    st.pyplot(fig)
    # Streamlit re-runs the script on every interaction; close the figure so
    # they do not pile up in memory.
    plt.close(fig)

    # Preprocess text
    df['text_processed'] = df['text'].apply(preprocess_text)

    # Create word clouds
    st.subheader("Word Clouds for Each Class")
    create_word_clouds(df, 'text_processed')

    # Encode labels
    label_encoder = LabelEncoder()
    df['category'] = label_encoder.fit_transform(df['category'])
    texts = df['text_processed']
    y = df['category']

    # Train-test split. Split the raw text *before* vectorizing so that the
    # TF-IDF vocabulary and IDF weights are learned from training data only;
    # fitting on the full corpus would leak test-set information.
    train_texts, test_texts, y_train, y_test = train_test_split(
        texts, y, test_size=0.2, random_state=42, shuffle=True
    )

    # TF-IDF Vectorization
    vectorizer = TfidfVectorizer()
    X_train = vectorizer.fit_transform(train_texts)
    X_test = vectorizer.transform(test_texts)

    # Model training and evaluation
    st.subheader("Model Training and Evaluation")

    # Section heading -> estimator; dicts preserve insertion order, so the
    # sections appear as Logistic Regression, Decision Tree, Random Forest.
    models = {
        "Logistic Regression": LogisticRegression(random_state=42),
        "Decision Tree": DecisionTreeClassifier(random_state=42),
        "Random Forest": RandomForestClassifier(random_state=42),
    }
    for heading, model in models.items():
        st.subheader(heading)
        model.fit(X_train, y_train)
        evaluate(model)

# Launch the app only when this file is executed as a script, not when it is
# imported as a module.
if __name__ == "__main__":
    main()
