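# Notebook cell In [1]: install the Streamlit dependency (run in a shell or notebook cell):
#   pip install streamlit
#
# Note: you may need to restart the kernel to use updated packages.
#
#
# Notebook cell In [3]: Streamlit application source follows.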
import streamlit as st
import pandas as pd
from sklearn.cluster import KMeans
from sklearn.preprocessing import StandardScaler
import matplotlib.pyplot as plt
import seaborn as sns

# Function to perform clustering
def perform_clustering(data, n_clusters):
    """Cluster ``data`` with K-means and return a labelled copy.

    The features are standardised with ``StandardScaler`` before fitting so
    that no single column dominates the distance computation. When ``data``
    has exactly two columns, a scatter plot coloured by cluster is rendered
    in the Streamlit app.

    Args:
        data: DataFrame of features to cluster (accessed via ``.shape``,
            ``.iloc`` and ``.copy()``).
        n_clusters: Number of clusters passed to ``KMeans``.

    Returns:
        A copy of ``data`` with an added ``'Cluster'`` column holding the
        cluster label of each row. The input frame is not modified.
    """
    # Scale the data
    scaler = StandardScaler()
    data_scaled = scaler.fit_transform(data)

    # Fit the K-means model and label the training rows in one step
    kmeans = KMeans(n_clusters=n_clusters, random_state=42)
    clusters = kmeans.fit_predict(data_scaled)

    # Add cluster labels to a copy so the caller's frame stays untouched
    data_clustered = data.copy()
    data_clustered['Cluster'] = clusters

    # Plot clusters if the data is 2D
    if data.shape[1] == 2:
        # Draw on an explicit figure and hand it to Streamlit: calling
        # st.pyplot() with no argument relies on the implicit global pyplot
        # figure, which Streamlit has deprecated.
        fig, ax = plt.subplots(figsize=(10, 6))
        sns.scatterplot(
            x=data.iloc[:, 0],
            y=data.iloc[:, 1],
            hue=clusters,
            palette='viridis',
            ax=ax,
        )
        ax.set_title('K-means Clustering')
        st.pyplot(fig)
        # Release the figure so repeated reruns do not accumulate open figures.
        plt.close(fig)

    return data_clustered

# Function to display Task 1
def display_task1():
    """Render the Task 1 page: cluster the demo data and show the result.

    Shows a slider for the number of clusters, the clustered table, and an
    optional per-row explanation of which cluster a selected data point
    was assigned to.
    """
    st.header("Task 1: Clustering Results")

    # Demo data source; swap in the real loader when available.
    data = load_data()

    # Cluster with the user-selected k (range 2..10, default 3).
    clustered_data = perform_clustering(
        data,
        st.slider("Select number of clusters:", 2, 10, 3),
    )

    st.subheader("Clustered Data:")
    st.dataframe(clustered_data)

    # The explanation widgets only appear once the checkbox is ticked.
    if not st.checkbox("Explain Cluster for a Data Point"):
        return

    data_point = st.selectbox("Select a Data Point:", data.index)
    assigned_cluster = clustered_data.loc[data_point, 'Cluster']
    st.write(f"Data point {data_point} belongs to cluster {assigned_cluster}.")

# Function to load data (replace with your actual data loading mechanism)
def load_data():
    """Return a reproducible 100-row demo DataFrame with two numeric features.

    ``Feature1`` is drawn uniformly from [0, 100) and ``Feature2`` from
    [0, 50). The global NumPy RNG is seeded with 0 on every call, so the
    returned values are the same each time.
    """
    import numpy as np

    np.random.seed(0)
    # Draw order matters for reproducibility: Feature1 first, then Feature2.
    first_feature = np.random.rand(100) * 100
    second_feature = np.random.rand(100) * 50
    return pd.DataFrame({
        'Feature1': first_feature,
        'Feature2': second_feature,
    })

### Task 2: Classification

from sklearn.neighbors import KNeighborsClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score

# Function to perform classification
def perform_classification(X_train, y_train, X_test):
    """Train three classifiers and predict labels for ``X_test``.

    Features are standardised with a ``StandardScaler`` fitted on the
    training set only; the same scaler is then applied to the test set.

    Args:
        X_train: Training features.
        y_train: Training labels.
        X_test: Features to predict labels for.

    Returns:
        A ``(predictions, train_accuracies)`` tuple of dicts keyed by
        classifier display name: the predicted labels for ``X_test`` and
        the accuracy of each model on its own training data.
    """
    # Fit the scaler on training data, then apply it to both splits.
    scaler = StandardScaler()
    train_features = scaler.fit_transform(X_train)
    test_features = scaler.transform(X_test)

    # Display name -> untrained estimator.
    models = {
        "K-Nearest Neighbors (KNN)": KNeighborsClassifier(n_neighbors=5),
        "Decision Tree": DecisionTreeClassifier(random_state=42),
        "Random Forest": RandomForestClassifier(n_estimators=100, random_state=42),
    }

    predictions = {}
    train_accuracies = {}
    for label, model in models.items():
        model.fit(train_features, y_train)
        predictions[label] = model.predict(test_features)
        # Accuracy is measured on the training set itself.
        fitted_labels = model.predict(train_features)
        train_accuracies[label] = accuracy_score(y_train, fitted_labels)

    return predictions, train_accuracies

# Function to display Task 2
def display_task2():
    """Render the Task 2 page: train the classifiers and show their output.

    Displays each model's training accuracy followed by its predictions
    for the demo test set.
    """
    st.header("Task 2: Classification Results")

    # Demo data source; swap in the real loader when available.
    X_train, y_train, X_test = load_classification_data()

    results = perform_classification(X_train, y_train, X_test)
    predictions, train_accuracies = results

    # Each section is a sub-header followed by the matching result dict.
    sections = (
        ("Train Accuracy:", train_accuracies),
        ("Predictions:", predictions),
    )
    for title, payload in sections:
        st.subheader(title)
        st.write(payload)

# Function to load classification data (replace with your actual data loading mechanism)
def load_classification_data():
    """Return reproducible demo data for the classification task.

    The global NumPy RNG is seeded with 0 on every call, so the output is
    the same each time.

    Returns:
        ``(X_train, y_train, X_test)`` where ``X_train`` is a 100-row
        DataFrame, ``y_train`` is an array of 100 labels in {0, 1}, and
        ``X_test`` is a 20-row DataFrame. Both frames have the columns
        ``Feature1`` (uniform in [0, 100)) and ``Feature2`` (uniform in
        [0, 50)).
    """
    import numpy as np

    np.random.seed(0)

    # Draw order is kept fixed so the seeded values are reproducible:
    # train features, then labels, then test features.
    train_f1 = np.random.rand(100) * 100
    train_f2 = np.random.rand(100) * 50
    X_train = pd.DataFrame({'Feature1': train_f1, 'Feature2': train_f2})

    y_train = np.random.randint(0, 2, 100)

    test_f1 = np.random.rand(20) * 100
    test_f2 = np.random.rand(20) * 50
    X_test = pd.DataFrame({'Feature1': test_f1, 'Feature2': test_f2})

    return X_train, y_train, X_test

### Task 3: Raw Data Analysis

# Function to perform raw data analysis
def perform_raw_data_analysis(raw_data):
    """Aggregate the raw activity log per date.

    Args:
        raw_data: DataFrame with at least the columns ``'date'``,
            ``'time'``, ``'position'`` and ``'activity'``. ``'time'`` must
            be parseable by ``pd.to_datetime``. The frame is not modified.

    Returns:
        A ``(datewise_duration, pick_activities, place_activities)`` tuple:

        * ``datewise_duration``: DataFrame indexed by date with one column
          per ``position`` value. Each cell is the sum, as a ``Timedelta``,
          of the time-of-day offsets (time elapsed since midnight) of the
          matching rows; missing date/position pairs are zero.
        * ``pick_activities``: Series with the number of rows per date
          whose ``activity`` is ``'picked'``.
        * ``place_activities``: Series with the number of rows per date
          whose ``activity`` is ``'placed'``.
    """
    # 1. Datewise total duration for each inside and outside.
    # Timestamps cannot be summed (pandas raises TypeError for datetime64
    # sums), so convert each time to its offset from midnight first. This
    # also avoids overwriting the caller's 'time' column.
    # NOTE(review): this sums time-of-day values, mirroring the original
    # aggregation; if "duration" should mean the gap between consecutive
    # events, the input needs explicit start/end times. Confirm with the
    # data owner.
    timestamps = pd.to_datetime(raw_data['time'])
    elapsed = timestamps - timestamps.dt.normalize()
    datewise_duration = (
        elapsed.groupby([raw_data['date'], raw_data['position']])
        .sum()
        .unstack(fill_value=pd.Timedelta(seconds=0))
    )

    # 2. Datewise number of picking and placing activities
    activity = raw_data['activity']
    pick_activities = raw_data[activity == 'picked'].groupby('date').size()
    place_activities = raw_data[activity == 'placed'].groupby('date').size()

    return datewise_duration, pick_activities, place_activities

# Function to display Task 3
def display_task3(raw_data):
    """Render the Task 3 page: show the per-date aggregates of ``raw_data``.

    Args:
        raw_data: Raw activity DataFrame, forwarded unchanged to
            ``perform_raw_data_analysis``.
    """
    st.header("Task 3: Raw Data Analysis Results")

    analysis = perform_raw_data_analysis(raw_data)
    datewise_duration, pick_activities, place_activities = analysis

    # The duration table is rendered as a dataframe widget.
    st.subheader("Datewise Total Duration for Each Inside and Outside:")
    st.dataframe(datewise_duration)

    # The two per-date counts go through the generic st.write renderer.
    count_sections = (
        ("Datewise Number of Picking Activities:", pick_activities),
        ("Datewise Number of Placing Activities:", place_activities),
    )
    for title, counts in count_sections:
        st.subheader(title)
        st.write(counts)

# Main function to run the Streamlit app
def main():
    """Entry point of the Streamlit app: show the sidebar menu and dispatch.

    The sidebar select box picks one of the three tasks; the matching
    display function is then called. Raw data for Task 3 is only loaded
    when that task is selected.
    """
    st.title("Data Analysis Tasks")

    # Sidebar menu
    clustering = "Task 1: Clustering"
    classification = "Task 2: Classification"
    raw_analysis = "Task 3: Raw Data Analysis"
    choice = st.sidebar.selectbox(
        "Select Task",
        [clustering, classification, raw_analysis],
    )

    if choice == clustering:
        display_task1()
        return

    if choice == classification:
        display_task2()
        return

    if choice == raw_analysis:
        # Demo data source; swap in the real loader when available.
        display_task3(load_raw_data())
        return

    # Fallback for a selection that matches none of the menu entries.
    st.error("Invalid Choice")

# Function to load raw data (replace with your actual data loading mechanism)
def load_raw_data():
    """Return a reproducible 100-row demo activity log.

    The global NumPy RNG is seeded with 0 on every call.

    Returns:
        DataFrame with the columns:

        * ``date``: random day from 2024-01-01 to 2024-01-31.
        * ``time``: random timestamp at one-second resolution taken from
          ``pd.date_range("0:00", "23:59", freq="1s")``.
        * ``sensor``: random integer 0 or 1.
        * ``location``: one of ``'A1'`` .. ``'A5'``.
        * ``number``: random integer from 1 to 4.
        * ``activity``: ``'picked'`` or ``'placed'``.
        * ``position``: ``'Inside'`` or ``'Outside'``.
    """
    # Example: Generate raw data for demonstration
    import datetime as dt

    # numpy is not imported at module level in this file; import it here,
    # as the other demo loaders do, so the function does not raise NameError.
    import numpy as np

    np.random.seed(0)
    dates = pd.date_range(dt.datetime(2024, 1, 1), dt.datetime(2024, 1, 31))
    raw_data = pd.DataFrame({
        'date': np.random.choice(dates, 100),
        'time': np.random.choice(pd.date_range("0:00", "23:59", freq="1s"), 100),
        'sensor': np.random.randint(0, 2, 100),
        'location': np.random.choice(['A1', 'A2', 'A3', 'A4', 'A5'], 100),
        'number': np.random.randint(1, 5, 100),
        'activity': np.random.choice(['picked', 'placed'], 100),
        'position': np.random.choice(['Inside', 'Outside'], 100),
    })
    return raw_data

# Start the app only when this file is executed directly, not when it is imported.
if __name__ == "__main__":
    main()


