In [3]:
import pandas as pd
import numpy as np
import plotly.graph_objects as go
from ipywidgets import widgets, VBox, HBox
from IPython.display import display, clear_output

# Load the GSVA differential-expression table.
df = pd.read_csv('macrophages_GSVA_DEGs.csv')  # Adjust the file path as needed

# The first column holds the pathway names: use it as the index and strip
# any leading/trailing whitespace so keyword matching and hover labels are clean.
df = df.set_index(df.columns[0])
df.index = df.index.str.strip()

# Build the y-axis column. Everything downstream (column name, axis label,
# 0.05 cutoff) talks about the *adjusted* p-value, so use 'adj.P.Val' when the
# table provides it. Fall back to the raw 'P.Value' column only when no
# adjusted column exists; in that case the plot shows unadjusted significance.
_p_value_column = 'adj.P.Val' if 'adj.P.Val' in df.columns else 'P.Value'
df['-log10(adj.P.Val)'] = -np.log10(df[_p_value_column])

def get_category(row, keywords=None, logic='AND'):
    """Classify one pathway row for the volcano plot.

    Args:
        row: A row of the results table (a pandas Series). ``row.name`` is the
            pathway name; the row must provide ``'GSVA_score'`` and
            ``'-log10(adj.P.Val)'``.
        keywords: Optional iterable of search strings. Matching is
            case-insensitive and underscores in the pathway name are treated
            as spaces. ``None``, an empty list, and blank entries mean
            "no keyword search".
        logic: ``'AND'`` requires every keyword to occur in the pathway name,
            ``'OR'`` requires at least one. Any other value disables keyword
            matching.

    Returns:
        ``'keyword_match'``, ``'upregulated'``, ``'downregulated'`` or
        ``'non-significant'``.
    """
    # Drop missing/blank keywords up front. Without this guard, AND logic on an
    # empty keyword list is vacuously true (all([]) is True) and every pathway
    # would be reported as a keyword match.
    needles = [kw.upper() for kw in (keywords or ()) if kw and kw.strip()]

    if needles:
        pathway_name = str(row.name).replace('_', ' ').upper()
        if logic == 'AND':
            matched = all(needle in pathway_name for needle in needles)
        elif logic == 'OR':
            matched = any(needle in pathway_name for needle in needles)
        else:
            matched = False
        if matched:
            return 'keyword_match'

    # No keyword hit: fall back to the score / significance cutoffs.
    is_significant = row['-log10(adj.P.Val)'] > -np.log10(0.05)
    if is_significant and row['GSVA_score'] > 0.5:
        return 'upregulated'
    if is_significant and row['GSVA_score'] < -0.5:
        return 'downregulated'
    return 'non-significant'

def update_plot(keywords=None, logic='AND'):
    """Categorise every pathway and show the interactive volcano plot.

    Reads the module-level ``df`` and adds (or overwrites) its ``'category'``
    column by applying ``get_category`` to each row, then draws one scatter
    trace per category and displays the figure with ``fig.show()``.

    Args:
        keywords: Optional iterable of search strings forwarded to
            ``get_category``. ``None`` is treated as an empty list.
        logic: ``'AND'`` or ``'OR'``, forwarded to ``get_category``.

    Returns:
        None. The figure is displayed as a side effect.
    """
    # Normalise here so a fresh list is handed on for every call instead of
    # sharing one mutable default object between calls.
    keywords = [] if keywords is None else list(keywords)

    # Apply category based on keyword search
    df['category'] = df.apply(get_category, axis=1, keywords=keywords, logic=logic)

    # Marker colour per category
    palette = {
        'keyword_match': '#32CD32',  # Green for pathways matching the search
        'upregulated': '#FF6347',  # Red for upregulated pathways
        'downregulated': '#1E90FF',  # Blue for downregulated pathways
        'non-significant': '#A9A9A9',  # Grey for non-significant pathways
    }

    # (category, legend label, marker size) in drawing order. Keyword matches
    # come last so they are drawn on top, and larger so they stand out.
    trace_specs = [
        ('non-significant', 'Non-Significant', 8),
        ('upregulated', 'Upregulated', 8),
        ('downregulated', 'Downregulated', 8),
        ('keyword_match', 'Keyword Matched Pathways', 15),
    ]

    fig = go.Figure()
    for category, legend_label, marker_size in trace_specs:
        subset = df[df['category'] == category]
        fig.add_trace(go.Scatter(
            x=subset['GSVA_score'],
            y=subset['-log10(adj.P.Val)'],
            mode='markers',
            marker=dict(
                size=marker_size,
                color=palette[category],
                opacity=0.8,  # Slight transparency so overlapping dots stay visible
                line=dict(width=0.5, color='black'),  # Thin black outline
            ),
            text=subset.index,  # Hover text: pathway names from the index
            hoverinfo='text',
            name=legend_label,
        ))

    # Transparent page background, white plotting area
    fig.update_layout(
        paper_bgcolor='rgba(0,0,0,0)',
        plot_bgcolor='rgba(255,255,255,1)',
        title='Interactive Volcano Plot with Keyword Search',
        xaxis_title='GSVA Score',
        yaxis_title='-log10(adj.P.Val)',
        title_font_size=18,
        width=800,
        height=600,
        legend_title_text='Pathway Categories',
    )

    # Dashed guides at the cutoffs used for categorisation: p = 0.05 on the
    # -log10 scale (horizontal) and GSVA score of +/-0.5 (vertical).
    fig.add_hline(y=-np.log10(0.05), line_dash="dash", line_color="grey")
    fig.add_vline(x=-0.5, line_dash="dash", line_color="grey")
    fig.add_vline(x=0.5, line_dash="dash", line_color="grey")

    # Show the interactive plot
    fig.show()

    # Save the plot as an HTML file (optional)
    #fig.write_html("interactive_volcano_plot_keyword_search.html")

def add_search_box():
    """Append one more keyword text field to the search form.

    The new field is numbered after the fields already held in the
    module-level ``keyword_inputs`` list, appended to that list, and the
    ``search_boxes`` container is rebuilt so all keyword fields come first,
    followed by the logic dropdown and the two buttons.
    """
    new_box = widgets.Text(
        description=f'Keyword {len(keyword_inputs) + 1}:',
        placeholder='Type a keyword',
        value='',
        disabled=False,
    )
    keyword_inputs.append(new_box)

    # Reassigning ``children`` is what makes the extra field show up.
    search_boxes.children = [*keyword_inputs, logic_dropdown, add_button, search_button]

def on_search_button_click(b):
    """Handle a click on the search button: redraw the form and the plot.

    Args:
        b: The object passed by the button's click callback (unused).
    """
    # Clear the previous plot output, then put the input form back
    clear_output(wait=True)
    display(search_boxes)

    # Collect the non-blank keywords. The *stripped* text is forwarded, so
    # stray leading/trailing spaces typed into a box cannot prevent a match.
    stripped_values = (box.value.strip() for box in keyword_inputs)
    keywords = [text for text in stripped_values if text]

    # Redraw with the chosen combination logic ('AND' or 'OR')
    update_plot(keywords, logic_dropdown.value)

# --- Search form state -------------------------------------------------------
# Container for the whole form, plus the list of keyword text fields that
# add_search_box() appends to.
search_boxes = VBox([])
keyword_inputs = []

# --- Controls ----------------------------------------------------------------
# Chooses whether all keywords ("AND") or any keyword ("OR") must match.
logic_dropdown = widgets.Dropdown(
    description='Logic:',
    options=['AND', 'OR'],
    value='AND',
    disabled=False,
)

# Adds another keyword field to the form.
add_button = widgets.Button(
    description="Add Keyword",
    icon='plus',
    button_style='info',  # 'success', 'info', 'warning', 'danger'
    tooltip='Click to add another keyword search box',
    disabled=False,
)

# Runs the search and redraws the plot.
search_button = widgets.Button(
    description="Search Pathways",
    icon='search',
    button_style='success',
    tooltip='Click to search for pathways containing all keywords',
    disabled=False,
)

# --- Event wiring ------------------------------------------------------------
# The click callback receives the button; add_search_box takes no arguments.
add_button.on_click(lambda _clicked: add_search_box())
search_button.on_click(on_search_button_click)

# --- Initial render ----------------------------------------------------------
# Start with a single keyword field, show the form, then draw the plot with
# no keyword filter.
add_search_box()
display(search_boxes)
update_plot()


VBox(children=(Text(value='', description='Keyword 1:', placeholder='Type a keyword'), Dropdown(description='L…