# Yada Deconvolution Result

---

- Run the following cells to load and plot the Yada deconvolution results.

## 1 - Import prerequisites

In [1]:
from IPython.display import FileLink, FileLinks
import pandas as pd

# Raise pandas' display limits for this session: show up to 500 columns and
# allow up to 10000 characters per printed line.
pd.options.display.max_columns = 500
pd.options.display.width = 10000

In [None]:
!pip install -U kaleido==0.1.0post1

In [3]:
import pandas as pd
import plotly.express as px
import numpy as np
import plotly.io as pio


def _load_latest_pearson(csv_path, test_label):
    """Return mean scores per (submitter, celltype) for one results file.

    Reads ``csv_path``, keeps the rows where ``is_latest`` is true and
    ``metric`` equals ``'pearson'``, averages the numeric columns within each
    ``(submitter, celltype)`` group, and adds a ``Test`` column holding
    ``test_label`` so the rows stay identifiable after concatenation.
    """
    scores = pd.read_csv(csv_path)
    scores = scores.loc[scores.is_latest & (scores.metric == 'pearson')]
    scores = scores.groupby(['submitter', 'celltype']).mean(numeric_only=True).reset_index()
    scores['Test'] = test_label  # Distinguishes this batch of data
    return scores


# Load both result files through the same pipeline.
df1 = _load_latest_pearson('./r3-fine.csv', 'r3fine')
df2 = _load_latest_pearson('./r4-fine.csv', 'r4fine')

# Combine the dataframes; ignore_index avoids repeating the 0..n-1 labels
# that each per-round frame carries after reset_index().
df = pd.concat([df1, df2], ignore_index=True)

# One box per celltype and test, with every submitter drawn as a point.
fig = px.box(
    df,
    y='metric_value',
    x='celltype',
    color='Test',
    hover_name='submitter',
    points='all',
    height=1000,
    width=2500,
    category_orders={"Test": ["r3fine", "r4fine"]},
)
# Larger font so labels stay legible on the wide canvas.
fig.update_layout(font=dict(size=24))
fig.show()

#df = df.loc[df.is_latest]
#df = df.loc[df.metric == 'pearson']
#df = df.groupby(['submitter', 'celltype']).mean(numeric_only=True)
#df = df.reset_index()

#Creating a new column with colors.
#df['mcolour'] = np.where(df.submitter == 'D3Team', 'YADA', 'Other')
#fig = px.box(df.reset_index(), y = 'metric_value', x = 'celltype', hover_name = 'submitter', points = 'all', height=4000, width=6000, color = 'mcolour')
#fig = px.box(df.reset_index(), y = 'metric_value', x = 'celltype', hover_name = 'submitter', points = 'all', height=1000, width=2500, color = 'mcolour')

# Set font size for text
#fig.update_layout(font=dict(size=24))  # Adjust the font size as needed
#fig.show()

In [4]:
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go
import plotly.io as pio


def _load_round_means(csv_path, round_label):
    """Return mean numeric scores per (submitter, celltype) for one file.

    Reads ``csv_path``, keeps the rows where ``is_latest`` is true and
    ``metric`` equals ``'pearson'``, then averages only the numeric-dtype
    columns within each ``(submitter, celltype)`` group. A ``Test`` column
    holding ``round_label`` is added so rounds can be told apart once the
    frames are concatenated.
    """
    scores = pd.read_csv(csv_path)
    scores = scores[scores.is_latest & (scores.metric == 'pearson')]
    # Aggregate numeric columns only; the grouping keys are kept as columns.
    numeric_cols = scores.select_dtypes(include=['number']).columns.tolist()
    scores = scores.groupby(['submitter', 'celltype'], as_index=False)[numeric_cols].mean()
    scores['Test'] = round_label
    return scores


# Load and prepare both rounds with the same pipeline.
df1 = _load_round_means('./r3-fine.csv', 'round3')
df2 = _load_round_means('./r4-fine.csv', 'round4')

# Combine the DataFrames (fresh index so labels are not duplicated).
df_combined = pd.concat([df1, df2], ignore_index=True)

# Grouped box plot: one box per celltype and round, all submitters as points.
# Alternative smaller layout used previously: height=1000, width=2500, font size 24.
fig = px.box(
    df_combined,
    y='metric_value',
    x='celltype',
    color='Test',
    hover_name='submitter',
    points='all',
    height=2000,
    width=6000,
    category_orders={"Test": ["round3", "round4"]},
)

# Highlight 'D3Team' with one marker trace per (round, celltype) that has data.
# All highlight traces share a legendgroup so the single "YADA" legend entry
# toggles every one of them, not just the first trace.
# NOTE(review): these scatter markers are presumably not offset like the
# grouped boxes, so the round3 and round4 markers of one celltype may sit at
# the same x position - confirm in the rendered figure.
d3team_rows = df_combined[df_combined['submitter'] == 'D3Team']
d3team_groups = d3team_rows.groupby(['Test', 'celltype'], sort=False)
for trace_index, (_, df_d3team) in enumerate(d3team_groups):
    fig.add_trace(go.Scatter(
        y=df_d3team['metric_value'],
        x=df_d3team['celltype'],
        name="YADA",
        legendgroup="YADA",
        mode='markers',
        marker=dict(
            color='yellow',  # A bright color for high visibility
            size=20,  # Larger size to stand out
            line=dict(
                color='black',  # Add a border to increase visibility
                width=2,
            ),
        ),
        hoverinfo='y+name',
        showlegend=(trace_index == 0),  # Only the first trace contributes the legend entry
    ))

# Update layout
fig.update_layout(
    title="Metric Values by Celltype and Test",
    xaxis=dict(title="Celltype", type='category'),
    yaxis_title="Metric Value",
    margin=dict(t=200),  # Increase top margin
    font=dict(size=60),
    boxmode='group',  # Group boxes together by celltype
    boxgap=0.1,  # Decrease for narrower gaps between boxes
    boxgroupgap=0.2,  # Decrease for narrower gaps between groups of boxes
)

fig.show()


In [5]:
# Write the current figure to disk as a 6000x3000 SVG.
# NOTE(review): the target is an absolute, machine-specific path - adjust it
# before running on another machine.
svg_path = 'c:\\Users\\danili\\Documents\\GitHub\\Yada\\data\\Challenge\\histogram.svg'
pio.write_image(fig, file=svg_path, format='svg', width=6000, height=3000)