In [20]:
# read the file in downloads/TRIMMER00113_P101_LightChain.tsv
# create a dataframe of connections between the Sample_Name and DuplicatedIn, which is a list of Sample_Names

import pandas as pd
import sys
import os
import re
import argparse
import networkx as nx
import matplotlib.pyplot as plt
import numpy as np
import matplotlib as mpl
import gravis as gv
import glob


def read_in_nodes(file_name):
    """Read a tab-separated sequence table and return its light-chain edge list.

    Only rows whose ``Chain`` value contains ``'LC'`` are kept. Every such row
    contributes one edge per comma-separated entry of ``DuplicatedIn``; a row
    with no ``DuplicatedIn`` value (or an empty entry such as the one left by
    a trailing comma) is connected to itself so the sample still appears as a
    node.

    Parameters
    ----------
    file_name : str or path-like
        Tab-separated file with at least the columns ``Sample_Name``,
        ``DuplicatedIn``, ``MabID`` and ``Chain``.

    Returns
    -------
    pandas.DataFrame
        One row per unique edge with the columns:

        * ``Source``   - ``Sample_Name`` with spaces removed
        * ``Target``   - one ``DuplicatedIn`` entry with spaces removed
        * ``category`` - the part of ``MabID`` before the first ``'.'``
        * ``label``    - the part of ``Source`` between the first and second
          ``'_'``, or the whole ``Source`` when it contains no ``'_'``
    """
    table = pd.read_csv(file_name, sep='\t')

    # na=False: rows without a Chain value are simply "not light chain";
    # without it the mask contains NaN and cannot be used for indexing.
    is_light_chain = table['Chain'].str.contains('LC', na=False)

    # .copy() so the column assignments below modify an independent frame
    # instead of a view of `table` (avoids SettingWithCopyWarning).
    edges = table.loc[is_light_chain, ['Sample_Name', 'DuplicatedIn', 'MabID']].copy()

    # A sample that is not duplicated anywhere is connected to itself.
    edges['DuplicatedIn'] = edges['DuplicatedIn'].fillna(edges['Sample_Name'])
    # Rows still missing a sample name or MabID cannot be placed in the graph.
    edges = edges.dropna()

    # One row per (sample, duplicate) pair.
    edges['DuplicatedIn'] = edges['DuplicatedIn'].astype(str).str.split(',')
    edges = edges.explode('DuplicatedIn').reset_index(drop=True)
    edges = edges.rename(columns={'Sample_Name': 'Source', 'DuplicatedIn': 'Target'})

    # Remove spaces (e.g. the one after each comma in "A, B") from both ends
    # of the edge so the same sample is always spelled the same way.
    edges['Source'] = edges['Source'].astype(str).str.replace(' ', '', regex=False)
    edges['Target'] = edges['Target'].astype(str).str.replace(' ', '', regex=False)

    # An empty entry (trailing comma, blank-only value) is treated like a
    # missing value rather than as a node named "".
    edges['Target'] = edges['Target'].where(edges['Target'] != '', edges['Source'])

    edges = edges.drop_duplicates().reset_index(drop=True)

    # Colour key: everything in MabID before the first '.'.
    edges['category'] = edges['MabID'].astype(str).map(lambda mab_id: mab_id.split('.')[0])
    # Display label: the field after the first '_' in the sample name; fall
    # back to the full name instead of raising when there is no '_'.
    edges['label'] = edges['Source'].map(
        lambda name: name.split('_')[1] if '_' in name else name
    )

    return edges.drop(columns=['MabID'])

def create_graph(df):
    """Build a networkx graph from an edge-list dataframe.

    Edges run from the ``Source`` column to the ``Target`` column. The
    ``category`` and ``label`` columns are copied onto the nodes, keyed by
    ``Source``; a node that only ever appears in ``Target`` therefore gets
    neither attribute.
    """
    graph = nx.from_pandas_edgelist(df, source='Source', target='Target')

    # Index once by Source and attach each per-node column in turn.
    by_source = df.set_index('Source')
    for attribute in ('category', 'label'):
        nx.set_node_attributes(graph, by_source[attribute].to_dict(), name=attribute)

    return graph

In [2]:

import matplotlib.pyplot as plt
import networkx as nx
import plotly.graph_objs as go
import matplotlib.pyplot as plt


def create_plotly_network_graph(G, file_name, seed=42):
    """Render a networkx graph as an interactive Plotly figure.

    Nodes are drawn as markers coloured by their ``category`` attribute and
    annotated with their ``label`` attribute (the node name is used when a
    node has no label). Edges are drawn as thin grey lines.

    Parameters
    ----------
    G : networkx graph
        Graph to draw. If any node carries a ``pos`` attribute those
        positions are used; otherwise a spring layout is computed.
    file_name : str
        Text shown as the figure title.
    seed : int or None, optional
        Seed handed to ``nx.spring_layout`` so the same graph is laid out the
        same way on every run. Pass ``None`` for a random layout.

    Returns
    -------
    plotly.graph_objs.Figure
        Figure of height 800 px containing the edge and node traces.
    """
    # Use stored positions when present, otherwise compute a layout.
    pos = nx.get_node_attributes(G, 'pos')
    if not pos:
        pos = nx.spring_layout(G, seed=seed)

    # One colour per category. Sorting makes the category -> colour mapping
    # stable between runs (set iteration order is not).
    node_categories = nx.get_node_attributes(G, 'category')
    categories = sorted(set(node_categories.values()), key=str)
    # pyplot.get_cmap replaces plt.cm.get_cmap, which newer Matplotlib removed.
    color_map = plt.get_cmap('Set1', max(len(categories), 1))
    # Matplotlib returns channels in 0-1; CSS-style rgb() expects 0-255.
    category_color = {
        category: 'rgb({}, {}, {})'.format(
            *(int(round(255 * channel)) for channel in color_map(i)[:3])
        )
        for i, category in enumerate(categories)
    }
    # Nodes without a category (e.g. ones that only occur as an edge target)
    # are drawn grey instead of raising KeyError.
    uncategorised_color = 'rgb(136, 136, 136)'

    # Edge coordinates: each segment is (start, end, None); the None breaks
    # the line so consecutive edges are not joined together.
    edge_x = []
    edge_y = []
    for source, target in G.edges():
        x0, y0 = pos[source]
        x1, y1 = pos[target]
        edge_x.extend([x0, x1, None])
        edge_y.extend([y0, y1, None])

    edge_trace = go.Scatter(
        x=edge_x,
        y=edge_y,
        line=dict(width=0.5, color='#888'),
        hoverinfo='none',
        mode='lines')

    # Node coordinates, colours and label text, all in G.nodes() order.
    node_x = []
    node_y = []
    node_text = []
    node_color = []
    for node in G.nodes():
        x, y = pos[node]
        node_x.append(x)
        node_y.append(y)
        node_text.append(G.nodes[node].get('label', str(node)))
        node_color.append(
            category_color.get(G.nodes[node].get('category'), uncategorised_color)
        )

    node_trace = go.Scatter(
        x=node_x,
        y=node_y,
        mode='markers',
        hoverinfo='none',
        marker=dict(
            showscale=False,
            size=10,
            color=node_color,
            line=dict(width=2)))

    # One text annotation per node. ax/ay offset the text from the node in
    # pixels; ay=12 shifts it 12 px along the screen's y direction, which in
    # Plotly's pixel convention should place the text just below the marker.
    annotations = [
        dict(
            x=x,
            y=y,
            xref="x",
            yref="y",
            text=label,
            showarrow=True,
            font=dict(size=16),
            ax=0,
            ay=12,
        )
        for x, y, label in zip(node_x, node_y, node_text)
    ]

    hidden_axis = dict(showgrid=False, zeroline=False, showticklabels=False)
    fig = go.Figure(
        data=[edge_trace, node_trace],
        layout=go.Layout(
            # title.font replaces the deprecated `titlefont` attribute.
            title=dict(text=f"<br>{file_name}", font=dict(size=16)),
            showlegend=False,
            hovermode='closest',
            margin=dict(b=20, l=5, r=5, t=40),
            annotations=annotations,
            xaxis=dict(hidden_axis),
            yaxis=dict(hidden_axis),
            height=800,
        ),
    )
    return fig






In [21]:

# just do glob here
import glob
# Directory holding the per-plate "*_Sequences.tsv" exports.
# NOTE: machine-specific absolute path - change it when running elsewhere.
PLATE_DIR = "/Users/keithmitchell/Desktop/temp_plates"

# Sorted so the plates are always rendered in the same order.
files = sorted(glob.glob(f"{PLATE_DIR}/*_Sequences.tsv"))

# Build and display one duplication network per plate file; the file path is
# used as the figure title.
for file in files:
    fig = create_plotly_network_graph(create_graph(read_in_nodes(file)), file)
    fig.show()

In [186]:
import plotly.offline as pyo

def save_multiple_graphs_to_html(graphs, filename="multiple_graphs.html"):
    """Write several Plotly figures, one after another, into a single HTML page.

    Parameters
    ----------
    graphs : iterable of plotly figures
        Figures to embed, in the order they should appear on the page.
    filename : str, optional
        Path of the HTML file to create (overwritten if it exists).

    Returns
    -------
    None
    """
    divs = []
    for index, fig in enumerate(graphs):
        # Only the first div pulls in plotly.js. 'cdn' makes plotly emit a
        # script tag for the plotly.js release that matches the installed
        # plotly package, instead of the frozen "plotly-latest" bundle.
        include_js = 'cdn' if index == 0 else False
        divs.append(pyo.plot(fig, include_plotlyjs=include_js, output_type='div'))

    html_string = (
        "<html>\n"
        "<head>\n"
        "<meta charset=\"utf-8\">\n"
        "</head>\n"
        "<body>\n"
        + "\n".join(divs)
        + "\n</body>\n"
        "</html>\n"
    )

    # Explicit UTF-8 so labels with non-ASCII characters are written the same
    # way on every platform and match the declared charset.
    with open(filename, 'w', encoding='utf-8') as f:
        f.write(html_string)




In [198]:
# Render every plate's light-chain network, save each one as its own HTML
# page, and bundle all of them into one combined report.
# NOTE: machine-specific absolute path - change it when running elsewhere.
LIGHT_CHAIN_DIR = "/Users/keithmitchell/Downloads/alllcs"

fig_list = []
for i in range(1, 106):
    # Plate ids in the file names are not zero-padded (P1, P2, ..., P105).
    # Plates without a matching file are skipped.
    plate = f"P{i}"
    # Sorted so the choice of file[0] is deterministic if several files match.
    file = sorted(glob.glob(f"{LIGHT_CHAIN_DIR}/*_{plate}_LightChain.tsv"))
    if file:
        print(file, plate)
        fig = create_plotly_network_graph(create_graph(read_in_nodes(file[0])), file[0])
        # Name the page after its input file ("<project>_<plate>_LightChain")
        # rather than relying on a `project` variable left over in the kernel.
        stem = os.path.splitext(os.path.basename(file[0]))[0]
        fig.write_html(f"{stem}.html")
        fig_list.append(fig)

save_multiple_graphs_to_html(fig_list, filename="multiple_graphs.html")

['/Users/keithmitchell/Downloads/alllcs/TRIMMER0002_P2_LightChain.tsv'] P2
['/Users/keithmitchell/Downloads/alllcs/TRIMMER0003_P3_LightChain.tsv'] P3
['/Users/keithmitchell/Downloads/alllcs/TRIMMER0004.2_P4_LightChain.tsv'] P4
['/Users/keithmitchell/Downloads/alllcs/TRIMMER0005_P5_LightChain.tsv'] P5
['/Users/keithmitchell/Downloads/alllcs/TRIMMER0006_P6_LightChain.tsv'] P6
['/Users/keithmitchell/Downloads/alllcs/TRIMMER0007_P7_LightChain.tsv'] P7
['/Users/keithmitchell/Downloads/alllcs/TRIMMER0018_P8_LightChain.tsv'] P8
['/Users/keithmitchell/Downloads/alllcs/TRIMMER0019_P9_LightChain.tsv'] P9
['/Users/keithmitchell/Downloads/alllcs/TRIMMER0020_P10_LightChain.tsv'] P10
['/Users/keithmitchell/Downloads/alllcs/TRIMMER0021_P11_LightChain.tsv'] P11
['/Users/keithmitchell/Downloads/alllcs/TRIMMER0022_P12_LightChain.tsv'] P12
['/Users/keithmitchell/Downloads/alllcs/TRIMMER0023_P13_LightChain.tsv'] P13
['/Users/keithmitchell/Downloads/alllcs/TRIMMER0024_P14_LightChain.tsv'] P14
['/Users/keit