In [1]:
import collections
import math
import pickle
import random

import ipywidgets as widgets
import matplotlib.pyplot as plt
import networkx as nx
import numpy as np
import pandas as pd
from ipywidgets import interact, fixed
from scipy.sparse import coo_matrix
from sklearn.neighbors import NearestNeighbors

In [2]:
def create_df(tumorList, stromaList, TILList1, TILList2, TILList3, \
              numtumor=100, numstroma=100, numTIL1=10, numTIL2=20, numTIL3=20):
    """Randomly place five cell populations and tabulate their coordinates.

    Tumor and stroma cells are sampled without replacement from their own
    candidate lists. Each TIL population is then sampled from its candidate
    list minus every site already taken by tumor, stroma, or an earlier TIL
    population, so no TIL shares a site with a previously placed cell.

    Parameters
    ----------
    tumorList, stromaList : sequence of (x, y) tuples
        Candidate sites for tumor and stroma cells.
    TILList1, TILList2, TILList3 : iterable of (x, y) tuples
        Candidate sites for the cells labelled 'CD3p', 'CD3p_CD4p' and
        'CD8p_CD3p' respectively.
    numtumor, numstroma, numTIL1, numTIL2, numTIL3 : int
        Number of cells to draw for each population.

    Returns
    -------
    df : pandas.DataFrame
        Columns 'x', 'y', 'label', one row per placed cell, ordered Tumor,
        Stroma, CD3p, CD3p_CD4p, CD8p_CD3p.
    pos : list of (x, y) tuples
        Coordinates in the same row order as ``df``.

    Raises
    ------
    ValueError
        From ``random.sample`` when more cells are requested than there are
        free candidate sites.
    """
    tumor = random.sample(tumorList, numtumor)
    stroma = random.sample(stromaList, numstroma)

    # Sites that later populations are not allowed to reuse.
    occupied = set(tumor) | set(stroma)

    def _sample_free(candidates, count):
        # random.sample() requires a sequence: passing a set was deprecated in
        # Python 3.9 and raises TypeError from 3.11. Sorting also gives the
        # candidates a stable order, so a seeded run is repeatable.
        free = sorted(set(candidates) - occupied)
        chosen = random.sample(free, count)
        occupied.update(chosen)
        return chosen

    TIL1 = _sample_free(TILList1, numTIL1)
    TIL2 = _sample_free(TILList2, numTIL2)
    TIL3 = _sample_free(TILList3, numTIL3)

    pos = []
    label = []
    populations = zip([tumor, stroma, TIL1, TIL2, TIL3],
                      ['Tumor', 'Stroma', 'CD3p', 'CD3p_CD4p', 'CD8p_CD3p'])
    for cells, labelName in populations:
        # An empty population simply contributes no rows.
        pos.extend(cells)
        label.extend([labelName] * len(cells))

    df = pd.DataFrame(
        {
            'x': [point[0] for point in pos],
            'y': [point[1] for point in pos],
            'label': label,
        },
        columns=['x', 'y', 'label'],
    )
    return df, pos


def create_graph(df, pos, radius=10):
    """Build an undirected radius-neighbour graph over the cells in ``df``.

    Two cells are connected when ``NearestNeighbors.radius_neighbors`` (with
    scikit-learn's default metric) reports them within ``radius`` of each
    other. Self-matches are discarded, so the graph has no self-loops.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain the columns 'x', 'y' and 'label'; row ``i`` describes
        node ``i``.
    pos : sequence
        ``pos[i]`` is stored as the 'pos' attribute of node ``i``.
    radius : float, optional
        Neighbourhood radius passed to ``NearestNeighbors``. Defaults to 10.

    Returns
    -------
    networkx.Graph
        One node per row of ``df`` (labelled 0..N-1), each carrying the
        attributes 'pos' and 'cell_types'.
    """
    dfXY = df[['x', 'y']].copy()
    N = len(dfXY)
    nn = NearestNeighbors(radius=radius)
    nn.fit(dfXY)
    # Only the neighbour indices are needed; the distances are not used.
    _, ids = nn.radius_neighbors(dfXY)

    # Flatten the ragged neighbour lists into (row, col) index pairs.
    rows = np.repeat(np.arange(N), [len(neighbours) for neighbours in ids])
    cols = np.concatenate(list(ids))
    # Each query point is returned as its own neighbour; drop those pairs
    # rather than zeroing the diagonal of the sparse matrix afterwards.
    keep = rows != cols
    w = np.ones(int(keep.sum()))
    _W = coo_matrix((w, (rows[keep], cols[keep])), shape=(N, N))
    # Symmetrise so the adjacency describes an undirected graph.
    _W = 1/2*(_W + _W.T)

    # networkx 3.0 removed from_scipy_sparse_matrix; prefer its replacement
    # and fall back to the old name on releases that predate it.
    _from_sparse = getattr(nx, 'from_scipy_sparse_array', None)
    if _from_sparse is None:
        _from_sparse = nx.from_scipy_sparse_matrix
    G = _from_sparse(_W)

    # Positional lookup keeps labels aligned with pos[i] even when df does
    # not carry a default 0..N-1 index.
    labels = df['label'].tolist()
    for i in range(len(G.nodes)):
        G.nodes[i]['pos'] = pos[i]
        G.nodes[i]['cell_types'] = labels[i]
    return G
    

In [3]:
# Ten zero-padded patient identifiers: '0001' through '0010'.
patientKeys = [str(number).zfill(4) for number in range(1, 11)]
# Maps a patient id to the list of graphs generated for that patient.
patientDict = collections.defaultdict(list)
# Candidate sites on a 100 x 100 grid with 1-based coordinates:
# tumor cells may occupy x in 1..50, stroma cells x in 51..100.
tumorList = [(col, row) for col in range(1, 51) for row in range(1, 101)]
stromaList = [(col, row) for col in range(51, 101) for row in range(1, 101)]

def add_data(id_, range_, nums=[100, 100, 10, 20, 20], count=1):
    """Generate ``count`` random graphs and append them to ``patientDict[id_]``.

    Parameters
    ----------
    id_ : hashable
        Key in ``patientDict`` under which the graphs are stored.
    range_ : sequence of three [x_start, x_stop, y_start, y_stop] entries
        Half-open, 0-based bounds of the rectangles from which the three TIL
        populations are drawn; candidate sites are shifted to 1-based
        coordinates. An entry with an empty range yields no candidates.
    nums : sequence of five ints
        Cell counts in the order tumor, stroma, TIL1, TIL2, TIL3.
    count : int
        Number of graphs to generate.
    """
    def _sites(bounds):
        # Expand one rectangle into its list of 1-based (x, y) grid sites.
        return [(col + 1, row + 1)
                for col in range(bounds[0], bounds[1])
                for row in range(bounds[2], bounds[3])]

    til_sites = [_sites(range_[0]), _sites(range_[1]), _sites(range_[2])]

    for _ in range(count):
        cells, coordinates = create_df(
            tumorList, stromaList, til_sites[0], til_sites[1], til_sites[2],
            numtumor=nums[0], numstroma=nums[1],
            numTIL1=nums[2], numTIL2=nums[3], numTIL3=nums[4],
        )
        patientDict[id_].append(create_graph(cells, coordinates))

# Data creation

## dataset A

In [4]:
# add similar graphs
# for i in patientKeys:
#     add_data(i, [[0, 0, 0, 0], [0, 100, 0, 100], [0, 0, 0, 0]], nums=[100, 100, 0, 50, 0], count=4)
    
# add discriminative graphs
# add_data('0001', [[50, 100, 0, 100], [50, 100, 0, 100], [50, 100, 0, 100]])
# add_data('0002', [[0, 50, 0, 100], [50, 100, 0, 100], [50, 100, 0, 100]])
# add_data('0003', [[50, 100, 0, 100], [0, 50, 0, 100], [50, 100, 0, 100]])
# add_data('0004', [[50, 100, 0, 100], [50, 100, 0, 100], [0, 50, 0, 100]])
# add_data('0005', [[0, 50, 0, 100], [0, 50, 0, 100], [50, 100, 0, 100]])
# add_data('0006', [[0, 50, 0, 100], [50, 100, 0, 100], [0, 50, 0, 100]])
# add_data('0007', [[50, 100, 0, 100], [0, 50, 0, 100], [0, 50, 0, 100]])
# add_data('0008', [[0, 50, 0, 100], [0, 50, 0, 100], [0, 50, 0, 100]])
# add_data('0009', [[0, 0, 0, 0], [0, 50, 0, 100], [50, 100, 0, 100]], [105, 105, 0, 20, 20])
# add_data('0010', [[0, 0, 0, 0], [50, 100, 0, 100], [0, 50, 0, 100]], [105, 105, 0, 20, 20])

## dataset B

In [5]:
# add similar graphs
# Every patient first gets three graphs with the same layout: no CD3p and no
# CD8p_CD3p cells, and 50 CD3p_CD4p cells drawn from the whole grid.
for i in patientKeys:
    add_data(i, [[0, 0, 0, 0], [0, 100, 0, 100], [0, 0, 0, 0]], nums=[100, 100, 0, 50, 0], count=3)

# add discriminative graphs
# Each TIL population is confined to one half of the grid: the half that
# tumorList covers (x bounds 0-50) or the half that stromaList covers
# (x bounds 50-100).
_TUMOR_HALF = [0, 50, 0, 100]
_STROMA_HALF = [50, 100, 0, 100]

# Per patient: two layouts, each giving the region for
# (CD3p, CD3p_CD4p, CD8p_CD3p). Default cell counts are used.
_DISCRIMINATIVE_LAYOUTS = [
    ('0001', [(_STROMA_HALF, _STROMA_HALF, _STROMA_HALF),
              (_TUMOR_HALF, _STROMA_HALF, _STROMA_HALF)]),
    ('0002', [(_STROMA_HALF, _STROMA_HALF, _STROMA_HALF),
              (_STROMA_HALF, _TUMOR_HALF, _STROMA_HALF)]),
    ('0003', [(_STROMA_HALF, _STROMA_HALF, _STROMA_HALF),
              (_STROMA_HALF, _STROMA_HALF, _TUMOR_HALF)]),
    ('0004', [(_STROMA_HALF, _STROMA_HALF, _STROMA_HALF),
              (_TUMOR_HALF, _TUMOR_HALF, _STROMA_HALF)]),
    ('0005', [(_TUMOR_HALF, _STROMA_HALF, _STROMA_HALF),
              (_STROMA_HALF, _TUMOR_HALF, _STROMA_HALF)]),
    ('0006', [(_TUMOR_HALF, _STROMA_HALF, _STROMA_HALF),
              (_STROMA_HALF, _STROMA_HALF, _TUMOR_HALF)]),
    ('0007', [(_TUMOR_HALF, _STROMA_HALF, _STROMA_HALF),
              (_TUMOR_HALF, _TUMOR_HALF, _STROMA_HALF)]),
    ('0008', [(_STROMA_HALF, _TUMOR_HALF, _STROMA_HALF),
              (_STROMA_HALF, _STROMA_HALF, _TUMOR_HALF)]),
    ('0009', [(_STROMA_HALF, _TUMOR_HALF, _STROMA_HALF),
              (_TUMOR_HALF, _TUMOR_HALF, _STROMA_HALF)]),
    ('0010', [(_STROMA_HALF, _STROMA_HALF, _TUMOR_HALF),
              (_TUMOR_HALF, _TUMOR_HALF, _STROMA_HALF)]),
]

for _patient, _layouts in _DISCRIMINATIVE_LAYOUTS:
    for _layout in _layouts:
        # Pass fresh lists so every call receives its own range specification.
        add_data(_patient, [list(_region) for _region in _layout])

# Visualization

In [6]:
# Create widgets
# Dropdown offering every patient id currently stored in patientDict.
id_ = widgets.Dropdown(
    description='Patient ID: ',
    options=patientDict.keys(),
)

# Slider over the graph indices of the patient selected at creation time;
# its upper bound is refreshed whenever the dropdown selection changes.
graphs = widgets.IntSlider(
    description='Graph Index: ',
    min=0,
    max=len(patientDict[id_.value]) - 1,
    step=1,
    orientation='horizontal',
    continuous_update=False,
)

# Update graph options based on patient id
def update_graphs(*args):
    """Reset the slider's upper bound to the last graph index of the selected patient.

    Positional arguments supplied by the widget observer are accepted and
    ignored.
    """
    available = patientDict[id_.value]
    graphs.max = len(available) - 1


# Tie graph options to patient id
id_.observe(update_graphs, 'value')

# Matplotlib colour code used for each cell type.
nodeColorsDict = {'CD3p': 'b', 'CD3p_CD4p': 'g', 'CD8p_CD3p': 'r', 'Tumor': 'c', 'Stroma': 'y'}


def graph_visualization(id_, graphs):
    """Draw one stored graph with its nodes coloured by cell type.

    Parameters
    ----------
    id_ : hashable
        Patient key into ``patientDict``.
    graphs : int
        Index of the graph within that patient's list.
    """
    plt.figure(figsize=(5, 5))
    graph = patientDict[id_][graphs]
    coordinates = nx.get_node_attributes(graph, 'pos')

    # One single-point artist per cell type, so the legend lists every type
    # whether or not it occurs in this graph.
    for cell_type, colour in nodeColorsDict.items():
        plt.plot([0], [0], color=colour, label=cell_type)

    cell_types = nx.get_node_attributes(graph, 'cell_types')
    colours = [nodeColorsDict[cell_type] for cell_type in cell_types.values()]
    nx.draw_networkx(
        graph,
        pos=coordinates,
        with_labels=False,
        node_size=30,
        node_color=colours,
    )
    plt.legend(loc='center left', bbox_to_anchor=(1, 0.5))
    plt.show()


# Bind the dropdown and slider to the plotting function; assigning the result
# to `_` keeps the returned object out of the cell output.
_ = interact(graph_visualization, id_=id_, graphs=graphs)
    

interactive(children=(Dropdown(description='Patient ID: ', options=('0001', '0002', '0003', '0004', '0005', '0…

# Save data

In [10]:
# if not os.path.exists(r'./data/patient_gumbel4_val.pickle'):
#     with open(r'./data/patient_gumbel4_val.pickle', 'wb') as handle:
#         pickle.dump(patientDict, handle, protocol=pickle.HIGHEST_PROTOCOL)
