# Visualisation of the simulation
This notebook provides graphical representations of the simulated tumour, the glands which are selected for "sequencing", and the methylation array histograms and correlation plots. 

In [None]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns

In [None]:
# Locations of the simulation outputs, relative to this notebook:
# one directory holding both tab-separated result tables.
outputs_path = "../data/synthetic_tumour_1/"
demes_file = "output_deme_methylation.dat"
methylation_file = "output_methylation_arrays.dat"

In [None]:
# Load both simulation output tables (tab-separated text files)
methylation = pd.read_csv(outputs_path + methylation_file, sep='\t')
demes = pd.read_csv(outputs_path + demes_file, sep='\t')

# Treat (largest observed population - 1) as the carrying capacity and keep
# only the demes whose population is exactly that value.
carrying_capacity = demes['Population'].max() - 1
filled_demes = demes[demes['Population'].eq(carrying_capacity)]

In [None]:
# Show (number of filled demes, number of columns) as the cell output
filled_demes.shape

In [None]:
# Each AverageArray entry is a whitespace-separated string of numbers;
# parse every entry into a list of floats (one list per filled deme).
deme_arrays = filled_demes['AverageArray'].apply(
    lambda text: list(map(float, text.split()))
)

In [None]:
# Histogram of the methylation array of one filled deme chosen at random.
# `deme` is a position within deme_arrays (0 .. len - 1), not an index label.
deme = np.random.randint(0, len(deme_arrays))
selected_array = deme_arrays.iloc[deme]
plt.hist(selected_array, bins=50)
plt.title(f'Deme {deme} methylation array')

In [None]:
# Use the median X and Y of the filled demes as the tumour's origin
xorigin = filled_demes['X'].median()
yorigin = filled_demes['Y'].median()

In [None]:
# Separate tumour into left and right sides about the X origin.
# Both halves are split on the same X threshold so that together they
# partition filled_demes (X < xorigin goes left, X >= xorigin goes right);
# each half is then ordered by its Y coordinate.
left_glands = filled_demes[filled_demes['X'] < xorigin].sort_values(by='Y')
right_glands = filled_demes[filled_demes['X'] >= xorigin].sort_values(by='Y')

In [None]:
def select_demes(demes, edge_margin=10, n_per_side=4):
    """Pick the most populated demes near the left and right X edges.

    Parameters
    ----------
    demes : pandas.DataFrame
        Table with at least the columns 'X' and 'Population'.
    edge_margin : numeric, optional
        A deme counts as "near an edge" when its X lies within this distance
        of the smallest (left) or largest (right) X value. Defaults to 10.
    n_per_side : int, optional
        Maximum number of demes returned for each side. Defaults to 4.

    Returns
    -------
    (left_selection, right_selection) : tuple of pandas.DataFrame
        Rows of ``demes`` for each side, ordered by descending population.
        A side may contain fewer than ``n_per_side`` rows when not enough
        demes fall inside its margin. If the X range is narrower than twice
        the margin, the same deme can appear in both selections.
    """
    x_values = demes['X']

    # Demes within `edge_margin` of the extreme X values on each side
    left_demes = demes[x_values <= x_values.min() + edge_margin]
    right_demes = demes[x_values >= x_values.max() - edge_margin]

    # Most populated first, then keep the top `n_per_side` from each side
    left_selection = left_demes.sort_values(
        by='Population', ascending=False
    ).head(n_per_side)
    right_selection = right_demes.sort_values(
        by='Population', ascending=False
    ).head(n_per_side)

    return left_selection, right_selection

In [None]:
# Choose the sampled demes from the full deme table (not only the filled demes)
left_selection, right_selection = select_demes(demes)

In [None]:
# Parse the whitespace-separated AverageArray strings of the selected demes
# into lists of floats, one list per selected deme.
left_arrays = left_selection['AverageArray'].apply(
    lambda text: list(map(float, text.split()))
)
right_arrays = right_selection['AverageArray'].apply(
    lambda text: list(map(float, text.split()))
)

In [None]:
def plot_demes(demes, left_selection, right_selection):
    """Scatter every deme by position and highlight the two selections.

    All demes are drawn in reversed greyscale, coloured by their
    'Population' with the colour scale spanning the observed minimum and
    maximum. The left selection is overlaid in blue and the right selection
    in red, both with white marker edges. A colourbar for the population is
    added and the figure is shown.

    Parameters
    ----------
    demes, left_selection, right_selection : pandas.DataFrame
        Tables with 'X' and 'Y' columns; ``demes`` also needs 'Population'.
    """
    population = demes['Population']

    # Background layer: every deme, shaded by population
    background = plt.scatter(
        demes['X'],
        demes['Y'],
        c=population,
        cmap='gray_r',
        s=100,
        label='All Demes',
        vmin=population.min(),
        vmax=population.max(),
    )

    # Foreground layers: the sampled demes on each side
    highlights = (
        (left_selection, 'blue', 'Left Selection'),
        (right_selection, 'red', 'Right Selection'),
    )
    for selection, colour, name in highlights:
        plt.scatter(
            selection['X'],
            selection['Y'],
            color=colour,
            edgecolors='white',
            s=150,
            label=name,
        )

    plt.xlabel('X')
    plt.ylabel('Y')
    plt.title('Sampled glands')
    # The colourbar is tied to the background scatter's colour mapping
    plt.colorbar(background, label='Population')
    plt.show()

In [None]:
# Draw all demes with the left/right sampled demes highlighted
plot_demes(demes, left_selection, right_selection)

In [None]:
# Plot the methylation arrays of the selected demes on the same histogram,
# left glands in blue and right glands in orange.
# The two selections may differ in size (a side can hold fewer demes than
# requested), so each side is bounds-checked separately instead of indexing
# the right side with the left side's length.
plt.figure(figsize=(10, 5))
for i in range(max(len(left_arrays), len(right_arrays))):
    if i < len(left_arrays):
        plt.hist(left_arrays.iloc[i], bins=50, color='blue', alpha=0.5)
    if i < len(right_arrays):
        plt.hist(right_arrays.iloc[i], bins=50, color='orange', alpha=0.5)

In [None]:
# Renumber each side's arrays from 0, then stack left followed by right.
# ignore_index=True gives the combined Series a fresh 0..n-1 index; without
# it the labels of the two sides would repeat (0..k-1 twice), which makes
# label-based lookups on all_samples ambiguous.
left_arrays = left_arrays.reset_index(drop=True)
right_arrays = right_arrays.reset_index(drop=True)
all_samples = pd.concat([left_arrays, right_arrays], axis=0, ignore_index=True)

In [None]:
# Correlation plots for each pair of sampled demes

# Size the grid from the samples actually available rather than assuming 8,
# so a short selection does not index past the end of all_samples.
num_samples = len(all_samples)
# squeeze=False keeps `axes` two-dimensional even for a 1x1 grid.
fig, axes = plt.subplots(num_samples, num_samples, figsize=(15, 15), squeeze=False)

for i in range(num_samples):
    for j in range(num_samples):
        if j > i:  # plot above the diagonal
            # x and y are passed directly as vectors; no `data=` source is
            # given because all_samples is a Series of lists, not a table
            # of named columns.
            sns.scatterplot(x=all_samples.iloc[i], y=all_samples.iloc[j], ax=axes[i, j])
        else:
            axes[i, j].set_axis_off()  # Turn off axes for other plots

plt.tight_layout()
plt.show()