<a target="_blank" href="https://colab.research.google.com/github/cns-iu/hra-vccf-cell-distance-visualizations/blob/main/CDE_Demo_CIFAR_Peter_Zandstra.ipynb">
  <img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/>
</a>

In [9]:
# Import native packages
import os
from pprint import pprint
import random

In [10]:
#Install and import external packages
%pip install matplotlib pandas ipywidgets hra_jupyter_widgets

import pandas as pd
import ipywidgets as widgets

# Import hra-jupyter-widgets. For documentation, please see https://github.com/x-atlas-consortia/hra-jupyter-widgets/blob/main/usage.ipynb
from hra_jupyter_widgets import CdeVisualization



## Download data from Image Store.

Use curl to download the index file containing all file URLs, then download every file it lists.

In [11]:
!curl -L https://cdn.humanatlas.io/image-store/cifar-data/peter-zandstra-data/__s3_files-cifar-zandstra.csv -o __s3_files-cifar-zandstra.csv

# Make sure the data folder is present
folder_path = "data"

if not os.path.exists(folder_path):
    os.makedirs(folder_path)
    print(f"Folder '{folder_path}' created.")
else:
    print(f"Folder '{folder_path}' already exists.")

# Read csv as dataframe.
df = pd.read_csv('/content/__s3_files-cifar-zandstra.csv', header=None)
# df = pd.read_csv('/content/s3_files-cifar-zandstra1.csv', header=None) # Using locally uploaded file as file on cdn not yet updated.

# Iterate through df.
for i in range(len(df)):
  fileurl = df.iloc[i, 0]
  print(f"FILEURL: {fileurl}")
  filename = fileurl.split('/')[-1]
  # Define the path to the file.
  file_path = f'{folder_path}/{filename}'
  # Check if the file exists
  if not os.path.exists(file_path):
      # If the file doesn't exist, run the curl command
      !curl -L {fileurl} -o {file_path}
      print(f"File downloaded and saved at {file_path}")
  else:
      print(f"File already exists at {file_path}")

  % Total    % Received % Xferd  Average Speed   Time    Time     Time  Current
                                 Dload  Upload   Total   Spent    Left  Speed
100  9712  100  9712    0     0  19405      0 --:--:-- --:--:-- --:--:-- 19424
Folder 'data' already exists.
FILEURL: https://cdn.humanatlas.io/image-store/cifar-data/peter-zandstra-data/MultiRun-CombinedData--20201203_Laura_Thymus_2_1.csv
  % Total    % Received % Xferd  Average Speed   Time    Time     Time  Current
                                 Dload  Upload   Total   Spent    Left  Speed
100  924k  100  924k    0     0   798k      0  0:00:01  0:00:01 --:--:--  799k
File downloaded and saved at data/MultiRun-CombinedData--20201203_Laura_Thymus_2_1.csv
FILEURL: https://cdn.humanatlas.io/image-store/cifar-data/peter-zandstra-data/MultiRun-CombinedData--20201203_Laura_Thymus_2_10.csv
  % Total    % Received % Xferd  Average Speed   Time    Time     Time  Current
                                 Dload  Upload   Total   Spent    

## Read data as DataFrame

In [12]:
# Sanity check: number of entries currently in the data/ folder.
len(os.listdir("data/"))

144

In [13]:
# Pick one random CSV file from data/.
# List the folder once, keep only .csv entries (so stray items such as
# checkpoint folders are never selected), and sort the names because
# os.listdir returns them in arbitrary order.
data_files = sorted(name for name in os.listdir("data/") if name.endswith(".csv"))

# Random position within the sorted list of CSV files.
random_index = random.randrange(len(data_files))

# NOTE: despite its name, file_path holds the bare file name (no folder prefix).
file_path = data_files[random_index]
print(file_path)

MultiRun-CombinedDataT100--20220928_LS_KS_T100_Thymic_Arch_Stanford_Panel_2.csv


In [14]:
# Load the selected file from data/ into a DataFrame; the first row of the
# CSV is used as the column header.
nodes_csv = f"data/{file_path}"
df_nodes = pd.read_csv(nodes_csv, header=0)

# Preview the first rows.
df_nodes.head()

Unnamed: 0,x,y,Cell Type
0,780,666,Auto
1,523,895,mTECs
2,4,405,Auto
3,1446,696,mTECs
4,201,1419,DPs


## Display

In [15]:
# Next, let's define a function that turns a DataFrame into a node list that can then be passed into the CdeVisualization or NodeDistVis widget
def make_node_list(df:pd.DataFrame, is_3d:bool = False):
  """Turn a DataFrame into a list of dicts for passing them into a HRA widget

  The input DataFrame is never modified.

  Args:
      df (pd.DataFrame): A DataFrame with cells. Must contain the columns
          'x', 'y' and 'Cell Type', plus 'z' when ``is_3d`` is True.
      is_3d (bool): When False (default), every node gets ``z = 0``,
          replacing any existing 'z' values. When True, the 'z' column of
          ``df`` is used as-is.

  Returns:
      list[dict]: One dict per row, with the keys 'x', 'y', 'z' and
      'Cell Type' (in that order).

  Raises:
      KeyError: If a required column is missing from ``df``.
  """
  columns = ['x', 'y', 'z', 'Cell Type']

  # assign() returns a new frame, so the caller's DataFrame is left untouched
  # when the z-axis is flattened to 0.
  nodes = df if is_3d else df.assign(z=0)

  # Vectorized conversion; avoids a slow Python-level iterrows() loop.
  return nodes[columns].to_dict('records')

In [16]:
# Convert the cells in df_nodes into the list-of-dicts node format.
# is_3d=False: every node gets z = 0.
node_list = make_node_list(df_nodes, False)
# edge_list = [["Cell ID","Target ID", "X1", "Y1", "Z1", "X2", "Y2", "Z2"]] # this makes an empty edges list

In [17]:
# Choose the anchor cell type: prefer "ECs", fall back to "Endothelial cells",
# and use None when the 'Cell Type' column contains neither label.
observed_cell_types = df_nodes['Cell Type'].values
anchor_cell_type = next(
    (label for label in ("ECs", "Endothelial cells") if label in observed_cell_types),
    None,
)

In [None]:
# Finally, let's instantiate the CdeVisualization widget with our node_list as parameter.
cde = CdeVisualization(
    # Anchor label chosen above; None when neither endothelial label is in the data.
    node_target_selector=anchor_cell_type,
    # NOTE(review): units are not stated in this notebook — presumably the same units as x/y; confirm in the widget docs.
    max_edge_distance=200,
    # List of dicts with the keys 'x', 'y', 'z' and 'Cell Type'.
    nodes=node_list,
    # Presumably the node key whose value is compared against node_target_selector — confirm in the widget docs.
    node_target_key = "Cell Type",
)

# Display our new widget
display(cde)