In [1]:
import boto3
import os
import pandas as pd
import glob
import matplotlib.pyplot as plt
import umap
import numpy as np

from sagemaker import Session

# Initialize a SageMaker session
sagemaker_session = Session()



role = "arn:aws:iam::016114370410:role/tf-binding-sites"

prefix = "inference/output/Motifs-AR-22Rv1-2024-10-19-11-56-44"
local_dir = "/Users/wejarrard/projects/tf-binding/data/jsonl_output"


# Initialize the S3 client
s3 = boto3.client('s3')

# Specify your S3 bucket name
bucket_name = sagemaker_session.default_bucket()


# Function to download files from an S3 bucket
def download_from_s3(bucket_name, prefix, local_dir):
    if not os.path.exists(local_dir):
        os.makedirs(local_dir)

    # List objects within the specified prefix
    response = s3.list_objects_v2(Bucket=bucket_name, Prefix=prefix)

    # Iterate over each object
    for obj in response.get('Contents', []):
        key = obj['Key']
        # Skip if it's a folder
        if key.endswith('/'):
            continue
        local_path = os.path.join(local_dir, os.path.basename(key))
        
        # Download the file
        s3.download_file(bucket_name, key, local_path)
        print(f'Downloaded {key} to {local_path}')

# Bucket name extracted from the S3 URI
bucket_name = "tf-binding-sites"

# # delete the files in local dir
# for file in os.listdir(local_dir):
#     os.remove(os.path.join(local_dir, file))
    

# # Download the files
# download_from_s3(bucket_name, prefix, "/Users/wejarrard/projects/tf-binding/data/jsonl_output")


# Path to the directory containing JSON files
json_dir = "/Users/wejarrard/projects/tf-binding/data/jsonl_output"

# Get a list of all JSON files in the directory
json_files = glob.glob(f"{json_dir}/*.jsonl.gz.out")

# Read each JSON file into a DataFrame and store in a list
dataframes = [pd.read_json(file) for file in json_files]

# Concatenate all DataFrames into a single DataFrame
df = pd.concat(dataframes, ignore_index=True)

  from .autonotebook import tqdm as notebook_tqdm


sagemaker.config INFO - Not applying SDK defaults from location: /Library/Application Support/sagemaker/config.yaml
sagemaker.config INFO - Not applying SDK defaults from location: /Users/wejarrard/Library/Application Support/sagemaker/config.yaml


In [2]:
# get accuracy precision and recall targets vs predicted for entire dataset, also get tp fp tn fn
df['targets'] = df['targets'].astype('int')
df['predicted'] = df['predicted'].astype('int')

# Calculate accuracy, precision, and recall
accuracy = (df['targets'] == df['predicted']).mean()
precision = ((df['targets'] == 1) & (df['predicted'] == 1)).sum() / (df['predicted'] == 1).sum()
recall = ((df['targets'] == 1) & (df['predicted'] == 1)).sum() / (df['targets'] == 1).sum()

# get tp tn fn fp for 0.9, 0.95, 0.99, 0.999 thresholds
thresholds = [0.5, 0.9, 0.95, 0.99, 0.999]

for threshold in thresholds:
    print(f"Threshold: {threshold}")
    tp = ((df['probabilities'] >= threshold) & (df['targets'] == 1)).sum()
    tn = ((df['probabilities'] < threshold) & (df['targets'] == 0)).sum()
    fn = ((df['probabilities'] < threshold) & (df['targets'] == 1)).sum()
    fp = ((df['probabilities'] >= threshold) & (df['targets'] == 0)).sum()

    accuracy = (tp + tn) / (tp + tn + fp + fn)
    precision = tp / (tp + fp)
    recall = tp / (tp + fn)

    # print all tp tn fn fp acc prec rec
    print(f"TP: {tp}")
    print(f"TN: {tn}")
    print(f"FN: {fn}")
    print(f"FP: {fp}")
    print(f"Accuracy: {accuracy}")
    print(f"Precision: {precision}")
    print(f"Recall: {recall}")
    print()



Threshold: 0.5
TP: 5221
TN: 79618
FN: 113
FP: 18257
Accuracy: 0.8220116462711585
Precision: 0.22237839679700144
Recall: 0.9788151481064867

Threshold: 0.9
TP: 5126
TN: 86985
FN: 208
FP: 10890
Accuracy: 0.8924706178724724
Precision: 0.32005494505494503
Recall: 0.9610048743907011

Threshold: 0.95
TP: 5080
TN: 89050
FN: 254
FP: 8825
Accuracy: 0.9120328653508899
Precision: 0.36533620999640415
Recall: 0.9523809523809523

Threshold: 0.99
TP: 4857
TN: 92985
FN: 477
FP: 4890
Accuracy: 0.9479987210417696
Precision: 0.49830717143736536
Recall: 0.9105736782902137

Threshold: 0.999
TP: 3869
TN: 96489
FN: 1465
FP: 1386
Accuracy: 0.9723764400391439
Precision: 0.7362511893434824
Recall: 0.7253468316460443



In [3]:
# Create a new DataFrame from the extracted arrays
df_512 = pd.DataFrame([x[0] for x in df['linear_512_output']], index=df.index)

# Optionally, rename the columns if you want them to have specific names
df_512.columns = [f'feature_{i}' for i in range(512)]

# Concatenate the new DataFrame with the original one
df = pd.concat([df, df_512], axis=1).reset_index(drop=True)
df.head()


Unnamed: 0,chr_name,start,end,cell_line,targets,predicted,weights,probabilities,linear_512_output,motifs,...,feature_502,feature_503,feature_504,feature_505,feature_506,feature_507,feature_508,feature_509,feature_510,feature_511
0,chr3,160491076,160493437,22Rv1,0,0,0,3e-05,"[[-8.3039941788, -9.3181114197, -9.2222795486,...",CAAATATTCTTTAGA,...,-7.012489,-6.935061,-6.906998,-6.615226,-6.706022,-6.751296,-6.717427,-6.739665,-6.580424,-4.979805
1,chr3,160538752,160539163,22Rv1,0,0,0,1e-05,"[[-6.2788534164, -9.3681640625, -8.4964818954,...",AAGTACTTAAACTGT,...,-8.737627,-8.354425,-8.278188,-8.449848,-8.83527,-8.991581,-8.734056,-9.017185,-9.189394,-8.348048
2,chr3,160543990,160544480,22Rv1,0,0,0,6e-06,"[[-7.2467284203, -9.0045938492, -8.8901319504,...",AGGTACTCTAAGATT,...,-11.865794,-11.910988,-11.821841,-11.637135,-11.371367,-11.011414,-10.873665,-10.88805,-10.295553,-8.260612
3,chr3,160553933,160554809,22Rv1,0,0,0,0.000332,"[[-2.3411931992, -3.7899346352, -3.027173996, ...",AAGGGGGAATGATTG,...,-2.807557,-2.519457,-2.657507,-2.959347,-2.726985,-2.843576,-2.755872,-2.595378,-2.110865,-2.480732
4,chr3,160564582,160566324,22Rv1,0,0,0,0.001626,"[[0.5962683558, -0.1447146833, -0.966160714600...",CGACTTGGAGGTAAA,...,-1.496511,-1.39242,-0.943701,-1.021763,-0.770029,-0.535903,-0.059459,0.073631,0.791403,0.854661


In [4]:
# ============================
# 1. Extract Feature Columns
# ============================
feature_columns = [f'feature_{i}' for i in range(512)]
feature_data = df[feature_columns].values  # Use .values for faster processing

# ============================
# 2. Label Definitions
# ============================

# 2.1. Probability Coloring (Numeric)
probabilities = df['probabilities']

# 2.2. Motif Coloring ('AGGAACAGTCAGCAG', 'ACTTGGGCATTCTCA', 'Not')
# Define the specific motifs to highlight
specific_motifs = ['AGGAACAGTCAGCAG', 'ACTTGGGCATTCTCA']

# Create a new column for motif labels
df['motif_label'] = np.where(df['motifs'].isin(specific_motifs), df['motifs'], 'Not')

# 2.3. Confusion Matrix Labeling (TP, FP, TN, FN)
conditions = [
    (df['targets'] == 1) & (df['predicted'] == 1),  # True Positive
    (df['targets'] == 0) & (df['predicted'] == 1),  # False Positive
    (df['targets'] == 0) & (df['predicted'] == 0),  # True Negative
    (df['targets'] == 1) & (df['predicted'] == 0),  # False Negative
]
choices = ['TP', 'FP', 'TN', 'FN']
df['confusion_matrix_label'] = np.select(conditions, choices, default='Unknown')

# 2.3. Confusion Matrix Labeling 0.9 (TP, FP, TN, FN)
df['predicted_0.9'] = df['probabilities'].apply(lambda x: 1 if x > 0.9 else 0)
conditions_09 = [
    (df['targets'] == 1) & (df['predicted_0.9'] == 1),  # True Positive
    (df['targets'] == 0) & (df['predicted_0.9'] == 1),  # False Positive
    (df['targets'] == 0) & (df['predicted_0.9'] == 0),  # True Negative
    (df['targets'] == 1) & (df['predicted_0.9'] == 0),  # False Negative
]
df['confusion_matrix_label_0.9'] = np.select(conditions_09, choices, default='Unknown')

# .95
df['predicted_0.95'] = df['probabilities'].apply(lambda x: 1 if x > 0.95 else 0)
conditions_095 = [
    (df['targets'] == 1) & (df['predicted_0.95'] == 1),  # True Positive
    (df['targets'] == 0) & (df['predicted_0.95'] == 1),  # False Positive
    (df['targets'] == 0) & (df['predicted_0.95'] == 0),  # True Negative
    (df['targets'] == 1) & (df['predicted_0.95'] == 0),  # False Negative
]
df['confusion_matrix_label_0.95'] = np.select(conditions_095, choices, default='Unknown')


# .99
df['predicted_0.99'] = df['probabilities'].apply(lambda x: 1 if x > 0.99 else 0)
conditions_099 = [
    (df['targets'] == 1) & (df['predicted_0.99'] == 1),  # True Positive
    (df['targets'] == 0) & (df['predicted_0.99'] == 1),  # False Positive
    (df['targets'] == 0) & (df['predicted_0.99'] == 0),  # True Negative
    (df['targets'] == 1) & (df['predicted_0.99'] == 0),  # False Negative
]
df['confusion_matrix_label_0.99'] = np.select(conditions_099, choices, default='Unknown')

# .999
df['predicted_0.999'] = df['probabilities'].apply(lambda x: 1 if x > 0.999 else 0)
conditions_0999 = [
    (df['targets'] == 1) & (df['predicted_0.999'] == 1),  # True Positive
    (df['targets'] == 0) & (df['predicted_0.999'] == 1),  # False Positive
    (df['targets'] == 0) & (df['predicted_0.999'] == 0),  # True Negative
    (df['targets'] == 1) & (df['predicted_0.999'] == 0),  # False Negative
]
df['confusion_matrix_label_0.999'] = np.select(conditions_0999, choices, default='Unknown')

# 2.4. Weights Labeling
# Assuming 'weights' is an integer column
weights = df['weights']

# ============================
# 3. Color and Marker Mapping
# ============================

# 3.1. Motif Coloring Map
motif_color_map = {
    'AGGAACAGTCAGCAG': 'green',
    'ACTTGGGCATTCTCA': 'blue',
    'Not': 'red'
}

# 3.2. Motif Marker Map and Sizes
motif_marker_map = {
    'AGGAACAGTCAGCAG': ('o', 50),   # Circle, larger size
    'ACTTGGGCATTCTCA': ('s', 50),   # Square, larger size
    'Not': ('x', 10)                  # Cross, smaller size
}

# 3.3. Confusion Matrix Coloring Map
confusion_matrix_color_map = {
    'TP': 'green',
    'FP': 'red',
    'TN': 'blue',
    'FN': 'orange',
    'Unknown': 'gray'
}

# 3.4. Weights Coloring Map
# Since 'weights' are integers, map each unique weight to a distinct color
unique_weights = sorted(df['weights'].unique())
num_weights = len(unique_weights)

# Choose a colormap with enough distinct colors
# 'tab10' has 10 distinct colors; use 'tab20' if more are needed
if num_weights <= 10:
    weights_cmap = plt.get_cmap('tab10')
elif num_weights <= 20:
    weights_cmap = plt.get_cmap('tab20')
else:
    # For more than 20 classes, use a larger colormap or repeat colors
    weights_cmap = plt.get_cmap('tab20')
    print("Warning: More than 20 unique weights detected. Colors may repeat.")

# Assign a unique color to each weight
weights_colors = weights_cmap(np.linspace(0, 1, num_weights))
weight_color_map = {weight: colors for weight, colors in zip(unique_weights, weights_colors)}

# ============================
# 4. Hyperparameter Setup
# ============================

# Define hyperparameter ranges
n_neighbors_values = [3, 5, 10]  # You can expand this list as needed
min_dist_values = [0.01, 0.1, 0.3, 0.5]
n_components_values = [2]  # Keep it 2 for 2D plots; add 3 for 3D if desired

# ============================
# 5. Output Directories Setup
# ============================

# Define output directories for combined, TP, and FP plots
output_dirs = {
    'combined': "umap_plots_combined",
    'TP_FP': "umap_plots_TP_FP"
}

for dir_path in output_dirs.values():
    os.makedirs(dir_path, exist_ok=True)

# ============================
# 6. Define Subsets
# ============================

# Define the subsets for plotting
subsets = {
    'combined': df,
    'TP_FP': df[df['confusion_matrix_label'].isin(['TP', 'FP'])]
}

# ============================
# 7. Define UMAP Plotting Function
# ============================

def generate_umap_plots(subset_name, subset_df, output_dir):
    """
    Generates UMAP plots for a given subset of the data.

    Parameters:
    - subset_name: str, name of the subset (e.g., 'combined', 'TP', 'FP')
    - subset_df: pandas DataFrame, the subset of data to plot
    - output_dir: str, directory to save the plots
    """
    if subset_df.empty:
        print(f"Subset '{subset_name}' is empty. Skipping...")
        return

    feature_data_subset = subset_df[feature_columns].values
    probabilities_subset = subset_df['probabilities']
    motif_label_subset = subset_df['motif_label']
    confusion_matrix_label_subset = subset_df['confusion_matrix_label']
    weights_subset = subset_df['weights']

    # Assign colors for weights in the subset
    subset_unique_weights = sorted(subset_df['weights'].unique())
    subset_num_weights = len(subset_unique_weights)

    if subset_num_weights <= 10:
        subset_weights_cmap = plt.get_cmap('tab10')
    elif subset_num_weights <= 20:
        subset_weights_cmap = plt.get_cmap('tab20')
    else:
        subset_weights_cmap = plt.get_cmap('tab20')
        print(f"Warning: More than 20 unique weights detected in subset '{subset_name}'. Colors may repeat.")

    subset_weights_colors = subset_weights_cmap(np.linspace(0, 1, subset_num_weights))
    subset_weight_color_map = {weight: colors for weight, colors in zip(subset_unique_weights, subset_weights_colors)}

    # Define UMAP hyperparameters
    for n_neighbors in n_neighbors_values:
        for min_dist in min_dist_values:
            for n_components in n_components_values:
                # Initialize UMAP with current hyperparameters
                reducer = umap.UMAP(
                    n_neighbors=n_neighbors,
                    min_dist=min_dist,
                    n_components=n_components,
                    random_state=42  # Ensures reproducibility
                )

                # Fit and transform the data
                embedding = reducer.fit_transform(feature_data_subset)

                # Initialize the plot
                fig, axes = plt.subplots(3, 3, figsize=(24, 21))
                axes = axes.flatten()

                # Plot 1: Probability Coloring
                scatter1 = axes[0].scatter(
                    embedding[:, 0],
                    embedding[:, 1],
                    c=probabilities_subset,
                    cmap='viridis',
                    s=5,
                    alpha=0.7,
                )
                cbar1 = plt.colorbar(scatter1, ax=axes[0])
                cbar1.set_label('Probabilities')
                axes[0].set_title(f'UMAP ({subset_name})\n n={n_neighbors}, md={min_dist} - Probabilities')
                axes[0].set_xlabel('UMAP 1')
                axes[0].set_ylabel('UMAP 2')

                # Plot 2: Motif Coloring with Enhanced Visualization
                unique_motifs_subset = subset_df['motif_label'].unique()
                for motif in unique_motifs_subset:
                    idx = subset_df['motif_label'] == motif
                    marker, size = motif_marker_map.get(motif, ('o', 10))
                    axes[1].scatter(
                        embedding[idx, 0],
                        embedding[idx, 1],
                        c=[motif_color_map.get(motif, 'gray')],
                        label=motif,
                        marker=marker,
                        s=size,
                        alpha=0.9,
                        edgecolors='w',  # Add white edges for better visibility
                        linewidths=0.5
                    )
                axes[1].legend(title="Motif Label", loc="best")
                axes[1].set_title(f'UMAP ({subset_name})\n n={n_neighbors}, md={min_dist} - Motifs')
                axes[1].set_xlabel('UMAP 1')
                axes[1].set_ylabel('UMAP 2')

                # Plot 3: Weights Coloring with Discrete Colors
                for weight in subset_unique_weights:
                    idx = subset_df['weights'] == weight
                    axes[2].scatter(
                        embedding[idx, 0],
                        embedding[idx, 1],
                        c=[subset_weight_color_map[weight]],
                        label=str(weight),
                        marker='D',  # Diamond marker for weights
                        s=50,         # Larger size for better visibility
                        alpha=0.7,
                        edgecolors='w',
                        linewidths=0.5
                    )
                axes[2].legend(title="Weights", loc="best", bbox_to_anchor=(1.05, 1))
                axes[2].set_title(f'UMAP ({subset_name})\n n={n_neighbors}, md={min_dist} - Weights')
                axes[2].set_xlabel('UMAP 1')
                axes[2].set_ylabel('UMAP 2')


                # Plot 4: Confusion Matrix Coloring
                unique_cm_labels_subset = subset_df['confusion_matrix_label'].unique()
                for cm_label in unique_cm_labels_subset:
                    idx = subset_df['confusion_matrix_label'] == cm_label
                    axes[3].scatter(
                        embedding[idx, 0],
                        embedding[idx, 1],
                        c=[confusion_matrix_color_map.get(cm_label, 'gray')],
                        label=cm_label,
                        s=5,
                        alpha=0.7,
                    )
                axes[3].legend(title="Confusion Matrix Label", loc="best")
                axes[3].set_title(f'UMAP ({subset_name})\n n={n_neighbors}, md={min_dist} - Confusion Matrix')
                axes[3].set_xlabel('UMAP 1')
                axes[3].set_ylabel('UMAP 2')

                # Plot 5: Confusion Matrix Coloring
                unique_cm_labels_subset = subset_df['confusion_matrix_label_0.9'].unique()
                for cm_label in unique_cm_labels_subset:
                    idx = subset_df['confusion_matrix_label_0.9'] == cm_label
                    axes[4].scatter(
                        embedding[idx, 0],
                        embedding[idx, 1],
                        c=[confusion_matrix_color_map.get(cm_label, 'gray')],
                        label=cm_label,
                        s=5,
                        alpha=0.7,
                    )
                axes[4].legend(title="Confusion Matrix Label 0.9", loc="best")
                axes[4].set_title(f'UMAP ({subset_name})\n n={n_neighbors}, md={min_dist} - Confusion Matrix')
                axes[4].set_xlabel('UMAP 1')
                axes[4].set_ylabel('UMAP 2')

                # Plot 6: Confusion Matrix Coloring
                unique_cm_labels_subset = subset_df['confusion_matrix_label_0.95'].unique()
                for cm_label in unique_cm_labels_subset:
                    idx = subset_df['confusion_matrix_label_0.95'] == cm_label
                    axes[5].scatter(
                        embedding[idx, 0],
                        embedding[idx, 1],
                        c=[confusion_matrix_color_map.get(cm_label, 'gray')],
                        label=cm_label,
                        s=5,
                        alpha=0.7,
                    )
                axes[5].legend(title="Confusion Matrix Label 0.95", loc="best")
                axes[5].set_title(f'UMAP ({subset_name})\n n={n_neighbors}, md={min_dist} - Confusion Matrix')
                axes[5].set_xlabel('UMAP 1')
                axes[5].set_ylabel('UMAP 2')

                # Plot 7: Confusion Matrix Coloring
                unique_cm_labels_subset = subset_df['confusion_matrix_label_0.99'].unique()
                for cm_label in unique_cm_labels_subset:
                    idx = subset_df['confusion_matrix_label_0.99'] == cm_label
                    axes[6].scatter(
                        embedding[idx, 0],
                        embedding[idx, 1],
                        c=[confusion_matrix_color_map.get(cm_label, 'gray')],
                        label=cm_label,
                        s=5,
                        alpha=0.7,
                    )
                axes[6].legend(title="Confusion Matrix Label 0.99", loc="best")
                axes[6].set_title(f'UMAP ({subset_name})\n n={n_neighbors}, md={min_dist} - Confusion Matrix')
                axes[6].set_xlabel('UMAP 1')
                axes[6].set_ylabel('UMAP 2')

                # Plot 8: Confusion Matrix Coloring
                unique_cm_labels_subset = subset_df['confusion_matrix_label_0.999'].unique()
                for cm_label in unique_cm_labels_subset:
                    idx = subset_df['confusion_matrix_label_0.999'] == cm_label
                    axes[7].scatter(
                        embedding[idx, 0],
                        embedding[idx, 1],
                        c=[confusion_matrix_color_map.get(cm_label, 'gray')],
                        label=cm_label,
                        s=5,
                        alpha=0.7,
                    )
                axes[7].legend(title="Confusion Matrix Label 0.999", loc="best")
                axes[7].set_title(f'UMAP ({subset_name})\n n={n_neighbors}, md={min_dist} - Confusion Matrix')
                axes[7].set_xlabel('UMAP 1')
                axes[7].set_ylabel('UMAP 2')


                # Adjust layout
                plt.tight_layout()

                # Save the combined plot
                plot_filename = f"UMAP_{subset_name}_n{n_neighbors}_md{min_dist}_nc{n_components}.png"
                plt.savefig(os.path.join(output_dir, plot_filename), dpi=300, bbox_inches='tight')
                plt.close()

                print(f"Generated plot for subset '{subset_name}': {plot_filename}")

# ============================
# 8. Generate Plots for Each Subset
# ============================

for subset_name, subset_df in subsets.items():
    output_dir = output_dirs[subset_name]
    generate_umap_plots(subset_name, subset_df, output_dir)

print("All UMAP plots generated successfully.")

  warn(f"n_jobs value {self.n_jobs} overridden to 1 by setting random_state. Use no seed for parallelism.")
  axes[1].scatter(


Generated plot for subset 'combined': UMAP_combined_n3_md0.01_nc2.png


  warn(f"n_jobs value {self.n_jobs} overridden to 1 by setting random_state. Use no seed for parallelism.")
  axes[1].scatter(


Generated plot for subset 'combined': UMAP_combined_n3_md0.1_nc2.png


  warn(f"n_jobs value {self.n_jobs} overridden to 1 by setting random_state. Use no seed for parallelism.")
  axes[1].scatter(


Generated plot for subset 'combined': UMAP_combined_n3_md0.3_nc2.png


  warn(f"n_jobs value {self.n_jobs} overridden to 1 by setting random_state. Use no seed for parallelism.")
  axes[1].scatter(


Generated plot for subset 'combined': UMAP_combined_n3_md0.5_nc2.png


  warn(f"n_jobs value {self.n_jobs} overridden to 1 by setting random_state. Use no seed for parallelism.")
  axes[1].scatter(


Generated plot for subset 'combined': UMAP_combined_n5_md0.01_nc2.png


  warn(f"n_jobs value {self.n_jobs} overridden to 1 by setting random_state. Use no seed for parallelism.")
  axes[1].scatter(


Generated plot for subset 'combined': UMAP_combined_n5_md0.1_nc2.png


  warn(f"n_jobs value {self.n_jobs} overridden to 1 by setting random_state. Use no seed for parallelism.")
  axes[1].scatter(


Generated plot for subset 'combined': UMAP_combined_n5_md0.3_nc2.png


  warn(f"n_jobs value {self.n_jobs} overridden to 1 by setting random_state. Use no seed for parallelism.")
  axes[1].scatter(


Generated plot for subset 'combined': UMAP_combined_n5_md0.5_nc2.png


  warn(f"n_jobs value {self.n_jobs} overridden to 1 by setting random_state. Use no seed for parallelism.")
  axes[1].scatter(


Generated plot for subset 'combined': UMAP_combined_n10_md0.01_nc2.png


  warn(f"n_jobs value {self.n_jobs} overridden to 1 by setting random_state. Use no seed for parallelism.")
  axes[1].scatter(


Generated plot for subset 'combined': UMAP_combined_n10_md0.1_nc2.png


  warn(f"n_jobs value {self.n_jobs} overridden to 1 by setting random_state. Use no seed for parallelism.")
  axes[1].scatter(


Generated plot for subset 'combined': UMAP_combined_n10_md0.3_nc2.png


  warn(f"n_jobs value {self.n_jobs} overridden to 1 by setting random_state. Use no seed for parallelism.")
  axes[1].scatter(


Generated plot for subset 'combined': UMAP_combined_n10_md0.5_nc2.png


  warn(f"n_jobs value {self.n_jobs} overridden to 1 by setting random_state. Use no seed for parallelism.")
  axes[1].scatter(


Generated plot for subset 'TP_FP': UMAP_TP_FP_n3_md0.01_nc2.png


  warn(f"n_jobs value {self.n_jobs} overridden to 1 by setting random_state. Use no seed for parallelism.")
  axes[1].scatter(


Generated plot for subset 'TP_FP': UMAP_TP_FP_n3_md0.1_nc2.png


  warn(f"n_jobs value {self.n_jobs} overridden to 1 by setting random_state. Use no seed for parallelism.")
  axes[1].scatter(


Generated plot for subset 'TP_FP': UMAP_TP_FP_n3_md0.3_nc2.png


  warn(f"n_jobs value {self.n_jobs} overridden to 1 by setting random_state. Use no seed for parallelism.")
  axes[1].scatter(


Generated plot for subset 'TP_FP': UMAP_TP_FP_n3_md0.5_nc2.png


  warn(f"n_jobs value {self.n_jobs} overridden to 1 by setting random_state. Use no seed for parallelism.")
  axes[1].scatter(


Generated plot for subset 'TP_FP': UMAP_TP_FP_n5_md0.01_nc2.png


  warn(f"n_jobs value {self.n_jobs} overridden to 1 by setting random_state. Use no seed for parallelism.")
  axes[1].scatter(


Generated plot for subset 'TP_FP': UMAP_TP_FP_n5_md0.1_nc2.png


  warn(f"n_jobs value {self.n_jobs} overridden to 1 by setting random_state. Use no seed for parallelism.")
  axes[1].scatter(


Generated plot for subset 'TP_FP': UMAP_TP_FP_n5_md0.3_nc2.png


  warn(f"n_jobs value {self.n_jobs} overridden to 1 by setting random_state. Use no seed for parallelism.")
  axes[1].scatter(


Generated plot for subset 'TP_FP': UMAP_TP_FP_n5_md0.5_nc2.png


  warn(f"n_jobs value {self.n_jobs} overridden to 1 by setting random_state. Use no seed for parallelism.")
  axes[1].scatter(


Generated plot for subset 'TP_FP': UMAP_TP_FP_n10_md0.01_nc2.png


  warn(f"n_jobs value {self.n_jobs} overridden to 1 by setting random_state. Use no seed for parallelism.")
  axes[1].scatter(


Generated plot for subset 'TP_FP': UMAP_TP_FP_n10_md0.1_nc2.png


  warn(f"n_jobs value {self.n_jobs} overridden to 1 by setting random_state. Use no seed for parallelism.")
  axes[1].scatter(


Generated plot for subset 'TP_FP': UMAP_TP_FP_n10_md0.3_nc2.png


  warn(f"n_jobs value {self.n_jobs} overridden to 1 by setting random_state. Use no seed for parallelism.")
  axes[1].scatter(


Generated plot for subset 'TP_FP': UMAP_TP_FP_n10_md0.5_nc2.png
All UMAP plots generated successfully.
