In [None]:
from torch.utils.tensorboard import SummaryWriter
import MinkowskiEngine as ME
from torch import nn
import torch
import numpy as np

import matplotlib.pyplot as plt
import matplotlib as mpl

%matplotlib inline
mpl.rcParams['figure.figsize'] = [8, 6]
mpl.rcParams['font.size'] = 16
mpl.rcParams['axes.grid'] = True

## Tell pytorch we have a GPU if we do
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
torch.device(device)

SEED=12345
_=np.random.seed(SEED)
_=torch.manual_seed(SEED)
writer = SummaryWriter("log")

In [None]:
## Includes from my libraries for this project                                                                                                                                           
from ME_NN_libs import NTXentMerged, ContrastiveEncoderME

## Import transformations                                                                                                                                                                
from ME_dataset_libs import CenterCrop, get_transform

## Import dataset                                                                                                                                                                        
from ME_dataset_libs import SingleModuleImage2D_MultiHDF5_ME, cat_ME_collate_fn
from ME_dataset_libs import SingleModuleImage2D_solo_ME, solo_ME_collate_fn

## For later visualization
from ME_dataset_libs import make_dense_from_tensor

In [None]:
## Training function
def run_training(num_iterations, log_dir, encoder, temperature, dataloader, optimizer, batch_size, scheduler=None):
    """Train `encoder` with the `NTXentMerged` loss and report the mean loss per iteration.

    Parameters
    ----------
    num_iterations : int
        Number of full passes over `dataloader`.
    log_dir : str or None
        If truthy, a `SummaryWriter` is opened on this directory and the mean
        loss of every iteration is logged under the tag 'loss/train'.
    encoder : torch.nn.Module
        Model to train. It is moved to the notebook-level `device`, called on an
        `ME.SparseTensor`, and the `.F` attribute of its output is fed to the loss.
    temperature : float
        Passed straight to `NTXentMerged`.
    dataloader : iterable
        Yields `(coords, feats)` pairs of tensors, one pair per batch.
    optimizer : torch.optim.Optimizer
        Optimizer that already holds the encoder parameters.
    batch_size : int
        Not used by this function; kept so existing calls keep working.
    scheduler : optional
        If given, `scheduler.step()` is called once per iteration (not per batch).

    Raises
    ------
    RuntimeError
        If `dataloader` yields no batches, so no mean loss can be computed.
    """
    # Imported here because the notebook only imports `time` in a later cell
    import time

    print("Training with", num_iterations, "iterations")
    tstart = time.process_time()

    # Only create a writer when logging was requested
    writer = SummaryWriter(log_dir=log_dir) if log_dir else None

    loss_fn = NTXentMerged(temperature)

    encoder.to(device)

    try:
        ## Loop over the desired iterations
        for iteration in range(num_iterations):

            total_loss = 0
            nbatches   = 0

            # Set train mode for the encoder
            encoder.train()

            # Iterate over the loader that was passed in (not a notebook global)
            for cat_bcoords, cat_bfeats in dataloader:

                ## Send to the device, then make the sparse tensors
                cat_bcoords = cat_bcoords.to(device, non_blocking=True)
                cat_bfeats  = cat_bfeats.to(device)
                cat_batch   = ME.SparseTensor(cat_bfeats, cat_bcoords, device=device)

                ## Now do the forward pass
                encoded_batch = encoder(cat_batch)

                # Evaluate loss
                loss = loss_fn(encoded_batch.F)

                # Backward pass
                optimizer.zero_grad()
                loss.backward()
                optimizer.step()

                total_loss += loss.item()
                nbatches += 1

                torch.cuda.empty_cache()

            # An empty loader would otherwise surface as a bare ZeroDivisionError below
            if nbatches == 0:
                raise RuntimeError("dataloader produced no batches; cannot compute a training loss")

            ## See if we have an LR scheduler...
            if scheduler: scheduler.step() #total_loss)

            av_loss = total_loss/nbatches

            if writer is not None: writer.add_scalar('loss/train', av_loss, iteration)
            print("Processed", iteration, "/", num_iterations, "; loss =", av_loss)
            print("Time taken:", time.process_time() - tstart)

            ## End so empty cache because MinkowskiEngine can't be trusted
            torch.cuda.empty_cache()
    finally:
        # Flush and release the event file even if training is interrupted
        if writer is not None:
            writer.close()

In [None]:
import time

## Transform handed to the dataset as its `aug_transform`
aug_transform = get_transform('block10x10')

## Build the training dataset and report how much CPU time that took
inDir = "/pscratch/sd/c/cwilk/h5_inputs_v9/"
start = time.process_time()
train_dataset = SingleModuleImage2D_MultiHDF5_ME(
    inDir,
    nom_transform=CenterCrop(),
    aug_transform=aug_transform,
    max_events=100000,
)
print("Time taken to load", len(train_dataset), "images:", time.process_time() - start)

## Shuffled loader; incomplete final batches are dropped
batch_size = 512
train_loader = torch.utils.data.DataLoader(
    train_dataset,
    batch_size=batch_size,
    shuffle=True,
    drop_last=True,
    collate_fn=cat_ME_collate_fn,
    num_workers=8,
    prefetch_factor=1,
    pin_memory=False,
)

In [None]:
## torch.cuda.set_sync_debug_mode is an experimental pytorch function which flags
## where synchronization calls are made. Uncomment for debugging only.
## torch.cuda.set_sync_debug_mode(0)

## Various config parameters
nchan          = 32
nlatent        = 128
hidden_act_fn  = ME.MinkowskiSiLU
latent_act_fn  = ME.MinkowskiTanh
dropout        = 0
temperature    = 0.5
num_iterations = 50
log_dir        = "log"

## Define the model
encoder = ContrastiveEncoderME(nchan, nlatent, hidden_act_fn, latent_act_fn, dropout)

## Restore pre-calculated weights when a checkpoint path is given (None = train from scratch)
chk_file = None
if chk_file:
    checkpoint = torch.load(chk_file, map_location='cpu')
    encoder.load_state_dict(checkpoint['encoder_state_dict'])

encoder.to(device)

## A single parameter group holding every encoder parameter
params_to_optimize = [{'params': encoder.parameters()}]

lr           = 1e-5
weight_decay = 1e-5
optimizer = torch.optim.AdamW(params_to_optimize, lr=lr, weight_decay=weight_decay)

## Scheduler options
scheduler = None
# scheduler = torch.optim.lr_scheduler.OneCycleLR(optimizer, max_lr=1e-3, total_steps=num_iterations, cycle_momentum=False)

run_training(
    num_iterations,
    log_dir,
    encoder,
    temperature,
    train_loader,
    optimizer,
    batch_size,
    scheduler,
)

In [None]:
## Now take the trained model and try to run some unsupervised learning on it...
import numpy as np

## Make a single loader to loop over for ease
single_dataset = SingleModuleImage2D_solo_ME(inDir, transform=CenterCrop(), max_events=50000) #nevents)
single_loader = torch.utils.data.DataLoader(single_dataset,
                                            collate_fn=solo_ME_collate_fn,
                                            batch_size=512,
                                            shuffle=False,
                                            num_workers=4)

latent = []
nhits  = []

## The encoder is only evaluated from here on, so switch to eval mode once up front
encoder.eval()

## Encode the dataset batch by batch, keeping one latent entry and one hit count per event
for orig_bcoords, orig_bfeats in single_loader:

    orig_bcoords = orig_bcoords.to(device)
    orig_bfeats = orig_bfeats.to(device)
    orig_batch = ME.SparseTensor(orig_bfeats, orig_bcoords, device=device)

    ## Now do the forward pass (no gradients are needed for inference)
    with torch.no_grad():
        encoded_batch = encoder(orig_batch)

    ## Empty entries are skipped for the hit counts only
    nhits += [i.shape[0] for i in orig_batch.decomposed_features if i.shape[0] != 0]
    latent += [x.cpu().numpy() for x in encoded_batch.decomposed_features]

lat_vect = np.vstack(latent)
hit_vect = np.array(nhits)

## The plots below colour each latent vector by its hit count, so the two must line up
assert len(lat_vect) == len(hit_vect), \
    f"latent/hit-count mismatch: {len(lat_vect)} latent rows vs {len(hit_vect)} hit counts"

In [None]:
## Plot the first two latent dimensions, coloured by the number of hits in each event
latent_scatter = plt.scatter(lat_vect[:,0], lat_vect[:,1], s=1, vmin=100, vmax=500, c=hit_vect)
plt.colorbar(latent_scatter, label='N.hits')
plt.xlabel('Latent #0')
plt.ylabel('Latent #1')
plt.show()

In [None]:
from cuml.manifold import TSNE as cuML_TSNE
import cupy as cp

## Define a function for running t-SNE using the cuml version
def run_tsne_cuml(perp=300, exag=100, input_vect=None, nhits=None):
    """Embed `input_vect` in 2D with cuML's t-SNE and draw the result coloured by `nhits`.

    Parameters
    ----------
    perp : float
        Perplexity passed to cuML's TSNE.
    exag : float
        Early exaggeration passed to cuML's TSNE.
    input_vect : array-like, optional
        Vectors to embed, one row per sample. Defaults to the notebook-level
        `lat_vect`, looked up when the function is called.
    nhits : array-like, optional
        One value per sample, used to colour the scatter plot. Defaults to the
        notebook-level `hit_vect`, looked up when the function is called.

    Returns
    -------
    numpy.ndarray
        The t-SNE embedding (two columns), copied back to host memory.
    """
    # Resolve the defaults at call time: binding `lat_vect`/`hit_vect` in the
    # signature would freeze whatever they were when this cell was executed
    # (or fail outright if they did not exist yet).
    if input_vect is None:
        input_vect = lat_vect
    if nhits is None:
        nhits = hit_vect

    print("Running cuML t-SNE with: perplexity =", perp, "early exaggeration =", exag)

    # Copy the input to a float32 CuPy array for cuML
    input_vect = cp.asarray(input_vect, dtype=cp.float32)

    ## I haven't played with most of cuml's t-SNE parameters
    tsne = cuML_TSNE(n_components=2, perplexity=perp, n_iter=1000, early_exaggeration=exag, late_exaggeration=1, metric='cosine', learning_rate=100, n_neighbors=1000)
    tsne_results = tsne.fit_transform(input_vect)

    tsne_results = cp.asnumpy(tsne_results)  # Convert to NumPy for matplotlib

    gr = plt.scatter(tsne_results[:, 0], tsne_results[:, 1], s=0.2, alpha=0.8, vmin=100, vmax=500, c=nhits)
    plt.colorbar(gr, label='N.hits')
    plt.xlabel('t-SNE #0')
    plt.ylabel('t-SNE #1')
    plt.show()

    return tsne_results

In [None]:
## Run t-SNE on the latent vectors with the chosen perplexity / early exaggeration
perp = 100
exag = 10
tsne_results = run_tsne_cuml(perp=perp, exag=exag, input_vect=lat_vect, nhits=hit_vect)

In [None]:
from cuml.cluster import DBSCAN
from sklearn.preprocessing import normalize

## Run DBSCAN using the cuml implementation
def run_dbscan_gpu(eps=0.1, min_samples=20, input_vect=None):
    """Cluster `input_vect` with cuML's DBSCAN using the cosine metric.

    Relies on `cp` (CuPy) being imported at notebook scope.

    Parameters
    ----------
    eps : float
        Neighbourhood radius passed to DBSCAN.
    min_samples : int
        Minimum neighbourhood size passed to DBSCAN.
    input_vect : array-like
        Vectors to cluster, one row per sample. Required.

    Returns
    -------
    labels : numpy.ndarray
        Cluster label per sample on the host; -1 marks noise.
    n_clusters_ : int
        Number of distinct non-noise labels.
    n_noise_ : int
        Number of samples labelled -1.
    n_points : numpy.ndarray
        Number of samples per cluster label; empty when no cluster was found.
    dbscan : cuml.cluster.DBSCAN
        The fitted estimator.

    Raises
    ------
    ValueError
        If `input_vect` is None.
    """
    if input_vect is None:
        raise ValueError("input_vect must be provided.")

    print(f"Running GPU-accelerated DBSCAN with eps={eps}, min_samples={min_samples}")

    # L2-normalise every row before clustering with the cosine metric
    input_vect = normalize(input_vect, norm='l2', axis=1)

    # Move data to GPU using CuPy
    input_vect_gpu = cp.asarray(input_vect)

    # Run DBSCAN on GPU
    dbscan = DBSCAN(eps=eps, min_samples=min_samples, metric='cosine', index_type='int64')
    labels = dbscan.fit_predict(input_vect_gpu).get()  # Move result back to CPU

    # Compute cluster statistics from the non-noise labels only
    clustered = labels[labels >= 0]
    n_clusters_ = len(np.unique(clustered))
    n_noise_ = np.sum(labels == -1)
    # bincount of an empty selection is an empty array, so this stays an ndarray
    # (and `.tolist()` below stays valid) even when every point is noise
    n_points = np.bincount(clustered)

    print(f"Estimated number of clusters: {n_clusters_}")
    print(f"N. points in clusters: {n_points.tolist()}")
    print(f"Estimated number of noise points: {n_noise_} (out of {len(input_vect)})")

    return labels, n_clusters_, n_noise_, n_points, dbscan

In [None]:
## Cluster the latent vectors with one example choice of DBSCAN parameters
eps = 0.04
min_samples = 20
labels, n_clusters_, n_noise_, n_points, dbscan = run_dbscan_gpu(
    eps=eps,
    min_samples=min_samples,
    input_vect=lat_vect,
)

In [None]:
unique_labels = set(labels)

## Boolean mask flagging the samples DBSCAN reported as core samples
core_samples_mask = np.zeros_like(labels, dtype=bool)
core_samples_mask[dbscan.core_sample_indices_.get()] = True

## One colour per label, drawn in the first two latent dimensions
colors = [plt.cm.Spectral(each) for each in np.linspace(0, 1, len(unique_labels))]
for k, col in zip(unique_labels, colors):
    # Black used for noise.
    if k == -1:
        col = [0, 0, 0, 1]

    class_member_mask = labels == k

    # Core samples get large markers, the remaining members tiny ones
    for selection, marker_size in ((core_samples_mask, 14), (~core_samples_mask, 0.1)):
        xy = lat_vect[class_member_mask & selection]
        plt.plot(
            xy[:, 0],
            xy[:, 1],
            "o",
            markerfacecolor=tuple(col),
            markeredgecolor="k",
            markersize=marker_size,
        )

plt.title(f"Estimated number of clusters: {n_clusters_}")
plt.show()


In [None]:
## Function to show examples for each cluster
def plot_cluster_examples(dataset, labels, index, max_images=10):
    """Show up to `max_images` dataset entries whose label equals `index`.

    Parameters
    ----------
    dataset : indexable
        `dataset[i]` must return a `(coords, feats)` pair of numpy arrays.
    labels : array-like
        One label per dataset entry, in dataset order.
    index : int
        Label to display (-1 is used by the notebook for DBSCAN noise).
    max_images : int
        Upper limit on the number of images drawn.

    Returns
    -------
    None
        The images are drawn with matplotlib. If no entry carries the requested
        label (or `max_images` is not positive) a short message is printed and
        no figure is created.
    """
    ## Positions in the dataset that carry the requested label
    indices = np.where(np.array(labels) == index)[0]

    ## Show at most `max_images` of them (fewer if the label is rarer than that)
    n_images = min(max_images, len(indices))
    if n_images <= 0:
        # Nothing to draw: avoid creating and showing an empty figure
        print("No examples to show for label", index)
        return

    plt.figure(figsize=(12,4.5))

    ## Plot
    for i in range(n_images):
        ax = plt.subplot(2,n_images,i+1)

        numpy_coords, numpy_feats = dataset[indices[i]]

        # Create batched coordinates for the SparseTensor input
        orig_bcoords  = ME.utils.batched_coordinates([numpy_coords])
        orig_bfeats  = torch.from_numpy(np.concatenate([numpy_feats], 0)).float()

        orig_bcoords = orig_bcoords.to(device)
        orig_bfeats = orig_bfeats.to(device)
        orig = ME.SparseTensor(orig_bfeats, orig_bcoords, device=device)

        # Turn the sparse tensor into an array that imshow can draw
        inputs  = make_dense_from_tensor(orig)
        inputs  = inputs.cpu().squeeze().numpy()

        plt.imshow(inputs, origin='lower')
        ax.get_xaxis().set_visible(False)
        ax.get_yaxis().set_visible(False)
    plt.show()

In [None]:
## Show a bank of example images for every cluster DBSCAN found...
for index in range(n_clusters_):
    cluster_size = n_points[index]
    print("Showing examples for cluster:", index, "which has", cluster_size, "values")
    plot_cluster_examples(single_dataset, labels, index=index)

## ...and then for the points that were labelled as noise (-1)
print("Showing examples for the noise, which has", n_noise_, "values")
plot_cluster_examples(single_dataset, labels, index=-1)