In [2]:
import pandas as pd
from sklearn.manifold import TSNE
import matplotlib.pyplot as plt
import seaborn as sns
import argparse

import torch
import numpy as np
from tabpfn_extensions.embedding import TabPFNEmbedding


import warnings
warnings.filterwarnings("ignore")

In [3]:
def tsne_plot(features,property_values,num_levels, name):
    print("Data loaded successfully.")
    print(f"Shape of the feature data: {features.shape}")
    # Step 2: Initialize and run the t-SNE algorithm.
    print("\nStarting t-SNE dimensionality reduction...")
    tsne = TSNE(n_components=2, verbose=1, perplexity=40, n_iter=1000, random_state=42)
    tsne_results = tsne.fit_transform(features)
    print("t-SNE calculation complete.")
    # Step 3: Create DataFrame and discretize property values for coloring.
    df_tsne = pd.DataFrame(data=tsne_results, columns=['tsne_dim_1', 'tsne_dim_2'])
    df_tsne['property'] = property_values

    # Bin the continuous property values into discrete levels.
    # `pd.cut` creates bins of equal width from the range of property values.

    #df_tsne['property_level'] = pd.cut(df_tsne['property'], bins=num_levels, labels=False)
    df_tsne['property_level'] = pd.qcut(df_tsne['property'], q=num_levels, labels=False, duplicates='drop')


    # Step 4: Visualize the results.
    print("Generating plot...")
    plt.style.use('seaborn-v0_8-whitegrid')
    plt.figure(figsize=(14, 12))
    scatter_plot = sns.scatterplot(
        x="tsne_dim_1", y="tsne_dim_2",
        hue="property_level",  # Color points by the discrete property level
        palette=sns.color_palette("viridis", n_colors=num_levels), # Use a discrete colormap
        data=df_tsne,
        alpha=0.7,
        s=50,
        legend='full' # Ensure the legend is shown correctly for discrete values
    )
    plt.title(f't-SNE Visualization ({num_levels} Property Levels)', fontsize=18)
    plt.xlabel('t-SNE Dimension 1', fontsize=12)
    plt.ylabel('t-SNE Dimension 2', fontsize=12)

    # Set the legend title.
    legend = scatter_plot.get_legend()
    if legend:
        legend.set_title(f'Property Level')
    plt.grid(True)

    # Save the plot to a file
    output_filename = f'{name}_tsne_{num_levels}_levels.png'
    plt.savefig(output_filename, dpi=300)
    print(f"\nPlot saved as '{output_filename}'")

    # Display the plot
    plt.show()



In [4]:
compo = "./TabPFN_4"
Farm_tab = compo+"/tabpfn_model.pt"
input_csv = "TabPFN_fold4_data/fold4_train_labeled.csv"  # adjust if needed


In [5]:
FMmodel = torch.load(Farm_tab)
FM_extractor = TabPFNEmbedding(tabpfn_clf=FMmodel, n_fold=0)
df_test = pd.read_csv(input_csv)

# Split features and targets
FMX = df_test.drop(columns=["target"]).values.astype(np.float32)
FMy = df_test["target"].values.astype(np.float32)

print(FMX.shape)
print(FMy.shape)

(2520, 152)
(2520,)


In [6]:
import tabpfn_extensions
print(tabpfn_extensions.__version__)
import tabpfn
print(tabpfn.__version__)



0.1.6
2.2.1


In [9]:
import os
import numpy as np
torch.cuda.empty_cache()
import gc
gc.collect()

batch_size = 256  # try 128 or 64 if memory is still tight
embeddings = []

os.environ["CUDA_VISIBLE_DEVICES"] = "2"   # or another index
torch.cuda.set_device(0)

for i in range(0, len(FMX), batch_size):
    
    X_batch = FMX[i:i + batch_size]
    with torch.no_grad():
        emb = FM_extractor.get_embeddings(
            X_batch, FMy[i:i + batch_size], X_batch, data_source="train"
        )
    embeddings.append(emb)
    torch.cuda.empty_cache()   # free unused memory between batches
    gc.collect()

FM_embeddings = np.concatenate(embeddings, axis=0)
print("FM_embeddings:", FM_embeddings.shape)


OutOfMemoryError: CUDA out of memory. Tried to allocate 1.07 GiB. GPU 0 has a total capacity of 10.75 GiB of which 507.69 MiB is free. Process 2479587 has 5.76 GiB memory in use. Process 2479625 has 1.27 GiB memory in use. Process 2479646 has 1.27 GiB memory in use. Including non-PyTorch memory, this process has 1.94 GiB memory in use. Of the allocated memory 1.68 GiB is allocated by PyTorch, and 74.36 MiB is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting PYTORCH_CUDA_ALLOC_CONF=expandable_segments:True to avoid fragmentation.  See documentation for Memory Management  (https://pytorch.org/docs/stable/notes/cuda.html#environment-variables)