In [1]:
%matplotlib inline
%load_ext autoreload
%autoreload 2
import numpy as np
import torch
import torch.nn as nn
import sys
import pandas as pd
from sklearn.model_selection import train_test_split
sys.path.append('..')
import pickle
from typing import NamedTuple, Literal

In [5]:
class LLMPRegressionDescription(NamedTuple):
    """One batch of context/target points, as built by ``TempData.generate_batch``.

    The tensor shapes noted below are the ones annotated where the tuple is
    constructed in ``generate_batch``.
    """
    # x locations of the context points: [batch_size, num_context, x_size]
    x_context: torch.Tensor
    # y values at the context points: [batch_size, num_context, y_size]
    y_context: torch.Tensor
    # x locations of the target points: [batch_size, num_target, x_size]
    x_target: torch.Tensor
    # y values at the target points: [batch_size, num_target, y_size]
    y_target: torch.Tensor
    # One text description per curve in the batch, or None when not requested.
    knowledge: list[str] | tuple[str, ...] | None
    # Length of the full x grid the batch was drawn from.
    num_total_points: int
    # Number of context points per curve.
    num_context_points: int

class TempData:
    def __init__(self, data: pd.DataFrame, max_num_context: int, device, random_state=42):
        self.data = data
        self.max_num_context = max_num_context
        x_values = data.iloc[0][3:].values.astype('float32') # (288,)
        assert x_values.shape[0] == 288
        self.x_values = torch.linspace(-2, 2, len(x_values), device=device).unsqueeze(0)
        
        y_values = data.iloc[1:, 3:].values
        y_desc = data.iloc[1:, 2].values
        y_values_train, y_values_temp, y_desc_train, y_desc_temp = train_test_split(
            y_values, y_desc, test_size=0.3, random_state=random_state
        )
        y_values_val, y_values_test, y_desc_val, y_desc_test = train_test_split(
            y_values_temp, y_desc_temp, test_size=0.5, random_state=random_state
        )

        self.y_values_train = torch.tensor(y_values_train).float().to(device)
        self.y_values_val = torch.tensor(y_values_val).float().to(device)
        self.y_values_test = torch.tensor(y_values_test).float().to(device)
        self.y_desc_train = y_desc_train
        self.y_desc_val = y_desc_val
        self.y_desc_test = y_desc_test
        
        # # self.x_values = torch.from_numpy(x_values).unsqueeze(0).to(device)  # Shape: [1, num_points]
        
        # self.y_values_train = torch.tensor(data.iloc[1:508, 3:].values).float().to(device)  # Shape: [num_samples, num_points]
        # self.y_values_test = torch.tensor(data.iloc[509:618, 3:].values).float().to(device)  # Shape: [num_samples, num_points]
        # self.y_values_val = torch.tensor(data.iloc[619:, 3:].values).float().to(device)  # Shape: [num_samples, num_points]
        # self.y_desc_train = data.iloc[1:508, 2].values
        # self.y_desc_test = data.iloc[509:618, 2].values
        # self.y_desc_val = data.iloc[619:, 2].values

    def generate_batch(self, 
                       batch_size: int,
                       split: Literal['train', 'val', 'test'],
                       device: torch.device = torch.device('cpu'),
                       return_knowledge: bool = False,
                       num_context: None | int = None
                       ) -> LLMPRegressionDescription:
        num_total_points = self.x_values.size(-1)
        if num_context is None:
            num_context = np.random.randint(low=1, high=self.max_num_context)
        else:
            assert isinstance(num_context, int) 
        num_target = num_total_points  # Using all points as target

        if split == 'train':
            selected_indices = np.random.choice(self.y_values_train.size(0), batch_size, replace=False)
            selected_y_values = self.y_values_train[selected_indices]  # Shape: [batch_size, num_points]

            knowledge = self.y_desc_train[selected_indices]
        elif split == 'val':
            selected_indices = np.random.choice(self.y_values_val.size(0), batch_size, replace=False)
            selected_y_values = self.y_values_val[selected_indices]  # Shape: [batch_size, num_points]

            knowledge = self.y_desc_val[selected_indices]

        elif split == 'test':
            selected_indices = np.random.choice(self.y_values_test.size(0), batch_size, replace=False)
            selected_y_values = self.y_values_test[selected_indices]  # Shape: [batch_size, num_points]

            knowledge = self.y_desc_test[selected_indices]
        else:
            raise ValueError("split must be one of ['train', 'val', 'test']")
        # Split into context and target sets
        context_indices = np.random.choice(num_total_points // 2, num_context, replace=False)

        x_context = self.x_values[:, context_indices].repeat(batch_size, 1)  # Shape: [batch_size, num_context]
        y_context = selected_y_values[:, context_indices]  # Shape: [batch_size, num_context]

        x_target = self.x_values[::4].repeat(batch_size, 1)  # Shape: [batch_size, num_target]
        y_target = selected_y_values[::4]  # Shape: [batch_size, num_target]
        print(x_target.shape)
        if return_knowledge:
            
            return LLMPRegressionDescription(
                x_context=x_context.unsqueeze(-1).to(device),  # Shape: [batch_size, num_context, x_size]
                y_context=y_context.unsqueeze(-1).to(device),  # Shape: [batch_size, num_context, y_size]
                x_target=x_target.unsqueeze(-1).to(device),    # Shape: [batch_size, num_target, x_size]
                y_target=y_target.unsqueeze(-1).to(device),    # Shape: [batch_size, num_target, y_size]
                knowledge=list(knowledge), # Shape/type: TODO
                num_total_points=num_total_points,
                num_context_points=num_context
            )

        else:
            
            return LLMPRegressionDescription(
                x_context=x_context.unsqueeze(-1).to(device),  # Shape: [batch_size, num_context, x_size]
                y_context=y_context.unsqueeze(-1).to(device),  # Shape: [batch_size, num_context, y_size]
                x_target=x_target.unsqueeze(-1).to(device),    # Shape: [batch_size, num_target, x_size]
                y_target=y_target.unsqueeze(-1).to(device),    # Shape: [batch_size, num_target, y_size]
                knowledge=None,
                num_total_points=num_total_points,
                num_context_points=num_context
            )

In [6]:
def process_and_save_data(file_path, pkl_output_path, max_num_context=20, device='cpu', random_state=42, return_knowledge=False):
    """Sample one training curve from a CSV and pickle it as a small regression task.

    Args:
        file_path: CSV file read with ``pd.read_csv`` and handed to ``TempData``.
        pkl_output_path: where the pickled dict is written.
        max_num_context: forwarded to ``TempData``; it does not affect the
            sample drawn here because the context size is fixed at 10.
        device: torch device used by ``TempData`` and for the batch.
        random_state: seed for the train/val/test split inside ``TempData``.
        return_knowledge: when True, also return the sampled curve's description.

    Returns:
        ``{'data': <pickled dict>, 'knowledge': <description or None>}``.
        The pickled dict holds flat NumPy arrays under ``x_train``/``y_train``
        (context points) and ``x_test``/``y_test``/``x_true``/``y_true``
        (target points; the ``*_true`` arrays duplicate the ``*_test`` ones).
    """
    data = pd.read_csv(file_path)
    temp_data = TempData(data=data, max_num_context=max_num_context, device=device, random_state=random_state)

    # A single curve from the train split with 10 context points.
    llmp_description = temp_data.generate_batch(
        batch_size=1,
        split='train',
        device=device,
        return_knowledge=return_knowledge,
        num_context=10,
    )

    def _flat(tensor):
        # Tensor -> 1-D NumPy array on the host.
        return tensor.cpu().numpy().flatten()

    data_to_save = {
        'x_train': _flat(llmp_description.x_context),
        'y_train': _flat(llmp_description.y_context),
        'x_test': _flat(llmp_description.x_target),
        'y_test': _flat(llmp_description.y_target),
        'x_true': _flat(llmp_description.x_target),
        'y_true': _flat(llmp_description.y_target),
    }

    with open(pkl_output_path, 'wb') as handle:
        pickle.dump(data_to_save, handle, protocol=pickle.HIGHEST_PROTOCOL)

    knowledge = None
    if return_knowledge:
        # Use the description that came back with the sampled curve. Row 0 of
        # the CSV is the x-axis row, which TempData excludes from the curves,
        # so its 'description' cell never matches the curve saved above.
        knowledge = llmp_description.knowledge[0]

    return {
        'data': data_to_save,
        'knowledge': knowledge
    }

# Example: sample one curve (with its description), pickle it, and show what was saved.
result = process_and_save_data(
    file_path='../data/kasia/data_with_desc.csv',
    pkl_output_path='random_sample_output_data.pkl',
    return_knowledge=True,
)
print(f"X Training Data: {result['data']['x_train']}")
print(f"Y Training Data: {result['data']['y_train']}")
# Only the shape is shown for the target x grid.
print(f"X Test Data: {result['data']['x_test'].shape}")
print(f"Y Test Data: {result['data']['y_test']}")
print(f"Knowledge: {result['knowledge']}")

torch.Size([1, 288])
X Training Data: [-1.5400697  -0.77351916 -0.71777004 -1.5121951  -1.4425087  -0.20209058
 -1.0383276  -1.7351916  -0.6480836  -0.8989547 ]
Y Training Data: [ 9.8 10.7 10.7 10.1 10.3 11.8 10.7 10.5 10.8 10.7]
X Test Data: (288,)
Y Test Data: [ 9.5  9.6  9.7  9.6  9.9 10.2 10.2 10.3 10.4 10.4 10.4 10.5 10.5 10.5
 10.5 10.5 10.4 10.5 10.5 10.5 10.5 10.4 10.4 10.5 10.3 10.2 10.4 10.2
  9.9  9.9  9.9  9.9  9.9  9.8 10.  10.1 10.1  9.9  9.8 10.  10.3 10.
  9.7  9.7  9.5  9.5  9.7  9.7  9.9 10.  10.   9.9  9.8  9.9  9.8  9.8
  9.9 10.  10.2 10.3 10.3 10.2 10.1 10.3 10.4 10.6 10.7 10.7 10.8 10.7
 10.7 10.7 10.7 10.7 10.7 10.6 10.7 10.7 10.6 10.7 10.6 10.6 10.6 10.6
 10.6 10.6 10.7 10.7 10.7 10.7 10.8 10.7 10.7 10.7 10.7 10.7 10.7 10.8
 10.8 10.8 10.8 10.8 10.9 10.8 10.5 10.3 10.3 10.3 10.2 10.2 10.1 10.2
 10.2 10.2 10.3 10.3 10.4 10.5 10.6 10.6 10.7 10.7 10.8 11.  11.  11.3
 11.2 11.4 11.6 11.8 12.2 12.1 12.2 12.3 12.3 12.5 12.7 12.6 12.6 12.5
 12.5 12.6 12.5 12.6 12.7 12

In [95]:
# Disabled cell: everything below is wrapped in one triple-quoted string, so none
# of it runs; the cell only evaluates to (and echoes) that string. The text inside
# sketches a hand-written dataset dict with the same six keys used elsewhere in
# this notebook, followed by a pickle dump to 'test_data.pkl'.
'''# with open("../data/functions/linear_25_seed_6.pkl", 'rb') as f:
#     data = pickle.load(f)
data = {
    'x_train': np.array([53., 115., 188.]),  # Select first 3 elements
    'y_train': np.array([-0.44, 0.17, 0.85]),  # Select first 3 elements
    'x_test': np.array([0., 5., 10., 15., 20., 25., 30., 35., 40., 45., 50.,
                        55., 60., 65., 70., 75., 80., 85., 90., 95., 100., 105.,
                        110., 115., 120., 125., 130., 135., 140., 145., 150., 155., 160.,
                        165., 170., 175., 180., 185., 190., 195.]),
    'y_test': [-1.0, -0.95, -0.9, -0.85, -0.8, -0.75, -0.7, -0.65, -0.6, -0.55, -0.5, -0.45, -0.4, -0.35, -0.3, -0.25, -0.2, -0.15, -0.1, -0.05, 0.01, 0.06, 0.11, 0.16, 0.21, 0.26, 0.31, 0.36, 0.41, 0.46, 0.51, 0.56, 0.61, 0.66, 0.71, 0.76, 0.81, 0.86, 0.91, 0.96],
    'x_true': np.array([0., 1., 2., 3., 4.]),
    'y_true': np.array([-1.        , -0.98994975, -0.9798995 , -0.96984925, -0.95979899])
}

with open('test_data.pkl', 'wb') as handle:
    pickle.dump(data, handle, protocol=pickle.HIGHEST_PROTOCOL)'''

'# with open("../data/functions/linear_25_seed_6.pkl", \'rb\') as f:\n#     data = pickle.load(f)\ndata = {\n    \'x_train\': np.array([53., 115., 188.]),  # Select first 3 elements\n    \'y_train\': np.array([-0.44, 0.17, 0.85]),  # Select first 3 elements\n    \'x_test\': np.array([0., 5., 10., 15., 20., 25., 30., 35., 40., 45., 50.,\n                        55., 60., 65., 70., 75., 80., 85., 90., 95., 100., 105.,\n                        110., 115., 120., 125., 130., 135., 140., 145., 150., 155., 160.,\n                        165., 170., 175., 180., 185., 190., 195.]),\n    \'y_test\': [-1.0, -0.95, -0.9, -0.85, -0.8, -0.75, -0.7, -0.65, -0.6, -0.55, -0.5, -0.45, -0.4, -0.35, -0.3, -0.25, -0.2, -0.15, -0.1, -0.05, 0.01, 0.06, 0.11, 0.16, 0.21, 0.26, 0.31, 0.36, 0.41, 0.46, 0.51, 0.56, 0.61, 0.66, 0.71, 0.76, 0.81, 0.86, 0.91, 0.96],\n    \'x_true\': np.array([0., 1., 2., 3., 4.]),\n    \'y_true\': np.array([-1.        , -0.98994975, -0.9798995 , -0.96984925, -0.95979899])\n}\n\nwit

In [108]:
from src.plot import plot_samples, plot_images, plot_heatmap
from src.hf_api import get_model_and_tokenizer
from src.parse_args import parse_command_line
from src.compute_nll import compute_nll
from src.sample import sample
from src.prepare_data import prepare_data


In [107]:
from types import SimpleNamespace

# Run settings, written as a mapping and expanded into a namespace so they can be
# read as attributes (args.mode, args.llm_type, ...).
args = SimpleNamespace(**{
    # --- run identity and inputs ---
    'cfg': None,  # placeholder; the original note assumes this stood in for a custom `ActionConfigFile` action
    'mode': 'sample_logpy',
    'experiment_name': 'test',
    # alternative input: '../data/functions/linear_25_seed_6.pkl'
    'data_path': 'random_sample_output_data.pkl',  # same file name the example-usage cell writes
    'llm_path': None,
    'llm_type': "llama-3-8B",  # or "llama-3-70B"
    'prompt_ordering': 'distance',
    'output_dir': './output',
    'plot_dir': './plots',
    'seed': 1,
    # --- number formatting and batching ---
    # NOTE(review): the x values printed by the example-usage cell lie in [-2, 2];
    # confirm that 0 decimal places for x is intended for that data.
    'num_decimal_places_x': 0,
    'num_decimal_places_y': 2,
    'batch_size': 5,
    'autoregressive': True,
    # --- prompt text ---
    'prefix': result['knowledge'],  # description returned by process_and_save_data
    'x_prefix': '',
    'y_prefix': ', ',
    'break_str': '\n',
    'sort_x_test': False,
    'forecast': True,
    'print_prompts': False,
    'print_logprobs': False,
    # --- sampling ---
    'num_samples': 50,
    'temperature': 1.0,
    'top_p': 0.9,
    'max_generated_length': 7,
    # --- plotting ---
    'y_min': None,
    'y_max': None,
    'plot_trajectories': 5,
    # --- explicit x/y grids (all unset) ---
    'specify_xy': False,
    'xs': None,
    'ys': None,
    'xs_start': None,
    'xs_end': None,
    'num_xs': None,
    'ys_start': None,
    'ys_end': None,
    'num_ys': None,
    'mask_unused_tokens': True,
})

In [104]:
import os

# Point both Hugging Face cache variables at the same directory.
os.environ.update({
    'HF_HOME': '/workspace/will/LLMP/hf_cache/',
    'HF_HUB_CACHE': '/workspace/will/LLMP/hf_cache/',
})

In [99]:
import os

# Access tokens must never be written into the notebook: anything saved here is
# shared with everyone who can read the file. Any token that was ever committed
# in this cell should be treated as leaked and revoked.
# The token is read from the HF_TOKEN environment variable instead.
llama3_token = os.environ.get("HF_TOKEN")
if not llama3_token:
    raise RuntimeError(
        "Set the HF_TOKEN environment variable to a Hugging Face access token "
        "before loading the model."
    )

# Get the LLM and its associated tokenizer.
model, tokenizer = get_model_and_tokenizer(args.llm_path, args.llm_type, llama3_token)

Here


Loading checkpoint shards: 100%|██████████| 4/4 [01:16<00:00, 19.12s/it]


In [12]:
from accelerate import infer_auto_device_map

# Budget 80GiB for each visible GPU, for at most the two GPUs this cell was
# written for. Listing a GPU that does not exist makes accelerate report
# "Device 1 is not available", as seen in this cell's recorded output.
max_memory = {gpu_index: "80GiB" for gpu_index in range(min(2, torch.cuda.device_count()))}
max_memory["cpu"] = "250GiB"

device_map = infer_auto_device_map(model, max_memory=max_memory)
print(device_map)

Device 1 is not available, available devices are [0]


OrderedDict({'': 0})


In [109]:
# Run the LLM process with the settings in `args` and the already loaded model/tokenizer.
# NOTE: the output recorded for this cell ends in KeyboardInterrupt after 91 sampling
# iterations (about 5 minutes), so the saved run was interrupted and did not finish.
from src.run_llm_process import run_llm_process
run_llm_process(args=args, model=model, tokenizer=tokenizer)

Sampling: 91it [04:55,  3.25s/it]


KeyboardInterrupt: 

In [14]:
import os
# Show the kernel's working directory; the relative '../data/...' paths used in
# this notebook are resolved against it.
cwd = os.getcwd()
cwd

'/workspace/will/LLMP/my_notebooks'

In [15]:
# Load the pickled object at this path so it can be inspected in the next cell.
# NOTE: pickle.load can execute arbitrary code from the file; only open pickles you trust.
with open('../data/functions/beat_05_seed_9.pkl','rb') as f:
     contents = pickle.load(f)
# '../data/weather/weather_llm_proc_10.pkl'

In [16]:
# Dump the loaded object in full (keys and array values) to see its layout.
print(contents)

{'x_train': array([155.,  58.,  75., 175., 127.]), 'y_train': array([ 0.71,  0.02,  0.6 , -0.41,  0.04]), 'x_test': array([  0.,   5.,  10.,  15.,  20.,  25.,  30.,  35.,  40.,  45.,  50.,
        55.,  60.,  65.,  70.,  75.,  80.,  85.,  90.,  95., 100., 105.,
       110., 115., 120., 125., 130., 135., 140., 145., 150., 155., 160.,
       165., 170., 175., 180., 185., 190., 195.]), 'y_test': [0.0, 0.33, 0.0, -0.81, -0.0, 0.86, 0.0, -0.45, -0.0, -0.2, 0.0, 0.74, 0.0, -0.89, -0.0, 0.56, 0.0, 0.07, 0.0, -0.66, 0.0, 0.9, -0.0, -0.66, -0.0, 0.07, -0.0, 0.56, 0.0, -0.89, 0.0, 0.74, 0.0, -0.2, 0.0, -0.45, -0.0, 0.86, 0.0, -0.81], 'x_true': array([  0.,   1.,   2.,   3.,   4.,   5.,   6.,   7.,   8.,   9.,  10.,
        11.,  12.,  13.,  14.,  15.,  16.,  17.,  18.,  19.,  20.,  21.,
        22.,  23.,  24.,  25.,  26.,  27.,  28.,  29.,  30.,  31.,  32.,
        33.,  34.,  35.,  36.,  37.,  38.,  39.,  40.,  41.,  42.,  43.,
        44.,  45.,  46.,  47.,  48.,  49.,  50.,  51.,  52.,  53.,