In [1]:
pip install transformers torch numpy

Collecting nvidia-cuda-nvrtc-cu12==12.4.127 (from torch)
  Downloading nvidia_cuda_nvrtc_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-runtime-cu12==12.4.127 (from torch)
  Downloading nvidia_cuda_runtime_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-cupti-cu12==12.4.127 (from torch)
  Downloading nvidia_cuda_cupti_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cudnn-cu12==9.1.0.70 (from torch)
  Downloading nvidia_cudnn_cu12-9.1.0.70-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cublas-cu12==12.4.5.8 (from torch)
  Downloading nvidia_cublas_cu12-12.4.5.8-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cufft-cu12==11.2.1.3 (from torch)
  Downloading nvidia_cufft_cu12-11.2.1.3-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-curand-cu12==10.3.5.147 (from torch)
  Downloading nvidia_curand_cu12-10.3.5

In [None]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import numpy as np
from transformers import AutoModelForCausalLM, AutoTokenizer
from huggingface_hub import login
from collections import defaultdict

# Custom Linear Layer with SVF
class SVFLinear(nn.Module):
    """Linear layer re-parameterised through the SVD of an existing ``nn.Linear``.

    The wrapped weight is factored once as ``U @ diag(Sigma) @ V`` (``V`` is stored
    already transposed, shape ``(rank, in_features)``).  The only trainable
    parameter is ``z``, a per-singular-value scale initialised to ones, so a
    freshly built layer reproduces the wrapped layer's output.

    Args:
        original_linear: the ``nn.Linear`` to wrap.  It is kept as a submodule
            because its bias is reused in ``forward``.
    """

    def __init__(self, original_linear):
        super().__init__()
        self.original_linear = original_linear
        with torch.no_grad():
            # Reduced SVD; torch.linalg.svd returns V already transposed (Vh).
            U, Sigma, Vh = torch.linalg.svd(original_linear.weight.float(), full_matrices=False)
            self.U = nn.Parameter(U.contiguous(), requires_grad=False)
            self.Sigma = nn.Parameter(Sigma, requires_grad=False)
            self.V = nn.Parameter(Vh.contiguous(), requires_grad=False)
        self.z = nn.Parameter(torch.ones_like(self.Sigma), requires_grad=True)

    def forward(self, x):
        """Apply the scaled-SVD linear map to ``x``.

        Args:
            x: tensor whose last dimension is ``in_features``; any number of
                leading dimensions is accepted.

        Returns:
            Tensor with the same leading dimensions as ``x``, last dimension
            ``out_features``, and the same dtype as ``x``.
        """
        compute_dtype = self.U.dtype
        scaled_sigma = self.Sigma * self.z
        # y = ((x @ Vh^T) * (Sigma * z)) @ U^T + b  ==  x @ W'^T + b
        projected = F.linear(x.to(compute_dtype), self.V) * scaled_sigma
        bias = self.original_linear.bias
        if bias is not None:
            bias = bias.to(compute_dtype)
        output = F.linear(projected, self.U, bias)
        return output.to(x.dtype)

# Graph Node for Task-Specific Adaptations
class Node:
    """One entry of the adaptation graph.

    Attributes are plain fields: ``embedding`` (running mean of the inputs routed
    to this node), ``z_vectors`` (one scale vector per SVF layer), ``count``
    (number of embeddings averaged so far) and ``past_responses`` (set of
    responses already produced for this node).
    """

    def __init__(self, embedding, z_vectors, count=1):
        self.embedding = embedding
        self.z_vectors = z_vectors
        self.count = count
        self.past_responses = set()

    def update_embedding(self, new_embedding):
        """Fold ``new_embedding`` into the running mean and bump ``count``."""
        self.embedding = (self.count * self.embedding + new_embedding) / (self.count + 1)
        self.count += 1

# Self-Adaptive LLM with Graph Structure
class AdaptiveLLM:
    """Causal LM whose MLP linears are SVF-wrapped and adapted per input cluster.

    Inputs are embedded, routed to the nearest ``Node`` by cosine distance (a new
    node is created when the nearest one is farther than ``distance_threshold``),
    answered with that node's ``z`` vectors loaded, and buffered together with a
    reward.  Once ``buffer_size`` interactions are buffered, ``update_nodes``
    trains each node's ``z`` vectors on its buffered interactions.

    Args:
        model_name: Hugging Face model id passed to ``from_pretrained``.
        distance_threshold: cosine distance above which a new node is created.
        buffer_size: number of buffered interactions that triggers an update.
    """

    # Upper bound on re-sampling attempts when a rejected response repeats.
    MAX_REGENERATIONS = 5

    def __init__(self, model_name="meta-llama/Llama-3.2-1B-Instruct", distance_threshold=0.5, buffer_size=5):
        # Optional: Log in to Hugging Face (provide your token if needed)
        # login(token="your_hf_token_here")

        # Load model and tokenizer
        self.tokenizer = AutoTokenizer.from_pretrained(model_name)
        if self.tokenizer.pad_token_id is None:
            self.tokenizer.pad_token_id = self.tokenizer.eos_token_id
        self.base_model = AutoModelForCausalLM.from_pretrained(model_name)
        self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
        self.base_model.to(self.device)

        # Only the SVF scale vectors are trained; freezing the base weights
        # (before wrapping, so the new ``z`` parameters stay trainable) keeps
        # backward() from computing gradients for the whole model.
        for param in self.base_model.parameters():
            param.requires_grad_(False)

        # Replace linear layers with SVFLinear (targeting MLP layers)
        self.svf_layers = self._install_svf_layers()

        # Initialize graph
        hidden_size = self.base_model.config.hidden_size
        initial_embedding = torch.zeros(hidden_size, device=self.device)
        initial_z_vectors = [layer.z.clone().detach() for layer in self.svf_layers]
        self.nodes = [Node(initial_embedding, initial_z_vectors)]
        self.buffer = []
        self.distance_threshold = distance_threshold
        self.buffer_size = buffer_size

    def _install_svf_layers(self):
        """Wrap every ``nn.Linear`` whose module path contains "mlp" and return the wrappers."""
        # Snapshot first: the module tree is mutated below, and each wrapper
        # contains the original nn.Linear, which must not be wrapped again.
        targets = [
            (name, module)
            for name, module in self.base_model.named_modules()
            if isinstance(module, nn.Linear) and "mlp" in name
        ]
        svf_layers = []
        for name, module in targets:
            parent_path, _, attr_name = name.rpartition(".")
            parent = self.base_model.get_submodule(parent_path) if parent_path else self.base_model
            svf_layer = SVFLinear(module).to(self.device)
            # Swap by attribute path so the replacement does not depend on
            # specific projection names.
            setattr(parent, attr_name, svf_layer)
            svf_layers.append(svf_layer)
        return svf_layers

    def embed_input(self, text):
        """Return the mean of the last hidden layer over the tokens of ``text``.

        NOTE(review): the forward pass runs with whichever ``z`` vectors are
        currently loaded, so the embedding depends on the last active node.
        """
        inputs = self.tokenizer(text, return_tensors="pt", padding=True, truncation=True, max_length=512).to(self.device)
        with torch.no_grad():
            outputs = self.base_model(**inputs, output_hidden_states=True)
            hidden_states = outputs.hidden_states[-1]
            return torch.mean(hidden_states, dim=1).squeeze(0)

    def compute_distance(self, emb1, emb2):
        """Cosine distance (``1 - cosine_similarity``) between two 1-D embeddings, as a float."""
        return 1 - F.cosine_similarity(emb1.unsqueeze(0), emb2.unsqueeze(0)).item()

    def set_z_vectors(self, z_vectors):
        """Copy ``z_vectors`` into the SVF layers' ``z`` parameters, in order."""
        with torch.no_grad():
            for layer, z in zip(self.svf_layers, z_vectors):
                # In-place copy keeps the Parameter object (and its
                # requires_grad flag) intact and handles device transfer.
                layer.z.copy_(z)

    def _generate(self, inputs, temperature):
        """Sample one continuation for ``inputs`` and return it decoded, without the prompt."""
        outputs = self.base_model.generate(
            **inputs,
            max_new_tokens=50,
            num_return_sequences=1,
            do_sample=True,
            temperature=temperature,
            top_k=50,
            pad_token_id=self.tokenizer.eos_token_id
        )
        prompt_len = inputs.input_ids.shape[1]
        return self.tokenizer.decode(outputs[0][prompt_len:], skip_special_tokens=True).strip()

    def process_input(self, text, feedback=None):
        """Route ``text`` to a node, generate a response and buffer the interaction.

        Args:
            text: the user prompt.
            feedback: reward to record for the generated response; ``None`` is
                recorded as ``1``.  With ``feedback == -1`` a response already
                seen for the node is re-sampled (at most ``MAX_REGENERATIONS``
                times).

        Returns:
            The generated response string.
        """
        embedding = self.embed_input(text)
        distances = [self.compute_distance(embedding, node.embedding) for node in self.nodes]
        closest_idx = int(np.argmin(distances))
        min_distance = distances[closest_idx]
        closest_node = self.nodes[closest_idx]

        if min_distance > self.distance_threshold:
            # A new node starts from the z vectors currently loaded in the layers.
            new_z_vectors = [layer.z.clone().detach() for layer in self.svf_layers]
            new_node = Node(embedding, new_z_vectors)
            self.nodes.append(new_node)
            closest_node = new_node
        else:
            closest_node.update_embedding(embedding)

        self.set_z_vectors(closest_node.z_vectors)
        inputs = self.tokenizer(text, return_tensors="pt", padding=True, truncation=True, max_length=512).to(self.device)
        response = self._generate(inputs, temperature=0.7)

        # Bounded retry: sampling may keep returning an already-seen response,
        # which previously made this loop spin forever.
        attempts = 0
        while (
            feedback == -1
            and response in closest_node.past_responses
            and attempts < self.MAX_REGENERATIONS
        ):
            response = self._generate(inputs, temperature=0.9)
            attempts += 1

        reward = feedback if feedback is not None else 1
        closest_node.past_responses.add(response)
        self.buffer.append((text, response, reward, closest_node))

        if len(self.buffer) >= self.buffer_size:
            self.update_nodes()

        return response

    def _response_loss(self, text, response, reward):
        """Reward-weighted negative mean log-likelihood of ``response`` given ``text``.

        Returns ``None`` when no response token is left to score (e.g. after
        truncation).
        """
        # Concatenate input and response to get logits for the full sequence
        full_text = text + " " + response
        inputs = self.tokenizer(full_text, return_tensors="pt", padding=True, truncation=True, max_length=512).to(self.device)
        input_len = self.tokenizer(text, return_tensors="pt", padding=True, truncation=True, max_length=512).input_ids.shape[1]

        # Targets come from the same tokenization as the logits, so position t
        # of the logits always predicts token t + 1 and the shapes match.
        targets = inputs.input_ids[:, input_len:]
        if input_len < 1 or targets.shape[1] == 0:
            return None

        outputs = self.base_model(**inputs)
        logits = outputs.logits[:, input_len - 1:-1, :]  # Slice logits for response part only
        log_probs = F.log_softmax(logits, dim=-1)
        target_log_probs = log_probs.gather(2, targets.unsqueeze(-1)).squeeze(-1)
        # Negative rewards are weighted twice as heavily as positive ones.
        return -reward * target_log_probs.mean() * (2 if reward < 0 else 1)

    def update_nodes(self):
        """Train each node's ``z`` vectors on its buffered interactions, then clear the buffer."""
        node_data = defaultdict(list)
        for text, response, reward, node in self.buffer:
            node_data[id(node)].append((text, response, reward))

        if self.svf_layers:
            for node in self.nodes:
                data = node_data.get(id(node), [])
                if not data:
                    continue

                # Optimise the live layer parameters (the tensors the forward
                # pass actually uses), starting from this node's values.
                self.set_z_vectors(node.z_vectors)
                optimizer = torch.optim.Adam([layer.z for layer in self.svf_layers], lr=0.002)
                for text, response, reward in data:
                    loss = self._response_loss(text, response, reward)
                    if loss is None:
                        continue
                    optimizer.zero_grad()
                    loss.backward()
                    optimizer.step()

                # Persist the trained values back onto the node.
                node.z_vectors = [layer.z.clone().detach() for layer in self.svf_layers]
        self.buffer = []

# Simple CLI Interface
def run_interface():
    """Run an interactive prompt/response/feedback loop on stdin/stdout.

    Builds an ``AdaptiveLLM`` with its default arguments, then repeatedly reads a
    prompt, prints the model's response and asks for a y/n rating.  Typing
    ``quit`` (or sending EOF / Ctrl-C) ends the session.
    """
    print("Welcome to the Self-Adaptive LLM Interface!")
    print("Using meta-llama/Llama-3.2-1B-Instruct model.")
    print("Type 'quit' to exit.")

    # Model loading is the step that needs Hub access, so the authentication
    # hint is only printed when this step fails.
    try:
        model = AdaptiveLLM()
    except Exception as e:
        print(f"Error: {e}")
        print("Ensure you have authenticated with Hugging Face and have sufficient resources.")
        return

    try:
        while True:
            user_input = input("\nEnter your prompt: ")
            if user_input.strip().lower() == "quit":
                break
            if not user_input.strip():
                # Nothing to answer; ask again.
                continue

            response = model.process_input(user_input)
            print(f"Model response: {response}")

            feedback = input("Was the response good? (y/n): ").strip().lower()
            reward = 1 if feedback == "y" else -1
            # NOTE(review): this second call samples a new response and records
            # the reward against it, not against the response printed above
            # (which the first call already buffered with the default reward).
            # Attaching feedback to the shown response needs an AdaptiveLLM API
            # for rating an existing interaction.
            model.process_input(user_input, feedback=reward)
            print(f"Feedback recorded. Graph nodes: {len(model.nodes)}")
    except (EOFError, KeyboardInterrupt):
        # End of input or user interrupt: leave the loop quietly.
        print()
    except Exception as e:
        print(f"Error: {e}")

# Start the interactive session only when this code runs as the main module
# (not when it is imported from elsewhere).
if __name__ == "__main__":
    run_interface()

Welcome to the Self-Adaptive LLM Interface!
Using meta-llama/Llama-3.2-1B model.
Type 'quit' to exit.
Model response: There are 3 r's in the word "strawberry".
Feedback recorded. Graph nodes: 2
Model response: There are 2 R's in "strawberry".
Feedback recorded. Graph nodes: 2
Model response: Let's count the number of R's in "strawberry"...

Here are the R's: 1, 2, 3, 4, 5, 6

There are 6 R's in "straw
Feedback recorded. Graph nodes: 2
Model response: ?
There are 2 R's in the word "strawberry".
Feedback recorded. Graph nodes: 2
Model response: There are two R's in the word "strawberry".
Feedback recorded. Graph nodes: 2
Model response: ?
There are 3 Rs in the word strawberry.
Feedback recorded. Graph nodes: 2
Model response: There are 3 r's in the word "strawberry".
Feedback recorded. Graph nodes: 2
Model response: There are 2 R's in the word "strawberry".
Feedback recorded. Graph nodes: 2
Model response: strawberry

There are 2 R's in the word "strawberry".
Feedback recorded. Graph nod