In [1]:
import torch
from torch import bfloat16
import transformers

In [2]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model_instruct_id = "mistralai/Mistral-7B-Instruct-v0.2"
model = transformers.AutoModelForCausalLM.from_pretrained(
    model_instruct_id,
    trust_remote_code=True,
    torch_dtype=bfloat16,
    device_map="auto"
).to(device)
tokenizer = transformers.AutoTokenizer.from_pretrained(model_instruct_id)

model.eval()

Loading checkpoint shards:   0%|          | 0/3 [00:00<?, ?it/s]

MistralForCausalLM(
  (model): MistralModel(
    (embed_tokens): Embedding(32000, 4096)
    (layers): ModuleList(
      (0-31): 32 x MistralDecoderLayer(
        (self_attn): MistralAttention(
          (q_proj): Linear(in_features=4096, out_features=4096, bias=False)
          (k_proj): Linear(in_features=4096, out_features=1024, bias=False)
          (v_proj): Linear(in_features=4096, out_features=1024, bias=False)
          (o_proj): Linear(in_features=4096, out_features=4096, bias=False)
          (rotary_emb): MistralRotaryEmbedding()
        )
        (mlp): MistralMLP(
          (gate_proj): Linear(in_features=4096, out_features=14336, bias=False)
          (up_proj): Linear(in_features=4096, out_features=14336, bias=False)
          (down_proj): Linear(in_features=14336, out_features=4096, bias=False)
          (act_fn): SiLU()
        )
        (input_layernorm): MistralRMSNorm()
        (post_attention_layernorm): MistralRMSNorm()
      )
    )
    (norm): MistralRMSNorm()
  

### Obtain raw logits output by the final layer of the decoder

In [3]:
input_text = "The future of AI is"
input_tokenized = tokenizer(input_text, return_tensors="pt")
print("input_tokenized: ", input_tokenized)
input_ids = input_tokenized["input_ids"].to(device)

with torch.no_grad(): # prevents backpropagation thus preventing updating the model
    output = model(input_ids=input_ids) # the output logits of a sequence of tokens
print("output: ", output)

input_tokenized:  {'input_ids': tensor([[    1,   415,  3437,   302, 16107,   349]]), 'attention_mask': tensor([[1, 1, 1, 1, 1, 1]])}
output:  CausalLMOutputWithPast(loss=None, logits=tensor([[[-5.5625, -5.5938, -0.2520,  ..., -4.1875, -3.2656, -3.9062],
         [-8.0625, -8.5000, -3.5469,  ..., -5.6562, -5.1250, -5.0938],
         [-8.0000, -8.3750, -4.0000,  ..., -6.0312, -5.9688, -6.8125],
         [-6.7500, -7.0000, -4.1562,  ..., -5.6250, -4.2812, -5.4062],
         [-7.9688, -8.7500, -3.5625,  ..., -6.2188, -8.7500, -6.7812],
         [-7.5938, -7.6562, -2.9688,  ..., -6.7188, -5.3750, -4.9375]]],
       device='cuda:0'), past_key_values=((tensor([[[[-9.4727e-02,  2.3071e-02,  2.3438e-01,  ..., -1.3438e+00,
           -1.9844e+00, -2.1562e+00],
          [ 5.5000e+00, -4.0000e+00, -2.6250e+00,  ...,  2.0938e+00,
            5.7031e-01, -3.1641e-01],
          [ 3.4062e+00, -4.9688e+00, -9.1797e-01,  ...,  2.3906e+00,
            1.2969e+00,  6.7188e-01],
          [-4.8125e+00, 

### You can in evaluate the other output logits to determine if a text is generated
Generative AI detection is unreliable


In [4]:

print(output.logits.shape)
print(model.vocab_size)
last_token_logits = output.logits[0, -1, :]
print(last_token_logits.shape)
probabilities = torch.nn.functional.softmax(last_token_logits)
# select the top 10 tokens with the highest probability
amt = 10
top_prob, top_indices = torch.topk(probabilities, amt)
top = [ {"prob": top_prob[i], "idx": top_indices[i]} for i in range(len(top_indices))]

torch.Size([1, 6, 32000])
32000
torch.Size([32000])


  probabilities = torch.nn.functional.softmax(last_token_logits)


### Experiment with creating your own token selection

In [5]:
from scipy.spatial.distance import cosine


embedder_id="sentence-transformers/all-mpnet-base-v2"
embedding_tokenizer = transformers.AutoTokenizer.from_pretrained(embedder_id)
embedding_tokenizer.model_max_length = 512
embedding_model = transformers.AutoModel.from_pretrained(embedder_id)

def _get_sentence_embedding(sentence):
    inputs = embedding_tokenizer(sentence, return_tensors='pt', padding=True, truncation=True)
    with torch.no_grad():
        outputs = embedding_model(**inputs)

    # Mean Pooling - Take attention mask into account for correct averaging
    attention_mask = inputs['attention_mask']
    input_mask_expanded = attention_mask.unsqueeze(-1).expand(outputs.last_hidden_state.size()).float()
    sum_embeddings = torch.sum(outputs.last_hidden_state * input_mask_expanded, 1)
    sum_mask = torch.clamp(input_mask_expanded.sum(1), min=1e-9)
    mean_pooled = sum_embeddings / sum_mask

    return mean_pooled[0].numpy()

def sentence_similarity(sent1, sent2):
    embedding1 = _get_sentence_embedding(sent1)
    embedding2 = _get_sentence_embedding(sent2)
    return 1 - cosine(embedding1, embedding2)

  return self.fget.__get__(instance, owner)()


In [31]:
semantic_comparison = "University of Florida"
sc_weight = 3
for i in top:
    similarity_score = sentence_similarity(tokenizer.decode(i["idx"]), semantic_comparison)
    similarity_score_tensor = torch.tensor(similarity_score).to(device)
    i["semantic_similarity"] = similarity_score_tensor
    i["score"] = i["prob"] * i["semantic_similarity"]**sc_weight
print(top)

is_eos = tokenizer.decode(top[0]["idx"]) == tokenizer.eos_token
argmax = max(top, key=lambda x: x["score"])
print(argmax)


[{'prob': tensor(0.1307, device='cuda:0'), 'idx': tensor(6833, device='cuda:0'), 'semantic_similarity': tensor(0.1588, device='cuda:0', dtype=torch.float64), 'score': tensor(0.0005, device='cuda:0', dtype=torch.float64)}, {'prob': tensor(0.1228, device='cuda:0'), 'idx': tensor(459, device='cuda:0'), 'semantic_similarity': tensor(0.0590, device='cuda:0', dtype=torch.float64), 'score': tensor(2.5274e-05, device='cuda:0', dtype=torch.float64)}, {'prob': tensor(0.0844, device='cuda:0'), 'idx': tensor(264, device='cuda:0'), 'semantic_similarity': tensor(0.1646, device='cuda:0', dtype=torch.float64), 'score': tensor(0.0004, device='cuda:0', dtype=torch.float64)}, {'prob': tensor(0.0745, device='cuda:0'), 'idx': tensor(297, device='cuda:0'), 'semantic_similarity': tensor(0.1239, device='cuda:0', dtype=torch.float64), 'score': tensor(0.0001, device='cuda:0', dtype=torch.float64)}, {'prob': tensor(0.0331, device='cuda:0'), 'idx': tensor(1236, device='cuda:0'), 'semantic_similarity': tensor(0.18

In [44]:
a = [{"a":1}]
any(lambda x: x["a"] == 1 for x in a)

True

In [50]:
def semantically_similar_generation(input_text, semantic_comparison, instruct_model, instruct_tokenizer, device, sc_weight=3, amt=10):
    """
    input_text: str, the input text to be continued
    semantic_comparison: str, the sentence to compare the generated outputs to
    instruct_model: transformers model, the model to be used for generation
    instruct_tokenizer: transformers tokenizer, the tokenizer to be used for generation
    device: torch.device, the device to be used for computation
    sc_weight: int, the weight to be used for the semantic similarity score
    amt: int, the amount of top tokens to consider

    This will generate a continuation of the input_text and return the top token that has the highest score unless the eos token is the top token.    
    """
    input_tokenized = instruct_tokenizer(input_text, return_tensors="pt")
    input_ids = input_tokenized["input_ids"].to(device)

    with torch.no_grad(): # prevents backpropagation thus preventing updating the model
        output = instruct_model(input_ids=input_ids) # the output logits of a sequence of tokens
    last_token_logits = output.logits[0, -1, :]
    probabilities = torch.nn.functional.softmax(last_token_logits)
    top_prob, top_indices = torch.topk(probabilities, amt)
    top = [ {"prob": top_prob[i], "idx": top_indices[i]} for i in range(len(top_indices))]
    is_eos = any(i["idx"] == instruct_tokenizer.eos_token_id for i in top)
    if is_eos:
        print("FOUND EOS: ", is_eos)
        return {"idx": instruct_tokenizer.eos_token_id, "prob": 1.0, "semantic_similarity": 1.0, "score": 1.0}

    for i in top:
        similarity_score = sentence_similarity(instruct_tokenizer.decode(i["idx"]), semantic_comparison)
        similarity_score_tensor = torch.tensor(similarity_score).to(device)
        i["semantic_similarity"] = similarity_score_tensor
        i["score"] = i["prob"] * i["semantic_similarity"]**sc_weight

    argmax = max(top, key=lambda x: x["score"])
    return argmax

def ss_full_instruct_generation(input_text, semantic_comparison, instruct_model, instruct_tokenizer, device, sc_weight=3, amt=10, tab=""):
    """
    input_text: str, the input text to be continued
    semantic_comparison: str, the sentence to compare the generated outputs to
    instruct_model: transformers model, the model to be used for generation
    instruct_tokenizer: transformers tokenizer, the tokenizer to be used for generation
    device: torch.device, the device to be used for computation
    sc_weight: int, the weight to be used for the semantic similarity score
    amt: int, the amount of top tokens to consider

    This will recursively generate a instruction using semantically_similar_generation until the eos token is the top token. 
    """
    gen = semantically_similar_generation(input_text, semantic_comparison, instruct_model, instruct_tokenizer, device, sc_weight, amt)
    print("Generation: " + tab + instruct_tokenizer.decode(gen["idx"]) + " > Sc: " + str(round(float(gen["score"]))) + " | Pr: " + str(round(float(gen["prob"]))) + " | SS: " + str(round(float(gen["semantic_similarity"]))))
    if instruct_tokenizer.decode(gen["idx"]) == instruct_tokenizer.eos_token:
        return input_text + instruct_tokenizer.decode(gen["idx"])
    else:
        return ss_full_instruct_generation(input_text + instruct_tokenizer.decode(gen["idx"]), semantic_comparison, instruct_model, instruct_tokenizer, device, sc_weight, amt, tab + "  ")

In [51]:
input_text = "<s>[INST] Select the university of your choice within the south east portion of US, only respond the name nothing else: University of ___ [/INST]"
semantic_comparison = "University of Florida"
generated_instruction = ss_full_instruct_generation(input_text, semantic_comparison, model, tokenizer, device, sc_weight=3, amt=10, tab="")
print("\n\nGenerated Instruction:\n", generated_instruction)

  probabilities = torch.nn.functional.softmax(last_token_logits)


Generation: University > Sc: 0 | Pr: 1 | SS: 1
Generation:   of > Sc: 0 | Pr: 1 | SS: 0
Generation:     Florida > Sc: 0 | Pr: 1 | SS: 1
Generation:       
 > Sc: 0 | Pr: 1 | SS: 0
Generation:         
 > Sc: 0 | Pr: 1 | SS: 0
Generation:           Univers > Sc: 0 | Pr: 0 | SS: 1
Generation:             ity > Sc: 0 | Pr: 1 | SS: 0
Generation:               of > Sc: 0 | Pr: 1 | SS: 0
Generation:                 South > Sc: 0 | Pr: 1 | SS: 0


KeyboardInterrupt: 

In [49]:
generate_text = transformers.pipeline(
    model=model,
    tokenizer=tokenizer,
    task="text-generation",
    temperature=0.01,  # Controls the randomness of outputs
    top_k=1,  # Number of top tokens to consider (0 relies on top_p)
    max_new_tokens=512,  # Limits the number of generated tokens
    repetition_penalty=1.1  # Discourages repetitive outputs
)
generated_text = generate_text(input_text)
print("Generated text: ", generated_text)

Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


Generated text:  [{'generated_text': '<s>[INST] Select the university of your choice within the south east portion of US, only respond the name nothing else: University of ___ [/INST] University of North Carolina at Chapel Hill'}]
