Install the base DRUGS library, plus the packages needed to fit a 7B model within Colab's RAM limits.

In [None]:
!pip install git+https://github.com/EGjoni/DRUGS.git
!pip install bitsandbytes accelerate

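Import the dependencies and load the model. (The tweak that makes output text wrap in Colab for easier reading is registered in the Dosage cell further down.)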

In [None]:
import sys

import accelerate #necessary to fit in colab
import bitsandbytes #necessary to fit in colab
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig, GenerationConfig, TextStreamer

from drugs.dgenerate import DRUGS

# Checkpoint to load. Feel free to change this to a better model,
# but only Llama 2 variants are supported at the moment.
model_id = "NousResearch/Llama-2-7b-chat-hf"

tokenizer = AutoTokenizer.from_pretrained(model_id)
# Reuse the end-of-sequence token as the padding token.
tokenizer.pad_token_id = tokenizer.eos_token_id

# Load the weights in 8-bit. The quantization settings are passed through a
# BitsAndBytesConfig object; handing `load_in_8bit=True` directly to
# from_pretrained is the deprecated spelling of the same request.
sober_model = AutoModelForCausalLM.from_pretrained(
    model_id,
    device_map="auto",
    quantization_config=BitsAndBytesConfig(load_in_8bit=True),
)
sober_model.eval()  # inference only

# Streamer handed to generation in the chat cell.
streamer = TextStreamer(tokenizer)

# Wrap the unmodified ("sober") model; `model` is the noise-injected handle used
# for generation, while `drugs` exposes the dose setters used by the form cells.
drugs = DRUGS()
model = drugs.inject(sober_model)

In [None]:
# @title Dosage { run: "auto", form-width: "50%", display-mode: "form" }
# @markdown How much noise to inject. Range technically 0 - to infinity. Where pi is way too much (as it can allow for vectors that point in the exact opposite direction). You can go even higher, but shouldn't.
# The `# @param` annotation below makes Colab render this value as a slider (0 to 1 in steps of 0.001).
dose_theta = 0.101 # @param {type:"slider", min:0, max:1, step:0.001}
# Apply the dose to the "A" target. You can also specify K_dose_theta, V_dose_theta,
# Q_dose_theta, or any combination of the four.
drugs.set_A_dose_theta(dose_theta)
# If you change the target type here, make sure you also update the type in the
# dose_shape cell after this one.


#sneaking this in here. Just makes the text wrap in cell outputs so it doesn't run offscreen.
from IPython.display import display, HTML
def set_css(*_event_args):
  """Inject a <style> block that makes <pre> output wrap instead of overflowing.

  Intended to be registered as an IPython 'pre_run_cell' callback. IPython's
  documented prototype for that event passes an execution-info object, so any
  positional arguments are accepted and ignored; calling set_css() with no
  arguments keeps working as before.
  """
  display(HTML('''
  <style>
    pre {
        white-space: pre-wrap;
    }
  </style>
  '''))

# Every execution of this cell defines a brand-new set_css object, and the form
# above is `run: "auto"`, so the cell re-executes often. Unregister the callback
# installed by the previous execution first; otherwise callbacks pile up and
# each later cell gets several copies of the <style> block.
_shell_events = get_ipython().events
try:
  _shell_events.unregister('pre_run_cell', _registered_set_css)
except (NameError, ValueError):
  pass #first execution of this cell, or the callback is no longer registered
_registered_set_css = set_css
_shell_events.register('pre_run_cell', _registered_set_css)


In [None]:
# @title DRµG profile { run: "auto" }
# @markdown ###**Advanced control. Lets you specify how much various depths of the network are injected with how much noise.**
# @markdown ---
# @markdown How deep to inject noise. 0 corresponds to first layer, 1 corresponds to last layer.
injection_depth = 0.4 # @param {type:"slider", min:0, max:1, step:0.001}
# @markdown How many adjacent layers the noise should affect. 0 means no adjacent layers, 1 means all of the layers in the model. 0.5 means half the total number of model layers on either side.
spread = 0.301 # @param {type:"slider", min:0, max:1, step:0.001}

# The profile is a (sites, mode) pair. Each site maps a relative depth to a
# 'peakratio' (a multiple of dose_theta). The four sites below form a plateau:
# ramp up from 0 noise, hold at the full dose across the spread, cool down to 0.
# Add as many injection sites as you want.
drug_profile = (
    [
        {'depth': injection_depth + offset, 'peakratio': ratio}
        for offset, ratio in (
            (-(spread*1.01), 0), #ramp up from 0 noise
            (-spread, 1),        #start of the sustained peak at max (dose_theta) noise
            (spread, 1),         #end of the sustained peak at max (dose_theta) noise
            (spread*1.01, 0),    #cooldown to 0 noise
        )
    ],
    'ceil', #other options include 'floor', and 'interpolate'
)
# Each profile (A, K, Q, or V) can be independently injected into different layers,
# if you are especially picky about what your noise is doing to which things.
drugs.set_A_dose_shape(drug_profile)

# @markdown ---
# @markdown **You can edit the code for more finegrained control over which layers get how much noise.**

## Chat

By default this notebook prompts you for input. If viewed in a browser, a dialog may pop up asking you to say something (but in Colab it should ask for input directly in the cell).

Note that all variety in the model's responses is due purely to the noise being injected: the selected token is ALWAYS whatever the model thinks is the most likely one!

In [None]:
#If you don't see an input prompt, stop the cell and run it again. Not sure why it's finicky.

# Interactive chat. The loop has no natural end, so stopping the cell (or an
# exhausted stdin) is the way out; that interruption is caught below so the
# chat ends with a short message instead of a traceback.
try:
  # First turn: system persona plus the user's opening question.
  # NOTE(review): the leading "\b" in the prompt is kept as-is; it looks like a
  # display workaround - confirm before removing.
  initial_input = input("\bAsk Something:")
  tokenized_start = tokenizer.apply_chat_template([
      {'role': 'system',
      'content': 'You are Alan Watts.'},
      {'role': 'user',
       'content': initial_input}
  ], return_tensors='pt')
  model.dgenerator.past_key_values = None #temporary hack to clear cache.
  with torch.no_grad():
    while True:
      # Generate the reply for the pending turn, passing the streamer along.
      generated_tokens = model.Dgenerate(
              input_ids = tokenized_start,
              streamer = streamer
          )
      print("\n\nAsk Something:", end="")
      model.cold_shower(True) #Sets the kv-cache back to theoretically pure baseline, if this is important to you.
      # Follow-up turns encode only the new user message.
      # NOTE(review): presumably the earlier turns are carried by the kv-cache - confirm.
      await_input = input(": ")
      tokenized_start = tokenizer.apply_chat_template([{
          'role': 'user',
          'content': await_input}], return_tensors="pt")
except (KeyboardInterrupt, EOFError):
  print("\n\nChat ended.")

## NOTE: If you don't see an input box when running the cell above, stop the cell and run it again.
(Not sure why it's finicky.)