# Code Generation with ReplitLM
A notebook to generate code in any of the programming languages for which the [ReplitLM](https://github.com/replit/ReplitLM) model has been trained.  
No hardware acceleration required.  

Install the missing requirements in the Colab VMs.

In [None]:
!pip install transformers sentencepiece einops accelerate

Download the model and tokenizer using Hugging Face's Transformers library.

In [None]:
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM, set_seed

# Checkpoint to load from the Hugging Face Hub.
model_id = "replit/replit-code-v1-3b"

# Special-token strings and the prompt-length cap. The token constants are not
# referenced by the cells visible in this notebook; MAX_INPUT_TOKENS is used
# when the prompt is tokenized.
PAD_TOKEN = "<|pad|>"
EOS_TOKEN = "<|endoftext|>"
UNK_TOKEN = "<|unk|>"
MAX_INPUT_TOKENS = 1024

# Use the GPU when one is present, otherwise fall back to the CPU.
device = "cuda" if torch.cuda.is_available() else "cpu"

# Truncate from the left so that an over-long prompt keeps its most recent
# tokens, which matter most when continuing code.
tokenizer = AutoTokenizer.from_pretrained(model_id, trust_remote_code=True)
tokenizer.truncation_side = "left"

# Load the weights once; only the GPU path moves them to the device and casts
# them to bfloat16. On CPU the model stays as loaded.
model = AutoModelForCausalLM.from_pretrained(
    model_id,
    trust_remote_code=True,
    low_cpu_mem_usage=True,
)
if device == "cuda":
    model = model.to(device, dtype=torch.bfloat16)
model.eval()

Set the prompt and the values of other model parameters.

In [None]:
# Generation settings. Each line carries a `#@param` annotation; keep those
# annotations intact when editing the values by hand.
#   prompt             - text that is tokenized and handed to the model.
#   max_tokens         - length limit given to `model.generate`.
#   temperature, top_p, top_k, repetition_penalty
#                      - sampling controls forwarded to `model.generate`.
#   seed               - passed to `set_seed` right before generation.
#   use_cache          - forwarded to `model.generate` as `use_cache`.
prompt = 'def sieve_eratosthenes(n):' #@param {type:"string"}
max_tokens = 112 #@param {type:"slider", min:10, max:1024, step:1}
temperature = 0.2 #@param {type:"slider", min:0.1, max:2.5, step:0.1}
seed = 42 #@param {type:"slider", min:0, max:1000, step:1}
top_p = 0.9 #@param {type:"slider", min:0, max:1, step:0.1}
top_k = 4 #@param {type:"slider", min:1, max:64, step:1}
repetition_penalty = 1.0 #@param {type:"slider", min:1.0, max:1.9, step:0.1}
use_cache = True #@param {type:"boolean"}


Generate code.

In [None]:
# Tokenize the prompt into a tensor on the model's device. Prompts longer than
# MAX_INPUT_TOKENS are truncated by the tokenizer.
model_input = tokenizer.encode(
    prompt,
    return_tensors="pt",
    max_length=MAX_INPUT_TOKENS,
    truncation=True,
).to(device)

# Seed the random number generators immediately before sampling so the same
# settings give the same output on every run.
set_seed(seed)

# `max_tokens` bounds the number of NEWLY generated tokens. Passing it as
# `max_length` would count the prompt tokens as well, so a prompt that is
# already `max_tokens` long (e.g. after feeding the previous output back in as
# the prompt) would leave no room for any new code.
model_output = model.generate(
    model_input,
    max_new_tokens=max_tokens,
    do_sample=True,
    top_p=top_p,
    top_k=top_k,
    temperature=temperature,
    num_return_sequences=1,
    repetition_penalty=repetition_penalty,
    pad_token_id=tokenizer.pad_token_id,
    eos_token_id=tokenizer.eos_token_id,
    use_cache=use_cache,
)

Decode the generated output.

In [None]:
# Convert the first returned sequence back into text, dropping special tokens
# and leaving the tokenizer's spacing untouched. The result is also stored as
# the new `prompt`, so re-running the generation cell starts from this text.
prompt = generated_code = tokenizer.decode(
    model_output[0],
    skip_special_tokens=True,
    clean_up_tokenization_spaces=False,
)
print(generated_code)