Source: https://github.com/conceptofmind/toolformer.git

In [None]:
!pip -q install -U accelerate
!pip -q install sentencepiece
!pip -q install datasets

[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m265.7/265.7 kB[0m [31m4.4 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.3/1.3 MB[0m [31m6.0 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m507.1/507.1 kB[0m [31m4.2 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m115.3/115.3 kB[0m [31m10.2 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m134.8/134.8 kB[0m [31m8.0 MB/s[0m eta [36m0:00:00[0m
[?25h

In [None]:
import re
import os
import json
import time
import copy
import torch
import random

import nltk
nltk.download('punkt')
from nltk import tokenize

from torch import nn
from typing import List

from datasets import load_dataset

from dataclasses import dataclass
import dateutil.parser as dparser

from transformers import AutoModelForCausalLM, AutoTokenizer, AutoModel
from transformers import (PreTrainedTokenizerBase, pipeline, PreTrainedModel, TextGenerationPipeline)

[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Unzipping tokenizers/punkt.zip.


In [None]:
#API for retrieval
"""
retrieval
Uses Carptriever to retrieve sentences before the current context.
input_sentences - List[String], sentences to retrieve from
input_text - String, the input text (e.g. The dog's name is)
k - The number of sentences to retrieve
output - List[String]; each entry is a retrieved sentence followed by the sentence that comes right after it.
"""

def mean_pooling(token_embeddings: torch.Tensor, mask: torch.Tensor):
    """Average token embeddings along dim 1, ignoring positions where ``mask`` is zero.

    Masked-out positions are replaced by 0.0 before summing, and the sum is
    divided by the per-row sum of ``mask``.

    :param token_embeddings: tensor indexed as (row, position, feature)
    :param mask: tensor indexed as (row, position); non-zero marks a kept position
    :return: one pooled vector per row
    """
    keep = mask.unsqueeze(-1).bool()
    summed = token_embeddings.masked_fill(~keep, 0.0).sum(dim=1)
    kept_count = mask.sum(dim=1).unsqueeze(-1)
    return summed / kept_count

class Retriever:
    """Sentence retriever backed by the "CarperAI/carptriever-1" encoder.

    Sentences and the query are embedded one at a time with mean pooling and
    ranked by the dot product between the query and each sentence embedding.
    """

    def __init__(self):
        # Use the GPU when there is one, but do not fail outright on CPU-only hosts.
        self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
        self.model = AutoModel.from_pretrained("CarperAI/carptriever-1", add_pooling_layer=False).to(self.device)
        self.tokenizer = AutoTokenizer.from_pretrained("CarperAI/carptriever-1")

    def _embed(self, text: str) -> torch.Tensor:
        """Return the mean-pooled embedding of ``text`` as a (1, dim) tensor."""
        encoded = self.tokenizer(text, padding=True, truncation=True, return_tensors="pt")
        # Move every tensor the tokenizer produced instead of a fixed list of keys.
        inputs = {name: tensor.to(self.device) for name, tensor in encoded.items()}
        with torch.no_grad():
            outputs = self.model(**inputs)
            return mean_pooling(outputs[0], inputs["attention_mask"])

    def retrieval(self, input_sentences: List[str], input_text: str, k: int) -> List[str]:
        """Return the ``k`` best-matching sentences, each joined with its successor.

        :param input_sentences: sentences to retrieve from (not modified)
        :param input_text: the query text
        :param k: number of results to return
        :return: up to ``k`` strings ordered by descending score. Each is
            "<sentence> <next sentence>"; the final sentence of
            ``input_sentences`` has no successor and is returned on its own.
            When ``k`` exceeds ``len(input_sentences)`` the input list is
            returned unchanged and unranked.
        """
        if k > len(input_sentences):
            # I'd error but LMs do stupid stuff sometimes
            return input_sentences
        if not input_sentences:
            # Nothing to embed (only reachable with k <= 0).
            return []

        sentence_embeddings = torch.concat([self._embed(sentence) for sentence in input_sentences], 0)
        query_embedding = self._embed(input_text)
        scores = (query_embedding @ sentence_embeddings.transpose(0, 1)).cpu().tolist()[0]

        # One ranking over indices keeps each sentence tied to its own successor.
        # sorted() is stable, so equal scores keep their original order.
        ranked = sorted(range(len(input_sentences)), key=lambda idx: scores[idx], reverse=True)[:k]

        results = []
        last_index = len(input_sentences) - 1
        for idx in ranked:
            if idx < last_index:
                results.append(input_sentences[idx] + " " + input_sentences[idx + 1])
            else:
                # The last sentence has nothing after it; never append the query here.
                results.append(input_sentences[idx])
        return results

In [None]:
# Few-shot prompt that asks the language model to insert "[Retrieval(term)]" calls.
# parse_article() replaces the "<REPLACEGPT>" placeholder with the passage text and
# then appends the same passage again after the trailing "Output:" line.
retrieval_prompt = """
Your task is to complete a given piece of text.
You can use a Retrieval API to look up information from previous sentences.
You can do so by writing "[Retrieval(term)]" where "term" is the search term you want to look up.
Here are some examples of API calls:
Input: As we mentioned before, the colors on the flag of Ghana have the following meanings: red is for the blood of martyrs, green for forests, and gold for mineral wealth.
Output: As we mentioned before, the colors on the flag of Ghana have the following meanings: red is for [Retrieval("Ghana flag color")] the blood of martyrs, green for forests, and gold for mineral wealth.
Input: But what are the risks during production of nanomaterials? Some nanomaterials may give rise to various kinds of lung damage.
Output: But what are the risks during production of nanomaterials? [Retrieval("nanomaterial production risks")] Some nanomaterials may give rise to various kinds of lung damage.
Input: Metformin is the first-line drug for patients with type 2 diabetes and obesity.
Output: Metformin is the first-line drug for [Retrieval("illness, diabetes, obesity")] patients with type 2 diabetes and obesity.
Input: <REPLACEGPT>
Output:
"""

In [None]:
def filter_continuations(input_logits, labels):
    """Pick the positions where the model is most likely to open an API call.

    Reads the module-level ``start_tokens``, ``M`` and ``k_values``.

    :param input_logits: logits indexed as (batch, position, vocab)
    :param labels: token ids indexed as (batch, position)
    :return: the ``torch.topk`` result (values, indices) over the per-position
        start-token probability, taken along dim 1
    """
    token_probs = input_logits.softmax(dim=-1)

    # Positions whose label already is a start token must not be selected.
    label_is_start = torch.stack([labels == token_id for token_id in start_tokens]).sum(dim=0)
    keep_mask = 1.0 - label_is_start

    # Highest probability among the start-token variants at every position.
    start_prob = torch.stack([token_probs[:, :, token_id] for token_id in start_tokens]).amax(dim=0)
    start_prob = start_prob * keep_mask

    # The last M + 1 positions are excluded from the candidates.
    candidate_window = start_prob[:, : -(M + 1)]
    return torch.topk(candidate_window, k=k_values, dim=1)  # values and indices

In [None]:
def create_candidates(indices,values,input_tokens,labels,input_start,model,tokenizer,generator,criterion):
    """Generate API-call completions for every sufficiently likely insertion point.

    Reads the module-level ``minimum_percentage``, ``M`` and ``api_text``.

    :param indices: candidate positions per batch row (as returned by filter_continuations)
    :param values: start-token probabilities matching ``indices``
    :param input_tokens: prompt plus passage token ids
    :param labels: target token ids used for the baseline loss
    :param input_start: column in ``input_tokens`` where the passage starts
    :param model: causal LM used for the baseline logits
    :param tokenizer: tokenizer used to decode the text handed to ``generator``
    :param generator: text-generation callable returning a list of dicts
    :param criterion: loss applied to the baseline logits
    :return: ``(outputs, num_to_keeps, texts_to_test, max_index)``. ``outputs``
        holds one list of generation dicts per kept candidate, each extended
        with "index", "base_loss" and "base_outputs".
    """
    num_generations = 5  # completions requested per candidate
    outputs = []
    num_to_keeps = []
    texts_to_test = []
    max_index = 0

    passage_tokens = input_tokens[:, input_start:]
    # The passage logits do not depend on the candidate, so run the model at
    # most once, and only if some candidate passes the threshold.
    passage_logits = None

    for i, batch in enumerate(indices):
        for j, index in enumerate(batch):
            if values[i][j] < minimum_percentage:
                continue
            if passage_logits is None:
                passage_logits = model(passage_tokens.cuda()).logits
            # Get base output
            base_outputs = passage_logits[:, index : index + M]
            # Number of passage tokens from this position to the end
            num_keep = int(passage_tokens.shape[1] - index)
            # Calculate loss without API
            base_loss = criterion(
                base_outputs.view(-1, base_outputs.size(-1)),
                labels[:, index : index + M].cuda().view(-1),
            )
            # For padding later
            max_index = max(max_index, index)
            # Text up to the insertion point, followed by the opening of the API call
            texts_to_test.append(tokenizer.decode(input_tokens[:, : input_start + index][i]) + f" [{api_text}")
            generations = generator(texts_to_test[-1], max_new_tokens=28, num_return_sequences=num_generations)
            # Add additional items to generation outputs...
            for generation in generations:
                generation["index"] = int(index)
                generation["base_loss"] = float(base_loss.item())
                generation["base_outputs"] = base_outputs
            outputs.append(generations)
            # So we know where to look
            num_to_keeps.append(num_keep)
    return outputs, num_to_keeps, texts_to_test, max_index

In [None]:
_SHARED_RETRIEVER = None


def _get_retriever():
    """Return one shared Retriever, loading its model only on first use."""
    global _SHARED_RETRIEVER
    if _SHARED_RETRIEVER is None:
        _SHARED_RETRIEVER = Retriever()
    return _SHARED_RETRIEVER


def add_api_calls(candidate,outputs,texts_to_test,tokenizer,input_tokens,input_start,nums_to_keep,base_loss, *args, **kwargs):
    """Execute the Retrieval calls found in the generations and build model inputs.

    Reads the module-level ``N`` and ``MAX_LEN``. Every dict in ``outputs`` is
    updated in place with "Retrieval" and "Generated"; those containing a
    closed call also receive "Retrieval_text" and "Retrieval_output".

    :param candidate: index of this candidate in ``texts_to_test`` / ``nums_to_keep``
    :param outputs: generation dicts for this candidate (need "generated_text")
    :param texts_to_test: prompts that were passed to the generator
    :param tokenizer: tokenizer used to encode the API-call text
    :param input_tokens: prompt plus passage token ids
    :param input_start: column in ``input_tokens`` where the passage starts
    :param nums_to_keep: per-candidate count of trailing passage tokens
    :param base_loss: loss without an API call, passed through unchanged
    :param args: ``args[0]`` must be the list of sentences to retrieve from
    :return: ``(generated_texts, max_token_len, max_token_len_base)`` where each
        entry of ``generated_texts`` is
        ``[test_inputs, base_inputs, num_to_keep, base_loss, output_dict]``
    """
    retrieval_strings = args[0]
    generated_texts = []
    max_token_len = N
    max_token_len_base = N
    passage_tokens = input_tokens[:, input_start:].cuda()
    prompt_text = texts_to_test[candidate]

    for output in outputs:
        output["Retrieval"] = output["generated_text"].replace(prompt_text, "")
        output["Generated"] = output["generated_text"].split("Output:")[-1]
        if "]" not in output["Retrieval"]:
            # The generation never closed the API call; nothing to execute.
            continue

        # Keep only the search term: drop the call prefix and anything from "]" / ")" on.
        query = output["Retrieval"].replace("Retrieval(", "").split("]")[0]
        if ")" in query:
            query = query.split(")")[0]
        call_text = "[Retrieval(" + query + ")"

        # Call without its result.
        base_inputs = tokenizer(call_text + "]" + "\n", return_tensors="pt")["input_ids"].cuda()

        # Reuse one Retriever instead of reloading its model for every generation.
        retrieved = _get_retriever().retrieval(retrieval_strings, query, 3)
        joined = ", ".join(retrieved)
        output["Retrieval"] = retrieved
        output["Retrieval_output"] = [call_text[1:], joined]
        output["Retrieval_text"] = call_text + "->" + joined + "]"

        # Call with its result, followed by the passage.
        test_inputs = tokenizer(output["Retrieval_text"] + "\n", return_tensors="pt")["input_ids"].cuda()
        test_inputs = torch.concat([test_inputs, passage_tokens], dim=1)
        if test_inputs.shape[1] > MAX_LEN:
            continue

        base_inputs = torch.concat([base_inputs, passage_tokens], dim=1)

        max_token_len = max(max_token_len, test_inputs.shape[1])
        # Track the longest *base* sequence so the base batch is not over-padded.
        max_token_len_base = max(max_token_len_base, base_inputs.shape[1])

        generated_texts.append(
            [
                test_inputs,
                base_inputs,
                nums_to_keep[candidate],
                base_loss,
                output,
            ]
        )
    return generated_texts, max_token_len, max_token_len_base

In [None]:
def _pad_right(token_ids, target_len):
    """Right-pad a (1, seq_len) id tensor with zeros to ``target_len``; return (tensor, pad_count)."""
    pad_count = target_len - token_ids.shape[1]
    if pad_count != 0:
        filler = torch.zeros((1, pad_count), dtype=token_ids.dtype, device=token_ids.device)
        token_ids = torch.cat((token_ids, filler), dim=1)
    return token_ids, pad_count


def _window_loss(sequence_logits, pad_count, num_to_keep, target, criterion):
    """Loss over the M positions that start ``num_to_keep`` before the unpadded end."""
    if pad_count != 0:
        sequence_logits = sequence_logits[:-pad_count]
    window = sequence_logits[-num_to_keep : -(num_to_keep - M)]
    return criterion(window.view(-1, window.size(-1)), target)


def generate_continuations(input_tokens, input_logits, labels, model,tokenizer, *args, **kwargs):
    """Propose API calls for a passage and keep the most helpful one per position.

    For every candidate position the loss over an M-token window is computed
    with the API call and its result (test), with the call but no result
    (base), and without any call ("base_loss" from create_candidates). A
    generation is scored as ``min(base, base_loss) - test`` and the highest
    scoring generation is kept.

    Reads the module-level ``M``. Extra positional and keyword arguments are
    forwarded to ``add_api_calls``.

    :param input_tokens: prompt plus passage token ids
    :param input_logits: logits for the trailing passage positions
    :param labels: target token ids for the passage
    :param model: causal LM used for generation and scoring
    :param tokenizer: tokenizer matching ``model``
    :return: one entry per candidate position: either ``None`` (no usable
        generation) or the best generation dict extended with "Score" and
        "base_api_loss"
    """
    # Setup token stuff...
    input_start = input_tokens.shape[1] - input_logits.shape[1]
    start_str = tokenizer.decode(input_tokens[:, :input_start][0])
    # Find top tokens...
    values, indices = filter_continuations(input_logits, labels)
    # setup generation calls...
    generator = pipeline("text-generation", model=model, tokenizer=tokenizer, device=0)  # type: TextGenerationPipeline
    criterion = nn.CrossEntropyLoss()
    score_floor = -99.0  # generations scoring at or below this are never selected

    with torch.no_grad():
        outputs, num_to_keeps, texts_to_test, max_index = create_candidates(
            indices, values, input_tokens, labels, input_start, model, tokenizer, generator, criterion,
        )
        for i in range(len(outputs)):
            generated_texts, max_token_len, max_token_len_base = add_api_calls(
                i, outputs[i], texts_to_test, tokenizer, input_tokens, input_start, num_to_keeps,
                outputs[i][0]["base_loss"], *args, **kwargs,
            )
            if len(generated_texts) == 0:
                outputs[i] = None
                continue

            # Shape the batches: pad every sequence to a common length and
            # remember the padding so it can be stripped from the logits again.
            test_batch, test_pads, base_batch, base_pads = [], [], [], []
            for entry in generated_texts:
                padded, pad_count = _pad_right(entry[0], max_token_len)
                test_batch.append(padded)
                test_pads.append(pad_count)
                padded, pad_count = _pad_right(entry[1], max_token_len_base)
                base_batch.append(padded)
                base_pads.append(pad_count)

            test_outputs = model(torch.cat(test_batch, dim=0)).logits
            base_outputs = model(torch.cat(base_batch, dim=0)).logits

            best_score = score_floor
            best_output = None
            best_api_loss = None
            for j, entry in enumerate(generated_texts):
                num_to_keep = entry[2]
                generation = entry[4]
                target = labels[:, -num_to_keep : -(num_to_keep - M)].cuda().view(-1)
                test_loss = _window_loss(test_outputs[j], test_pads[j], num_to_keep, target, criterion).item()
                api_only_loss = _window_loss(base_outputs[j], base_pads[j], num_to_keep, target, criterion).item()
                generation["generated_text"] = generation["generated_text"].replace(start_str, "")
                # Use the same quantity for comparing and for the stored score.
                score = min(api_only_loss, generation["base_loss"]) - test_loss
                if score > best_score:
                    best_score = score
                    best_output = generation
                    best_api_loss = api_only_loss

            if best_output is None:
                # No generation beat the floor (e.g. a NaN loss); drop the candidate.
                outputs[i] = None
                continue
            best_output["Score"] = float(best_score)
            # Report the call-without-result loss of the generation that was kept.
            best_output["base_api_loss"] = float(best_api_loss)
            best_output.pop("base_outputs", None)  # logits tensor is not part of the result
            outputs[i] = best_output
    return outputs  # individual candidate outputs

In [None]:
def parse_article(data, model, tokenizer):
    """Collect high-scoring Retrieval API calls for one article.

    The article is processed in windows of ``N`` tokens addressed from the end
    of the token sequence. Reads the module-level ``N`` and ``retrieval_prompt``.

    :param data: record with a "text" entry
    :param model: causal LM used for scoring and generation
    :param tokenizer: tokenizer matching ``model``
    :return: list of ``[score, token_index, call_text, retrieved_text]`` for
        every candidate whose score is above 1.0
    """
    tokens = tokenizer(data["text"], return_tensors="pt")["input_ids"]
    total_len = tokens.shape[1]
    first_step = 2048//N
    ret_skip = 1024//N  # naively assuming the model should be able to look back if it's less than this.
    step_count = total_len//N
    kept = []

    for i in range(first_step, step_count):
        window_start = int(total_len + (-N * (i + 1)))
        window_end = int(total_len + (-N * i))
        # The input window sits one token before the label window.
        input_tokens = tokens[:, (-N * (i + 1) - 1) : (-N * (i) - 1)]
        labels = tokens[:, window_start:window_end]
        # NOTE(review): because ret_skip is subtracted from i, this slice ends
        # later in the document than the input window does - confirm intended.
        ret_tokens = tokens[:, : (-(N) * ((i - ret_skip) + 1) - 1)]

        string = tokenizer.decode(input_tokens[0])
        ret_strings = tokenize.sent_tokenize(tokenizer.decode(ret_tokens[0]))
        prompt = retrieval_prompt.replace("<REPLACEGPT>", string) + string
        model_input = tokenizer(prompt, return_tensors="pt")["input_ids"]

        with torch.no_grad():
            passage_logits = model(model_input.cuda()).logits.cpu()[:, -N:]

        candidates = generate_continuations(model_input, passage_logits, labels, model, tokenizer, ret_strings)
        for candidate in candidates:
            if candidate is None:
                continue
            # Shift the window-relative position by the window's start offset.
            candidate["index"] += window_start
            # filter by score
            if candidate["Score"] > 1.0:
                kept.append([candidate["Score"], candidate["index"]] + candidate["Retrieval_output"])
    return kept

In [None]:
@dataclass
class AvailableAPIs:
    """Flags recording which APIs can be used; every flag defaults to True."""

    retrieval: bool = True
    calendar: bool = True
    calculator: bool = True

    def check_any_available(self):
        """Return True when at least one of the three flags is truthy."""
        return bool(self.retrieval or self.calendar or self.calculator)

def check_apis_available(data: dict, tokenizer: PreTrainedTokenizerBase) -> AvailableAPIs:
    """
    Returns available APIs with boolean flags

    - retrieval: disabled when the text has fewer than 4096 tokens.
    - calendar: disabled when no date can be parsed from ``data["url"]``
      (including when the entry is missing).
    - calculator: enabled when a 100-token chunk matches the arithmetic
      pattern and contains an equality phrase; otherwise the first
      non-matching chunk gets a single 1-in-100 random chance, provided it
      contains at least three numeric tokens.

    :param data: from load_dataset, assumes ['text'] is available
    :param tokenizer: Tokenizer to tokenize data
    :return: AvailableAPIs
    """
    tokenized_data = tokenizer(data["text"])["input_ids"]
    available = AvailableAPIs()
    # In case we need a different version, found this here:
    # https://stackoverflow.com/questions/28198370/regex-for-validating-correct-input-for-calculator
    # NOTE(review): the ^...$ anchors require the whole chunk to be one
    # arithmetic expression - confirm that this is what is wanted.
    calc_pattern = re.compile(r"^(\d+[\+\-\*\/]{1})+\d+$")
    if len(tokenized_data) < 4096:
        available.retrieval = False
    try:
        # Only whether parsing succeeds matters; the parsed date is not used.
        dparser.parse(data["url"], fuzzy=True)
    except (KeyError, TypeError, ValueError, OverflowError):
        available.calendar = False

    available.calculator = False
    tried_rand = False
    equality_markers = ("=", "equal to", "total of", "average of")
    for i in range(len(tokenized_data) // 100):
        text = tokenizer.decode(tokenized_data[i * 100 : (i + 1) * 100])
        operators = bool(calc_pattern.search(text))
        equals = any(marker in text for marker in equality_markers)
        if operators and equals:
            available.calculator = True
            break
        if not tried_rand:
            # One random attempt per article; later chunks must match for real.
            tried_rand = True
            words = text.replace("\n", " ").split(" ")
            numbers = [item for item in words if item.replace(".", "", 1).isnumeric()]
            if len(numbers) >= 3 and random.randint(0, 99) == 0:
                available.calculator = True
                break
    return available

In [None]:
#Configs
# These module-level values are read as globals by the functions defined in the cells above.
#N = 64  # SEQ Len
N = 128  # SEQ Len: tokens per window in parse_article; also the minimum padded length in add_api_calls
M = 16  # Min Loss Span To Consider: number of positions each loss is computed over
MAX_LEN = 1024  # Maximum retrieval length: add_api_calls skips inputs with more tokens than this

# Default topk generation, might be better to pass it in
k_values = 5  # k passed to torch.topk in filter_continuations
minimum_percentage = 0.1  # create_candidates skips positions whose start-token probability is below this
api_text = "Retrieval("  # appended after " [" to open the API call handed to the generator

In [None]:
#Load tokenizer
gpt_tokenizer = AutoTokenizer.from_pretrained("EleutherAI/gpt-neo-1.3B")
#Load model (moved to the GPU right away, so a CUDA device is required)
model = AutoModelForCausalLM.from_pretrained("EleutherAI/gpt-neo-1.3B").cuda()
#Load dataset (streaming=True: records are fetched while iterating instead of downloading the split first)
dataset = load_dataset("c4", "en", split="train", streaming=True)

tokenizer_config.json:   0%|          | 0.00/200 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/1.35k [00:00<?, ?B/s]

vocab.json:   0%|          | 0.00/798k [00:00<?, ?B/s]

merges.txt:   0%|          | 0.00/456k [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/90.0 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/5.31G [00:00<?, ?B/s]

You can avoid this message in future by passing the argument `trust_remote_code=True`.
Passing `trust_remote_code=True` will be mandatory to load this dataset from the next major release of `datasets`.


Downloading builder script:   0%|          | 0.00/3.29k [00:00<?, ?B/s]

Downloading metadata:   0%|          | 0.00/2.40M [00:00<?, ?B/s]

Downloading readme:   0%|          | 0.00/7.77k [00:00<?, ?B/s]

In [None]:
#tokenize
# Token ids of the few-shot prompt (not referenced by the other cells shown here).
prompt_tokens = gpt_tokenizer(retrieval_prompt, return_tensors="pt")["input_ids"]
# First token id of "[" with and without a leading space; filter_continuations
# reads start_tokens to find positions where an API call is likely to begin.
start_tokens = [gpt_tokenizer("[")["input_ids"][0],
                gpt_tokenizer(" [")["input_ids"][0],
                ]
# Same for the closing bracket (not referenced by the other cells shown here).
end_tokens = [gpt_tokenizer("]")["input_ids"][0],
              gpt_tokenizer(" ]")["input_ids"][0],
              ]

In [None]:
#input
# One shared iterator over the dataset; the main loop pulls records from it with next().
iter_data = iter(dataset)

In [None]:
num_devices=1
device_id=0

# Target number of examples for this device. It has to exist before the
# already-found examples of an earlier run are subtracted from it.
num_examples = int(25000.0/float(num_devices))

# Resume from this device's checkpoint file when there is one.
# NOTE(review): the cell that runs the main loop assigns these names again -
# confirm the resumed values are the ones it ends up using.
checkpoint_path = f"retrieval_data_{device_id}.json"
if os.path.isfile(checkpoint_path):
    with open(checkpoint_path) as f:
        output_dataset = json.load(f)
    if output_dataset:  # an empty checkpoint has nothing to resume from
        start_count = output_dataset[-1]['file_index']
        num_examples -= sum(len(item['retrieval_outputs']) for item in output_dataset)

In [None]:
test = False
counter = 0
file_counter = 0
found_examples = 0
output_dataset = list()
# Wall-clock reference for the ETA; process_time() would only count CPU time.
start_time = time.monotonic()
num_examples = int(25000.0/float(num_devices))
start_count = -1

# Resume from this device's checkpoint if one exists: keep the saved articles,
# continue after the last saved article and only look for the missing examples.
checkpoint_path = f"retrieval_data_{device_id}.json"
if os.path.isfile(checkpoint_path):
    with open(checkpoint_path) as f:
        output_dataset = json.load(f)
    if output_dataset:
        start_count = output_dataset[-1]["file_index"] + 1
        num_examples -= sum(len(item["retrieval_outputs"]) for item in output_dataset)

#C => C* (only retrieval)
while found_examples < num_examples:
    data = next(iter_data, None)
    if data is None:
        # Dataset exhausted before enough examples were found.
        break
    # Show the record that is actually processed; do not pull (and lose) another one.
    print(data)
    if file_counter < start_count:
        file_counter += 1
        continue
    # Each device only handles its own share of the articles.
    if file_counter % num_devices != device_id:
        file_counter += 1
        continue
    available = check_apis_available(data, gpt_tokenizer)
    test = available.retrieval
    if test:
        data_outputs = parse_article(data, model, gpt_tokenizer)
        output_dataset.append(
            {
                "file_index": file_counter,
                "text": data["text"],
                "retrieval_outputs": data_outputs
            }
        )
        prev_found = found_examples
        found_examples += len(data_outputs)
        # Remaining examples times the average seconds spent per found example.
        elapsed = time.monotonic() - start_time
        eta_total = max(0.0, (num_examples - found_examples) * elapsed / max(1, found_examples))
        eta_h, eta_rest = divmod(eta_total, 3600)
        eta_m, eta_s = divmod(eta_rest, 60)
        print(f"Found: {found_examples}/{num_examples}, ETA: {eta_h}H:{eta_m}M:{eta_s}s")
        print(data_outputs)
        # Write a checkpoint every time the count passes another multiple of 100.
        if found_examples//100 > prev_found//100:
            with open(checkpoint_path, 'w') as f:
                json.dump(output_dataset, f, indent=2)
        counter += 1
    file_counter += 1

{'text': 'Discussion in \'Mac OS X Lion (10.7)\' started by axboi87, Jan 20, 2012.\nI\'ve got a 500gb internal drive and a 240gb SSD.\nWhen trying to restore using disk utility i\'m given the error "Not enough space on disk ____ to restore"\nBut I shouldn\'t have to do that!!!\nAny ideas or workarounds before resorting to the above?\nUse Carbon Copy Cloner to copy one drive to the other. I\'ve done this several times going from larger HDD to smaller SSD and I wound up with a bootable SSD drive. One step you have to remember not to skip is to use Disk Utility to partition the SSD as GUID partition scheme HFS+ before doing the clone. If it came Apple Partition Scheme, even if you let CCC do the clone, the resulting drive won\'t be bootable. CCC usually works in "file mode" and it can easily copy a larger drive (that\'s mostly empty) onto a smaller drive. If you tell CCC to clone a drive you did NOT boot from, it can work in block copy mode where the destination drive must be the same siz

Token indices sequence length is longer than the specified maximum sequence length for this model (2944 > 2048). Running this sequence through the model will result in indexing errors


{'text': 'The OTR 1840 is a CWDM Fiber Optic to SDI transmitter and receiver combined in a compact self contained package. It is a convenient and cost effective solution to combat the restrictions involved with the distribution of uncompressed broadcast quality video signals over long distances.\nEach OTR 1840 CWDM tranceiver has an independant transmitter and receiver channel, which provides an effective solution for any SDI signal up to 1080p60 (3Gbit/s) while preserving full uncompressed quality. Select from 18 transmitter wavelengths for full CWDM compatibility (ITU-T G.694.2).\nThe OTR 1840 will auto-detect and re-clock any 270Mbit / 1.5Gbit and 3Gbit SDI source prior to conversion. The module is fully compatible with 3Gbit Level A and Level B formats.', 'timestamp': '2019-04-22T04:51:38Z', 'url': 'https://www.lynx-technik.com/products/yellobrik/fiber-conversion/otr-1840-3gbit-fiber-optic-sdi-transceiver-cwdm-40km/'}
{'text': 'Geweldige plek in een geweldige locatie!\nAre there an

Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


config.json:   0%|          | 0.00/685 [00:00<?, ?B/s]

pytorch_model.bin:   0%|          | 0.00/1.34G [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/348 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/711k [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/125 [00:00<?, ?B/s]

Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


Found: 4/25000, ETA: 251.0H:13.0M:37.297781817032956s
[[2.7904794216156006, 2691, 'Retrieval(["Administration", "planning", "design", "implementation")', 'CARDET is one of the leading research and training centres in the eastern Mediterranean region with global expertise in project design and implementation, project management, training, and e-learning. CARDET has completed numerous projects relating to the development of adult and vocational training initiatives in the areas of financial literacy, innovation, and entrepreneurship., Furthermore, Polibienestar has been advising both the Administration and private companies on the planning, design and implementation of social welfare and sustainable resources and policies. Areas of research: Health and Social policies, Vulnerable groups, Governance and public administrations, Corporate economics, Smart Cities, Tourism., EKEPY’s areas of intervention are, in a few words, Crisis Management in the Health Sector, Accidents and Devastations, 

Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


Found: 6/25000, ETA: 299.0H:10.0M:34.9645730627235s
[[3.883902072906494, 1981, 'Retrieval(("baboons, klipspringers, waterfalls")', 'There are baboons, grysbok, grey rhebuck, klipsringer, leopards and caracal - with the last two seldom being sighted. The road is narrow in certain sections and some reversing might be required to get past an oncoming vehicle., If you get to drive this pass around sunset, this rock face glows a deep and fiery red in the setting sun and has earned itself the nickname of "Wall of Fire"\nHere you are likely to see baboons and klipspringers, the latter which seem to float up the mountainside with no apparent effort. If you have the time, you will see that as part Bain\'s dry-walled roads, he constructed underground tunnels to disperse flood waters., This is also the kloof that was featured in the TV commercial with well known South African singer, song-writer, playwright and actor - David Kramer, chasing his beloved Volksie Bus up one of the hairpins in his tr

Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


Found: 11/25000, ETA: 266.0H:57.0M:38.6440514883725s
{'text': 'When visiting MJI, you will be greeted by a friendly and professional staff. Awaiting you is a comfortable and quiet customer lounge where you can enjoy WiFi, television, and a hot cup of premium coffee. We have a large and clean work area filled with the most up to date equipment, including BMW and MINI specific tire install and wheel alignment machines. We understand that your time is valuable, so we strive to provide same day repairs. We do this by stocking a huge inventory of BMW and MINI parts and accessories to ensure that our expert technicians and service writers can get straight to work on your vehicle.\nMJI also offers shuttle service, vehicle pick up and drop off, and loaner vehicles to customers that are in need of alternative transportation. This is provided at no additional charge. When you choose MJI, you will be pleased by how easy we make it to affordably maintain your BMW or MINI Cooper. Come and join us a

Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


In [None]:
#Save C*
# Write everything collected so far to this device's JSON file, replacing any earlier checkpoint.
with open(f"retrieval_data_{device_id}.json", 'w') as f:
    json.dump(output_dataset, f, indent=2)

In [None]:
#Original model (Colab pro)
# Rebinds gpt_tokenizer and model to GPT-J 6B, replacing the GPT-Neo objects loaded earlier.
gpt_tokenizer = AutoTokenizer.from_pretrained("EleutherAI/gpt-j-6B")
# NOTE(review): revision="sharded" presumably selects a branch with sharded weights - confirm it still exists.
model = AutoModelForCausalLM.from_pretrained("EleutherAI/gpt-j-6B",
                                                 revision="sharded",
                                                 #torch_dtype=torch.float16,
                                                 low_cpu_mem_usage=True,
                                                 ).cuda()