In [16]:
# %%
import os
from os import path
import pandas as pd
import numpy as np
import glob

# %%
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, Trainer, TrainingArguments

# %%
from datasets import load_dataset
from datasets import Dataset, Features, Value


from accelerate import Accelerator
from peft import LoraConfig, get_peft_model

# Definitions

In [17]:
HOME=os.path.expanduser('~')
LIFE2SCENARIO_ROOT_PATH=path.join(HOME,"Documents/life2scenario/")
DATASET_ROOT_PATH=path.join(LIFE2SCENARIO_ROOT_PATH,"life2scenario_minimal/dataset/train/")

print(DATASET_ROOT_PATH)

/mnt/home/yucedago/Documents/life2scenario/life2scenario_minimal/dataset/train/


In [18]:

# %%
PROMPTS_ROOT=path.join(DATASET_ROOT_PATH, "prompts")
REFERENCE_ROOT=path.join(DATASET_ROOT_PATH, "ref_scenarios")
TARGET_ROOT=path.join(DATASET_ROOT_PATH, "target_scenarios")

# %%
PREP_PICKLES_ROOT=path.join(LIFE2SCENARIO_ROOT_PATH, "prep_pickles")

CHECKPOINT="/mnt/home/yucedago/Documents/life2scenario_core/starcoder/train/checkpoints_v2/checkpoint-210"

# Checkpoint Load

In [3]:
import os

device = "cuda" if torch.cuda.is_available() else "cpu"
os.environ["CUDA_VISIBLE_DEVICES"] = "0,1"

In [11]:
def get_gpus_max_memory(max_memory):
    max_memory = {i: max_memory for i in range(torch.cuda.device_count())}
    return max_memory

orig_checkpoint = "bigcode/starcoderbase-1b"
tokenizer = AutoTokenizer.from_pretrained(orig_checkpoint, load_in_8bit=True)
# to save memory consider using fp16 or bf16 by specifying torch_dtype=torch.float16 for example
model = AutoModelForCausalLM.from_pretrained(CHECKPOINT, torch_dtype=torch.float16,
        use_auth_token=True,
        load_in_8bit=True,
        max_memory=get_gpus_max_memory("70GB"))



In [12]:
model

GPTBigCodeForCausalLM(
  (transformer): GPTBigCodeModel(
    (wte): Embedding(49152, 2048)
    (wpe): Embedding(8192, 2048)
    (drop): Dropout(p=0.1, inplace=False)
    (h): ModuleList(
      (0): GPTBigCodeBlock(
        (ln_1): LayerNorm((2048,), eps=1e-05, elementwise_affine=True)
        (attn): GPTBigCodeAttention(
          (c_attn): lora.Linear8bitLt(
            (base_layer): Linear8bitLt(in_features=2048, out_features=2304, bias=True)
            (lora_dropout): ModuleDict(
              (default): Dropout(p=0.05, inplace=False)
            )
            (lora_A): ModuleDict(
              (default): Linear(in_features=2048, out_features=16, bias=False)
            )
            (lora_B): ModuleDict(
              (default): Linear(in_features=16, out_features=2304, bias=False)
            )
            (lora_embedding_A): ParameterDict()
            (lora_embedding_B): ParameterDict()
          )
          (c_proj): lora.Linear8bitLt(
            (base_layer): Linear8bitLt(i

# Load the dataset

In [19]:
train_final = pd.read_csv(path.join(PREP_PICKLES_ROOT, "train_dataset.csv"))

train_final = train_final[["request", "response"]]
train_final.head()

Unnamed: 0,request,response
0,would you add pedestrian close to hero?\n```\n...,"Here is the result:\n```\n<?xml version=""1.0"" ..."
1,i would like to command you to add pedestrian ...,"Here is the result:\n```\n<?xml version=""1.0"" ..."
2,i would like to request you to add pedestrian ...,"Here is the result:\n```\n<?xml version=""1.0"" ..."
3,i would like to tell you to add pedestrian at ...,"Here is the result:\n```\n<?xml version=""1.0"" ..."
4,add pedestrian close to hero?\n```\n<?xml vers...,"Here is the result:\n```\n<?xml version=""1.0"" ..."


# Generate

In [22]:
def save_model_output(output: str, save_location: str):
    with open(save_location, "w") as f:
        f.write(output)
        print(f"Saved to {save_location}")

In [20]:
data_prompt = train_final["request"][0]

In [23]:
inputs = tokenizer.encode(f"{data_prompt}", return_tensors="pt").to(device)
outputs = model.generate(inputs)
# clean_up_tokenization_spaces=False prevents a tokenizer edge case which can result in spaces being removed around punctuation
response = tokenizer.decode(outputs[0], clean_up_tokenization_spaces=False)

save_model_output(response, "output")

The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:0 for open-end generation.


Saved to output


