In [1]:
# Offline install: --no-index stops pip from contacting a package index and
# --find-links makes it resolve packages from the given local folder instead.
# -q keeps the output quiet, -U upgrades the package if it is already installed.
!pip install -q -U accelerate --no-index --find-links ../input/llm-detect-pip/
!pip install -q -U bitsandbytes --no-index --find-links ../input/llm-detect-pip/

In [2]:
import sys
# Must run before the third-party imports below so that packages stored in this
# folder are importable (presumably peft and llm_util live there — confirm).
sys.path.insert(0,'/kaggle/input/llm-packages')

import pickle
from functools import partial

import numpy as np
import pandas as pd
import torch
from peft import PrefixTuningConfig,PromptEncoderConfig,PromptTuningConfig,LoraConfig,get_peft_model
from torch.utils.data import DataLoader
from transformers import AutoTokenizer, AutoModelForSequenceClassification,AutoModelForCausalLM,BitsAndBytesConfig

# Star import: names used later in this notebook but not defined here
# (InfData, collate_inf, LM, Classification) presumably come from this module.
from llm_util import *
# Name of the accelerator device.
device = "cuda"
# Number of essays per inference batch (passed to the DataLoader).
batch_size = 8
# Root folder from which the competition CSV is read.
INPUT_DIR = '../input/'
# Folder that holds the saved inference artefacts (config.pkl and weights.pth).
save_model_path = "/kaggle/input/llm-model"



In [3]:
# Lookup from the "config_class" name stored in the saved config to the PEFT
# configuration class that should be instantiated for it.
config_map = dict(
    prefix=PrefixTuningConfig,
    prompt_encoder=PromptEncoderConfig,
    prompt_txt=PromptTuningConfig,
    LoRA=LoraConfig,
)


def load_config(config_dict):
    """Unpack a saved run configuration into a flat 8-tuple.

    Args:
        config_dict: Mapping that must contain the keys "TARGET_MODEL",
            "pred_type", "config_type", "epochs", "alpha", "aug_kwargs",
            "config_class" and "config_kwargs".

    Returns:
        Tuple ``(TARGET_MODEL, pred_type, config_type, epochs, alpha,
        aug_kwargs, config_class, config_kwargs)`` where ``config_class`` is
        the entry of the module-level ``config_map`` selected by
        ``config_dict["config_class"]``; every other element is returned as stored.

    Raises:
        KeyError: If a required key is missing, or if the "config_class" name
            is not present in ``config_map``.
    """
    passthrough_keys = (
        "TARGET_MODEL",
        "pred_type",
        "config_type",
        "epochs",
        "alpha",
        "aug_kwargs",
    )
    passthrough_values = tuple(config_dict[key] for key in passthrough_keys)

    # Resolve the class name first, then read the kwargs, so a bad config fails
    # on the same key it always did.
    peft_config_cls = config_map[config_dict["config_class"]]
    peft_config_kwargs = config_dict["config_kwargs"]

    return (*passthrough_values, peft_config_cls, peft_config_kwargs)

In [4]:
# Read the pickled run configuration saved next to the weights and unpack it
# into the module-level settings used by the rest of the notebook.
# NOTE: unpickling executes code embedded in the file, so config.pkl must come
# from a trusted source.
import pickle

with open(f"{save_model_path}/config.pkl", 'rb') as config_fh:
    config = pickle.load(config_fh)

(
    TARGET_MODEL,
    pred_type,
    config_type,
    epochs,
    alpha,
    aug_kwargs,
    config_class,
    config_kwargs,
) = load_config(config)

In [5]:
# Local checkpoint folder for each supported model identifier; indexed with
# TARGET_MODEL when the tokenizer and the base model are loaded.
model_folder_map = {
    "mistralai/Mistral-7B-v0.1": '/kaggle/input/mistral-7b-v0-1/Mistral-7B-v0.1',
}

Data

In [6]:
# Essays to score.
test_df = pd.read_csv(f"{INPUT_DIR}llm-detect-ai-generated-text/test_essays.csv", sep=",")

# The tokenizer is loaded from the local checkpoint folder; the EOS token is
# reused as the padding token.
tokenizer = AutoTokenizer.from_pretrained(model_folder_map[TARGET_MODEL])
tokenizer.pad_token = tokenizer.eos_token

if pred_type != 'LM':
    # Non-LM runs pass no prompt to the collate function.
    prompt = prompt_mask = None
else:
    # LM runs tokenize a fixed question (without special tokens) and hand its
    # ids and attention mask to the collate function.
    prompt_encoding = tokenizer.batch_encode_plus(
        ['Is this essay AI-generated, yes or no?'],
        add_special_tokens=False,
        return_tensors='pt',
    )
    prompt = prompt_encoding['input_ids']
    prompt_mask = prompt_encoding['attention_mask']

# Batches are produced in file order (shuffle=False) so predictions line up
# with the rows of test_df.
val_data = InfData(test_df)
inference_collate_fn = partial(
    collate_inf,
    tokenizer=tokenizer,
    prompt=prompt,
    prompt_mask=prompt_mask,
)
val_loader = DataLoader(
    val_data,
    batch_size=batch_size,
    shuffle=False,
    collate_fn=inference_collate_fn,
)

Model

In [7]:
# Quantisation settings: 4-bit NF4 weights with double quantisation and
# bfloat16 as the compute dtype.
nf4_config = BitsAndBytesConfig(
   load_in_4bit=True,
   bnb_4bit_quant_type="nf4",
   bnb_4bit_use_double_quant=True,
   bnb_4bit_compute_dtype=torch.bfloat16
)

# The head type follows the prediction mode: causal LM for 'LM', sequence
# classification otherwise.
base_class = AutoModelForCausalLM if pred_type == 'LM' else AutoModelForSequenceClassification
base_model = base_class.from_pretrained(
    model_folder_map[TARGET_MODEL],
    quantization_config=nf4_config,
    device_map={"": 0},  # put every module on device index 0
)

# Wrap the quantised base model with the PEFT adapter described by the saved config.
peft_config = config_class(**config_kwargs)
model = get_peft_model(base_model, peft_config)
model.config.pad_token_id = tokenizer.pad_token_id

# strict=False tolerates keys that are absent from the checkpoint (presumably
# it stores only the fine-tuned parameters — confirm against the training
# code). Checkpoint keys that match nothing in the model would otherwise be
# dropped silently, leaving untrained weights in place, so report them.
load_report = model.load_state_dict(torch.load(save_model_path+'/weights.pth'),strict=False)
if load_report.unexpected_keys:
    print(
        f"WARNING: {len(load_report.unexpected_keys)} checkpoint key(s) did not match "
        f"the model and were ignored, e.g. {load_report.unexpected_keys[:5]}"
    )

model = model.half()
# Modules added by get_peft_model are created in training mode; switch the
# whole model to eval mode so dropout is disabled during inference.
model.eval()

if pred_type == 'LM':
    model_lm = LM(model,tokenizer,config_kwargs.get('num_virtual_tokens', 0),alpha,config_type)
else:
    model_lm = Classification(model,alpha)

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

Predict

In [8]:
# Score every batch and collect the outputs as NumPy arrays on the CPU.
# Inference needs no gradients: torch.no_grad() stops autograd from recording
# the forward pass, which lowers GPU memory use.
yhat = []
with torch.no_grad():
    for input_ids, attention_mask in val_loader:
        # Use the notebook-level `device` setting rather than a hard-coded string.
        input_ids = input_ids.to(device)
        attention_mask = attention_mask.to(device)
        out = model_lm.predict(input_ids, attention_mask).detach().cpu().numpy()
        yhat.append(out)
# Join the per-batch arrays along the first axis, in loader order.
yhat = np.concatenate(yhat)

In [9]:
# Submission file: the essay ids from test_df next to the model outputs,
# written as CSV without the index column.
sub = test_df[['id']].assign(generated=yhat)
sub.to_csv('submission.csv', index=False)