## Let's implement CCS (Contrast-Consistent Search) from scratch.
This is deliberately a simple (but less efficient) implementation, written to make every step as clear as possible.

In [1]:
from tqdm.auto import tqdm
import copy
import numpy as np
import pandas as pd
from matplotlib import pyplot as plt

import torch
import torch.nn as nn
import torch.nn.functional as F
from torch import Tensor
from torch import optim

import pickle
import hashlib
from pathlib import Path
import os
# os.environ["HF_DATASETS_OFFLINE"] = "0"
from datasets import load_dataset
import datasets
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM, AutoModelForMaskedLM, AutoModelForCausalLM
from transformers import LlamaTokenizer, LlamaForCausalLM
from sklearn.linear_model import LogisticRegression

import lightning.pytorch as pl
from dataclasses import dataclass
from torch.utils.data import random_split, DataLoader, TensorDataset
from transformers.models.auto.modeling_auto import AutoModel
# from scipy.stats import zscore
from sklearn.metrics import f1_score, roc_auc_score, accuracy_score
from sklearn.preprocessing import RobustScaler
import gc

import sys

from loguru import logger

# loguru starts with a default stderr handler already installed. Adding another
# stderr sink on top of it prints every INFO+ record twice, and re-running this
# cell would stack one more sink each time. Dropping the existing handlers first
# keeps the cell idempotent and leaves exactly one sink with the format and
# level configured below.
logger.remove()
logger.add(sys.stderr, format="{time} {level} {message}", level="INFO")

import os

# Model

In [2]:
# Keyword arguments forwarded verbatim to `from_pretrained` when the base
# checkpoint is loaded further down in this cell.
model_options = {
    "device_map": "auto",
    # Alternative quantisation mode, currently disabled:
    # "load_in_8bit": True,
    "load_in_4bit": True,
    "torch_dtype": torch.float16,
}

# Checkpoint selection.
#
# Each entry maps a short label to a pair (base model repo, LoRA adapter repo).
# A LoRA repo of `None` means the base checkpoint is used without an adapter.
# Exactly one entry is chosen through SELECTED_MODEL, so the active
# configuration no longer depends on which of several successive
# reassignments happens to come last.
MODEL_CANDIDATES = {
    # 7B
    "llama-7b+gpt4-alpaca-lora": (
        "Neko-Institute-of-Science/LLaMA-7B-HF",
        "chansung/gpt4-alpaca-lora-7b",
    ),
    # 13B
    "llama-13b+gpt4-alpaca-lora": (
        "Neko-Institute-of-Science/LLaMA-13B-HF",
        "chansung/gpt4-alpaca-lora-13b",
    ),
    "wizard-vicuna-13b-uncensored": (
        "TheBloke/Wizard-Vicuna-13B-Uncensored-HF",
        None,
    ),
    # 30B
    "openassistant-sft-7-llama-30b": (
        "TheBloke/OpenAssistant-SFT-7-Llama-30B-HF",
        None,
    ),
    "llama-30b-supercot": (
        "ausboss/llama-30b-supercot",
        None,
    ),
    "guanaco-33b-merged": (
        "timdettmers/guanaco-33b-merged",
        None,
    ),
    "llama-30b+gpt4-alpaca-lora": (
        "Neko-Institute-of-Science/LLaMA-30B-HF",
        "chansung/gpt4-alpaca-lora-30b",
    ),
}

# Label of the checkpoint to load; change this single line to switch models.
SELECTED_MODEL = "llama-30b+gpt4-alpaca-lora"

model_repo, lora_repo = MODEL_CANDIDATES[SELECTED_MODEL]
    
# Load the tokenizer and the base causal LM from the selected hub repo;
# `model_options` supplies the loading keyword arguments.
tokenizer = LlamaTokenizer.from_pretrained(model_repo)
model = AutoModelForCausalLM.from_pretrained(model_repo, **model_options)

# Wrap the base model with a LoRA adapter only when one was selected.
use_lora_adapter = lora_repo is not None
if use_lora_adapter:
    # Loading recipe follows
    # https://github.com/tloen/alpaca-lora/blob/main/generate.py#L40
    from peft import PeftModel

    # An explicit single-device map ({'': 0}) is the alternative to "auto".
    peft_load_kwargs = {
        "torch_dtype": torch.float16,
        "device_map": "auto",
    }
    model = PeftModel.from_pretrained(model, lora_repo, **peft_load_kwargs)

Downloading tokenizer.model:   0%|          | 0.00/500k [00:00<?, ?B/s]

Downloading (…)cial_tokens_map.json:   0%|          | 0.00/411 [00:00<?, ?B/s]

Downloading (…)okenizer_config.json:   0%|          | 0.00/727 [00:00<?, ?B/s]

Downloading (…)lve/main/config.json:   0%|          | 0.00/502 [00:00<?, ?B/s]

Downloading (…)fetensors.index.json:   0%|          | 0.00/50.1k [00:00<?, ?B/s]

Downloading shards:   0%|          | 0/7 [00:00<?, ?it/s]

Downloading (…)of-00007.safetensors:   0%|          | 0.00/9.82G [00:00<?, ?B/s]

Downloading (…)of-00007.safetensors:   0%|          | 0.00/9.96G [00:00<?, ?B/s]

Downloading (…)of-00007.safetensors:   0%|          | 0.00/9.90G [00:00<?, ?B/s]

Downloading (…)of-00007.safetensors:   0%|          | 0.00/9.87G [00:00<?, ?B/s]

Downloading (…)of-00007.safetensors:   0%|          | 0.00/9.87G [00:00<?, ?B/s]

Downloading (…)of-00007.safetensors:   0%|          | 0.00/9.96G [00:00<?, ?B/s]

Downloading (…)of-00007.safetensors:   0%|          | 0.00/5.69G [00:00<?, ?B/s]


Welcome to bitsandbytes. For bug reports, please run

python -m bitsandbytes

 and submit this information together with your error trace to: https://github.com/TimDettmers/bitsandbytes/issues
bin /home/ubuntu/mambaforge/envs/dlk2/lib/python3.9/site-packages/bitsandbytes/libbitsandbytes_cuda117.so
CUDA SETUP: CUDA runtime path found: /home/ubuntu/mambaforge/envs/dlk2/lib/libcudart.so
CUDA SETUP: Highest compute capability among GPUs detected: 8.6
CUDA SETUP: Detected CUDA version 117
CUDA SETUP: Loading binary /home/ubuntu/mambaforge/envs/dlk2/lib/python3.9/site-packages/bitsandbytes/libbitsandbytes_cuda117.so...


Either way, this might cause trouble in the future:
If you get `CUDA error: invalid device function` errors, the above might be the cause and the solution is to make sure only one ['libcudart.so', 'libcudart.so.11.0', 'libcudart.so.12.0'] in the paths that we search based on your env.
  warn(msg)


Loading checkpoint shards:   0%|          | 0/7 [00:00<?, ?it/s]

Downloading (…)neration_config.json:   0%|          | 0.00/132 [00:00<?, ?B/s]

Downloading (…)/adapter_config.json:   0%|          | 0.00/429 [00:00<?, ?B/s]

Downloading adapter_model.bin:   0%|          | 0.00/205M [00:00<?, ?B/s]