<a href="https://colab.research.google.com/github/wangjing0/others/blob/main/inference_with_fine_tuned_mistral_for_QA.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [18]:
# Requires a GPU runtime; this notebook was run on a Colab T4 (Runtime > Change runtime type > T4 GPU).

In [11]:
%%capture
# Installs Unsloth, Xformers (Flash Attention) and all other packages!
# The %%capture magic above suppresses pip's output for this cell.
# Unsloth is installed straight from its GitHub repository, so its version is not pinned here.
!pip install "unsloth[colab-new] @ git+https://github.com/unslothai/unsloth.git"
# --no-deps installs only the packages listed, without resolving their dependencies;
# trl is capped below 0.9.0.
!pip install --no-deps xformers "trl<0.9.0" peft accelerate bitsandbytes

In [12]:
from unsloth import FastLanguageModel
from typing import Dict, List, Tuple, Union, Any
import pandas
from tqdm import trange, tqdm
import torch

# Fetch the fine-tuned checkpoint and its tokenizer from the Hugging Face Hub.
load_options = {
    "model_name": "jingwang/foo",
    "max_seq_length": 2048,
    # NOTE(review): None presumably lets Unsloth choose the dtype for the GPU — confirm.
    "dtype": None,
    "load_in_4bit": True,
}
model, tokenizer = FastLanguageModel.from_pretrained(**load_options)

adapter_config.json:   0%|          | 0.00/737 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/1.15k [00:00<?, ?B/s]

==((====))==  Unsloth: Fast Mistral patching release 2024.6
   \\   /|    GPU: Tesla T4. Max memory: 14.748 GB. Platform = Linux.
O^O/ \_/ \    Pytorch: 2.3.0+cu121. CUDA = 7.5. CUDA Toolkit = 12.1.
\        /    Bfloat16 = FALSE. Xformers = 0.0.26.post1. FA = False.
 "-____-"     Free Apache license: http://github.com/unslothai/unsloth


model.safetensors:   0%|          | 0.00/4.14G [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/111 [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/137k [00:00<?, ?B/s]

tokenizer.model:   0%|          | 0.00/587k [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/560 [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.96M [00:00<?, ?B/s]

Unsloth: Will load unsloth/mistral-7b-v0.3-bnb-4bit as a legacy tokenizer.


adapter_model.safetensors:   0%|          | 0.00/168M [00:00<?, ?B/s]

Unsloth 2024.6 patched 32 layers with 32 QKV layers, 32 O layers and 32 MLP layers.


In [13]:
# prompt template used for fine-tuning

class FormatPrompt():
    '''Callable that renders a QA instance into the instruction prompt.

    Attributes:
        inputs: keys every instance must contain (``['question']``).
        outputs: keys holding the target text for training, i.e. the fields
            the model is expected to produce at inference (``['answer']``).
        eos_token: string appended after the answer when the instance has one.
    '''
    def __init__(self, eos_token:str='</s>') -> None:
        self.inputs = ['question'] # required input fields
        self.outputs = ['answer'] #  for training, and model inference output fields
        self.eos_token = eos_token

    def __call__(self, instance: Dict[str, Any]) -> str:
        '''
        Format one instance with the default domain handling.

        Args:
            instance: mapping with a 'question' key and, optionally,
                'answer' and 'domain' keys
        Returns:
            prompt: formatted prompt
        Raises:
            AssertionError: if a required input key is missing
        '''
        return self.formatting_prompt_func(instance)

    def formatting_prompt_func(self, instance: dict, domain: str='anything') -> str:
        '''Format a prompt for domain-specific QA.

        Note this is for fine-tuning a pre-trained model; if starting with an
        instruct-tuned model, use `tokenizer.apply_chat_template(messages)` instead.

        Args:
            instance: mapping with a 'question' key. An optional 'domain' key
                takes precedence over the ``domain`` argument. An optional
                'answer' key is appended to the prompt followed by
                ``self.eos_token`` (training-style example).
            domain: fallback domain when the instance has none; ``None`` is
                replaced by 'general knowledge'.
        Returns:
            The formatted prompt string.
        Raises:
            AssertionError: if any key listed in ``self.inputs`` is missing.
        '''
        # Raise explicitly instead of `assert cond, print(...)`: print() returns
        # None, so the old error had no message, and asserts vanish under -O.
        missing = [key for key in self.inputs if key not in instance]
        if missing:
            raise AssertionError(
                f"instance must have {self.inputs}! missing: {missing}"
            )

        if 'domain' in instance:
            domain = str(instance['domain'])
        elif domain is None:
            domain = 'general knowledge'
        else:
            domain = str(domain)

        # The template text, including the indentation carried inside the
        # triple-quoted string, is kept exactly as written.
        # NOTE(review): presumably it must match the fine-tuning prompts — confirm before reformatting.
        prompt = f"""<s> [INST] You are a specialist in {domain}. Your task is to generate an explanation for the given terminology.\
        Your answer should be direct, accurate, and thorough. Your tone should be scientific, coherent and conversational.
        Question: What is "{str(instance["question"])}" ? [/INST]
        Answer: """

        if 'answer' in instance:
            prompt += str(instance['answer']) + self.eos_token

        return prompt

In [14]:
# Build the prompt formatter with its default EOS token, then show the
# input and output field names it works with.
formatting_func = FormatPrompt()
(formatting_func.inputs, formatting_func.outputs)

(['question'], ['answer'])

In [15]:
# Small hand-written evaluation set: (term, reference definition) pairs.
_eval_pairs = [
    ('Coacervation',
     'The phase separation process that leads to the formation of dense, ordered clusters of molecules, often proteins or polymers, from a dilute solution.'),
    ('Active Transport',
     'The movement of molecules across a cell membrane from lower to higher concentration, requiring energy.'),
    ('Pulse-Chase Experiment',
     "A method used to examine a cellular process over time by first 'labeling' a molecule and then following its fate."),
    ('Ultracentrifugation',
     'A separation technique that uses centrifugal force to separate particles from a solution according to their size, shape, density, viscosity of the medium, and rotor speed.'),
]

# 'question' is listed before 'answer' so the key order of each record is unchanged.
data_eval = [
    {'question': term, 'answer': definition}
    for term, definition in _eval_pairs
]

In [16]:
# Assemble the evaluation frame in one chain: tag every row with its domain and
# rename 'answer' to 'gold_answer' so the formatter, which appends an 'answer'
# field when present, leaves the prompt open for the model to complete.
df_eval = (
    pandas.DataFrame(data_eval)
    .assign(domain='biophysics')
    .rename(columns={'answer': 'gold_answer'})
)

# Render one prompt per row from the row's record dict.
df_eval['prompt'] = [formatting_func(record) for record in df_eval.to_dict('records')]
df_eval


Unnamed: 0,question,gold_answer,domain,prompt
0,Coacervation,The phase separation process that leads to the...,biophysics,<s> [INST] You are a specialist in biophysics....
1,Active Transport,The movement of molecules across a cell membra...,biophysics,<s> [INST] You are a specialist in biophysics....
2,Pulse-Chase Experiment,A method used to examine a cellular process ov...,biophysics,<s> [INST] You are a specialist in biophysics....
3,Ultracentrifugation,A separation technique that uses centrifugal f...,biophysics,<s> [INST] You are a specialist in biophysics....


In [20]:
# Put the Unsloth-patched model into its inference mode before generating.
FastLanguageModel.for_inference(model)

# Collect responses in a list and attach them as a column once at the end,
# instead of growing the column cell by cell inside the loop.
generations = []

# total= lets tqdm render a real progress bar; iterrows() has no length.
for _, row in tqdm(df_eval.iterrows(), total=len(df_eval)):
    # NOTE(review): the prompt already starts with a literal "<s>" and the tokenizer
    # is called with its default special-token handling — confirm this matches training.
    inputs = tokenizer([row['prompt']], return_tensors="pt", padding=False).to(model.device)
    input_length = inputs.input_ids.shape[-1]

    with torch.no_grad():
        output = model.generate(
            **inputs,
            # Greedy decoding. `temperature` is not passed because it only applies
            # to sampling; supplying it with do_sample=False has no effect.
            do_sample=False,
            max_new_tokens=32,
            pad_token_id=tokenizer.eos_token_id,
            # NOTE(review): kept from the original; disabling the KV cache presumably
            # slows decoding — confirm whether it is still required.
            use_cache=False,
        )

    # Drop the prompt tokens so only the newly generated text is decoded.
    response = tokenizer.decode(output[0][input_length:], skip_special_tokens=True)
    print(response)
    generations.append(response)

df_eval['_generation'] = generations


1it [00:06,  6.73s/it]

The process by which certain proteins and polysaccharides aggregate to form liquid-filled spheres.


2it [00:12,  6.32s/it]

The movement of molecules across a cell membrane, requiring energy to overcome a concentration gradient.


3it [00:18,  5.96s/it]

A technique used to study the movement of a labeled molecule through a biological system.


4it [00:23,  5.93s/it]

A method used to separate mixtures of molecules by centrifugal force.





In [21]:
# Display the evaluation frame; after the generation loop it holds a `_generation` column alongside `gold_answer`.
df_eval

Unnamed: 0,question,gold_answer,domain,prompt,_generation
0,Coacervation,The phase separation process that leads to the...,biophysics,<s> [INST] You are a specialist in biophysics....,The process by which certain proteins and poly...
1,Active Transport,The movement of molecules across a cell membra...,biophysics,<s> [INST] You are a specialist in biophysics....,The movement of molecules across a cell membra...
2,Pulse-Chase Experiment,A method used to examine a cellular process ov...,biophysics,<s> [INST] You are a specialist in biophysics....,A technique used to study the movement of a la...
3,Ultracentrifugation,A separation technique that uses centrifugal f...,biophysics,<s> [INST] You are a specialist in biophysics....,A method used to separate mixtures of molecule...
