# Comparison between different prompt engineering tactics

## Prepare model and dataset

### Prepare model

In [2]:
import os

import numpy as np
from ctransformers import AutoModelForCausalLM, AutoTokenizer
# Repository id and GGUF weights file name that are passed to
# AutoModelForCausalLM.from_pretrained in the next cell.
model_name = "TheBloke/zephyr-7B-alpha-GGUF"
model_file = "zephyr-7b-alpha.Q2_K.gguf"

  from .autonotebook import tqdm as notebook_tqdm


In [3]:
print(f"Using model: {model_name}")
# NOTE(review): gpu_layers=0 presumably keeps every layer on the CPU, and hf=True
# presumably wraps the model so it can be used with transformers — confirm in the
# ctransformers documentation.
zephyr_model = AutoModelForCausalLM.from_pretrained(model_name, model_file=model_file, model_type="llama", gpu_layers=0, hf=True)
# The tokenizer is created from the loaded model object rather than from a repo name.
zephyr_tokenizer = AutoTokenizer.from_pretrained(zephyr_model)

Using model: TheBloke/zephyr-7B-alpha-GGUF


Fetching 1 files: 100%|██████████| 1/1 [00:00<?, ?it/s]
Fetching 1 files: 100%|██████████| 1/1 [00:00<?, ?it/s]


In [4]:
from transformers import pipeline, TextStreamer

# The streamer is attached to the pipeline; the raw model outputs shown under the
# evaluation cells below come from it, not from explicit print calls.
streamer = TextStreamer(zephyr_tokenizer)
# `generate` is the callable used by every evaluation function in this notebook.
generate = pipeline("text-generation", model=zephyr_model, tokenizer=zephyr_tokenizer, streamer=streamer)

### Prepare data

In [5]:
from datasets import load_dataset
# Load the 'imdb' dataset; its splits are displayed in the next cell.
dataset = load_dataset('imdb')

In [6]:
# Show the available splits together with their features and row counts.
dataset

DatasetDict({
    train: Dataset({
        features: ['text', 'label'],
        num_rows: 25000
    })
    test: Dataset({
        features: ['text', 'label'],
        num_rows: 25000
    })
    unsupervised: Dataset({
        features: ['text', 'label'],
        num_rows: 50000
    })
})

In [7]:
import pandas as pd
# Put the training split into a DataFrame so rows can be selected by position with iloc.
data_pd= pd.DataFrame(dataset['train'])

In [8]:
# Build a small evaluation sample of 20 reviews from two slices of the training data.
# NOTE(review): rows 0-9 are treated as negative and rows 12500-12509 as positive;
# this relies on the row order of the train split — confirm against the `label` column.
data_neg = data_pd.iloc[0:10]
data_pos = data_pd.iloc[12500:12510]
# ignore_index=True renumbers the combined rows 0..n-1, so nothing downstream depends
# on the original row labels and no hard-coded length is needed.
dataset_new = pd.concat([data_neg, data_pos], axis=0, ignore_index=True)

## Zero-shot prompt engineering

In [None]:
# Zero-shot instruction appended after the review: one line introducing the task,
# then one descriptive line per class. The wording is part of the experiment and
# is kept exactly as evaluated.
prompt_template_zero_slot = (
    '\n Given the above review we have two two classes.'
    '\n class_0: In such film reviews, customers complain about something they do not like and think this film is not satisfactory. '
    '\n class_1: In such film reviews, customers show positive emotions, they might mention something that they like.'
)

In [None]:
# Show the zero-shot instruction text exactly as it will appear in the prompt.
print(prompt_template_zero_slot)


 Given the above review we have two two classes.
 class_0: In such film reviews, customers complain about something they do not like and think this film is not satisfactory. 
 class_1: In such film reviews, customers show positive emotions, they might mention something that they like.


In [None]:
def make_prompt_for_sentiment_analysis_1(text):
    """Build the zero-shot classification prompt for a single film review.

    The review is followed by the class descriptions stored in
    ``prompt_template_zero_slot`` and by an instruction asking for the label only.

    Arguments:
        text (str): The review to classify.

    Returns:
        str: The assembled prompt, ending with the ``### Assistant:`` line.
    """
    header = '### System: You are an AI assistant that follows instruction extremely well.\n### Human:\n'
    footer = '\n Please classify this film review into one class out of these two classes,and just output the label without anymore word \n ### Assistant:\n'

    # Every piece is separated from the next by a single space.
    return ' '.join([header, text, prompt_template_zero_slot, footer])

In [13]:
from sklearn.metrics import classification_report

def metrics_for_templates(templates_function,data):
    """Classify every review in ``data`` with the model and print a classification report.

    Arguments:
        templates_function (callable): Turns a (truncated) review text into the full
            prompt string sent to the model.
        data (pandas.DataFrame): Frame with a ``text`` column (reviews) and a
            ``label`` column (gold labels compared against predictions 0/1).

    Returns:
        None. The output of ``classification_report`` is printed.
    """
    labels_gold = []
    labels_pre = []

    # Walk both columns in row order so the result does not depend on the frame's
    # index labels (no need for the caller to reset the index first).
    for text, gold in zip(data['text'], data['label']):
        # Keep only the first 200 whitespace-separated words of the review.
        text_truncated = ' '.join(text.split()[:200])
        prompt = templates_function(text_truncated)
        output = generate(prompt, max_new_tokens=3)
        content = output[0]['generated_text']

        # The generated text normally echoes the prompt first; cut it off by length
        # instead of relying on a fixed character offset. If the prompt is not
        # echoed, fall back to whatever follows the last "Assistant:" marker.
        if content.startswith(prompt):
            completion = content[len(prompt):]
        else:
            completion = content.rpartition("Assistant:")[2]

        # Anything that does not mention class_0 is counted as class 1.
        pre = 0 if 'class_0' in completion else 1
        labels_pre.append(pre)
        labels_gold.append(gold)

    print(classification_report(labels_gold, labels_pre))

    


In [144]:
%%time

# Stream only the newly generated text (the labels shown below), not the prompt.
streamer.skip_prompt = True 
# Evaluate the zero-shot prompt on the 20-review sample.
metrics_for_templates(make_prompt_for_sentiment_analysis_1,dataset_new)

 class_0
 class_0
 class_0
 class_0
 class_0
 class_0
 class_0
 class_0
 class_0
 class_0
 class_0
 class_1
 class_0
 class_1
 class_1
 class_1
 class_1
 class_0
 class_1
 class_1
              precision    recall  f1-score   support

           0       0.77      1.00      0.87        10
           1       1.00      0.70      0.82        10

    accuracy                           0.85        20
   macro avg       0.88      0.85      0.85        20
weighted avg       0.88      0.85      0.85        20

CPU times: total: 35min 46s
Wall time: 10min 37s


## Find few-shot examples

In [193]:
# Find the shortest review in the first half of the test split, to be used as a
# few-shot example.
data_test = pd.DataFrame(dataset['test'])
# NOTE(review): assumes the first 12500 test rows are the negative reviews —
# confirm against the `label` column.
data_test_neg = data_test[0:12500]

# Review length in whitespace-separated words; kept as a list because the next
# cell sorts it with np.argsort.
list_length = [len(review.split()) for review in data_test_neg['text']]

# `index` is a position within data_test_neg, hence iloc below.
index = np.argmin(list_length)
print(list_length[index])
data_test_neg['text'].iloc[index]

4


'Primary plot!Primary direction!Poor interpretation.'

In [194]:
# Positions of the negative test reviews ordered from shortest to longest.
np.argsort(list_length)

array([ 5220,  6845,  8658, ...,  5672, 12431,  8182], dtype=int64)

In [195]:
# Shortest negative review: position 5220 is the first entry of the argsort output above.
data_test_neg['text'].iloc[5220]

'Primary plot!Primary direction!Poor interpretation.'

In [196]:
# Second-shortest negative review: position 6845 is the second entry of the argsort output above.
data_test_neg['text'].iloc[6845]

'Read the book, forget the movie!'

In [185]:
# Same search for the second half of the test split.
# NOTE(review): assumes rows from 12500 onwards are the positive reviews —
# confirm against the `label` column.
data_test_pos = data_test[12500:]

# Review length in whitespace-separated words, in row order.
list_length = [len(review.split()) for review in data_test_pos['text']]

index = np.argmin(list_length)

print(list_length[index])
# `index` is a position within data_test_pos, so select by position instead of
# rebuilding the row label with a hard-coded +12500 offset.
data_test_pos['text'].iloc[index]

10


'Brilliant and moving performances by Tom Courtenay and Peter Finch.'

In [188]:
# Positions (within data_test_pos) of the positive test reviews ordered from shortest to longest.
np.argsort(list_length)

array([ 110, 2258, 9155, ..., 3750, 4012, 8632], dtype=int64)

In [192]:
# 2258 is the second entry of the argsort output above; adding 12500 turns that
# position into the row label, because data_test_pos was sliced from row 12500 onwards.
data_test_pos['text'][2258+12500]

'This is a great movie. Too bad it is not available on home video.'

## Few-shot prompt engineering

In [11]:
# Few-shot demonstrations, part 1: the two short positive reviews found above.
reviews_pos_1, reviews_pos_2 = (
    'Brilliant and moving performances by Tom Courtenay and Peter Finch.',
    'This is a great movie. Too bad it is not available on home video.',
)

# Few-shot demonstrations, part 2: the two short negative reviews found above.
reviews_neg_1, reviews_neg_2 = (
    'Primary plot!Primary direction!Poor interpretation.',
    'Read the book, forget the movie!',
)

In [12]:
# Number the four demonstrations: positives first, negatives second.
review_pos_part = f"First review: {reviews_pos_1}\nSecond review: {reviews_pos_2}"
review_neg_part = f"Third review: {reviews_neg_1}\nFourth review: {reviews_neg_2}"

# The leading empty string makes the template start with a newline.
prompt_template_few_slots = "\n".join([
    "",
    review_pos_part,
    review_neg_part,
    "Above, the first two reviews belong to 'class_1'; the last two reviews belong to 'class_0'.",
])

# Instruction placed between the demonstrations and the review to be classified.
transition = (
    "Please classify the following film review into one class out of these two classes,"
    "and the output format should be the same as 'This review belongs to 'class_X'' \n"
    "New review:"
)

In [13]:
def make_prompt_for_sentiment_analysis_2(text):
    """Build the few-shot classification prompt for a single film review.

    The labelled demonstrations in ``prompt_template_few_slots`` come first,
    followed by the ``transition`` instruction and then the review itself.

    Arguments:
        text (str): The review to classify.

    Returns:
        str: The assembled prompt, ending with the ``### Assistant:`` line.
    """
    header = '### System: You are an AI assistant that follows instruction extremely well.\n### Human:'
    footer = '\n### Assistant:\n'

    # Every piece is separated from the next by a single space.
    return ' '.join([header, prompt_template_few_slots, transition, text, footer])

In [17]:
from sklearn.metrics import classification_report

def metrics_for_templates_2(templates_function,data):
    """Classify every review in ``data`` with the model and print a classification report.

    Allows up to 15 new tokens per review because the prompt asks for a full
    sentence of the form "This review belongs to 'class_X'".

    Arguments:
        templates_function (callable): Turns a (truncated) review text into the full
            prompt string sent to the model.
        data (pandas.DataFrame): Frame with a ``text`` column (reviews) and a
            ``label`` column (gold labels compared against predictions 0/1).

    Returns:
        None. The output of ``classification_report`` is printed.
    """
    labels_gold = []
    labels_pre = []

    # Walk both columns in row order so the result does not depend on the frame's
    # index labels.
    for text, gold in zip(data['text'], data['label']):
        # Keep only the first 200 whitespace-separated words of the review.
        text_truncated = ' '.join(text.split()[:200])
        prompt = templates_function(text_truncated)
        output = generate(prompt, max_new_tokens=15)
        content = output[0]['generated_text']

        # Look for the label only in the model's answer. The prompt itself mentions
        # 'class_0', so it must be cut off reliably: by length when it is echoed,
        # otherwise after the last "Assistant:" marker.
        if content.startswith(prompt):
            completion = content[len(prompt):]
        else:
            completion = content.rpartition("Assistant:")[2]

        # Anything that does not mention class_0 is counted as class 1.
        pre = 0 if "class_0" in completion else 1
        labels_pre.append(pre)
        labels_gold.append(gold)

    print(classification_report(labels_gold, labels_pre))

In [18]:
%%time

# Stream only the newly generated text (the sentences shown below), not the prompt.
streamer.skip_prompt = True 
# Evaluate the few-shot prompt on the same 20-review sample.
metrics_for_templates_2(make_prompt_for_sentiment_analysis_2,dataset_new)

This review belongs to 'class_1'.
This review belongs to 'class_0'.
This review belongs to 'class_0'.
This film review belongs to 'class_0'.
This review belongs to 'class_0'.
This review belongs to 'class_0'.
This review belongs to 'class_0'.
This review belongs to 'class_0'.
This review belongs to 'class_0'.
This review belongs to 'class_0'.
This review belongs to 'class_0'.
This review belongs to 'class_1'.
This review belongs to 'class_1'.
This review belongs to 'class_1'.
This review belongs to 'class_1'.
This review belongs to 'class_1'.
This review belongs to 'class_1'.
This review belongs to 'class_1'.
This review belongs to 'class_1'.
This review belongs to 'class_1'.
              precision    recall  f1-score   support

           0       0.90      0.90      0.90        10
           1       0.90      0.90      0.90        10

    accuracy                           0.90        20
   macro avg       0.90      0.90      0.90        20
weighted avg       0.90      0.90      0.90

## Baseline Prompt Engineering

In [23]:
# Baseline instruction: it only names the two labels and does not describe them.
# The wording is part of the experiment and is kept exactly as evaluated.
prompt_template_baseline = (
    "\n Given the above review we have two two classes."
    " 'class_0' and 'class_1'."
)

In [24]:
def make_prompt_for_sentiment_analysis_baseline(text):
    """Build the baseline classification prompt for a single film review.

    The review is followed by ``prompt_template_baseline``, which only names the
    two labels, and by an instruction asking for the label only.

    Arguments:
        text (str): The review to classify.

    Returns:
        str: The assembled prompt, ending with the ``### Assistant:`` line.
    """
    header = '### System: You are an AI assistant that follows instruction extremely well.\n### Human:\n'
    footer = '\n Please classify this film review into one class out of these two classes,and just output the label without anymore word \n ### Assistant:\n'

    # Every piece is separated from the next by a single space.
    return ' '.join([header, text, prompt_template_baseline, footer])

In [25]:
# Build one baseline prompt by hand for a quick check: take the first sampled
# review and keep only its first 200 whitespace-separated words.
text = dataset_new.text.iloc[0]
text_truncated = ' '.join(text.split()[:200])
prompt = make_prompt_for_sentiment_analysis_baseline(text_truncated)

In [28]:
# Run the hand-built prompt once. Nothing is displayed by this cell's own code;
# the text shown below it is what the attached streamer echoed during generation.
output = generate(prompt, max_new_tokens=15)
content = output[0]['generated_text']

 class_0


In [30]:
from sklearn.metrics import classification_report

def metrics_for_templates_baseline(templates_function,data):
    """Classify every review in ``data`` with the model and print a classification report.

    Arguments:
        templates_function (callable): Turns a (truncated) review text into the full
            prompt string sent to the model.
        data (pandas.DataFrame): Frame with a ``text`` column (reviews) and a
            ``label`` column (gold labels compared against predictions 0/1).

    Returns:
        None. The output of ``classification_report`` is printed.
    """
    labels_gold = []
    labels_pre = []

    # Walk both columns in row order so the result does not depend on the frame's
    # index labels.
    for text, gold in zip(data['text'], data['label']):
        # Keep only the first 200 whitespace-separated words of the review.
        text_truncated = ' '.join(text.split()[:200])
        prompt = templates_function(text_truncated)
        output = generate(prompt, max_new_tokens=15)
        content = output[0]['generated_text']

        # Look for the label only in the model's answer. The prompt can itself
        # contain 'class_0' after the review, so it must be cut off reliably: by
        # length when it is echoed, otherwise after the last "Assistant:" marker.
        if content.startswith(prompt):
            completion = content[len(prompt):]
        else:
            completion = content.rpartition("Assistant:")[2]

        # Anything that does not mention class_0 is counted as class 1.
        pre = 0 if "class_0" in completion else 1
        labels_pre.append(pre)
        labels_gold.append(gold)

    print(classification_report(labels_gold, labels_pre))

In [32]:
%%time

# Stream only the newly generated text (the labels shown below), not the prompt.
streamer.skip_prompt = True 
# Evaluate the baseline prompt on the same 20-review sample.
metrics_for_templates_baseline(make_prompt_for_sentiment_analysis_baseline,dataset_new)

 class_0
 class_1
 class_0
 class_1
 class_1
 class_1 (unwatchable trash)
 class_1 (negative review)
 class_1
 class_1 (negative review)
 class_0
 class_1
 class_0
 class_1
 class_1
 class_1
 class_0
 class_1
 class_1
 class_1
 class_0
              precision    recall  f1-score   support

           0       0.50      0.30      0.37        10
           1       0.50      0.70      0.58        10

    accuracy                           0.50        20
   macro avg       0.50      0.50      0.48        20
weighted avg       0.50      0.50      0.48        20

CPU times: total: 35min 36s
Wall time: 8min 59s
