In [1]:
import pandas as pd
import ollama
import random

In [2]:
# Load the translated complaints dataset (path is relative to the notebook's working directory).
df = pd.read_csv(filepath_or_buffer='arabot_1k_translated_random.csv')

In [3]:
# Distinct labels found in the 'Type' column, kept in order of first appearance.
categories = df['Type'].drop_duplicates().tolist()


In [4]:
# Create a dictionary mapping each category to its few-shot example texts.
# A fixed seed keeps the sampled examples (and therefore the split between
# example rows and rows left to classify) stable across kernel restarts.
N_SHOTS = 2        # examples drawn per category
SAMPLE_SEED = 42   # seed passed to DataFrame.sample for reproducibility
examples = {}
for category in categories:
    category_data = df[df['Type'] == category]
    # sample() raises if n exceeds the number of rows, so clamp n for small categories.
    shots = category_data.sample(
        n=min(N_SHOTS, len(category_data)),
        random_state=SAMPLE_SEED,
    )
    examples[category] = shots['Description_English'].tolist()

In [None]:
# Show the sampled few-shot example texts for each category as the cell output.
examples

In [6]:
# Separate the few-shot example rows from the rows left to classify.
# Rows are matched by description text, so every row sharing an example's text
# is treated as an example and kept out of the prediction set.
example_indices = []
for category_examples in examples.values():
    for example in category_examples:
        indices = df[df['Description_English'] == example].index
        example_indices.extend(indices)

# Duplicate description texts make the same index label appear more than once
# (each duplicate text matches all of its rows). De-duplicate while preserving
# order so df_examples does not contain repeated rows.
example_indices = list(dict.fromkeys(example_indices))

df_examples = df.loc[example_indices]
df_to_predict = df.drop(example_indices)

In [7]:
# build the few-shot prompt text
def create_few_shot_prompt(text, examples):
    """Assemble a few-shot classification prompt.

    Args:
        text: The text to be classified; placed at the end of the prompt.
        examples: Mapping of category name -> iterable of example texts.
            Every example is rendered as a "Text / Category" pair, in the
            mapping's iteration order.

    Returns:
        The full prompt string: an introductory line, one block per example,
        and the final instruction ending in an open "Category:" field.
    """
    intro = "Here are some examples of text and their categories:\n\n"
    shot_blocks = [
        f"Text: {sample}\nCategory: {label}\n\n"
        for label, samples in examples.items()
        for sample in samples
    ]
    closing = f"Now, classify the following text into one of the categories. Output only the category name, nothing else:\n\nText: {text}\nCategory:"
    return intro + "".join(shot_blocks) + closing

In [8]:
# function to perform few-shot classification
def few_shot_classification(text, examples, model='llama3:70b'):
    """Classify `text` by sending a few-shot prompt to an Ollama chat model.

    Args:
        text: The text to classify.
        examples: Mapping of category name -> list of example texts, passed
            through to create_few_shot_prompt.
        model: Name of the Ollama model to query. Defaults to 'llama3:70b',
            the model this notebook originally hard-coded.

    Returns:
        The content of the model's reply with surrounding whitespace stripped.
        The reply is not validated against the known category names.
    """
    prompt = create_few_shot_prompt(text, examples)
    # Single-turn chat: the whole few-shot prompt is sent as one user message.
    response = ollama.chat(model=model, messages=[{'role': 'user', 'content': prompt}])
    return response['message']['content'].strip()

In [9]:
# Smoke-test on the first remaining row. Use positional .iloc[0] rather than [0]:
# [0] is a label lookup on the integer index and raises KeyError if the row
# labelled 0 was sampled as a few-shot example and dropped from df_to_predict.
few_shot_classification(df_to_predict['Description_English'].iloc[0], examples)

'Payment Operations'

I will run this function on 30 observations so that I'm consistent with the llama3-70b zero-shot analysis. Based on the 6.5 min runtime of the previous cell, this should take around 3-4 hours.

In [10]:
# Classify the first 30 rows (by position) of the remaining data.
# Rows beyond the first 30 receive NaN in 'Predicted_Type' through index alignment.
df_to_predict['Predicted_Type'] = (
    df_to_predict['Description_English']
    .iloc[:30]
    .map(lambda description: few_shot_classification(description, examples))
)

In [11]:
df_to_predict.head()

Unnamed: 0,Description_English,Type,Predicted_Type
0,3100 riyals were transferred to a quorum accou...,Payment Operations,Payment Operations
1,A bank transfer was transferred to a friend on...,Payment Operations,Payment Operations
2,A broken incoming transfer. The amount of 568 ...,Payment Operations,Payment Operations
3,A company's amount was withdrawn twice on a da...,Bank Cards,Payment Operations
4,A complaint has already been revealed and I ha...,Financing & Leasing,Financing & Leasing


In [12]:
# Share of the first 30 rows whose predicted label exactly matches the true label.
accuracy = (
    df_to_predict['Type'].iloc[:30]
    .eq(df_to_predict['Predicted_Type'].iloc[:30])
    .mean()
)
print(f"Few-Shot Classification Accuracy: {accuracy * 100:.2f}%")

Few-Shot Classification Accuracy: 60.00%
