In [1]:
import pandas as pd
import ollama
import random

In [2]:
# Load the translated dataset. Later cells read its 'Type' column (the label)
# and its 'Description_English' column (the text to classify).
df = pd.read_csv('arabot_1k_translated.csv')

In [3]:
# Distinct labels found in 'Type'. Missing labels are dropped first: a NaN
# entry never satisfies `df['Type'] == category`, so keeping it would produce
# an empty group that has nothing to sample examples from.
categories = df['Type'].dropna().unique().tolist()


In [4]:
# create a dictionary to store examples for each category
N_EXAMPLES_PER_CATEGORY = 2
SAMPLE_SEED = 42  # fixed so the same examples are drawn on every run

examples = {}
for category in categories:
    # Only rows that actually have a description can serve as examples.
    candidates = df.loc[df['Type'] == category, 'Description_English'].dropna()
    # Cap the draw at the pool size: sample() raises when n exceeds it.
    n_draw = min(N_EXAMPLES_PER_CATEGORY, len(candidates))
    if n_draw:
        examples[category] = candidates.sample(
            n=n_draw, random_state=SAMPLE_SEED
        ).tolist()
    else:
        # Keep the key so every category is still represented in the mapping.
        examples[category] = []

In [None]:
# Show the sampled few-shot examples (category -> list of description texts).
examples

In [9]:
# separate the examples and the remaining data
example_indices = []
for category_examples in examples.values():
    for example in category_examples:
        # Every row whose text equals an example is collected, so the same
        # text cannot also show up among the rows to be predicted.
        indices = df[df['Description_English'] == example].index
        example_indices.extend(indices)

# When two sampled examples share the same text, each lookup above returns
# both rows. Drop the repeats (keeping first-seen order) so that df_examples
# does not contain duplicated rows.
example_indices = list(dict.fromkeys(example_indices))

df_examples = df.loc[example_indices]
df_to_predict = df.drop(example_indices)

In [10]:
# create few shot prompt
def create_few_shot_prompt(text, examples):
    """Build the few-shot classification prompt for a single text.

    Args:
        text: The text the model is asked to classify.
        examples: Mapping of category name -> iterable of example texts.
            Examples are listed in the mapping's iteration order.

    Returns:
        The prompt string: an introductory line, one "Text/Category" pair per
        example, then the instruction followed by `text` and an open
        "Category:" slot.
    """
    header = "Here are some examples of text and their categories:\n\n"
    shots = [
        f"Text: {sample}\nCategory: {label}\n\n"
        for label, samples in examples.items()
        for sample in samples
    ]
    instruction = (
        "Now, classify the following text into one of the categories. "
        "Output only the category name, nothing else:\n\n"
        f"Text: {text}\nCategory:"
    )
    return header + "".join(shots) + instruction

In [11]:
# function to perform few-shot classification
def few_shot_classification(text, examples, model='llama3', options=None):
    """Classify one text by sending a few-shot prompt to an Ollama chat model.

    Args:
        text: The text to classify.
        examples: Mapping of category name -> example texts, forwarded to
            `create_few_shot_prompt`.
        model: Name of the Ollama model to query. Defaults to 'llama3', the
            model this notebook was originally run with.
        options: Optional generation options forwarded to `ollama.chat`
            (for example `{'temperature': 0}`); None leaves the model's
            defaults in place.

    Returns:
        The content of the model's reply with surrounding whitespace removed.
        The reply is returned as-is otherwise; it is not checked against the
        known category names.
    """
    prompt = create_few_shot_prompt(text, examples)
    response = ollama.chat(
        model=model,
        messages=[{'role': 'user', 'content': prompt}],
        options=options,
    )
    return response['message']['content'].strip()

In [12]:
# Classify every remaining description; each row triggers one call to
# few_shot_classification with the shared `examples` mapping.
df_to_predict['Predicted_Type'] = df_to_predict['Description_English'].apply(
    few_shot_classification, args=(examples,)
)

In [None]:
# Preview the first rows, now including the 'Predicted_Type' column.
df_to_predict.head()

In [14]:
# Accuracy = share of rows whose predicted label is exactly equal to the true
# label (plain string equality, no normalisation of the model's reply).
accuracy = df_to_predict['Predicted_Type'].eq(df_to_predict['Type']).mean()
print(f"Few-Shot Classification Accuracy: {accuracy * 100:.2f}%")

Few-Shot Classification Accuracy: 55.24%
