#### Gemma 7b-V3 + Prompt Engineering Strategy:

This notebook randomizes the prompts given to Gemma 7b-V3 so that they are better aligned with the challenge's goal.

In [None]:
import pandas as pd
import numpy as np

import random
import string

In [None]:
# Load the tokenizer and the causal language model from a local Kaggle input directory.

from transformers import AutoTokenizer, AutoModelForCausalLM

# NOTE(review): this path names a gemma-2-2b-it checkpoint, while the notebook
# title mentions "Gemma 7b-V3" — confirm which model is actually intended.
model_path = '/kaggle/input/gemma-2/transformers/gemma-2-2b-it/2'

# Both objects are loaded from the same directory so the vocabulary matches the weights.
tokenizer = AutoTokenizer.from_pretrained(model_path)
model = AutoModelForCausalLM.from_pretrained(
    model_path,
    device_map="auto",  # device placement is left to the loader rather than fixed here
)

In [None]:
def generate_prompt(topic, rng=None):
    """
    Generate a randomized essay prompt for the GEMMA model based on the given topic.

    The prompt is made of three sentences joined by single spaces:
    - an introduction that names the topic in an engaging manner,
    - instructions that guide the structure of the model's response,
    - a constraint on length, tone, or audience for better specificity.
    Each sentence is picked independently from three fixed options.

    Args:
        topic: Subject of the essay; interpolated (in single quotes) into the
            introduction sentence.
        rng: Optional object exposing ``choice(sequence)``, e.g. a seeded
            ``random.Random``, so that prompts can be reproduced. When omitted,
            the module-level ``random`` generator is used.

    Returns:
        The assembled prompt string.
    """
    # Fall back to the shared global generator so existing callers see no change.
    picker = random if rng is None else rng

    introductions = (
        f"Write a detailed essay on the topic '{topic}', exploring its various dimensions.",
        f"Craft an insightful and balanced essay about '{topic}', considering multiple perspectives.",
        f"Discuss the significance of '{topic}' in contemporary society and its implications.",
    )
    instructions = (
        "Make sure to include examples, counterarguments, and a conclusion.",
        "Incorporate historical context, current trends, and potential future developments.",
        "Use a mix of rhetorical questions, facts, analogies, and quotes to enhance the essay.",
    )
    constraints = (
        "Limit the essay to approximately 100 words.",
        "Focus on clarity and coherence while maintaining an engaging tone.",
        "Ensure the essay is suitable for a general audience with diverse backgrounds.",
    )

    # Draw in a fixed order (introduction, instructions, constraints) so a seeded
    # generator always produces the same prompt for the same topic.
    parts = (
        picker.choice(introductions),
        picker.choice(instructions),
        picker.choice(constraints),
    )
    return " ".join(parts)

In [None]:
def create_submission(test_df, model, tokenizer):
    """
    Generate one essay per row of ``test_df`` and return the submission columns.

    For every value in the ``topic`` column a randomized prompt is built with
    ``generate_prompt``, tokenized, and passed to ``model.generate`` with a
    budget of 100 new tokens. Only the newly generated text is kept as the essay.

    Args:
        test_df: DataFrame with at least the columns ``id`` and ``topic``.
            An ``essay`` column is added to it in place.
        model: Causal language model exposing ``generate`` and ``device``.
        tokenizer: Tokenizer matching ``model``; called with
            ``return_tensors="pt"`` and used to decode the generated ids.

    Returns:
        A DataFrame holding only the ``id`` and ``essay`` columns.
    """
    # Use the device the model actually lives on instead of assuming 'cuda',
    # which fails when the model was placed elsewhere (e.g. a CPU-only session).
    device = model.device

    essays = []
    for topic in test_df['topic']:
        # Generate the prompt
        prompt = generate_prompt(topic)

        # Tokenize the input prompt and move every tensor next to the model
        inputs = tokenizer(prompt, return_tensors="pt")
        inputs = {name: tensor.to(device) for name, tensor in inputs.items()}

        # Generate the essay
        outputs = model.generate(**inputs, max_new_tokens=100)

        # The returned sequence starts with the prompt tokens; drop them so the
        # essay does not begin with an echo of the instructions.
        prompt_length = inputs['input_ids'].shape[-1]
        essay_tokens = outputs[0][prompt_length:]
        essays.append(tokenizer.decode(essay_tokens, skip_special_tokens=True))

    # Add the essays to the test DataFrame
    test_df['essay'] = essays

    # Return only the required columns
    return test_df[['id', 'essay']]

In [None]:
def load_data(file_path):
    """Read the CSV at ``file_path`` and return its contents as a pandas DataFrame."""
    frame = pd.read_csv(file_path)
    return frame
    
def save_submission(submission_df, output_file):
    """Write ``submission_df`` to ``output_file`` as CSV, leaving out the index column."""
    write_options = {"index": False}
    submission_df.to_csv(output_file, **write_options)

In [None]:
# Read the competition test set; the 'id' and 'topic' columns are used downstream.
test_file = "/kaggle/input/llms-you-cant-please-them-all/test.csv"
test_data = load_data(test_file)

# Generate one essay per test row; the result keeps only the 'id' and 'essay' columns.
submission_df = create_submission(test_data, model, tokenizer)

# Write the submission CSV (without the index column) to a path relative to the working directory.
output_file = "submission.csv"
save_submission(submission_df, output_file)

In [None]:
# Bare expression at the end of a notebook cell: shows the submission frame for a quick visual check.
submission_df