In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here are several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Walk the Kaggle input directory and print the full path of every file in it.
for root, _, files in os.walk('/kaggle/input'):
    for path in (os.path.join(root, name) for name in files):
        print(path)

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [None]:
# install packages

!pip install huggingface_hub
!pip install torch 
!pip install accelerate 
!pip install transformers 
!pip install bitsandbytes
!pip install -U transformers

In [None]:
# imports 

from transformers import pipeline
import os
import transformers
from transformers import (AutoModelForCausalLM,
                          AutoTokenizer,
                          BitsAndBytesConfig)
import torch
import pandas as pd
import bitsandbytes as bnb

In [None]:
import os

from huggingface_hub import login

# Never paste the access token into the notebook: it would be saved and shared
# together with the file. Read it from the HF_TOKEN environment variable
# instead; when the variable is unset or empty, `token=None` makes `login`
# ask for the token interactively.
login(token=os.environ.get("HF_TOKEN") or None)

In [None]:
# Clear PyTorch's CUDA memory cache, then check whether a GPU is available
# (as the last expression of the cell, the boolean is shown as its output).

torch.cuda.empty_cache()
torch.cuda.is_available()

In [None]:
# Load the comments that should be labelled from the attached Kaggle dataset.
csv_path = "/kaggle/input/sarcasm-detec-output/output_for_sarcasm_detection.csv"
df = pd.read_csv(csv_path)

# Show the loaded frame as the cell output.
df


In [None]:
# Extract the five fields that are sent to the model as a 2-D array
# (one row per comment), then look at the row at position 25.
feature_columns = ['comment', 'score', 'ups', 'downs', 'parent_comment']
sents = df[feature_columns].values

sample_text = sents[25]
sample_text

In [None]:
# Turn the first row into the dict that is later passed to the model as the
# final user message. The field order below fixes the key order of the dict.
field_names = ('comment', 'score', 'ups', 'downs', 'parent_comment')
sents = df[list(field_names)].values

sample_text = sents[0]

# Pair every field name with the value at the same position in the row.
structured_output = dict(zip(field_names, sample_text))

structured_output

In [None]:
# Hugging Face id of the checkpoint to load
model_id = "meta-llama/Meta-Llama-3.1-8B-Instruct"

# 4-bit quantization options that compress the model so that it fits in memory
four_bit_options = dict(
    load_in_4bit=True,
    bnb_4bit_quant_type='nf4',
    bnb_4bit_use_double_quant=True,
    bnb_4bit_compute_dtype=torch.bfloat16,
)
quantization_config = BitsAndBytesConfig(**four_bit_options)

# tokenizer and quantized model weights for the same checkpoint
tokenizer = AutoTokenizer.from_pretrained(model_id)
model = AutoModelForCausalLM.from_pretrained(model_id, quantization_config=quantization_config)

# Text-generation pipeline around the loaded model and tokenizer.
# NOTE: this rebinds the name `pipeline`, which the imports cell bound to the
# `transformers.pipeline` factory function; later cells call this instance.
pipeline = transformers.pipeline(
    task="text-generation",
    model=model,
    tokenizer=tokenizer,
    return_full_text=True,
)


In [None]:
# Here we specify the chat input. It consists of
#   * a system prompt, which gives the model general instructions on how to behave,
#   * some user prompts with the expected assistant reply, which serve as examples,
#   * the final user message with the sample to classify as sarcastic (1) or not (0).
#
# The system prompt is the same one the batch-labelling loop uses: the earlier
# wording started with a truncated word ("ead") and asked the model to create a
# CSV file, which contradicts the one-digit example answers.
#
# NOTE(review): every user "content" is a dict, not a string; presumably the
# chat template converts it to text when the prompt is rendered - confirm.

messages = [
    {"role": "system", "content": """Read the given file. Depending on comment, parent_comment, score, ups, and downs, label whether it is sarcastic or not. Give your labels like my examples. Give your sarcasm decision as binary: 0 = is not, 1 = it is. For example: comment = yeah I agree LOL, parent_comment = we should decrease health expenses, score = 1, ups = 1, downs = 0, sarcasm = 1. Don't say anything except binary, or don't ask me anything. Just do whatever I say.:"""},
    {"role": "user", "content": {
        "comment": "I hate fuckin every single person on this fuckin planet. Someone kill me pls",
        "parent_comment": "Life is wonderful, isn't it?",
        "score": 0,
        "ups": 0,
        "downs": 0
    }},
    {"role": "assistant", "content": "1"},
    {"role": "user", "content": {
        "comment": "Best day I have had in a long time :)",
        "parent_comment": "Glad to hear things are going well!",
        "score": 0,
        "ups": 0,
        "downs": 0
    }},
    {"role": "assistant", "content": "0"},
    {"role": "user", "content": {
        "comment": "boy I like called me princess He’s so precious",
        "parent_comment": "Aw, sounds sweet!",
        "score": 0,
        "ups": 0,
        "downs": 0
    }},
    {"role": "assistant", "content": "0"},
    {"role": "user", "content": {
        "comment": "Straight up in the air.",
        "parent_comment": "I piss hard",
        "score": 1,
        "ups": -1,
        "downs": -1
    }},
    {"role": "assistant", "content": "1"},
    # the sample to classify (built in the previous cell)
    {"role": "user", "content": structured_output},
]

In [None]:
# Generate the output for the single sample.
# do_sample=False selects greedy decoding, so the predicted label is the same
# on every run instead of depending on random sampling.
outputs = pipeline(
    messages,
    max_new_tokens=10,
    do_sample=False,
)

print(outputs)

In [None]:
# Display the raw result the pipeline returned for the single sample.
outputs

In [None]:
# The label is the content of the last message in the generated conversation.
conversation = outputs[0]['generated_text']
label = conversation[-1]['content']
print(f"The predicted label for '{sample_text}' is: {label}")

In [None]:
# Display the DataFrame again.
df

In [None]:
# First field of the first row, i.e. its 'comment' value.
sents[0][0]

In [None]:
# All five extracted fields of the first row.
sents[0]

In [None]:
# Print the prompt fields of every row as a dict, prefixed with a running number.
count = 0  # stays 0 when the frame has no rows
for count, (_, row) in enumerate(df.iterrows(), start=1):
    structured_output = {
        field: row[field]
        for field in ('comment', 'score', 'ups', 'downs', 'parent_comment')
    }
    print(f"{count}. {structured_output}")


In [None]:

# Print the prompt fields of every row as a dict.
for _, row in df.iterrows():
    structured_output = dict(
        comment=row['comment'],
        score=row['score'],
        ups=row['ups'],
        downs=row['downs'],
        parent_comment=row['parent_comment'],
    )
    print(structured_output)


In [None]:
# df.iterrows() returns an iterator over (index, row) pairs; evaluating it here
# only displays the iterator object itself, not the rows.
df.iterrows()

In [None]:
# Label every row of df with the few-shot prompt and collect the model replies.

# The system prompt and the four worked examples are identical for every row,
# so they are built once here instead of on every loop iteration.
few_shot_messages = [
    {"role": "system", "content": """Read the given file. Depending on comment, parent_comment, score, ups, and downs, label whether it is sarcastic or not. Give your labels like my examples. Give your sarcasm decision as binary: 0 = is not, 1 = it is. For example: comment = yeah I agree LOL, parent_comment = we should decrease health expenses, score = 1, ups = 1, downs = 0, sarcasm = 1. Don't say anything except binary, or don't ask me anything. Just do whatever I say.:"""},
    {"role": "user", "content": {
        "comment": "I hate fuckin every single person on this fuckin planet. Someone kill me pls",
        "parent_comment": "Life is wonderful, isn't it?",
        "score": 0,
        "ups": 0,
        "downs": 0
    }},
    {"role": "assistant", "content": "1"},
    {"role": "user", "content": {
        "comment": "Best day I have had in a long time :)",
        "parent_comment": "Glad to hear things are going well!",
        "score": 0,
        "ups": 0,
        "downs": 0
    }},
    {"role": "assistant", "content": "0"},
    {"role": "user", "content": {
        "comment": "boy I like called me princess He’s so precious",
        "parent_comment": "Aw, sounds sweet!",
        "score": 0,
        "ups": 0,
        "downs": 0
    }},
    {"role": "assistant", "content": "0"},
    {"role": "user", "content": {
        "comment": "Straight up in the air.",
        "parent_comment": "I piss hard",
        "score": 1,
        "ups": -1,
        "downs": -1
    }},
    {"role": "assistant", "content": "1"},
]

predicted_emotions = []
counter = 0  # start the counter

for _, row in df.iterrows():
    print("Başlıyoruz...")
    structured_output = {
        'comment': row['comment'],
        'score': row['score'],
        'ups': row['ups'],
        'downs': row['downs'],
        'parent_comment': row['parent_comment']
    }

    # A new list per row: the shared few-shot prefix plus this row as the
    # final user message.
    # NOTE(review): the user "content" is a dict, not a string; presumably the
    # chat template converts it to text when the prompt is rendered - confirm.
    messages = few_shot_messages + [{"role": "user", "content": structured_output}]

    # do_sample=False selects greedy decoding, so re-running the notebook
    # produces the same labels instead of randomly sampled ones.
    output = pipeline(messages, max_new_tokens=10, do_sample=False)
    counter += 1
    print(f"İşlem: {counter}/{len(df)}")

    generated_text = output[0]["generated_text"]

    # Accept only a message that was appended after the prompt. Searching the
    # whole conversation for an assistant message would silently return the
    # last few-shot answer ("1") if the model's reply were missing.
    label = None
    if len(generated_text) > len(messages):
        reply = generated_text[-1]
        if reply["role"] == "assistant":
            label = reply["content"].strip()

    predicted_emotions.append(label)

df["predicted_emotion"] = predicted_emotions


In [None]:
# Inspect the list of labels collected by the labelling loop.
predicted_emotions

In [None]:
# Store the collected labels in a 'sarcasm' column (the same values the
# labelling loop assigned to 'predicted_emotion') and display the frame.
df['sarcasm'] = predicted_emotions
df

In [None]:
# Write the labelled DataFrame to the Kaggle working directory as CSV,
# without the index column.
df.to_csv("/kaggle/working/sarcasm_labeled_LLama3-1-8B-Instruct.csv", index=False)