In [1]:
import pandas as pd
# Render full cell contents (no column-width truncation) when frames are displayed.
pd.options.display.max_colwidth = None

# Load the enriched label table from the processed-data directory.
df = pd.read_parquet(
    path='../data/processed/ao3_tifu_enriched_labels.parquet',
)

# # Display basic info
# df.sample(30)

In [2]:
from transformers import pipeline
from tqdm import tqdm 

  from .autonotebook import tqdm as notebook_tqdm


In [10]:
# Test out another emotion model: SamLowe/roberta-base-go_emotions
#
# Draws a fixed-size sample of rows from `df`, classifies each row's text with
# the model, and stores the highest-scoring label next to the row's existing
# `emotion` / `emotion_score` values so the two can be compared.

SAMPLE_SIZE = 100  # number of rows to spot-check
SAMPLE_SEED = 42   # fixed seed so re-running the cell draws the same rows
MAX_CHARS = 2000   # character cut-off (512 tokens ≈ 2000 chars)

# Never request more rows than exist: DataFrame.sample raises when n > len(df)
# and replace=False.
sample = df.sample(min(SAMPLE_SIZE, len(df)), random_state=SAMPLE_SEED)

print('Getting emotion classifier (CPU mode)...')
# NOTE(review): newer transformers releases document `top_k=None` as the
# replacement for `return_all_scores=True` — confirm before upgrading.
emotion_classifier = pipeline(
    "text-classification",
    model="SamLowe/roberta-base-go_emotions",
    return_all_scores=True,
    device=-1  # Force CPU for stability
)

results = []

print('Starting for loop...')
# Iterate each row once instead of re-materializing `sample.iloc[idx]` for
# every field that is read.
for _, row in tqdm(sample.iterrows(), total=len(sample)):
    text = row['text']

    # Classify the (character-truncated) text; the call returns one list of
    # label/score dicts per input, so take the first.
    predictions = emotion_classifier(str(text)[:MAX_CHARS])[0]

    # Get highest scoring emotion
    top_emotion = max(predictions, key=lambda pred: pred['score'])

    results.append({
        'id': row['id'],
        'text': text,
        'type': row['type'],
        'new_emotion': top_emotion['label'],
        'new_emotion_score': top_emotion['score'],
        'emotion': row['emotion'],
        'emotion_score': row['emotion_score']
    })

Getting emotion classifier (CPU mode)...


Device set to use cpu


Starting for loop...


100%|██████████| 100/100 [00:05<00:00, 17.01it/s]


In [11]:
print('Finished for loop...')
# Turn the accumulated per-row records into a table (one column per record key).
results = pd.DataFrame(data=results)

Finished for loop...


In [20]:
# Rows where the new model's top label is 'neutral'.
results.loc[results['new_emotion'].eq('neutral')]

Unnamed: 0,id,text,type,new_emotion,new_emotion_score,emotion,emotion_score
5,ao3_34816549_7402,"So... He saw the entire thing. You know, Yin Yu doesn’t even know why the Ghost King wants to find the Prince of Xianle so badly, but he feels sorry for the man.",short_story,neutral,0.664051,sadness,0.854
6,ao3_28777140_883,"Hannibal wiped his fingers on Will’s thighs, then shook Will’s caged cock over the bowl to free any lingering seminal fluid. He took the bowl away.",short_story,neutral,0.966026,disgust,0.836
7,ao3_11985126_1311,"Loki glanced at where the vision would be projected, his eyes filled with wonder. “Step away from the Eye of Agamotto,” Wong warned. “He won’t ask a second time,” Stephen said.",short_story,neutral,0.872361,surprise,0.925
8,ao3_19042240_1035,"He kept holding his cup, taking small sips yet feeling no taste. At some point, Tom pulled a box of sweets from under the table, offering him to take one.",short_story,neutral,0.965057,disgust,0.972
9,ao3_19042240_4557,"“Do you mean the cloak?” he drawled. “If you do, then you misunderstood everything.” “Misunderstood?” Harry hissed. Tom’s blasé attitude boiled his blood, making him see red. “Which part did I misunderstand?",short_story,neutral,0.6155,anger,0.842
12,ao3_53634157_1463,"Satoru could only imagine the gruesome death that could’ve taken someone like Nanami Kento, especially if Yūji was there to see it happen. Nanami would’ve fought hard to the end for the poor kid. The thought makes his stomach churn.",short_story,neutral,0.320549,disgust,0.805
13,ao3_22942099_3314,"If he had never gone to Midgard, if he had never met you, this wouldn’t be happening. You wouldn’t have been put in danger. Knowing Gretchen, you might still be in danger.",short_story,neutral,0.594543,fear,0.925
14,ao3_12402426_1316,"There are two ways to go about this. Scott’s face is swimming in his drunk vision and his heart is beating furiously in his ears so he picks the worst one, obviously. “You mean, you’renotgay?” He asks loudly.",short_story,neutral,0.887509,disgust,0.869
15,ao3_3195734_164,"The officer rolls down his window, but flicks up his shades up when he and Derek turn around. He squints.",short_story,neutral,0.939978,disgust,0.807
16,ao3_22992613_435,"When Jungkook grabs it to shake like they are actually doing some business deal, Taehyung uses all of his strength to pull him over it. Right over the already small space between them, until their faces are mere centimeters apart.",short_story,neutral,0.958784,fear,0.939


In [38]:
# Ten random rows whose id contains 'tifu' and whose first tone score is >= 0.8.
df.loc[
    df['tone_scores'].apply(lambda scores: scores[0] >= 0.8)
    & df['id'].str.contains('tifu')
].sample(10)

Unnamed: 0,id,text,emotion,emotion_score,type,tone_labels,tone_scores,genre_jquigl,genre_jquigl_score,genre_Sarthak279,genre_Sarthak279_score
133688,tifu_645,pulled off mans prosthetic hand and laughed like an idiot.,disgust,0.349847,one_liner,"[amusement, joy, neutral, annoyance, approval, realization, admiration, excitement, sadness, disapproval, optimism, love, anger, gratitude, caring, disappointment, embarrassment, relief, curiosity, confusion, disgust, desire, surprise, remorse, fear, nervousness, pride, grief]","[0.869, 0.151, 0.042, 0.01, 0.009, 0.007, 0.005, 0.004, 0.004, 0.003, 0.003, 0.003, 0.003, 0.003, 0.002, 0.002, 0.002, 0.002, 0.002, 0.001, 0.001, 0.001, 0.001, 0.001, 0.001, 0.001, 0.001, 0.0]",Scifi,0.201029,Thriller,0.352885
134475,tifu_1432,made 15 people inside burger king think they were dying,surprise,0.612221,one_liner,"[neutral, sadness, realization, fear, disappointment, annoyance, approval, disgust, confusion, disapproval, amusement, surprise, optimism, grief, nervousness, caring, anger, admiration, joy, desire, curiosity, embarrassment, excitement, relief, remorse, gratitude, love, pride]","[0.926, 0.034, 0.017, 0.016, 0.01, 0.009, 0.007, 0.005, 0.005, 0.005, 0.005, 0.004, 0.004, 0.004, 0.003, 0.003, 0.002, 0.002, 0.002, 0.002, 0.002, 0.002, 0.002, 0.001, 0.001, 0.001, 0.001, 0.001]",Thriller,0.163024,Thriller,0.458943
136804,tifu_3761,"splitting wood, try to get axe out of log as my dad points where to cut next, slicing his finger off. whoops.",surprise,0.803592,one_liner,"[neutral, annoyance, disgust, embarrassment, approval, amusement, disappointment, realization, anger, sadness, fear, excitement, admiration, disapproval, joy, surprise, optimism, caring, desire, pride, nervousness, relief, curiosity, grief, remorse, confusion, love, gratitude]","[0.88, 0.068, 0.045, 0.016, 0.016, 0.013, 0.013, 0.012, 0.009, 0.007, 0.005, 0.004, 0.004, 0.003, 0.003, 0.003, 0.002, 0.002, 0.002, 0.002, 0.002, 0.001, 0.001, 0.001, 0.001, 0.001, 0.001, 0.0]",Family,0.412257,Thriller,0.624609
139004,tifu_5961,"quickly signed a sympathy card without reading it. gave it to a coworker to sign, it was a sympathy card for his own mother whom had just passed away.",sadness,0.921457,one_liner,"[sadness, neutral, grief, caring, disappointment, realization, approval, remorse, admiration, annoyance, joy, disapproval, love, gratitude, relief, desire, amusement, disgust, anger, nervousness, fear, optimism, curiosity, embarrassment, confusion, excitement, surprise, pride]","[0.851, 0.168, 0.062, 0.04, 0.03, 0.028, 0.023, 0.019, 0.012, 0.01, 0.01, 0.007, 0.006, 0.006, 0.005, 0.005, 0.005, 0.005, 0.005, 0.004, 0.004, 0.003, 0.003, 0.003, 0.002, 0.002, 0.002, 0.001]",Family,0.5847,Romance,0.636212
137576,tifu_4533,recreated rain man with the old guy from up,neutral,0.836881,one_liner,"[neutral, approval, realization, annoyance, admiration, disappointment, disapproval, sadness, excitement, confusion, joy, anger, optimism, disgust, amusement, fear, curiosity, love, gratitude, surprise, desire, caring, embarrassment, pride, grief, relief, remorse, nervousness]","[0.966, 0.015, 0.007, 0.006, 0.005, 0.003, 0.003, 0.002, 0.002, 0.002, 0.002, 0.002, 0.002, 0.002, 0.002, 0.001, 0.001, 0.001, 0.001, 0.001, 0.001, 0.001, 0.001, 0.0, 0.0, 0.0, 0.0, 0.0]",Animation,0.361367,Thriller,0.58466
133377,tifu_334,i saw blue daba de daba dye,surprise,0.808072,one_liner,"[neutral, approval, realization, confusion, excitement, annoyance, curiosity, admiration, amusement, surprise, disappointment, joy, fear, optimism, sadness, disgust, love, disapproval, anger, desire, embarrassment, caring, nervousness, gratitude, grief, relief, pride, remorse]","[0.95, 0.016, 0.014, 0.006, 0.004, 0.004, 0.003, 0.003, 0.003, 0.003, 0.002, 0.002, 0.002, 0.002, 0.002, 0.002, 0.002, 0.001, 0.001, 0.001, 0.001, 0.001, 0.001, 0.001, 0.0, 0.0, 0.0, 0.0]",Animation,0.38911,Romance,0.545605
136351,tifu_3308,and alternate title - tifu by accidentally stealing from target.,neutral,0.295233,one_liner,"[neutral, realization, annoyance, approval, disappointment, sadness, disapproval, confusion, disgust, embarrassment, optimism, anger, amusement, fear, surprise, curiosity, remorse, admiration, joy, gratitude, desire, excitement, caring, grief, nervousness, relief, love, pride]","[0.946, 0.019, 0.014, 0.009, 0.009, 0.006, 0.006, 0.004, 0.004, 0.004, 0.003, 0.003, 0.003, 0.003, 0.002, 0.001, 0.001, 0.001, 0.001, 0.001, 0.001, 0.001, 0.001, 0.001, 0.001, 0.001, 0.001, 0.0]",Action,0.272221,Drama,0.515963
135431,tifu_2388,i just lost my family pet of 11 years because i’m a dumbass.,sadness,0.797958,one_liner,"[sadness, disappointment, annoyance, neutral, grief, realization, anger, remorse, disgust, disapproval, approval, admiration, surprise, amusement, love, nervousness, embarrassment, joy, fear, desire, caring, curiosity, optimism, gratitude, confusion, excitement, relief, pride]","[0.825, 0.174, 0.033, 0.023, 0.021, 0.019, 0.014, 0.009, 0.008, 0.006, 0.006, 0.006, 0.005, 0.005, 0.005, 0.004, 0.004, 0.004, 0.004, 0.002, 0.002, 0.002, 0.002, 0.002, 0.002, 0.002, 0.001, 0.001]",Family,0.591689,Thriller,0.723617
137221,tifu_4178,"dropped soap in toilet bowl with shit still in it, rescued it using my hands.",disgust,0.946041,one_liner,"[neutral, disgust, annoyance, approval, embarrassment, disappointment, sadness, realization, anger, disapproval, fear, amusement, desire, optimism, remorse, caring, love, confusion, curiosity, admiration, grief, gratitude, nervousness, excitement, joy, relief, surprise, pride]","[0.824, 0.122, 0.056, 0.017, 0.014, 0.013, 0.011, 0.01, 0.009, 0.007, 0.005, 0.004, 0.004, 0.003, 0.003, 0.002, 0.002, 0.002, 0.002, 0.001, 0.001, 0.001, 0.001, 0.001, 0.001, 0.001, 0.001, 0.001]",Adventure,0.149016,Romance,0.547559
136137,tifu_3094,dragon will be ready to battle again shortly,neutral,0.934715,one_liner,"[neutral, approval, excitement, optimism, admiration, desire, joy, realization, caring, amusement, annoyance, curiosity, love, fear, confusion, gratitude, sadness, disapproval, disappointment, anger, disgust, surprise, pride, relief, nervousness, remorse, grief, embarrassment]","[0.897, 0.041, 0.017, 0.015, 0.009, 0.007, 0.005, 0.004, 0.002, 0.002, 0.002, 0.002, 0.002, 0.002, 0.002, 0.001, 0.001, 0.001, 0.001, 0.001, 0.001, 0.001, 0.001, 0.001, 0.0, 0.0, 0.0, 0.0]",Animation,0.407568,Action,0.880329


In [41]:
# Rows whose text is longer than 2000 characters.
df.loc[df['text'].str.len().gt(2000)]

Unnamed: 0,id,text,emotion,emotion_score,type,tone_labels,tone_scores,genre_jquigl,genre_jquigl_score,genre_Sarthak279,genre_Sarthak279_score


In [51]:
# Build `filtered` from `df`:
#   1. keep rows whose first `tone_scores` entry is at least 0.8,
#   2. drop the two genre_Sarthak279 columns,
#   3. rename the remaining genre/tone columns to shorter names,
#   4. collapse the tone label/score lists to their first element.
filtered = (
    df.loc[df['tone_scores'].apply(lambda scores: scores[0] >= 0.8)]
    .drop(columns=['genre_Sarthak279', 'genre_Sarthak279_score'])
    .rename(columns={
        'genre_jquigl': 'genre',
        'genre_jquigl_score': 'genre_score',
        'tone_labels': 'tone',
        'tone_scores': 'tone_score',
    })
    .assign(
        tone=lambda frame: frame['tone'].apply(lambda labels: labels[0]),
        tone_score=lambda frame: frame['tone_score'].apply(lambda scores: scores[0]),
    )
)

In [52]:
# Display the filtered table built in the previous cell.
filtered

Unnamed: 0,id,text,emotion,emotion_score,type,tone,tone_score,genre,genre_score
1,ao3_41199483_1822,"His eyes were deep and fathomless, gaze more intense than it had ever been when levelled against Eddie. Eddie swallowed. He wet his mouth and watched Steve’s eyes dip, tracking the movement.",fear,0.976000,short_story,neutral,0.936,Horror,0.396911
2,ao3_39492981_286,"Steve was looking at him now, his eyes wide in Eddie’s peripheral. He couldn’t stomach seeing his face, no matter what it wore.",disgust,0.829000,short_story,neutral,0.962,Thriller,0.230390
4,ao3_41199483_1637,"The scars on Steve’s wrist were the same colour as the ones on his cheeks, Eddie reminded himself. Thin, spidery, silver patches that were unnatural. Eddie’s breath caught in his throat.",disgust,0.911000,short_story,neutral,0.835,Horror,0.477902
8,ao3_39570081_324,"Steve moved to Eddie’s chest, caressing him as he kissed down his body. When he got to Eddie’s legs, which were bent at the knee, he lifted one to kiss the inner thigh.",disgust,0.866000,short_story,neutral,0.912,Romance,0.505607
10,ao3_39346095_128,"Eddie stares down at him, his eyes flickering to different points on his body. Steve’s fully clothed, but he has never felt more exposed. He runs his hand through his messed up hair.",disgust,0.803000,short_story,neutral,0.956,Mystery,0.280344
...,...,...,...,...,...,...,...,...,...
139205,tifu_6162,": was a drunk jack ass, received a bet from a friend, jumped in lake filled with ice, got cut, mild hypothermia and mild shock.",surprise,0.507925,one_liner,neutral,0.952,Crime,0.323993
139209,tifu_6166,12-year-old me screwed over 23-year-old me. 12-year-old me and 23-year-old me are almost the same height now.,neutral,0.850632,one_liner,neutral,0.942,Family,0.448582
139210,tifu_6167,"was trying to de-tick my dog, almost de-nippled my dog.",fear,0.401921,one_liner,neutral,0.942,Family,0.665022
139211,tifu_6168,- phone in fryer = boom,surprise,0.761446,one_liner,neutral,0.898,Thriller,0.229029


In [53]:
# Persist the filtered table without writing the row index.
# The target is the notebook-relative path the first cell reads from, instead
# of an absolute path under one user's home directory, so the notebook is not
# tied to a single machine.
# NOTE(review): this is the same file name the notebook loads at the top, so
# writing here replaces the input. A later run from a fresh kernel would load
# the already-filtered table — confirm that overwrite is intended.
filtered.to_parquet('../data/processed/ao3_tifu_enriched_labels.parquet', index=False)

In [54]:
# Total number of rows kept after filtering.
filtered.shape[0]

51337

In [55]:
# Number of filtered rows whose id contains 'ao3'.
filtered.loc[filtered['id'].str.contains('ao3')].shape[0]

49246

In [56]:
# Number of filtered rows whose id contains 'tifu'.
filtered.loc[filtered['id'].str.contains('tifu')].shape[0]

2091