In [1]:
import pandas as pd

books = pd.read_csv('books_categorized.csv')

In [2]:
import torch

print(torch.__version__)
device = 0 if torch.cuda.is_available() else -1

2.8.0+cpu


In [3]:
from transformers import pipeline

classifier = pipeline(
    "text-classification",
    model="j-hartmann/emotion-english-distilroberta-base",
    device=-1
)

# This will now work correctly
result = classifier("I love programming!")
print(result)

  from .autonotebook import tqdm as notebook_tqdm
Device set to use cpu


[{'label': 'joy', 'score': 0.9866405129432678}]


In [4]:
import numpy as np

In [5]:
from tqdm import tqdm
import numpy as np

# Parameters
batch_size = 16
emotion_labels = ["joy", "anger", "sadness", "fear", "disgust", "surprise", "neutral"]

# Storage
isbn = []
emotion_scores = {label: [] for label in emotion_labels}

# Corrected function
def calculate_max_emotion_scores(predictions):
    per_emotion_scores = {label: [] for label in emotion_labels}
    
    for sentence_pred in predictions:
        # Wrap single dict in list
        if isinstance(sentence_pred, dict):
            sentence_pred = [sentence_pred]
        
        # Map scores by label
        score_map = {item['label']: item['score'] for item in sentence_pred}
        
        for label in emotion_labels:
            per_emotion_scores[label].append(score_map.get(label, 0.0))  # default 0 if missing
    
    return {label: np.max(scores) for label, scores in per_emotion_scores.items()}

# Make sure classifier returns all scores
classifier.return_all_scores = True

# Prepare all sentences and ISBNs
all_sentences = [books["description"][i].split(".") for i in range(len(books))]
all_isbns = books["isbn13"].tolist()

# Process in batches
for start_idx in tqdm(range(0, len(all_sentences), batch_size)):
    end_idx = start_idx + batch_size
    batch_sentences = all_sentences[start_idx:end_idx]
    batch_isbns = all_isbns[start_idx:end_idx]
    
    # Flatten batch for classifier
    flat_sentences = [s for desc in batch_sentences for s in desc]
    
    # Run classifier
    predictions = classifier(flat_sentences)
    
    # Split predictions per book
    idx = 0
    for desc_idx, desc in enumerate(batch_sentences):
        desc_len = len(desc)
        desc_preds = predictions[idx: idx + desc_len]
        max_scores = calculate_max_emotion_scores(desc_preds)
        
        # Store results
        isbn.append(batch_isbns[desc_idx])
        for label in emotion_labels:
            emotion_scores[label].append(max_scores[label])
        
        idx += desc_len


100%|██████████| 325/325 [08:17<00:00,  1.53s/it]


In [6]:
# Create emotion dataframe
emotion_df = pd.DataFrame({"isbn13": isbn, **emotion_scores})

# Merge with books
books = pd.merge(books, emotion_df, on="isbn13", how="left")

In [7]:
emotion_df

Unnamed: 0,isbn13,joy,anger,sadness,fear,disgust,surprise,neutral
0,9780002005883,0.932798,0.000000,0.967158,0.928168,0.000000,0.729602,0.646216
1,9780002261982,0.704422,0.612619,0.000000,0.942528,0.348284,0.000000,0.887940
2,9780006178736,0.767239,0.000000,0.000000,0.972321,0.000000,0.000000,0.549477
3,9780006280897,0.000000,0.000000,0.000000,0.360705,0.000000,0.000000,0.732685
4,9780006280934,0.000000,0.000000,0.475880,0.000000,0.000000,0.000000,0.884390
...,...,...,...,...,...,...,...,...
5192,9788172235222,0.000000,0.000000,0.980877,0.919165,0.000000,0.000000,0.853721
5193,9788173031014,0.400263,0.000000,0.000000,0.000000,0.000000,0.000000,0.883198
5194,9788179921623,0.947779,0.000000,0.000000,0.000000,0.000000,0.000000,0.375754
5195,9788185300535,0.759457,0.000000,0.000000,0.459270,0.000000,0.000000,0.951104


In [8]:
books

Unnamed: 0,isbn13,isbn10,title,authors,categories,thumbnail,description,published_year,average_rating,num_pages,...,title and subtitle,tagged_description,simple_category,joy,anger,sadness,fear,disgust,surprise,neutral
0,9780002005883,0002005883,Gilead,Marilynne Robinson,Fiction,http://books.google.com/books/content?id=KQZCP...,A NOVEL THAT READERS and critics have been eag...,2004.0,3.85,247.0,...,Gilead,9780002005883 A NOVEL THAT READERS and critics...,Fiction,0.932798,0.000000,0.967158,0.928168,0.000000,0.729602,0.646216
1,9780002261982,0002261987,Spider's Web,Charles Osborne;Agatha Christie,Detective and mystery stories,http://books.google.com/books/content?id=gA5GP...,A new 'Christie for Christmas' -- a full-lengt...,2000.0,3.83,241.0,...,Spider's Web: A Novel,9780002261982 A new 'Christie for Christmas' -...,Fiction,0.704422,0.612619,0.000000,0.942528,0.348284,0.000000,0.887940
2,9780006178736,0006178731,Rage of angels,Sidney Sheldon,Fiction,http://books.google.com/books/content?id=FKo2T...,"A memorable, mesmerizing heroine Jennifer -- b...",1993.0,3.93,512.0,...,Rage of angels,"9780006178736 A memorable, mesmerizing heroine...",Fiction,0.767239,0.000000,0.000000,0.972321,0.000000,0.000000,0.549477
3,9780006280897,0006280897,The Four Loves,Clive Staples Lewis,Christian life,http://books.google.com/books/content?id=XhQ5X...,Lewis' work on the nature of love divides love...,2002.0,4.15,170.0,...,The Four Loves,9780006280897 Lewis' work on the nature of lov...,Nonfiction,0.000000,0.000000,0.000000,0.360705,0.000000,0.000000,0.732685
4,9780006280934,0006280935,The Problem of Pain,Clive Staples Lewis,Christian life,http://books.google.com/books/content?id=Kk-uV...,"""In The Problem of Pain, C.S. Lewis, one of th...",2002.0,4.09,176.0,...,The Problem of Pain,"9780006280934 ""In The Problem of Pain, C.S. Le...",Nonfiction,0.000000,0.000000,0.475880,0.000000,0.000000,0.000000,0.884390
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
5192,9788172235222,8172235224,Mistaken Identity,Nayantara Sahgal,Indic fiction (English),http://books.google.com/books/content?id=q-tKP...,On A Train Journey Home To North India After L...,2003.0,2.93,324.0,...,Mistaken Identity,9788172235222 On A Train Journey Home To North...,Fiction,0.000000,0.000000,0.980877,0.919165,0.000000,0.000000,0.853721
5193,9788173031014,8173031010,Journey to the East,Hermann Hesse,Adventure stories,http://books.google.com/books/content?id=rq6JP...,This book tells the tale of a man who goes on ...,2002.0,3.70,175.0,...,Journey to the East,9788173031014 This book tells the tale of a ma...,Nonfiction,0.400263,0.000000,0.000000,0.000000,0.000000,0.000000,0.883198
5194,9788179921623,817992162X,The Monk Who Sold His Ferrari: A Fable About F...,Robin Sharma,Health & Fitness,http://books.google.com/books/content?id=c_7mf...,"Wisdom to Create a Life of Passion, Purpose, a...",2003.0,3.82,198.0,...,The Monk Who Sold His Ferrari: A Fable About F...,9788179921623 Wisdom to Create a Life of Passi...,Fiction,0.947779,0.000000,0.000000,0.000000,0.000000,0.000000,0.375754
5195,9788185300535,8185300534,I Am that,Sri Nisargadatta Maharaj;Sudhakar S. Dikshit,Philosophy,http://books.google.com/books/content?id=Fv_JP...,This collection of the timeless teachings of o...,1999.0,4.51,531.0,...,I Am that: Talks with Sri Nisargadatta Maharaj,9788185300535 This collection of the timeless ...,Nonfiction,0.759457,0.000000,0.000000,0.459270,0.000000,0.000000,0.951104


In [9]:
books.to_csv("books_with_emotions.csv", index=False)