In [79]:
import pandas as pd

# Load the movie dataset and replace every missing value with an empty string.
# Chain .head(100) onto the read if you want to limit the number of rows.
file_path = '../datasets/dataset_latest.csv'
df = pd.read_csv(file_path).fillna('')

print(f"{len(df)} rows")
df.head()

10638 rows


Unnamed: 0,id,title,summary,year,certificate,runtime,runtime_mins,rating,votes,director_1,...,writer_2,writer_3,cast_1,cast_2,cast_3,genre_1,genre_2,genre_3,synopsis,img
0,15239678,Dune: Part Two,Paul Atreides unites with Chani and the Fremen...,2024,PG-13,2h 46m,166,8.7,360000,Denis Villeneuve,...,Jon Spaihts,Frank Herbert,Timothée Chalamet,Zendaya,Rebecca Ferguson,Action,Adventure,Drama,Following the destruction of House Atreides by...,https://m.media-amazon.com/images/M/MV5BN2QyZG...
1,14539740,Godzilla x Kong: The New Empire,"Two ancient titans, Godzilla and Kong, clash i...",2024,PG-13,1h 55m,115,6.5,37000,Adam Wingard,...,Simon Barrett,Jeremy Slater,Rebecca Hall,Brian Tyree Henry,Dan Stevens,Action,Adventure,Fantasy,"Three years after defeating Mechagodzilla, Kon...",https://m.media-amazon.com/images/M/MV5BY2QwOG...
2,23137904,Rebel Moon - Part Two: The Scargiver,Kora and surviving warriors prepare to defend ...,2024,PG-13,2h 2m,122,5.2,17000,Zack Snyder,...,Kurt Johnstad,Shay Hatten,Sofia Boutella,Djimon Hounsou,Ed Skrein,Action,Adventure,Drama,Kora and surviving warriors prepare to defend ...,https://m.media-amazon.com/images/M/MV5BYmQ2OD...
3,21692408,Kung Fu Panda 4,After Po is tapped to become the Spiritual Lea...,2024,PG,1h 34m,94,6.4,30000,Mike Mitchell,...,Glenn Berger,Darren Lemke,Jack Black,Awkwafina,Viola Davis,Animation,Action,Adventure,"On top of a mountain, a bunch of mountain goat...",https://m.media-amazon.com/images/M/MV5BZDY0Yz...
4,1160419,Dune,A noble family becomes embroiled in a war for ...,2021,PG-13,2h 35m,155,8.0,851000,Denis Villeneuve,...,Denis Villeneuve,Eric Roth,Timothée Chalamet,Rebecca Ferguson,Zendaya,Action,Adventure,Drama,The story opens with a woman telling a portion...,https://m.media-amazon.com/images/M/MV5BMDQ0Nj...


In [80]:
from dotenv import load_dotenv, find_dotenv
import json
import os
from openai import OpenAI
import openai
from groq import Groq

# Load environment variables from the .env file located by find_dotenv()
load_dotenv(find_dotenv())

# Active backend: OpenAI, authenticated with the OPENAI_API_KEY environment variable
client = OpenAI(api_key=os.getenv('OPENAI_API_KEY'))

# Alternative backend (Groq) -- use this instead of the client above:
# client = Groq(api_key=os.getenv('GROQ_API_KEY'))

# Model name sent with every chat completion request.
# Alternative model names: "llama3-70b-8192", "mixtral-8x7b-32768"
llm_model_name = "gpt-3.5-turbo-0125"

def analyze_sentiment(title, synopsis):
    """Ask the chat model for a sentiment assessment of a movie synopsis.

    Builds a prompt from ``title`` and ``synopsis``, sends it through the
    module-level ``client`` using the model named by ``llm_model_name``, and
    parses the reply as JSON.

    Args:
        title: Movie title interpolated into the prompt.
        synopsis: Movie synopsis interpolated into the prompt.

    Returns:
        The result of ``json.loads`` on the first choice's message content.
        The prompt asks for the keys "Sentiment Score", "Justification" and
        "Recommended Audience", but the reply is not validated here.

    Raises:
        Whatever the client call or ``json.loads`` raises (for example when
        the reply is not valid JSON).
    """
    # Doubled braces below are literal braces in the f-string (example JSON).
    prompt = f"""
        For the following movie title '{title}' and its synopsis '{synopsis}', analyze the synopsis of the movie and assign a sentiment score ranging from 0 to 10 (0 being extremely sad/negative, 1 being very sad, 2 being sad/negative, 3 being somewhat sad, 4 being sad closer to neutral, 5 being neutral, 6 being happy/positive closer to neutral, 7 being somewhat happy/positive, 8 being happy/positive, 9 being very happy, 10 being extremely happy/positive). 
        Consider the emotional tone of the synopsis, especially how the movie concludes. If the movie has a bittersweet or mature content/ending, take into account themes of sacrifice, loss, or unresolved conflicts, and assign a lower score. 
        Additionally, if the movie tackles controversial themes such as slavery, abortion, or war, consider lowering the score accordingly.
        Only provide a sentiment score along with a specific and insightful justification/reasoning as to why the movie received with the score and recommended audience in about 30 words. Output needs to be in a JSON format.
        
        Example JSON Output:
        {{
        	"Sentiment Score": 3,
        	"Justification": "This film depicts a poignant love story set against the backdrop of societal expectations and restrictions. The themes of love, sacrifice, and longing contribute to a somewhat sad and bittersweet overall tone.",
        	"Recommended Audience": "This film may resonate with viewers who enjoy slow-burn romance narratives and are open to exploring themes of societal constraints and the complexities of human relationships."
        }}
    """

    system_message = {
        "role": "system",
        "content": "Please generate output in JSON format exclusively, avoiding any additional text or explanations.",
    }
    user_message = {
        "role": "user",
        "content": prompt,
    }

    # A single, non-streaming completion constrained to JSON output.
    response = client.chat.completions.create(
        model=llm_model_name,
        messages=[system_message, user_message],
        response_format={"type": "json_object"},
        max_tokens=200,
        temperature=0.5,
        frequency_penalty=0,
        presence_penalty=0,
    )

    raw_reply = response.choices[0].message.content
    return json.loads(raw_reply)

def shorten_string(text, max_length=1000, keep=500):
    """Shorten a long string by keeping only its beginning and its end.

    Args:
        text: The string to shorten.
        max_length: Strings of this length or shorter are returned unchanged.
        keep: Number of characters kept from each end of a longer string.
            Negative values are treated as 0.

    Returns:
        ``text`` unchanged when it is at most ``max_length`` characters long,
        or when the kept head and tail would overlap (``2 * keep`` covers the
        whole string, so cutting would only duplicate content). Otherwise the
        first ``keep`` characters, the literal "...", and the last ``keep``
        characters.
    """
    if len(text) <= max_length:
        return text

    # A negative count would slice from the wrong end; treat it as "keep nothing".
    keep = max(keep, 0)

    # Head and tail together already span the full text: cutting would repeat
    # characters and produce something longer than the input.
    if 2 * keep >= len(text):
        return text

    head = text[:keep]
    # Explicit start index: text[-keep:] with keep == 0 is text[0:], i.e. the
    # whole string rather than an empty tail.
    tail = text[len(text) - keep:]
    return head + "..." + tail

In [81]:
from tqdm import tqdm
import pandas as pd

# Add the three result columns, each initialised to an empty string
df['sentiment_score'] = df['sentiment_reason'] = df['recommended_audience'] = ''

# Function to apply sentiment analysis and update DataFrame
def apply_sentiment_analysis(row):
    """Fill the sentiment columns of one row from the model's analysis.

    Calls ``analyze_sentiment`` with the row's title and its synopsis passed
    through ``shorten_string``, then copies the reply into the row. Keys
    missing from the reply become empty strings. Any exception is printed and
    all three columns are set to empty strings, so one failing row does not
    abort the run.

    Args:
        row: Mapping-like row providing 'title' and 'synopsis'.

    Returns:
        The same ``row`` object with 'sentiment_score', 'sentiment_reason' and
        'recommended_audience' set.
    """
    # (target column, key in the model's JSON reply)
    column_sources = (
        ('sentiment_score', 'Sentiment Score'),
        ('sentiment_reason', 'Justification'),
        ('recommended_audience', 'Recommended Audience'),
    )

    try:
        title = row['title']
        synopsis = shorten_string(row['synopsis'])
        analysis = analyze_sentiment(title, synopsis)
        for column, reply_key in column_sources:
            row[column] = analysis.get(reply_key, '')
    except Exception as e:
        print(f"Error processing row: {e}")
        # Reset every column, including any filled in before the failure.
        for column, _ in column_sources:
            row[column] = ''

    return row

# Register tqdm's pandas integration; this does not run anything itself, it only
# makes `progress_apply` (used by apply_with_backup below) available on DataFrames.
tqdm.pandas()

# Define a function to handle apply with backup
def apply_with_backup(func, df, *args, **kwargs):
    try:
        return df.progress_apply(func, *args, **kwargs)
    except Exception as e:
        print(f"Error occurred: {e}")
        print("Saving the data up to this point as backup.csv")
        df.to_csv("backup.csv", index=False)
        raise e

# Score every row (axis=1) with a progress bar; a backup is written if the run fails
df = apply_with_backup(func=apply_sentiment_analysis, df=df, axis=1)

df.head()

100%|█| 10638/10638 [5:19:36<00:00,  1.80s/it]


Unnamed: 0,id,title,summary,year,certificate,runtime,runtime_mins,rating,votes,director_1,...,cast_2,cast_3,genre_1,genre_2,genre_3,synopsis,img,sentiment_score,sentiment_reason,recommended_audience
0,15239678,Dune: Part Two,Paul Atreides unites with Chani and the Fremen...,2024,PG-13,2h 46m,166,8.7,360000,Denis Villeneuve,...,Zendaya,Rebecca Ferguson,Action,Adventure,Drama,Following the destruction of House Atreides by...,https://m.media-amazon.com/images/M/MV5BN2QyZG...,3,The movie 'Dune: Part Two' showcases themes of...,This movie is recommended for viewers who appr...
1,14539740,Godzilla x Kong: The New Empire,"Two ancient titans, Godzilla and Kong, clash i...",2024,PG-13,1h 55m,115,6.5,37000,Adam Wingard,...,Brian Tyree Henry,Dan Stevens,Action,Adventure,Fantasy,"Three years after defeating Mechagodzilla, Kon...",https://m.media-amazon.com/images/M/MV5BY2QwOG...,7,The movie ends on a positive note with Godzill...,This film is recommended for audiences who enj...
2,23137904,Rebel Moon - Part Two: The Scargiver,Kora and surviving warriors prepare to defend ...,2024,PG-13,2h 2m,122,5.2,17000,Zack Snyder,...,Djimon Hounsou,Ed Skrein,Action,Adventure,Drama,Kora and surviving warriors prepare to defend ...,https://m.media-amazon.com/images/M/MV5BYmQ2OD...,7,"The synopsis highlights themes of unity, resil...",This movie is recommended for audiences who en...
3,21692408,Kung Fu Panda 4,After Po is tapped to become the Spiritual Lea...,2024,PG,1h 34m,94,6.4,30000,Mike Mitchell,...,Awkwafina,Viola Davis,Animation,Action,Adventure,"On top of a mountain, a bunch of mountain goat...",https://m.media-amazon.com/images/M/MV5BZDY0Yz...,8,The synopsis portrays a heartwarming story of ...,This movie is recommended for audiences seekin...
4,1160419,Dune,A noble family becomes embroiled in a war for ...,2021,PG-13,2h 35m,155,8.0,851000,Denis Villeneuve,...,Rebecca Ferguson,Zendaya,Action,Adventure,Drama,The story opens with a woman telling a portion...,https://m.media-amazon.com/images/M/MV5BMDQ0Nj...,6,The synopsis portrays a mix of challenges and ...,Recommended for viewers who appreciate epic ta...


In [84]:
# Write the scored dataset to CSV, leaving out the DataFrame index
file_path = 'sentiment_latest_backup.csv'
df.to_csv(path_or_buf=file_path, index=False)