In [None]:
from langchain_core.prompts import PromptTemplate
from langchain_google_genai.llms import GoogleGenerativeAI

from dotenv import load_dotenv
from os import environ

from ratelimit import limits, sleep_and_retry

# Load key/value pairs from a .env file (python-dotenv) into the process
# environment; the next cell expects GOOGLE_API_KEY to be present there.
load_dotenv()

True

In [2]:
# The key must already be in the environment; a missing key raises KeyError here.
api = environ["GOOGLE_API_KEY"]

# Gemini client reused by the cells below.
llm = GoogleGenerativeAI(
    google_api_key=api,
    model="gemini-1.5-flash-8b",
)

# Few-shot instruction prompt for extracting the main film title from a post.
# `{post}` is the only template variable; it is filled in at invoke time.
# NOTE(review): the examples are labelled "Input:"/"Output:" while the real
# post is injected under "Post:" with no trailing "Output:" cue, and there is
# no instruction for posts that discuss no film -- the saved results contain
# at least one free-text answer instead of a title. Consider revising the
# prompt (this would change model behaviour, so it is left untouched here).
template = """
Your task is to perform NER task on a posts with film discussion from Reddit.
You have a reddit post which is written in a Markdown format. Analyse it and extract what is a title of a film that was discussed.
As an output give the only title of a film without any explanation or additional data.
If there are many films that were mentioned, choose only one, which is a main theme of a post.
Since post is in a markdown format, do not pay attention to links and any special signs, focus only on a text.

For example:
Input: "Ask me, what is a fucking best film I've ever watched and I'll say its a forest gamp. This film is way better than new Anora film. but ok maybe anora is also a good film idk"
Output: "Forest Gamp"

Input: "There are a lot of science fiction films like Star Wars, Arrival or Alien, but there is one which I consider the best. Interstellar is a masterpiece, the best science fiction which everyone should watch"
Output: "Interstellar"

Input: "I don't know why people watch films like this????? There are a lot of good musicals like La La Land, Wicked or Grease. For me Barbie its just another hyping bullshit that will be forgotten like in a 2 weeks."
Output: "Barbie"

Post: {post}
"""

# Compile the raw string into a LangChain prompt template object.
prompt = PromptTemplate.from_template(template=template)

# Smoke test: run the prompt on a single sample post and display the answer.
sample_post = " So I was just watching the classic film *Gentlemen Prefer Blondes* last night and found it ultimately works because the friendship between the two leading ladies is so solid and believable, even though they're complete opposites in every way. However, my only pet peeve about this film is the scene where Marilyn's character gets stuck in the porthole window. A part of me kind of wishes the kid found a way to sneak into the cabin room and lend Marilyn's character a helping hand from behind where she was trying to escape from. I'm sure it would've still turned out to be a funny scene regardless. What a missed opportunity it was for the kid actor and the directors."
message = prompt.invoke({"post": sample_post})
llm.invoke(message)

'Gentlemen Prefer Blondes'

In [5]:
import json

# Load the Reddit posts stored in reddit_data.json.
# The encoding is given explicitly: the posts contain non-ASCII characters
# (curly quotes, non-breaking spaces) and open() would otherwise fall back to
# the platform's locale encoding, which is not UTF-8 everywhere.
with open("reddit_data.json", "r", encoding="utf-8") as f:
    reddit_data = json.load(f)

# Peek at the first record to check its fields (id, title, selftext, ...).
reddit_data[0]

{'id': '1l29tb6',
 'title': 'Mission: Impossible 8 (The Final Reckoning)',
 'selftext': "If you’ve seen some or all of the previous seven *Mission: Impossible* movies, there’s no reason why you shouldn’t see *Mission: Impossible - The Final Reckoning*.\xa0 The movie delivers more than enough entertainment to justify the time and/or the money you’ll spend watching it.\xa0 There are two exceptional action sequences, featuring Tom Cruise yet again risking life and limb for moviegoers.\xa0 The cast from the last movie is augmented by at least a dozen new supporting characters played by recognizable actors (Tramell Tillman, Hannah Waddingham, Nick Offerman, Katy O'Brian, Holt McCallany).\xa0 There are also numerous callbacks to previous installments that will thrill fans of the franchise.\xa0 However, even with everything it has going for it, the movie is not the giddy thrill-ride it should be.\n\nWhere [*Dead Reckoning*](https://detroitcineaste.net/2023/07/31/mission-impossible-dead-reckon

In [6]:
# Text sent to the model for each post: title and body joined by a newline,
# keyed by the post id.
reviews = dict(
    (record["id"], "\n".join([record["title"], record["selftext"]]))
    for record in reddit_data
)
reviews

{'1l29tb6': "Mission: Impossible 8 (The Final Reckoning)\nIf you’ve seen some or all of the previous seven *Mission: Impossible* movies, there’s no reason why you shouldn’t see *Mission: Impossible - The Final Reckoning*.\xa0 The movie delivers more than enough entertainment to justify the time and/or the money you’ll spend watching it.\xa0 There are two exceptional action sequences, featuring Tom Cruise yet again risking life and limb for moviegoers.\xa0 The cast from the last movie is augmented by at least a dozen new supporting characters played by recognizable actors (Tramell Tillman, Hannah Waddingham, Nick Offerman, Katy O'Brian, Holt McCallany).\xa0 There are also numerous callbacks to previous installments that will thrill fans of the franchise.\xa0 However, even with everything it has going for it, the movie is not the giddy thrill-ride it should be.\n\nWhere [*Dead Reckoning*](https://detroitcineaste.net/2023/07/31/mission-impossible-dead-reckoning-part-1/) was self-assured a

In [9]:
from tqdm import tqdm

# Accumulator filled in by extract_title(): review id -> model answer.
extracted_titles = dict()

@sleep_and_retry
@limits(calls=15, period=60)
def extract_title(review_id, review):
    """Ask the LLM for the main film title of one post and record it.

    The prompt template is filled with ``review`` and sent to ``llm``. The raw
    answer is stored in the module-level ``extracted_titles`` dict under
    ``review_id`` and is also returned, so callers do not have to rely on the
    global side effect.

    Calls are throttled by the decorators: ``limits`` allows at most 15 calls
    per 60-second window, and ``sleep_and_retry`` waits for the window to
    reset instead of letting the rate-limit exception propagate.

    Args:
        review_id: Key under which the answer is stored.
        review: Post text substituted for ``{post}`` in the prompt.

    Returns:
        Whatever ``llm.invoke`` returned for this post (not validated or
        cleaned).
    """
    request = prompt.invoke({"post": review})
    film_title = llm.invoke(request)
    extracted_titles[review_id] = film_title
    # Returning the answer is backward-compatible: existing callers that
    # ignore the return value keep working unchanged.
    return film_title

# Run the extraction over every post, with a tqdm progress bar around the
# (id, text) pairs.
review_items = tqdm(reviews.items(), desc="Processing reviews")
for review_id, review in review_items:
    extract_title(review_id, review)

Processing reviews: 100%|██████████| 400/400 [26:06<00:00,  3.92s/it]  


In [10]:
# Display the collected review id -> model answer mapping for a visual check.
extracted_titles

{'1l29tb6': 'Mission: Impossible - The Final Reckoning',
 '1l27rr5': 'Nonnas',
 '1l257u6': 'Conan the Barbarian',
 '1l20yu5': 'The Life of Chuck',
 '1l1wjhw': 'Mission: Impossible – The Final Reckoning',
 '1l1lwvh': 'Friendship',
 '1l1cw5o': 'Anora',
 '1l16261': 'Bring her back',
 '1l0wdp4': 'PREDATOR: KILLER OF KILLERS',
 '1l0tsou': 'Mission: Impossible – Dead Reckoning Part Two',
 '1l0mv9k': 'Spider-man: Homecoming',
 '1l0havk': '```\n```\nNo film titles are mentioned in the provided text.',
 '1kzlfg7': 'Mission: Impossible - The Final Reckoning',
 '1kzkrar': 'Lilo & Stitch',
 '1kzf3no': 'The Legend of Ochi',
 '1kzf1cc': 'Karate Kid: Legends',
 '1kz6zsa': 'Star Wars: Return of the Jedi',
 '1kyxxr6': 'Black Widow',
 '1kyxx7p': 'Captain America: Civil War',
 '1kyxv87': 'Star Wars',
 '1kyxr5p': 'American Psycho',
 '1kyxq3f': 'American Beauty',
 '1kyxolu': 'Star Wars: Empire Strikes Back',
 '1kyxnx2': 'Star Wars: A New Hope',
 '1kyxm7r': 'Avengers: Age of Ultron',
 '1kyulhb': 'Karate Kid

In [11]:
# Persist the id -> title mapping so the model run does not have to be repeated.
with open("extracted_titles.json", "w") as titles_file:
    json.dump(extracted_titles, fp=titles_file)

In [None]:
import json

# Read the saved id -> title mapping back from disk and display it.
with open("extracted_titles.json", "r") as titles_file:
    temp = json.loads(titles_file.read())
temp

{'1l29tb6': 'Mission: Impossible - The Final Reckoning',
 '1l27rr5': 'Nonnas',
 '1l257u6': 'Conan the Barbarian',
 '1l20yu5': 'The Life of Chuck',
 '1l1wjhw': 'Mission: Impossible – The Final Reckoning',
 '1l1lwvh': 'Friendship',
 '1l1cw5o': 'Anora',
 '1l16261': 'Bring her back',
 '1l0wdp4': 'PREDATOR: KILLER OF KILLERS',
 '1l0tsou': 'Mission: Impossible – Dead Reckoning Part Two',
 '1l0mv9k': 'Spider-man: Homecoming',
 '1l0havk': '```\n```\nNo film titles are mentioned in the provided text.',
 '1kzlfg7': 'Mission: Impossible - The Final Reckoning',
 '1kzkrar': 'Lilo & Stitch',
 '1kzf3no': 'The Legend of Ochi',
 '1kzf1cc': 'Karate Kid: Legends',
 '1kz6zsa': 'Star Wars: Return of the Jedi',
 '1kyxxr6': 'Black Widow',
 '1kyxx7p': 'Captain America: Civil War',
 '1kyxv87': 'Star Wars',
 '1kyxr5p': 'American Psycho',
 '1kyxq3f': 'American Beauty',
 '1kyxolu': 'Star Wars: Empire Strikes Back',
 '1kyxnx2': 'Star Wars: A New Hope',
 '1kyxm7r': 'Avengers: Age of Ultron',
 '1kyulhb': 'Karate Kid

In [7]:
import pandas as pd

# Export the mapping as CSV: the dict keys become the index column and the
# titles the single value column.
# NOTE(review): the Series is unnamed, so the header row has no meaningful
# column labels -- confirm what downstream readers expect before renaming.
titles_by_id = pd.Series(temp)
titles_by_id.to_csv("id_to_titles.csv")