In [63]:
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig
import numpy as np
import pandas as pd
from langchain.prompts import PromptTemplate
from langchain_core.output_parsers import StrOutputParser

In [2]:
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_use_double_quant=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch.bfloat16
)

In [24]:
model_id = "google/gemma-1.1-2b-it"

model = AutoModelForCausalLM.from_pretrained(model_id, quantization_config=bnb_config, device_map={"":0})
tokenizer = AutoTokenizer.from_pretrained(model_id, add_eos_token=True)

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

In [36]:
from huggingface_hub import notebook_login
notebook_login()

VBox(children=(HTML(value='<center> <img\nsrc=https://huggingface.co/front/assets/huggingface_logo-noborder.sv…

In [26]:
text = "Quote: Imagination is more"
device = "cuda:0"
inputs = tokenizer(text, return_tensors="pt").to(device)

outputs = model.generate(**inputs, max_new_tokens=20)
print(tokenizer.decode(outputs[0], skip_special_tokens=True))

Quote: Imagination is morePowerful than knowledge.

- Albert Einstein

**Analysis:**

Albert Einstein's statement highlights the


In [90]:
def summarize_text(text_type: str, text: str, model) -> str:
  prompt_template = f"Please summarize the video {text_type}. given in {text}." \
                    f"Start your summary with the phrase The video summary based on the " \
                    f"{text_type} is: and provide the summary right after."
  parser = StrOutputParser()
  prompt = PromptTemplate(template=prompt_template,
                                        input_variables=["text_type", "text"])
  
  chain = prompt | model | parser
  return chain.invoke({"text_type": text_type, "text": text})
  

In [89]:
videos_df = pd.read_csv('csvs/videos_df.csv', index_col=0)

In [30]:
videos_df.head()

Unnamed: 0_level_0,video_description,video_transcript,video_name,channel_name,prompt
video_url,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
https://www.youtube.com/watch?v=Z_EliVUkuFA,Summary extraction error: Unexpected response ...,The video discusses the potential of Google De...,DeepMind’s New AI: Assistant From The Future!,Two Minute Papers,"[""A video about the limitations of Google Deep..."
https://www.youtube.com/watch?v=_2bzwNyIjkY,The video provides information about Andrew Pr...,The video highlights the many improvements in ...,Blender 4.1 - Create Virtual Worlds…For Free!,Two Minute Papers,['A video about the split viewer node in Blend...
https://www.youtube.com/watch?v=1YEX4t79e0Q,Summary extraction error: Unexpected response ...,"OpenAI’s text to video AI, Sora took the world...",OpenAI Sora: Beauty And Horror!,Two Minute Papers,"[""A video about the AI's ability to create abs..."
https://www.youtube.com/watch?v=IS0xphCc5rI,"The conference ""Fully Connected"" is about expl...",OpenAI’s Sora is a brilliant new text-to-video...,OpenAI Sora Just Supercharged Filmmaking!,Two Minute Papers,['A video about exploring the intersection bet...
https://www.youtube.com/watch?v=Y9cwnHor8es,The video provides information about a paper o...,The video highlights the advancements in artif...,NVIDIA GTC: This Is The Future Of Everything!,Two Minute Papers,['A video about the potential impact of artifi...


In [78]:
from langchain import HuggingFaceHub

model = HuggingFaceHub(repo_id="google/gemma-1.1-2b-it",
                       model_kwargs={"temperature":0.01, "max_length":100, "device_map": {"": 0}},
                       huggingfacehub_api_token='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')

In [91]:
comp = summarize_text("description", videos_df['video_description'][1], model)

  comp = summarize_text("description", videos_df['video_description'][1], model)


In [92]:
comp

"Please summarize the video description. given in The video provides information about Andrew Price's donut tutorial and a paper on simulations that look almost like reality. The video also mentions the Patreon supporters who make Two Minute Papers possible..Start your summary with the phrase The video summary based on the description is: and provide the summary right after.\n\n**The video summary based on the description is:**\n\nAndrew Price's donut tutorial provides a step-by-step guide on how to make delicious donuts. The video also showcases a paper that explores the potential for simulations to look remarkably similar to reality. Additionally, the video highlights the support of Patreon supporters who make Two Minute Papers possible.\n\n**Summary:**\n\nThe video provides information about Andrew Price's donut tutorial and a paper exploring the potential for simulations to look remarkably similar to reality."

In [123]:
dataset = videos_df[['video_description', 'video_name', 'prompt']]

In [124]:
dataset.head()

Unnamed: 0_level_0,video_description,video_name,prompt
video_url,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
https://www.youtube.com/watch?v=Z_EliVUkuFA,Summary extraction error: Unexpected response ...,DeepMind’s New AI: Assistant From The Future!,"[""A video about the limitations of Google Deep..."
https://www.youtube.com/watch?v=_2bzwNyIjkY,The video provides information about Andrew Pr...,Blender 4.1 - Create Virtual Worlds…For Free!,['A video about the split viewer node in Blend...
https://www.youtube.com/watch?v=1YEX4t79e0Q,Summary extraction error: Unexpected response ...,OpenAI Sora: Beauty And Horror!,"[""A video about the AI's ability to create abs..."
https://www.youtube.com/watch?v=IS0xphCc5rI,"The conference ""Fully Connected"" is about expl...",OpenAI Sora Just Supercharged Filmmaking!,['A video about exploring the intersection bet...
https://www.youtube.com/watch?v=Y9cwnHor8es,The video provides information about a paper o...,NVIDIA GTC: This Is The Future Of Everything!,['A video about the potential impact of artifi...


In [125]:
# Look into the first row of the dataset
row = dataset.iloc[1].to_dict()

In [126]:
row

{'video_description': "The video provides information about Andrew Price's donut tutorial and a paper on simulations that look almost like reality. The video also mentions the Patreon supporters who make Two Minute Papers possible.",
 'video_name': 'Blender 4.1 - Create Virtual Worlds…For Free!',
 'prompt': "['A video about the split viewer node in Blender 4.1.']"}

In [127]:
def prompt_row(row):
    return (f"Below is a video title and description summary. " \
            f"Write a prompt that can be used to adress the video description. " \
            f"### Description:\n{row['video_description']}\n\n ### title:\n{row['video_name']}").format_map(row)

In [128]:
row2 = prompt_row(row)

In [129]:
row2

"Below is a video title and description summary. Write a prompt that can be used to adress the video description. ### Description:\nThe video provides information about Andrew Price's donut tutorial and a paper on simulations that look almost like reality. The video also mentions the Patreon supporters who make Two Minute Papers possible.\n\n ### title:\nBlender 4.1 - Create Virtual Worlds…For Free!"

In [130]:
prompts = [prompt_row(row) for _, row in dataset.iterrows()]

In [131]:
len(prompts)

876

In [132]:
EOS_TOKEN = "</s>"
outputs = [row['prompt'] + EOS_TOKEN for _, row in dataset.iterrows()]

In [133]:
outputs[0]

'["A video about the limitations of Google DeepMind\'s Gemini 1.5 Pro AI in"]</s>'

In [136]:
dataset = [{"prompt":s, "output":t, "example": s+t} for s,t in zip(prompts, outputs)]

In [137]:
dataset[0]

{'prompt': 'Below is a video title and description summary. Write a prompt that can be used to adress the video description. ### Description:\nSummary extraction error: Unexpected response format.\n\n ### title:\nDeepMind’s New AI: Assistant From The Future!',
 'output': '["A video about the limitations of Google DeepMind\'s Gemini 1.5 Pro AI in"]</s>',
 'example': 'Below is a video title and description summary. Write a prompt that can be used to adress the video description. ### Description:\nSummary extraction error: Unexpected response format.\n\n ### title:\nDeepMind’s New AI: Assistant From The Future!["A video about the limitations of Google DeepMind\'s Gemini 1.5 Pro AI in"]</s>'}

In [None]:
model_id = 'google/gemma-1.1-2b-it'
tokenizer = AutoTokenizer.from_pretrained(model_id)
tokenizer.pad_token = tokenizer.eos_token