# "Youtube Transcript Summarization using HuggingFace Transformers"
> "Use youtube_transcript_api to fetch the transcript of a YouTube video, then use the BART transformer model to summarize it."

- toc: true
- branch: master
- badges: true
- comments: true
- author: Tracy Shields
- categories: [transformers, python, nlp, youtube,]


In [1]:
#Hide
!pip install transformers



In [2]:
#Hide
!pip install youtube_transcript_api



In [3]:
from youtube_transcript_api import YouTubeTranscriptApi
from transformers import BartTokenizer, BartForConditionalGeneration, BartConfig
import urllib.request
import json
import urllib
from pprint import pprint

In [4]:
def _extract_video_id(url):
  """Return the video id contained in a YouTube watch URL or youtu.be short link."""
  parsed = urllib.parse.urlparse(url.strip())

  # Watch URLs carry the id in the `v` query parameter. Parsing the query keeps
  # trailing parameters such as `&t=42s` or `&list=...` out of the id.
  ids = urllib.parse.parse_qs(parsed.query).get("v")
  if ids:
    return ids[0]

  # Short links (https://youtu.be/<id>) carry the id as the first path segment.
  if (parsed.hostname or "").lower() == "youtu.be":
    short_id = parsed.path.strip("/").split("/")[0]
    if short_id:
      return short_id

  # Last resort: the plain split this function historically relied on, for
  # URLs where "watch?v=" does not sit in the query component.
  if "watch?v=" in url:
    return url.split("watch?v=")[1]

  raise ValueError("Could not find a YouTube video id in URL: %r" % (url,))


def transcribe_video_from_url(url):
  """Fetch the transcript text and the title of a YouTube video.

  Args:
    url: A YouTube watch URL (``...watch?v=<id>``, extra query parameters are
      ignored) or a ``youtu.be/<id>`` short link.

  Returns:
    A ``(transcript, video_title)`` tuple. ``transcript`` is every transcript
    segment's text, each prefixed with a single space (so the string starts
    with a space); ``video_title`` is the ``title`` field of YouTube's oEmbed
    response.

  Raises:
    ValueError: If no video id can be extracted from ``url``.

  Errors raised by the transcript API or by the oEmbed HTTP request are not
  caught here and propagate to the caller.
  """
  video_id = _extract_video_id(url)

  # Get transcript if transcript available
  if hasattr(YouTubeTranscriptApi, "get_transcript"):
    segments = YouTubeTranscriptApi.get_transcript(video_id)
  else:
    # Newer youtube_transcript_api releases drop the static helper in favour
    # of an instance-based fetch(); to_raw_data() yields the same list of dicts.
    segments = YouTubeTranscriptApi().fetch(video_id).to_raw_data()
  transcript = "".join(" " + segment["text"] for segment in segments)

  # Get video title
  params = {"format": "json", "url": "https://www.youtube.com/watch?v=%s" % video_id}
  oembed_url = "https://www.youtube.com/oembed" + "?" + urllib.parse.urlencode(params)
  with urllib.request.urlopen(oembed_url) as response:
    data = json.loads(response.read().decode())
  return transcript, data["title"]



In [5]:
# Download models from transformers module
# The model and tokenizer must come from the same checkpoint, so the name is
# defined once and shared by both loads.
MODEL_CHECKPOINT = 'facebook/bart-large-cnn'
model = BartForConditionalGeneration.from_pretrained(MODEL_CHECKPOINT)
tokenizer = BartTokenizer.from_pretrained(MODEL_CHECKPOINT)

In [6]:
def summarize_transcript(transcript):
  """Summarize a transcript with the notebook-level BART ``model``/``tokenizer``.

  The input is truncated to at most 1024 tokens before generation, so for a
  long transcript only the beginning contributes to the summary.

  Args:
    transcript: The transcript text to summarize.

  Returns:
    A list with one decoded summary string per generated sequence (a single
    transcript is encoded, as a batch of one).
  """
  # Calling the tokenizer directly is the supported replacement for the
  # deprecated batch_encode_plus and accepts the same keyword arguments.
  inputs = tokenizer([transcript], max_length=1024, return_tensors='pt', truncation=True)
  # Forward the attention mask explicitly rather than leaving generate() to infer it.
  summary_ids = model.generate(inputs['input_ids'], attention_mask=inputs['attention_mask'])
  return tokenizer.batch_decode(summary_ids, skip_special_tokens=True, clean_up_tokenization_spaces=False)

In [7]:
# Watch URL of the video used as the running example in the cells below.
example_url = "https://www.youtube.com/watch?v=igUEGiQgZhA"

> youtube: https://www.youtube.com/watch?v=igUEGiQgZhA

In [8]:
# Fetch the transcript text and the title for the example video
# (transcribe_video_from_url performs network requests).
example_transcript, video_title = transcribe_video_from_url(example_url)
# Show the transcript length in characters next to the video title.
len(example_transcript), video_title

(17125,
 'Kwik Brain Episode 16: My Morning Routine - How to Jumpstart Your Brain & Day')

In [9]:
# summarize_transcript returns a list of summary strings.
example_summary = summarize_transcript(example_transcript)
# pprint wraps the long summary string over several lines for readability.
pprint(example_summary)

['Jim quick shows you how to jump-start your brain and your day for greater '
 'productivity and peace of mind. The first thing he does when he wakes up is '
 'to recall his dreams. After that he makes his bed and takes his supplements. '
 'He also gives you some tips on how to get fast and restful sleep.']
