<a href="https://colab.research.google.com/github/zisuzlabs/esoteric-brewing-crowdfunding/blob/master/YouTube_Content_Generator.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [5]:
# YouTube Content Generator
# This notebook automatically generates video content for YouTube based on a given topic.

# Install required libraries
!pip install google-generativeai pillow moviepy google-cloud-texttospeech pyttsx3
!pip install diffusers huggingface-hub torch torchaudio torchvision tokenizers tqdm transformers
!apt-get update && apt-get install -y espeak ffmpeg libespeak1 imagemagick

# Import necessary libraries
import os
import io
import pyttsx3
import google.generativeai as genai
from PIL import Image
from moviepy.editor import *
from google.cloud import texttospeech
from moviepy.config import change_settings # Import for configuring MoviePy

import torch
from torch import autocast
from diffusers import StableDiffusionPipeline

import numpy as np

# Set up Google API credentials
from google.colab import userdata
os.environ['GOOGLE_API_KEY'] = userdata.get('GOOGLE_API_KEY')
os.environ['HUGGING_FACE_AUTH'] = userdata.get('HUGGING_FACE_AUTH')
genai.configure(api_key=os.environ['GOOGLE_API_KEY'])

# Modify ImageMagick policy to allow reading from /tmp
!sed -i '/<policy domain="path" rights="none" pattern="@\*"/d' /etc/ImageMagick-6/policy.xml

# Configure the path to the ImageMagick binary
change_settings({"IMAGEMAGICK_BINARY": "/usr/bin/convert"})

# Function to generate script using Gemini
def generate_script(topic):
    model = genai.GenerativeModel('gemini-pro')
    prompt = f"Write a short, engaging script about {topic} for a YouTube video. Include an introduction, 3 main points, and a conclusion."
    response = model.generate_content(prompt)
    return response.text


# Function to generate images using SDXL
def generate_images_sdxl(topic, num_images=4):
    modelid = "CompVis/stable-diffusion-v1-4"
    device = "cuda"
    pipe = StableDiffusionPipeline.from_pretrained(modelid, revision="fp16", torch_dtype=torch.float16, use_auth_token=os.environ['HUGGING_FACE_AUTH'])
    pipe.to(device)
    prompt = f"Create an image related to {topic}"
    images = []
    for _ in range(num_images):
        with autocast(device):
            image = pipe(prompt, guidance_scale=8.5)["images"][0]
            images.append(image)

    return images

# Function to generate images using Gemini
def generate_images(topic, num_images=4):
    model = genai.GenerativeModel('gemini-1.5-flash')
    prompt = f"Create an image related to {topic}"
    images = []
    for _ in range(num_images):
        response = model.generate_content(prompt)
        try:
          image = Image.open(io.BytesIO(response.image.data))
          images.append(image)
        except Exception as e: # Catching the exception to see why image generation is failing
            print(f"Error generating image: {e}")
    return images

# Function to generate voiceover using Google Text-to-Speech
def generate_voiceover(script):
    client = texttospeech.TextToSpeechClient()
    synthesis_input = texttospeech.SynthesisInput(text=script)
    voice = texttospeech.VoiceSelectionParams(
        language_code="en-US",
        name="en-US-Neural2-J"
    )
    audio_config = texttospeech.AudioConfig(
        audio_encoding=texttospeech.AudioEncoding.MP3
    )
    response = client.synthesize_speech(
        input=synthesis_input, voice=voice, audio_config=audio_config
    )
    with open("voiceover.mp3", "wb") as out:
        out.write(response.audio_content)
    return "voiceover.mp3"

# Function to generate voiceover using pyttsx3
def generate_voiceover_pyttsx3(script):
  engine = pyttsx3.init() # object creation
  engine.setProperty('rate', 150) # setting up new voice rate
  engine.setProperty('volume', 0.9) # setting up volume level  # can be more than 1
  voices = engine.getProperty('voices')
  engine.setProperty('voice', voices[1].id) #changing index, changes voices. o for male
  engine.say(script)
  engine.save_to_file(script, 'voiceover.mp3')
  engine.runAndWait()
  engine.stop()
  return "voiceover.mp3"

# Function to create video with captions
def create_video(images, audio_file, script):
    # Create clips from images
    clips = [ImageClip(np.array(img)).set_duration(5) for img in images]

    # Concatenate clips
    if clips: # Check if clips is not empty
      video = concatenate_videoclips(clips)

      # Add audio
      audio = AudioFileClip(audio_file)
      video = video.set_audio(audio)

      # Add captions
      sentences = script.split('. ')
      txt_clips = []
      for i, sentence in enumerate(sentences):
          txt_clip = TextClip(sentence, fontsize=24, color='white', bg_color='black',
                              size=(video.w, None), method='caption', align='South')
          txt_clip = txt_clip.set_pos(('center', 'bottom')).set_duration(5)
          txt_clips.append(txt_clip)

      video = CompositeVideoClip([video] + txt_clips)

      # Write video file
      video.write_videofile("output_video.mp4", fps=24)

# Main function to orchestrate the process
def generate_youtube_content(topic):
    print("Generating script...")
    script = generate_script(topic)

    print("Generating images...")
    images = generate_images_sdxl(topic)

    print("Generating voiceover...")
    audio_file = generate_voiceover_pyttsx3(script)

    print("Creating video...")
    create_video(images, audio_file, script)

    print("Video generation complete!")

# Example usage
topic = "The importance of artificial intelligence in modern society"
generate_youtube_content(topic)

Hit:1 http://security.ubuntu.com/ubuntu jammy-security InRelease
Hit:2 https://cloud.r-project.org/bin/linux/ubuntu jammy-cran40/ InRelease
Hit:3 https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2204/x86_64  InRelease
Hit:4 https://ppa.launchpadcontent.net/deadsnakes/ppa/ubuntu jammy InRelease
Hit:5 https://ppa.launchpadcontent.net/graphics-drivers/ppa/ubuntu jammy InRelease
Hit:6 https://ppa.launchpadcontent.net/ubuntugis/ppa/ubuntu jammy InRelease
Hit:7 http://archive.ubuntu.com/ubuntu jammy InRelease
Hit:8 http://archive.ubuntu.com/ubuntu jammy-updates InRelease
Hit:9 http://archive.ubuntu.com/ubuntu jammy-backports InRelease
Ign:10 https://r2u.stat.illinois.edu/ubuntu jammy InRelease
Hit:11 https://r2u.stat.illinois.edu/ubuntu jammy Release
Reading package lists... Done
W: Skipping acquire of configured file 'main/source/Sources' as repository 'https://r2u.stat.illinois.edu/ubuntu jammy InRelease' does not seem to provide it (sources.list entry misspelt?)
Reading pack


text_encoder/model.safetensors not found
Keyword arguments {'use_auth_token': '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'} are not expected by StableDiffusionPipeline and will be ignored.


Loading pipeline components...:   0%|          | 0/7 [00:00<?, ?it/s]

An error occurred while trying to fetch /root/.cache/huggingface/hub/models--CompVis--stable-diffusion-v1-4/snapshots/2880f2ca379f41b0226444936bb7a6766a227587/vae: Error no file named diffusion_pytorch_model.safetensors found in directory /root/.cache/huggingface/hub/models--CompVis--stable-diffusion-v1-4/snapshots/2880f2ca379f41b0226444936bb7a6766a227587/vae.
Defaulting to unsafe serialization. Pass `allow_pickle=False` to raise an error instead.

An error occurred while trying to fetch /root/.cache/huggingface/hub/models--CompVis--stable-diffusion-v1-4/snapshots/2880f2ca379f41b0226444936bb7a6766a227587/unet: Error no file named diffusion_pytorch_model.safetensors found in directory /root/.cache/huggingface/hub/models--CompVis--stable-diffusion-v1-4/snapshots/2880f2ca379f41b0226444936bb7a6766a227587/unet.
Defaulting to unsafe serialization. Pass `allow_pickle=False` to raise an error instead.


  0%|          | 0/50 [00:00<?, ?it/s]

  0%|          | 0/50 [00:00<?, ?it/s]

  0%|          | 0/50 [00:00<?, ?it/s]

  0%|          | 0/50 [00:00<?, ?it/s]

Generating voiceover...


Exception ignored on calling ctypes callback function: <bound method EspeakDriver._onSynth of <pyttsx3.drivers.espeak.EspeakDriver object at 0x7b183871e740>>
Traceback (most recent call last):
  File "/usr/local/lib/python3.10/dist-packages/pyttsx3/drivers/espeak.py", line 162, in _onSynth
    self._proxy.notify('started-word',
ReferenceError: weakly-referenced object no longer exists
Exception ignored on calling ctypes callback function: <bound method EspeakDriver._onSynth of <pyttsx3.drivers.espeak.EspeakDriver object at 0x7b183871e740>>
Traceback (most recent call last):
  File "/usr/local/lib/python3.10/dist-packages/pyttsx3/drivers/espeak.py", line 162, in _onSynth
    self._proxy.notify('started-word',
ReferenceError: weakly-referenced object no longer exists
Exception ignored on calling ctypes callback function: <bound method EspeakDriver._onSynth of <pyttsx3.drivers.espeak.EspeakDriver object at 0x7b183871e740>>
Traceback (most recent call last):
  File "/usr/local/lib/python3.

Creating video...


Exception ignored on calling ctypes callback function: <bound method EspeakDriver._onSynth of <pyttsx3.drivers.espeak.EspeakDriver object at 0x7b183871e740>>
Traceback (most recent call last):
  File "/usr/local/lib/python3.10/dist-packages/pyttsx3/drivers/espeak.py", line 162, in _onSynth
    self._proxy.notify('started-word',
ReferenceError: weakly-referenced object no longer exists
Exception ignored on calling ctypes callback function: <bound method EspeakDriver._onSynth of <pyttsx3.drivers.espeak.EspeakDriver object at 0x7b183871e740>>
Traceback (most recent call last):
  File "/usr/local/lib/python3.10/dist-packages/pyttsx3/drivers/espeak.py", line 162, in _onSynth
    self._proxy.notify('started-word',
ReferenceError: weakly-referenced object no longer exists
Exception ignored on calling ctypes callback function: <bound method EspeakDriver._onSynth of <pyttsx3.drivers.espeak.EspeakDriver object at 0x7b183871e740>>
Traceback (most recent call last):
  File "/usr/local/lib/python3.

Moviepy - Building video output_video.mp4.
MoviePy - Writing audio in output_videoTEMP_MPY_wvf_snd.mp3




MoviePy - Done.
Moviepy - Writing video output_video.mp4





Moviepy - Done !
Moviepy - video ready output_video.mp4
Video generation complete!


1. Functions for:
   - Generating a script using Gemini
   - Generating images using Gemini
   - Creating voiceover using Google Text-to-Speech
   - Creating a video with captions using MoviePy
2. A main function to orchestrate the entire process