<a href="https://colab.research.google.com/github/yotors/Laravel/blob/main/podcast.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
!pip install PyPDF2
!pip install google-api-python-client google-auth-httplib2 google-auth-oauthlib


Collecting PyPDF2
  Downloading pypdf2-3.0.1-py3-none-any.whl.metadata (6.8 kB)
Downloading pypdf2-3.0.1-py3-none-any.whl (232 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m232.6/232.6 kB[0m [31m3.8 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: PyPDF2
Successfully installed PyPDF2-3.0.1


In [None]:
from google.colab import userdata
from googleapiclient.discovery import build
from googleapiclient.http import MediaFileUpload
from google.colab import auth
from google.auth.transport.requests import Request
from google.oauth2.credentials import Credentials
from IPython.display import display, Javascript
import google.generativeai as genai
import PyPDF2
import os
import requests
import io
import time
import subprocess
import json

In [None]:
def upload_pdf(pdf_url: str) -> str:
    """Downloads a PDF from a URL and saves it to a local file.

    The response body is streamed to disk in chunks, so the whole document is
    never held in memory at once.

    Args:
        pdf_url: URL of the PDF file.

    Returns:
        Path of the local file the PDF was written to ('pdf_file', relative to
        the current working directory). Note that this is a path, not the
        PDF's bytes.

    Raises:
        requests.HTTPError: If the server answers with an error status.
        requests.RequestException: On connection problems or timeouts.
    """
    pdf_path = 'pdf_file'
    # The timeout keeps a stalled server from hanging the notebook; the `with`
    # block releases the streamed connection even if writing to disk fails.
    with requests.get(pdf_url, stream=True, timeout=30) as response:
        response.raise_for_status()
        with open(pdf_path, 'wb') as f:
            for chunk in response.iter_content(chunk_size=8192):
                if chunk:  # ignore empty keep-alive chunks
                    f.write(chunk)
    return pdf_path


In [None]:
# Download the source paper. Despite its name, `pdf_content` holds the local
# file path returned by upload_pdf, not the PDF's bytes.
pdf_content = upload_pdf('http://research.google.com/pubs/archive/34939.pdf')

In [None]:
# Show the path the PDF was saved to.
pdf_content

'pdf_file'

In [None]:
def extract_text_from_pdf(pdf_bytes: str)-> str:
    """Extracts the text content of a PDF.

    Args:
        pdf_bytes: Despite the name, this value is handed straight to
            PyPDF2.PdfReader; in this notebook it is the path of the
            downloaded PDF file.

    Returns:
        The text of all pages concatenated in page order, or None if
        extraction fails (the error is printed).
    """
    try:
        pdf_reader = PyPDF2.PdfReader(pdf_bytes)
        # A page that yields no text (None or "") contributes an empty string
        # instead of aborting the whole extraction; join avoids repeated
        # string re-allocation on long documents.
        return "".join((page.extract_text() or "") for page in pdf_reader.pages)
    except Exception as e:
        print(f"Error extracting text from PDF: {e}")
        return None


In [None]:
# Pull the plain text out of the downloaded PDF (None if extraction failed).
extracted_text = extract_text_from_pdf(pdf_content)

In [None]:
def summarize_with_gemini(text:str) -> str:
    """Turns a document into a two-person podcast script using the Gemini API.

    The API key is read from the Colab secret 'API_KEY'.

    Args:
        text: The text to summarize.

    Returns:
        The generated script as plain text, or None if `text` is empty or the
        request failed (the reason is printed).
    """
    # Nothing to summarize (e.g. PDF extraction returned None): do not spend an
    # API call on an empty prompt.
    if not text or not text.strip():
        print("No text was provided to summarize")
        return None

    model_name = "gemini-2.0-flash"
    try:
        # Imported locally on purpose: a later cell in this notebook runs
        # `from google import genai`, which rebinds the global name `genai` to
        # a different SDK that has no `configure`. A local import keeps this
        # function working regardless of cell execution order.
        import google.generativeai as generativeai

        generativeai.configure(api_key=userdata.get('API_KEY'))
        generation_config = generativeai.GenerationConfig(
            temperature=0.7,
            top_p=1,
            top_k=1,
            max_output_tokens=2048,
        )
        model = generativeai.GenerativeModel(model_name=model_name, generation_config=generation_config)
        prompt = f"""
        Please summarize the following document into plain text suitable for a podcast script between two persons, Alisa and Bob. Do not use any markdown formatting.

        {text}
        """
        response = model.generate_content(prompt)
        if response.text:
            return response.text
        print("The Gemini API didn't return any response")
        return None
    except Exception as e:
        print(f"Error using Gemini API: {e}")
        return None


In [None]:


# summarize_with_gemini takes only the text; it reads the API key from the
# Colab secrets itself.
summarized_text = summarize_with_gemini(extracted_text)

In [None]:
# Inspect the generated podcast script.
summarized_text

'Alisa: Hey Bob, heard about this interesting paper on how to represent programs for AI?\n\nBob: Oh yeah? What\'s it about? I always thought programs were just… code.\n\nAlisa: Well, the paper argues that for AI to really learn and reason, especially for Artificial General Intelligence (AGI), the way we *represent* programs matters a lot. Traditional machine learning uses simple data formats, but real-world stuff is complex with relationships, hierarchies, and all that.\n\nBob: So, programs are a way to handle that complexity?\n\nAlisa: Exactly! The paper says programs are well-defined, compact, and hierarchical. Unlike natural language, programs aren\'t ambiguous. They can compress data and are built from sub-programs.\n\nBob: Okay, I get the structure part. But why not just use neural networks?\n\nAlisa: The paper mentions that! Neural nets can be opaque and inefficient. This paper is looking for something better than just throwing tons of processing power at the problem.\n\nBob: So,

In [None]:
def generate_podcast_from_text(text:str, api_url:str, api_key:str, voice_id:str, output_file:str) -> str:
    """Converts text to speech through a text-to-speech HTTP API and saves the audio.

    Sends a POST to `{api_url}/text-to-speech/{voice_id}` with the key in the
    `xi-api-key` header and streams the returned audio into `output_file`.

    Args:
        text: The text to be spoken.
        api_url: Base URL of the API (a trailing slash is tolerated).
        api_key: API key for the service.
        voice_id: Identifier of the voice to synthesize with.
        output_file: The path to save the generated audio file.

    Returns:
        `output_file` on success, or None if the text is empty, the API answers
        with a non-200 status, or the request fails (the reason is printed).
    """
    # An empty request can only produce an API error; fail early instead.
    if not text or not text.strip():
        print("Error calling the AI API: no text to synthesize")
        return None

    try:
        headers = {
            "xi-api-key": api_key,
            "Content-Type": "application/json"
        }
        data = {
            "text": text,
            "model_id": "eleven_multilingual_v1",
            "voice_settings": {
                "stability": 0.5,
                "similarity_boost": 0.5
            }
        }
        # Normalize the base URL so "…/v1" and "…/v1/" hit the same endpoint.
        endpoint = f"{api_url.rstrip('/')}/text-to-speech/{voice_id}"
        # The timeout prevents a hung connection from blocking the notebook;
        # `with` closes the streamed response on every path.
        with requests.post(endpoint, headers=headers, json=data, stream=True, timeout=120) as response:
            if response.status_code != 200:
                print(f"Error with API request, status code {response.status_code}: {response.content}")
                return None
            with open(output_file, "wb") as f:
                for chunk in response.iter_content(chunk_size=512):
                    if chunk:
                        f.write(chunk)
        return output_file

    except Exception as e:
        print(f"Error calling the AI API: {e}")
        return None

In [None]:
def generate_duo_podcast(text:str)-> str:
    """Generates a two-voice podcast by alternating voices between paragraphs.

    The text is split on blank lines; each non-empty paragraph is synthesized
    with generate_podcast_from_text, alternating between two fixed voice ids,
    and the resulting clips are appended into 'output_podcast.mp3'. The API
    key is read from the Colab secret 'ELEVEN'.

    Args:
        text: The full text content.

    Returns:
        Path to the merged audio file, or None if the text is empty or any
        step fails (the reason is printed).
    """
    api_url = "https://api.elevenlabs.io/v1"
    voice_id_1 = "9BWtsMINqrJLrRacOk9x"
    voice_id_2 = "cjVigY5qzO86Huf0OWal"
    output_file = "output_podcast.mp3"

    # Drop blank paragraphs (extra blank lines, trailing newlines): sending
    # them to the API would fail and abort the whole podcast.
    paragraphs = [p.strip() for p in (text or "").split("\n\n") if p.strip()]
    if not paragraphs:
        print("Error in creating an intermediate audio file")
        return None

    api_key = userdata.get('ELEVEN')
    attempted_files = []  # every temp path we may have written to
    produced_files = []   # clips that were synthesized successfully, in order

    try:
        for i, paragraph in enumerate(paragraphs):
            speaker_id = voice_id_1 if i % 2 == 0 else voice_id_2
            temp_file = f"temp_audio_{i}.mp3"
            attempted_files.append(temp_file)

            temp_audio_path = generate_podcast_from_text(paragraph, api_url, api_key, speaker_id, temp_file)
            if not temp_audio_path:
                print("Error in creating an intermediate audio file")
                return None
            produced_files.append(temp_audio_path)

        # concatenate the audio files (simple sequential byte-level combination)
        try:
            with open(output_file, "wb") as final_audio:
                for audio_file in produced_files:
                    with open(audio_file, "rb") as f:
                        final_audio.write(f.read())
        except Exception as e:
            print(f"Error concatenating audio files: {e}")
            return None

        return output_file
    finally:
        # Temp clips are removed on success and on every failure path,
        # including partially written files.
        for file_path in set(attempted_files + produced_files):
            try:
                os.remove(file_path)
            except OSError:
                pass

In [None]:

# generate_duo_podcast takes only the script text; the API URL, key, voice ids
# and output path are defined inside the function.
generate_duo_podcast(summarized_text)

Error with API request, status code 401: b'{"detail":{"status":"quota_exceeded","message":"This request exceeds your quota of 10000. You have 134 credits remaining, while 206 credits are required for this request."}}'
Error in creating an intermediate audio file


In [None]:
import os
import subprocess

def create_video_from_audio_ffmpeg(audio_filepath:str, output_filepath:str, image_filepath:str) -> str:
    """
    Creates a video from an audio file using FFmpeg.

    The audio stream is copied unchanged; the picture is either a still image
    scaled and padded to 1280x720, or a plain black 1280x720 background.

    Args:
        audio_filepath: Path to the input audio file.
        output_filepath: Path to the output video file (overwritten if present).
        image_filepath: Path to an image to use as static background. Pass a
            falsy value (None or "") to render a black background instead.

    Returns:
        `output_filepath` on success, or None if FFmpeg fails (the error is
        printed).

    Raises:
        FileNotFoundError: If the audio file, or a given image file, does not exist.
    """
    import shutil  # local import: only needed for the ffmpeg lookup below

    # Validate inputs first so a bad path fails fast, before any package
    # installation is attempted.
    if not os.path.exists(audio_filepath):
        raise FileNotFoundError(f"Audio file not found: {audio_filepath}")
    if image_filepath and not os.path.exists(image_filepath):
        raise FileNotFoundError(f"Image file not found: {image_filepath}")

    # Install ffmpeg if not already present anywhere on PATH
    if shutil.which('ffmpeg') is None:
        print("Installing ffmpeg...")
        subprocess.run(['apt-get', 'update'], check=True)
        subprocess.run(['apt-get', 'install', '-y', 'ffmpeg'], check=True)

    # Build the FFmpeg command
    if image_filepath:
        command = [
            'ffmpeg',
            '-loop', '1',              # repeat the single image for the whole duration
            '-i', image_filepath,
            '-i', audio_filepath,
            '-vf', 'scale=1280:720:force_original_aspect_ratio=decrease,pad=1280:720:(ow-iw)/2:(oh-ih)/2,setsar=1',
            '-c:v', 'libx264',
            '-tune', 'stillimage',
            '-pix_fmt', 'yuv420p',
            '-c:a', 'copy',
            '-shortest',               # stop when the audio ends
            '-y',
            output_filepath
        ]
    else:
        command = [
            'ffmpeg',
            '-f', 'lavfi',
            '-i', 'color=c=black:s=1280x720:r=25',
            '-i', audio_filepath,
            '-c:v', 'libx264',
            '-pix_fmt', 'yuv420p',
            '-c:a', 'copy',
            '-shortest',
            '-y',
            output_filepath
        ]

    try:
        print("Running FFmpeg command:")
        print(' '.join(command))
        subprocess.run(command, check=True)
        print(f"Successfully created video: {output_filepath}")
        return output_filepath
    except subprocess.CalledProcessError as e:
        # stderr is not captured by the call above, so this normally falls
        # back to the exception's own message.
        print(f"FFmpeg error: {e.stderr.decode() if e.stderr else str(e)}")
        return None
    except Exception as e:
        print(f"Error: {str(e)}")
        return None

In [None]:
# Render the podcast audio over a still image.
# NOTE(review): these are absolute paths at the filesystem root, whereas
# generate_duo_podcast writes "output_podcast.mp3" relative to the working
# directory — confirm the audio file really is where this cell expects it.
audio_file = "/output_podcast.mp3"
output_video = "/output_video.mp4"
image_filepath = '/symbolic.png'
create_video_from_audio_ffmpeg(audio_file, output_video, image_filepath)

Running FFmpeg command:
ffmpeg -loop 1 -i /symbolic.png -i /output_podcast.mp3 -vf scale=1280:720:force_original_aspect_ratio=decrease,pad=1280:720:(ow-iw)/2:(oh-ih)/2,setsar=1 -c:v libx264 -tune stillimage -pix_fmt yuv420p -c:a copy -shortest -y /output_video.mp4
Successfully created video: /output_video.mp4


'/output_video.mp4'

In [None]:

def get_authenticated_service() -> str:
    """Builds an authenticated YouTube Data API v3 client from OAuth 2.0 tokens.

    The OAuth token response (a JSON object such as the one produced by
    Google's OAuth Playground) is read from the Colab secret 'YOUTUBE_OAUTH'
    rather than being embedded in the notebook. Expected keys:
    'access_token' and/or 'refresh_token'; 'client_id' and 'client_secret'
    are passed through when present.

    SECURITY: tokens must never be committed to the notebook or printed into
    cell output. Any token that was previously embedded here should be
    considered leaked and revoked.

    NOTE(review): the `-> str` annotation is inaccurate (the value returned is
    whatever `build` produces); it is left untouched because this function is
    passed to the model as a tool — confirm before changing the signature.

    Returns:
        Authenticated YouTube API service client.

    Raises:
        ValueError: If the secret is empty or contains neither an access
            token nor a refresh token.
    """
    SCOPES = ['https://www.googleapis.com/auth/youtube.upload']

    raw_response = userdata.get('YOUTUBE_OAUTH')
    if not raw_response:
        raise ValueError("Colab secret 'YOUTUBE_OAUTH' is empty")

    creds_data = json.loads(raw_response)
    if not creds_data.get('access_token') and not creds_data.get('refresh_token'):
        raise ValueError("Colab secret 'YOUTUBE_OAUTH' has no access_token or refresh_token")

    # Deliberately no print(creds_data): that would write the tokens into the
    # saved notebook output.
    creds = Credentials(
        token=creds_data.get('access_token'),
        refresh_token=creds_data.get('refresh_token'),
        token_uri='https://oauth2.googleapis.com/token',
        client_id=creds_data.get('client_id'),
        client_secret=creds_data.get('client_secret'),
        scopes=SCOPES
    )

    return build('youtube', 'v3', credentials=creds)

def upload_video(
    youtube_service: str,
    video_path: str,
    title: str,
    description: str,
    category_id: str,
    tags: list[str],
    privacy_status: str) -> dict:
    """Sends a local video file to YouTube via a resumable, chunked upload.

    Args:
        youtube_service: Authenticated YouTube service client
        video_path: Local path of the video file to upload
        title: Title shown for the video
        description: Description shown for the video
        category_id: YouTube category ID
        tags: Tags to attach to the video
        privacy_status: Privacy status to set on the video

    Returns:
        The response returned by the API once the final chunk has been sent
    """
    snippet = dict(
        title=title,
        description=description,
        categoryId=category_id,
        tags=tags,
    )
    metadata = {'snippet': snippet, 'status': {'privacyStatus': privacy_status}}

    # Resumable upload in 8MB chunks
    upload = MediaFileUpload(
        video_path,
        mimetype='video/*',
        resumable=True,
        chunksize=8 * 1024 * 1024,
    )

    insert_request = youtube_service.videos().insert(
        part='snippet,status',
        body=metadata,
        media_body=upload,
    )

    # Keep sending chunks until the API hands back the final response.
    while True:
        progress, result = insert_request.next_chunk()
        if progress:
            print(f"Uploaded {int(progress.progress() * 100)}%")
        if result is not None:
            return result

In [None]:
# Imported under an alias on purpose: the imports cell binds `genai` to
# `google.generativeai`, which summarize_with_gemini relies on. A plain
# `from google import genai` here would rebind that name to this SDK and break
# the summarizer with "module 'google.genai' has no attribute 'configure'".
from google import genai as google_genai

# create client
api_key = userdata.get("API_KEY")
client = google_genai.Client(api_key=api_key)

# Define the model you are going to use
model_id =  "gemini-2.0-flash"

In [None]:
from google.genai.types import GenerateContentConfig
# Agent configuration: a system instruction plus this notebook's own functions
# exposed to the model as callable tools.
# NOTE(review): get_authenticated_service returns an API client object and
# upload_video expects that object as `youtube_service` — confirm the
# tool-calling layer can actually pass such an object between calls.
config = GenerateContentConfig(
    system_instruction="You are helpfull agent that uploads a generated video podcast to YouTube. The podcast audio uses ElevenLabs AI voices based on a Gemini-summarized PDF. Automate the upload using available tools.", # to give the LLM context.
    tools=[upload_pdf, extract_text_from_pdf, summarize_with_gemini, generate_podcast_from_text, generate_duo_podcast, create_video_from_audio_ffmpeg, get_authenticated_service, upload_video ], # define the functions that the LLM can use
)

In [None]:
# Ask the model to run the whole pipeline (download, summarize, synthesize,
# render, upload) by calling the tools registered in `config`.
r = client.models.generate_content(
    model=model_id,
    config=config,
    contents="""from this pdf source: "http://research.google.com/pubs/archive/34939.pdf" get the pdf and make a podcast save the audio and use /symbolic.png for image in the video
     also upload the video to youtube with the title moses project use the discription as evolutionary algorithm with the tags ai, symbolic and icog  ."""
)

Error using Gemini API: module 'google.genai' has no attribute 'configure'
Error with API request, status code 404: b'{"detail":"Not Found"}'
Error with API request, status code 400: b'{"detail":{"status":"max_character_limit_exceeded","message":"This request\'s text has 29786.0 credits and exceeds the credit limit of 10000 credits. Please use Studio for long form TTS."}}'
Error in creating an intermediate audio file


In [None]:
# Print the text of the model's final reply.
print(r.text)

None
