In [56]:
!pip install edge-tts nest_asyncio




[notice] A new release of pip is available: 24.0 -> 26.0
[notice] To update, run: python.exe -m pip install --upgrade pip


In [57]:
import ollama
import json
import edge_tts
import nest_asyncio
import asyncio
from IPython.display import Audio, display
import os
import ast # For parsing python-like dict/list strings

# Apply nest_asyncio to allow async execution in Jupyter.
# The notebook kernel already runs an event loop; this patch lets asyncio code
# be driven from cells while that loop is active.
nest_asyncio.apply()

In [58]:
# Ollama model tag passed as `model=` to the ollama.chat calls in this notebook.
model = "phi3:mini"

In [59]:
# Source text for the podcast: it is interpolated into both the role-selection
# prompt and the script-generation prompt.
document_content = """
Machine learning (ML) is a field of study in artificial intelligence concerned with the development and study of statistical algorithms that can learn from data and generalize to unseen data, and thus perform tasks without explicit instructions. Recently, artificial neural networks have been able to surpass many previous approaches in performance.
"""

In [60]:
# Step 1: Determine Roles
# Ask the model for two speaker roles that suit the content. The prompt requests
# a bare JSON object so the reply can be decoded directly with json.loads.
role_prompt = f"""
Analyze the following content and determine the two most suitable roles for a conversation about it.
For example: 
- If the content is about study, roles could be 'Teacher' and 'Student'.
- If the content is about a job, roles could be 'HR' and 'Candidate'.
- If the content is technical, roles could be 'Expert' and 'Novice'.

Content:
{document_content}

Return ONLY a JSON object with keys 'host' and 'guest'. Do not add any other text.
Example format: {{"host": "...", "guest": "..."}}
"""

# Used wholesale when the reply is unusable, and per key when a role is missing or blank.
fallback_roles = {"host": "Host", "guest": "Guest"}

try:
    response = ollama.chat(model=model, messages=[{'role': 'user', 'content': role_prompt}], format='json')
    roles_response = response['message']['content']
    parsed_roles = json.loads(roles_response)

    # Valid JSON is not necessarily an object. Later cells call roles.get(...),
    # so anything other than a dict is treated as a failed selection.
    if not isinstance(parsed_roles, dict):
        raise ValueError(f"expected a JSON object, got {type(parsed_roles).__name__}")

    # Keep only non-blank string role names: later cells embed them in a prompt
    # and use them for substring matching against speaker names.
    roles = {}
    for role_key, default_name in fallback_roles.items():
        candidate = parsed_roles.get(role_key)
        if isinstance(candidate, str) and candidate.strip():
            roles[role_key] = candidate.strip()
        else:
            roles[role_key] = default_name
    print("Selected Roles:", roles)
except Exception as e:
    print(f"Error selecting roles: {e}")
    # Fallback
    roles = dict(fallback_roles)


Selected Roles: {'host': 'AI Researcher specializing in Machine Learning algorithms', 'guest': 'Data Scientist interested in the practical applications of neural networks.'}


In [61]:
# Step 2: Generate Podcast Script

# Resolve both role names once; the prompt mentions each of them several times.
host_role = roles.get('host', 'Host')
guest_role = roles.get('guest', 'Guest')

# Prompt asking for a short dialogue, returned as JSON with a 'conversation' list.
podcast_prompt = f"""
Generate a podcast conversation between a {host_role} and a {guest_role} based on the following content.
Make it engaging and easy to understand.
Keep the conversation relatively short (about 4-6 turns total).

Content:
{document_content}

Return the output as a JSON object with a key 'conversation' which is a list of objects.
Each object in the list should have 'speaker' and 'text' keys.
Ensure the 'speaker' field matches exactly one of the roles: {host_role} or {guest_role}.
Example format:
{{
  "conversation": [
    {{"speaker": "{host_role}", "text": "Hello everyone..."}},
    {{"speaker": "{guest_role}", "text": "Hi! Today we are discussing..."}}
  ]
}}
"""

# Starts empty; filled in once the model reply has been decoded.
podcast_data = {}

def try_parse_json_string(value):
    """Best-effort decode of a string holding JSON or a Python literal.

    Args:
        value: Any object. Non-strings are returned unchanged.

    Returns:
        The decoded object when ``value`` is a string that parses as JSON or,
        failing that, as a Python literal (covers single-quoted dicts/lists).
        ``None`` when the string cannot be parsed either way.
    """
    if not isinstance(value, str):
        return value
    value = value.strip()
    try:
        return json.loads(value)
    except json.JSONDecodeError:
        pass

    try:
        # Fallback for Python-like dictionary strings (often output by models using single quotes)
        return ast.literal_eval(value)
    except (ValueError, SyntaxError, TypeError, MemoryError, RecursionError):
        # Only the errors literal_eval raises for malformed input are treated as
        # "unparseable"; KeyboardInterrupt / SystemExit must keep propagating.
        return None

def normalize_podcast_data(raw):
    """Coerce decoded model output into a dict and try to expose the turns under 'conversation'.

    Args:
        raw: The object produced by ``json.loads`` on the model reply.

    Returns:
        A new dict. A bare list is wrapped as ``{'conversation': raw}``; any other
        non-dict yields ``{}``. For a dict, string values holding serialized
        JSON / Python literals are unwrapped, and when no 'conversation' key is
        present a key containing 'convers' or the first list of dicts carrying a
        'speaker' key is copied to 'conversation'. The key may still be absent,
        or hold a non-list, when nothing suitable was found.
    """
    # A top-level JSON array is taken to be the conversation itself.
    if isinstance(raw, list):
        return {'conversation': raw}
    if not isinstance(raw, dict):
        return {}

    data = dict(raw)  # shallow copy so the caller's object is not mutated

    # 1. Pre-process: Check if the entire response is nested inside a key as a string
    # e.g. {"response": "{'conversation': [...]}"}
    for key in list(data):
        value = data[key]
        if not isinstance(value, str):
            continue
        parsed_value = try_parse_json_string(value)
        if isinstance(parsed_value, dict):
            print(f"   Parsed nested JSON string in key '{key}'")
            data.update(parsed_value)
        elif isinstance(parsed_value, list) and 'convers' in str(key).lower():
            print(f"   Parsed nested List string in key '{key}'")
            data['conversation'] = parsed_value

    if 'conversation' in data:
        return data

    # 2. Check for alternatives to the 'conversation' key
    print("‚ö†Ô∏è Warning: 'conversation' key missing in JSON. Looking for alternatives...")

    # str(): keys merged in from a Python-literal string need not be strings.
    found_key = next((k for k in data if 'convers' in str(k).lower()), None)
    if found_key is not None:
        print(f"   Found similar key: '{found_key}'")
        val = data[found_key]
        # If the value is a string, try to parse it again (double check)
        parsed_val = try_parse_json_string(val)
        if parsed_val:
            val = parsed_val
        data['conversation'] = val
        return data

    # Search the values for a list of dicts that looks like dialogue turns.
    for k, v in data.items():
        if isinstance(v, list) and len(v) > 0 and isinstance(v[0], dict) and 'speaker' in v[0]:
            print(f"   Found conversation list in key: '{k}'")
            data['conversation'] = v
            break
    return data


# Reply text of THIS request, kept for error reporting. A dedicated variable is
# used because `response` may already exist from an earlier cell.
raw_script_text = None

try:
    # Adding a system message to enforce JSON behavior better
    messages = [
        {'role': 'system', 'content': 'You are an AI that outputs valid JSON only.'},
        {'role': 'user', 'content': podcast_prompt}
    ]
    response = ollama.chat(model=model, messages=messages, format='json')
    podcast_script_json = response['message']['content']
    raw_script_text = podcast_script_json
    print(f"Raw Response start: {podcast_script_json[:200]}...") # Debugging print

    podcast_data = normalize_podcast_data(json.loads(podcast_script_json))

    # 3. Final Validation: Ensure conversation is a list
    if 'conversation' in podcast_data:
        if isinstance(podcast_data['conversation'], str):
            print("   'conversation' is still a string. Parsing...")
            podcast_data['conversation'] = try_parse_json_string(podcast_data['conversation'])

        if isinstance(podcast_data['conversation'], list):
            print("‚úÖ Podcast Script Generated!")
            print(f"--- Podcast: {roles.get('host', 'Host')} & {roles.get('guest', 'Guest')} ---\n")
            for turn in podcast_data['conversation']:
                # A malformed entry should not abort the printout of the rest.
                if not isinstance(turn, dict):
                    print(f"   Skipping malformed turn: {turn!r}\n")
                    continue
                print(f"{turn.get('speaker', 'Unknown')}: {turn.get('text', '')}\n")
        else:
            print("‚ùå 'conversation' data is not a valid list.")
    else:
        print("‚ùå Could not find valid conversation data in response.")

except Exception as e:
    print(f"Error generating podcast: {e}")
    if raw_script_text is not None:
        print("Raw Response:", raw_script_text)
    # Leave a well-formed (empty) dict behind so later cells can call .get() safely.
    podcast_data = {}


Raw Response start: {
  "conversrance": [
    {
      "speaker": "AI Researcher specializing in Machine Learning algorithms", 
      "text": "Good day, I'm really excited about the recent advancements we've made with neu...
   Found similar key: 'conversrance'
‚úÖ Podcast Script Generated!
--- Podcast: AI Researcher specializing in Machine Learning algorithms & Data Scientist interested in the practical applications of neural networks. ---

AI Researcher specializing in Machine Learning algorithms: Good day, I'm really excited about the recent advancements we've made with neural networks. They have started to surpass many previous approaches.

Data Scientist interested in the practical applications of neural networks.: Absolutely, I find it fascinating how these algorithms can improve decision-making processes and predictive analytics. Could you share more on their advantages?

AI Researcher specializing in Machine Learning algorithms: Of course! Neural networks, with their ability to 

In [62]:
# Step 3: Generate Audio with Edge TTS and Merge

# Define voices (names passed to edge_tts.Communicate).
# Speakers matching the host role get VOICE_HOST; every other speaker gets VOICE_GUEST.
VOICE_HOST = "en-US-ChristopherNeural"  # Male voice
VOICE_GUEST = "en-US-AriaNeural"        # Female voice

async def generate_audio_for_turn(text, voice, index):
    """Synthesize one turn with Edge TTS and save it as ``turn_<index>.mp3``.

    Args:
        text: Text handed to ``edge_tts.Communicate``.
        voice: Voice name handed to ``edge_tts.Communicate``.
        index: Value embedded in the output file name.

    Returns:
        The relative path of the file that was written.
    """
    clip_path = "turn_{}.mp3".format(index)
    await edge_tts.Communicate(text, voice).save(clip_path)
    return clip_path

async def produce_podcast(podcast_data, roles):
    """Synthesize every conversation turn to speech, merge the clips and play the result.

    Args:
        podcast_data: Dict whose 'conversation' entry is a list of turn dicts;
            each turn supplies 'speaker' and 'text'.
        roles: Dict whose 'host' entry names the host role (defaults to 'Host').
            A speaker whose name contains that role is voiced with VOICE_HOST,
            every other speaker with VOICE_GUEST.

    Returns:
        None. Progress and errors are printed. Per-turn clips are written as
        'turn_<i>.mp3' and the merged audio as 'podcast_full.mp3' in the current
        working directory, and the merged file is displayed for playback.
    """
    conversation = podcast_data.get('conversation', [])
    generated_files = []

    if not isinstance(conversation, list) or not conversation:
        print("‚ùå No conversation data found to play.")
        return

    print("\nüé§ Generating Audio Segments...")

    # Coerce to a non-empty string: a null or non-string role would make the
    # substring test below raise, and an empty role would match every speaker.
    host_role = str(roles.get('host') or 'Host')

    for i, turn in enumerate(conversation):
        # Safety check if turn is not a dict (e.g. if parsing failed partially)
        if not isinstance(turn, dict):
            print(f"‚ö†Ô∏è Skipping invalid turn data at index {i}: {turn}")
            continue

        # Model output may carry null or non-string fields; normalise both.
        speaker = str(turn.get('speaker') or 'Unknown')
        text = str(turn.get('text') or '').strip()

        # Nothing to synthesize for a blank turn.
        if not text:
            print(f"   Skipping turn {i+1}/{len(conversation)}: no text to speak.")
            continue

        # Determine voice based on role
        voice = VOICE_HOST if host_role in speaker else VOICE_GUEST

        print(f"   Generating turn {i+1}/{len(conversation)}: {speaker}...")

        try:
            # Generate audio file for this turn
            audio_file = await generate_audio_for_turn(text, voice, i)
            generated_files.append(audio_file)
        except Exception as e:
            # One failed turn should not stop the remaining turns.
            print(f"‚ùå Error generating audio for turn {i}: {e}")

    if not generated_files:
        print("‚ùå No audio files were generated.")
        return

    # Merge all audio files
    print("\nüîó Merging Audio Segments...")
    output_filename = "podcast_full.mp3"
    try:
        # NOTE(review): segments are merged by plain byte concatenation; this
        # presumably relies on all clips sharing the same encoding - confirm.
        with open(output_filename, 'wb') as outfile:
            for segment_path in generated_files:
                if os.path.exists(segment_path):
                    with open(segment_path, 'rb') as infile:
                        outfile.write(infile.read())

        if os.path.exists(output_filename):
            print(f"‚úÖ Podcast Compilation saved as '{output_filename}'")
            print("\nüîä Playing Full Podcast...")
            display(Audio(output_filename, autoplay=True))
        else:
            print("‚ùå Error: Output file was not created.")

        # Cleanup individual files if desired
        # for f in generated_files: os.remove(f)

    except Exception as e:
        print(f"‚ùå Error merging audio: {e}")
        # Fall back to offering each clip on its own.
        for segment_path in generated_files:
            display(Audio(segment_path))
             
# Run the async function
if podcast_data and isinstance(podcast_data.get('conversation'), list):
    await produce_podcast(podcast_data, roles)
else:
    print("‚ùå No podcast data available or invalid format.")



üé§ Generating Audio Segments...
   Generating turn 1/5: AI Researcher specializing in Machine Learning algorithms...
   Generating turn 2/5: Data Scientist interested in the practical applications of neural networks....
   Generating turn 3/5: AI Researcher specializing in Machine Learning algorithms...
   Generating turn 4/5: Data Scientist interested in the practical applications of neural networks....
   Generating turn 5/5: AI Researcher specializing in Machine Learning algorithms...

üîó Merging Audio Segments...
‚úÖ Podcast Compilation saved as 'podcast_full.mp3'

üîä Playing Full Podcast...
