Base imports.

In [26]:
import os
import json
import pandas as pd
from pprint import pprint
from IPython.display import Markdown

from helpers import _set_env
_set_env("OPENAI_API_KEY")
_set_env("ANTHROPIC_API_KEY")
_set_env("TAVILY_API_KEY")
_set_env("LANGCHAIN_API_KEY")
_set_env("LINKEDIN_COOKIE_LI_AT")
_set_env("LINKEDIN_COOKIE_JSESSIONID")

import warnings
warnings.filterwarnings('ignore')

agent_name = "podcast_outline"
collected_data_dir = f"collected_data/{agent_name}"
os.makedirs(collected_data_dir, exist_ok=True)
base_data_dir = f"base_data/{agent_name}"
os.makedirs(base_data_dir, exist_ok=True)

Import CrewAI, tools, and LLM / AI agent-related dependencies.

In [2]:
from crewai import Agent, Task, Crew, LLM
from crewai.tools import BaseTool
from crewai_tools import FileReadTool, ScrapeWebsiteTool, YoutubeVideoSearchTool
from langchain_community.tools.tavily_search import TavilySearchResults
from pydantic import BaseModel, Field
from typing import Optional

Initialize the LLM to be used. 

*NOTE: Later you can customize or use different LLMs for each agent or task.*

In [3]:
llm = LLM(model="anthropic/claude-3-5-sonnet-20241022", temperature=1.0)

Connect to LinkedIn API.

In [4]:
with open("linkedin_credentials.json", "r") as f:
    credentials = json.load(f)

from requests.cookies import cookiejar_from_dict
cookies = cookiejar_from_dict({
    "liap": "true",
    "li_at": os.environ["LINKEDIN_COOKIE_LI_AT"],
    "JSESSIONID": os.environ["LINKEDIN_COOKIE_JSESSIONID"],
})

from linkedin_api import Linkedin
linkedin = Linkedin(credentials["username"], credentials["password"], cookies=cookies)

Instantiate tools.

In [7]:
file_read_tool = FileReadTool()
scrape_website_tool = ScrapeWebsiteTool()
youtube_video_search_tool = YoutubeVideoSearchTool()

Create custom tool for Tavily search -- this is supposed to work better than Serper API and other search tools as it's LLM-tailored.

In [8]:
class WebSearchTool(BaseTool):
    name: str ="Web Search Tool"
    description: str = "Search the web for current information on a given topic or person"
        
    def _run(self, query: str, max_results: Optional[int] = 10) -> str:
        # Perform the search
        search_client = TavilySearchResults(max_results=max_results)
        search_results = search_client.invoke(query)
        
        # Format the results
        formatted_results = []
        for doc in search_results:
            formatted_doc = f"URL: {doc['url']}\n"
            formatted_doc += f"Content: {doc['content']}\n"
            formatted_results.append(formatted_doc)
            
        return "\n---\n".join(formatted_results)

In [9]:
web_search_tool = WebSearchTool()

Create agents.

In [27]:
# Create the researcher agent
local_researcher = Agent(
    role='Senior Podcast Guest Researcher (Local)',
    goal='Thoroughly research the guest to understand their expertise, background, and recent work',
    backstory="""You are an expert podcast researcher who excels at finding and synthesizing 
    information about podcast guests. You analyze their background, work, and LinkedIn profiles and posts
    to understand their unique perspectives and contributions to the specific topic of the podcast.""",
    tools=[file_read_tool],
    llm=llm,
    allow_delegation=False,
    verbose=True,
)

# Create the researcher agent with web search tool
online_researcher = Agent(
    role='Senior Podcast Guest Researcher (Online)',
    goal='Thoroughly research the guest to understand their expertise, background, and recent work',
    backstory="""You are an expert podcast researcher who excels at finding and synthesizing 
    online information about podcast guests. You analyze their background, work, and online presence
    to understand their unique perspectives and contributions to the specific topic of the podcast.""",
    tools=[web_search_tool],
    llm=llm,
    allow_delegation=False,
    verbose=True,
)

# Create the outline writer agent
outline_writer = Agent(
    role='Senior Podcast Outline Writer',
    goal='Create engaging, personalized podcast outlines that highlight the guest\'s expertise',
    backstory="""You are an experienced podcast producer who knows how to structure 
    compelling conversations. You excel at creating outlines that flow naturally and 
    bring out the best in each guest.""",
    tools=[file_read_tool],
    llm=llm,
    allow_delegation=False,
    verbose=True
)

Create the tasks.

In [28]:
local_research_task = Task(
    description="""
    You are researching the podcast guest {guest_name} for the podcast {podcast_name}.
    The podcast description is as follows: {podcast_description}.
    
    First, read the following document to understand the podcast's purpose and structure: {podcast_introduction_filepath}.
    
    Then read more about the guest {guest_name} using the following documents (using the file_read_tool):
    {guest_linkedin_profile_filepath}
    {guest_linkedin_posts_filepath}
    
    Focus on:
    1. Their professional background and expertise in relation to the podcast's topic
    2. Recent projects, achievements, startups, products, and services related to the podcast's topic
    3. Their thought leadership and key ideas related to the podcast's topic
    4. Any unique perspectives or experiences they bring related to the podcast's topic
    
    VERY IMPORTANT: When you're compiling your findings, 
    MAKE SURE TO GIVE MORE IMPORTANCE TO RECENT FINDINGS. For example, if the guest has a recent project, 
    make sure to give more importance to that project. If you focus on the guest's project from 10 years ago that
    is discontinued, it's not useful.
    
    Compile your findings into a comprehensive research summary.""",
    expected_output="""A detailed research summary containing:
    1. Professional background and current role
    2. Key achievements and notable projects
    3. Areas of expertise and thought leadership
    4. Recent activities and public presence
    5. Unique perspectives or specialized knowledge
    
    The summary should combine information from provided documents,
    ensuring all facts are verified and relevant to podcast discussion topics.""",
    agent=local_researcher,
)

online_research_task = Task(
    description="""
    You are researching the podcast guest {guest_name} for the podcast {podcast_name}.
    The podcast description is as follows: {podcast_description}.
    
    Using the web_search_tool, find relevant information online about the guest {guest_name}. 
    To tailor the search query, use the following information: {guest_details}.
    
    Focus on:
    1. Their professional background and expertise in relation to the podcast's topic
    2. Recent projects, achievements, startups, products, and services related to the podcast's topic
    3. Their thought leadership and key ideas related to the podcast's topic
    4. Any unique perspectives or experiences they bring related to the podcast's topic
    
    VERY IMPORTANT: When you're compiling your findings, 
    MAKE SURE TO GIVE MORE IMPORTANCE TO RECENT FINDINGS. For example, if the guest has a recent project, 
    make sure to give more importance to that project. If you focus on the guest's project from 10 years ago that
    is discontinued, it's not useful.
    
    Compile your findings into a comprehensive research summary.""",
    expected_output="""A detailed research summary containing:
    1. Professional background and current role
    2. Key achievements and notable projects
    3. Areas of expertise and thought leadership
    4. Recent activities and public presence
    5. Unique perspectives or specialized knowledge
    
    The summary should combine information from provided information and web research,
    ensuring all facts are verified and relevant to podcast discussion topics.""",
    agent=online_researcher,
)

outline_task = Task(
    description="""
    You are writing the podcast outline for guest {guest_name} for the podcast {podcast_name}.
    The podcast description is as follows: {podcast_description}.
    
    First, read the following document to understand the podcast's purpose and structure: {podcast_introduction_filepath}.
    
    Then using the research provided by the Senior Podcast Guest Researcher, create a one-pager podcast outline that:
    1. Starts with a note encouraging the guest to edit the document
    2. Includes a 2-3 sentence introduction about the guest
    3. Suggests 4-5 relevant segments tailored to the guest's expertise
    4. Provides brief descriptions and specific questions for each segment
    
    The outline should be conversational, engaging, and specifically tailored to the guest's 
    background and expertise. Ensure questions are clear and accessible.""",
    expected_output="""A structured podcast outline document containing:
    1. An opening note inviting guest feedback
    2. A concise guest introduction
    3. 4-5 clearly defined podcast segments
    4. Specific questions and talking points for each segment
    
    The outline should be personalized to the guest's expertise and maintain
    an engaging, conversational flow suitable for a podcast format.""",
    agent=outline_writer,
)

Create the crew.

In [29]:
local_podcast_crew = Crew(
    agents=[local_researcher, outline_writer],
    tasks=[local_research_task, outline_task],
    verbose=True,
    memory=True,
)

online_podcast_crew = Crew(
    agents=[online_researcher, outline_writer],
    tasks=[online_research_task, outline_task],
    verbose=True,
    memory=True,
)



Execute the crew.

In [20]:
profile_id = "alan-chan-51858378"
guest_name = "Alan Chan"
profile = linkedin.get_profile(profile_id)
posts = linkedin.get_profile_posts(profile_id, post_count=100)

In [25]:
# Specifying the fields that are essential for posts (semantic content) -- otherwise it brings too much noisy metadata.
essential_post_fields = [
    'commentary.text.text',  # The actual post text written by the author
    'resharedUpdate.commentary.text.text',  # Text from reshared posts
    'actor.name.text',  # Author's name
    'resharedUpdate.actor.name.text',  # Name of original poster if reshared
    'actor.subDescription.text',  # Post timing (e.g., "4h • Edited")
]

os.makedirs(f"{collected_data_dir}/{profile_id}", exist_ok=True)
df_profile = pd.json_normalize(profile).T
df_profile.to_csv(f"{collected_data_dir}/{profile_id}/linkedin_profile.csv")
try:
    df_posts = pd.json_normalize(posts)[essential_post_fields].T
    df_posts.to_csv(f"{collected_data_dir}/{profile_id}/linkedin_posts.csv")
except Exception as e:
    print(f"Error processing posts (maybe there are no posts): {e}")
    df_posts = pd.DataFrame()

Error processing posts (maybe there are no posts): "['resharedUpdate.commentary.text.text', 'resharedUpdate.actor.name.text'] not in index"


In [30]:
inputs = {
    "podcast_name": "The AI Agents Podcast",
    "podcast_description": "A podcast about AI agents and how they are changing the world from scientific, commercial, ethical, and societal perspectives.",
    "guest_name": guest_name,
    "guest_details": "Alan is a PhD student in machine learning and an AI governance researcher. He works on how to govern a world where AI agents substitute for human labour across a wide variety of commercial, scientific, governmental, and personal activities. He also thinks about how best to design model evaluations and an evaluation ecosystem. ",
    "podcast_introduction_filepath": f"./{base_data_dir}/podcast_introduction.txt",
    "guest_linkedin_profile_filepath": f"./{collected_data_dir}/{profile_id}/linkedin_profile.csv",
    "guest_linkedin_posts_filepath": f"./{collected_data_dir}/{profile_id}/linkedin_posts.csv",
}

result = local_podcast_crew.kickoff(inputs=inputs)

[1m[95m# Agent:[00m [1m[92mSenior Podcast Guest Researcher (Local)[00m
[95m## Task:[00m [92m
    You are researching the podcast guest Alan Chan for the podcast The AI Agents Podcast.
    The podcast description is as follows: A podcast about AI agents and how they are changing the world from scientific, commercial, ethical, and societal perspectives..
    
    First, read the following document to understand the podcast's purpose and structure: ./base_data/podcast_outline/podcast_introduction.txt.
    
    Then read more about the guest Alan Chan using the following documents (using the file_read_tool):
    ./collected_data/podcast_outline/alan-chan-51858378/linkedin_profile.csv
    ./collected_data/podcast_outline/alan-chan-51858378/linkedin_posts.csv
    
    Focus on:
    1. Their professional background and expertise in relation to the podcast's topic
    2. Recent projects, achievements, startups, products, and services related to the podcast's topic
    3. Their thought

In [31]:
Markdown(result.raw)

AI Agents Podcast - Guest Outline: Alan Chan

Dear Alan,

Thank you for joining us on The AI Agents Podcast! Below is a proposed outline for our conversation. Please feel free to modify, add, or suggest different topics that you'd like to discuss. This is just a starting point to guide our discussion, but we can be flexible and follow interesting threads as they emerge.

Guest Introduction:
Alan Chan is a Research Fellow at the Centre for the Governance of AI (GovAI) and a PhD candidate at Université de Montréal/Mila. With a unique blend of technical expertise in machine learning and deep involvement in AI governance research, Alan brings valuable insights into how AI agents are reshaping various sectors of society. His work spans from technical evaluations of language models to studying the broader implications of AI systems in commercial, scientific, and governmental contexts.

Segment 1: The Evolution of AI Agents (10 minutes)
- Your journey from technical ML research to AI governance
- How has your perspective on AI agents evolved over time?
- What are the most significant changes you've observed in how we approach AI agent development?
Key Questions:
- Could you share your definition of AI agency and how it's evolved with recent developments?
- How has your background in statistical machine learning informed your current work on AI governance?

Segment 2: Evaluating AI Agents (10 minutes)
- Current challenges in measuring AI agent capabilities
- Importance of evaluation ecosystems
- Balancing performance metrics with safety considerations
Key Questions:
- What are the most crucial metrics we should consider when evaluating AI agents?
- How can we design evaluation frameworks that account for both technical performance and broader societal impacts?
- What are some common pitfalls in current evaluation approaches?

Segment 3: AI Agents Across Sectors (10 minutes)
- Real-world applications and impact
- Transformation of work and human-AI collaboration
- Sector-specific considerations and challenges
Key Questions:
- How do you see AI agents transforming different sectors (commercial, scientific, governmental)?
- What are some unexpected ways AI agents are being deployed that excite or concern you?
- How should organizations approach the integration of AI agents into their workflows?

Segment 4: Governance and Safety Considerations (10 minutes)
- Frameworks for responsible AI agent deployment
- Coordination challenges in AI development
- Building cooperative AI systems
Key Questions:
- Based on your research, what are the key governance challenges we need to address with AI agents?
- How can we ensure AI agents remain aligned with human values as they become more capable?
- What role should policy play in shaping the development and deployment of AI agents?

Closing Segment: Future Outlook (5 minutes)
- Personal predictions for AI agent development
- Key areas to watch in the coming years
- Advice for those entering the field
Key Questions:
- What developments in AI agents are you most excited about?
- What should people be paying attention to in this space?
- Any final thoughts or advice for our audience?

Note: Feel free to suggest modifications or additional topics you'd like to cover. We can also include any specific projects or research you'd like to highlight during our conversation.

In [32]:
result = online_podcast_crew.kickoff(inputs=inputs)

[1m[95m# Agent:[00m [1m[92mSenior Podcast Guest Researcher (Online)[00m
[95m## Task:[00m [92m
    You are researching the podcast guest Alan Chan for the podcast The AI Agents Podcast.
    The podcast description is as follows: A podcast about AI agents and how they are changing the world from scientific, commercial, ethical, and societal perspectives..
    
    Using the web_search_tool, find relevant information online about the guest Alan Chan. 
    To tailor the search query, use the following information: Alan is a PhD student in machine learning and an AI governance researcher. He works on how to govern a world where AI agents substitute for human labour across a wide variety of commercial, scientific, governmental, and personal activities. He also thinks about how best to design model evaluations and an evaluation ecosystem. .
    
    Focus on:
    1. Their professional background and expertise in relation to the podcast's topic
    2. Recent projects, achievements, st

In [33]:
Markdown(result.raw)

THE AI AGENTS PODCAST - GUEST OUTLINE
Episode featuring Alan Chan

Note to Alan: 
Thank you for agreeing to be a guest on The AI Agents Podcast! Below is a proposed outline for our conversation. Please feel free to modify, add, or remove any topics or questions that you'd like to discuss. This is meant to be a guide rather than a strict script - we want the conversation to flow naturally while covering interesting ground.

Guest Introduction:
Alan Chan is a Research Fellow at the Centre for the Governance of AI (GovAI) and a final-year PhD student in machine learning at Mila. His groundbreaking work focuses on the governance of AI agents, particularly in developing frameworks for visibility and oversight of AI systems. Alan's unique combination of technical expertise in machine learning and deep understanding of governance challenges makes him ideally positioned to discuss the future of AI agent deployment and management.

Segment 1: Understanding AI Agent Visibility (10-12 minutes)
- What do we mean by "visibility" in the context of AI agents?
- Can you break down the three key areas of your research: agent identifiers, real-time monitoring, and activity logging?
- Why is visibility becoming increasingly crucial as AI agents become more prevalent?
- What are some common misconceptions about AI agent transparency?

Segment 2: Practical Governance Challenges (10-12 minutes)
- What are the biggest challenges in governing AI agents today?
- How do you balance innovation with necessary oversight?
- Can you share examples of governance frameworks that work well?
- How should organizations approach implementing AI agent governance?

Segment 3: The Future of AI Agent Integration (8-10 minutes)
- How do you see AI agents evolving in various sectors (commercial, scientific, governmental)?
- What infrastructure changes are needed to support safe AI agent deployment?
- What excites you most about the future of AI agents?
- What concerns keep you up at night?

Segment 4: Technical Deep Dive (8-10 minutes)
- Could you walk us through a specific example of how agent identification works?
- What technical challenges exist in monitoring AI agents effectively?
- How do you approach the design of logging systems for AI agents?
- What role does machine learning play in governance infrastructure?

Segment 5: Recommendations and Future Outlook (5-8 minutes)
- What advice would you give organizations looking to implement AI agents responsibly?
- How can individuals stay informed about AI agent governance?
- What developments should we watch for in the coming years?
- Where can listeners learn more about your work and stay updated on your research?

Technical Requirements:
- Remote recording via preferred platform
- Please have a stable internet connection and quiet environment
- We'll do a quick tech check 5 minutes before recording

Additional Notes:
- Feel free to share specific examples from your research or work
- We can include demonstrations or explanations of specific concepts if desired
- Please let us know if there are any sensitive topics we should avoid
- We encourage sharing practical insights and real-world applications

Social Media/Links to mention:
(Please provide any relevant handles or links you'd like us to share with listeners)

Backup Questions:
- Your thoughts on recent developments in AI agent architectures
- The role of academic research in shaping AI governance
- Specific case studies from your research
- Your journey from ML researcher to governance expert