In [None]:
from dotenv import load_dotenv
# Read settings from the .env file one directory above this notebook before any
# client is created. override=True is passed — presumably so values from the file
# replace variables already set in the process environment; confirm against python-dotenv docs.
load_dotenv(dotenv_path='../.env', override=True)

In [9]:
import requests
from newspaper import Article
import time
from langchain.embeddings import OpenAIEmbeddings
from langchain.vectorstores import Chroma
import chromadb
from langchain.agents.tools import Tool
from langchain.chat_models import ChatOpenAI
from langchain_experimental.plan_and_execute import PlanAndExecute, load_agent_executor, load_chat_planner


In [10]:
# Name of the Chroma collection this notebook reads from and writes to.
chroma_dataset_name = "assistant_84"

# Embedding model for the vector store and chat model for the agent.
embeddings = OpenAIEmbeddings(model="text-embedding-ada-002")
model = ChatOpenAI(temperature=0, model_name="gpt-3.5-turbo")

# Vector store backed by a Chroma server reached over HTTP at localhost:8000.
chroma_client = chromadb.HttpClient(port=8000, host="localhost")
vecdb = Chroma(
    embedding_function=embeddings,
    collection_name=chroma_dataset_name,
    client=chroma_client,
)

# Load website page content

In [5]:
# Browser-like User-Agent sent with every request made through `session` below.
headers = {
    'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/117.0.0.0 Safari/537.36'
}

# Articles to scrape. Every entry must end with a comma: Python concatenates
# adjacent string literals, so a missing comma silently fuses two URLs into one
# invalid URL and both articles are lost.
article_urls = [
    "https://www.artificialintelligence-news.com/2023/05/23/meta-open-source-speech-ai-models-support-over-1100-languages/",
    "https://www.artificialintelligence-news.com/2023/05/18/beijing-launches-campaign-against-ai-generated-misinformation/",
    "https://www.artificialintelligence-news.com/2023/05/16/openai-ceo-ai-regulation-is-essential/",
    "https://www.artificialintelligence-news.com/2023/05/15/jay-migliaccio-ibm-watson-on-leveraging-ai-to-improve-productivity/",
    "https://www.artificialintelligence-news.com/2023/05/15/iurii-milovanov-softserve-how-ai-ml-is-helping-boost-innovation-and-personalisation/",
    "https://www.artificialintelligence-news.com/2023/05/11/ai-and-big-data-expo-north-america-begins-in-less-than-one-week/",
    "https://www.artificialintelligence-news.com/2023/05/11/eu-committees-green-light-ai-act/",
    "https://www.artificialintelligence-news.com/2023/05/09/wozniak-warns-ai-will-power-next-gen-scams/",
    "https://www.artificialintelligence-news.com/2023/05/09/infocepts-ceo-shashank-garg-on-the-da-market-shifts-and-impact-of-ai-on-data-analytics/",
    "https://www.artificialintelligence-news.com/2023/05/02/ai-godfather-warns-dangers-and-quits-google/",
    "https://www.artificialintelligence-news.com/2023/04/28/palantir-demos-how-ai-can-used-military/",
    "https://www.artificialintelligence-news.com/2023/04/26/ftc-chairwoman-no-ai-exemption-to-existing-laws/",
    "https://www.artificialintelligence-news.com/2023/04/24/bill-gates-ai-teaching-kids-literacy-within-18-months/",
    "https://www.artificialintelligence-news.com/2023/04/21/google-creates-new-ai-division-to-challenge-openai/",
]

session = requests.Session()
pages_content = [] # where we save the scraped articles, as {"url": ..., "text": ...} dicts

for url in article_urls:
    try:
        time.sleep(2) # sleep two seconds for gentle scraping
        response = session.get(url, headers=headers, timeout=10)

        if response.status_code == 200:
            article = Article(url)
            # Reuse the HTML already fetched above instead of letting newspaper
            # issue a second request for the same page (one that would not carry
            # the custom `headers`).
            article.download(input_html=response.text)
            article.parse() # parse HTML to extract the article text
            pages_content.append({ "url": url, "text": article.text })
        else:
            print(f"Failed to fetch article at {url}")
    except Exception as e:
        # Best-effort scraping: report the failure and continue with the next URL.
        print(f"Error occurred while fetching article at {url}: {e}")

In [6]:
from langchain.text_splitter import RecursiveCharacterTextSplitter

# Splitter configured with chunk_size=1000 and chunk_overlap=100.
text_splitter = RecursiveCharacterTextSplitter(chunk_overlap=100, chunk_size=1000)

# Flatten the chunks of every scraped page into one list, keeping page order.
all_texts = [
    piece
    for page in pages_content
    for piece in text_splitter.split_text(page["text"])
]

In [7]:
# Add every chunk in `all_texts` to the vector store; the call's return value
# (a list of id strings) is displayed as this cell's output.
# NOTE(review): re-running this cell presumably inserts the same chunks again
# under new ids — confirm before executing it more than once.
vecdb.add_texts(all_texts)

['4798e0dc-70b1-11ee-9552-a620d32a5b6c',
 '4798e21c-70b1-11ee-9552-a620d32a5b6c',
 '4798e29e-70b1-11ee-9552-a620d32a5b6c',
 '4798e2c6-70b1-11ee-9552-a620d32a5b6c',
 '4798e2ee-70b1-11ee-9552-a620d32a5b6c',
 '4798e316-70b1-11ee-9552-a620d32a5b6c',
 '4798e334-70b1-11ee-9552-a620d32a5b6c',
 '4798e384-70b1-11ee-9552-a620d32a5b6c',
 '4798e3b6-70b1-11ee-9552-a620d32a5b6c',
 '4798e3d4-70b1-11ee-9552-a620d32a5b6c',
 '4798e3f2-70b1-11ee-9552-a620d32a5b6c',
 '4798e410-70b1-11ee-9552-a620d32a5b6c',
 '4798e42e-70b1-11ee-9552-a620d32a5b6c',
 '4798e44c-70b1-11ee-9552-a620d32a5b6c',
 '4798e46a-70b1-11ee-9552-a620d32a5b6c',
 '4798e488-70b1-11ee-9552-a620d32a5b6c',
 '4798e4a6-70b1-11ee-9552-a620d32a5b6c',
 '4798e4c4-70b1-11ee-9552-a620d32a5b6c',
 '4798e4e2-70b1-11ee-9552-a620d32a5b6c',
 '4798e500-70b1-11ee-9552-a620d32a5b6c',
 '4798e51e-70b1-11ee-9552-a620d32a5b6c',
 '4798e53c-70b1-11ee-9552-a620d32a5b6c',
 '4798e550-70b1-11ee-9552-a620d32a5b6c',
 '4798e56e-70b1-11ee-9552-a620d32a5b6c',
 '4798e58c-70b1-

In [11]:
# Retriever over the vector store, configured with search_kwargs k=3.
retriever = vecdb.as_retriever(search_kwargs={'k': 3})
# Delimiter placed between retrieved documents when they are merged into one string.
CUSTOM_TOOL_DOCS_SEPARATOR = "\n---------------\n"


def retrieve_n_docs_tool(query: str) -> str:
    """Fetch the documents relevant to ``query`` and merge them into one string.

    The page contents returned by the module-level ``retriever`` are joined with
    ``CUSTOM_TOOL_DOCS_SEPARATOR`` and wrapped in a leading and trailing dashed line.

    Args:
        query: Text passed to ``retriever.get_relevant_documents``.

    Returns:
        The delimited, concatenated page contents.
    """
    hits = retriever.get_relevant_documents(query)
    merged_body = CUSTOM_TOOL_DOCS_SEPARATOR.join(hit.page_content for hit in hits)
    return "".join(("---------------\n", merged_body, "\n---------------"))


# Wrap the "retrieve_n_docs_tool" function in a Tool and expose it as the only
# entry of `tools`.
search_private_docs = Tool(
    description="useful for when you need to answer questions about current events about Artificial Intelligence",
    func=retrieve_n_docs_tool,
    name="Search Private Docs",
)
tools = [search_private_docs]


In [12]:
# Assemble the plan-and-execute agent. Planner and executor are both built from
# `model`; only the executor receives `tools`.
planner = load_chat_planner(model)
executor = load_agent_executor(model, tools, verbose=True)
# verbose=True is set on both the executor and the agent; the chain traces show
# up in the output of the cell that runs the agent.
agent = PlanAndExecute(planner=planner, executor=executor, verbose=True)

In [13]:
# Run the agent on one request and keep what it returns in `response`.
response = agent.run("Write an overview of Artificial Intelligence regulations by governments by country")



[1m> Entering new PlanAndExecute chain...[0m
steps=[Step(value='Research and gather information on the current state of Artificial Intelligence (AI) regulations by governments in different countries.'), Step(value='Organize the information by country and categorize it based on the level of AI regulation.'), Step(value='Provide an overview of the AI regulations in each country, including any specific laws or policies that have been implemented.'), Step(value='Include information on the key areas covered by the regulations, such as data privacy, algorithmic transparency, liability, and ethical considerations.'), Step(value='Highlight any notable differences or similarities between the regulations in different countries.'), Step(value='Summarize the overall trends and developments in AI regulations globally.'), Step(value='Given the above steps taken, provide an overview of Artificial Intelligence regulations by governments by country. ')]

[1m> Entering new AgentExecutor chain...[0

In [14]:
# Print the value returned by the agent run.
print(response)

Based on the previous steps and the information provided, it seems that the AI regulations in different countries vary. The European Union (EU) has recently endorsed new transparency and risk-management rules for AI systems known as the AI Act. These rules aim to ensure the safety, transparency, traceability, and non-discrimination of AI systems. Exemptions have been added for research activities and AI components provided under open-source licenses, and regulatory sandboxes are being promoted to test AI before deployment.

In contrast, the United States (US) typically follows a tech approach of experimenting first and then retrofitting to other markets and regulatory frameworks. This approach fosters innovation. The United Kingdom (UK) is adopting a similar approach to the US, but due to its proximity to the EU market, UK-based developers are more likely to align with the EU ruleset from the outset.

It's important to note that regulations and approaches may vary across different coun