In [5]:
import requests
import transformers
from transformers import AutoTokenizer
import re
import string
import pandas as pd
# import nltk
import torch

from langchain import HuggingFacePipeline
from langchain.prompts import PromptTemplate
from langchain.chains import LLMChain
from langchain_community.llms import Ollama
from crewai import Agent , Task ,Crew , Process

In [6]:
model = Ollama(model= "llama3:8b")

In [7]:
df = pd.read_csv("/Users/tushitdave/Desktop/Projects/LLMs/Llama3/physicist.csv")
df.head()

Unnamed: 0.1,Unnamed: 0,physicist,physicist_bio,num_tokens
0,0,James Clerk Maxwell,James Clerk Maxwell (13 June 1831 – 5 Novembe...,8279
1,1,Charles Darwin,Charles Robert Darwin ( DAR-win; 12 February ...,12349
2,2,Nikola Tesla,Nikola Tesla (; Serbian Cyrillic: Никола Тесла...,14479
3,3,Albert Einstein,Albert Einstein ( EYEN-styne; German: [ˈalbɛɐt...,16884
4,4,Richard Feynman,"Richard Phillips Feynman (; May 11, 1918 – Feb...",17255


In [8]:
tokenizer = AutoTokenizer.from_pretrained("allenai/longformer-base-4096")

In [9]:
def clean_text(text):
  text = re.sub(r'[^A-Za-z0-9\s.\(\)[\]{\}]+', '' , text)
  text = text.lower()
  text = " ".join(text.split())
  return text


def count_tokens(text):
  tokens = tokenizer.encode(text , add_special_tokens = True)
  return(len(tokens))

In [10]:
df['physicist_bio_clean'] = df['physicist_bio'].apply(clean_text)

In [11]:
df.head()

Unnamed: 0.1,Unnamed: 0,physicist,physicist_bio,num_tokens,physicist_bio_clean
0,0,James Clerk Maxwell,James Clerk Maxwell (13 June 1831 – 5 Novembe...,8279,james clerk maxwell (13 june 1831 5 november 1...
1,1,Charles Darwin,Charles Robert Darwin ( DAR-win; 12 February ...,12349,charles robert darwin ( darwin 12 february 180...
2,2,Nikola Tesla,Nikola Tesla (; Serbian Cyrillic: Никола Тесла...,14479,nikola tesla ( serbian cyrillic [nkola tsla] 1...
3,3,Albert Einstein,Albert Einstein ( EYEN-styne; German: [ˈalbɛɐt...,16884,albert einstein ( eyenstyne german [albt antan...
4,4,Richard Feynman,"Richard Phillips Feynman (; May 11, 1918 – Feb...",17255,richard phillips feynman ( may 11 1918 februar...


In [12]:
df['physicist_bio_clean'][0]

'james clerk maxwell (13 june 1831 5 november 1879) was a scottish physicist with broad interests who was responsible for the classical theory of electromagnetic radiation which was the first theory to describe electricity magnetism and light as different manifestations of the same phenomenon. maxwells equations for electromagnetism have been called the second great unification in physics where the first one had been realised by isaac newton. with the publication of a dynamical theory of the electromagnetic field in 1865 maxwell demonstrated that electric and magnetic fields travel through space as waves moving at the speed of light. he proposed that light is an undulation in the same medium that is the cause of electric and magnetic phenomena. the unification of light and electrical phenomena led to his prediction of the existence of radio waves. maxwell is also regarded as a founder of the modern field of electrical engineering. maxwell was the first to derive the maxwellboltzmann di

In [13]:
from crewai import Agent
import os
import json

In [14]:
with open(r'/Users/tushitdave/Desktop/Projects/LLMs/Llama3/key.json') as config_file:
    config = json.load(config_file)
    api_key = config['api_key']

In [22]:
# Set environment variables
os.environ["OPENAI_API_BASE"] = 'https://api.groq.com/openai/v1'
os.environ["OPENAI_MODEL_NAME"] = 'llama3-8b-8192'  # Adjust based on available model
os.environ["OPENAI_API_KEY"] = api_key

In [23]:
# Choosing physicist bio from dataframe to acheive summary
physicist_bios = df['physicist_bio_clean']

In [24]:
# Create summarizer agent for bio
summarizer_bio = Agent(
    role='Summarizer',
    goal='Accurately summarize physicist bios with minimum 300 words',
    verbose=True,
    memory=True,
    backstory=(
        "Equipped with advanced summarization techniques, "
        "the goal is to distill complex information into concise summaries."
    ),
    allow_delegation=False  # No need for search_tool if performing local summarization
)

# Create summarizer agent for research
summarizer_research = Agent(
    role='Summarizer',
    goal='Accurately summarize physicist researches with bullet points',
    verbose=True,
    memory=True,
    backstory=(
        "With a focus on distilling complex information into concise summaries, "
        "the goal is to provide clear and insightful summaries on various topics."
    ),
    allow_delegation=False,
)

In [25]:
# Define a function to create summary tasks
def create_summary_task(description_template, expected_output_template, physicist_bio, agent):
    description = description_template.format(physicist_bio=physicist_bio)
    expected_output = expected_output_template.format(physicist_bio=physicist_bio)
    return Task(
        description=description,
        expected_output=expected_output,
        agent=agent
    )

# Summary task for summarizing physicist bios
summary_task_summarizer_bio = create_summary_task(
    description_template="Summarize the biography of {physicist_bio}.",
    expected_output_template="A concise summary of the biography of {physicist_bio}.",
    physicist_bio=physicist_bios,
    agent=summarizer_bio
)

# Summary task for summarizing physicist researches
summary_task_summarizer_research = create_summary_task(
    description_template="Summarize the research of {physicist_bio}.",
    expected_output_template="A concise summary of the research of {physicist_bio}.",
    physicist_bio=physicist_bios,
    agent=summarizer_research
)

# Add the summary tasks to the crew
crew = Crew(
    agents=[summarizer_bio, summarizer_research],
    tasks=[summary_task_summarizer_bio, summary_task_summarizer_research],
    process=Process.sequential
)

# Kick off the crew
crew_result = crew.kickoff()

# Initialize lists to store summary outputs
summary_bio_list = [''] * len(df)
summary_research_list = [''] * len(df)

# Extract summary outputs from the crew result
if not isinstance(crew_result, str):
    # Split the crew result into lines
    lines = crew_result.split('\n')

    # Find the indices where the outputs start and end for summary_bio and summary_research
    bio_start_index = lines.index("Final Answer for summary_bio:")
    research_start_index = lines.index("Final Answer for summary_research:")

    # Extract the outputs and concatenate them properly
    summary_bio_list[0] = " ".join(lines[bio_start_index + 1:research_start_index])
    summary_research_list[0] = " ".join(lines[research_start_index + 1:])

# Append the summary results to the DataFrame
df['summary_bio'] = summary_bio_list
df['summary_research'] = summary_research_list





[1m> Entering new CrewAgentExecutor chain...[0m
[32;1m[1;3mI now can give a great answer!

Final Answer: 

Here is the complete and concise biography of the 15 physicists mentioned:

**0. James Clerk Maxwell (13 June 1831 - 5 November 1885)**

James Clerk Maxwell was a Scottish physicist and mathematician who made groundbreaking contributions to the fields of electromagnetism and thermodynamics. Born in Scotland, Maxwell studied at the University of Cambridge and later became a professor at the University of Cambridge. He formulated the Maxwell's equations, which united the previously separate theories of electricity, magnetism, and electromagnetism. Maxwell's work laid the foundation for the development of modern physics and has had a profound impact on the field.

**1. Charles Robert Darwin (12 February 1809 - 19 April 1882)**

Charles Robert Darwin was an English naturalist and biologist who is best known for his theory of evolution through natural selection. Born in Shrewsbur