In [79]:
from crewai import Agent, Task, Crew, Process, LLM
from langchain_openai import ChatOpenAI
import os 
import sys 
from pprint import pprint
from IPython.display import display, Markdown, Latex

sys.path.append('../')

from utils.pdf import read_pdf
from pydantic import BaseModel, Field
from typing import List
import json

In [80]:
model = "ollama/llama3.2"
base_url = "http://localhost:11434"
llm = LLM(base_url=base_url, model=model, temperature=0, api_key="test")
pdf_text = read_pdf("../data/Sohn_Visual_Prompt_Tuning_for_Generative_Transfer_Learning_CVPR_2023_paper.pdf") 


In [81]:
# Chunk the document based on the token limit
def chunk_document(document, chunk_size=3000):
    words = document.split()
    chunks = []
    current_chunk = []

    for word in words:
        current_chunk.append(word)
        if len(current_chunk) >= chunk_size:
            chunks.append(" ".join(current_chunk))
            current_chunk = []

    if current_chunk:
        chunks.append(" ".join(current_chunk))  # Add remaining chunk
    
    return chunks

In [82]:
len(pdf_text)

55644

In [83]:
chunks = chunk_document(pdf_text)

Leverage Pydantic Models for entity extraction to get output in structured format

In [84]:
class NamedEntities(BaseModel):
    people: List[str] = Field(..., description = "List of people")
    locations: List[str] = Field(..., description = "List of locations")
    organizations: List[str] = Field(..., description = "List of organizations")

In [95]:
NamedEntities.model_dump



In [104]:
extraction_agent = Agent(
    llm = llm,
    role = "Chief Researcher",
    goal = "Reads documents thoroughly and identifies important entities and concepts",
    backstory="As the chief researcher at an AI firm, you specialize in identifying key concepts",
    verbose=True,
)

In [105]:
task = Task(
    output_json=NamedEntities,
    description="Extract entities entities from the following piece of text \n{text}",
    agent=extraction_agent,
    expected_output="List of entities in JSON for people, locations, organizations",
)

In [106]:
crew = Crew(
    tasks=[task],
    agents=[extraction_agent],
    verbose=True,
    full_output=True,
    process=Process.sequential,
    output_log_file="logs.txt"
  
)



In [107]:
res = crew.kickoff(inputs = {"text": pdf_text[0:20000]})    

[1m[95m# Agent:[00m [1m[92mChief Researcher[00m
[95m## Task:[00m [92mExtract entities entities from the following piece of text 
# Visual Prompt Tuning for Generative Transfer Learning


Kihyuk Sohn, Huiwen Chang, Jos´e Lezama, Luisa Polania,

Han Zhang, Yuan Hao, Irfan Essa, Lu Jiang

Google Research

Figure 1. Image synthesis by knowledge transfer. Unlike previous works using GANs as base model and test transfer on relatively narrow
visual domains, we transfer knowledge of generative vision transformers [7, 15] to a wide range of visual domains, including natural
(e.g., scene, flower), specialized (e.g., satellite, medical), and structured (e.g., road scenes, infograph, sketch) with a few training images.
Notably, the prompt tuning significantly improves the prior best FID on two benchmarks ImageNet (85.9!16.3) and Places (71.3!24.2).


**Abstract**

_Learning generative image models from various domains_

_efficiently needs transferring knowledge from an image syn-_
_thesis

ValidationError: 1 validation error for TaskOutput
json_dict
  Input should be a valid dictionary [type=dict_type, input_value=[{'name': 'People', 'enti...s': [{'text': 'VTAB'}]}], input_type=list]
    For further information visit https://errors.pydantic.dev/2.8/v/dict_type

In [97]:
dir(res)

['__abstractmethods__',
 '__annotations__',
 '__class__',
 '__class_getitem__',
 '__class_vars__',
 '__copy__',
 '__deepcopy__',
 '__delattr__',
 '__dict__',
 '__dir__',
 '__doc__',
 '__eq__',
 '__fields__',
 '__fields_set__',
 '__format__',
 '__ge__',
 '__get_pydantic_core_schema__',
 '__get_pydantic_json_schema__',
 '__getattr__',
 '__getattribute__',
 '__getitem__',
 '__getstate__',
 '__gt__',
 '__hash__',
 '__init__',
 '__init_subclass__',
 '__iter__',
 '__le__',
 '__lt__',
 '__module__',
 '__ne__',
 '__new__',
 '__pretty__',
 '__private_attributes__',
 '__pydantic_complete__',
 '__pydantic_core_schema__',
 '__pydantic_custom_init__',
 '__pydantic_decorators__',
 '__pydantic_extra__',
 '__pydantic_fields_set__',
 '__pydantic_generic_metadata__',
 '__pydantic_init_subclass__',
 '__pydantic_parent_namespace__',
 '__pydantic_post_init__',
 '__pydantic_private__',
 '__pydantic_root_model__',
 '__pydantic_serializer__',
 '__pydantic_validator__',
 '__reduce__',
 '__reduce_ex__',
 '__rep

In [91]:
res.json

'{"people": ["The authors"], "locations": [], "organizations": []}'

In [90]:
json.loads(res.raw)

[{'name': 'People', 'entities': [{'text': 'The authors'}]},
 {'name': 'Locations', 'entities': []},
 {'name': 'Organizations', 'entities': [{'text': 'VTAB'}]}]

In [96]:
pdf_text[0:20000]

'# Visual Prompt Tuning for Generative Transfer Learning\n\n\nKihyuk Sohn, Huiwen Chang, Jos´e Lezama, Luisa Polania,\n\nHan Zhang, Yuan Hao, Irfan Essa, Lu Jiang\n\nGoogle Research\n\nFigure 1. Image synthesis by knowledge transfer. Unlike previous works using GANs as base model and test transfer on relatively narrow\nvisual domains, we transfer knowledge of generative vision transformers [7, 15] to a wide range of visual domains, including natural\n(e.g., scene, flower), specialized (e.g., satellite, medical), and structured (e.g., road scenes, infograph, sketch) with a few training images.\nNotably, the prompt tuning significantly improves the prior best FID on two benchmarks ImageNet (85.9!16.3) and Places (71.3!24.2).\n\n\n**Abstract**\n\n_Learning generative image models from various domains_\n\n_efficiently needs transferring knowledge from an image syn-_\n_thesis model trained on a large dataset. We present a recipe_\n_for learning vision transformers by generative knowledge_\n