In [13]:
from langchain_aws import ChatBedrock
import os

claude_3_7_bedrock = ChatBedrock(
    model_id="arn:aws:bedrock:us-east-1:050451404360:inference-profile/us.anthropic.claude-3-7-sonnet-20250219-v1:0",
    provider="anthropic",
    model_kwargs={"max_tokens": 20000,
    # "thinking": {"type": "enabled", "budget_tokens": 1024}
    },
    aws_access_key_id=os.getenv("AWS_ACCESS_KEY_ID"),
    aws_secret_access_key=os.getenv("AWS_SECRET_ACCESS_KEY"),
)

claude_3_7_bedrock_thinking = ChatBedrock(
    model_id="arn:aws:bedrock:us-east-1:050451404360:inference-profile/us.anthropic.claude-3-7-sonnet-20250219-v1:0",
    provider="anthropic",
    model_kwargs={"max_tokens": 20000,
    "thinking": {"type": "enabled", "budget_tokens": 1024}
    },
)


In [14]:
claude_3_7_bedrock.invoke("Hello, how are you?")


AIMessage(content="I'm doing well, thank you for asking! I'm here and ready to assist you with information or help with whatever you'd like to discuss today. How can I help you?", additional_kwargs={'usage': {'prompt_tokens': 13, 'completion_tokens': 40, 'total_tokens': 53}, 'stop_reason': 'end_turn', 'thinking': {}, 'model_id': 'arn:aws:bedrock:us-east-1:050451404360:inference-profile/us.anthropic.claude-3-7-sonnet-20250219-v1:0'}, response_metadata={'usage': {'prompt_tokens': 13, 'completion_tokens': 40, 'total_tokens': 53}, 'stop_reason': 'end_turn', 'thinking': {}, 'model_id': 'arn:aws:bedrock:us-east-1:050451404360:inference-profile/us.anthropic.claude-3-7-sonnet-20250219-v1:0'}, id='run-8a3c17a3-e54e-47fc-86ef-0c79a4bb134d-0', usage_metadata={'input_tokens': 13, 'output_tokens': 40, 'total_tokens': 53})

In [15]:
from langgraph.graph import END, StateGraph
from typing import Literal, TypedDict, List
from langchain_core.messages import BaseMessage, HumanMessage, SystemMessage
from langchain_core.prompts import ChatPromptTemplate

## GSheets

In [16]:
# login using service account
from google.oauth2 import service_account
import pandas as pd
import gspread
from gspread_pandas import Spread

# Configurar credenciais
credentials = service_account.Credentials.from_service_account_file(
    ".secrets/service-account-admin.json",
    scopes=['https://www.googleapis.com/auth/spreadsheets', 
            'https://www.googleapis.com/auth/drive']
)

# Conectar ao Google Sheets
gc = gspread.authorize(credentials)

# Extrair o ID da planilha da URL
sheet_id = "11I9QFSMFn7UBfV0wz0-hAYgWtIKytTVnWA9pjquwgdk"

# Abrir a planilha e obter a primeira aba
sheet = gc.open_by_key(sheet_id).sheet1

# Obter todos os dados e converter para DataFrame
data = sheet.get_all_values()
headers = data[0]
df = pd.DataFrame(data[1:], columns=headers)

# Exibir as primeiras linhas do DataFrame
df.head(3)

Unnamed: 0,name,proximity,industry_agnostic,leader?,investment_geography,preferred_industry,vc_quality_perception,observations,investment_range,geo_origin,...,Unnamed: 12,Opiniao Cache,investor_profile,prefered_industry_enriched,meeting_frequency,intros_made,intros_received,description,domains,entry_id
0,Bridge Latam,3,True,Pure-follower,Latam,[],3.0,"Agnostic, low bar for investments",[< USD 1mn],Latam ex-Brazil,...,,,VC,"Business/Productivity Software, Financial Soft...",[Bimonthly],14.0,16.0,Bridge Partners is an early-stage investment f...,bridgelat.com,007037bc-f043-424c-a9ef-0d7394eeed90
1,Latitud,3,True,Pure-follower,Latam,[],4.0,Top-tier pre-seed dealflow. Have an active acc...,"[< USD 1mn, USD 1-5mn]",Brazil,...,,,VC,"Fintech, vertical SaaS, Healthtech, Marketplac...",[Monthly],13.0,13.0,Latitud is the operating system for Latin Amer...,latitud.com,009e1d00-ddaa-4e35-8113-3b775dfe46c7
2,Andreessen Horowitz,3,True,Leader and Follower,Global,[],5.0,Focusing more on Series A+,"[USD 1-5mn, USD 5-10mn, USD 10-20mn, > USD 20mn]",Outside Latam,...,,,VC,"(Global: AI, Bio + Healthcare, Consumer, Crypt...",[Monthly],14.0,3.0,Andreessen Horowitz is a venture capital firm ...,a16z.com,082a298f-a986-4686-becb-9905d63a806b


## Gdocs

In [17]:
# Importar bibliotecas necessárias para Google Docs
from googleapiclient.discovery import build
from google.oauth2 import service_account

# Configurar credenciais para Google Docs (usando as mesmas credenciais do Sheets)
SCOPES = ['https://www.googleapis.com/auth/documents.readonly']
creds = service_account.Credentials.from_service_account_file(
    '.secrets/service-account-admin.json',
    scopes=SCOPES
)

# Criar serviço do Google Docs
docs_service = build('docs', 'v1', credentials=creds)

# Função para obter conteúdo de um documento do Google Docs
def get_gdoc_content(doc_id):
    """
    Obtém o conteúdo de um documento do Google Docs pelo ID.
    
    Args:
        doc_id (str): ID do documento do Google Docs
        
    Returns:
        dict: Conteúdo do documento
    """
    try:
        # Fazer a requisição para obter o documento
        document = docs_service.documents().get(documentId=doc_id).execute()
        return document
    except Exception as e:
        print(f"Erro ao acessar o documento: {e}")
        return None


doc_ids = ["1WJDSO_uyTJ16DdkDOhgAUww4bMKuUa9fXuTh0HdkOew", "1F8jqD4pFq3_17NN75HZbzd2G2vrIqWaZqR1MOB5Fa6M", "182Qj0a8ZIXjS0_plWZdRY-aGY-X8qqpiIAUYex8xrd0"]
for doc_id in doc_ids:
    doc_content = get_gdoc_content(doc_id)
    if doc_content:
        print(f"Título do documento: {doc_content.get('title')}")


Título do documento: [AI MATERIAL] Definition of Lead and Follow
Título do documento: [AI MATERIAL] Geographic Focus
Título do documento: [AI MATERIAL] Preffered Industry


In [18]:
# Extrair texto do documento
doc_plaintext = ""
for item in doc_content.get('body').get('content'):
    if 'paragraph' in item:
        for element in item.get('paragraph').get('elements', []):
            if 'textRun' in element:
                doc_plaintext += element.get('textRun').get('content', '')

doc_plaintext

'Analysis of Venture Capital Investment Preferences in Latin America\n1. Executive Summary\nThis report analyzes the preferred investment industries of a selected group of venture capital firms with a significant presence or interest in the Latin American market. The analysis reveals a strong concentration of investment in the technology sector, particularly within software-as-a-service (SaaS), financial technology (Fintech), and e-commerce enablers. While many firms exhibit a broad interest across various technology verticals, some demonstrate specialization in areas such as agritech, edtech, and climate technology. The findings highlight the dynamism and increasing maturity of the Latin American venture capital ecosystem, attracting both regional and global investors who are keen to capitalize on the region\'s burgeoning entrepreneurial landscape. Understanding these investment preferences is crucial for entrepreneurs seeking funding and for investors looking to identify key opportun

## AI

In [19]:
inputs = {
    "company": "Brendi",
    "description_company": "Brendi is a company that creates AI agents to sell food in Brazilian restaurants via delivery. They are going to be the next ifood",
    "description_person": "Daniel is the CEO of Brendi. he studied at ITA, is very young and energetic",
    "round": {"size": "10M USD", "Funding": "Series A"},
    "round_commitment": "2M USD",
    "leader_or_follower": "leader",
    "industry": "AI Solutions, Food Delivery, Restaurant Management, AI Agents, Embedded Finance",
    "fund_closeness": "Close",
    "observations": "We are sure this deal is very hot, so we want the top funds with us in this one, but they have to fit"
}

parameters = {
    "batch_size": 10,
    "surviving_percentage": 0.5,
}


In [20]:
# First Filter
if inputs["leader_or_follower"] == "leader":
    # get only leader? that contains the word Leader (case insensitive)
    df = df[df["leader?"].str.lower().str.contains("leader")]
else:
    # get only follower? that contains the word Follower (case insensitive)
    df = df[df["leader?"].str.lower().str.contains("follower")]


In [21]:
from pydantic import BaseModel
class FundScore(TypedDict):
    fund_name: str
    score: float
    reason: str

class FundScoreList(BaseModel):
    scores: List[FundScore]

class AgentState(TypedDict):
    df: pd.DataFrame
    df_batches: list[pd.DataFrame]
    fund_scores: list[FundScore]
    raw_scores: List[FundScore]  # Armazenar pontuações não normalizadas
    second_raw_scores:List[FundScore]
    second_scores: List[FundScore]


BATCH_SIZE = 10
SURVIVING_PERCENTAGE = 0.5

In [22]:
df

Unnamed: 0,name,proximity,industry_agnostic,leader?,investment_geography,preferred_industry,vc_quality_perception,observations,investment_range,geo_origin,...,Unnamed: 12,Opiniao Cache,investor_profile,prefered_industry_enriched,meeting_frequency,intros_made,intros_received,description,domains,entry_id
2,Andreessen Horowitz,3,True,Leader and Follower,Global,[],5.0,Focusing more on Series A+,"[USD 1-5mn, USD 5-10mn, USD 10-20mn, > USD 20mn]",Outside Latam,...,,,VC,"(Global: AI, Bio + Healthcare, Consumer, Crypt...",[Monthly],14.0,3.0,Andreessen Horowitz is a venture capital firm ...,a16z.com,082a298f-a986-4686-becb-9905d63a806b
3,big_bets,5,,Leader and Follower,"Brazil, USA",[],4.0,Brazil and Global thesis,"[< USD 1mn, USD 1-5mn]",Brazil,...,,,VC,"Software (IDtech, AI, SaaS, Fintech, Edtech, G...",[Monthly],18.0,44.0,big_bets is a venture capital and private equi...,bigbets.com.br,0920744a-6cdf-4992-9ecd-f370cb240be6
4,NXTP Ventures,3,,Leader,Latam,[],4.0,"B2B focused, Fintech, Marketplace, Ecommerce.","[USD 1-5mn, < USD 1mn]",Latam ex-Brazil,...,,,VC,"Cloud & SaaS, E-commerce Enablers, Fintech, B2...",[Monthly],17.0,6.0,NXTP Ventures is a pioneering early stage vent...,nxtp.vc,099a7873-7123-4eb9-8397-6e629e61953c
5,Alexia Ventures,4,,Leader and Follower,Latam,[],4.0,SaaS and AI businesses,"[< USD 1mn, USD 1-5mn, USD 5-10mn, USD 10-20mn]",Brazil,...,,,VC,"Software Platforms, Data/AI, Blockchain/Crypto...",[Monthly],29.0,13.0,Alexia Ventures is a venture capital firm that...,alexia.vc,0b184e1c-3f6e-4c24-b0b7-5d39ac18a8a9
7,Volpe Capital,2,True,Leader and Follower,Latam,[],3.0,Needs traction,"[USD 5-10mn, USD 1-5mn]",Brazil,...,,,VC,"FinTech, SaaS, Marketplaces, Healthtech, Const...",[Quarterly],7.0,1.0,Volpe Capital is a venture capital and private...,volpecapital.com,1515b03e-826c-4ba5-92c4-e6a653a42067
8,Citrino Ventures,3,True,Leader and Follower,Brazil,[],3.0,"B2B, Agnostic. Seed, early, and later-stage, B...",[USD 1-5mn],Brazil,...,,,Family office,"Agriculture (AgTech), Education (EdTech), Fina...",[Bimonthly],13.0,1.0,Citrino Ventures is a VC firm that supports vi...,citrino.vc,15f7a23f-2eb0-4363-9c5f-a561842e6f61
9,30N Ventures,3,,Leader and Follower,Latam,[],2.0,Follower,"[USD 1-5mn, USD 5-10mn]",Latam ex-Brazil,...,,,VC,"Fintech, Foodtech, Retailtech (Marketplace, E-...",[Bimonthly],1.0,0.0,30N Ventures is a VC firm focused on Emerging ...,30n.vc,17013bf6-7fb3-4011-8758-08a72bf23875
10,Across Capital Partners,4,,Leader and Follower,Global,[],4.0,">USD 5M of ARR, with +50% YoY growth","[USD 5-10mn, USD 10-20mn]",Brazil,...,,,VC,"Software (Financial Software, Legal Services B...",[Bimonthly],8.0,0.0,Across Capital Partners partners with ambitiou...,acrosscap.com,176e8b6f-bc56-4fb9-9275-b098084a96b6
11,Amador,3,,Leader and Follower,Latam,"[B2B SaaS, Fintech, Healthtech, Edtech]",2.0,"VC & PE (Financial Services, Logistics, Consum...",[< USD 1mn],Outside Latam,...,,,VC,"Digital-enabled business models, Tech-enabled ...",[Bimonthly],5.0,4.0,Amador Holdings is an investment holding compa...,amador.holdings,190d0130-d61a-41fa-b86d-7802555e075a
12,SaaSholic,3,,Leader and Follower,Latam,[],3.0,"B2B businesses, low valuations","[< USD 1mn, USD 1-5mn]",Brazil,...,,,VC,"SaaS, other tech-related (Future of Work, Mobi...",[Monthly],15.0,8.0,SaaSholic is an early stage VC firm investing ...,saasholic.com,1a89537c-1147-43d2-9e5f-046e10da584f


In [23]:
if inputs["fund_closeness"] == "Close":
    proximity_weight = 2
elif inputs["fund_closeness"] == "Distant":
    proximity_weight = -2
else:
    proximity_weight = 0

if inputs["fund_quality"] == "High":
    fund_quality_weight = 2
elif inputs["fund_quality"] == "Medium":
    fund_quality_weight = 1
elif inputs["fund_quality"] == "Low":
    fund_quality_weight = -1
else:
    fund_quality_weight = 0

weights = {
    "proximity": proximity_weight,
    "investment_geography": 1,
    "fund_quality": fund_quality_weight,
    "round_compatibility": 1,
    "description_compatibility": 1,
}

KeyError: 'fund_quality'

In [None]:
# change empty strings to 0
df["vc_quality_perception"] = df["vc_quality_perception"].replace("", 0)
df["vc_quality_perception"] = df["vc_quality_perception"].astype(float, errors='ignore')
df["proximity"] = df["proximity"].replace("", 0)
df["proximity"] = df["proximity"].astype(float, errors='ignore')


In [None]:
possible_ranges = {
    "< USD 1mn": 1,
    "USD 5-10mn": 2,
    "USD 10-20mn": 3,
    "> USD 20mn": 4,
}

# check if funding round is inside what range
if inputs["round"]["size"] < 1:
    company_investment_range = "< USD 1mn"
elif inputs["round"]["size"] < 5:
    company_investment_range = ["USD 5-10mn", "< USD 1mn"]
elif inputs["round"]["size"] < 10:
    company_investment_range = ["USD 10-20mn", "USD 5-10mn", "< USD 1mn"]
else:
    company_investment_range = [">USD 20mn", "USD 10-20mn", "USD 5-10mn", "< USD 1mn"]

# remove from the df the rows that are not in the company_investment_range
# Converter a coluna investment_range de string para lista
# remove [ ]
df["investment_range"] = df["investment_range"].apply(lambda x: x.strip("[]"))

company_investment_range_pattern = '|'.join(company_investment_range)


In [None]:
### FILTERS ###
df = df[df["investment_range"].str.contains(company_investment_range_pattern, na=False)]

if inputs["leader_or_follower"] == "leader":
    # get only leader? that contains the word Leader (case insensitive)
    df = df[df["leader?"].str.lower().str.contains("leader")]
else:
    # get only follower? that contains the word Follower (case insensitive)
    df = df[df["leader?"].str.lower().str.contains("follower")]

if inputs["fund_quality"] == "High":
    df = df[df["vc_quality_perception"] >= 4]
elif inputs["fund_quality"] == "Medium":
    df = df[df["vc_quality_perception"] >= 3]

if inputs["fund_closeness"] == "Close":
    df = df[df["proximity"] >= 3]

In [None]:
cols_for_ai = ["name", "investment_geography", "prefered_industry_enriched", "description", "observations"]

In [1]:
# login using service account
from google.oauth2 import service_account
import pandas as pd
import gspread
from gspread_pandas import Spread
import numpy as np
from sentence_transformers import SentenceTransformer
from sklearn.metrics.pairwise import cosine_similarity

# Configurar credenciais
credentials = service_account.Credentials.from_service_account_file(
    ".secrets/service-account-admin.json",
    scopes=['https://www.googleapis.com/auth/spreadsheets', 
            'https://www.googleapis.com/auth/drive']
)

# Conectar ao Google Sheets
gc = gspread.authorize(credentials)

# Extrair o ID da planilha da URL
sheet_id = "11I9QFSMFn7UBfV0wz0-hAYgWtIKytTVnWA9pjquwgdk"

# Abrir a planilha e obter a primeira aba
sheet = gc.open_by_key(sheet_id).sheet1

# Obter todos os dados e converter para DataFrame
data = sheet.get_all_values()
headers = data[0]
df = pd.DataFrame(data[1:], columns=headers)

# Carregar modelo de embeddings
model = SentenceTransformer('paraphrase-multilingual-MiniLM-L12-v2')

# Função para criar embeddings das linhas do DataFrame
def create_embeddings(df, text_column):
    # Assumindo que queremos usar uma coluna específica para embeddings
    texts = df[text_column].tolist()
    embeddings = model.encode(texts)
    return embeddings

# Função para buscar entradas similares com base em uma consulta
def search_similar(query, df, embeddings, top_n=5):
    # Criar embedding para a consulta
    query_embedding = model.encode([query])
    
    # Calcular similaridade coseno
    similarities = cosine_similarity(query_embedding, embeddings)[0]
    
    # Obter os índices dos itens mais similares
    top_indices = similarities.argsort()[-top_n:][::-1]
    
    # Criar DataFrame com resultados e pontuações
    results = df.iloc[top_indices].copy()
    results['similarity_score'] = similarities[top_indices]
    
    return results

# Exemplo de uso - selecione a coluna apropriada para criar embeddings
# Substitua 'nome_da_coluna' pela coluna que contém o texto relevante
text_column = 'Título'  # Ajuste conforme necessário
df_embeddings = create_embeddings(df, text_column)

# Exemplo de consulta
exemplo_consulta = "Como criar um bom currículo"
resultados = search_similar(exemplo_consulta, df, df_embeddings)

# Exibir resultados
resultados

ModuleNotFoundError: No module named 'sentence_transformers'

In [39]:
sorted_results = sorted(result["fund_scores"], key=lambda x: x.score, reverse=True)
sorted_results

[FundScore(fund_name='Kaszek', score=100.0, reason="Meeting Frequency: 10 points (monthly meetings align with desired 100% closeness). VC Quality: 5 points (top-tier fund perfect for a hot deal). Investment Range: 5 points (USD 10-20mn range is appropriate for leading a $10M Series A). Investment Geography: 5 points (Latam focus perfect for Brazilian company). Funding Rounds: 5 points (invests 'Regardless' of stage, including Series A). Observations: 0 points (no specific notes). Preferred Industry: 8 points (strong alignment with technology and disruption focus - AI Solutions and Food Delivery fit their tech innovation thesis, with their portfolio including marketplaces and software that would apply to Brendi). Description: 2 points (high-impact technology-based companies in Latin America aligns well with Brendi's AI-focused food delivery platform)."),
 FundScore(fund_name='Monashees', score=92.0, reason="Meeting Frequency: 10 points (monthly meetings align with desired 100% closeness

In [37]:
import math
to_keep = parameters["surviving_percentage"]*len(result["fund_scores"])
to_keep = math.ceil(to_keep)
to_keep

19

In [38]:
kept_funds = sorted_results[:to_keep]
kept_funds

[FundScore(fund_name='Kaszek', score=100.0, reason="Meeting Frequency: 10 points (monthly meetings align with desired 100% closeness). VC Quality: 5 points (top-tier fund perfect for a hot deal). Investment Range: 5 points (USD 10-20mn range is appropriate for leading a $10M Series A). Investment Geography: 5 points (Latam focus perfect for Brazilian company). Funding Rounds: 5 points (invests 'Regardless' of stage, including Series A). Observations: 0 points (no specific notes). Preferred Industry: 8 points (strong alignment with technology and disruption focus - AI Solutions and Food Delivery fit their tech innovation thesis, with their portfolio including marketplaces and software that would apply to Brendi). Description: 2 points (high-impact technology-based companies in Latin America aligns well with Brendi's AI-focused food delivery platform)."),
 FundScore(fund_name='Monashees', score=92.0, reason="Meeting Frequency: 10 points (monthly meetings align with desired 100% closeness

In [13]:
doc_plaintext

'Analysis of Venture Capital Investment Preferences in Latin America\n1. Executive Summary\nThis report analyzes the preferred investment industries of a selected group of venture capital firms with a significant presence or interest in the Latin American market. The analysis reveals a strong concentration of investment in the technology sector, particularly within software-as-a-service (SaaS), financial technology (Fintech), and e-commerce enablers. While many firms exhibit a broad interest across various technology verticals, some demonstrate specialization in areas such as agritech, edtech, and climate technology. The findings highlight the dynamism and increasing maturity of the Latin American venture capital ecosystem, attracting both regional and global investors who are keen to capitalize on the region\'s burgeoning entrepreneurial landscape. Understanding these investment preferences is crucial for entrepreneurs seeking funding and for investors looking to identify key opportun

In [14]:
fund_names = []
for fund in kept_funds:
    fund_names.append(fund.fund_name)
fund_names

['Kaszek',
 'Monashees',
 'Valor Capital Group',
 'Upload',
 'ONEVC',
 'Atlantico',
 'Alexia Ventures',
 'Hi Ventures',
 'Andreessen Horowitz',
 'Zenda',
 'Astella',
 'big_bets',
 'Headline',
 'Fen Ventures',
 'Caravela Capital',
 'Dalus Capital',
 'Canary',
 'Quona Capital',
 'SaaSholic']

In [15]:
df_first_filter = df[df["name"].isin(fund_names)]
df_first_filter.head(2)

Unnamed: 0,name,Opiniao Pedro,Opiniao Gus,industry_agnostic,leader?,investment_geography,preferred_industry,vc_quality_perception,observations,investment_range,...,Unnamed: 12,Opiniao Cache,investor_profile,prefered_industry_enriched,meeting_frequency,intros_made,intros_received,description,domains,entry_id
2,Andreessen Horowitz,"Lots of money, hyped, low activity in Brazil. ...",,True,Leader and Follower,Global,[],5.0,Focusing more on Series A+,"[USD 1-5mn, USD 5-10mn, USD 10-20mn, > USD 20mn]",...,,,VC,"(Global: AI, Bio + Healthcare, Consumer, Crypt...",[Monthly],14.0,3.0,Andreessen Horowitz is a venture capital firm ...,a16z.com,082a298f-a986-4686-becb-9905d63a806b
3,big_bets,"Pure software, they don't like hardware. Rarel...",,,Leader and Follower,"Brazil, USA",[],4.0,Brazil and Global thesis,"[< USD 1mn, USD 1-5mn]",...,,,VC,"Software (IDtech, AI, SaaS, Fintech, Edtech, G...",[Monthly],18.0,44.0,big_bets is a venture capital and private equi...,bigbets.com.br,0920744a-6cdf-4992-9ecd-f370cb240be6
