In [90]:
from langchain_aws import ChatBedrock
import os

# Inference-profile ARN shared by both Claude 3.7 Sonnet clients defined below.
_CLAUDE_3_7_PROFILE_ARN = "arn:aws:bedrock:us-east-1:050451404360:inference-profile/us.anthropic.claude-3-7-sonnet-20250219-v1:0"

# Standard client: no "thinking" block in model_kwargs; AWS keys are read
# from the environment and passed explicitly.
claude_3_7_bedrock = ChatBedrock(
    model_id=_CLAUDE_3_7_PROFILE_ARN,
    provider="anthropic",
    model_kwargs={"max_tokens": 20000},
    aws_access_key_id=os.getenv("AWS_ACCESS_KEY_ID"),
    aws_secret_access_key=os.getenv("AWS_SECRET_ACCESS_KEY"),
)

# Variant that enables "thinking" with a 1024-token budget. No explicit keys
# are passed to this client.
claude_3_7_bedrock_thinking = ChatBedrock(
    model_id=_CLAUDE_3_7_PROFILE_ARN,
    provider="anthropic",
    model_kwargs={
        "max_tokens": 20000,
        "thinking": {"type": "enabled", "budget_tokens": 1024},
    },
)


In [91]:
# Smoke test: send one short prompt through the non-thinking client and display the reply.
claude_3_7_bedrock.invoke("Hello, how are you?")


AIMessage(content="I'm doing well, thank you for asking! I'm here and ready to help with any questions or topics you'd like to discuss. How are you today?", additional_kwargs={'usage': {'prompt_tokens': 13, 'completion_tokens': 36, 'total_tokens': 49}, 'stop_reason': 'end_turn', 'thinking': {}, 'model_id': 'arn:aws:bedrock:us-east-1:050451404360:inference-profile/us.anthropic.claude-3-7-sonnet-20250219-v1:0'}, response_metadata={'usage': {'prompt_tokens': 13, 'completion_tokens': 36, 'total_tokens': 49}, 'stop_reason': 'end_turn', 'thinking': {}, 'model_id': 'arn:aws:bedrock:us-east-1:050451404360:inference-profile/us.anthropic.claude-3-7-sonnet-20250219-v1:0'}, id='run-bc229901-ff4c-4194-9ade-fa6248bef79f-0', usage_metadata={'input_tokens': 13, 'output_tokens': 36, 'total_tokens': 49})

In [92]:
from langgraph.graph import END, StateGraph
from typing import Literal, TypedDict, List
from langchain_core.messages import BaseMessage, HumanMessage, SystemMessage
from langchain_core.prompts import ChatPromptTemplate

## GSheets

In [93]:
# login using service account
from google.oauth2 import service_account
import pandas as pd
import gspread
from gspread_pandas import Spread

# Build service-account credentials with Sheets and Drive scopes
credentials = service_account.Credentials.from_service_account_file(
    ".secrets/service-account-admin.json",
    scopes=['https://www.googleapis.com/auth/spreadsheets', 
            'https://www.googleapis.com/auth/drive']
)

# Connect to Google Sheets
gc = gspread.authorize(credentials)

# Spreadsheet ID (taken from the spreadsheet URL)
sheet_id = "11I9QFSMFn7UBfV0wz0-hAYgWtIKytTVnWA9pjquwgdk"

# Open the spreadsheet and take its first worksheet
sheet = gc.open_by_key(sheet_id).sheet1

# Fetch every cell and build a DataFrame: the first row supplies the column
# names, the remaining rows are the data.
data = sheet.get_all_values()
headers = data[0]
df = pd.DataFrame(data[1:], columns=headers)

# Show the first rows of the DataFrame
df.head(3)

Unnamed: 0,name,proximity,industry_agnostic,leader?,investment_geography,preferred_industry,vc_quality_perception,observations,investment_range,geo_origin,...,Unnamed: 12,Opiniao Cache,investor_profile,prefered_industry_enriched,meeting_frequency,intros_made,intros_received,description,domains,entry_id
0,Bridge Latam,3,True,Pure-follower,Latam,[],3.0,"Agnostic, low bar for investments",[< USD 1mn],Latam ex-Brazil,...,,,VC,"Business/Productivity Software, Financial Soft...",[Bimonthly],14.0,16.0,Bridge Partners is an early-stage investment f...,bridgelat.com,007037bc-f043-424c-a9ef-0d7394eeed90
1,Latitud,3,True,Pure-follower,Latam,[],4.0,Top-tier pre-seed dealflow. Have an active acc...,"[< USD 1mn, USD 1-5mn]",Brazil,...,,,VC,"Fintech, vertical SaaS, Healthtech, Marketplac...",[Monthly],13.0,13.0,Latitud is the operating system for Latin Amer...,latitud.com,009e1d00-ddaa-4e35-8113-3b775dfe46c7
2,Andreessen Horowitz,3,True,Leader and Follower,Global,[],5.0,Focusing more on Series A+,"[USD 1-5mn, USD 5-10mn, USD 10-20mn, > USD 20mn]",Outside Latam,...,,,VC,"(Global: AI, Bio + Healthcare, Consumer, Crypt...",[Monthly],14.0,3.0,Andreessen Horowitz is a venture capital firm ...,a16z.com,082a298f-a986-4686-becb-9905d63a806b


## Gdocs

In [94]:
# Importar bibliotecas necessárias para Google Docs
from googleapiclient.discovery import build
from google.oauth2 import service_account

# Build read-only credentials for Google Docs (same service-account file as the Sheets cell)
SCOPES = ['https://www.googleapis.com/auth/documents.readonly']
creds = service_account.Credentials.from_service_account_file(
    '.secrets/service-account-admin.json',
    scopes=SCOPES
)

# Create the Google Docs API (v1) service client
docs_service = build('docs', 'v1', credentials=creds)

# Fetch the content of a Google Docs document
def get_gdoc_content(doc_id):
    """
    Fetch a Google Docs document by its ID.

    Args:
        doc_id (str): ID of the Google Docs document

    Returns:
        dict | None: The document returned by the Docs service, or None when
        the request raised (the error is printed, not re-raised).
    """
    try:
        fetched = docs_service.documents().get(documentId=doc_id).execute()
    except Exception as err:
        # Best-effort: report the failure and let the caller skip this document.
        print(f"Erro ao acessar o documento: {err}")
        return None
    else:
        return fetched


doc_ids = ["1WJDSO_uyTJ16DdkDOhgAUww4bMKuUa9fXuTh0HdkOew", "1F8jqD4pFq3_17NN75HZbzd2G2vrIqWaZqR1MOB5Fa6M", "182Qj0a8ZIXjS0_plWZdRY-aGY-X8qqpiIAUYex8xrd0"]

# Keep every successfully fetched document, keyed by its ID, so later cells
# are not limited to whichever document happened to be fetched last.
# `doc_content` is still left bound to the result of the final fetch.
gdocs_by_id = {}
for doc_id in doc_ids:
    doc_content = get_gdoc_content(doc_id)
    if doc_content:
        gdocs_by_id[doc_id] = doc_content
        print(f"Título do documento: {doc_content.get('title')}")


Título do documento: [AI MATERIAL] Definition of Lead and Follow
Título do documento: [AI MATERIAL] Geographic Focus
Título do documento: [AI MATERIAL] Preffered Industry


In [95]:
def _gdoc_plaintext(document):
    """Concatenate the text runs found in a Docs document's paragraphs.

    Args:
        document (dict | None): Document as returned by get_gdoc_content.

    Returns:
        str: Text of every paragraph `textRun`, in document order. Empty
        string when the document is None/empty or has no body content.
    """
    if not document:
        return ""
    body_items = (document.get('body') or {}).get('content') or []
    pieces = []
    for item in body_items:
        # Only paragraph items are read; other structural items are skipped.
        paragraph = item.get('paragraph')
        if not paragraph:
            continue
        for element in paragraph.get('elements', []):
            if 'textRun' in element:
                pieces.append(element.get('textRun').get('content', ''))
    # Join once instead of growing a string inside the loop.
    return "".join(pieces)


# Extract the text of the document left in `doc_content` by the fetch loop
doc_plaintext = _gdoc_plaintext(doc_content)

doc_plaintext

'Analysis of Venture Capital Investment Preferences in Latin America\n1. Executive Summary\nThis report analyzes the preferred investment industries of a selected group of venture capital firms with a significant presence or interest in the Latin American market. The analysis reveals a strong concentration of investment in the technology sector, particularly within software-as-a-service (SaaS), financial technology (Fintech), and e-commerce enablers. While many firms exhibit a broad interest across various technology verticals, some demonstrate specialization in areas such as agritech, edtech, and climate technology. The findings highlight the dynamism and increasing maturity of the Latin American venture capital ecosystem, attracting both regional and global investors who are keen to capitalize on the region\'s burgeoning entrepreneurial landscape. Understanding these investment preferences is crucial for entrepreneurs seeking funding and for investors looking to identify key opportun

## AI

In [96]:
# Deal description: passed whole to the scoring prompt and read by the
# weighting / filtering cells below.
inputs = {
    "company": "Brendi",
    "description_company": "Brendi is a company that creates AI agents to sell food in Brazilian restaurants via delivery. They are going to be the next ifood",
    "description_person": "Daniel is the CEO of Brendi. he studied at ITA, is very young and energetic",
    # "size" is compared against the 1 / 5 / 10 thresholds when choosing investment-range labels.
    "round": {"size": 10, "Funding": "Series A"},
    "round_commitment": "2M USD",
    # "leader" keeps funds whose `leader?` text contains "leader"; any other value keeps "follower".
    "leader_or_follower": "leader",
    "industry": "AI Solutions, Food Delivery, Restaurant Management, AI Agents, Embedded Finance",
    # "Close" / "Distant" set the proximity weight; "Close" also filters on proximity >= 3.
    "fund_closeness": "Close",
    # "High" / "Medium" / "Low" set the quality weight; "High" and "Medium" also apply a quality floor.
    "fund_quality": "High",
    "observations": "We are sure this deal is very hot, so we want the top funds with us in this one, but they have to fit the description of the company"
}

# Run parameters.
parameters = {
    # NOTE(review): the batching code visible in this notebook reads the BATCH_SIZE constant, not this key.
    "batch_size": 10,
    # Fraction of the scored funds kept after ranking (rounded up).
    "surviving_percentage": 0.5,
}


In [97]:
# Derive the module constants from `parameters` so there is a single place to
# configure them; a hard-coded copy here could silently disagree with the dict.
BATCH_SIZE = parameters["batch_size"]
SURVIVING_PERCENTAGE = parameters["surviving_percentage"]

In [98]:
# Translate the requested closeness / quality levels into signed weights via
# lookup tables; any level that is not listed falls back to a neutral 0.
proximity_weight = {"Close": 2, "Distant": -2}.get(inputs["fund_closeness"], 0)
fund_quality_weight = {"High": 2, "Medium": 1, "Low": -1}.get(inputs["fund_quality"], 0)

# Per-criterion weights; the three fixed criteria always weigh 1.
weights = {
    "proximity": proximity_weight,
    "investment_geography": 1,
    "fund_quality": fund_quality_weight,
    "round_compatibility": 1,
    "description_compatibility": 1,
}

In [99]:
# Coerce the two score columns to floats. Blank or non-numeric cells become 0
# so the numeric threshold filters further down always compare numbers;
# errors="coerce" converts cell by cell instead of leaving the whole column
# as strings when a single value cannot be parsed.
for score_col in ("vc_quality_perception", "proximity"):
    df[score_col] = (
        pd.to_numeric(df[score_col], errors="coerce")
        .fillna(0)
        .astype(float)
    )


In [100]:
# Ordinal rank of every investment-range label, smallest cheque first.
# "USD 1-5mn" is included because the sheet data uses that label.
possible_ranges = {
    "< USD 1mn": 1,
    "USD 1-5mn": 2,
    "USD 5-10mn": 3,
    "USD 10-20mn": 4,
    "> USD 20mn": 5,
}

# Pick the range labels accepted for this round size. Every branch yields a
# LIST of labels: joining a bare string with '|' would join its characters
# and produce a pattern that matches almost anything.
# NOTE(review): each tier also accepts a bucket above the round size
# (e.g. "USD 5-10mn" for a round under 5) — confirm that is intended.
round_size = inputs["round"]["size"]
if round_size < 1:
    company_investment_range = ["< USD 1mn"]
elif round_size < 5:
    company_investment_range = ["USD 5-10mn", "USD 1-5mn", "< USD 1mn"]
elif round_size < 10:
    company_investment_range = ["USD 10-20mn", "USD 5-10mn", "USD 1-5mn", "< USD 1mn"]
else:
    # Label spelled with the space ("> USD 20mn") so it matches the sheet text.
    company_investment_range = ["> USD 20mn", "USD 10-20mn", "USD 5-10mn", "USD 1-5mn", "< USD 1mn"]

# The investment_range column holds a bracketed list rendered as text;
# drop the surrounding [ ] so only the comma-separated labels remain.
df["investment_range"] = df["investment_range"].str.strip("[]")

# Alternation pattern used by the filter cell. The labels contain no regex
# metacharacters, so they are joined as-is.
company_investment_range_pattern = '|'.join(company_investment_range)


In [101]:
### FILTERS ###
# 1) Keep funds whose investment_range text mentions at least one accepted label.
df = df[df["investment_range"].str.contains(company_investment_range_pattern, na=False)]

# 2) Role filter: "leader" keeps rows whose `leader?` text contains "leader",
#    any other value keeps rows containing "follower" (case-insensitive).
#    na=False treats a missing cell as "no match" instead of producing a NaN
#    mask, which boolean indexing rejects.
role_keyword = "leader" if inputs["leader_or_follower"] == "leader" else "follower"
df = df[df["leader?"].str.contains(role_keyword, case=False, na=False)]

# 3) Quality floor: High -> perception >= 4, Medium -> >= 3, otherwise no floor.
min_quality = {"High": 4, "Medium": 3}.get(inputs["fund_quality"])
if min_quality is not None:
    df = df[df["vc_quality_perception"] >= min_quality]

# 4) Closeness: only "Close" restricts the set (proximity >= 3).
if inputs["fund_closeness"] == "Close":
    df = df[df["proximity"] >= 3]

In [102]:
# List the DataFrame's column names.
df.columns

Index(['name', 'proximity', 'industry_agnostic', 'leader?',
       'investment_geography', 'preferred_industry', 'vc_quality_perception',
       'observations', 'investment_range', 'geo_origin',
       'funding_rounds_1st_check', '', 'Opiniao Cache', 'investor_profile',
       'prefered_industry_enriched', 'meeting_frequency', 'intros_made',
       'intros_received', 'description', 'domains', 'entry_id'],
      dtype='object')

In [103]:
# Columns handed to the scoring model. "funding_rounds_1st_check" is included
# because the scoring prompt asks for a criterion based on that column.
cols_for_ai = [
    "name",
    "investment_geography",
    "prefered_industry_enriched",
    "description",
    "observations",
    "funding_rounds_1st_check",
]

In [None]:
from pydantic import BaseModel, Field
from typing import List

# One scored fund. Used as the item type of FundScoreList (the structured
# output requested from the model) and re-created by score_fund with the
# normalized score.
class FundScore(BaseModel):
    # Name of the fund being scored.
    fund_name: str = Field(description="Fund Name")
    # Raw sum of the criteria as returned by the model; score_fund later stores a 0-100 rescaled value here.
    score: float = Field(description="Gross Score based on the sum of the criteria")
    # Free-text justification, broken down by criterion.
    reason: str = Field(description="Detailed reason for the score separated by criteria")

# Wrapper schema passed to with_structured_output in score_fund: the model
# returns one FundScoreList per batch.
class FundScoreList(BaseModel):
    # Scores returned for the funds of a single batch.
    scores: List[FundScore]

def batch_splitter(df: pd.DataFrame, batch_size: "int | None" = None) -> list[pd.DataFrame]:
    """Split a DataFrame into consecutive row batches.

    Args:
        df: Frame to split; rows keep their order and index labels.
        batch_size: Maximum rows per batch. When omitted, the module-level
            BATCH_SIZE constant is used.

    Returns:
        List of row slices of `df`; the last one may be shorter. Empty list
        for an empty frame.

    Raises:
        ValueError: If the effective batch size is not positive (a negative
            step would otherwise silently return no batches at all).
    """
    if batch_size is None:
        batch_size = BATCH_SIZE
    if batch_size <= 0:
        raise ValueError(f"batch_size must be a positive integer, got {batch_size!r}")
    # iloc makes the positional (not label-based) slicing explicit.
    return [df.iloc[start:start + batch_size] for start in range(0, len(df), batch_size)]

def _normalize_scores(raw_scores):
    """Min-max rescale raw FundScore objects onto a 0-100 scale.

    Returns a new list of FundScore objects in the same order. When every raw
    score is equal, each fund gets the arbitrary midpoint 50.0. An empty input
    yields an empty list.
    """
    if not raw_scores:
        return []
    lowest = min(item.score for item in raw_scores)
    highest = max(item.score for item in raw_scores)
    spread = highest - lowest
    return [
        FundScore(
            fund_name=item.fund_name,
            # spread == 0 means all scores are equal: avoid dividing by zero.
            score=100 * (item.score - lowest) / spread if spread > 0 else 50.0,
            reason=item.reason,
        )
        for item in raw_scores
    ]


def score_fund(state):
    """Score every fund in state["df"] with the LLM, batch by batch.

    Args:
        state (dict): Must contain "df" (DataFrame of funds). "raw_scores" is
            created when missing and is extended in place, so scores already
            present in it are kept and take part in the normalization.

    Returns:
        dict: The same `state` object, with "raw_scores" holding the scores as
        returned by the model and "fund_scores" holding the 0-100 normalized
        scores (empty list when nothing was scored).

    Notes:
        Reads the notebook globals `inputs` and `claude_3_7_bedrock`.
        NOTE(review): the criteria name columns (`preffered_industry`,
        `funding_rounds_1st_check`) — confirm they match the columns actually
        present in state["df"].
    """
    system_prompt = """
    You are a fund score agent. Score every fund.
    You are given a table of funds, user inputs, and you need to score them based on the following criteria:

    - preffered_industry (the fund's preferred industry should be compatible with the company's industry. If there is just one intersection, the score is around 5. If there is a near perfect fit, the score is 10) | 0-10 points
    - investment_geography (the fund's investment geography should be compatible with the user's investment geography) | -5 to 5 points. If the geography is a perfect match, the score is 5. If the geography is not a perfect match, the score is 3. If incompatible, the score is -5
    - funding_rounds_1st_check (the first check round should be compatible with the round type) | 0-5 points
    - Description (the description should be compatible with the company's description) | 0-3 points
    - observations (Use it as a situational reference of the fund) | -5 to 5 points

    {previous_scores_guidance}
    """

    human_prompt = """
    Here is the table of funds:
    {df}

    Here is the user inputs:
    {inputs}
    """

    batches = batch_splitter(state["df"])

    # Make sure the list of raw (non-normalized) scores exists
    if "raw_scores" not in state:
        state["raw_scores"] = []

    # Build the chain once. The guidance is a template variable filled at
    # invoke time, so text derived from model output (fund names) is never
    # re-parsed as template syntax.
    prompt = ChatPromptTemplate.from_messages([
        ("system", system_prompt),
        ("human", human_prompt)
    ])
    structured_llm = claude_3_7_bedrock.with_structured_output(FundScoreList)
    chain = prompt | structured_llm

    for i, batch in enumerate(batches):
        # Guidance based on earlier scores, to keep later batches on the same scale
        previous_scores_guidance = ""
        if state["raw_scores"]:
            # Up to five earlier (name, score) pairs serve as calibration examples
            examples = [(s.fund_name, s.score) for s in state["raw_scores"][:5]]
            previous_scores_guidance = f""" 
            IMPORTANT: Keep consistency with the scores already assigned to other funds.
            Examples of previous scores: {examples}
            Remember that the total score must be in an approximate scale with the scores already assigned.
            """

        # Render the batch as plain text for the prompt
        batch_str = batch.to_string()

        # Call the model
        fund_scores = chain.invoke({
            "df": batch_str,
            "inputs": inputs,
            "previous_scores_guidance": previous_scores_guidance,
        })

        if fund_scores is None:
            # No structured answer came back for this batch: report it and
            # keep going rather than failing on `.scores`.
            print(f"Warning: batch {i+1}/{len(batches)} returned no structured output; skipped")
            continue

        # Store the raw scores
        state["raw_scores"].extend(fund_scores.scores)

        print(f"Processado lote {i+1}/{len(batches)}")

    # Normalize all scores at the end
    state["fund_scores"] = _normalize_scores(state["raw_scores"])

    return state

# Starting state for score_fund: only the columns selected for the model,
# plus empty lists for the normalized and raw scores.
initial_state = {
    "df":df[cols_for_ai],
    "fund_scores":[],
    "raw_scores":[]
}

# Score every batch; one "Processado lote" line is printed per batch.
result = score_fund(initial_state)

Processado lote 1/2
Processado lote 2/2


In [105]:
# Rank the funds from highest to lowest normalized score.
sorted_results = sorted(result["fund_scores"], key=lambda x: x.score, reverse=True)
sorted_results

[FundScore(fund_name='Monashees', score=100.0, reason="Investment range: 4/5 - As a leading LatAm investor, they can lead a $2M investment in a $10M Series A. Geography: 5/5 - LatAm focus matches perfectly with Brendi's Brazilian market. Funding rounds: 4/5 - They invest in various stages and support scaling businesses. Description: 3/3 - Their investment in marketplaces, consumption and technology solutions directly aligns with Brendi's AI food delivery platform. Observations: 3/5 - Identified as a top-tier fund which meets the requirement for quality investors in this hot deal."),
 FundScore(fund_name='Kaszek', score=94.11764705882354, reason="Investment range: 4/5 - As a top-tier LatAm fund, they can likely lead a $2M investment in a $10M round. Geography: 5/5 - LatAm-focused fund perfectly matches Brendi's Brazilian market. Funding rounds: 4/5 - They support high-impact technology companies at various stages including Series A. Description: 3/3 - They invest in a wide range of tech

In [106]:
import math
# Number of funds that survive this round: the configured fraction of all
# scored funds, rounded up to a whole fund.
to_keep = math.ceil(parameters["surviving_percentage"] * len(result["fund_scores"]))
to_keep

8

In [107]:
# Keep the `to_keep` best-ranked funds.
kept_funds = sorted_results[:to_keep]
kept_funds

[FundScore(fund_name='Monashees', score=100.0, reason="Investment range: 4/5 - As a leading LatAm investor, they can lead a $2M investment in a $10M Series A. Geography: 5/5 - LatAm focus matches perfectly with Brendi's Brazilian market. Funding rounds: 4/5 - They invest in various stages and support scaling businesses. Description: 3/3 - Their investment in marketplaces, consumption and technology solutions directly aligns with Brendi's AI food delivery platform. Observations: 3/5 - Identified as a top-tier fund which meets the requirement for quality investors in this hot deal."),
 FundScore(fund_name='Kaszek', score=94.11764705882354, reason="Investment range: 4/5 - As a top-tier LatAm fund, they can likely lead a $2M investment in a $10M round. Geography: 5/5 - LatAm-focused fund perfectly matches Brendi's Brazilian market. Funding rounds: 4/5 - They support high-impact technology companies at various stages including Series A. Description: 3/3 - They invest in a wide range of tech

In [108]:
# Display the plain text extracted earlier from the Google Doc.
doc_plaintext

'Analysis of Venture Capital Investment Preferences in Latin America\n1. Executive Summary\nThis report analyzes the preferred investment industries of a selected group of venture capital firms with a significant presence or interest in the Latin American market. The analysis reveals a strong concentration of investment in the technology sector, particularly within software-as-a-service (SaaS), financial technology (Fintech), and e-commerce enablers. While many firms exhibit a broad interest across various technology verticals, some demonstrate specialization in areas such as agritech, edtech, and climate technology. The findings highlight the dynamism and increasing maturity of the Latin American venture capital ecosystem, attracting both regional and global investors who are keen to capitalize on the region\'s burgeoning entrepreneurial landscape. Understanding these investment preferences is crucial for entrepreneurs seeking funding and for investors looking to identify key opportun

In [109]:
# Names of the surviving funds, in ranking order. A comprehension builds the
# list in one step and does not leave a loop variable in the notebook namespace.
fund_names = [kept.fund_name for kept in kept_funds]
fund_names

['Monashees',
 'Kaszek',
 'Valor Capital Group',
 'Atlantico',
 'big_bets',
 'Astella',
 'Alexia Ventures',
 'Upload']

In [15]:
# Keep only the rows of `df` whose `name` exactly matches one of the surviving fund names.
# NOTE(review): this cell's execution count (15) is out of sequence with the
# surrounding cells, and its saved output lists a fund that is not in the
# `fund_names` output above — the output looks stale; re-run after Restart & Run All.
df_first_filter = df[df["name"].isin(fund_names)]
df_first_filter.head(2)

Unnamed: 0,name,Opiniao Pedro,Opiniao Gus,industry_agnostic,leader?,investment_geography,preferred_industry,vc_quality_perception,observations,investment_range,...,Unnamed: 12,Opiniao Cache,investor_profile,prefered_industry_enriched,meeting_frequency,intros_made,intros_received,description,domains,entry_id
2,Andreessen Horowitz,"Lots of money, hyped, low activity in Brazil. ...",,True,Leader and Follower,Global,[],5.0,Focusing more on Series A+,"[USD 1-5mn, USD 5-10mn, USD 10-20mn, > USD 20mn]",...,,,VC,"(Global: AI, Bio + Healthcare, Consumer, Crypt...",[Monthly],14.0,3.0,Andreessen Horowitz is a venture capital firm ...,a16z.com,082a298f-a986-4686-becb-9905d63a806b
3,big_bets,"Pure software, they don't like hardware. Rarel...",,,Leader and Follower,"Brazil, USA",[],4.0,Brazil and Global thesis,"[< USD 1mn, USD 1-5mn]",...,,,VC,"Software (IDtech, AI, SaaS, Fintech, Edtech, G...",[Monthly],18.0,44.0,big_bets is a venture capital and private equi...,bigbets.com.br,0920744a-6cdf-4992-9ecd-f370cb240be6
