In [None]:
import os
from dotenv import load_dotenv

import textwrap
import numpy as np
import pandas as pd

import google.generativeai as genai

from IPython.display import Markdown

In [None]:
load_dotenv()
GOOGLE_API_KEY = os.getenv("GEMINI_API_KEY")
# print(GOOGLE_API_KEY)
genai.configure(api_key=GOOGLE_API_KEY)

### embedding example

In [None]:
title = "The next generation of AI for developers and Google Workspace"
sample_text = ("Title: The next generation of AI for developers and Google Workspace",)
model = 'models/embedding-001'
embedding = genai.embed_content(model=model,
                                content=sample_text,
                                task_type="retrieval_document",
                                title=title)
print(embedding)
print(len(embedding['embedding'][0]))

## Extraction of text from PDF file

In [None]:
from pdfminer.high_level import extract_text

text = extract_text(r"microsoft_annual_report_2022.pdf")
# split after every 5 sentences
sentences = text.split(".")
sentences = [" ".join(sentences[i:i + 10]).replace('\n', ' ').strip() for i in range(0, len(sentences), 10)]
for i in sentences:
    print(i)


In [None]:

documents = []
for idx, i in enumerate(sentences):
    documents.append({
        'title': f"Document {idx}",
        'content': i
    })
documents

In [None]:
df = pd.DataFrame(documents)
df.columns = ['Title', 'Text']
df

In [12]:

# Get the embeddings of each text and add to an embeddings column in the dataframe
def embed_fn(title, text):
    return genai.embed_content(
            model=model,
            content=text,
            task_type="retrieval_document",
            title=title
        )["embedding"]

df['Embeddings'] = df.apply(lambda row: embed_fn(row['Title'], row['Text']), axis=1)
df

Unnamed: 0,Title,Text,Embeddings
0,Document 0,"Dear shareholders, colleagues, customers, and ...","[0.01836762, -0.0050430307, -0.032029595, 0.01..."
1,Document 1,"• Peace Parks Foundation, a nonprofit helping...","[0.010761021, -0.0089883255, -0.03835591, 0.00..."
2,Document 2,There is no more powerful input than digital ...,"[0.02871501, -0.04330673, -0.015005337, 0.0094..."
3,Document 3,"To help address this, we’ve committed to skill...","[0.039082542, -0.0036628272, -0.045578208, -0...."
4,Document 4,Building on our work in eight US cities...,"[0.04055749, -0.030310482, -0.04219523, 0.0252..."
...,...,...,...
190,Document 190,Compensation Committee 3 Governance and No...,"[0.016356546, -0.01140176, -0.053219605, -0.01..."
191,Document 191,m to 5:00 p m Pacific Time to answer ...,"[-0.024060866, -0.0019965412, -0.024987465, 0...."
192,Document 192,com Our mailing address is: Investor Rela...,"[-0.019213809, -0.025575353, -0.033646412, -0...."
193,Document 193,You can e-mail the transfer agent at: web qu...,"[0.036572132, -0.039415024, -0.049082648, -0.0..."


# question and its embedding

In [38]:
query = "Can you help me understand the growth of the company in the last year? give me a table of result"
model = 'models/embedding-001'

request = genai.embed_content(
            model=model,
            content=query,
            task_type="retrieval_query"
        )
print(request)

{'embedding': [0.026338462, -0.03902819, -0.026654007, 0.05893739, 0.04154411, -0.008128175, 0.02086181, -0.020727564, -0.013477313, 0.05866132, 0.023937695, -0.007590161, 0.000943387, 0.008735923, 0.029846014, -0.031593904, -0.030427873, -0.020761233, 0.013386109, 0.0094904415, 0.009541162, 0.029537184, -0.03450427, 0.0041051004, 0.016275054, -0.040544942, -0.013878575, -0.024039958, -0.009793417, 0.044689167, -0.116188355, 0.037942626, -0.06618022, 0.025099581, 0.020290334, -0.038012784, -0.0056411712, 0.04711495, -0.030590087, 0.034354143, 0.005308312, 0.0010465065, -0.036170084, -0.05421539, -0.018690355, -0.061751604, 0.002567143, 0.029287385, 0.014300767, -0.030870035, 0.013359955, -0.008443386, 0.041010473, -0.0665023, 0.037416387, -0.05978632, 0.012276446, -0.023701891, -0.02734162, 0.01866758, 0.0082044555, 0.012076585, -0.04665521, 0.013096132, 0.022608269, -0.026216261, 0.026026813, 0.020404898, 0.051263086, -0.073517546, -0.019185213, -0.020199236, 0.031766366, -0.009237397

In [39]:
def find_best_passage(query, dataframe):
    """
    Compute the distances between the query and each document in the dataframe
    using the dot product.
    """
    query_embedding = genai.embed_content(model=model,
                            content=query,
                            task_type="retrieval_query")
    dot_products = np.dot(np.stack(dataframe['Embeddings']), query_embedding["embedding"])
    idx = np.argmax(dot_products)
    return dataframe.iloc[idx]['Text'] # Return text from index with max value

In [40]:
passage = find_best_passage(query, df)
passage

'We  monitor  our  foreign  currency  exposures  daily  to  maximize  the  economic  effectiveness  of  our  foreign  currency  positions,  including  hedges   Principal currency exposures include the Euro, Japanese yen, British pound, Canadian dollar, and Australian dollar     Interest Rate    Securities  held  in  our  fixed-income  portfolio  are  subject  to  different  interest  rate  risks  based  on  their  maturities   We  manage  the  average  maturity  of  the  fixed-income  portfolio  to  achieve  economic  returns  that  correlate  to  certain  global  fixed-income indices     Credit    Our  fixed-income  portfolio  is  diversified  and  consists  primarily  of  investment-grade  securities   We  manage  credit  exposures relative to broad-based indices and to facilitate portfolio diversification     Equity    Securities held in our equity investments portfolio are subject to price risk     SENSITIVITY ANALYSIS    The following table sets forth the potential loss in future 

In [41]:
def make_prompt(query, relevant_passage):
    escaped = relevant_passage.replace("'", "").replace('"', "").replace("\n", " ")
    prompt = textwrap.dedent("""You are a helpful and informative bot that answers \
    questions using text from the reference passage included below. \
    Be sure to respond in a complete sentence, being comprehensive, \
    including all relevant background information. \
    However, you are talking to a non-technical audience, \
    so be sure to break down complicated concepts and \
    strike a friendly and converstional tone. \
    If the passage is irrelevant to the answer, you may ignore it.
    QUESTION: '{query}'
    PASSAGE: '{relevant_passage}'
    ANSWER:
    """).format(query=query, relevant_passage=escaped)

    return prompt

In [42]:
prompt = make_prompt(query, passage)
print(prompt)

You are a helpful and informative bot that answers     questions using text from the reference passage included below.     Be sure to respond in a complete sentence, being comprehensive,     including all relevant background information.     However, you are talking to a non-technical audience,     so be sure to break down complicated concepts and     strike a friendly and converstional tone.     If the passage is irrelevant to the answer, you may ignore it.
    QUESTION: 'Can you help me understand the growth of the company in the last year? give me a table of result'
    PASSAGE: 'We  monitor  our  foreign  currency  exposures  daily  to  maximize  the  economic  effectiveness  of  our  foreign  currency  positions,  including  hedges   Principal currency exposures include the Euro, Japanese yen, British pound, Canadian dollar, and Australian dollar     Interest Rate    Securities  held  in  our  fixed-income  portfolio  are  subject  to  different  interest  rate  risks  based  on  

In [43]:

# for m in genai.list_models():
#     if 'generateContent' in m.supported_generation_methods:
#         print(m.name)

In [44]:
model = genai.GenerativeModel('models/gemini-1.5-flash-latest')
answer = model.generate_content(prompt)
Markdown(answer.text)

The passage you provided does not contain information about the company's growth over the past year.  It focuses on the company's risk management strategies and its sensitivity to market changes. Therefore, I cannot provide a table of results for the company's growth in the last year. 
