In [1]:
from llama_index import GPTVectorStoreIndex, SimpleDirectoryReader, LLMPredictor, PromptHelper
from llama_index.storage.storage_context import StorageContext
from llama_index.llm_predictor import HuggingFaceLLMPredictor
from langchain.embeddings.huggingface import HuggingFaceEmbeddings
from IPython.display import Markdown, display
from llama_index import ServiceContext, LangchainEmbedding
import os
import pandas as pd
from transformers import LlamaTokenizer, LlamaForCausalLM

In [2]:
import logging
import sys

logging.basicConfig(stream=sys.stdout, level=logging.INFO)
logging.getLogger().addHandler(logging.StreamHandler(stream=sys.stdout))

In [3]:
data_dir = "/home/jupyter/data_articles"
persist_dir = "/home/jupyter/persist"

In [4]:
import torch
from llama_index.prompts.prompts import SimpleInputPrompt
from langchain.llms.base import LLM
from typing import Optional, List, Mapping, Any


system_prompt = """BEGINNING OF CONVERSATION: """ 

# This will wrap the default prompts that are internal to llama-index
query_wrapper_prompt = SimpleInputPrompt("USER: {query_str} GPT:")
query_wrapper_prompt.format(query_str="test")

'USER: test GPT:'

In [5]:
tokenizer = LlamaTokenizer.from_pretrained("/home/jupyter/koala_transformer", device_map="auto", max_input_size=2048)
model = LlamaForCausalLM.from_pretrained("/home/jupyter/koala_transformer", torch_dtype=torch.float16, device_map="auto", cache_dir="/home/jupyter/data/transformers")


class CustomLLM(LLM):

    def _call(self, prompt: str, stop: Optional[List[str]] = None) -> str:
        prompt = query_wrapper_prompt.format(query_str=prompt)
        inputs = tokenizer(prompt, return_tensors="pt").to("cuda")
        outputs = model.generate(**inputs, max_new_tokens=1024, 
                             do_sample=False, 
                             temperature=0)
        result = tokenizer.batch_decode(outputs, skip_special_tokens=True, spaces_between_special_tokens=False)[0]
        # only return newly generated tokens
        return result[len(prompt):]

    @property
    def _identifying_params(self) -> Mapping[str, Any]:
        return {"name_of_model": "Koala"}

    @property
    def _llm_type(self) -> str:
        return "llama"

# define our LLM
num_output = 512
max_chunk_overlap = 20
llm_predictor = LLMPredictor(llm=CustomLLM())


prompt_helper = PromptHelper(max_input_size=2048, num_output=num_output, max_chunk_overlap=max_chunk_overlap)


hf_predictor = LLMPredictor(llm=CustomLLM())
embed_model = LangchainEmbedding(HuggingFaceEmbeddings())
service_context = ServiceContext.from_defaults(chunk_size_limit=1024, llm_predictor=hf_predictor, embed_model=embed_model, prompt_helper=prompt_helper)

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

INFO:sentence_transformers.SentenceTransformer:Load pretrained SentenceTransformer: sentence-transformers/all-mpnet-base-v2
Load pretrained SentenceTransformer: sentence-transformers/all-mpnet-base-v2
INFO:sentence_transformers.SentenceTransformer:Use pytorch device: cuda
Use pytorch device: cuda


In [6]:
from llama_index import load_index_from_storage, load_indices_from_storage, load_graph_from_storage
from llama_index.storage.docstore import SimpleDocumentStore
from llama_index.vector_stores import SimpleVectorStore
from llama_index.storage.index_store import SimpleIndexStore


storage_context = StorageContext.from_defaults(
    docstore=SimpleDocumentStore.from_persist_dir(persist_dir=persist_dir),
    vector_store=SimpleVectorStore.from_persist_dir(persist_dir=persist_dir),
    index_store=SimpleIndexStore.from_persist_dir(persist_dir=persist_dir),
)


index = load_index_from_storage(storage_context=storage_context, service_context=service_context)

INFO:llama_index.indices.loading:Loading all indices.
Loading all indices.


In [7]:
query_engine = index.as_query_engine()
response = query_engine.query("Which companies made drugs to help alzheimer disease?")

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

INFO:llama_index.token_counter.token_counter:> [retrieve] Total LLM token usage: 0 tokens
> [retrieve] Total LLM token usage: 0 tokens
INFO:llama_index.token_counter.token_counter:> [retrieve] Total embedding token usage: 11 tokens
> [retrieve] Total embedding token usage: 11 tokens
INFO:llama_index.token_counter.token_counter:> [get_response] Total LLM token usage: 2255 tokens
> [get_response] Total LLM token usage: 2255 tokens
INFO:llama_index.token_counter.token_counter:> [get_response] Total embedding token usage: 0 tokens
> [get_response] Total embedding token usage: 0 tokens


In [8]:
print(response)

 Biogen and Eli Lilly are two companies that have developed drugs to treat Alzheimer's disease. Biogen's drug, leqembi, reduces a protein called amyloid, which builds up on the brain in Alzheimer's's patients and disrupts cell function. Eli Lilly's experimental Alzheimer's drug, remternetug, lowered levels of brain plaque in patients who are in the earliest stages of Alzheimer's, according to early clinical trial data. Both companies are racing to bring a new Alzheimer's treatment to the market after both of their previous drugs flopped. Biogen is banking on the success of Biogen's drug leqembi after the disastrous approval and rollout of Biogen's old Alzheimer's drug aduhelm last year. Eli Lilly is developing donanemab and another drug, remternetug, after Eli Lilly's older treatment, solanezumab, failed to slow the progression of Alzheimer's's in clinical trials. An estimated 6.7 million Americans age 65 and older are living with Alzheimer's's, according to the Alzheimer's Association

In [9]:
response = query_engine.query("Write a report to answer the question which companies made drugs to help alzheimer disease?")

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

INFO:llama_index.token_counter.token_counter:> [retrieve] Total LLM token usage: 0 tokens
> [retrieve] Total LLM token usage: 0 tokens
INFO:llama_index.token_counter.token_counter:> [retrieve] Total embedding token usage: 18 tokens
> [retrieve] Total embedding token usage: 18 tokens
INFO:llama_index.token_counter.token_counter:> [get_response] Total LLM token usage: 2324 tokens
> [get_response] Total LLM token usage: 2324 tokens
INFO:llama_index.token_counter.token_counter:> [get_response] Total embedding token usage: 0 tokens
> [get_response] Total embedding token usage: 0 tokens


In [10]:
print(response)

 Based on the new context, it appears that Eli Lilly and Biogen are two companies that are developing drugs to treat Alzheimer's disease. Eli Lilly's experimental Alzheimer's drug, which is in the early stages of development, has shown promising results in reducing levels of brain plaque in patients with the disease. Biogen's drug, leqembi, is also in development and is expected to be approved in the near future.

Eli Lilly and Biogen are racing to bring a new Alzheimer's's treatment to the market after both of Eli Lilly's and Biogen's previous drugs flopped. Biogen is banking on the success of Biogen's drug leqembi after the disastrous approval and rollout of Biogen's old Alzheimer's's drug aduhelm last year. Biogen said Biogen booked an $18 million loss from Alzheimer's treatments during the first quarter. Eli Lilly is developing donanemab and another drug, remternetug, after Eli Lilly's older treatment, solanezumab, failed to slow the progression of Alzheimer's's in clinical trials.

In [11]:
response = query_engine.query("Write a summary with details relevant to investing to answer the question: Which companies made drugs to help alzheimer disease?")

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

INFO:llama_index.token_counter.token_counter:> [retrieve] Total LLM token usage: 0 tokens
> [retrieve] Total LLM token usage: 0 tokens
INFO:llama_index.token_counter.token_counter:> [retrieve] Total embedding token usage: 24 tokens
> [retrieve] Total embedding token usage: 24 tokens
INFO:llama_index.token_counter.token_counter:> [get_response] Total LLM token usage: 3509 tokens
> [get_response] Total LLM token usage: 3509 tokens
INFO:llama_index.token_counter.token_counter:> [get_response] Total embedding token usage: 0 tokens
> [get_response] Total embedding token usage: 0 tokens


In [12]:
print(response)

 Based on the new context, it appears that Biogen is a biotechnology company that is developing drugs to treat Alzheimer's disease. The company's CEO stated that Biogen is not worried about competing with Eli Lilly, another biotechnology company that is also developing drugs to treat Alzheimer's disease.

Biogen's CEO mentioned that both Eli Lilly's donanemab and leqembi, a highly anticipated drug from Biogen and Japanese drugmaker Eisai, reduce a protein called amyloid. Amyloid is a protein that builds up on the brain in Alzheimer's disease and disrupts cell function.

The CEO also mentioned that Biogen's drug leqembi is expected to receive full approval from the Food and Drug Administration (FDA) by the summer. Eli Lilly's donanemab is also expected to receive full approval from the FDA, but the CEO stated that Eli Lilly's decision to stop dosing donanemab after donanemab clears a certain amount of amyloid plaque in a patient may not be the best approach to treating Alzheimer's disea

In [13]:
response = query_engine.query('Which companies are still recovering from COVID? Include only details relevant to investing, and be specific on each company involved.')

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

INFO:llama_index.token_counter.token_counter:> [retrieve] Total LLM token usage: 0 tokens
> [retrieve] Total LLM token usage: 0 tokens
INFO:llama_index.token_counter.token_counter:> [retrieve] Total embedding token usage: 24 tokens
> [retrieve] Total embedding token usage: 24 tokens
INFO:llama_index.token_counter.token_counter:> [get_response] Total LLM token usage: 2052 tokens
> [get_response] Total LLM token usage: 2052 tokens
INFO:llama_index.token_counter.token_counter:> [get_response] Total embedding token usage: 0 tokens
> [get_response] Total embedding token usage: 0 tokens


In [14]:
print(response)

 Based on the new context, Abbott Laboratories (ABT.N) is recovering from COVID-19, as the company reported a recovery in elective surgeries globally and raised its forecast for Abbott Laboratories (ABT.N)'s core business, even as Covid testing-related sales were seen a fall in Covid testing sales sharply. Abbott Laboratories (ABT.N) is the second major medical device maker to signal a recovery after rival Johnson & Johnson (JNJ.N) on Tuesday posted better-than-expected sales for the business. The company's glucose-monitoring device Freestyle Libre alone contributed $1.2 billion in sales. Abbott Chair and CEO Robert Ford said the rise signals a recovery in demand for surgical procedures, pointing to an improvement in staffing levels at hospitals across the U.S. "Abbott Chair and CEO Robert Ford think the hospital systems have done a really good job right now at managing through the staffing shortages, and we're starting to see the impact there," Abbott Chair and CEO Robert Ford said du

In [15]:
response = query_engine.query('What new drugs are being approved that may impact stock prices? Be specific on the drugs and companies.')

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

INFO:llama_index.token_counter.token_counter:> [retrieve] Total LLM token usage: 0 tokens
> [retrieve] Total LLM token usage: 0 tokens
INFO:llama_index.token_counter.token_counter:> [retrieve] Total embedding token usage: 20 tokens
> [retrieve] Total embedding token usage: 20 tokens
INFO:llama_index.token_counter.token_counter:> [get_response] Total LLM token usage: 2030 tokens
> [get_response] Total LLM token usage: 2030 tokens
INFO:llama_index.token_counter.token_counter:> [get_response] Total embedding token usage: 0 tokens
> [get_response] Total embedding token usage: 0 tokens


In [16]:
print(response)

 Based on the new context, it appears that there are no new drugs being approved that may impact stock prices. The article focuses on Johnson & Johnson's earnings report and the company's guidance for its pharmaceutical business. The article does not mention any new drugs being approved that may impact stock prices.

However, it is worth noting that the article does mention some companies that have reported earnings and revenue that beat expectations, including Intel, First Solar, Alteryx, L3 Harris Technologies, and Pinterest. These companies may have seen an impact on their stock prices as a result of their earnings reports.


In [17]:
response = query_engine.query('How did the intervention of the FED affect the stock market? Be specific about the interventions and the result.')

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

INFO:llama_index.token_counter.token_counter:> [retrieve] Total LLM token usage: 0 tokens
> [retrieve] Total LLM token usage: 0 tokens
INFO:llama_index.token_counter.token_counter:> [retrieve] Total embedding token usage: 22 tokens
> [retrieve] Total embedding token usage: 22 tokens
INFO:llama_index.token_counter.token_counter:> [get_response] Total LLM token usage: 1143 tokens
> [get_response] Total LLM token usage: 1143 tokens
INFO:llama_index.token_counter.token_counter:> [get_response] Total embedding token usage: 0 tokens
> [get_response] Total embedding token usage: 0 tokens


In [18]:
print(response)

 Based on the context information, it is not clear how the interventions of the FED affected the stock market. The FED's interventions were focused on addressing the financial crisis and stabilizing the banking system, rather than directly affecting the stock market. The FED's actions included raising interest rates, providing emergency loans to banks, and implementing other measures to address the crisis. However, it is not clear how these actions specifically affected the stock market.
