In [1]:
from llama_index import GPTVectorStoreIndex, SimpleDirectoryReader
from llama_index.storage.storage_context import StorageContext
from llama_index.llm_predictor import HuggingFaceLLMPredictor
from langchain.embeddings.huggingface import HuggingFaceEmbeddings
from IPython.display import Markdown, display
from llama_index import ServiceContext, LangchainEmbedding
import os
import pandas as pd

In [2]:
import logging
import sys

# Send INFO-level (and above) log records to stdout so they appear in the cell output.
# A single root handler is installed: the previous setup called basicConfig() and then
# added a second StreamHandler on the same stream, so every record was printed twice
# (once formatted as "LEVEL:logger:message", once as the bare message).
# force=True replaces any handlers already attached to the root logger, so re-running
# this cell does not stack additional handlers.
logging.basicConfig(stream=sys.stdout, level=logging.INFO, force=True)

In [3]:
# Filesystem locations used by this notebook.
# The defaults are the original hard-coded paths; set ARTICLES_DATA_DIR and/or
# INDEX_PERSIST_DIR in the environment to run on another machine without editing
# this cell.
# NOTE(review): data_dir is not referenced by any of the visible cells; presumably it
# is where the source articles live -- confirm before removing.
data_dir = os.environ.get("ARTICLES_DATA_DIR", "/home/jupyter/data_articles")
# Directory the document store, vector store and index store are loaded from.
persist_dir = os.environ.get("INDEX_PERSIST_DIR", "/home/jupyter/persist")

In [4]:
import torch
from llama_index.prompts.prompts import SimpleInputPrompt


# System prompt prepended to every request sent to the StableLM model.
system_prompt = """<|SYSTEM|># StableLM Tuned (Alpha version)
- StableLM is a helpful and harmless open-source AI language model developed by StabilityAI.
- StableLM is excited to be able to help the user, but will refuse to do anything that could be considered harmful to the user.
- StableLM is more than just an information source, StableLM is also able to write poetry, short stories, and make jokes.
- StableLM will refuse to participate in anything that could harm a human.
"""

# This will wrap the default prompts that are internal to llama-index
query_wrapper_prompt = SimpleInputPrompt("<|USER|>{query_str}<|ASSISTANT|>")

# Load the weights in half precision only when a CUDA device is available (the
# setting exists to reduce GPU memory usage); otherwise fall back to float32 so the
# notebook does not force float16 onto a CPU-only machine.
model_dtype = torch.float16 if torch.cuda.is_available() else torch.float32

# LLM predictor backed by the Hugging Face StableLM checkpoint.
hf_predictor = HuggingFaceLLMPredictor(
    max_input_size=4096,
    max_new_tokens=2048,
    # Sampling is disabled, so generation is not randomized.
    temperature=0,
    do_sample=False,
    system_prompt=system_prompt,
    query_wrapper_prompt=query_wrapper_prompt,
    tokenizer_name="stabilityai/stablelm-tuned-alpha-7b",
    model_name="stabilityai/stablelm-tuned-alpha-7b",
    device_map="auto",
    # NOTE(review): presumably the token ids at which generation should stop for
    # this tokenizer -- confirm against the model's special tokens.
    stopping_ids=[50278, 50279, 50277, 1, 0],
    tokenizer_kwargs={"max_length": 4096},
    model_kwargs={"torch_dtype": model_dtype},
)

# Embeddings come from the LangChain HuggingFaceEmbeddings wrapper with its default model.
embed_model = LangchainEmbedding(HuggingFaceEmbeddings())

# Bundle the predictor and the embedding model; this context is passed to the index loader.
service_context = ServiceContext.from_defaults(chunk_size_limit=1024, llm_predictor=hf_predictor, embed_model=embed_model)

Loading checkpoint shards:   0%|          | 0/4 [00:00<?, ?it/s]

INFO:sentence_transformers.SentenceTransformer:Load pretrained SentenceTransformer: sentence-transformers/all-mpnet-base-v2
Load pretrained SentenceTransformer: sentence-transformers/all-mpnet-base-v2
INFO:sentence_transformers.SentenceTransformer:Use pytorch device: cuda
Use pytorch device: cuda


In [5]:
from llama_index import load_index_from_storage, load_indices_from_storage, load_graph_from_storage
from llama_index.storage.docstore import SimpleDocumentStore
from llama_index.vector_stores import SimpleVectorStore
from llama_index.storage.index_store import SimpleIndexStore


# Load each persisted store from `persist_dir`, in the same order as before:
# document store, then vector store, then index store.
persisted_docstore = SimpleDocumentStore.from_persist_dir(persist_dir=persist_dir)
persisted_vector_store = SimpleVectorStore.from_persist_dir(persist_dir=persist_dir)
persisted_index_store = SimpleIndexStore.from_persist_dir(persist_dir=persist_dir)

# Combine the three stores into one storage context.
storage_context = StorageContext.from_defaults(
    docstore=persisted_docstore,
    vector_store=persisted_vector_store,
    index_store=persisted_index_store,
)

# Load the index from that storage context, attaching the service context
# (LLM predictor + embedding model) configured earlier in the notebook.
index = load_index_from_storage(
    storage_context=storage_context,
    service_context=service_context,
)

INFO:llama_index.indices.loading:Loading all indices.
Loading all indices.


In [6]:
# Baseline question, asked with no extra instructions about the answer format.
baseline_question = "Which companies made drugs to help alzheimer disease?"

# Build a query engine with default settings from the loaded index and run the query.
query_engine = index.as_query_engine()
response = query_engine.query(baseline_question)

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

INFO:llama_index.token_counter.token_counter:> [retrieve] Total LLM token usage: 0 tokens
> [retrieve] Total LLM token usage: 0 tokens
INFO:llama_index.token_counter.token_counter:> [retrieve] Total embedding token usage: 11 tokens
> [retrieve] Total embedding token usage: 11 tokens


Setting `pad_token_id` to `eos_token_id`:0 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:0 for open-end generation.


INFO:llama_index.token_counter.token_counter:> [get_response] Total LLM token usage: 2119 tokens
> [get_response] Total LLM token usage: 2119 tokens
INFO:llama_index.token_counter.token_counter:> [get_response] Total embedding token usage: 0 tokens
> [get_response] Total embedding token usage: 0 tokens


In [7]:
# Show the answer to the baseline question as plain text.
print(str(response))

The companies that made drugs to help Alzheimer's are Eli Lilly and Biogen.


In [8]:
# Same Alzheimer's question, now prefixed with a request for a report-style answer.
report_question = "Write a report to answer the question: Which companies made drugs to help alzheimer disease?"
response = query_engine.query(report_question)

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

INFO:llama_index.token_counter.token_counter:> [retrieve] Total LLM token usage: 0 tokens
> [retrieve] Total LLM token usage: 0 tokens
INFO:llama_index.token_counter.token_counter:> [retrieve] Total embedding token usage: 19 tokens
> [retrieve] Total embedding token usage: 19 tokens


Setting `pad_token_id` to `eos_token_id`:0 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:0 for open-end generation.


INFO:llama_index.token_counter.token_counter:> [get_response] Total LLM token usage: 3043 tokens
> [get_response] Total LLM token usage: 3043 tokens
INFO:llama_index.token_counter.token_counter:> [get_response] Total embedding token usage: 0 tokens
> [get_response] Total embedding token usage: 0 tokens


In [9]:
# Show the report-style answer as plain text.
print(str(response))

Biogen is a biotech company that is focused on developing drugs to treat Alzheimer's disease. The company is currently working on developing a drug called donanemab, which is a treatment for Alzheimer's disease that is currently in clinical trials. Biogen is also working on developing a drug called leqembi, which is a treatment for Alzheimer's disease that is currently in clinical trials. Both of these drugs are designed to help reduce the accumulation of amyloid plaque in the brain, which is a key step in the development of Alzheimer's disease.


In [10]:
# Same Alzheimer's question, now asking for a summary focused on investing-relevant details.
investing_question = "Write a summary with details relevant to investing to answer the question: Which companies made drugs to help alzheimer disease?"
response = query_engine.query(investing_question)

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

INFO:llama_index.token_counter.token_counter:> [retrieve] Total LLM token usage: 0 tokens
> [retrieve] Total LLM token usage: 0 tokens
INFO:llama_index.token_counter.token_counter:> [retrieve] Total embedding token usage: 24 tokens
> [retrieve] Total embedding token usage: 24 tokens


Setting `pad_token_id` to `eos_token_id`:0 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:0 for open-end generation.


INFO:llama_index.token_counter.token_counter:> [get_response] Total LLM token usage: 4531 tokens
> [get_response] Total LLM token usage: 4531 tokens
INFO:llama_index.token_counter.token_counter:> [get_response] Total embedding token usage: 0 tokens
> [get_response] Total embedding token usage: 0 tokens


In [11]:
# Show the investing-focused summary as plain text.
print(str(response))

Biogen is a biotech company that is developing Alzheimer's disease treatments through its drug, leqembi. The company is focused on developing a treatment that reduces the accumulation of amyloid plaque in the brain, which is a key step in the development of Alzheimer's. Eli Lilly and Biogen are both working on this drug, and Biogen is hoping that it will be approved by the FDA by the summer. However, the company is concerned that it may not be the best approach to treating Alzheimer's, as it is not as well-studied as other treatments. The FDA has declined to grant accelerated approval for leqembi, which means that Biogen and Eli Lilly are still developing the drug. Biogen is also working on a second Alzheimer's treatment, remternetug, which is in the early stages of development.

Biogen is a biotech company that is focused on developing Alzheimer's disease treatments through its drug, leqembi. The company is focused on developing a treatment that reduces the accumulation of amyloid pla

In [12]:
# New topic: companies still recovering from COVID, restricted to investing-relevant details.
covid_question = 'Which companies are still recovering from COVID? Include only details relevant to investing, and be specific on each company involved.'
response = query_engine.query(covid_question)

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

INFO:llama_index.token_counter.token_counter:> [retrieve] Total LLM token usage: 0 tokens
> [retrieve] Total LLM token usage: 0 tokens
INFO:llama_index.token_counter.token_counter:> [retrieve] Total embedding token usage: 24 tokens
> [retrieve] Total embedding token usage: 24 tokens


Setting `pad_token_id` to `eos_token_id`:0 for open-end generation.


INFO:llama_index.token_counter.token_counter:> [get_response] Total LLM token usage: 2803 tokens
> [get_response] Total LLM token usage: 2803 tokens
INFO:llama_index.token_counter.token_counter:> [get_response] Total embedding token usage: 0 tokens
> [get_response] Total embedding token usage: 0 tokens


In [13]:
# Show the answer to the COVID-recovery question as plain text.
print(str(response))

Abbott Laboratories (ABT.N) is the second major medical device maker to signal a recovery after rival Johnson & Johnson (JNJ.N) on Tuesday posted better-than-expected sales for the business.

Abbott Laboratories (ABT.N) is the second major medical device maker to signal a recovery after rival Johnson & Johnson (JNJ.N) on Tuesday posted better-than-expected sales for the business.

Abbott Laboratories (ABT.N)'s upbeat commentary lifted shares of other medical device makers Medtronic (MDT.N), Stryker (SYK.N), Zimmer Biomet (ZBH.N) and Boston Scientific Corp (BSX.N) by 2%-4% in morning trade.

Abbott Laboratories (ABT.N)'s stronger outlook for Abbott Laboratories (ABT.N)'s non-COVID business was the main takeaway as investors had priced in a fall in COVID testing sales, J.P. Morgan analyst Robbie Marcus said in a note.

Abbott Laboratories (ABT.N) lowered Abbott Laboratories (ABT.N)'s outlook for COVID testing sales this year to $1.5 billion from the $2 billion Abbott Laboratories (ABT.N)

In [14]:
# New topic: drug approvals that could move stock prices.
drug_approval_question = 'What new drugs are being approved that may impact stock prices? Be specific on the drugs and companies.'
response = query_engine.query(drug_approval_question)

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

INFO:llama_index.token_counter.token_counter:> [retrieve] Total LLM token usage: 0 tokens
> [retrieve] Total LLM token usage: 0 tokens
INFO:llama_index.token_counter.token_counter:> [retrieve] Total embedding token usage: 20 tokens
> [retrieve] Total embedding token usage: 20 tokens


Setting `pad_token_id` to `eos_token_id`:0 for open-end generation.


INFO:llama_index.token_counter.token_counter:> [get_response] Total LLM token usage: 2162 tokens
> [get_response] Total LLM token usage: 2162 tokens
INFO:llama_index.token_counter.token_counter:> [get_response] Total embedding token usage: 0 tokens
> [get_response] Total embedding token usage: 0 tokens


In [15]:
# Show the answer to the drug-approval question as plain text.
print(str(response))

There are several new drugs that are currently being approved that may impact stock prices. These include:

1. Johnson & Johnson's talc-based baby powder liabilities and costs tied to the upcoming spinoff of Johnson & Johnson's consumer health business.
2. The acquisition of Abiomed, a cardiovascular medical technology company, in December last year that fueled the rise of Johnson & Johnson's consumer health business.
3. Snap's first-quarter revenue fell short of Wall Street's expectations, but the company's revenue grew 6% from the prior year to $989 million.
4. Amazon's earnings were disappointing, but the company's revenue beat Wall Street's estimates.
5. T-Mobile's telecommunications company was down about 2% in premarket trading after reporting first-quarter earnings that beat Wall Street's expectations.
6. Exxon Mobil's first-quarter earnings beat analysts' estimates, but the company's revenue fell 25% on the drop in oil prices.
7. Chevron's shares rose in premarket trading despi

In [16]:
# New topic: effect of Federal Reserve interventions on the stock market.
fed_question = 'How did the intervention of the FED affect the stock market? Be specific about the interventions and the result.'
response = query_engine.query(fed_question)

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

INFO:llama_index.token_counter.token_counter:> [retrieve] Total LLM token usage: 0 tokens
> [retrieve] Total LLM token usage: 0 tokens
INFO:llama_index.token_counter.token_counter:> [retrieve] Total embedding token usage: 22 tokens
> [retrieve] Total embedding token usage: 22 tokens


Setting `pad_token_id` to `eos_token_id`:0 for open-end generation.


INFO:llama_index.token_counter.token_counter:> [get_response] Total LLM token usage: 1364 tokens
> [get_response] Total LLM token usage: 1364 tokens
INFO:llama_index.token_counter.token_counter:> [get_response] Total embedding token usage: 0 tokens
> [get_response] Total embedding token usage: 0 tokens


In [17]:
# Show the answer to the Federal Reserve question as plain text.
print(str(response))

The intervention of the Federal Reserve did not have a significant impact on the stock market. The stock market is a complex system that is influenced by a variety of factors, including economic conditions, political events, and investor sentiment. The Federal Reserve's decision to raise interest rates did not have a direct impact on the stock market, as the impact would have been felt in other financial markets.

In fact, the decision to raise interest rates was a key part of the broader monetary policy framework that was implemented by the Federal Reserve. The primary goal of the monetary policy was to fight inflation and promote stable financial conditions. The decision to raise interest rates was made in order to achieve these goals, and the impact on the stock market was limited.

In summary, the intervention of the Federal Reserve had limited effects on the stock market. The decision to raise interest rates did not have a direct impact on the stock market, and the impact would ha