In [1]:
from langchain_community.document_loaders import ArxivLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.vectorstores import Chroma
from langchain_community.embeddings import HuggingFaceEmbeddings

import os 
import getpass

from langchain.chains import RetrievalQA
from langchain_openai import OpenAI

from dotenv import load_dotenv

from langchain_google_genai import ChatGoogleGenerativeAI



### Arxivloader ###

Info. - https://python.langchain.com/v0.2/docs/integrations/document_loaders/arxiv/

This is helpful because it lets us fetch the papers relevant to a search query directly from arXiv.

In [14]:
### Fetch the papers from arXiv that match the search query ###

query = "burst rate and morphological evolution of the periodically repeating FRB 20180916B"

# load_max_docs caps how many papers are loaded for the query.
arxiv_docs = ArxivLoader(
    query=query,
    load_max_docs=3,
).load()

### Text splitting ###
Info - https://python.langchain.com/v0.1/docs/modules/data_connection/document_transformers/recursive_text_splitter/

In [15]:
##### Split every paper into overlapping text chunks ####

# The splitter settings are the same for every paper, so build it once
# instead of re-creating it on each loop iteration.
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=1000,
    chunk_overlap=100,
)

# pdf_data[i] is the list of chunk documents produced from arxiv_docs[i];
# the per-paper nesting is kept because later cells index into it.
pdf_data = [
    text_splitter.create_documents([doc.page_content])
    for doc in arxiv_docs
]


### Embedding ####

Info - https://python.langchain.com/v0.2/docs/integrations/platforms/huggingface/

In [16]:

# Embedding model used to vectorise each text chunk.
embeddings = HuggingFaceEmbeddings(
    model_name="sentence-transformers/all-MiniLM-l6-v2"
)

# NOTE(review): only the chunks of the first paper (pdf_data[0]) are indexed,
# although up to three papers are loaded — confirm this is intentional.
db = Chroma.from_documents(documents=pdf_data[0], embedding=embeddings)

In [12]:
# Pick up GOOGLE_API_KEY from a local .env file when one exists, and only
# prompt interactively if the key is still missing. This keeps the cell
# idempotent: re-running it does not ask again or overwrite a key already set.
load_dotenv()
if not os.environ.get("GOOGLE_API_KEY"):
    os.environ["GOOGLE_API_KEY"] = getpass.getpass("Enter your Google AI API key: ")

Enter your Google AI API key:  ········


In [17]:
# Chat model that generates the answers.
llm = ChatGoogleGenerativeAI(
    model="gemini-1.5-pro",
    temperature=0,
    max_tokens=None,
    timeout=None,
    max_retries=2,
)

# Question-answering chain whose retriever reads from the Chroma store `db`.
qa = RetrievalQA.from_chain_type(
    llm=llm,
    chain_type="stuff",
    retriever=db.as_retriever(),
)

I0000 00:00:1722302189.338847   29558 check_gcp_environment_no_op.cc:29] ALTS: Platforms other than Linux and Windows are not supported
I0000 00:00:1722302189.341116   29558 check_gcp_environment_no_op.cc:29] ALTS: Platforms other than Linux and Windows are not supported


In [18]:
question = "What was the total exposure on the source?"
# Calling the chain object directly (qa({...})) is deprecated and emits a
# deprecation warning; invoke() is the supported entry point and returns the
# same {'query': ..., 'result': ...} dict.
result = qa.invoke({"query": question})

print(result)

  warn_deprecated(


{'query': 'What was the total exposure on the source?', 'result': 'The total exposure on the source in the aforementioned time interval is 201 hours. \n'}


In [21]:
question = "Whats the limit on period derivative?"
# Use invoke() rather than the deprecated direct call on the chain object;
# the returned dict has the same 'query' and 'result' keys.
result = qa.invoke({"query": question})

print(result)

{'query': 'Whats the limit on period derivative?', 'result': 'The limit on the period derivative (Pdot) is  ˙P = −0.2 ± 1.5 × 10−4 day day−1. \n'}


In [22]:
question = "Summarize the paper in a paragraph"
# Use invoke() rather than the deprecated direct call on the chain object;
# the returned dict has the same 'query' and 'result' keys.
result = qa.invoke({"query": question})

print(result)

{'query': 'Summarize the paper in a paragraph', 'result': 'This paper details a pipeline for analyzing the temporal structure of Fast Radio Bursts (FRBs) to search for repeating patterns. The pipeline smooths the burst profile, identifies sub-bursts using Exponentially Modified Gaussians, and then analyzes the auto-correlation function (ACF) of the time series for each sub-burst. This ACF analysis helps identify potential repeating patterns within the burst, which could provide insights into the emission mechanism of FRBs. The authors apply this pipeline to a sample of bright FRBs and find one case with interesting features in the ACF, highlighting the potential of this method for uncovering complex temporal structures in FRB signals. \n'}


In [23]:
question = "Tell me 5 facts about FRB 20180916B"
# Use invoke() rather than the deprecated direct call on the chain object;
# the returned dict has the same 'query' and 'result' keys.
result = qa.invoke({"query": question})

print(result)

{'query': 'Tell me 5 facts about FRB 20180916B', 'result': 'Here are five facts about FRB 20180916B:\n\n1. **Periodic Activity:** FRB 20180916B is a repeating fast radio burst with a known period of activity. \n2. **Steady Activity Rate:** It exhibits a steady rate of bursts within its activity cycle, unlike some other repeaters that show periods of heightened activity.\n3. **Chromatic Activity:**  FRB 20180916B displays chromatic activity, meaning its bursts at different frequencies occur at different times within its activity cycle. Higher frequency bursts happen earlier, and lower frequency bursts occur later.\n4. **Changing Rotation Measure (RM):**  Observations show a linear increase in its Rotation Measure (RM), indicating changes in the magnetic field environment around the source.\n5. **Stable Dispersion Measure (DM):**  Despite the changes in RM, no significant variations in its Dispersion Measure (DM) have been observed. \n'}
