In [1]:
from dotenv import load_dotenv

from langchain.tools import Tool
from langchain_community.utilities import GoogleSearchAPIWrapper
from langchain_community.utilities import TextRequestsWrapper

from bs4 import BeautifulSoup

#from src.search import top_n_results_factory

In [2]:
# Load variables from a local .env file into the process environment
# (presumably the API credentials the search and LLM wrappers below rely on — confirm).
load_dotenv()

True

In [3]:
def top_3_results(query, num_results=3):
    """Return the top Google search hits for ``query``.

    Args:
        query: Search string handed to ``GoogleSearchAPIWrapper.results``.
        num_results: Maximum number of hits to request. Defaults to 3, so
            existing single-argument callers (such as the ``Tool`` built in
            this notebook) behave exactly as before.

    Returns:
        Whatever ``GoogleSearchAPIWrapper.results`` returns; in this notebook
        that is a list of dicts with ``title``, ``link`` and ``snippet`` keys.
    """
    # A fresh wrapper is created per call; it presumably picks up its
    # credentials from the environment — TODO confirm.
    search = GoogleSearchAPIWrapper()
    return search.results(query, num_results)

In [4]:
# Wrap the search helper as a LangChain tool so it can be invoked through `tool.run(...)`.
tool = Tool(
    func=top_3_results,
    name="Google Search",
    description="Search Google for recent results.",
)

In [5]:
# Run the search tool for the model name; the result is inspected in the next cell.
searches = tool.run("Layoutlmv3")

In [6]:
# Inspect the raw hits: each one is a dict with 'title', 'link' and 'snippet'.
searches

[{'title': 'LayoutLMv3: Pre-training for Document AI with Unified Text and ...',
  'link': 'https://arxiv.org/abs/2204.08387',
  'snippet': 'Apr 18, 2022 ... Title:LayoutLMv3: Pre-training for Document AI with Unified Text and Image Masking ... Abstract:Self-supervised pre-training techniques have\xa0...'},
 {'title': 'Semantic Table Detection with LayoutLMv3',
  'link': 'https://arxiv.org/abs/2211.15504',
  'snippet': 'Nov 25, 2022 ... Title:Semantic Table Detection with LayoutLMv3 ... Abstract:This paper presents an application of the LayoutLMv3 model for semantic table\xa0...'},
 {'title': 'DocILE Benchmark for Document Information Localization and ...',
  'link': 'https://arxiv.org/abs/2302.05658',
  'snippet': 'Feb 11, 2023 ... The benchmark comes with several baselines, including RoBERTa, LayoutLMv3 and DETR-based Table Transformer; applied to both tasks of the\xa0...'}]

In [7]:
# Keep only the URL of every search hit.
links = [hit["link"] for hit in searches]

In [8]:
# Show the URLs that will be fetched below.
links

['https://arxiv.org/abs/2204.08387',
 'https://arxiv.org/abs/2211.15504',
 'https://arxiv.org/abs/2302.05658']

In [9]:
# LangChain helper used below to fetch each page; `get` returns the body as text (see cell 11).
# NOTE(review): this variable shares its name with the `requests` HTTP library, which is not
# imported in the visible cells — a later `import requests` would overwrite it.
requests = TextRequestsWrapper()

In [10]:
# Download every result page, in the same order as `links`.
html_list = list(map(requests.get, links))

In [11]:
# Sanity check: the wrapper hands back plain strings, not response objects.
type(html_list[0])

str

In [12]:
# Parse the first fetched page. The parser is named explicitly so Beautiful Soup
# does not have to guess one (which emits a warning and can pick a different
# parser depending on what is installed); "html.parser" ships with Python.
soup = BeautifulSoup(html_list[0], "html.parser")

  soup = BeautifulSoup(html_list[0])


In [13]:
# The paper title is taken from the page's Open Graph <meta property="og:title"> tag.
title_tag = soup.find("meta", property="og:title")
print(title_tag["content"])

LayoutLMv3: Pre-training for Document AI with Unified Text and Image Masking


In [14]:
# Build a short author line from the <meta property="article:author"> tags:
# commas are stripped from each name, at most three names are kept, and
# "et al." is appended when the page lists more than three.
author_tags = soup.find_all("meta", property="article:author")
author_names = [tag["content"].replace(",", "") for tag in author_tags]
shown_names = author_names[:3] + ["et al."] if len(author_names) > 3 else author_names
authors = ", ".join(shown_names)
authors

''

In [21]:
# Same author line as before, but read from <meta name="citation_author"> tags:
# commas are stripped from each name, at most three names are kept, and
# "et al." is appended when the page lists more than three.
citation_tags = soup.find_all("meta", attrs={"name": "citation_author"})
names = [tag["content"].replace(",", "") for tag in citation_tags]
if len(names) > 3:
    names = [*names[:3], "et al."]
authors = ", ".join(names)
authors

'Huang Yupan, Lv Tengchao, Cui Lei, et al.'

In [21]:
# List the raw <meta property="article:author"> tags of the current page.
# NOTE(review): the output below shows the authors of a different paper than the arXiv
# page parsed in cell 12 (where this same lookup produced an empty author string), so
# `soup` was presumably rebuilt from another page in a cell that is no longer in the
# notebook — confirm, and restore that cell so Restart & Run All reproduces this output.
soup.find_all("meta", property="article:author")

[<meta content="Zaťko, B." property="article:author"/>,
 <meta content="Kubanda, D." property="article:author"/>,
 <meta content="Žemlička, J." property="article:author"/>,
 <meta content="Šagátová, A." property="article:author"/>,
 <meta content="Zápražný, Z." property="article:author"/>,
 <meta content="Boháček, P." property="article:author"/>,
 <meta content="Nečas, V." property="article:author"/>,
 <meta content="Mora, Y." property="article:author"/>,
 <meta content="Pichotka, M." property="article:author"/>,
 <meta content="Dudák, J." property="article:author"/>]

In [22]:
# The abstract comes from the page's Open Graph description tag. The content is used
# as-is, so any inline markup it carries (e.g. <SUP> tags) stays in the text.
description_tag = soup.find("meta", property="og:description")
abstract = description_tag["content"]
print(abstract)

In this work, we have focused on Timepix detectors coupled with the semi-insulating GaAs material sensor. We used undoped bulk GaAs material with the thickness of 350 μm. We prepared and tested four pixelated detectors with 165 μm and 220 μm pixel size with two versions of technology preparation, without and with wet chemically etched trenches around each pixel. We have carried out adjustment of GaAs Timepix detectors to optimize their performance. The energy calibration of one GaAs Timepix detector in Time-over-threshold mode was performed with the use of <SUP>241</SUP>Am and <SUP>133</SUP>Ba radioisotopes. We were able to detect γ-photons with the energy up to 160 keV. The X-ray imaging quality of GaAs Timepix detector was tested with X-ray source using various samples. After flat field we obtained very promising imaging performance of tested GaAs Timepix detectors.


In [22]:
from langchain.prompts import PromptTemplate

In [27]:
# Baseline "explain this paper" prompt with three placeholders: {audience}, {language}
# and {abstract}. The template text is whitespace-sensitive runtime content and is
# written to ../prompts/icebreaker_bad.yaml in a later cell.
prompt_template = PromptTemplate.from_template(
    """
You are a seasoned researcher, teacher and a popularizer of science that can explain scientific concepts to the wide variety of audiences.

You will be given an abstract of a scientific paper, an audience (e.g. high-school students) and a language, and you will have to explain the scientific paper it in a way that is understandable for the audience and translate your entire answer to given language. 

Please be concise and clear, make a summary and few bulletpoints. Please do not write any introductory sentences and keep your whole answer in a given language.
    
Audience: {audience}.
    
Language: {language}.
    
Abstract: {abstract}
"""
)

In [24]:
# Alternative prompt (the variable name suggests it was drafted with Gemini) using the
# same placeholders: {audience}, {language} (twice) and {abstract}.
# NOTE(review): the "Tips"/"Optional" sections and the closing "I hope this template
# helps you..." sentence read like leftover assistant chat text rather than instructions
# for the model, and "3-5 sentences" sits oddly next to the six-step outline — confirm
# whether they are intended to be part of the prompt.
prompt_template_from_gemini = PromptTemplate.from_template(
    """
Imagine you're explaining a cool science discovery to a person from {audience} in {language} language.

Briefly introduce the paper's topic in a way that would grab their attention.
Explain the main question or problem the researchers were trying to answer in a simple and understandable way.
Describe what the researchers did to investigate the question (e.g., experiments, observations, etc.).
Summarize the key findings of the research in a way that would surprise or excite your audience.
Briefly mention any limitations or future directions of the research, if relevant.
Conclude by emphasizing the importance or potential impact of the findings in a language they can understand.

Remember:

Use simple language and avoid technical jargon (unless the topic of the paper is meant for that audience).
Focus on the most interesting and engaging aspects of the research.
Keep the summary concise and to the point (e.g., 3-5 sentences).
Use {language} language.

Tips:

You can use metaphors, analogies, or relatable examples to explain complex concepts.
Encourage questions and be prepared to explain things in more detail if needed.
Make it fun and engaging!

Optional:

You can also personalize the prompt based on the specific details of the paper you're summarizing. For example, if the research has implications for everyday life, you can highlight those in your summary.
I hope this template helps you write clear and engaging summaries of scientific paper abstracts for your chosen audience!

Content:

{abstract}
"""
)

In [29]:
# Persist the baseline prompt (not the Gemini-drafted one) as YAML, relative to the notebook's directory.
prompt_template.save("../prompts/icebreaker_bad.yaml")

In [77]:
# Fill the Gemini-drafted template with the scraped abstract and the target audience/language.
my_prompt = prompt_template_from_gemini.format(
    abstract=abstract,
    audience="technical university graduates",
    language="English",
)

In [78]:
# Show the rendered prompt exactly as it will be sent to the model.
my_prompt

"\nImagine you're explaining a cool science discovery to a person from technical university graduates in English language.\n\nBriefly introduce the paper's topic in a way that would grab their attention.\nExplain the main question or problem the researchers were trying to answer in a simple and understandable way.\nDescribe what the researchers did to investigate the question (e.g., experiments, observations, etc.).\nSummarize the key findings of the research in a way that would surprise or excite your audience.\nBriefly mention any limitations or future directions of the research, if relevant.\nConclude by emphasizing the importance or potential impact of the findings in a language they can understand.\n\nRemember:\n\nUse simple language and avoid technical jargon (unless the topic of the paper is meant for that audience).\nFocus on the most interesting and engaging aspects of the research.\nKeep the summary concise and to the point (e.g., 3-5 sentences).\nUse English language.\n\nTip

In [79]:
from langchain_openai import ChatOpenAI


In [80]:
# Chat model used for the explanation: gpt-3.5-turbo at temperature 0 with streaming enabled.
llm = ChatOpenAI(
    model_name="gpt-3.5-turbo",
    temperature=0,
    streaming=True,
)


In [81]:
# Stream the reply and echo each piece as soon as it arrives; no newline is added
# between pieces, and the output is flushed after each one.
for piece in llm.stream(my_prompt):
    print(piece.content, end="", flush=True)

Imagine being able to detect and image X-rays with unprecedented precision and clarity. That's exactly what researchers have achieved in their latest study using a cutting-edge technology called Timepix detectors coupled with a special material called semi-insulating GaAs. 

The main question they were trying to answer was how to optimize the performance of these detectors and improve their imaging quality. To do this, they prepared and tested four pixelated detectors with different pixel sizes and technology preparations. They also carried out adjustments and energy calibrations using radioisotopes to ensure accurate detection of γ-photons.

The key findings of their research are truly exciting. They were able to detect γ-photons with energies up to 160 keV, which is a significant improvement compared to previous technologies. Moreover, the X-ray imaging quality of the GaAs Timepix detectors was found to be very promising, providing high-resolution images with excellent clarity.

Whil