In [14]:
import json
import os

import boto3
import bs4
from dotenv import load_dotenv

In [17]:
# Load settings through python-dotenv before reading them from the environment.
load_dotenv()

# Model and AWS settings. os.getenv returns None for any variable that is not set.
TEXT_MODEL = os.getenv("TEXT_MODEL")
SERVICE_NAME = os.getenv("AWS_SERVICE_NAME")
REGION_NAME = os.getenv("AWS_REGION_NAME")
ACCESS_KEY_ID = os.getenv("AWS_ACCESS_KEY")
ACCESS_SECRET_KEY = os.getenv("AWS_SECRET_KEY")

# os.environ only accepts str values: assigning None raises TypeError.
# Re-export the key only when it is actually present so this cell still runs
# on a machine where OPENAI_API_KEY has not been configured.
_openai_api_key = os.getenv("OPENAI_API_KEY")
if _openai_api_key is not None:
    os.environ['OPENAI_API_KEY'] = _openai_api_key

In [12]:
# Build the boto3 client from the service, region and credentials read from
# the environment in the configuration cell.
client = boto3.client(
    service_name=SERVICE_NAME,
    region_name=REGION_NAME,
    aws_access_key_id=ACCESS_KEY_ID,
    aws_secret_access_key=ACCESS_SECRET_KEY,
)

In [13]:
def generate_ai_text(payload, max_tokens=256, temperature=1, top_p=0.8, top_k=50):
    """Generate one line of text by invoking the configured model.

    The prompt is built by ``get_prompt(payload)`` and sent through the
    module-level ``client`` with ``TEXT_MODEL`` as the model id.
    NOTE(review): ``get_prompt`` is not defined in the visible cells; it must
    exist in the kernel before this function is called.

    Args:
        payload: Input handed unchanged to ``get_prompt``.
        max_tokens: Sent as ``max_tokens`` in the request body.
        temperature: Sent as ``temperature`` in the request body.
        top_p: Sent as ``top_p`` in the request body.
        top_k: Sent as ``top_k`` in the request body.

    Returns:
        str: The text of the first output, with double quotes removed, cut at
        the first newline and stripped of surrounding whitespace.
        dict: ``{"error": <message>}`` if anything fails. Errors are reported
        through the return value rather than raised.
    """
    try:
        prompt = get_prompt(payload)
        model_id = TEXT_MODEL
        body = {
            "prompt": prompt,
            "max_tokens": max_tokens,
            "temperature": temperature,
            "top_p": top_p,
            "top_k": top_k
        }

        response = client.invoke_model(
            modelId=model_id,
            body=json.dumps(body)
        )

        response_body = json.loads(response["body"].read())

        # Fail with a readable message instead of an opaque
        # "'NoneType' object is not subscriptable" / "list index out of range".
        outputs = response_body.get("outputs")
        if not outputs:
            raise ValueError("model response did not contain any 'outputs'")

        generated_text = outputs[0]['text'].replace('"', '')
        # Keep only the first line of the generation.
        generated_text = generated_text.split('\n', 1)[0].strip()

        return generated_text

    except Exception as e:
        # Callers receive failures as a dict rather than an exception.
        return {"error": str(e)}

In [1]:
## Data ingestion: read the local text file through TextLoader
from langchain_community.document_loaders import TextLoader

loader = TextLoader("cricbuzz.txt")
text_documents = loader.load()

# Bare expression so the notebook shows what was loaded.
text_documents


[Document(page_content="Rashid Khan, Naveen Ul Haq carry Afghanistan to World Cup semis\nby  Cricbuzz Staff  •  Last updated on Tue, Jun 25, 2024, 11:44 AM\nShare\nTweet\nRashid Khan picked a four-wicket haul in his team's win\nRashid Khan picked a four-wicket haul in his team's win © AFP\nTwo days after beating Australia, Afghanistan went one step further with an incredible win over Bangladesh to advance to the semifinal of the Men's T20 World Cup 2024. Once India beat Australia in the morning game, the requirement was clear: a win would take Afghanistan through ahead of the 2021 champions. However, Bangladesh came as a thorn in their flesh, putting in a commendable bowling performance to restrict Afghanistan to 115/5 in 20 overs. The conditions got a bit better with rain spells and Litton Das showed batting resolve but Rashid Khan (4 for 23) and Naveen Ul Haq (4 for 26) combined to take their side to the last-four.\n\nWhere the game was won:\n\nBangladesh would've comfortably gotten 

In [2]:
import requests
import xml.etree.ElementTree as ET

# URL of the RSS feed
url = "https://feeds.feedburner.com/ign/games-all"

# Prefix map needed to resolve the `media:` namespace used for item images.
MEDIA_NAMESPACES = {'media': 'http://search.yahoo.com/mrss/'}

# Fetch the content of the RSS feed. requests has no default timeout, so an
# unresponsive server would otherwise block this cell indefinitely.
response = requests.get(url, timeout=10)
response.raise_for_status()  # Raises an HTTPError for bad responses

# Parse the XML content of the feed
root = ET.fromstring(response.content)

# Iterate through each item in the RSS feed
for item in root.findall('.//item'):
    # findtext returns the default when the child element is missing, instead
    # of failing with AttributeError on `None.text`.
    title = item.findtext('title', default='No title available')
    description = item.findtext('description', default='No description available')

    # Look the image element up once; it may be absent or lack a `url` attribute.
    media_content = item.find('.//media:content', namespaces=MEDIA_NAMESPACES)
    if media_content is not None:
        image_url = media_content.get('url', 'No image available')
    else:
        image_url = 'No image available'

    print(f'Title: {title}')
    print(f'Description: {description}')
    print(f'Image URL: {image_url}')
    print('---')

Title: Final Fantasy 14 Version 7.0 Patch Notes Reveal Every Change Coming Alongside Dawntrail
Description: Square Enix has released the preliminary patch notes for Final Fantasy 14's highly anticipated Patch 7.0, which will arrive soon alongside the Dawntrail expansion.
Image URL: https://assets-prd.ignimgs.com/2024/06/23/ffxiv-dawntrail-prepare-1719180665855.jpg
---
Title: Elden Ring: Shadow of the Erdtree Hardest Bosses Ranked
Description: Shadow of the Erdtree's bosses are some of From's toughest yet, but which is the hardest?
Image URL: https://assets-prd.ignimgs.com/2024/06/25/er-bosses-blog-1719311521211.jpg
---
Title: Brutal Stardew Valley Mod Obliterates Your Farm If You Open The Wiki
Description: A Stardew Valley player has taken inspiration from a satirical post to created a mod that deletes a player’s save file the moment that they open a Stardew Valley wiki.
Image URL: https://assets-prd.ignimgs.com/2024/01/29/stardew-valley-1706533007365.jpg
---
Title: 6 Months Later, Pal

In [15]:
# Web-based loader
from langchain_community.document_loaders import WebBaseLoader
import bs4

## Load the article page; the SoupStrainer limits parsing to the listed CSS classes
loader = WebBaseLoader(
    web_paths=("https://www.cricbuzz.com/cricket-news/130856/rashid-khan-naveen-ul-haq-carry-afghanistan-to-world-cup-semis",),
    bs_kwargs={
        "parse_only": bs4.SoupStrainer(
            class_=("nws-dtl-hdln", "cb-nws-dtl-itms"),
        ),
    },
)

text_documents = loader.load()

In [16]:
# Bare expression: the notebook renders the loaded documents as this cell's output.
text_documents

[Document(page_content=" Rashid Khan, Naveen Ul Haq carry Afghanistan to World Cup semis   Two days after beating Australia, Afghanistan went one step further with an incredible win over Bangladesh to advance to the semifinal of the Men's T20 World Cup 2024. Once India beat Australia in the morning game, the requirement was clear: a win would take Afghanistan through ahead of the 2021 champions. However, Bangladesh came as a thorn in their flesh, putting in a commendable bowling performance to restrict Afghanistan to 115/5 in 20 overs. The conditions got a bit better with rain spells and Litton Das showed batting resolve but Rashid Khan (4 for 23) and Naveen Ul Haq (4 for 26) combined to take their side to the last-four.    Where the game was won:    Bangladesh would've comfortably gotten over the line if not for the collapse in the middle-overs. Contrastingly, Afghanistan did well to lose only one wicket in this phase even as they were in need of a move on from a slow start.      The 

In [None]:
## PDF reader: load 'attention.pdf' through PyPDFLoader
from langchain_community.document_loaders import PyPDFLoader

loader = PyPDFLoader('attention.pdf')
docs = loader.load()

In [18]:
from langchain.text_splitter import RecursiveCharacterTextSplitter

# Split the loaded documents into chunks using the size/overlap settings below.
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=1000,
    chunk_overlap=200,
)
documents = text_splitter.split_documents(text_documents)

# Display only the first five chunks.
documents[:5]

[Document(page_content="Rashid Khan, Naveen Ul Haq carry Afghanistan to World Cup semis   Two days after beating Australia, Afghanistan went one step further with an incredible win over Bangladesh to advance to the semifinal of the Men's T20 World Cup 2024. Once India beat Australia in the morning game, the requirement was clear: a win would take Afghanistan through ahead of the 2021 champions. However, Bangladesh came as a thorn in their flesh, putting in a commendable bowling performance to restrict Afghanistan to 115/5 in 20 overs. The conditions got a bit better with rain spells and Litton Das showed batting resolve but Rashid Khan (4 for 23) and Naveen Ul Haq (4 for 26) combined to take their side to the last-four.    Where the game was won:    Bangladesh would've comfortably gotten over the line if not for the collapse in the middle-overs. Contrastingly, Afghanistan did well to lose only one wicket in this phase even as they were in need of a move on from a slow start.      The m

In [19]:
## Vector embedding and vector store
from langchain_openai import OpenAIEmbeddings
from langchain_community.vectorstores import Chroma

# Build a Chroma store from the split documents using OpenAIEmbeddings.
db = Chroma.from_documents(
    documents,
    OpenAIEmbeddings(),
)

In [22]:
# Run a similarity search against the vector store and print the text of the
# first result.
query = "Who WON THE MATCH"
retireved_results = db.similarity_search(query)
print(retireved_results[0].page_content)

[Document(page_content="Rashid Khan, Naveen Ul Haq carry Afghanistan to World Cup semis   Two days after beating Australia, Afghanistan went one step further with an incredible win over Bangladesh to advance to the semifinal of the Men's T20 World Cup 2024. Once India beat Australia in the morning game, the requirement was clear: a win would take Afghanistan through ahead of the 2021 champions. However, Bangladesh came as a thorn in their flesh, putting in a commendable bowling performance to restrict Afghanistan to 115/5 in 20 overs. The conditions got a bit better with rain spells and Litton Das showed batting resolve but Rashid Khan (4 for 23) and Naveen Ul Haq (4 for 26) combined to take their side to the last-four.    Where the game was won:    Bangladesh would've comfortably gotten over the line if not for the collapse in the middle-overs. Contrastingly, Afghanistan did well to lose only one wicket in this phase even as they were in need of a move on from a slow start.      The m

In [None]:
## FAISS vector database
from langchain_community.vectorstores import FAISS

# Build the FAISS store from the first 15 chunks only.
db = FAISS.from_documents(
    documents[:15],
    OpenAIEmbeddings(),
)