In [None]:
# Install necessary libraries
!pip install langchain transformers torch beautifulsoup4 requests accelerate
!pip install langchain_community
# Import required modules
import requests
from bs4 import BeautifulSoup
from langchain.llms import HuggingFacePipeline
from transformers import pipeline, AutoTokenizer, AutoModelForCausalLM
import torch


# Load Falcon-7B Model from Hugging Face
model_name = "tiiuae/falcon-7b-instruct"
device = "cuda" if torch.cuda.is_available() else "cpu"

# Half precision is only requested when a GPU is present; on CPU the model is
# loaded in float32 because half-precision CPU kernels are not reliably available.
torch_dtype = torch.float16 if device == "cuda" else torch.float32

# Load tokenizer and model
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForCausalLM.from_pretrained(model_name, device_map="auto", torch_dtype=torch_dtype)

# Create the text generation pipeline.
# No `device=` argument: device_map="auto" has already placed the weights, and
# asking the pipeline to move the model again conflicts with that placement.
# max_new_tokens is set explicitly so the summary length does not depend on
# the library's default generation limits.
summarizer = pipeline(
    "text-generation",
    model=model,
    tokenizer=tokenizer,
    max_new_tokens=256,
)

# Wrap the model for LangChain
llm = HuggingFacePipeline(pipeline=summarizer)

# Function to scrape website content
def scrape_website(url, timeout=10):
    """Download ``url`` and return the text of its ``<p>`` elements.

    Args:
        url: Address of the page to fetch.
        timeout: Seconds to wait for the server before giving up.

    Returns:
        The text of every ``<p>`` tag joined by single spaces. On failure
        (non-200 status, connection problem, timeout, malformed URL) a
        message starting with "❌ Failed to fetch website." is returned
        instead of raising.
    """
    headers = {"User-Agent": "Mozilla/5.0"}

    try:
        response = requests.get(url, headers=headers, timeout=timeout)
    except requests.exceptions.RequestException as exc:
        # Report network/URL errors the same way as HTTP errors so callers
        # only have one failure convention to handle.
        return f"❌ Failed to fetch website. Error: {exc}"

    if response.status_code != 200:
        return f"❌ Failed to fetch website. Status code: {response.status_code}"

    soup = BeautifulSoup(response.text, 'html.parser')
    return ' '.join(p.text for p in soup.find_all('p'))  # Extract text from <p> tags

# Function to analyze and summarize scraped content
def analyze_content(url, max_chars=1024):
    """Scrape ``url`` and ask the LLM to summarize the page text.

    Args:
        url: Address of the page to summarize.
        max_chars: Maximum number of characters of scraped text placed in
            the prompt. This is a character count, not a token count.

    Returns:
        The LLM output, or a message starting with "❌ Failed" when the page
        could not be fetched or contained no paragraph text.
    """
    scraped_text = scrape_website(url)

    # scrape_website signals errors with a fixed message prefix. Matching the
    # prefix (not the bare word "Failed") keeps ordinary pages that merely
    # contain that word from being treated as errors.
    if scraped_text.startswith("❌ Failed to fetch website"):
        return scraped_text

    # Nothing to summarize: avoid sending an empty prompt to the model.
    if not scraped_text.strip():
        return "❌ Failed to summarize: no paragraph text was found on the page."

    input_text = scraped_text[:max_chars]

    # Generate summary using Falcon-7B
    return llm(f"Summarize the following text:\n{input_text}")

# User Input for URL (surrounding whitespace from copy/paste is dropped)
url = input("🔗 Enter the URL to scrape: ").strip()

if url:
    summary = analyze_content(url)

    # Print Summary
    print("\n📄 Webpage Summary:\n", summary)
else:
    # An empty answer cannot be fetched; say so instead of calling the scraper.
    summary = None
    print("❗ Please enter a valid URL.")

In [None]:
# Install required libraries
!pip install streamlit langchain transformers torch beautifulsoup4 requests accelerate

import streamlit as st
import requests
from bs4 import BeautifulSoup
from langchain.llms import HuggingFacePipeline
from transformers import pipeline, AutoTokenizer, AutoModelForCausalLM
import torch

# Load Falcon-7B Model from Hugging Face
model_name = "tiiuae/falcon-7b-instruct"
device = "cuda" if torch.cuda.is_available() else "cpu"


@st.cache_resource
def _load_llm_stack(name, use_gpu):
    """Build the tokenizer, model, pipeline and LangChain wrapper once.

    Streamlit re-executes the script on every widget interaction; caching
    keeps the model from being rebuilt on each rerun.

    Args:
        name: Hugging Face model identifier.
        use_gpu: True when a CUDA device is available.

    Returns:
        Tuple ``(tokenizer, model, summarizer, llm)``.
    """
    # Half precision only on GPU; float32 on CPU, where half-precision
    # kernels are not reliably available.
    dtype = torch.float16 if use_gpu else torch.float32

    # Load tokenizer and model
    tok = AutoTokenizer.from_pretrained(name)
    mdl = AutoModelForCausalLM.from_pretrained(name, device_map="auto", torch_dtype=dtype)

    # Create the text generation pipeline.
    # No `device=` argument: device_map="auto" has already placed the weights.
    # max_new_tokens is explicit so output length does not depend on defaults.
    generator = pipeline(
        "text-generation",
        model=mdl,
        tokenizer=tok,
        max_new_tokens=256,
    )

    # Wrap the model for LangChain
    return tok, mdl, generator, HuggingFacePipeline(pipeline=generator)


tokenizer, model, summarizer, llm = _load_llm_stack(model_name, device == "cuda")

# Function to scrape website content
def scrape_website(url, timeout=10):
    """Download ``url`` and return the text of its ``<p>`` elements.

    Args:
        url: Address of the page to fetch.
        timeout: Seconds to wait for the server before giving up.

    Returns:
        The text of every ``<p>`` tag joined by single spaces. On failure
        (non-200 status, connection problem, timeout, malformed URL) a
        message starting with "❌ Failed to fetch website." is returned
        instead of raising.
    """
    headers = {"User-Agent": "Mozilla/5.0"}

    try:
        response = requests.get(url, headers=headers, timeout=timeout)
    except requests.exceptions.RequestException as exc:
        # Report network/URL errors the same way as HTTP errors so callers
        # only have one failure convention to handle.
        return f"❌ Failed to fetch website. Error: {exc}"

    if response.status_code != 200:
        return f"❌ Failed to fetch website. Status code: {response.status_code}"

    soup = BeautifulSoup(response.text, 'html.parser')
    return ' '.join(p.text for p in soup.find_all('p'))  # Extract text from <p> tags

# Function to analyze and summarize scraped content
def analyze_content(url, max_chars=1024):
    """Scrape ``url`` and ask the LLM to summarize the page text.

    Args:
        url: Address of the page to summarize.
        max_chars: Maximum number of characters of scraped text placed in
            the prompt. This is a character count, not a token count.

    Returns:
        The LLM output, or a message starting with "❌ Failed" when the page
        could not be fetched or contained no paragraph text.
    """
    scraped_text = scrape_website(url)

    # scrape_website signals errors with a fixed message prefix. Matching the
    # prefix (not the bare word "Failed") keeps ordinary pages that merely
    # contain that word from being treated as errors.
    if scraped_text.startswith("❌ Failed to fetch website"):
        return scraped_text

    # Nothing to summarize: avoid sending an empty prompt to the model.
    if not scraped_text.strip():
        return "❌ Failed to summarize: no paragraph text was found on the page."

    input_text = scraped_text[:max_chars]

    # Generate summary using Falcon-7B
    return llm(f"Summarize the following text:\n{input_text}")

# Streamlit UI: page header, URL field and a button that triggers the summary.
st.title("🌐 Web Scraper & Summarizer using Falcon-7B")
st.write("Enter a URL, and this app will scrape and summarize its content.")

url = st.text_input("🔗 Enter the URL:")
summarize_clicked = st.button("Summarize")

# Nothing happens until the button is pressed; an empty field is reported
# as an error, otherwise the summary is rendered under its own subheader.
if summarize_clicked and not url:
    st.error("❗ Please enter a valid URL.")
elif summarize_clicked:
    summary = analyze_content(url)
    st.subheader("📄 Webpage Summary:")
    st.write(summary)

In [None]:
# Store the new GitHub token securely
GITHUB_TOKEN = "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"

# Set up Git remote URL with authentication
!git remote set-url origin https://{GITHUB_TOKEN}@github.com/vibhorjoshi/Ai-agents-using-falcon-7-B.git

# Push changes to GitHub
!git push origin main

In [None]:
# Set up Git in Colab
!git config --global user.name "vibhorjoshi"
!git config --global user.email "vibhorjoshi40@gmail.com"

# Clone your GitHub repo (Replace 'your-repo' with your actual repo)
!git clone https://github.com/vibhorjoshi/Ai-agents-using-falcon-7-B
%cd Ai-agents-using-falcon-7-B

# Save the Streamlit file
with open("app.py", "w") as f:
    f.write("""
import streamlit as st
import requests
from bs4 import BeautifulSoup
from langchain.llms import HuggingFacePipeline
from transformers import pipeline, AutoTokenizer, AutoModelForCausalLM
import torch

# Load Falcon-7B Model from Hugging Face
model_name = "tiiuae/falcon-7b-instruct"
device = "cuda" if torch.cuda.is_available() else "cpu"

# Load tokenizer and model
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForCausalLM.from_pretrained(model_name, device_map="auto", torch_dtype=torch.float16)

# Create the text generation pipeline
summarizer = pipeline("text-generation", model=model, tokenizer=tokenizer, device=0 if torch.cuda.is_available() else -1)

# Wrap the model for LangChain
llm = HuggingFacePipeline(pipeline=summarizer)

# Function to scrape website content
def scrape_website(url):
    headers = {"User-Agent": "Mozilla/5.0"}
    response = requests.get(url, headers=headers)

    if response.status_code == 200:
        soup = BeautifulSoup(response.text, 'html.parser')
        text = ' '.join([p.text for p in soup.find_all('p')])  # Extract text from <p> tags
        return text
    else:
        return f"❌ Failed to fetch website. Status code: {response.status_code}"

# Function to analyze and summarize scraped content
def analyze_content(url):
    scraped_text = scrape_website(url)

    if "Failed" in scraped_text:
        return scraped_text  # Return error message if scraping failed

    # Limit text length to 1024 tokens (model constraint)
    input_text = scraped_text[:1024]

    # Generate summary using Falcon-7B
    summary = llm(f"Summarize the following text:\n{input_text}")
    return summary

# Streamlit UI
st.title("🌐 Web Scraper & Summarizer using Falcon-7B")
st.write("Enter a URL, and this app will scrape and summarize its content.")

url = st.text_input("🔗 Enter the URL:")
if st.button("Summarize"):
    if url:
        summary = analyze_content(url)
        st.subheader("📄 Webpage Summary:")
        st.write(summary)
    else:
        st.error("❗ Please enter a valid URL.")
    # Paste the Streamlit code from above here
    """)

# Add & commit changes
!git add app.py
!git commit -m "Added Streamlit Web Scraper with Falcon-7B"

# Push to GitHub
!git push origin main