In [1]:
import requests
from bs4 import BeautifulSoup
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity

# Function to scrape Google search results and extract summaries
def scrape_google_search(query, num_results=10):
    """Fetch a Google results page for ``query`` and return the result summaries.

    Args:
        query: Search text. It is sent through ``params`` so that requests
            URL-encodes it; characters such as spaces, ``&`` or ``#`` would
            otherwise corrupt a hand-built query string.
        num_results: Value sent as the ``num`` query parameter.

    Returns:
        list[str]: Text of each ``div.IsZvec`` found inside a ``div.tF2Cxc``
        container of the returned HTML. An empty list is returned when the
        request fails or the response status is not 200.
    """
    search_url = "https://www.google.com/search"
    headers = {"User-Agent": "Mozilla/5.0"}

    try:
        response = requests.get(
            search_url,
            params={"q": query, "num": num_results},
            headers=headers,
            timeout=10,  # without a timeout a stalled connection blocks forever
        )
    except requests.RequestException:
        # Treat transport errors like a bad status: report and return nothing.
        print(f"Failed to retrieve Google search results for '{query}'")
        return []

    if response.status_code != 200:
        print(f"Failed to retrieve Google search results for '{query}'")
        return []

    soup = BeautifulSoup(response.text, "html.parser")
    summaries = []
    for result in soup.find_all("div", class_="tF2Cxc"):
        summary_div = result.find("div", class_="IsZvec")
        # find() returns None when a result container has no summary div;
        # skip it instead of failing on ``None.text``.
        if summary_div is not None:
            summaries.append(summary_div.text)
    return summaries

# Function to check for plagiarism between a user query and search result summaries
def plagiarism_checker(user_query, search_summaries, threshold=0.8):
    """Print every summary whose TF-IDF cosine similarity to the query is high.

    Both texts are lower-cased, vectorised together with a fresh
    ``TfidfVectorizer`` fitted on just that pair, and compared with cosine
    similarity.

    Args:
        user_query: Text to compare against each summary.
        search_summaries: Iterable of summary strings.
        threshold: A summary is reported when its similarity is strictly
            greater than this value. Defaults to 0.8.

    Returns:
        None. Results are written to stdout, followed by a completion line.
    """
    user_query = user_query.lower()  # Convert to lowercase for comparison

    for idx, summary in enumerate(search_summaries):
        summary = summary.lower()  # Convert to lowercase for comparison

        try:
            # The vectorizer is fitted per pair, so each comparison is independent.
            tfidf_matrix = TfidfVectorizer().fit_transform([user_query, summary])
        except ValueError:
            # fit_transform raises ValueError when neither text yields a
            # usable token (empty vocabulary). Nothing can be compared, so
            # move on rather than aborting the whole scan.
            continue

        # Calculate cosine similarity between the two TF-IDF row vectors
        similarity = cosine_similarity(tfidf_matrix[0], tfidf_matrix[1])[0][0]

        if similarity > threshold:
            print(f"Search result {idx + 1} is potentially plagiarized:")
            print(summary)
            print(f"Similarity score: {similarity}")
            print("=" * 50)

    print("Plagiarism detection complete.")

if __name__ == "__main__":
    # Demo run: scrape summaries for a sample query, then scan them.
    # Replace the string below with the user's query.
    user_query = "What is Java?"
    search_summaries = scrape_google_search(user_query)

    # Only run the checker when the scrape produced at least one summary.
    if len(search_summaries) > 0:
        plagiarism_checker(user_query, search_summaries)


In [None]:
from googlesearch import search
import requests
from bs4 import BeautifulSoup
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity

# Function to perform a Google search and extract search result snippets
def google_search(query, num_results=10):
    """Search for ``query`` and return the visible text of each result page.

    Args:
        query: Search text handed to ``googlesearch.search``.
        num_results: Passed as both ``num`` and ``stop`` to ``search``.

    Returns:
        list[str]: ``BeautifulSoup.get_text()`` output for every result URL
        that was fetched successfully. URLs that fail are reported on stdout
        and skipped, so the list may be shorter than the list of URLs.
    """
    search_results = list(search(query, num=num_results, stop=num_results))
    snippets = []

    for result in search_results:
        try:
            # A timeout keeps one unresponsive host from hanging the whole run.
            response = requests.get(result, timeout=10)
            # Skip HTTP error pages (4xx/5xx) so their boilerplate HTML is not
            # compared as if it were the page's real content.
            response.raise_for_status()
            soup = BeautifulSoup(response.text, 'html.parser')
            snippets.append(soup.get_text())
        except Exception as e:
            # Best effort: report the failing URL and continue with the rest.
            print(f"Error fetching content from {result}: {e}")

    return snippets

# Function to check for plagiarism using TF-IDF and cosine similarity
def plagiarism_checker(text1, text2, threshold=0.8):
    """Classify two texts by their TF-IDF cosine similarity.

    Note: this function has the same name as the one defined in the first
    cell but a different signature; running this cell replaces that earlier
    definition in the kernel.

    Args:
        text1: First text.
        text2: Second text.
        threshold: The pair is labelled plagiarized when the similarity is
            strictly greater than this value. Defaults to 0.8.

    Returns:
        str: ``"Plagiarized"`` or ``"Not Plagiarized"``.
    """
    try:
        tfidf_matrix = TfidfVectorizer().fit_transform([text1, text2])
    except ValueError:
        # fit_transform raises ValueError when neither text yields a usable
        # token (empty vocabulary), e.g. two pages with no text. There is
        # nothing to match, so report no plagiarism instead of crashing.
        return "Not Plagiarized"

    similarity = cosine_similarity(tfidf_matrix[0], tfidf_matrix[1])[0][0]

    if similarity > threshold:
        return "Plagiarized"
    return "Not Plagiarized"

# Script entry point
if __name__ == "__main__":
    query = "What is Java?"
    search_results = google_search(query)

    if len(search_results) < 2:
        # A reference page plus at least one other page is required.
        print("Insufficient search results to perform plagiarism check.")
    else:
        # The first fetched page is the reference every later page is
        # compared against; reported numbering starts at 2.
        query_result = search_results[0]
        for i in range(2, len(search_results) + 1):
            snippet = search_results[i - 1]
            result = plagiarism_checker(query_result, snippet)
            print(f"Search result {i}: {result}")

        print("\nPlagiarism check against the first search result completed.")
