In [None]:
import re

def is_generated(content):
    """
    Detect if a given content is likely generated by AI (basic heuristic).

    Args:
    content (str): The text content to be checked.

    Returns:
    bool: True if the content is likely generated; False otherwise.
    """
    # Define a list of keywords or patterns that may indicate AI-generated content.
    ai_keywords = [
        "generated by AI",
        "auto-generated",
        "machine-generated",
        "model-generated",
        "artificial intelligence",
    ]

    # Convert the content to lowercase for case-insensitive matching.
    content = content.lower()

    # Check if any of the AI-related keywords are present in the content.
    for keyword in ai_keywords:
        if re.search(keyword, content):
            return True

    return False

# Example usage
content_to_check = "Detecting generative AI content without using a specialized API like OpenAI's GPT-3.5 can be challenging and may not be as accurate."
if is_generated(content_to_check):
    print("The content is likely generated by AI.")
else:
    print("The content does not appear to be generated by AI.")


The content does not appear to be generated by AI.


In [None]:
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity

def plagiarism_checker(text1, text2):
    # Create TF-IDF vectorizers for text1 and text2
    tfidf_vectorizer = TfidfVectorizer()
    tfidf_matrix = tfidf_vectorizer.fit_transform([text1, text2])

    # Calculate cosine similarity between the TF-IDF vectors
    similarity = cosine_similarity(tfidf_matrix[0], tfidf_matrix[1])[0][0]

    # Set a similarity threshold to determine plagiarism
    threshold = 0.8

    if similarity > threshold:
        return "Plagiarized"
    else:
        return "Not Plagiarized"

# Example usage
text1 = "This is an original text."
text2 = "This is a copied text."
result = plagiarism_checker(text1, text2)
print(f"Plagiarism check result: {result}")


Plagiarism check result: Not Plagiarized


In [None]:
from googlesearch import search

def google_search_content_detection(query):
    # Perform a Google search and collect search results
    search_results = list(search(query, num=10, stop=10))

    # Implement content detection logic for search results here

    # Return relevant content
    return search_results

# Example usage
query = "Data science is the study of data to extract meaningful insights for business."
results = google_search_content_detection(query)
print("Google search results:")
for idx, result in enumerate(results):
    print(f"{idx + 1}. {result}")


Google search results:
1. https://www.ibm.com/topics/data-science
2. https://aws.amazon.com/what-is/data-science/
3. https://aws.amazon.com/what-is/data-science/#seo-faq-pairs#history-of-data-science
4. https://aws.amazon.com/what-is/data-science/#seo-faq-pairs#what-are-the-benefits-of-data-science-for-business
5. https://aws.amazon.com/what-is/data-science/#seo-faq-pairs#what-is-the-data-science-process
6. https://www.datarobot.com/wiki/data-science/
7. https://www.simplilearn.com/tutorials/data-science-tutorial/what-is-data-science
8. https://www.mygreatlearning.com/blog/what-is-data-science/
9. https://www.techtarget.com/searchenterpriseai/definition/data-science
10. https://builtin.com/data-science


In [None]:
import requests
from bs4 import BeautifulSoup
from googlesearch import search

def google_search_with_summaries(query, num_results=5):
    search_results = list(search(query, num=num_results, stop=num_results))

    results_with_summaries = []

    for result in search_results:
        try:
            page = requests.get(result)
            soup = BeautifulSoup(page.content, 'html.parser')

            # Extract the title and snippet from the search result page
            title = soup.find('title').get_text()
            snippet = soup.find('meta', attrs={'name': 'description'})['content']

            results_with_summaries.append({'title': title, 'snippet': snippet, 'url': result})

        except Exception as e:
            print(f"Error processing result: {e}")
            continue

    return results_with_summaries

# Example usage
query = "What is Java"
search_results = google_search_with_summaries(query, num_results=5)

for idx, result in enumerate(search_results):
    print(f"Result {idx + 1}:")
    print(f"Title: {result['title']}")
    print(f"Snippet: {result['snippet']}")
    print(f"URL: {result['url']}")
    print()


KeyboardInterrupt: ignored

In [19]:
import requests
from bs4 import BeautifulSoup
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity

# Function to scrape Google search results and extract summaries
def scrape_google_search(query, num_results=10):
    search_url = f"https://www.google.com/search?q={query}&num={num_results}"
    headers = {"User-Agent": "Mozilla/5.0"}

    response = requests.get(search_url, headers=headers)
    if response.status_code == 200:
        soup = BeautifulSoup(response.text, "html.parser")
        search_results = soup.find_all("div", class_="tF2Cxc")
        summaries = [result.find("div", class_="IsZvec").text for result in search_results]
        return summaries
    else:
        print(f"Failed to retrieve Google search results for '{query}'")
        return []

# Function to check for plagiarism between a user query and search result summaries
def plagiarism_checker(user_query, search_summaries):
    user_query = user_query.lower()  # Convert to lowercase for comparison

    for idx, summary in enumerate(search_summaries):
        summary = summary.lower()  # Convert to lowercase for comparison

        # Create TF-IDF vectorizers for the user query and summary
        tfidf_vectorizer = TfidfVectorizer()
        tfidf_matrix = tfidf_vectorizer.fit_transform([user_query, summary])

        # Calculate cosine similarity between the TF-IDF vectors
        similarity = cosine_similarity(tfidf_matrix[0], tfidf_matrix[1])[0][0]

        # Set a similarity threshold to determine plagiarism
        threshold = 0.8

        if similarity > threshold:
            print(f"Search result {idx + 1} is potentially plagiarized:")
            print(summary)
            print(f"Similarity score: {similarity}")
            print("=" * 50)

    print("Plagiarism detection complete.")

if __name__ == "__main__":
    user_query = "What is Data science"  # Replace with the user's query
    search_summaries = scrape_google_search(user_query)

    if search_summaries:
        plagiarism_checker(user_query, search_summaries)


In [None]:
import spacy

# Load a spaCy language model
nlp = spacy.load("en_core_web_sm")

def fact_check(statement):
    # Analyze the statement using spaCy
    doc = nlp(statement)

    # Implement fact-checking logic here
    # You can compare the statement against a database of facts

    # Return a fact-check result
    return "True"  # Replace with your fact-checking result

# Example usage
statement = "The Earth orbits the Moon."
result = fact_check(statement)
print(f"Fact check result: {result}")


Fact check result: True
