In [1]:
from moviepy import *
from moviepy.video.io.VideoFileClip import VideoFileClip
import whisper
import string
from transformers import pipeline
from keybert import KeyBERT
from sentence_transformers import SentenceTransformer, util

import requests




# Audio and Transcript Extraction

In [3]:
def preprocess_video(video_path):
    video_clip = VideoFileClip(video_path)

    # Extract audio
    audio_clip = video_clip.audio

    # Save the audio to a file
    audio_output_path = "output_audio.mp3"  # Specify output file path and format
    audio_clip.write_audiofile(audio_output_path)

    #Audio to text
    model = whisper.load_model("base")
    result = model.transcribe(audio_output_path)
    paragraph = result["text"]

    return paragraph

# NewsAPI setup and Query

In [5]:
def get_articles(keywords, max_results=20, sort_by="relevance"):
    API_KEY = '69061b87273543a09951e17c0283c5a1'  
    BASE_URL = 'https://newsapi.org/v2/everything'

    articles = []
    page = 1
    query = ' OR '.join(keywords)  # Flexible matching: any keyword can appear

    while len(articles) < max_results:
        url = f"{BASE_URL}?q={query}&pageSize={min(100, max_results - len(articles))}&page={page}&sortBy={sort_by}&apiKey={API_KEY}"
        response = requests.get(url)

        if response.status_code == 200:
            data = response.json()
            articles.extend(data.get("articles", []))
            if len(data.get("articles", [])) < 100:  # No more articles on next pages
                break
            page += 1
        else:
            print(f"Error fetching articles: {response.status_code} - {response.text}")
            break

    return articles[:max_results] if articles else None

# Comparing Similarities

In [7]:
def compare_similarities(text_embedding, articles_embeddings):
    # List to store cosine similarities
    similarities = []
    for article_embedding in articles_embeddings:
        similarity = util.cos_sim(text_embedding, article_embedding).item() 
        similarities.append(similarity)

    # Sort articles by similarity (highest to lowest)
    sorted_indices = sorted(range(len(similarities)), key=lambda i: similarities[i], reverse=True)
    
    return sorted_indices[:5]

# News Verification

In [9]:
def fake_news_detection(video_path):
    #get the transcribed text
    paragraph = preprocess_video(video_path)
    print(paragraph)

    #getting keywords
    model = KeyBERT()
    keywords = model.extract_keywords(paragraph, stop_words="english", top_n=5)
    keywords_updated = [keyword[0] for keyword in keywords]
    print("Keywords:", keywords_updated)
    
    #get Articles NewsAPI with keywords
    articles = get_articles(keywords_updated, max_results=50)
    if articles:
        for article in articles:
            print("Title:", article["title"])
            print("Source:", article["source"]["name"])
            print("URL:", article["url"])
            print("=" * 80)
    else:
        print("No articles found.")
        return None

    #summary of text
    summarizer = pipeline("summarization", model="facebook/bart-large-cnn")
    summary = summarizer(paragraph, max_length=100, min_length=50, do_sample=False)
    summarized_text = summary[0]['summary_text']

    #Semantic Embedding
    model = SentenceTransformer('all-MiniLM-L6-v2')     # Load the Sentence-BERT model
    text_embedding = model.encode(summarized_text, convert_to_tensor=True)   # Generate embedding for the input text

    # Generate embeddings for each article
    article_embeddings = []
    for article in articles:
        content_to_embed = f"{article['title']} {article['description']} {article.get('content', '')}" # Combine title, description, and content
        embedding = model.encode(content_to_embed, convert_to_tensor=True)                             # Generate embedding
        article_embeddings.append(embedding)     

    #get the most similar articles 
    best_five_articles = compare_similarities(text_embedding, article_embeddings)

    print("the most similar articles describing the content of your video are : ")
    for idx in best_five_articles:  # Display top 5 most similar articles
        summary = summarizer(articles[idx]['content'],max_length = 100, min_length=100, do_sample=False)
        summarized_article = summary[0]['summary_text']
        
        print(f"Title: {articles[idx]['title']}")
        #print(f"Similarity Score: {similarities[idx]*100:.2f}%")
        print(f"Summarized Text: {summarized_article}")
        print(f"URL: {articles[idx]['url']}")
        print("=" * 80)

    return best_five_articles

In [10]:
fake_news_detection(r"C:\Users\wiame\Documents\RECONNAISSANCE DES FORMES\PROJET\Fake News Verification\video_of_real_news.mp4")

MoviePy - Writing audio in output_audio.mp3


                                                                                                                       

MoviePy - Done.
 Now, Metafounder and CEO Mark Zuckerberg stunned many with his announcement that he was pulling the plug on fact checking at Facebook and Instagram in the United States citing concerns about political bias. He is more.
Keywords: ['zuckerberg', 'metafounder', 'facebook', 'ceo', 'instagram']
Title: Dana White is joining Meta’s board of directors
Source: The Verge
URL: https://www.theverge.com/2025/1/6/24337670/meta-board-of-directors-dana-white-ufc
Title: Zuckerberg says he’s moving Meta moderators to Texas because California seems too ‘biased’
Source: The Verge
URL: https://www.theverge.com/2025/1/7/24338305/meta-mark-zuckerberg-moving-meta-moderators-texas-california-bias
Title: Meta is leaving its users to wade through hate and disinformation
Source: The Verge
URL: https://www.theverge.com/2025/1/7/24338127/meta-end-fact-checking-misinformation-zuckerberg
Title: Facebook and Instagram Ads Push Gun Silencers Disguised as Car Parts
Source: Wired
URL: https://www.wired.c

Device set to use cpu
Your max_length is set to 100, but your input_length is only 45. Since this is a summarization task, where outputs shorter than the input are typically wanted, you might consider decreasing max_length manually, e.g. summarizer('...', max_length=22)
Your max_length is set to 100, but your input_length is only 46. Since this is a summarization task, where outputs shorter than the input are typically wanted, you might consider decreasing max_length manually, e.g. summarizer('...', max_length=23)


the most similar articles describing the content of your video are : 


Your max_length is set to 100, but your input_length is only 50. Since this is a summarization task, where outputs shorter than the input are typically wanted, you might consider decreasing max_length manually, e.g. summarizer('...', max_length=25)


Title: Meta Ends Fact-Checking on Facebook, Instagram in Free-Speech Pitch
Summarized Text: News organizations are required to report news, and in many cases they're publicly funded and not allowed to run advertisements that aren't PSA (public service announcements.) US "news" organization… is not a news organization, it's a public service organization. US news organizations are not news organizations, they're public service organizations. US " news" organization is not news, it is public service advertising. US News organizations are public service announcements, they are news organizations. U.S.
URL: https://tech.slashdot.org/story/25/01/07/189242/meta-ends-fact-checking-on-facebook-instagram-in-free-speech-pitch


Your max_length is set to 100, but your input_length is only 53. Since this is a summarization task, where outputs shorter than the input are typically wanted, you might consider decreasing max_length manually, e.g. summarizer('...', max_length=26)


Title: What Zuckerberg Risks by Following Musk’s Lead
Summarized Text:  Meta CEO Mark Zuckerberg announced that the social media behemoth will end its third party fact-checking program in the U.S. Instead, the company will adopt a crowd-sourced community notes program. The announcement was made on Tuesday in a blog post on the company's Facebook page. The post said that the company would end its fact-checkers in the United States. It also said that it would adopt a community-sourcing program in place of the fact-checks.
URL: https://time.com/7205821/what-zuckerberg-risks-by-following-musks-lead/


Your max_length is set to 100, but your input_length is only 50. Since this is a summarization task, where outputs shorter than the input are typically wanted, you might consider decreasing max_length manually, e.g. summarizer('...', max_length=25)


Title: We're witnessing a new Mark Zuckerberg: Welcome to Zuck 3.0.
Summarized Text:  Meta announced big changes to kick off the new year, including ending third-party fact-checking and DEI programs. Meta CEO Mark Zuckerberg is entering a new era. Meta is a social networking site that lets users share photos, videos, and other content with friends and family. It was founded in 2004 by Mark Zuckerberg and his wife, Sheryl Sandberg. The company is now run by Zuckerberg's son, Mark Zuckerberg Jr., and his son-in-law, Mark
URL: https://www.businessinsider.com/mark-zuckerberg-new-identity-trump-elon-musk-fact-check-dei-2025-1


Your max_length is set to 100, but your input_length is only 47. Since this is a summarization task, where outputs shorter than the input are typically wanted, you might consider decreasing max_length manually, e.g. summarizer('...', max_length=23)


Title: Meta cutting its fact-checkers is a big deal. Here's why.
Summarized Text: Mark Zuckerberg is the CEO of Facebook. He is also the founder of the social media giant Instagram. Zuckerberg is also a co-founder of Oculus VR, a virtual reality headset company. He also has a stake in Facebook's Oculus VR headset company, which he co-founded with Oculus VR founder Sheryl Sandberg. Zuckerberg also co-owns Oculus VR with his wife, Marissa Mayer, and his brother, Eduardo Sotomayor. The company is valued at $
URL: https://www.businessinsider.com/meta-cut-fact-checkers-mark-zuckerberg-donald-trump-impact-2025-1
Title: A Lot of People Are Searching for How to Delete Facebook and Instagram, and We Would Like Their Clicks
Summarized Text: Mark Zuckerberg announced that Meta platforms including Facebook, Instagram, and Threads will ditch professional fa… in the wake of Mark Zuckerberg’s announcement that Meta Platforms like Facebook and Instagram would ditch professional journalists. Mark Zucke

[9, 34, 28, 30, 6]