In [1]:
# ---------------------------
# 🔧 PATH SETUP
# ---------------------------
import sys
import os

# Add src/ to the import path. The location is derived from the kernel's
# working directory, so this assumes the notebook is started from a folder
# that sits next to src/ — NOTE(review): confirm for your layout.
SRC_DIR = os.path.abspath(os.path.join(os.getcwd(), "..", "src"))
if SRC_DIR not in sys.path:
    # Guard keeps re-runs of this cell from stacking duplicate entries.
    sys.path.append(SRC_DIR)


import nltk

# quiet=True keeps the downloader's log (which echoes the local,
# user-specific nltk_data directory) out of the saved notebook output.
nltk.download("punkt", quiet=True)

from utils.text_preprocessor import clean_text, split_into_sentences
from ner.ner_pipeline import extract_entities

# ---------------------------
# 🧪 Sample Test
# ---------------------------
# Small fixture containing person names, a date, a URL and an HTML tag, used
# to eyeball the clean -> split -> extract pipeline end to end.
sample = """
President Joe Biden met with Elon Musk on July 4th to discuss AI regulation.
Visit https://example.com for more info. <p>This is HTML</p>
"""

cleaned = clean_text(sample)
sentences = split_into_sentences(cleaned)

# Print each sentence with the entities found in it, separated by a rule.
for s in sentences:
    print(f"📝 Sentence: {s}")
    print("🔍 Entities:", extract_entities(s))
    print("-" * 50)


[nltk_data] Downloading package punkt to
[nltk_data]     C:\Users\ypran\AppData\Roaming\nltk_data...
[nltk_data]   Package punkt is already up-to-date!


📝 Sentence: President Joe Biden met with Elon Musk on July 4th to discuss AI regulation.
🔍 Entities: [('Joe Biden', 'PERSON'), ('Elon Musk', 'PERSON'), ('July 4th', 'DATE')]
--------------------------------------------------
📝 Sentence: Visit for more info.
🔍 Entities: []
--------------------------------------------------
📝 Sentence: This is HTML
🔍 Entities: [('HTML', 'ORG')]
--------------------------------------------------


In [2]:
import sys
import os

# Add src/ to the import path (resolved relative to the kernel's working
# directory). The membership check makes the cell safe to re-run: the
# original appended a fresh duplicate entry on every execution.
SRC_DIR = os.path.abspath(os.path.join(os.getcwd(), "..", "src"))
if SRC_DIR not in sys.path:
    sys.path.append(SRC_DIR)

from utils.article_fetcher import extract_article_from_url

url = "https://indianexpress.com/article/business/aviation/ahmedabad-plane-crash-crew-pilot-10123795/"  # replace with any article
article_text = extract_article_from_url(url)

# NOTE(review): the slice below assumes extract_article_from_url returns a
# string — confirm what it returns when the fetch or parse fails.
print(article_text[:1000])  # preview


Pilot associations in India are infuriated over the theories that are hinting that pilot action or error being behind the crash of Air India flight AI 171. The Indian Commercial Pilots Association (ICPA) on Sunday condemned the insinuation in public discourse and sections of the media that pilot suicide might be the cause of the tragic crash of the Air India Boeing 787-8 aircraft operating flight AI 171 in Ahmedabad on June 12.

The body, which represents the narrow-body fleet pilots at Air India, said in a statement that such a suggestion based on incomplete and preliminary information is “reckless and unfounded”, in addition to being “irresponsible” and “deeply insensitive”.

The ICPA is the second pilots’ association to raise serious concerns about the pilot action theories that has been doing rounds since the release of the preliminary report of the investigation into the crash—the worst aviation disaster involving an Indian airline in four decades. On Saturday, the Airline Pilots’

In [3]:
from utils.text_preprocessor import (
    clean_text,
    split_into_sentences,
)

# Run the fetched article through the same two-step preparation used for the
# sample text: clean first, then split the cleaned text into sentences.
# `sentences` is the input consumed by the analysis cells that follow.
cleaned = clean_text(article_text)
sentences = split_into_sentences(cleaned)


In [4]:
from ner.ner_pipeline import extract_entities

# Report the extracted entities for every article sentence, one
# rule-separated record per sentence.
for sentence in sentences:
    print(f"📝 Sentence: {sentence}")
    found_entities = extract_entities(sentence)
    print("🔍 Entities:", found_entities)
    print("-" * 40)


📝 Sentence: Pilot associations in India are infuriated over the theories that are hinting that pilot action or error being behind the crash of Air India flight AI 171.
🔍 Entities: [('India', 'GPE'), ('Air India', 'ORG'), ('AI', 'GPE'), ('171', 'CARDINAL')]
----------------------------------------
📝 Sentence: The Indian Commercial Pilots Association (ICPA) on Sunday condemned the insinuation in public discourse and sections of the media that pilot suicide might be the cause of the tragic crash of the Air India Boeing 787-8 aircraft operating flight AI 171 in Ahmedabad on June 12.
🔍 Entities: [('The Indian Commercial Pilots Association', 'ORG'), ('Sunday', 'DATE'), ('the Air India Boeing', 'ORG'), ('787-8', 'PRODUCT'), ('171', 'CARDINAL'), ('Ahmedabad', 'GPE'), ('June 12', 'DATE')]
----------------------------------------
📝 Sentence: The body, which represents the narrow-body fleet pilots at Air India, said in a statement that such a suggestion based on incomplete and preliminary informa

In [5]:
import sys
import os

# Add src/ to the import path (resolved relative to the kernel's working
# directory). The membership check makes the cell safe to re-run: the
# original appended a fresh duplicate entry on every execution.
SRC_DIR = os.path.abspath(os.path.join(os.getcwd(), "..", "src"))
if SRC_DIR not in sys.path:
    sys.path.append(SRC_DIR)


from claim_detection.claim_classifier import is_checkworthy

# For each article sentence, show its entities and the classifier's
# check-worthiness verdict. `sentences` and `extract_entities` are expected
# to be in the kernel namespace already; this cell does not define them.
for s in sentences:
    print(f"📝 Sentence: {s}")
    print("🔍 Entities:", extract_entities(s))

    if is_checkworthy(s):
        print("⚠️ CLAIM: Check-worthy")
    else:
        print("✅ CLAIM: Not significant")

    print("-" * 50)



  from .autonotebook import tqdm as notebook_tqdm
Device set to use cpu


📝 Sentence: Pilot associations in India are infuriated over the theories that are hinting that pilot action or error being behind the crash of Air India flight AI 171.
🔍 Entities: [('India', 'GPE'), ('Air India', 'ORG'), ('AI', 'GPE'), ('171', 'CARDINAL')]
✅ CLAIM: Not significant
--------------------------------------------------
📝 Sentence: The Indian Commercial Pilots Association (ICPA) on Sunday condemned the insinuation in public discourse and sections of the media that pilot suicide might be the cause of the tragic crash of the Air India Boeing 787-8 aircraft operating flight AI 171 in Ahmedabad on June 12.
🔍 Entities: [('The Indian Commercial Pilots Association', 'ORG'), ('Sunday', 'DATE'), ('the Air India Boeing', 'ORG'), ('787-8', 'PRODUCT'), ('171', 'CARDINAL'), ('Ahmedabad', 'GPE'), ('June 12', 'DATE')]
✅ CLAIM: Not significant
--------------------------------------------------
📝 Sentence: The body, which represents the narrow-body fleet pilots at Air India, said in a statem

In [6]:
import sys
import os

# Add src/ to the import path (resolved relative to the kernel's working
# directory). The membership check makes the cell safe to re-run: the
# original appended a fresh duplicate entry on every execution.
SRC_DIR = os.path.abspath(os.path.join(os.getcwd(), "..", "src"))
if SRC_DIR not in sys.path:
    sys.path.append(SRC_DIR)

from claim_detection.claimbuster_client import get_claimbuster_score

# Score cut-offs for the three verdict tiers, named so they can be tuned in
# one place instead of being buried in the loop.
LIKELY_THRESHOLD = 0.7
POSSIBLE_THRESHOLD = 0.4

# For each article sentence, show its entities, its ClaimBuster score and the
# tier that score falls into. `sentences` and `extract_entities` are expected
# to be in the kernel namespace already; this cell does not define them.
for s in sentences:
    print(f"📝 Sentence: {s}")
    print("🔍 Entities:", extract_entities(s))

    # NOTE(review): the :.2f format below assumes a numeric score — confirm
    # what get_claimbuster_score returns when the request fails.
    score = get_claimbuster_score(s)
    print(f"📊 ClaimBuster Score: {score:.2f}")

    if score >= LIKELY_THRESHOLD:
        print("⚠️ CLAIM: Likely check-worthy")
    elif score >= POSSIBLE_THRESHOLD:
        print("ℹ️ CLAIM: Possibly check-worthy")
    else:
        print("✅ CLAIM: Not significant")

    print("-" * 50)


📝 Sentence: Pilot associations in India are infuriated over the theories that are hinting that pilot action or error being behind the crash of Air India flight AI 171.
🔍 Entities: [('India', 'GPE'), ('Air India', 'ORG'), ('AI', 'GPE'), ('171', 'CARDINAL')]
🔁 Sent: Pilot associations in India are infuriated over the theories that are hinting that pilot action or error being behind the crash of Air India flight AI 171.
📥 Status: 200
📦 Raw Response: {"version":"2","claim":"Pilot associations in India are infuriated over the theories that are hinting that pilot action or error being behind the crash of Air India flight AI 171.","results":[{"text":"Pilot associations in India are infuriated over the theories that are hinting that pilot action or error being behind the crash of Air India flight AI 171.","index":0,"score":0.7507864314}]}
📊 ClaimBuster Score: 0.75
⚠️ CLAIM: Likely check-worthy
--------------------------------------------------
📝 Sentence: The Indian Commercial Pilots Associati

In [7]:
import sys
import os

# Add src/ to the import path (resolved relative to the kernel's working
# directory). The membership check makes the cell safe to re-run: the
# original appended a fresh duplicate entry on every execution.
SRC_DIR = os.path.abspath(os.path.join(os.getcwd(), "..", "src"))
if SRC_DIR not in sys.path:
    sys.path.append(SRC_DIR)

from claim_detection.claimbuster_client import get_claimbuster_score


CHECKWORTHY_THRESHOLD = 0.5  # or tune this

# Binary verdict per sentence: at or above the threshold counts as
# check-worthy. `sentences` is expected to be in the kernel namespace already.
# NOTE(review): each get_claimbuster_score call presumably issues its own
# request (the recorded output shows a status line per sentence), so
# re-running this cell re-scores every sentence — confirm before using it on
# long articles.
for s in sentences:
    score = get_claimbuster_score(s)
    print(f"📝 {s}")
    print(f"📊 Score: {score:.2f}")
    if score >= CHECKWORTHY_THRESHOLD:
        print("✅ CLAIM: Check-worthy")
    else:
        print("❌ CLAIM: Not significant")
    print("-" * 40)




🔁 Sent: Pilot associations in India are infuriated over the theories that are hinting that pilot action or error being behind the crash of Air India flight AI 171.
📥 Status: 200
📦 Raw Response: {"version":"2","claim":"Pilot associations in India are infuriated over the theories that are hinting that pilot action or error being behind the crash of Air India flight AI 171.","results":[{"text":"Pilot associations in India are infuriated over the theories that are hinting that pilot action or error being behind the crash of Air India flight AI 171.","index":0,"score":0.7507864314}]}
📝 Pilot associations in India are infuriated over the theories that are hinting that pilot action or error being behind the crash of Air India flight AI 171.
📊 Score: 0.75
✅ CLAIM: Check-worthy
----------------------------------------
🔁 Sent: The Indian Commercial Pilots Association (ICPA) on Sunday condemned the insinuation in public discourse and sections of the media that pilot suicide might be the cause of 

In [8]:
# Spot-check the scorer on a single hand-written sentence.
# get_claimbuster_score must already be imported in the kernel namespace.
probe_sentence = "India will host the G20 summit in 2023."
probe_score = get_claimbuster_score(probe_sentence)
print(probe_score)


🔁 Sent: India will host the G20 summit in 2023.
📥 Status: 200
📦 Raw Response: {"version":"2","claim":"India will host the G20 summit in 2023.","results":[{"text":"India will host the G20 summit in 2023.","index":0,"score":0.5477482605}]}
0.5477482605
