In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.linear_model import PassiveAggressiveClassifier
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import classification_report, accuracy_score
import joblib

# Train a FAKE/REAL news classifier from two CSV files and persist the fitted
# model, vectorizer and label encoder with joblib.
#
# Inputs : "Fake.csv" and "True.csv" in the working directory; each must
#          provide "title" and "text" columns.
# Outputs: "model.pkl", "vectorizer.pkl", "label_encoder.pkl", plus an
#          accuracy figure and classification report printed to stdout.

# Step 1: Load and label data
fake_df = pd.read_csv("Fake.csv")
real_df = pd.read_csv("True.csv")

# Tag each source with its class and build one text field per article
# (title + text). A missing title or text yields NaN content, which is
# dropped in Step 2.
for frame, tag in ((fake_df, "FAKE"), (real_df, "REAL")):
    frame["label"] = tag
    frame["content"] = frame["title"] + " " + frame["text"]

# Step 2: Merge and shuffle
df = pd.concat([fake_df[["content", "label"]], real_df[["content", "label"]]])
df = df.dropna()
# Seeded shuffle so the row order is the same on every run.
df = df.sample(frac=1, random_state=42).reset_index(drop=True)

# Step 3: Label encoding
# LabelEncoder orders classes by sorted value, so 0 = FAKE, 1 = REAL.
le = LabelEncoder()
df["encoded_label"] = le.fit_transform(df["label"])

# Step 4: Split (80% train / 20% test, seeded)
X_train, X_test, y_train, y_test = train_test_split(
    df["content"], df["encoded_label"], test_size=0.2, random_state=42
)

# Step 5: CountVectorizer
# Unigram + bigram counts, English stop words removed, terms seen in fewer
# than 3 training documents discarded. The vocabulary is fitted on the
# training split only and then reused for the test split.
vectorizer = CountVectorizer(stop_words="english", ngram_range=(1, 2), min_df=3)
X_train_vec = vectorizer.fit_transform(X_train)
X_test_vec = vectorizer.transform(X_test)

# Step 6: Train PassiveAggressive model
# The classifier reshuffles the training data each epoch; without a fixed
# seed the learned weights (and the saved model) differ between runs. Use the
# same seed as the shuffle and split above so the whole pipeline is
# reproducible.
model = PassiveAggressiveClassifier(max_iter=1000, random_state=42)
model.fit(X_train_vec, y_train)

# Step 7: Evaluate
# NOTE(review): a near-perfect held-out score can indicate source-specific
# artifacts in the text (e.g. boilerplate present in only one CSV) or
# duplicated articles on both sides of the split -- confirm before relying
# on this number.
y_pred = model.predict(X_test_vec)
accuracy_pct = round(accuracy_score(y_test, y_pred) * 100, 2)
print("\n✅ Accuracy:", accuracy_pct, "%")
print("\n📊 Classification Report:\n", classification_report(y_test, y_pred, target_names=le.classes_))

# Step 8: Save model
# All three artifacts are required at inference time: the vectorizer to turn
# text into features, the model to predict, and the encoder to map the
# predicted integers back to "FAKE"/"REAL".
joblib.dump(model, "model.pkl")
joblib.dump(vectorizer, "vectorizer.pkl")
joblib.dump(le, "label_encoder.pkl")
print("\n✅ Model, vectorizer, and label encoder saved!")



✅ Accuracy: 99.48 %

📊 Classification Report:
               precision    recall  f1-score   support

        FAKE       0.99      1.00      1.00      4710
        REAL       1.00      0.99      0.99      4270

    accuracy                           0.99      8980
   macro avg       0.99      0.99      0.99      8980
weighted avg       0.99      0.99      0.99      8980


✅ Model, vectorizer, and label encoder saved!
