In [1]:
import pandas as pd

# Load both CSV files, tag every row with its class (0 = Fake, 1 = Real),
# and keep only the 'text' and 'label' columns. `.assign` returns a new frame,
# so the cell is idempotent and never mutates a frame in place.
fake = pd.read_csv("Fake.csv").assign(label=0)[['text', 'label']]
true = pd.read_csv("True.csv").assign(label=1)[['text', 'label']]

# Combine both classes into one dataframe
df = pd.concat([fake, true])

# Shuffle the rows. A fixed random_state makes the shuffle reproducible, so the
# preview below and everything derived from `df` are stable across
# Restart & Run All. 42 matches the seed passed to train_test_split.
df = df.sample(frac=1, random_state=42).reset_index(drop=True)

# Preview the dataset
df.head()


Unnamed: 0,text,label
0,WASHINGTON (Reuters) - White House homeland se...,1
1,KIEV (Reuters) - Ukrainian President Petro Por...,1
2,21st Century Wire asks Will this be the beginn...,0
3,WASHINGTON (Reuters) - A long-running battle o...,1
4,He was such a nice boy!Where have we heard thi...,0


In [4]:
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import TfidfVectorizer

# Hold out 20% of the rows for evaluation; the fixed seed keeps the split stable.
X_train, X_test, y_train, y_test = train_test_split(
    df['text'],
    df['label'],
    test_size=0.2,
    random_state=42,
)

# TF-IDF features: drop English stop words and any term that appears in more
# than 70% of the documents.
vectorizer = TfidfVectorizer(stop_words='english', max_df=0.7)

# The vocabulary and IDF weights are learned from the training text only;
# the test text is merely projected onto that fitted vocabulary.
X_train_vec = vectorizer.fit_transform(X_train)
X_test_vec = vectorizer.transform(X_test)


In [5]:
from sklearn.linear_model import LogisticRegression

# Build the logistic-regression classifier and fit it on the TF-IDF training
# matrix in one step (`fit` returns the estimator itself).
# max_iter=1000 gives the solver a larger iteration budget to converge.
model = LogisticRegression(max_iter=1000).fit(X_train_vec, y_train)

print("Model training completed!")


Model training completed!


In [6]:
from sklearn.metrics import classification_report, confusion_matrix, accuracy_score

# Score the held-out test features with the trained classifier
y_pred = model.predict(X_test_vec)

# Overall fraction of correct predictions, printed to four decimals
accuracy = accuracy_score(y_test, y_pred)
print(f"Accuracy: {accuracy:.4f}")

# Per-class precision / recall / F1 / support
report = classification_report(y_test, y_pred)
print("Classification Report:", report, sep="\n")

# Counts of each true-label / predicted-label combination
matrix = confusion_matrix(y_test, y_pred)
print("Confusion Matrix:", matrix, sep="\n")


Accuracy: 0.9845
Classification Report:
              precision    recall  f1-score   support

           0       0.99      0.98      0.99      4704
           1       0.98      0.99      0.98      4276

    accuracy                           0.98      8980
   macro avg       0.98      0.98      0.98      8980
weighted avg       0.98      0.98      0.98      8980

Confusion Matrix:
[[4615   89]
 [  50 4226]]


In [7]:
import pickle

# Persist the fitted classifier and the fitted TF-IDF vectorizer.
# The `with` blocks close each file handle as soon as the dump finishes, so the
# pickles are fully flushed to disk and no handle is leaked in the kernel.
with open('model.pkl', 'wb') as model_file:
    pickle.dump(model, model_file)

with open('vectorizer.pkl', 'wb') as vectorizer_file:
    pickle.dump(vectorizer, vectorizer_file)


In [2]:
pip install streamlit


Note: you may need to restart the kernel to use updated packages.


In [3]:
streamlit run app.py


SyntaxError: invalid syntax (507122745.py, line 1)