In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score
import tensorflow as tf
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
import pickle


In [2]:
true_data = pd.read_csv('True.csv')
false_data = pd.read_csv('Fake.csv')

# Assuming your datasets have a 'text' column representing the news text and a 'label' column indicating true or false
true_data['label'] = 1
false_data['label'] = 0

# Concatenate the datasets
data = pd.concat([true_data, false_data], ignore_index=True)

# Split data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(data['text'], data['label'], test_size=0.2, random_state=42)


In [3]:
# Initialize a TF-IDF Vectorizer
tfidf_vectorizer = TfidfVectorizer(max_features=5000)  # You can adjust max_features based on your dataset size

# Fit and transform on training data
X_train_tfidf = tfidf_vectorizer.fit_transform(X_train)
X_test_tfidf = tfidf_vectorizer.transform(X_test)


In [4]:
# Initialize Logistic Regression model
lr_classifier = LogisticRegression()

# Train the model
lr_classifier.fit(X_train_tfidf, y_train)

# Predict on the test set
y_pred_lr = lr_classifier.predict(X_test_tfidf)

# Evaluate accuracy
accuracy_lr = accuracy_score(y_test, y_pred_lr)
print("Logistic Regression Accuracy:", accuracy_lr)


Logistic Regression Accuracy: 0.9888641425389755


In [5]:
import joblib

# Save the trained Logistic Regression model using joblib
joblib.dump(lr_classifier, 'logistic_regression_model.pkl')


['logistic_regression_model.pkl']

In [6]:
# Tokenization for Neural Network
tokenizer = Tokenizer(num_words=5000)
tokenizer.fit_on_texts(data['text'])

# Save tokenizer using pickle
with open('tokenizer.pkl', 'wb') as handle:
    pickle.dump(tokenizer, handle, protocol=pickle.HIGHEST_PROTOCOL)


In [7]:
# Convert text data to sequences
X_nn = tokenizer.texts_to_sequences(data['text'])

# Pad sequences for consistent input shape
X_nn = pad_sequences(X_nn, maxlen=200)  # Assuming a max length of 200 words

# Split data for training and testing
X_train_nn, X_test_nn, y_train_nn, y_test_nn = train_test_split(X_nn, data['label'], test_size=0.2, random_state=42)


In [8]:
# Build a simple Sequential model
nn_model = tf.keras.Sequential([
    tf.keras.layers.Embedding(input_dim=5000, output_dim=64, input_length=200),
    tf.keras.layers.Flatten(),
    tf.keras.layers.Dense(64, activation='relu'),
    tf.keras.layers.Dense(1, activation='sigmoid')
])

# Compile the model
nn_model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

# Train the model
nn_model.fit(X_train_nn, y_train_nn, epochs=10, batch_size=64, validation_split=0.2)


Epoch 1/10




[1m449/449[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m12s[0m 23ms/step - accuracy: 0.8969 - loss: 0.2120 - val_accuracy: 0.9865 - val_loss: 0.0339
Epoch 2/10
[1m449/449[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 21ms/step - accuracy: 0.9983 - loss: 0.0063 - val_accuracy: 0.9916 - val_loss: 0.0259
Epoch 3/10
[1m449/449[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 22ms/step - accuracy: 0.9998 - loss: 0.0019 - val_accuracy: 0.9923 - val_loss: 0.0275
Epoch 4/10
[1m449/449[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 21ms/step - accuracy: 0.9999 - loss: 3.2924e-04 - val_accuracy: 0.9840 - val_loss: 0.0489
Epoch 5/10
[1m449/449[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 22ms/step - accuracy: 0.9998 - loss: 8.7802e-04 - val_accuracy: 0.9873 - val_loss: 0.0515
Epoch 6/10
[1m449/449[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 21ms/step - accuracy: 1.0000 - loss: 9.1781e-04 - val_accuracy: 0.9921 - val_loss: 0.0349
Epoch 7/10
[1m4

<keras.src.callbacks.history.History at 0x2743edc3ef0>

In [9]:
# Save the trained neural network model
nn_model.save('nn_model.h5')




In [14]:
import joblib
from tensorflow.keras.models import load_model
from tensorflow.keras.preprocessing.sequence import pad_sequences
import numpy as np
import pickle

# Load the trained Logistic Regression model
lr_model = joblib.load('logistic_regression_model.pkl')

# Load the trained Neural Network model
nn_model = load_model('nn_model.h5')

# Load the tokenizer
with open('tokenizer.pkl', 'rb') as handle:
    tokenizer = pickle.load(handle)

# Sample news texts for testing
texts = [
    "Donald Trump just couldn t wish all Americans a Happy New Year and leave it at that.",
    "WASHINGTON (Reuters) - The head of a conservative Republican faction in the U.S. Congress"
]

# Tokenize and preprocess the test texts for the Neural Network model
sequences = tokenizer.texts_to_sequences(texts)
padded_sequences = pad_sequences(sequences, maxlen=200)

# Use the Logistic Regression model for prediction
lr_predictions = lr_model.predict(tfidf_vectorizer.transform(texts))
lr_predicted_labels = (lr_predictions > 0.5).astype(int)

# Use the Neural Network model for prediction
nn_predictions = nn_model.predict(padded_sequences)
nn_predicted_labels = (nn_predictions > 0.5).astype(int)

# Display the predictions
print("\nLogistic Regression Predictions:")
for text, label in zip(texts, lr_predicted_labels):
    print(f"Text: {text} => {'True' if label == 1 else 'False'}")

print("\nNeural Network Predictions:")
for text, label in zip(texts, nn_predicted_labels):
    print(f"Text: {text} => {'True' if label == 1 else 'False'}")








[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 66ms/step

Logistic Regression Predictions:
Text: Donald Trump just couldn t wish all Americans a Happy New Year and leave it at that. => False
Text: WASHINGTON (Reuters) - The head of a conservative Republican faction in the U.S. Congress => True

Neural Network Predictions:
Text: Donald Trump just couldn t wish all Americans a Happy New Year and leave it at that. => False
Text: WASHINGTON (Reuters) - The head of a conservative Republican faction in the U.S. Congress => False
