In [36]:
from google.colab import drive
# Mount Google Drive at /content/drive so the pickled artifacts stored there
# can be opened by ordinary file paths in the cells below.
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [37]:
import pickle
from sklearn.feature_extraction.text import CountVectorizer

In [38]:
# Load the pickled vectorizer that is later applied to the preprocessed headline
# (presumably a fitted CountVectorizer n-gram model -- confirm against the training notebook).
# NOTE(security): pickle.load can execute arbitrary code; only load files you created or trust.
filename = "/content/drive/My Drive/MIT AI Class Work/Final Project/n-gram_creator_title"
# The context manager closes the file even when unpickling raises.
with open(filename, 'rb') as infile:
    word_vectorizer1 = pickle.load(infile)

In [39]:
# Load the pickled vectorizer that is later applied to the preprocessed article text
# (presumably a fitted CountVectorizer n-gram model -- confirm against the training notebook).
# NOTE(security): pickle.load can execute arbitrary code; only load files you created or trust.
filename = "/content/drive/My Drive/MIT AI Class Work/Final Project/n-gram_creator_text"
# The context manager closes the file even when unpickling raises.
with open(filename, 'rb') as infile:
    word_vectorizer2 = pickle.load(infile)

In [40]:
# Load the pickled classifier whose predict() is used to label articles
# (the file name suggests an SVM trained on title + text features -- confirm).
# NOTE(security): pickle.load can execute arbitrary code; only load files you created or trust.
filename = "/content/drive/My Drive/MIT AI Class Work/Final Project/SVM_with_text_model"
# The context manager closes the file even when unpickling raises.
with open(filename, 'rb') as infile:
    model = pickle.load(infile)

In [41]:
import pandas as pd
import nltk
# Fetch the NLTK data packages used by preprocess(); each call is a no-op
# when the package is already up to date (see the log output below).
nltk.download('stopwords')  # English stop-word list read via nltk.corpus.stopwords
nltk.download('punkt')      # presumably the tokenizer models behind nltk.word_tokenize
nltk.download('wordnet')    # presumably the lexical database behind WordNetLemmatizer
from nltk.corpus import stopwords
from nltk.stem import WordNetLemmatizer 
from sklearn import svm
from scipy.sparse import hstack

[nltk_data] Downloading package stopwords to /root/nltk_data...
[nltk_data]   Package stopwords is already up-to-date!
[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package wordnet to /root/nltk_data...
[nltk_data]   Package wordnet is already up-to-date!


In [42]:
def preprocess(headline):
  """Normalize raw text into a space-separated string of lemmatized keywords.

  The text is lowercased and tokenized with ``nltk.word_tokenize``; tokens
  containing any non-alphanumeric character (e.g. punctuation) are dropped,
  English stop words are removed, and every remaining token is lemmatized
  with ``WordNetLemmatizer``.

  Args:
    headline: The raw string to clean (a headline or a body of text).

  Returns:
    The surviving lemmas joined by single spaces ("" when nothing survives).
  """
  # Lowercase first so the stop-word comparison below is case-insensitive.
  alnum_tokens = [
      token
      for token in nltk.word_tokenize(headline.lower())
      if token.isalnum()
  ]

  english_stops = set(stopwords.words("english"))
  wordnet = WordNetLemmatizer()

  lemmas = []
  for token in alnum_tokens:
    if token in english_stops:
      continue
    lemmas.append(wordnet.lemmatize(token))

  return " ".join(lemmas)


In [43]:
def classify_headline_and_text(headline, text):
  """Label an article as fake or real from its headline and body text.

  Both strings are cleaned with ``preprocess``, vectorized with the
  module-level ``word_vectorizer1`` (headline) and ``word_vectorizer2``
  (text), stacked column-wise and passed to the module-level ``model``.

  Args:
    headline: The article headline as a string.
    text: The article body (or an excerpt of it) as a string.

  Returns:
    "FAKE NEWS!" when the model predicts label 1, otherwise "Real News".
  """
  processed_headline = preprocess(headline)
  processed_text = preprocess(text)

  # Each vectorizer receives a one-document list, so each result has one row.
  title_features = word_vectorizer1.transform([processed_headline])
  text_features = word_vectorizer2.transform([processed_text])

  # Text columns first, then title columns.
  # NOTE(review): this order presumably mirrors how the model was trained -- confirm.
  combined_features = hstack([text_features, title_features])

  # predict() yields one label per row; take the single label explicitly
  # instead of relying on the truth value of an array comparison.
  predicted_label = model.predict(combined_features)[0]

  if predicted_label == 1:
    return "FAKE NEWS!"
  return "Real News"

In [44]:
#Real News found at https://www.bbc.com/news/world-us-canada-53426285

# Demo inputs: the headline and opening paragraph of the article linked above.
headline = "Brad Parscale replaced as Trump's campaign manager"
text = "Facing a tough re-election battle, US President Donald Trump has replaced his campaign manager. Mr Trump said Bill Stepien, a field director for his 2016 campaign, would take the place of Brad Parscale. Mr Parscale - who was reportedly blamed by Mr Trump's inner circle for a poorly attended rally in Oklahoma last month - will stay on as senior adviser. Opinion polls show the president is trailing his Democratic challenger Joe Biden ahead of November's election. "

# Keep at most the first 1000 characters; slicing returns the whole string
# unchanged when it is shorter than that, so no length check is needed.
first_1000_chars_of_text = text[:1000]

print(classify_headline_and_text(headline, first_1000_chars_of_text))

Real News


In [45]:
#Fake news found at https://politics.theonion.com/nancy-pelosi-calls-jamaal-bowman-to-scold-him-for-winni-1844151565

# Demo inputs: the headline and body of the satirical article linked above.
headline = "Nancy Pelosi Calls Jamaal Bowman To Scold Him For Winning Primary"
text = "WASHINGTON—Following the progressive challenger’s victory over 16-term incumbent Rep. Eliot Engel (D-NY), House Speaker Nancy Pelosi phoned Jamaal Bowman to scold him for winning his primary race, sources confirmed Wednesday. “I just wanted to call and personally reprimand you for your victory,” said Pelosi, extending her sincerest indignation to the former Bronx middle school educator, who is expected to easily win the general election in his heavily Democratic congressional district. “I understand there are some mail-in ballots that still need to be counted, but it appears you won big last night and energized a lot of first-time voters and young people we absolutely did not want voting in this primary. So allow me to extend my sincerest fuck-you for everything you’ve done. Obviously, we’re going to be working together soon, so I look forward to crushing you the first chance I get.” Pelosi added that when things became official in November, she would call again to express how frustrated she was to welcome Bowman to Congress."

# Keep at most the first 1000 characters; slicing returns the whole string
# unchanged when it is shorter than that, so no length check is needed.
first_1000_chars_of_text = text[:1000]

print(classify_headline_and_text(headline, first_1000_chars_of_text))

FAKE NEWS!
