## Saving the Best Model with Fast Text

In [2]:
import pandas as NewsDetPd
import warnings as NewsDetWng
# Mute every warning for the rest of the session. Be aware that this also
# hides warnings emitted later by model fitting and grid search.
NewsDetWng.filterwarnings(action='ignore')

# Read the FastText news CSV; the bare expression below renders the frame
# as this cell's output.
News_Detect = NewsDetPd.read_csv(filepath_or_buffer="FastTextNews_Covid19.csv")
News_Detect

Unnamed: 0,FastText_News,Label
0,"['tencent', 'reveal', 'real', 'number', 'death']",0
1,"['take', 'chlorin', 'dioxid', 'help', 'fight',...",0
2,"['video', 'show', 'workmen', 'uncov', 'batinfe...",0
3,"['asterix', 'comic', 'book', 'simpson', 'predi...",0
4,"['chines', 'presid', 'xi', 'jinp', 'visit', 'm...",0
...,...,...
7251,"['global', 'covid', 'prevent', 'trial', 'hydro...",1
7252,"['bavaria', 'free', 'covid', 'test', 'split', ...",1
7253,"['britain', 'lock', 'citi', 'leicest', 'covid'...",1
7254,"['uk', 'impos', 'lockdown', 'citi', 'leicest',...",1


In [3]:
# Lift pandas' display limits (column width, column count, line wrapping) in
# one call. These are session-wide settings, so they also affect every frame
# displayed by later cells.
NewsDetPd.set_option(
    'display.max_colwidth', None,
    'display.max_columns', None,
    'display.expand_frame_repr', False,
)

# Seeded sample so the same four rows are printed on every run.
print(News_Detect.sample(n=4, random_state=70))

                                                                                                                                              FastText_News  Label
3432                                                    ['real', 'sens', 'oklahoma', 'flatten', 'curv', 'number', 'case', 'oklahoma', 'declin', 'precipit']      0
6369                       ['time', 'state', 'gird', 'crush', 'covid', 'patient', 'like', 'need', 'intens', 'care', 'thousand', 'ventil', 'help', 'breath']      1
6056  ['india', 'us', 'plan', 'work', 'togeth', 'vaccin', 'research', 'test', 'covid', 'us', 'health', 'offici', 'said', 'tuesday', 'india', 'us', 'covid']      1
4923                                                  ['kerala', 'gradual', 'eas', 'covid', 'lockdown', 'restrict', 'least', 'seven', 'district', 'monday']      1


In [4]:
from sklearn.feature_extraction.text import TfidfVectorizer as NewsDet_Tfvect
from sklearn.decomposition import PCA as NewsDetPcomponent
from sklearn.model_selection import train_test_split as NewsDet_trntst

#======== Input and Output
# Only the token column feeds TF-IDF, so no separate "features" frame is built
# here (the original created one and overwrote it before use). Labels are cast
# first so that News_DetectY and the frame agree, and News_DetectY is the
# single target used by the split below.
News_Detect['Label'] = News_Detect['Label'].astype(int)
News_DetectY = News_Detect['Label']

#======== Vectorization Covid19
# NOTE(review): TF-IDF and PCA are fitted on the full corpus before the split,
# so validation/test rows influence the features. Revisit this if
# NewsDetX_va / NewsDetX_ts are ever used to report scores.
NewsDetect_Vtc = NewsDet_Tfvect(analyzer='word', ngram_range=(1, 1))
News_DetectX = NewsDetect_Vtc.fit_transform(News_Detect['FastText_News'].values.astype('U')).toarray()
print("Vectorized Shape of Covid19 FastText News: ",News_DetectX.shape)

#======== Reducing Dimensions Covid19
# random_state is pinned like every other stochastic step in this notebook:
# for a matrix of this size scikit-learn's default solver selection may use a
# randomized SVD, which would otherwise give different components per run.
newsdetPca = NewsDetPcomponent(n_components=100, random_state=70)
News_DetectX = newsdetPca.fit_transform(News_DetectX)
print("\nReduction Shape of Covid19 FastText News: ",News_DetectX.shape)

#======== Splitting 60-20-20 ratio Covid19
# First cut: 60% train / 40% hold-out. Second cut: hold-out halved into
# validation and test.
NewsDetX_tr, NewsDetX_ts, NewsDetY_tr, NewsDetY_ts = NewsDet_trntst(News_DetectX, News_DetectY, test_size=0.4, random_state=70)
NewsDetX_va, NewsDetX_ts, NewsDetY_va, NewsDetY_ts = NewsDet_trntst(NewsDetX_ts, NewsDetY_ts, test_size=0.5, random_state=70)
print("\nTraining Covid19 FastText News: ",NewsDetX_tr.shape)
print("Validation Covid19 FastText News: ",NewsDetX_va.shape)
print("Testing Covid19 FastText News: ",NewsDetX_ts.shape)

Vectorized Shape of Covid19 FastText News:  (7256, 9998)

Reduction Shape of Covid19 FastText News:  (7256, 100)

Training Covid19 FastText News:  (4353, 100)
Validation Covid19 FastText News:  (1451, 100)
Testing Covid19 FastText News:  (1452, 100)


In [5]:
from sklearn.model_selection import GridSearchCV as NewsDet_GridCV
import joblib as NewsDet_Jlib

In [6]:
from sklearn.ensemble import ExtraTreesClassifier as NewsDet_ExTr

# Hyper-parameter grid for the Extra Trees search.
# - 'n_jobs' is not searched: it only controls parallelism, so every value
#   yields the same seeded model and merely tripled the number of fits.
# - 'log_loss' is not searched: scikit-learn documents it as the same
#   Shannon-information-gain criterion as 'entropy', so it duplicated those
#   candidates.
NewsDetPmeters = {
    'criterion': ['gini', 'entropy'],
    'max_depth': [None, 1, 2, 3],
    'n_estimators': [20, 30, 90],
}

NewsDetClfier = NewsDet_ExTr(random_state=70)
NewsDetGrsrch = NewsDet_GridCV(estimator=NewsDetClfier, param_grid=NewsDetPmeters, cv=2)
NewsDetGrsrch.fit(NewsDetX_tr, NewsDetY_tr)

#====== Best Covid19 FastText model
# GridSearchCV's default refit=True has already retrained the winning
# configuration on the whole training split, so no extra .fit() is needed.
NewsDet_BstMod = NewsDetGrsrch.best_estimator_

#============= Save the trained model
# The file is a joblib dump; the ".h5" suffix is only a name, not HDF5.
NewsDet_Jlib.dump(NewsDet_BstMod,"WithFastText_ET-Model.h5")
NewsDet_BstMod

In [7]:
# ===== Load the trained model
# Only the classifier is read back from disk here. The TF-IDF vectorizer
# (NewsDetect_Vtc) and the PCA (newsdetPca) are never saved by this notebook,
# so predictions still depend on the in-memory objects fitted earlier.
# The file was written with joblib.dump despite its ".h5" name.
# NOTE(review): joblib.load unpickles its input - only load files you trust.
NewsDet_BstMod = NewsDet_Jlib.load("WithFastText_ET-Model.h5")


def NewsDet_testing_news(news_tokens, vectorizer=None, reducer=None, model=None):
    """Classify one pre-processed news item and print the verdict.

    Parameters
    ----------
    news_tokens : iterable of str, or str
        Tokens of the news item. They are joined with single spaces before
        vectorization. A plain string is used as-is instead of being joined
        character by character.
    vectorizer, reducer, model : optional
        Fitted text vectorizer, dimensionality reducer and classifier to use.
        When omitted, the notebook globals ``NewsDetect_Vtc``, ``newsdetPca``
        and ``NewsDet_BstMod`` are read at call time. Later cells rebind those
        globals to objects fitted on a different dataset, so pass the objects
        explicitly to keep this helper tied to the FastText pipeline.

    Returns
    -------
    None
        The result is printed: "True News" when the predicted label is 1,
        "Fake News" otherwise.
    """
    # Fall back to the notebook-level objects only for arguments not supplied.
    if vectorizer is None:
        vectorizer = NewsDetect_Vtc
    if reducer is None:
        reducer = newsdetPca
    if model is None:
        model = NewsDet_BstMod

    ### Join the tokens to a single string for TF-IDF
    if isinstance(news_tokens, str):
        NewsDet_input_text = news_tokens
    else:
        NewsDet_input_text = " ".join(news_tokens)

    # Same order as at training time: vectorize, densify, reduce, predict.
    NewsDet_vect_input = vectorizer.transform([NewsDet_input_text]).toarray()
    NewsDet_reduced_input = reducer.transform(NewsDet_vect_input)
    prediction_NewsDet = model.predict(NewsDet_reduced_input)

    if prediction_NewsDet[0] == 1:
        print("\nThe News is **True News**")
    else:
        print("\nThe News is **Fake News**")

Testing the Extra Trees (ET) model

In [8]:
# Prompt for one news item, lower-case it and break it into whitespace tokens
# (split() with no arguments already discards surrounding whitespace).
NewsDet_user_input = input("Enter the input news:\n")
NewsDet_user_tokens = NewsDet_user_input.lower().split()

# Classify the tokens and print the verdict.
NewsDet_testing_news(news_tokens=NewsDet_user_tokens)

Enter the input news:
'kerala', 'gradual', 'eas', 'covid', 'lockdown', 'restrict', 'least', 'seven', 'district', 'monday'

The News is **True News**


In [9]:
# Second manual check: read another news item from the user.
NewsDet_user_input = input("Enter the input news:\n")

# Normalise to lower case, then tokenise on whitespace.
NewsDet_user_tokens = NewsDet_user_input.lower().split()

NewsDet_testing_news(news_tokens=NewsDet_user_tokens)

Enter the input news:
'tencent', 'reveal', 'real', 'number', 'death'

The News is **Fake News**


## Saving the Best Models Without Fast Text

In [10]:
# Switch to the processed-text CSV. This rebinds News_Detect, so the frame
# loaded for the FastText section is no longer reachable under this name.
News_Detect = NewsDetPd.read_csv(filepath_or_buffer="ProcessedNews_Covid19.csv")

# Show the first four rows as the cell output.
News_Detect.head(n=4)

Unnamed: 0,Text,Label
0,Tencent revealed the real number of deaths.\t\t,0
1,Taking chlorine dioxide helps fight coronavirus.\t\t,0
2,This video shows workmen uncovering a bat-infested roof in the US state of Florida in 2011.\t\t,0
3,The Asterix comic books and The Simpsons predicted the coronavirus outbreak.\t\t,0


In [12]:
from sklearn.feature_extraction.text import TfidfVectorizer as NewsDet_Tfvect
from sklearn.decomposition import PCA as NewsDetPcomponent
from sklearn.model_selection import train_test_split as NewsDet_trntst

#======== Input and Output
# Only the 'Text' column feeds TF-IDF, so no separate "features" frame is
# built here (the original created one and overwrote it before use). Labels
# are cast first so that News_DetectY and the frame agree, and News_DetectY is
# the single target used by the split below.
News_Detect['Label'] = News_Detect['Label'].astype(int)
News_DetectY = News_Detect['Label']

#======== Vectorization Covid19
# This rebinds NewsDetect_Vtc (and newsdetPca below), the same globals that
# NewsDet_testing_news reads; after this cell that helper would use the
# processed-text pipeline unless it is given its objects explicitly.
# NOTE(review): TF-IDF and PCA are fitted on the full corpus before the split,
# so validation/test rows influence the features.
# NOTE(review): the printed labels still say "FastText" although this section
# works on ProcessedNews_Covid19.csv; the strings are left unchanged.
NewsDetect_Vtc = NewsDet_Tfvect(analyzer='word', ngram_range=(1, 1))
News_DetectX = NewsDetect_Vtc.fit_transform(News_Detect['Text'].values.astype('U')).toarray()
print("Vectorized Shape of Covid19 FastText News: ",News_DetectX.shape)

#======== Reducing Dimensions Covid19
# random_state is pinned like every other stochastic step in this notebook:
# for a matrix of this size scikit-learn's default solver selection may use a
# randomized SVD, which would otherwise give different components per run.
newsdetPca = NewsDetPcomponent(n_components=100, random_state=70)
News_DetectX = newsdetPca.fit_transform(News_DetectX)
print("\nReduction Shape of Covid19 FastText News: ",News_DetectX.shape)

#======== Splitting 60-20-20 ratio Covid19
# First cut: 60% train / 40% hold-out. Second cut: hold-out halved into
# validation and test.
NewsDetX_tr, NewsDetX_ts, NewsDetY_tr, NewsDetY_ts = NewsDet_trntst(News_DetectX, News_DetectY, test_size=0.4, random_state=70)
NewsDetX_va, NewsDetX_ts, NewsDetY_va, NewsDetY_ts = NewsDet_trntst(NewsDetX_ts, NewsDetY_ts, test_size=0.5, random_state=70)
print("\nTraining Covid19 FastText News: ",NewsDetX_tr.shape)
print("Validation Covid19 FastText News: ",NewsDetX_va.shape)
print("Testing Covid19 FastText News: ",NewsDetX_ts.shape)

Vectorized Shape of Covid19 FastText News:  (7256, 17446)

Reduction Shape of Covid19 FastText News:  (7256, 100)

Training Covid19 FastText News:  (4353, 100)
Validation Covid19 FastText News:  (1451, 100)
Testing Covid19 FastText News:  (1452, 100)


In [13]:
from sklearn.linear_model import LogisticRegression as NewsDet_LReg
# Hyper-parameter grid for Logistic Regression, split into sub-grids so each
# solver is only paired with penalties it implements. The original crossed
# every penalty with every solver, so many candidates could never be fitted
# and were scored NaN behind the silenced warnings.
# - 'elasticnet' is not searched: it needs an l1_ratio, which the original
#   grid never supplied, so none of those candidates could fit.
# - "no penalty" is spelled None (scikit-learn >= 1.2); the string 'none' was
#   removed in 1.4. C has no effect without a penalty, so that sub-grid does
#   not vary it.
# - 'n_jobs' is not searched: it only affects parallelism, not the model.
NewsDetPmeters = [
    {
        'penalty': ['l2'],
        'solver': ['newton-cg', 'lbfgs', 'liblinear', 'sag', 'saga'],
        'C': [0.1, 0.2, 0.3, 0.8],
        'fit_intercept': [True, False],
    },
    {
        'penalty': ['l1'],
        'solver': ['liblinear', 'saga'],
        'C': [0.1, 0.2, 0.3, 0.8],
        'fit_intercept': [True, False],
    },
    {
        'penalty': [None],
        'solver': ['newton-cg', 'lbfgs', 'sag', 'saga'],
        'fit_intercept': [True, False],
    },
]

NewsDetClfier = NewsDet_LReg(random_state=70)
NewsDetGrsrch = NewsDet_GridCV(estimator=NewsDetClfier, param_grid=NewsDetPmeters, cv=2)
NewsDetGrsrch.fit(NewsDetX_tr, NewsDetY_tr)

#====== Best Covid19 processed-text model
# GridSearchCV's default refit=True has already retrained the winning
# configuration on the whole training split, so no extra .fit() is needed.
NewsDet_BstMod = NewsDetGrsrch.best_estimator_

#============= Save the trained model
# The file is a joblib dump; the ".h5" suffix is only a name, not HDF5.
NewsDet_Jlib.dump(NewsDet_BstMod,"WithoutFastText_LR-Model.h5")
NewsDet_BstMod

In [14]:
def NewsDet_testing_covid_news(news_text, vectorizer=None, reducer=None, model=None):
    """Classify one raw news text and print whether it is true or fake.

    Parameters
    ----------
    news_text : str
        The news text; it is wrapped in a one-element list for the vectorizer.
    vectorizer, reducer, model : optional
        Fitted text vectorizer, dimensionality reducer and classifier to use.
        When omitted, the notebook globals ``NewsDetect_Vtc``, ``newsdetPca``
        and ``NewsDet_BstMod`` are read at call time. ``NewsDet_BstMod`` is
        reassigned by every training cell, so pass ``model`` explicitly to
        test a specific classifier.

    Returns
    -------
    None
        The result is printed: "TRUE News" when the predicted label is 1,
        "FAKE News" otherwise.
    """
    # Fall back to the notebook-level objects only for arguments not supplied.
    if vectorizer is None:
        vectorizer = NewsDetect_Vtc
    if reducer is None:
        reducer = newsdetPca
    if model is None:
        model = NewsDet_BstMod

    # Same order as at training time: vectorize, densify, reduce, predict.
    NewsDet_input_vector = vectorizer.transform([news_text]).toarray()
    NewsDet_input_vector = reducer.transform(NewsDet_input_vector)
    prediction_NewsDet = model.predict(NewsDet_input_vector)

    if prediction_NewsDet[0] == 1:
        print("\nThe news is: **TRUE News**")
    else:
        print("\nThe news is: **FAKE News**")

# === Manual check of the current model ===
# Read one raw news text from the user and print the predicted verdict.
NewsDet_input_news = input("Enter the input news: ")
NewsDet_testing_covid_news(news_text=NewsDet_input_news)

Enter the input news: Bavaria's free COVID-19 test for all splits Germany https://uk.reuters.com/article/uk-health-coronavirus-germany-tests-idUKKBN24126I?taid=5efb5ba5621aa70001d01de1&utm_campaign=trueAnthem%3A+Trending+Content&utm_medium=trueAnthem&utm_source=twitter  pic.twitter.com/4DEGAKDBJE

The news is: **TRUE News**


In [15]:
from sklearn.ensemble import GradientBoostingClassifier as NewsDet_GBMachine
# Hyper-parameter grid for Gradient Boosting.
# - 'log_loss' was listed twice, which made every one of its candidates be
#   fitted twice; each loss now appears once.
# - min_samples_split=1 is dropped: scikit-learn requires an integer >= 2 (or
#   a fraction), so those candidates could never be fitted and were scored
#   NaN behind the silenced warnings.
NewsDetPmeters = {
    'loss': ['log_loss', 'exponential'],
    'learning_rate': [0.1, 0.001, 0.0001, 0.2],
    'n_estimators': [20, 30, 90],
    'min_samples_split': [2, 4],
}

NewsDetClfier = NewsDet_GBMachine(random_state=70)
NewsDetGrsrch = NewsDet_GridCV(estimator=NewsDetClfier, param_grid=NewsDetPmeters, cv=2)
NewsDetGrsrch.fit(NewsDetX_tr, NewsDetY_tr)

#====== Best Covid19 processed-text model
# GridSearchCV's default refit=True has already retrained the winning
# configuration on the whole training split, so no extra .fit() is needed.
NewsDet_BstMod = NewsDetGrsrch.best_estimator_

#============= Save the trained model
# The file is a joblib dump; the ".h5" suffix is only a name, not HDF5.
NewsDet_Jlib.dump(NewsDet_BstMod,"WithoutFastText_GB-Model.h5")
NewsDet_BstMod

In [16]:
def NewsDet_testing_covid_news(news_text, vectorizer=None, reducer=None, model=None):
    """Print whether one raw news text is predicted true or fake.

    This cell re-declares the helper of the same name from earlier in the
    notebook; the later definition is the one in effect from here on.

    Parameters
    ----------
    news_text : str
        The news text; it is wrapped in a one-element list for the vectorizer.
    vectorizer, reducer, model : optional
        Fitted text vectorizer, dimensionality reducer and classifier to use.
        Any argument left as None falls back to the notebook global
        ``NewsDetect_Vtc``, ``newsdetPca`` or ``NewsDet_BstMod`` respectively,
        read at call time. ``NewsDet_BstMod`` is reassigned by every training
        cell, so pass ``model`` explicitly to test a specific classifier.

    Returns
    -------
    None
        The result is printed: "TRUE News" when the predicted label is 1,
        "FAKE News" otherwise.
    """
    # Resolve defaults lazily so explicit arguments never touch the globals.
    if vectorizer is None:
        vectorizer = NewsDetect_Vtc
    if reducer is None:
        reducer = newsdetPca
    if model is None:
        model = NewsDet_BstMod

    # Convert input text into a list so TF-IDF can process it
    NewsDet_input_vector = vectorizer.transform([news_text]).toarray()

    # Reduce dimensions using the fitted reducer, then predict
    NewsDet_input_vector = reducer.transform(NewsDet_input_vector)
    prediction_NewsDet = model.predict(NewsDet_input_vector)

    if prediction_NewsDet[0] == 1:
        print("\nThe news is: **TRUE News**")
    else:
        print("\nThe news is: **FAKE News**")

# === Manual check of the current model ===
# Ask for one raw news text, then classify it with the helper defined above.
NewsDet_input_news = input("Enter the input news: ")
NewsDet_testing_covid_news(news_text=NewsDet_input_news)

Enter the input news: Tencent revealed the real number of deaths.\t\t

The news is: **FAKE News**


In [17]:
from sklearn.linear_model import SGDClassifier as NewsDet_SGradienr
# Hyper-parameter grid for the SGD classifier.
# 'n_jobs' is not searched: it only sets how many CPUs are used and does not
# change the seeded model, so searching it merely tripled the number of fits.
NewsDetPmeters = {
    'loss': ['hinge', 'log_loss', 'modified_huber','squared_hinge','perceptron'],
    'penalty': ['l1', 'l2', 'elasticnet'],
    'alpha': [0.1, 0.02, 0.001, 0.00001],
    'fit_intercept': [True, False],
}

NewsDetClfier = NewsDet_SGradienr(random_state=70)
NewsDetGrsrch = NewsDet_GridCV(estimator=NewsDetClfier, param_grid=NewsDetPmeters, cv=2)
NewsDetGrsrch.fit(NewsDetX_tr, NewsDetY_tr)

#====== Best Covid19 processed-text model
# GridSearchCV's default refit=True has already retrained the winning
# configuration on the whole training split, so no extra .fit() is needed.
NewsDet_BstMod = NewsDetGrsrch.best_estimator_

#============= Save the trained model
# The file is a joblib dump; the ".h5" suffix is only a name, not HDF5.
NewsDet_Jlib.dump(NewsDet_BstMod,"WithoutFastText_SGD-Model.h5")
NewsDet_BstMod

In [18]:
def NewsDet_testing_covid_news(news_text, vectorizer=None, reducer=None, model=None):
    """Run one raw news text through the pipeline and print the verdict.

    This cell re-declares the helper of the same name from earlier in the
    notebook; the later definition is the one in effect from here on.

    Parameters
    ----------
    news_text : str
        The news text; it is wrapped in a one-element list for the vectorizer.
    vectorizer, reducer, model : optional
        Fitted text vectorizer, dimensionality reducer and classifier to use.
        Each one defaults to the matching notebook global
        (``NewsDetect_Vtc``, ``newsdetPca``, ``NewsDet_BstMod``), looked up
        when the function is called. ``NewsDet_BstMod`` is reassigned by every
        training cell, so pass ``model`` explicitly to test a specific
        classifier.

    Returns
    -------
    None
        The result is printed: "TRUE News" when the predicted label is 1,
        "FAKE News" otherwise.
    """
    # Only consult the globals for pieces the caller did not provide.
    if vectorizer is None:
        vectorizer = NewsDetect_Vtc
    if reducer is None:
        reducer = newsdetPca
    if model is None:
        model = NewsDet_BstMod

    # Convert input text into a list so TF-IDF can process it
    NewsDet_input_vector = vectorizer.transform([news_text]).toarray()

    # Reduce dimensions using the fitted reducer
    NewsDet_input_vector = reducer.transform(NewsDet_input_vector)

    # Make prediction
    prediction_NewsDet = model.predict(NewsDet_input_vector)

    # Print the result
    if prediction_NewsDet[0] == 1:
        print("\nThe news is: **TRUE News**")
    else:
        print("\nThe news is: **FAKE News**")

# === Manual check of the current model ===
# Take one raw news text from the prompt and print its predicted verdict.
NewsDet_input_news = input("Enter the input news: ")
NewsDet_testing_covid_news(news_text=NewsDet_input_news)

Enter the input news: Grace Fusco, the matriarch of a large New Jersey family, died in March from Covid-19 without knowing that her 2 oldest children died before her. \n\nThe coronavirus ultimately killed 5 members of the Fusco family, and infected at least 19 others.\n https://www.nytimes.com/2020/03/18/nyregion/new-jersey-family-coronavirus.html

The news is: **TRUE News**
