#### Import all Libraries

In [1]:

from sklearn.feature_extraction.text import TfidfVectorizer
import pickle
import numpy as np
from flask import Flask, request, jsonify
import pandas as pd

from gensim.matutils import softcossim
import gensim
from gensim import corpora
from gensim.utils import simple_preprocess

from nltk.stem.wordnet import WordNetLemmatizer
from nltk.corpus import stopwords
import string

import lime
import lime.lime_text


import re
import webbrowser



#### Import trained model along with tfidf  vocabulary

In [2]:
app = Flask(__name__)
news_classifier_model = pickle.load(open('model1.pkl', 'rb'))
tf1_new = TfidfVectorizer(sublinear_tf=True, min_df=5, norm='l2', encoding='latin-1', ngram_range=(1, 2), stop_words='english',vocabulary=pickle.load(open("feature.pkl", "rb")))

#### For recommendations Import all similarity matrix along with its dictionary created using gensim 

In [3]:
business_similarity_matrix=pickle.load(open('business_similarity_matrix.mat','rb'))
politics_similarity_matrix=pickle.load(open('politics_similarity_matrix.mat','rb'))
entertainment_similarity_matrix=pickle.load(open('entertainment_similarity_matrix.mat','rb'))
tech_similarity_matrix=pickle.load(open('tech_similarity_matrix.mat','rb'))
sport_similarity_matrix=pickle.load(open('sport_similarity_matrix.mat','rb'))

In [4]:
business_dictionary=pickle.load(open('business_dictionary','rb'))
politics_dictionary=pickle.load(open('politics_dictionary','rb'))
tech_dictionary=pickle.load(open('tech_dictionary','rb'))
entertainment_dictionary=pickle.load(open('entertainment_dictionary','rb'))
sport_dictionary=pickle.load(open('sport_dictionary','rb'))

#### For cleaning of entered text 

In [5]:
stop = set(stopwords.words('english'))
exclude = set(string.punctuation)
lemma = WordNetLemmatizer()

def get_words( text ): 
    stop_free = " ".join([i for i in text.lower().split() if i not in stop]) #to remove words like is,the,am,I,to
    punc_free = ''.join(ch for ch in stop_free if ch not in exclude) #to remove punctuations , . '
    digit_free=' '.join(s for s in punc_free.split() if not any(c.isdigit() for c in s))
    normalized = " ".join(lemma.lemmatize(word) for word in digit_free.split()) #using wordnetlemmatizer to replace words
    return normalized
   

#### Function to get similar docs

In [6]:
def compute_similar_docs(query,docs,category,num_of_recommendation_articles):
    category_dictionary={'business':business_dictionary,'politics':politics_dictionary,'tech':tech_dictionary,'entertainement':entertainment_dictionary,'sport':sport_dictionary}
    category_similarity_matrix={'business':business_similarity_matrix,'politics':politics_similarity_matrix,'tech':tech_similarity_matrix,'entertainement':entertainment_similarity_matrix,'sport':sport_similarity_matrix}
    query_vec=category_dictionary[category].doc2bow(simple_preprocess(get_words(query)))   
    similarity_series=[]
    print('reached here')
    for doc in docs.text: 
        doc_vec = category_dictionary[category].doc2bow(simple_preprocess(get_words(doc)))                            
        similarity_series.append(softcossim(query_vec,doc_vec,category_similarity_matrix[category]))     
    
    df=pd.DataFrame({'score':similarity_series})
    df['filename']=[filename for filename in docs.filename]    
    df['headline']=[headline for headline in docs.headlines]       
    
    return df.sort_values(['score'],ascending=False)[:num_of_recommendation_articles]
        

#### Dataset from which recommendation articles will be pulled

In [7]:
dataset=pd.read_excel('C:/Users/vivyadav/Desktop/news_classify/dataset.xls')
received_text=''

#### LIME for Prediction Explaination

In [8]:
def fasttext_prediction_in_sklearn_format(classifier, texts):
    res = []
    labels=[]
    probabilities=[]
    
    text_features=tf1_new.fit_transform(texts)
    output=zip(classifier.predict(text_features),classifier.predict_proba(text_features))
    for i in output:
        labels.append([0,1,2,3,4])
        probabilities.append(i[1])
    
    for label, probs, text in zip(labels, probabilities, texts):
        order = np.argsort(np.array(label))
        res.append(probs[order])

    return np.array(res)

In [9]:
def tokenize_string(string):
    return string.split()

explainer = lime.lime_text.LimeTextExplainer(
    # We need to tell LIME how to split the string into words. We can do this
    # by giving it a function to call to split a string up the same way FastText does it.
    split_expression=tokenize_string,
    # Our FastText classifer uses bigrams (two-word pairs) to classify text. Setting
    # bow=False tells LIME to not assume that our classifier is based on single words only.
    bow=False,
    # To make the output pretty, tell LIME what to call each possible prediction from our model.
    class_names=['business', 'entertainment', 'politics', 'sport', 'tech']
)

In [10]:
def explaination(text):
    exp = explainer.explain_instance(
        # The review to explain
        text,
        # The wrapper function that returns FastText predictions in scikit-learn format
        classifier_fn=lambda x: fasttext_prediction_in_sklearn_format(news_classifier_model, x),
        # How many labels to explain. We just want to explain the single most likely label.
        top_labels=5,
        # How many words in our sentence to include in the explanation. You can try different values.
        num_features=10
    )
    exp.save_to_file("explanation1.html")
    webbrowser.open("http://localhost:8888/view/explanation1.html")

#### functions performed on flask request

In [11]:
@app.route('/api',methods=['POST'])
def news_predict():
    ## Get text sent through post request
    text=request.get_json(force=True)
    received_text=text['content']
    
    print(received_text)
    ## find features of received text
    text_features=tf1_new.fit_transform([get_words(received_text)])
    
    ## Make prediction through trained loaded model
    predictions1 = news_classifier_model.predict(text_features)
    id_to_category=np.array(['business','entertainment','politics','sport','tech'])
    prediction=id_to_category[predictions1]
    predicted_category=prediction[0]
    print('Prediction',predicted_category)
    
    ##Find similar articles using compute_similar_docs function
    num_of_recommendation_articles=5
    top_similar_articles=compute_similar_docs(received_text,dataset.loc[dataset.category==predicted_category],predicted_category,num_of_recommendation_articles) 
    
    ## print above results 
    print(predicted_category)    
    print(top_similar_articles)
    #return jsonify(predicted_category)
    explaination(received_text)
    return jsonify([predicted_category,num_of_recommendation_articles,[i for i in top_similar_articles.headline],[i for i in top_similar_articles.score],[i for i in top_similar_articles.filename]])

@app.route('/prediction_explainer')
def prediction_explainer():
    explaination(received_text)
@app.route('/')
def home():
    return 'hello this is vivek'

In [None]:
if __name__ == '__main__':
    app.run(port=8010, debug=False)

 * Serving Flask app "__main__" (lazy loading)
 * Environment: production
   Use a production WSGI server instead.
 * Debug mode: off


 * Running on http://127.0.0.1:8010/ (Press CTRL+C to quit)


Apple gave its chief executive Tim Cook a hefty 22 per cent pay raise in 2018, bringing his total compensation for the year to almost $15.7 million, according to a filing submitted to the Securities and Exchange Commission.
The figure comprised a base salary of $3 million, a $12 million bonus and $680,000 in what it called "other compensation" that includes private air travel and security expenses.
Apple's compensation committee cited the company's strong sales performance over the year in justifying the bonus.
"For 2018... we achieved net sales of $265.6 billion and operating income of $70.9 billion, each representing a year-over-year increase of 16 per cent, and exceeding the 2018 maximum annual cash incentive program goals for both of those performance measures," the document said.


Prediction business
reached here
business
        score filename                              headline
220  0.395141  221.txt   BP surges ahead on high oil price\n
262  0.381114  263.txt  Split-caps pay

127.0.0.1 - - [10/Jan/2019 15:08:11] "POST /api HTTP/1.1" 200 -


Apple gave its chief executive Tim Cook a hefty 22 per cent pay raise in 2018, bringing his total compensation for the year to almost $15.7 million, according to a filing submitted to the Securities and Exchange Commission.
The figure comprised a base salary of $3 million, a $12 million bonus and $680,000 in what it called "other compensation" that includes private air travel and security expenses.
Apple's compensation committee cited the company's strong sales performance over the year in justifying the bonus.
"For 2018... we achieved net sales of $265.6 billion and operating income of $70.9 billion, each representing a year-over-year increase of 16 per cent, and exceeding the 2018 maximum annual cash incentive program goals for both of those performance measures," the document said.


Prediction business
reached here
business
        score filename                              headline
220  0.395141  221.txt   BP surges ahead on high oil price\n
262  0.381114  263.txt  Split-caps pay

127.0.0.1 - - [10/Jan/2019 15:08:50] "POST /api HTTP/1.1" 200 -
127.0.0.1 - - [10/Jan/2019 15:08:57] "POST /prediction_explainer HTTP/1.1" 405 -


Gold prices held near seven-month highs on Thursday as the dollar was pressured by rising expectations the US Federal Reserve will keep interest rates steady this year, while investors also waited for further news on Sino-US trade talks.

FUNDAMENTALS

Spot gold was a tad lower at $1,292.46 per ounce at 0141 GMT, hovering near Friday's peak of $1,298.42 - a level last seen in June.

US gold futures rose 0.1 percent to $1,293.20 per ounce.

Prediction business
reached here
business
        score filename                             headline
382  0.454327  383.txt    Fed warns of more US rate rises\n
120  0.450498  121.txt  US interest rates increased to 2%\n
328  0.424766  329.txt    House prices drop as sales slow\n
239  0.422634  240.txt  Economy 'strong' in election year\n
213  0.422634  214.txt  Economy 'strong' in election year\n


127.0.0.1 - - [10/Jan/2019 15:10:38] "POST /api HTTP/1.1" 200 -


Gold prices held near seven-month highs on Thursday as the dollar was pressured by rising expectations the US Federal Reserve will keep interest rates steady this year, while investors also waited for further news on Sino-US trade talks.

FUNDAMENTALS

Spot gold was a tad lower at $1,292.46 per ounce at 0141 GMT, hovering near Friday's peak of $1,298.42 - a level last seen in June.

US gold futures rose 0.1 percent to $1,293.20 per ounce.

Prediction business
reached here
business
        score filename                             headline
382  0.454327  383.txt    Fed warns of more US rate rises\n
120  0.450498  121.txt  US interest rates increased to 2%\n
328  0.424766  329.txt    House prices drop as sales slow\n
239  0.422634  240.txt  Economy 'strong' in election year\n
213  0.422634  214.txt  Economy 'strong' in election year\n


127.0.0.1 - - [10/Jan/2019 15:16:43] "POST /api HTTP/1.1" 200 -


UK pioneers digital film network

The world's first digital cinema network will be established in the UK over the next 18 months.

The UK Film Council has awarded a contract worth £11.5m to Arts Alliance Digital Cinema (AADC), who will set up the network of up to 250 screens. AADC will oversee the selection of cinemas across the UK which will use the digital equipment. High definition projectors and computer servers will be installed to show mainly British and specialist films. Most cinemas currently have mechanical projectors but the new network will see up to 250 screens in up to 150 cinemas fitted with digital projectors capable of displaying high definition images. The new network will double the world's total of digital screens. Cinemas will be given the film on a portable hard drive and they will then copy the content to a computer server.

Each film is about 100 gigabytes and has been compressed from an original one terabyte-size file. Fiona Deans, associate director of AADC, sa

[2019-01-10 15:29:41,665] ERROR in app: Exception on /api [POST]
Traceback (most recent call last):
  File "C:\Users\vivyadav\AppData\Local\Continuum\anaconda3\lib\site-packages\flask\app.py", line 2292, in wsgi_app
    response = self.full_dispatch_request()
  File "C:\Users\vivyadav\AppData\Local\Continuum\anaconda3\lib\site-packages\flask\app.py", line 1815, in full_dispatch_request
    rv = self.handle_user_exception(e)
  File "C:\Users\vivyadav\AppData\Local\Continuum\anaconda3\lib\site-packages\flask\app.py", line 1718, in handle_user_exception
    reraise(exc_type, exc_value, tb)
  File "C:\Users\vivyadav\AppData\Local\Continuum\anaconda3\lib\site-packages\flask\_compat.py", line 35, in reraise
    raise value
  File "C:\Users\vivyadav\AppData\Local\Continuum\anaconda3\lib\site-packages\flask\app.py", line 1813, in full_dispatch_request
    rv = self.dispatch_request()
  File "C:\Users\vivyadav\AppData\Local\Continuum\anaconda3\lib\site-packages\flask\app.py", line 1799, in disp

A petition has been filed in the Supreme Court challenging the constitution amendment bill that reserves 10 per cent quota for poorer sections in the general category. A petition filed by a group, Youth for Equality and Dr Kaushal Kant Mishra, said the amendment violates the 50 per cent ceiling that had been laid down by the Supreme Court.

The petition says each of the four provisions that are being introduced in the Constitution violate one or the other basic feature and should not be allowed. A 13-judge Constitution Bench of the top court in 1973 had propounded the “basic structure” doctrine, holding that Parliament can amend the Constitution but not alter its basic or essential features.

Prediction business
reached here
business
        score filename                             headline
103  0.324010  104.txt  US seeks new $280bn smoker ruling\n
290  0.321110  291.txt   Tobacco giants hail court ruling\n
24   0.301051  025.txt   Yukos loses US bankruptcy battle\n
191  0.283464  1

127.0.0.1 - - [10/Jan/2019 15:59:31] "POST /api HTTP/1.1" 200 -


 It’s a negative end on the market on Thursday, with the Nifty holding on to 10,800-mark.

There was some weakness in the financials space, which weighed on the indices. Automobiles and pharmaceuticals, among others, were the other big gainers. There was a considerable outperformance by the Nifty Midcap segment, gaining almost half a percent. 

At the close of market hours, the Sensex was down 106.41 points or 0.29% at 36106.50, and the Nifty down 33.60 points or 0.31% at 10821.60. The market breadth was negative as 1217 shares advanced, against a decline of 1362 shares, while 153 shares were unchanged.

Prediction business
reached here
business
        score filename                            headline
273  0.439595  274.txt  Google shares fall as staff sell\n
260  0.399545  261.txt    Booming markets shed few tears\n
427  0.393054  428.txt  Karachi stocks hit historic high\n
261  0.388312  262.txt  Asian quake hits European shares\n
243  0.377467  244.txt  Market unfazed by Aurora se

127.0.0.1 - - [10/Jan/2019 16:01:00] "POST /api HTTP/1.1" 200 -


 It’s a negative end on the market on Thursday, with the Nifty holding on to 10,800-mark.

There was some weakness in the financials space, which weighed on the indices. Automobiles and pharmaceuticals, among others, were the other big gainers. There was a considerable outperformance by the Nifty Midcap segment, gaining almost half a percent. 

At the close of market hours, the Sensex was down 106.41 points or 0.29% at 36106.50, and the Nifty down 33.60 points or 0.31% at 10821.60. The market breadth was negative as 1217 shares advanced, against a decline of 1362 shares, while 153 shares were unchanged.

Tata Motors, NTPC, and Titan were the top gainers, while IndusInd Bank, Kotak Mahindra Bank, and HPCL lost the most. 

Prediction business
reached here
business
        score filename                            headline
273  0.417344  274.txt  Google shares fall as staff sell\n
260  0.402732  261.txt    Booming markets shed few tears\n
261  0.392050  262.txt  Asian quake hits European 

127.0.0.1 - - [10/Jan/2019 16:07:21] "POST /api HTTP/1.1" 200 -


UK pioneers digital film network

The world's first digital cinema network will be established in the UK over the next 18 months.

The UK Film Council has awarded a contract worth £11.5m to Arts Alliance Digital Cinema (AADC), who will set up the network of up to 250 screens. AADC will oversee the selection of cinemas across the UK which will use the digital equipment. High definition projectors and computer servers will be installed to show mainly British and specialist films. Most cinemas currently have mechanical projectors but the new network will see up to 250 screens in up to 150 cinemas fitted with digital projectors capable of displaying high definition images. The new network will double the world's total of digital screens. Cinemas will be given the film on a portable hard drive and they will then copy the content to a computer server.

Each film is about 100 gigabytes and has been compressed from an original one terabyte-size file. Fiona Deans, associate director of AADC, sa

[2019-01-10 16:11:48,823] ERROR in app: Exception on /api [POST]
Traceback (most recent call last):
  File "C:\Users\vivyadav\AppData\Local\Continuum\anaconda3\lib\site-packages\flask\app.py", line 2292, in wsgi_app
    response = self.full_dispatch_request()
  File "C:\Users\vivyadav\AppData\Local\Continuum\anaconda3\lib\site-packages\flask\app.py", line 1815, in full_dispatch_request
    rv = self.handle_user_exception(e)
  File "C:\Users\vivyadav\AppData\Local\Continuum\anaconda3\lib\site-packages\flask\app.py", line 1718, in handle_user_exception
    reraise(exc_type, exc_value, tb)
  File "C:\Users\vivyadav\AppData\Local\Continuum\anaconda3\lib\site-packages\flask\_compat.py", line 35, in reraise
    raise value
  File "C:\Users\vivyadav\AppData\Local\Continuum\anaconda3\lib\site-packages\flask\app.py", line 1813, in full_dispatch_request
    rv = self.dispatch_request()
  File "C:\Users\vivyadav\AppData\Local\Continuum\anaconda3\lib\site-packages\flask\app.py", line 1799, in disp

Dhoni scored a century

Prediction sport
reached here
sport
        score filename                             headline
218  0.191792  219.txt       Hearts of Oak 3-2 Cotonsport\n
150  0.093129  151.txt   Owen delighted with Real display\n
148  0.091394  149.txt  Legendary Dutch boss Michels dies\n
195  0.089446  196.txt              Wenger signs new deal\n
289  0.088689  290.txt   O'Gara revels in Ireland victory\n


127.0.0.1 - - [10/Jan/2019 16:34:26] "POST /api HTTP/1.1" 200 -
