In [None]:
from flask import Flask, request, jsonify
from flask_restful import Resource, Api
from newspaper import Article
import nltk
import re
from collections import defaultdict
from gensim import corpora
import json

# import logging
# logging.basicConfig(format='%(asctime)s : %(levelname)s : %(message)s', level=logging.INFO)
# Fetch the NLTK 'punkt' data package when this cell/module is executed.
# NOTE(review): presumably required by the summarisation step in
# newspaper's Article.nlp() -- confirm against the newspaper docs.
nltk.download('punkt')

# Flask application object that the route decorators below register on.
app = Flask(__name__)
# flask_restful wrapper around the app; no resources are added to it in the
# visible code.
api = Api(app)

# Stop words removed from candidate summaries before modelling. Built once at
# import time instead of on every request.
_STOPLIST = frozenset('about a above above across after afterwards again against all almost alone along already also although always am among amongst amoungst amount an and another any anyhow anyone anything anyway anywhere are around as at back be became because become becomes becoming been before beforehand behind being below beside besides between beyond bill both bottom but by call can cannot cant co con could couldnt cry de describe detail do done down due during each eg eight either eleven else elsewhere empty enough etc even ever every everyone everything everywhere except few fifteen fify fill find fire first five for former formerly forty found four from front full further get give go had has hasnt have he hence her here hereafter hereby herein hereupon hers herself him himself his how however hundred ie if in inc indeed interest into is it its itself keep last latter latterly least less ltd made many may me meanwhile might mill mine more moreover most mostly move much must my myself name namely neither never nevertheless next nine no nobody none noone nor not nothing now nowhere of off often on once one only onto or other others otherwise our ours ourselves out over own part per perhaps please put rather re same see seem seemed seeming seems serious several she should show side since sincere six sixty so some somehow someone something sometime sometimes somewhere still such system take ten than that the their them themselves then thence there thereafter thereby therefore therein thereupon these they thick thin third this those though three through throughout thru thus to together too top toward towards twelve twenty two un under until up upon us very via was we well were what whatever when whence whenever where whereafter whereas whereby wherein whereupon wherever whether which while whither who whoever whole whom whose why will with within without would yet you your yours yourself yourselves the for a of the and to in'.split())


@app.route('/article-summary', methods=['GET'])
def article_summary():
    """Rank candidate articles by LSI similarity to a source article.

    Query parameters:
        url: address of the source article (required).
        list_url: repeatable; addresses of the candidate articles.

    Returns:
        A JSON array of candidate positions (0-based indices into the
        ``list_url`` values, encoded as strings) ordered from most to least
        similar to the source article's summary. Responds with HTTP 400 and
        a JSON error object when ``url`` is missing, and with an empty array
        when no ``list_url`` values are supplied. Every response carries an
        ``Access-Control-Allow-Origin: *`` header.
    """
    from gensim import models, similarities

    def _reply(payload, status=200):
        # Single exit path so error responses get the CORS header too.
        response = jsonify(payload)
        response.status_code = status
        response.headers.add('Access-Control-Allow-Origin', '*')
        return response

    source_url = request.args.get('url')
    list_of_urls = request.args.getlist('list_url')

    if not source_url:
        return _reply({'error': "missing required query parameter 'url'"}, 400)
    if not list_of_urls:
        # Nothing to rank; avoid downloading the source for no reason.
        return _reply([])

    summary_source = get_summary(source_url)
    documents = [get_summary(item) for item in list_of_urls]

    # Lower-case, whitespace-tokenise and drop stop words.
    texts = [
        [word for word in document.lower().split() if word not in _STOPLIST]
        for document in documents
    ]

    # Remove words that appear only once across all candidate summaries.
    frequency = defaultdict(int)
    for text in texts:
        for token in text:
            frequency[token] += 1
    texts = [
        [token for token in text if frequency[token] > 1]
        for text in texts
    ]

    dictionary = corpora.Dictionary(texts)
    if len(dictionary) == 0:
        # The filters above left no vocabulary, so there is nothing for an
        # LSI model to be fitted on. Fall back to the request order instead
        # of failing the whole request.
        return _reply([str(position) for position in range(len(documents))])

    corpus = [dictionary.doc2bow(text) for text in texts]
    lsi = models.LsiModel(corpus, id2word=dictionary, num_topics=2)

    # Project the source summary into the same LSI space; words that are not
    # in the candidate dictionary are ignored by doc2bow.
    vec_bow = dictionary.doc2bow(summary_source.lower().split())
    vec_lsi = lsi[vec_bow]

    index = similarities.MatrixSimilarity(lsi[corpus])
    sims = index[vec_lsi]  # similarity of the source to every candidate

    # Highest similarity first; sorted() is stable, so ties keep input order.
    ranked = sorted(enumerate(sims), key=lambda item: -item[1])
    return _reply([str(position) for position, _score in ranked])
  
class result:
    """Plain holder pairing a document index with its similarity score."""

    def __init__(self, idx, score):
        # Store both constructor arguments unchanged on the instance.
        self.idx, self.score = idx, score
    

def get_summary(url):
    """Download an article and return its generated summary on one line.

    Args:
        url: address of the article to fetch.

    Returns:
        The ``summary`` attribute populated by ``Article.nlp()``, with every
        run of newline, carriage-return and tab characters replaced by a
        single space.

    Any exception raised by the ``Article`` download/parse/nlp steps
    propagates to the caller.
    """
    article = Article(url)
    article.download()
    article.parse()
    article.nlp()
    # Replace layout whitespace with a space rather than deleting it.
    # Deleting fuses the words on either side of a line break into one token
    # (e.g. "inquiry.\nThe" -> "inquiry.The"), which breaks the
    # whitespace-based tokenisation that callers apply to the summary.
    return re.sub(r'[\n\r\t]+', ' ', article.summary)

# Start Flask's built-in server only when this code runs as the main module.
if __name__ == "__main__":
    app.run()

[nltk_data] Downloading package punkt to
[nltk_data]     /Users/soundaryatekkalakota/nltk_data...


 * Serving Flask app "__main__" (lazy loading)
 * Environment: production
   Use a production WSGI server instead.
 * Debug mode: off


[nltk_data]   Package punkt is already up-to-date!
 * Running on http://127.0.0.1:5000/ (Press CTRL+C to quit)
127.0.0.1 - - [02/Dec/2019 17:26:36] "GET /article-summary?url=https://www.nbcnews.com/politics/trump-impeachment-inquiry/newly-released-documents-shed-light-mueller-trump-meeting-n1094586&list_url=https://www.cnet.com/roadshow/news/uber-self-driving-car-crash-arizona/&list_url=https://www.theverge.com/2019/11/6/20951385/uber-self-driving-crash-death-reason-ntsb-dcouments&list_url=https://gizmodo.com/voyager-2-team-releases-first-scientific-data-on-inters-1839612752&list_url=https://www.engadget.com/2019/11/27/steven-universe-unleash-the-light-apple-arcade/&list_url=https://www.wired.com/story/ubers-self-driving-car-didnt-know-pedestrians-could-jaywalk/&list_url=https://gizmodo.com/u-s-police-already-using-spot-robot-from-boston-dynami-1840029868&list_url=https://www.cnn.com/2019/11/18/politics/trump-tax-documents-supreme-court/index.html&list_url=https://www.washingtonexamine

127.0.0.1 - - [02/Dec/2019 18:01:57] "GET /article-summary?url=https://www.cnn.com/2019/12/02/politics/house-republican-response-impeachment-inquiry/index.html&list_url=https://www.nytimes.com/2019/11/18/us/politics/trump-impeachment.html&list_url=https://www.nytimes.com/2019/11/06/us/politics/trump-impeachment-hearings.html&list_url=https://www.nytimes.com/2019/11/29/us/politics/house-judiciary-panel-asks-trump-if-he-will-present-impeachment-defense.html&list_url=https://www.cnn.com/2019/11/12/politics/house-republicans-trump-memo-impeachment-defense/index.html&list_url=https://www.nytimes.com/2019/12/02/us/politics/republican-impeachment-defense.html&list_url=https://www.nytimes.com/2019/11/14/podcasts/the-latest-impeachment-bribery.html&list_url=https://www.nytimes.com/2019/11/06/us/politics/william-taylor-ukraine-impeachment-testimony.html&list_url=https://www.nytimes.com/2019/11/26/us/politics/impeachment-trump-hearing.html&list_url=https://www.nytimes.com/2019/11/03/us/politics/w

127.0.0.1 - - [02/Dec/2019 18:57:13] "GET /article-summary?url=https://www.cnn.com/2019/12/02/politics/house-republican-response-impeachment-inquiry/index.html&list_url=https://www.nytimes.com/2019/11/18/us/politics/trump-impeachment.html&list_url=https://www.nytimes.com/2019/11/06/us/politics/trump-impeachment-hearings.html&list_url=https://www.nytimes.com/2019/11/29/us/politics/house-judiciary-panel-asks-trump-if-he-will-present-impeachment-defense.html&list_url=https://www.cnn.com/2019/11/12/politics/house-republicans-trump-memo-impeachment-defense/index.html&list_url=https://www.nytimes.com/2019/12/02/us/politics/republican-impeachment-defense.html&list_url=https://www.nytimes.com/2019/11/14/podcasts/the-latest-impeachment-bribery.html&list_url=https://www.nytimes.com/2019/11/06/us/politics/william-taylor-ukraine-impeachment-testimony.html&list_url=https://www.nytimes.com/2019/11/26/us/politics/impeachment-trump-hearing.html&list_url=https://www.nytimes.com/2019/11/03/us/politics/w

127.0.0.1 - - [02/Dec/2019 19:11:53] "GET /article-summary?url=https://www.cnn.com/2019/12/02/politics/house-republican-response-impeachment-inquiry/index.html&list_url=https://www.nytimes.com/2019/11/18/us/politics/trump-impeachment.html&list_url=https://www.nytimes.com/2019/11/06/us/politics/trump-impeachment-hearings.html&list_url=https://www.nytimes.com/2019/11/29/us/politics/house-judiciary-panel-asks-trump-if-he-will-present-impeachment-defense.html&list_url=https://www.cnn.com/2019/11/12/politics/house-republicans-trump-memo-impeachment-defense/index.html&list_url=https://www.nytimes.com/2019/12/02/us/politics/republican-impeachment-defense.html&list_url=https://www.nytimes.com/2019/11/14/podcasts/the-latest-impeachment-bribery.html&list_url=https://www.nytimes.com/2019/11/06/us/politics/william-taylor-ukraine-impeachment-testimony.html&list_url=https://www.nytimes.com/2019/11/26/us/politics/impeachment-trump-hearing.html&list_url=https://www.nytimes.com/2019/11/03/us/politics/w

127.0.0.1 - - [02/Dec/2019 19:25:31] "GET /article-summary?url=https://www.cnn.com/2019/12/02/politics/george-conway-kellyanne-conway/index.html&list_url=https://www.cnn.com/2019/12/02/politics/george-conway-kellyanne-conway/index.html&list_url=https://www.insider.com/george-conway-and-kellyanne-conway-go-on-opposing-tv-networks-2019-11&list_url=https://www.nytimes.com/2019/11/29/opinion/woke-impeachment-trump.html&list_url=https://www.nytimes.com/2019/11/12/us/politics/maya-rockeymoore-cummings-congress.html&list_url=https://www.huffpost.com/entry/george-conway-republicans-obama_n_5dcce803e4b0d43931cf393e&list_url=https://www.insider.com/george-conway-and-kellyanne-conway-go-on-opposing-tv-networks-2019-11&list_url=https://www.cnn.com/videos/politics/2019/11/09/cindy-mccain-on-gop-axefiles-vpx.cnn&list_url=https://mashable.com/article/mrs-fletcher-kathryn-hahn/&list_url=https://www.nytimes.com/2019/12/02/us/politics/republican-impeachment-defense.html&list_url=https://www.cnn.com/2019

In [5]:
@app.route("/home")
def output():
    """Serve a fixed greeting at ``/home``."""
    greeting = "Hello World!"
    return greeting



    
    

In [6]:
# Run the development server when this cell is executed as the main module.
if __name__ == "__main__":
    app.run()

 * Serving Flask app "__main__" (lazy loading)
 * Environment: production
   Use a production WSGI server instead.
 * Debug mode: off


 * Running on http://127.0.0.1:5000/ (Press CTRL+C to quit)


In [7]:
from flask import Flask, request, jsonify
from flask_restful import Resource, Api
from newspaper import Article
import nltk
import re
from collections import defaultdict
from gensim import corpora
import json

# import logging
# logging.basicConfig(format='%(asctime)s : %(levelname)s : %(message)s', level=logging.INFO)
# Fetch the NLTK 'punkt' data package when this cell/module is executed.
# NOTE(review): presumably required by the summarisation step in
# newspaper's Article.nlp() -- confirm against the newspaper docs.
nltk.download('punkt')

# Fresh Flask application object; the routes defined below register on it.
app = Flask(__name__)
# flask_restful wrapper around the app; no resources are added to it in the
# visible code.
api = Api(app)

# Stop words removed from candidate summaries before modelling. Built once at
# import time instead of on every request.
_STOPLIST = frozenset('about a above above across after afterwards again against all almost alone along already also although always am among amongst amoungst amount an and another any anyhow anyone anything anyway anywhere are around as at back be became because become becomes becoming been before beforehand behind being below beside besides between beyond bill both bottom but by call can cannot cant co con could couldnt cry de describe detail do done down due during each eg eight either eleven else elsewhere empty enough etc even ever every everyone everything everywhere except few fifteen fify fill find fire first five for former formerly forty found four from front full further get give go had has hasnt have he hence her here hereafter hereby herein hereupon hers herself him himself his how however hundred ie if in inc indeed interest into is it its itself keep last latter latterly least less ltd made many may me meanwhile might mill mine more moreover most mostly move much must my myself name namely neither never nevertheless next nine no nobody none noone nor not nothing now nowhere of off often on once one only onto or other others otherwise our ours ourselves out over own part per perhaps please put rather re same see seem seemed seeming seems serious several she should show side since sincere six sixty so some somehow someone something sometime sometimes somewhere still such system take ten than that the their them themselves then thence there thereafter thereby therefore therein thereupon these they thick thin third this those though three through throughout thru thus to together too top toward towards twelve twenty two un under until up upon us very via was we well were what whatever when whence whenever where whereafter whereas whereby wherein whereupon wherever whether which while whither who whoever whole whom whose why will with within without would yet you your yours yourself yourselves the for a of the and to in'.split())


@app.route('/article-summary', methods=['GET'])
def article_summary():
    """Rank candidate articles by LSI similarity to a source title.

    Query parameters:
        title: text of the source article's title (required); it is used
            directly as the query document, no download is performed for it.
        list_url: repeatable; addresses of the candidate articles.

    Returns:
        A JSON array of candidate positions (0-based indices into the
        ``list_url`` values, encoded as strings) ordered from most to least
        similar to the title. Responds with HTTP 400 and a JSON error object
        when ``title`` is missing, and with an empty array when no
        ``list_url`` values are supplied. Every response carries an
        ``Access-Control-Allow-Origin: *`` header.
    """
    from gensim import models, similarities

    def _reply(payload, status=200):
        # Single exit path so error responses get the CORS header too.
        response = jsonify(payload)
        response.status_code = status
        response.headers.add('Access-Control-Allow-Origin', '*')
        return response

    source_title = request.args.get('title')
    list_of_urls = request.args.getlist('list_url')

    if not source_title:
        # Without this guard a missing title reaches `.lower()` as None.
        return _reply({'error': "missing required query parameter 'title'"}, 400)
    if not list_of_urls:
        # Nothing to rank.
        return _reply([])

    documents = [get_summary(item) for item in list_of_urls]

    # Lower-case, whitespace-tokenise and drop stop words.
    texts = [
        [word for word in document.lower().split() if word not in _STOPLIST]
        for document in documents
    ]

    # Remove words that appear only once across all candidate summaries.
    frequency = defaultdict(int)
    for text in texts:
        for token in text:
            frequency[token] += 1
    texts = [
        [token for token in text if frequency[token] > 1]
        for text in texts
    ]

    dictionary = corpora.Dictionary(texts)
    if len(dictionary) == 0:
        # The filters above left no vocabulary, so there is nothing for an
        # LSI model to be fitted on. Fall back to the request order instead
        # of failing the whole request.
        return _reply([str(position) for position in range(len(documents))])

    corpus = [dictionary.doc2bow(text) for text in texts]
    lsi = models.LsiModel(corpus, id2word=dictionary, num_topics=2)

    # Project the title into the same LSI space; words that are not in the
    # candidate dictionary are ignored by doc2bow.
    vec_bow = dictionary.doc2bow(source_title.lower().split())
    vec_lsi = lsi[vec_bow]

    index = similarities.MatrixSimilarity(lsi[corpus])
    sims = index[vec_lsi]  # similarity of the title to every candidate

    # Highest similarity first; sorted() is stable, so ties keep input order.
    ranked = sorted(enumerate(sims), key=lambda item: -item[1])
    return _reply([str(position) for position, _score in ranked])
  
class result:
    """Record of one ranked item: its index and the score it received."""

    def __init__(self, idx, score):
        # Keep the two values exactly as passed in.
        self.idx, self.score = idx, score
    

def get_summary(url):
    """Fetch an article and return its generated summary as a single line.

    Args:
        url: address of the article to fetch.

    Returns:
        The ``summary`` attribute populated by ``Article.nlp()``, with every
        run of newline, carriage-return and tab characters replaced by a
        single space.

    Any exception raised by the ``Article`` download/parse/nlp steps
    propagates to the caller.
    """
    article = Article(url)
    article.download()
    article.parse()
    article.nlp()
    # Substitute a space for layout whitespace instead of removing it.
    # Removing it glues the last word before a line break to the first word
    # after it (e.g. "hearing.\nHouse" -> "hearing.House"), so callers that
    # tokenise with str.split() would see one bogus token.
    return re.sub(r'[\n\r\t]+', ' ', article.summary)

# Serve the app with Flask's built-in server when run as the main module.
if __name__ == "__main__":
    app.run()

 * Serving Flask app "__main__" (lazy loading)
 * Environment: production
   Use a production WSGI server instead.
 * Debug mode: off


[nltk_data] Downloading package punkt to
[nltk_data]     /Users/soundaryatekkalakota/nltk_data...
[nltk_data]   Package punkt is already up-to-date!
 * Running on http://127.0.0.1:5000/ (Press CTRL+C to quit)
