In [1]:
import numpy as np
import pandas as pd
import nltk
from nltk.corpus import stopwords
import string
import os
import itertools
import matplotlib.pyplot as plt
import pickle
import pdb

import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim

from sklearn.metrics import accuracy_score

from gensim.scripts.glove2word2vec import glove2word2vec
from gensim.models.keyedvectors import KeyedVectors

import model as m



############# Parameters
# --- Pretrained word embeddings ---
# Directory holding the GloVe vectors and the base file name (no extension).
path2embeddings = '../pretrained_embeds/glove.6B/'
embedfile = 'glove.6B.50d'

# --- Dataset artifacts ---
path2data = '../data/'
datafile = 'news_articles.pkl'
cat2id_file = 'category2id.pkl'  # pickled mapping that is loaded into category2id

# --- Trained model checkpoint ---
model_saving_path = '../attention/'
model_saving_file = 'attention_model.pt'

# --- Model dimensions (the three arguments given to m.atten_classifier) ---
# embed_size presumably has to match the dimensionality of the vectors in embedfile.
embed_size, hidden_dim, n_classes = 50, 100, 7

# --- Inference settings ---
article_max_len = 600  # at most this many word embeddings are fed to the model
top_k_keywords = 10    # how many keywords are reported per article



############# Loading Pretrained Glove Embeddings
# Location of the word2vec-format copy of the GloVe file.
w2v_embedfile = path2embeddings + embedfile + '_w2v.txt'

# Convert the raw GloVe text file only when no converted copy exists yet.
if not os.path.isfile(w2v_embedfile):
    glove2word2vec(
        glove_input_file=path2embeddings + embedfile + '.txt',
        word2vec_output_file=w2v_embedfile,
    )

# Either way, the embeddings are read from the converted file.
glove_model = KeyedVectors.load_word2vec_format(w2v_embedfile, binary=False)

def get_embed(word):
    """Return the embedding vector of ``word`` from ``glove_model``.

    The word is lower-cased before the lookup. When the lower-cased word is
    not in the vocabulary, the vector stored under ``'<unk>'`` is returned
    instead.

    Args:
        word: Token to look up (a string).

    Returns:
        Whatever ``glove_model.get_vector`` returns for the word or for
        ``'<unk>'``.

    Raises:
        KeyError: If neither the word nor ``'<unk>'`` is in the vocabulary.
    """
    # Case folding
    word = word.lower()
    try:
        return glove_model.get_vector(word)
    except KeyError:
        # Only an out-of-vocabulary lookup falls back to the unknown-word
        # vector; any other failure (or a keyboard interrupt) must propagate
        # instead of being silently turned into '<unk>'.
        return glove_model.get_vector('<unk>')
    
    

############## Categories to its id
category2id_path = path2data + cat2id_file
# Fail with a clear message when the mapping file is missing; otherwise the
# first symptom would be a NameError on category2id further down.
if not os.path.exists(category2id_path):
    raise FileNotFoundError('Category mapping file not found: ' + category2id_path)

# NOTE(security): pickle.load can execute arbitrary code contained in the
# file, so only load a mapping file that comes from a trusted source.
with open(category2id_path, 'rb') as handle:
    category2id = pickle.load(handle)

# Inverse mapping (id -> category), used to turn a predicted id into a label.
id2category = {v: k for k, v in category2id.items()}
        
        
        
############ Loading the model
# Use a GPU when available, else the CPU. The second GPU ('cuda:1') is still
# preferred, but machines with a single GPU fall back to 'cuda:0' instead of
# failing with an invalid device ordinal.
if torch.cuda.is_available():
    device = torch.device('cuda:1' if torch.cuda.device_count() > 1 else 'cuda:0')
else:
    device = torch.device('cpu')

model = m.atten_classifier(embed_size, hidden_dim, n_classes)
model = model.to(device)

# map_location puts the checkpoint tensors on the selected device regardless
# of which device they were saved from, so one call covers GPU and CPU.
model.load_state_dict(
    torch.load(model_saving_path + model_saving_file, map_location=device)
)

# This notebook only runs inference: switch layers such as dropout or batch
# normalisation (if the model has any) to evaluation behaviour.
model.eval()

In [2]:
article_text = input('Enter the news article: ')

# Lower-case and tokenise, then drop English stop words and punctuation tokens.
# A set makes each membership test constant-time.
stop = set(stopwords.words('english')) | set(string.punctuation)
tokens = nltk.word_tokenize(article_text.lower())

# Truncate the word list itself (not only the embeddings) so that word i
# always lines up with attention weight i, even for very long articles.
article_words = [tok for tok in tokens if tok not in stop][:article_max_len]
if not article_words:
    raise ValueError('The article contains no words after removing stop words and punctuation.')

# One embedding vector per remaining word.
article = np.array([get_embed(word) for word in article_words])
article_inp = torch.from_numpy(article).to(device)

# Pure inference: no gradients are needed.
with torch.no_grad():
    out, alphas = model(article_inp)

###### Getting the domain of news article
domain = id2category[int(torch.argmax(out).item())]
print('\n\n')
print('Domain of the news article: ' + domain)
print('\n\n')

####### Getting keywords with highest alpha weights
# assumes the flattened alphas hold one weight per input word, in order — TODO confirm against model.py
alpha_weights = alphas.detach().cpu().numpy().reshape(-1).tolist()

# A word can occur several times; keep its largest weight rather than
# whichever occurrence happens to come last.
word2weights = {}
for word, weight in zip(article_words, alpha_weights):
    if weight > word2weights.get(word, float('-inf')):
        word2weights[word] = weight
word2weights = sorted(word2weights.items(), key=lambda kv: kv[1], reverse=True)

# Slicing copes with articles that have fewer than top_k_keywords distinct words.
extracted_keywords = [word for word, _ in word2weights[:top_k_keywords]]
print('Extracted Keywords: ' + str(extracted_keywords))

Enter the news article: Picking specialists is an extension of Virat Kohli’s Test selection policy but this time it has produced a World Cup squad that looks really thin on batting. Going by the primary role of the players, the 15-member India squad has five bowlers — Jasprit Bumrah, Mohammed Shami, Bhuvneshwar Kumar, Yuzvendra Chahal and Kuldeep Yadav – four all-rounders — Kedar Jadhav, Hardik Pandya, Vijay Shankar and Ravindra Jadeja — two wicket-keepers – MS Dhoni and Dinesh Karthik -- and only four specialist batsmen — Virat Kohli, Rohit Sharma, Shikhar Dhawan and KL Rahul.  This squad has possibly the best opening batsmen, the best batsman of this generation, a legendary wicket-keeper and tactician as well as a balanced bowling attack. What it lacks is a middle-order batsman of repute and a finisher who takes minimum time to shift gears. That’s quite a departure from the years — as recent as last World Cup — India had bragging rights in these departments.  For the first time since