### import data & modules

In [None]:
import json

import numpy as np
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report
from sklearn.model_selection import train_test_split, GridSearchCV

In [None]:
from keras.models import Sequential
from keras.layers import Conv2D, MaxPooling2D
from keras.layers import Activation, Dropout, Flatten, Dense
from keras.regularizers import l2
from keras.callbacks import EarlyStopping, ModelCheckpoint

### load model for test

In [None]:
# The network is fed the TF-IDF vectors produced from the saved vocabulary, so
# its input width is taken from that vocabulary file. The previous version read
# it from `features`, a name that is never defined in this notebook, which
# raised a NameError on a fresh kernel.
# NOTE(review): assumes the checkpoint was trained on this same vocabulary;
# load_weights will fail on a shape mismatch if it was not.
with open('tfidf_vocab.json') as vocab_file:
    input_dim = len(json.load(vocab_file))

# Two L2-regularised ReLU layers with dropout, then a single sigmoid unit.
# The layer sizes must match the checkpoint loaded below.
simple_model = Sequential([Dense(150, activation='relu', input_dim=input_dim, kernel_regularizer=l2(0.01)),
                           Dropout(0.6),
                           Dense(200, activation='relu', kernel_regularizer=l2(0.01)),
                           Dropout(0.4),
                           Dense(1, activation='sigmoid')])

# Restore the trained weights from the saved checkpoint file.
simple_model.load_weights('improvement-10-0.9714-0.9181.hdf5')

# Compile with the binary-classification loss and accuracy metric.
simple_model.compile(loss='binary_crossentropy',
                     optimizer='adam',
                     metrics=['binary_accuracy'])

### tfidf creation from file

In [2]:
# Read the saved vocabulary that is later passed as `vocabulary=` to the
# TfidfVectorizer, then show what type the JSON decoded to.
with open('tfidf_vocab.json') as vocab_file:
    vocab = json.load(vocab_file)
type(vocab)

In [3]:
# Read the saved IDF weights that are later assigned to the vectorizer.
with open('idf_.json') as idf_file:
    idf = json.load(idf_file)
# Show the size and a short preview instead of echoing every weight into the
# notebook output.
len(idf), idf[:5]

In [None]:
# Rebuild the vectorizer from its saved parts instead of refitting it.
# scikit-learn documents `min_df` as ignored when a fixed `vocabulary` is
# supplied; it is kept here only so the constructor call stays as it was.
tfidf = TfidfVectorizer(stop_words='english', vocabulary=vocab, norm=None, min_df=0.0003, ngram_range=(1, 2))
# json.load yields a plain Python list; hand the vectorizer a float64 array so
# later array operations on `idf_` (e.g. `.tolist()`, indexing during
# transform) do not depend on the setter converting it for us.
tfidf.idf_ = np.asarray(idf, dtype=np.float64)

In [None]:
# Sample news article used as the document to vectorise and score below.
text = """The average American would rather share their health data with pharmaceutical companies, health insurers and the government than tech companies like Amazon and Facebook.

That's according to the latest results of a new survey of 4,000 diverse participants from Rock Health, which conducts research and invests in health tech companies.

The survey, which was conducted in the fall of 2018, asked people who they'd be wiling to share health data with. Tech companies came in at the bottom:

My doctor: 72 percent willing to share
My health insurer: 49 percent
My pharmacy: 47 percent
Research institution: 35 percent
Pharmaceutical company: 20 percent
Government organization: 12 percent
Tech company: 11 percent
This is the fourth year that Rock Health has released survey results.

This year, Rock Health dug a little deeper into the opinions of that 11 percent who said they'd share health info with a tech company to find out which companies they trusted most. The most trusted company among them was Google, while IBM brought up the rear:"""

In [None]:
# Vectorise the single article with the reloaded vocabulary and IDF weights.
transformed = tfidf.transform([text])

In [None]:
# Score the vectorised article; the model's last layer is a single sigmoid unit.
simple_model.predict(transformed)

array([[0.9318109]], dtype=float32)

### Manual trial

In [None]:
# Bank of sample inputs for manual trials. Only one `text` assignment is active;
# uncomment a different sample (and comment out the active one) to try it.
# Every line of a commented-out sample must carry its own `#`: a stray
# uncommented continuation line makes the whole cell a SyntaxError.
# text = 'i want to use this classifier for a commercial project. It is light-weighted and can work in a chrome plugin'
# text = 'i care for my country very much, vote for me. believeme, i will be the best president you have ever known'
# text = 'This ballet style is often performed barefoot. Contemporary ballets may include mime and acting, and are usually set to music (typically orchestral but occasionally vocal). It can be difficult to differentiate this form from neoclassical or modern ballet. Contemporary ballet is also close to contemporary dance, because many contemporary ballet concepts come from the ideas and innovations of twentieth-century modern dance, including floor work and turn-in of the legs. The main distinction is that ballet technique is essential to perform a contemporary ballet.George Balanchine is considered to have been a pioneer of contemporary ballet. Another early contemporary ballet choreographer, Twyla Tharp, choreographed Push Comes To Shove for the American Ballet Theatre in 1976, and in 1986 created In The Upper Room for her own company. Both of these pieces were considered innovative for their melding of distinctly modern movements with the use of pointe shoes and classically trained dancers.Today there are many contemporary ballet companies and choreographers. These include Alonzo King and his company LINES Ballet; Matthew Bourne and his company New Adventures; Complexions Contemporary Ballet; Nacho Duato and his Compañia Nacional de Danza; William Forsythe and The Forsythe Company; and Jiří Kylián of the Nederlands Dans Theater. Traditionally "classical" companies, such as the Mariinsky (Kirov) Ballet and the Paris Opera Ballet, also regularly perform contemporary works.The term ballet has evolved to include all forms associated with it. Someone training as a ballet dancer will now be expected to perform neoclassical, modern and contemporary work. A ballet dancer is expected to be able to be stately and regal for classical work, free and lyrical in neoclassical work, and unassuming, harsh or pedestrian for modern and contemporary work. 
# In addition, there are several modern varieties of dance that fuse classical ballet technique with contemporary dance, such as Hiplet, that require dancers to be practised in non-Western dance styles.'
# text='The first thing that comes into our minds while thinking of employees at tech companies is probably Software Engineers and Data Scientists. But a technology company requires employees beyond those in technical roles, so they hire for non-tech roles too. The current scenario at the job market doesn’t necessarily require you to be a tech pro to get hired by technology companies. As the growth tech companies increases, they need more non-technical workforce to help transform their technology into revenue. Roles such as sales, marketing, business operation, etc. are crucial to their expansion. These highly coveted positions are also well compensated by tech employers.'
# # text = '''As part of an effort to reduce noise in Ghana’s capital, Accra, the environment minister has suggested that the Muslim call to prayer, normally broadcast over loudspeakers across the city, should instead be sent out on WhatsApp. The notion has proved immensely unpopular – not least because it equates the call to prayer with noise pollution. But it also highlights religion’s growing, if sometimes uneasy, reliance on tech.

# Contactless collection Catholic and Protestant churches in the UK have begun using contactless card readers for donations and other payments, hoping to make life easier for parishioners who may not be carrying cash. A contactless collection plate is being trialled by the Church of England, but it is being held up because it is feared the technology might slow things down.

# The confession app Confession (version 2.1) walks sinners through the business of confession, pings you push notifications when it’s time for your next shriving and includes a handy sin checklist in case you have forgotten what you’ve done wrong. What it doesn’t do is offer absolution. You still need a priest for that.

# Televangelist tech American TV preachers tend to be at home in the digital realm, particularly those espousing “prosperity theology”, which holds that God rewards the pious with material wealth. It works for them, anyway. Controversial televangelist Joel Osteen’s free app offers daily devotional messages and sermons. And Creflo Dollar’s latest app boasts a “give” feature that makes handing your money over to Creflo A Dollar Ministries as easy as gambling. Webcast rites Funerals broadcast online are at least a decade old. Many UK crematoria now offer it as part of the package, and rabbis and vicars routinely officiate at live-streamed services. It is not quite the same as being there, but in many cases it’s probably preferable.Halal apps There are several apps that help Muslims find halal restaurants and stores in the immediate area . There is even one – Scan Halal – that allows you to scan the barcode of any food in a supermarket to instantly determine whether it contains non-halal ingredients. Online reviews for this idea are decidedly mixed.'
# '''
# text = "Redux Redux DevTools debugging"
# text = "Redux DevTools for debugging application's state changes."
# The NodeSource sample was assigned and then immediately overwritten by the
# passage below, so it is parked with the other inactive samples.
# text = "NodeSource is a company focused on providing enterprise-grade Node support and they maintain a repository containing the latest versions of Node.js."
text = """made that was made.
4In him was life; and the life was the light of men.
5And the light shineth in darkness; and the darkness comprehended it not.
6There was a man sent from God, whose name was John.
7The same came for a witness, to bear witness of the Light, that all men through him might believe.
8He was not that Light, but was sent to bear witness of that Light.
9That was the true Light, which lighteth every man that cometh into the world."""

In [None]:
# Vectorise whichever manual sample is currently assigned to `text`.
transformed = tfidf.transform([text])

In [None]:
# Preview the IDF weights through the public `idf_` attribute. The previous
# version densified the private `_tfidf._idf_diag` matrix, which depends on
# scikit-learn internals and allocates a dense square array over the whole
# vocabulary just to look at its diagonal.
np.asarray(tfidf.idf_)[:10]

In [None]:
# Dense feature vector for the sample as nested Python lists.
# NOTE(review): presumably dumped in full so it can be copied into the JS tests — confirm.
transformed.toarray().tolist()

In [None]:
# IDF weights held by the vectorizer, as a plain Python list.
tfidf.idf_.tolist()

In [None]:
# Vocabulary the vectorizer is actually using (set from the `vocabulary=` argument).
tfidf.vocabulary_

In [None]:
# Score the vectorised manual sample with the loaded model.
simple_model.predict(transformed)


array([[0.49849725]], dtype=float32)

### for js testing

In [None]:
# Three short sentences used as the corpus for the JS comparison below.
text = [
    "Redux DevTools for debugging application's state changes.",
    "I want to use this classifier for a commercial project.",
    "for debugging application's I want to use this classifier",
]

In [None]:
# Fit a fresh vectorizer on the three sample sentences and keep the resulting
# feature matrix in `feat`.
# NOTE(review): this rebinds `tfidf`, replacing the vectorizer rebuilt from the
# saved vocabulary/IDF files; re-running earlier cells after this one will use
# the wrong vectorizer unless that cell is re-executed first.
tfidf = TfidfVectorizer(
    stop_words='english',
    norm=None,
    use_idf=True,
    smooth_idf=True,
    sublinear_tf=True,
)
feat = tfidf.fit_transform(text)