In [0]:
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.neighbors import KNeighborsClassifier
from sklearn.cluster import KMeans
from nltk.corpus import stopwords
from nltk.stem.wordnet import WordNetLemmatizer
import string
import re
import numpy as np
from collections import Counter
 
# English stop words from the NLTK stopwords corpus, stored as a set so that
# clean() can test membership cheaply.
stop = set(stopwords.words('english'))
# Set of the characters in string.punctuation; clean() deletes these.
exclude = set(string.punctuation)
# Single WordNet lemmatizer instance reused by clean() for every word.
lemma = WordNetLemmatizer()

def clean(doc):
    """Normalize a raw sentence and return it as a list of tokens.

    Steps, in order:
      1. lowercase ``doc`` and split it on whitespace;
      2. drop every token found in the module-level ``stop`` set;
      3. delete every character found in the module-level ``exclude`` set;
      4. lemmatize each remaining word with the module-level ``lemma``;
      5. remove all runs of digits.

    Stop words are filtered *before* punctuation is stripped, so a stop
    word with punctuation attached (e.g. ``"the,"``) is not removed.

    Args:
        doc: The sentence to clean, as a string.

    Returns:
        A list of cleaned word strings (possibly empty).
    """
    kept_words = [token for token in doc.lower().split() if token not in stop]

    # Punctuation is removed character by character from the re-joined text.
    without_punct = "".join(
        filter(lambda char: char not in exclude, " ".join(kept_words))
    )

    lemmas = map(lemma.lemmatize, without_punct.split())

    # Digits are stripped from the joined text; tokens that become empty
    # vanish in the final whitespace split.
    digitless = re.sub(r"\d+", "", " ".join(lemmas))
    return digitless.split()

# Announce what the script does before any work starts.
print("There are 10 sentences of following three classes on which K-NN classification and K-means clustering"
      " is performed : \n1. Cricket \n2. Artificial Intelligence \n3. Chemistry")
path = "Sentences.txt"

# Read the training corpus: one sentence per line. Each line is passed through
# clean() and re-joined into a single space-separated string, which is the
# input format the TF-IDF vectorizer expects.
train_clean_sentences = []
# The context manager guarantees the file handle is closed even if clean()
# raises part-way through the file.
with open(path, 'r') as fp:
    for line in fp:
        line = line.strip()
        cleaned = ' '.join(clean(line))
        train_clean_sentences.append(cleaned)
 
# Build the TF-IDF vocabulary from the cleaned training sentences and encode
# them as a document-term matrix.
vectorizer = TfidfVectorizer(stop_words='english')
X = vectorizer.fit_transform(train_clean_sentences)

# Class ids for the 30 training rows: the first ten rows are class 0, the next
# ten class 1, the last ten class 2.
y_train = np.repeat([0.0, 1.0, 2.0], 10)

# Supervised model: 5-nearest-neighbours classifier (fit returns the estimator).
modelknn = KNeighborsClassifier(n_neighbors=5).fit(X, y_train)

# Unsupervised model: 3-cluster K-means fitted on the same matrix, labels unused.
modelkmeans = KMeans(
    n_clusters=3,
    init='k-means++',
    max_iter=200,
    n_init=100,
).fit(X)

# Unseen sentences used to probe both fitted models.
test_sentences = ["Chemical compunds are used for preparing bombs based on some reactions",
                  "Cricket is a boring game where the batsman only enjoys the game",
                  "Machine learning is a area of Artificial intelligence"]

# Preprocess the test sentences exactly like the training sentences:
# clean() then re-join with single spaces. clean() already strips digits, so
# no additional digit removal is needed here.
test_clean_sentence = [' '.join(clean(sentence)) for sentence in test_sentences]

# Encode with the vocabulary learned from the training data (transform only,
# no re-fitting).
Test = vectorizer.transform(test_clean_sentence)
 
# Display names for the classes, indexed by the numeric class id used for
# training (0, 1, 2). Despite the variable name, these are NOT the
# ground-truth labels of the test sentences.
true_test_labels = ['Cricket','AI','Chemistry']
predicted_labels_knn = modelknn.predict(Test)
predicted_labels_kmeans = modelkmeans.predict(Test)

print("\nBelow 3 sentences will be predicted against the learned nieghbourhood and learned clusters:\n1. ",
      test_sentences[0],"\n2. ",test_sentences[1],"\n3. ",test_sentences[2])
print("\n-------------------------------PREDICTIONS BY KNN------------------------------------------")
# K-NN predictions are float class ids; convert with the builtin int() before
# indexing the name table. (np.int was removed from NumPy and raises
# AttributeError on current releases.)
print("\n",test_sentences[0],":",true_test_labels[int(predicted_labels_knn[0])],
      "\n",test_sentences[1],":",true_test_labels[int(predicted_labels_knn[1])],
      "\n",test_sentences[2],":",true_test_labels[int(predicted_labels_knn[2])])

print("\n-------------------------------PREDICTIONS BY K-Means--------------------------------------")
# K-means cluster ids are arbitrary, so report which cluster id is most common
# within each block of ten training rows; the reader uses this to interpret
# the raw cluster ids printed for the test sentences below.
print("\nIndex of Cricket cluster : ",Counter(modelkmeans.labels_[0:10]).most_common(1)[0][0])
print("Index of Artificial Intelligence cluster : ",Counter(modelkmeans.labels_[10:20]).most_common(1)[0][0])
print("Index of Chemistry cluster : ",Counter(modelkmeans.labels_[20:30]).most_common(1)[0][0])

print("\n",test_sentences[0],":",predicted_labels_kmeans[0],
      "\n",test_sentences[1],":",predicted_labels_kmeans[1],
      "\n",test_sentences[2],":",predicted_labels_kmeans[2])

There are 10 sentences of following three classes on which K-NN classification and K-means clustering is performed : 
1. Cricket 
2. Artificial Intelligence 
3. Chemistry
['chemical compunds used preparing bomb based reaction', 'cricket boring game batsman enjoys game', 'machine learning area artificial intelligence']

Below 3 sentences will be predicted against the learned nieghbourhood and learned clusters:
1.  Chemical compunds are used for preparing bombs based on some reactions 
2.  Cricket is a boring game where the batsman only enjoys the game 
3.  Machine learning is a area of Artificial intelligence

-------------------------------PREDICTIONS BY KNN------------------------------------------

 Chemical compunds are used for preparing bombs based on some reactions : Chemistry 
 Cricket is a boring game where the batsman only enjoys the game : Cricket 
 Machine learning is a area of Artificial intelligence : AI

-------------------------------PREDICTIONS BY K-Means---------------