In [205]:
#Import libraries
import pandas as pd
import numpy as np
import gensim.downloader as api
from gensim.models.word2vec import Word2Vec
from transformers import pipeline
import warnings
from sklearn.preprocessing import StandardScaler
from sklearn.cluster import DBSCAN
from sklearn.feature_extraction.text import CountVectorizer
from nltk.corpus import stopwords
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics import silhouette_score


In [206]:
#Read in 'twitter_df'
twitter_df = pd.read_csv("../dataset/twitter_df.csv")

In [207]:
twitter_df.shape

(22153, 8)

In [208]:
twitter_df.drop(['Unnamed: 0'], axis=1, inplace=True)

In [209]:
#Create our contextual stop words
stops = ["online", "home", "remote", "work", "working","pandemic","business", \
               "internet", "remotework", "online","team","office","company","hybrid","employee","looking"]

In [210]:
len(twitter_df[twitter_df.duplicated("tweets")])

4557

In [211]:
# originaltweets =twitter_df.drop_duplicates(subset=['tweets'], keep='first').reset_index(drop=True)

In [212]:
originaltweets = twitter_df[~twitter_df['tweets'].str.contains('RT')]

In [213]:
originaltweets = originaltweets.reset_index(drop=True)

In [214]:
originaltweets =originaltweets.drop_duplicates(subset=['cleaned_tweets'], keep='first').reset_index(drop=True)

In [215]:
originaltweets.shape

(10781, 7)

In [216]:
X = originaltweets['cleaned_tweets']
X.head()

0    rise slowly becoming normal which mean need re...
1    opportunity join fantastic team tech fast pace...
2    good news open this excellent report from remo...
3            four way energize post pandemic workforce
4    these tool that will save your google meet too...
Name: cleaned_tweets, dtype: object

### CV

In [238]:
#https://stackoverflow.com/questions/39303912/tfidfvectorizer-in-scikit-learn-valueerror-np-nan-is-an-invalid-document
#https://stackoverflow.com/questions/27697766/understanding-min-df-and-max-df-in-scikit-countvectorizer
cv = CountVectorizer(max_features=5000, min_df=30, max_df=0.5, stop_words=stopwords.words('english')) 
X_cv = cv.fit_transform(X.values.astype('U'))  

In [239]:
len(cv.get_feature_names())

579

In [240]:
X_cv_df = pd.DataFrame(X_cv.toarray(), columns = cv.get_feature_names())

In [241]:
df = pd.concat([originaltweets, X_cv_df], axis=1)

In [242]:
df = df.reset_index(drop=True)

In [243]:
df.head()

Unnamed: 0,tweet_id,tweet_posted_on,tweets,cleaned_tweets,user_location,user_location_cleaned,polarity,ability,able,access,...,workspace,world,worth,would,writer,writing,year,youtube,zero,zoom
0,1416181616846811137,2021-07-16 23:43:03+00:00,🏡 #RemoteWork is on the rise &amp; slowly beco...,rise slowly becoming normal which mean need re...,"New York, NY",US,1,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,1416181380279635970,2021-07-16 23:42:07+00:00,Opportunity to join a fantastic team at a hi-t...,opportunity join fantastic team tech fast pace...,London | New York,US,1,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,1416181225979473920,2021-07-16 23:41:30+00:00,Good news for #JobSeekers open to #RemoteWork!...,good news open this excellent report from remo...,Sydney | Hong Kong | Singapore,HK,1,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,1416180635903868934,2021-07-16 23:39:09+00:00,Four Ways to Energize a Post-Pandemic Workforc...,four way energize post pandemic workforce,"Chicago, IL",US,1,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,1416180231350759425,2021-07-16 23:37:33+00:00,🚑 These are the tools that will save your #Rem...,these tool that will save your google meet too...,"Duluth, GA",US,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [244]:
cv_col = df.columns[7:]
cv_col_names = df[cv_col]

In [245]:
X_cv=cv_col_names
sc = StandardScaler()
X_cv_sc = sc.fit_transform(X_cv)
X_cv_sc[0:1]

array([[-0.05370029, -0.06334373, -0.07801458, -0.05456208, -0.09486351,
        -0.06260278, -0.07418022, -0.05624654, -0.0528246 , -0.06253782,
        -0.08856383, -0.06376264, -0.06401546, -0.05624654, -0.05947425,
         9.76006806, -0.08594379, -0.06961808, -0.05541064, -0.07046215,
        -0.07465665, -0.05715419, -0.06894221, -0.12931979, -0.05357606,
        -0.0704612 , -0.25886036, -0.06203245, -0.06260278, -0.07369983,
        -0.10441915, -0.09587184, -0.0716525 , -0.09177314, -0.05896602,
        -0.07230279, -0.15622853, -0.07294743, -0.07390186, -0.10435004,
        -0.05292836, -0.09931893, 18.04717853, -0.06253782,  8.28043591,
         5.55037751, -0.11362618, -0.05903339, -0.0966816 , -0.05796321,
        -0.08051791, -0.05947425, -0.06373209, -0.08184415, -0.06999544,
        -0.06328088, -0.0992398 , -0.08373423, -0.22924572, -0.11109778,
        -0.05737391, -0.06934696, -0.08373423, -0.06185315, -0.05715419,
        -0.06127147,  8.28124764, -0.05796321, -0.1

In [271]:
from sklearn.cluster import KMeans
km_cv = KMeans(n_clusters=10, random_state=42)
km_cv.fit(X_cv_sc)

KMeans(n_clusters=10, random_state=42)

In [247]:
for n in range(3, 16):
    kmeans = KMeans(n_clusters=n, random_state=42)
    kmeans.fit(X_cv_sc)
    print(n, kmeans.score(X_cv_sc), silhouette_score(X_cv_sc, kmeans.labels_))

3 -6192849.941308153 0.030618977511846012
4 -6167200.862772848 0.07006684173899169
5 -6158029.168175263 0.029904804423303805
6 -6157793.041344845 0.08773619476617435
7 -6109542.040010904 0.052639458771891724
8 -6109569.209759847 -0.00651980502111646
9 -6110420.239448032 0.05351011236338742
10 -6079775.208431132 0.07244279896198835
11 -6083480.954220364 0.0274753871030357
12 -6063034.714193459 0.013632932043164887
13 -6078644.059020367 0.02482936615546004
14 -6043940.519699644 0.019353998932595474
15 -6018761.338540577 -0.005234423062950015


In [127]:
for n in range(3, 16):
    kmeans = KMeans(n_clusters=n, random_state=42)
    kmeans.fit(X_cv_sc)
    print(n, kmeans.score(X_cv_sc), silhouette_score(X_cv_sc, kmeans.labels_))

3 -28940694.445593517 0.1411355764121874
4 -28955605.107846163 0.2078817857404128
5 -28906006.973357055 0.1970638810574114
6 -28884172.496514603 0.028873499206607006
7 -28826976.60191446 0.09113724414951647
8 -28750926.767359056 -0.00945907965969369
9 -28770313.455217905 0.13272966594500843
10 -28623047.328139532 -0.1698422052147124
11 -28627091.92663387 -0.17417764712698425
12 -28529959.223249592 -0.06320072875537311
13 -28532456.60545764 0.003828884180525165
14 -28536859.472120997 -0.04087767521683216
15 -28528615.429199748 -0.19447256285233147


In [128]:
for n in range(16, 30):
    kmeans = KMeans(n_clusters=n, random_state=42)
    kmeans.fit(X_cv_sc)
    print(n, kmeans.score(X_cv_sc), silhouette_score(X_cv_sc, kmeans.labels_))

16 -28425829.79330667 -0.14149180300095468
17 -28359876.988320835 -0.17008951449869308
18 -28357524.207793504 -0.04319445595486965
19 -28348956.86852532 -0.16488235090251382
20 -28274634.919274133 -0.1680674131964157
21 -28300120.653501965 -0.18179577607056205
22 -28135184.75237838 -0.03364704598291362
23 -28082168.265770976 -0.17619106194378809
24 -28051377.07201064 -0.17596067383198333
25 -28086759.84170141 -0.17966950618094796
26 -28147142.57187076 -0.1854144019157126
27 -28087308.061399817 -0.1624141991487051
28 -28006948.424123127 -0.1834039491486426
29 -27967580.520754755 -0.18355555431752105


In [272]:
km_cv.cluster_centers_

array([[-0.05370029, -0.06334373,  0.07628283, ..., -0.05876133,
        -0.05405549, -0.08211041],
       [-0.00800149,  0.00164215,  0.00276817, ...,  0.0058294 ,
         0.0036649 ,  0.00695816],
       [-0.05370029, -0.06334373, -0.07801458, ..., -0.05876133,
        -0.05405549, -0.08211041],
       ...,
       [-0.05370029, -0.06334373, -0.07801458, ..., -0.05876133,
        -0.05405549,  0.02777444],
       [-0.05370029,  0.32577805, -0.07801458, ..., -0.05876133,
        -0.05405549, -0.08211041],
       [ 0.37074458,  0.05162407, -0.07801458, ..., -0.05876133,
        -0.05405549, -0.08211041]])

In [273]:
df['km']=km_cv.labels_

In [274]:
df['km'].value_counts()

1    9808
0     316
9     264
7     106
5      91
2      53
3      42
8      39
4      32
6      30
Name: km, dtype: int64

In [275]:
silhouette_score(X_cv_sc, km_cv.labels_)

0.07244279896198835

In [276]:
df[df['km'] == 6]

Unnamed: 0,tweet_id,tweet_posted_on,tweets,cleaned_tweets,user_location,user_location_cleaned,polarity,ability,able,access,...,world,worth,would,writer,writing,year,youtube,zero,zoom,km
1693,1415348994444775425,2021-07-14 16:34:30+00:00,The latest The Remote Worker Weekly! https://t...,latest remote worker weekly thanks,United States,US,1,0,0,0,...,0,0,0,0,0,0,0,0,0,6
2383,1414973814275035137,2021-07-13 15:43:40+00:00,Want to find the job of your dreams? Remote wo...,want find your dream remote work best weekly n...,Anywhere,,0,0,0,0,...,0,0,0,0,0,0,0,0,0,6
3137,1416066623388241925,2021-07-16 16:06:06+00:00,We’re taking a look at the booming tech job ma...,taking look booming tech market city offering ...,"Indiana, USA",US,1,0,0,0,...,0,0,0,0,0,0,0,0,0,6
3567,1415764211758678018,2021-07-15 20:04:26+00:00,Free Weekly Marketing Training. Lead generatio...,free weekly marketing training lead generation...,"Chapala, Jalisco Mexico",MX,0,0,0,0,...,0,0,0,0,0,0,0,0,0,6
3890,1415586408320884736,2021-07-15 08:17:54+00:00,Financial Freedom Working From Home $$$500$$$T...,financial freedom working from home weekly,"Mississippi, USA",US,1,0,0,0,...,0,0,0,0,0,0,0,0,0,6
4136,1415370627121356800,2021-07-14 18:00:28+00:00,"24 new fully #remoteimpactjobs (global, region...",fully global regional multiple location weekly...,Florida/ USA,US,1,0,0,0,...,0,0,0,0,0,0,0,0,0,6
4353,1415253029276856320,2021-07-14 10:13:10+00:00,I need some people to work from 🏡 \nAll from y...,need some people work from from your cell phon...,"PO Box 2258 Stuart, FL 34995",US,0,0,0,0,...,0,0,0,0,0,0,0,0,0,6
4420,1415210130308837381,2021-07-14 07:22:43+00:00,Part Time Sales - Paid Weekly Work from Home\...,part time sale paid weekly work from home from...,Canada CA,CA,0,0,0,0,...,0,0,0,0,0,0,0,0,0,6
4767,1414934970951421970,2021-07-13 13:09:19+00:00,One-day return to office ‘could lead to £21m o...,return office could lead weekly saving energy ...,London,GB,1,0,0,0,...,0,0,0,0,0,0,0,0,0,6
5066,1414649563223404544,2021-07-12 18:15:13+00:00,You want to send your teams home to keep them ...,want send your team home keep them safe great ...,"Richmond, BC Canada",US,1,0,0,0,...,0,0,0,0,0,0,0,0,0,6


In [277]:
word_frequencies_by_cluster = df.groupby('km').sum()

In [278]:
word_frequencies_by_cluster.head()

Unnamed: 0_level_0,tweet_id,polarity,ability,able,access,according,account,achieve,across,actually,...,workspace,world,worth,would,writer,writing,year,youtube,zero,zoom
km,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
0,4.4758e+20,71.0,0.0,0.0,4.0,0.0,0.0,0.0,2.0,0.0,...,0.0,0.0,0.0,1.0,3.0,2.0,4.0,0.0,0.0,0.0
1,1.389204e+22,4958.0,24.0,42.0,65.0,32.0,90.0,42.0,56.0,32.0,...,49.0,179.0,29.0,135.0,57.0,31.0,211.0,39.0,34.0,75.0
2,7.501999e+19,49.0,0.0,0.0,0.0,0.0,3.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,5.949136e+19,9.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0
4,4.531935e+19,23.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,1.0,0.0,0.0,0.0,0.0,2.0,0.0,0.0,0.0


In [279]:
#https://stackoverflow.com/questions/27889873/clustering-text-documents-using-scikit-learn-kmeans-in-python
print("Top terms per cluster:")
order_centroids = km_cv.cluster_centers_.argsort()[:, ::-1]
terms = cv.get_feature_names()
for i in range(14):
    print ("Cluster %d:" % i)
    for ind in order_centroids[i, :20]:
        print( ' %s' % terms[ind])

Top terms per cluster:
Cluster 0:
 compensation
 competitive
 engineer
 role
 check
 equity
 software
 remote
 offering
 javascript
 react
 stack
 senior
 developer
 blockchain
 looking
 engineering
 python
 full
 frontend
Cluster 1:
 home
 employee
 work
 productivity
 tip
 need
 money
 online
 featured
 video
 read
 make
 pandemic
 affiliate
 click
 worker
 tell
 getting
 business
 long
Cluster 2:
 talented
 exciting
 join
 opportunity
 team
 hiring
 apply
 remote
 product
 manager
 designer
 senior
 front
 twitter
 america
 director
 coordinator
 customer
 account
 representative
Cluster 3:
 download
 free
 computer
 link
 code
 play
 using
 training
 software
 report
 microsoft
 traffic
 million
 network
 safe
 impact
 google
 earn
 still
 staff
Cluster 4:
 leadership
 culture
 strategy
 leader
 talk
 trust
 highlight
 style
 coordinator
 transition
 advice
 collaboration
 development
 associate
 head
 focus
 tech
 shift
 team
 news
Cluster 5:
 vacancy
 update
 consultant
 daily
 j

IndexError: index 10 is out of bounds for axis 0 with size 10

### TFIDF

In [257]:
#https://stackoverflow.com/questions/37593293/how-to-get-tfidf-with-pandas-dataframe
tfidf = TfidfVectorizer(max_features=5000, min_df=30, max_df=0.5,stop_words=stopwords.words('english'))

In [258]:
X_tfidf = tfidf.fit_transform(X.values.astype('U'))

In [259]:
len(tfidf.get_feature_names())

579

In [260]:
X_tfidf_df = pd.DataFrame(X_tfidf.toarray(), columns = tfidf.get_feature_names())

In [261]:
df1 = pd.concat([originaltweets, X_tfidf_df], axis=1)

In [262]:
df1 = df1.reset_index(drop=True)

In [263]:
df1.head()

Unnamed: 0,tweet_id,tweet_posted_on,tweets,cleaned_tweets,user_location,user_location_cleaned,polarity,ability,able,access,...,workspace,world,worth,would,writer,writing,year,youtube,zero,zoom
0,1416181616846811137,2021-07-16 23:43:03+00:00,🏡 #RemoteWork is on the rise &amp; slowly beco...,rise slowly becoming normal which mean need re...,"New York, NY",US,1,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,1416181380279635970,2021-07-16 23:42:07+00:00,Opportunity to join a fantastic team at a hi-t...,opportunity join fantastic team tech fast pace...,London | New York,US,1,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,1416181225979473920,2021-07-16 23:41:30+00:00,Good news for #JobSeekers open to #RemoteWork!...,good news open this excellent report from remo...,Sydney | Hong Kong | Singapore,HK,1,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,1416180635903868934,2021-07-16 23:39:09+00:00,Four Ways to Energize a Post-Pandemic Workforc...,four way energize post pandemic workforce,"Chicago, IL",US,1,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,1416180231350759425,2021-07-16 23:37:33+00:00,🚑 These are the tools that will save your #Rem...,these tool that will save your google meet too...,"Duluth, GA",US,0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [264]:
tfidf_col = df1.columns[7:]
tfidf_col_names = df1[tfidf_col]

In [265]:
X_tfidf=tfidf_col_names
sc = StandardScaler()
X_tfidf_sc = sc.fit_transform(X_tfidf)
X_tfidf_sc[0:1]

array([[-0.05202947, -0.0605925 , -0.07706628, -0.05281009, -0.09073248,
        -0.05981162, -0.07195525, -0.05442906, -0.05165782, -0.06070334,
        -0.0902881 , -0.06254891, -0.06329241, -0.05458429, -0.05740021,
         6.18934831, -0.08148408, -0.06703513, -0.05335992, -0.06883784,
        -0.07300717, -0.0562829 , -0.0661336 , -0.12588817, -0.05408662,
        -0.07197645, -0.24756608, -0.06181959, -0.06079042, -0.07265217,
        -0.10182798, -0.09371571, -0.06963362, -0.08638938, -0.05766527,
        -0.07055041, -0.14777603, -0.07141397, -0.07288222, -0.10033531,
        -0.05164207, -0.09717317, 12.42298022, -0.05947399,  4.87447716,
         2.90784022, -0.10757786, -0.05986146, -0.09407135, -0.05679191,
        -0.07766292, -0.05752237, -0.06274608, -0.08108682, -0.06874397,
        -0.06121129, -0.09567407, -0.08170248, -0.22215826, -0.11270131,
        -0.05765915, -0.06963006, -0.08190516, -0.05982127, -0.05604869,
        -0.06183955,  5.08275651, -0.05680164, -0.1

In [266]:
from sklearn.cluster import KMeans
km_tfidf = KMeans(n_clusters=15, random_state=42)
km_tfidf.fit(X_tfidf_sc)

KMeans(n_clusters=15, random_state=42)

In [267]:
for i in range(3, 16):
    kmeans = KMeans(n_clusters=i, random_state=42)
    kmeans.fit(X_tfidf_sc)
    print(i, kmeans.score(X_tfidf_sc), silhouette_score(X_tfidf_sc, kmeans.labels_))

3 -6187597.521275744 -0.038879401915755354
4 -6180641.6222398775 -0.03457205172495074
5 -6139690.319021157 -0.0397058461157566
6 -6141834.717026974 -0.035018996402061485
7 -6122730.717792992 -0.05529930688223335
8 -6122383.641500191 -0.03425790558883513
9 -6101773.09894736 -0.03542206855726427
10 -6093166.549280853 -0.035714196670819834
11 -6084557.449298803 -0.037167380276226736
12 -6071406.36081785 -0.047053334239604484
13 -6056554.298489784 -0.036605538764313605
14 -6052835.335485422 -0.03981460894525785
15 -6037993.876886576 0.0010330051345629424


In [69]:
km_tfidf.cluster_centers_

array([[-0.06954607, -0.04856609, -0.02368941, ..., -0.04741743,
        -0.04745091,  0.17179848],
       [ 0.00276813,  0.00045208,  0.00094291, ...,  0.00188735,
         0.00188868,  0.00225768],
       [-0.06954607, -0.04856609, -0.02368941, ..., -0.04741743,
        -0.04745091, -0.06685293],
       ...,
       [-0.06954607, -0.04856609, -0.02368941, ..., -0.04741743,
        -0.04745091, -0.06685293],
       [-0.06954607, -0.04856609, -0.02368941, ..., -0.04741743,
        -0.04745091, -0.06685293],
       [-0.06954607, -0.04856609, -0.02368941, ..., -0.04741743,
        -0.04745091, -0.06685293]])

In [70]:
df1['km']=km_tfidf.labels_

In [71]:
df1['km'].value_counts()

1     21305
10      322
14      154
5        99
7        37
0        36
12       34
2        30
4        27
9        22
13       21
8        21
3        17
6        16
11       12
Name: km, dtype: int64

In [72]:
silhouette_score(X_tfidf_sc, km_tfidf.labels_)

-0.1343755446812336

In [73]:
word_frequency_by_cluster = df1.groupby('km').sum()

In [74]:
word_frequency_by_cluster.head()

Unnamed: 0_level_0,tweet_id,polarity,ability,able,absolutely,accelerated,access,accessible,accessory,accommodation,...,year,yesterday,york,young,youtube,ysense,zero,zombie,zone,zoom
km,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
0,5.096904e+19,26.0,0.0,0.0,0.0,0.0,0.0,0.0,0.363649,0.0,...,0.51587,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.228903
1,3.01778e+22,11007.0,46.009167,20.921029,5.445305,52.927129,36.233883,7.675012,8.888907,6.530289,...,90.572023,8.007466,9.079564,14.752173,28.175194,10.308512,20.559031,18.383023,22.289003,39.229311
2,4.256708e+19,18.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,5.402177,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,2.407026e+19,17.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.270438,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,3.823612e+19,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [75]:
print("Top terms per cluster:")
order_centroids = km_tfidf.cluster_centers_.argsort()[:, ::-1]
terms = tfidf.get_feature_names()
for i in range(14):
    print ("Cluster %d:" % i)
    for ind in order_centroids[i, :20]:
        print( ' %s' % terms[ind])

Top terms per cluster:
Cluster 0:
 efficient
 remain
 enabling
 profitable
 productive
 potential
 enter
 edition
 reach
 hard
 virtual
 requires
 setup
 consistent
 stop
 tool
 resource
 discover
 desktop
 control
Cluster 1:
 work
 remote
 apply
 hiring
 team
 employee
 business
 pandemic
 looking
 opportunity
 find
 tip
 developer
 life
 post
 design
 many
 job
 service
 full
Cluster 2:
 considerably
 dreaming
 longing
 expected
 tied
 scenery
 number
 rise
 five
 extra
 next
 might
 space
 year
 since
 people
 working
 remotely
 change
 home
Cluster 3:
 insightful
 decrease
 increase
 article
 director
 security
 check
 conducted
 habit
 reliable
 internal
 explained
 machine
 safe
 webinar
 detail
 changed
 across
 learning
 trend
Cluster 4:
 smartbox
 steady
 pocket
 airtel
 workflow
 wifi
 device
 internet
 keep
 home
 working
 shed
 sorry
 summit
 webcam
 mine
 modus
 friendship
 architecture
 acquisition
Cluster 5:
 infracloud
 reliability
 site
 database
 ready
 cloud
 native
