***

# Word Clouds

- By [Zachary Kilhoffer](https://zkilhoffer.github.io/)
- Updated 2024-06-17

***

## Description

- Creating quick word clouds can be a nice and intuitive way to look at similarities between texts.
- The code below builds one word cloud per document and may be a useful starting point for exploring text.

***

In [None]:
import os, re, warnings, random
import pandas as pd
import numpy as np
import openpyxl
import matplotlib
import matplotlib.pyplot as plt
from wordcloud import WordCloud, STOPWORDS, ImageColorGenerator

In [None]:
# Display tweaks for notebook output.
pd.set_option("display.max_colwidth", 200)  # max characters of text shown within a cell
# Use None, pandas' documented "no limit" value, to show every column and row.
# False is accepted as the integer 0, which requests terminal-size
# auto-detection; the pandas docs note that this cannot work correctly in a
# notebook, so output may still be truncated.
pd.set_option("display.max_columns", None)
pd.set_option("display.max_rows", None)
# Blanket filter: hides every warning category for the rest of the session.
warnings.filterwarnings("ignore")

In [None]:
# Read the Excel workbook into a DataFrame. The path is relative, so it is
# resolved against the notebook's current working directory.
df = pd.read_excel(io='../data/df_embeddings_publicdomain.xlsx')

In [None]:
# Collect the full text of every document for the word clouds.

# Document labels, ordered from most to fewest rows (value_counts order).
documents_list = df["document"].value_counts().index.to_list()

# Join each document's text cells in one grouped pass rather than re-filtering
# the whole frame once per document. Missing cells are dropped and the rest are
# coerced to str, because str.join raises TypeError on NaN / non-string values.
text_by_document = {
    label: " ".join(cells.dropna().astype(str))
    for label, cells in df.groupby("document", sort=False)["full_control_text"]
}

# Keys are the string form of each label, which is how the plotting loop looks
# them up. A document with no usable text maps to an empty string.
d = {str(doc): text_by_document.get(doc, "") for doc in documents_list}
d

In [None]:
# Draw one word cloud figure per document, visiting documents in sorted order.
cloud_options = {
    "background_color": "white",
    "collocations": False,  # count single words only, no two-word phrases
    "stopwords": STOPWORDS,
}  # optional extras: contour_color="white", contour_width=1
for doc in sorted(documents_list):
    doc_label = f"{doc}"
    # generate() returns the WordCloud itself, so construction can be chained.
    wordcloud = WordCloud(**cloud_options).generate(d[doc_label])
    plt.imshow(wordcloud, interpolation="bilinear")
    plt.axis("off")
    plt.title(doc_label)
    plt.show()

In [None]:
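# # to display all wordcloud figures together
# # NOTE: disabled draft. `fit_words` expects a word -> frequency mapping, but the
# # values in `d` are raw text strings, so `generate(v)` would be needed instead.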
# def display_wordcloud(top_words, title, n_components):
#     plt.figure()
#     j = np.ceil(n_components/4)
#     j = int(j)
#     print(j)

#     i=1
#     for k, v in d.items():
#         plt.subplot(j, 4, i).set_title("Document: " + k)
#         plt.plot()
#         plt.imshow(WordCloud().fit_words(v))
#         plt.axis("off")
#         i+=1
#     # fig.suptitle(title)
#     plt.show()