In [14]:
import matplotlib.pyplot as plt
import pandas as pd
from sklearn.decomposition import LatentDirichletAllocation
from sklearn.feature_extraction.text import CountVectorizer, TfidfVectorizer
from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer

In [15]:
# Toy corpus: four short sentences reused by the vectorizers and the
# sentiment analysis in the cells below.
texts = [
    "This is the first document.",
    "The Document is pathetic.",
    "This document is really nice",
    "Is this the first document?",
]

In [16]:
# Bag-of-words term counter configured with scikit-learn's built-in
# English stop-word list.
vectorizer = CountVectorizer(
    stop_words='english',
)

In [17]:
# Learn the vocabulary from the corpus and build the document-term count matrix.
x = vectorizer.fit_transform(raw_documents=texts)

In [18]:
# Two-topic LDA model fitted on the document-term counts; the fixed
# random_state keeps the fitted topics repeatable between runs.
lda = LatentDirichletAllocation(
    n_components=2,
    random_state=42,
)
lda.fit(x)

In [19]:
# Display topics
def display_topics(model, feature_names, no_top_words):
    """Print the highest-weighted terms of every topic in a fitted model.

    Args:
        model: Fitted estimator exposing ``components_``; each row is read
            as one topic's per-term weights.
        feature_names: Sequence indexed by term column, e.g. the result of
            the vectorizer's ``get_feature_names_out()``.
        no_top_words: Number of terms to print for each topic.
    """
    for topic_idx, topic in enumerate(model.components_):
        # argsort() is ascending, so reverse it and keep the leading entries.
        top_term_ids = topic.argsort()[::-1][:no_top_words]
        print(f"Topic {topic_idx}:")
        print(" ".join(feature_names[i] for i in top_term_ids))


# The call belongs at cell level: indented into the function body it never
# ran, and would have recursed without end had the function been invoked.
display_topics(lda, vectorizer.get_feature_names_out(), 3)

In [20]:
# Trend Analysis with TF-IDF
# One row per document, one column per term that survives English
# stop-word removal; values are the TF-IDF weights.
tfidf_vectorizer = TfidfVectorizer(stop_words='english')
tfidf = tfidf_vectorizer.fit_transform(texts)
df = pd.DataFrame(
    tfidf.toarray(),
    columns=tfidf_vectorizer.get_feature_names_out(),
)
print(df)

# Analyze sentiments
# SentimentIntensityAnalyzer is imported in the notebook's import cell.
# Each polarity_scores() call returns a dict, so the list of results maps
# directly onto DataFrame rows (one per document).
analyzer = SentimentIntensityAnalyzer()
sentiments = [analyzer.polarity_scores(text) for text in texts]
sentiment_df = pd.DataFrame(sentiments)
print(sentiment_df)

   document      nice  pathetic    really
0  1.000000  0.000000  0.000000  0.000000
1  0.462637  0.000000  0.886548  0.000000
2  0.346182  0.663385  0.000000  0.663385
3  1.000000  0.000000  0.000000  0.000000
     neg    neu    pos  compound
0  0.000  1.000  0.000    0.0000
1  0.552  0.448  0.000   -0.5719
2  0.000  0.564  0.436    0.4754
3  0.000  1.000  0.000    0.0000
