# PyCon 2022: Sentiment Analysis of Tweets

First, we will retrieve data on tweets about PyCon from the Cosmos database.

In [1]:
import json
import os
import re

import numpy as np
import pandas as pd
from azure.cosmos import exceptions, CosmosClient, PartitionKey
from bokeh.io import output_notebook
from bokeh.plotting import figure, show

# Initialize and create the Cosmos client.
# The endpoint is not a secret, so it keeps its previous value as a default and
# can be overridden with COSMOS_ENDPOINT. The account key is a credential and is
# read from the environment so that it is never stored in the notebook.
# NOTE(review): the key that was previously committed here should be rotated.
endpoint = os.environ.get("COSMOS_ENDPOINT", "https://tweets-pycon.documents.azure.com:443/")
key = os.environ.get("COSMOS_KEY")
if not key:
    # Fail early with an actionable message instead of an opaque auth error later.
    raise RuntimeError(
        "Set the COSMOS_KEY environment variable to the Cosmos DB account key "
        "before running this notebook."
    )

client = CosmosClient(endpoint, key)

# Retrieve database and container
database = client.get_database_client('tweetsdatabase')
container = database.get_container_client('tweetscollection')

### Overview
A summary of how the collected tweets are distributed across the sentiment categories (positive, negative, neutral, and mixed).

In [2]:
# Fetch only the sentiment label of every stored tweet.
tweets_query = "SELECT tweets.sentiment FROM tweetscollection tweets"

tweets = list(container.query_items(
    query=tweets_query,
    enable_cross_partition_query=True
))

print("So far, there have been {0} tweets made about PyCon 2022!\n".format(len(tweets)))

print("The {0} tweets have been classified as follows:".format(len(tweets)))

df = pd.DataFrame(tweets, columns=["sentiment"])

# Filter dataframes: one frame and one row count per sentiment label.
# The *_count values are reused by the "Overall Analysis" cell.
df_positive = df[df.sentiment == "positive"]
pos_count = df_positive.shape[0]
df_negative = df[df.sentiment == "negative"]
neg_count = df_negative.shape[0]
df_neutral = df[df.sentiment == "neutral"]
neutral_count = df_neutral.shape[0]
df_mixed = df[df.sentiment == "mixed"]
mixed_count = df_mixed.shape[0]

# Each label is printed exactly once, paired with its own count.
print("\t\n positive: ", pos_count, "\t\n negative: ", neg_count, "\t\n neutral: ", neutral_count, "\t\n mixed: ", mixed_count)

So far, there have been 437 tweets made about PyCon 2022!

The 437 tweets have been classified as follows:
	
 positive:  103 	
 negative:  32 	
 negative:  295 	
 neutral:  295 	
 mixed:  7


### Positive Tweets

In [3]:
# Pull the text of every tweet whose positive confidence score exceeds 0.98,
# i.e. the tweets scored as most strongly positive.
tweets_pos_query = (
    "SELECT {\"tweet\":tweets.text} AS tweet_info "
    "FROM tweetscollection tweets "
    "WHERE tweets.confidence_scores.positive > 0.98"
)
tweets_positive = [
    *container.query_items(enable_cross_partition_query=True, query=tweets_pos_query)
]

# Flatten the nested tweet_info objects into a single-level table for printing.
tweets_pos_display = pd.json_normalize(tweets_positive, max_level=1)

print(
    (
        "\nFollowing are the {0} tweets that have been categorized as extremely positive "
        "(confidence score is > 0.98):"
    ).format(len(tweets_positive))
)
print(tweets_pos_display)


Following are the 27 tweets that have been categorized as extremely positive (confidence score is > 0.98):
                                     tweet_info.tweet
0   RT @Yhg1s: I'm at the Salt Palace convention c...
1   RT @adem_onar: Best #MachineLearning #Algorith...
2   Best #MachineLearning #Algorithms for classifi...
3   RT @adem_onar: Best #MachineLearning #Algorith...
4   RT @adem_onar: Best #MachineLearning #Algorith...
5   RT @adem_onar: Best #MachineLearning #Algorith...
6   RT @falconcode16: Best open-source app framewo...
7   RT @adem_onar: Best #MachineLearning #Algorith...
8   RT @fikinft: Some great @apocalypticapes backg...
9   @pystar @reuvenmlerner @pycon If you write abo...
10  RT @TorPan13: @reuvenmlerner @pycon Any good b...
11  Two more days until I head out to @pycon! Exci...
12  @reuvenmlerner @pystar @pycon I second the com...
13  Aún estamos necesitando personas para el volun...
14  RT @_AGOTO: Alchemy stars - Azure\n\nSpecial t...
15  RT @_AGOTO: Alchemy star

### Negative Tweets

In [4]:
# Pull the text of every tweet whose negative confidence score exceeds 0.98,
# i.e. the tweets scored as most strongly negative.
tweets_neg_query = (
    "SELECT {\"tweet\":tweets.text} AS tweet_info "
    "FROM tweetscollection tweets "
    "WHERE tweets.confidence_scores.negative > 0.98"
)
tweets_negative = [
    *container.query_items(enable_cross_partition_query=True, query=tweets_neg_query)
]

# Flatten the nested tweet_info objects into a single-level table for printing.
tweets_neg_display = pd.json_normalize(tweets_negative, max_level=1)

print(
    (
        "\nFollowing are the {0} tweets that have been categorized as extremely negative "
        "(confidence score is > 0.98):"
    ).format(len(tweets_negative))
)
print(tweets_neg_display)


Following are the 3 tweets that have been categorized as extremely negative (confidence score is > 0.98):
                                    tweet_info.tweet
0  RT @energydata123: Day 87, 88:\nCame across an...
1  RT @energydata123: Day 87, 88:\nCame across an...
2  @MrEinFan Im still mostly using sorcery, need ...


### Neutral Tweets

In [5]:
# Pull the text of every tweet whose neutral confidence score exceeds 0.98,
# i.e. the tweets scored as most strongly neutral.
tweets_neutral_query = (
    "SELECT {\"tweet\":tweets.text} AS tweet_info "
    "FROM tweetscollection tweets "
    "WHERE tweets.confidence_scores.neutral > 0.98"
)
tweets_neutral = [
    *container.query_items(enable_cross_partition_query=True, query=tweets_neutral_query)
]

# Flatten the nested tweet_info objects into a single-level table for printing.
tweets_neutral_display = pd.json_normalize(tweets_neutral, max_level=1)

print(
    (
        "\nFollowing are the {0} tweets that have been categorized as extremely neutral "
        "(confidence score is > 0.98):"
    ).format(len(tweets_neutral))
)
print(tweets_neutral_display)


Following are the 12 tweets that have been categorized as extremely neutral (confidence score is > 0.98):
                                     tweet_info.tweet
0   Target is looking for a Sr. Data Scientist - O...
1   Az.Cdn v2.0.0-preview https://t.co/I6kvJXzPO3 ...
2   Target is looking for a Sr. Data Scientist - O...
3   Azure Cosmos DB Cassandra API: A true differen...
4   Lumen is looking for a SR MGR DATA SCIENCE\nht...
5   History of #machinelearning &amp; #DataScience...
6   Microsoft Azure Data Scientist Associate - DP-...
7   Global Shutter Camera Module\n#Robotics #AI #I...
8   Global Shutter Camera Module\n#Robotics #AI #I...
9   #youtube Belajar Linear Programming Python Goo...
10  Azure Cost Management and Billing updates – Ap...
11  Azure Cost Management and Billing updates – Ap...


### Mixed Tweets

In [6]:
# Fetch the text of every tweet whose sentiment label is 'mixed'.
# A dedicated variable name is used so this cell does not overwrite the
# overview cell's `tweets_query`.
tweets_mixed_query = "SELECT tweets.text FROM tweetscollection tweets WHERE tweets.sentiment = 'mixed'"

tweets_mixed = list(container.query_items(
    query=tweets_mixed_query,
    enable_cross_partition_query=True
))

df_mixed_tweets = pd.DataFrame(tweets_mixed, columns=["text"])

print("Following are {0} tweets that do not have a high confidence score in any one sentiment, and are therefore categorized as 'mixed'.".format(len(tweets_mixed)))

print(df_mixed_tweets)

Following are 7 tweets that do not have a high confidence score in any one sentiment, and are therefore cateforized as 'mixed'.
                                                text
0  @SamHLevey R is great for what it is, a better...
1  Here's the weather forecast for Salt Lake City...
2  RT @rootsecdev: Found this cool little script ...
3  @AkitaOnRails Muuuito legal isso. Ganhou uma e...
4  RT @Tofag_E: Wrote an API call program, I've s...
5  @hmmmmmbbb @pycon It's like a foreign language...
6  @s_gruppetta_ct @pystar @pycon Oh, right — I s...


### Overall Analysis

In [7]:
# Per-sentiment tweet counts computed in the Overview cell; `sentiment_labels`
# lists the matching label for each position in `total_tweets`.
sentiment_labels = ['positive', 'negative', 'neutral', 'mixed']
total_tweets = [pos_count, neg_count, neutral_count, mixed_count]

max_value = max(total_tweets)

# list.index returns the first match, so ties resolve in label order
# (positive, negative, neutral, mixed). When every count is zero there is
# no data to summarise, so no sentiment is reported.
overall_sentiment = None
if max_value > 0:
    overall_sentiment = sentiment_labels[total_tweets.index(max_value)]

if overall_sentiment is None:
    print("No tweets have been classified yet, so no overall sentiment can be reported.")
else:
    # Format the label into the sentence so no stray space precedes the period.
    print("From the sentiment analysis, we have detected that so far, the overall sentiment of tweets at PyCon 2022 is {0}.".format(overall_sentiment))


From the sentiment analysis, we have detected that so far, the overall sentiment of tweets at PyCon 2022 is neutral .
