### Import Required Libraries

In [2]:
import pandas as pd
from textblob import TextBlob
import matplotlib.pyplot as plt
import seaborn as sns
from azure.storage.blob import BlobServiceClient

### Data Acquisition

In [6]:
# Load the dataset
data = pd.read_csv('twitter_training.csv') 
print(data.head())

     id        topic sentiment  \
0  2401  Borderlands  Positive   
1  2401  Borderlands  Positive   
2  2401  Borderlands  Positive   
3  2401  Borderlands  Positive   
4  2401  Borderlands  Positive   

                                               tweet  
0  im getting on borderlands and i will murder yo...  
1  I am coming to the borders and I will kill you...  
2  im getting on borderlands and i will kill you ...  
3  im coming on borderlands and i will murder you...  
4  im getting on borderlands 2 and i will murder ...  


### Data Preprocessing

In [10]:
# Define the text cleaning function
def clean_text(text):
    if not isinstance(text, str):  # Check if the value is not a string
        return ""  # Return an empty string for non-string or missing values
    # Split the text into words, keep only alphanumeric words, and join them back
    return ' '.join(word.lower() for word in text.split() if word.isalnum())

# Apply the cleaning function to the 'tweet' column
data['cleaned_tweets'] = data['tweet'].apply(clean_text)

# Display a few examples of original and cleaned tweets
print(data[['tweet', 'cleaned_tweets']].head())

                                               tweet  \
0  im getting on borderlands and i will murder yo...   
1  I am coming to the borders and I will kill you...   
2  im getting on borderlands and i will kill you ...   
3  im coming on borderlands and i will murder you...   
4  im getting on borderlands 2 and i will murder ...   

                                      cleaned_tweets  
0  im getting on borderlands and i will murder yo...  
1     i am coming to the borders and i will kill you  
2      im getting on borderlands and i will kill you  
3     im coming on borderlands and i will murder you  
4  im getting on borderlands 2 and i will murder ...  


### Perform Sentiment Analysis

In [11]:
data['sentiment_polarity'] = data['cleaned_tweets'].apply(lambda x: TextBlob(x).sentiment.polarity)
data['sentiment_label'] = data['sentiment_polarity'].apply(lambda x: 'Positive' if x > 0 else 'Negative')
print(data[['cleaned_tweets', 'sentiment_polarity', 'sentiment_label']].head())

                                      cleaned_tweets  sentiment_polarity  \
0  im getting on borderlands and i will murder yo...                 0.0   
1     i am coming to the borders and i will kill you                 0.0   
2      im getting on borderlands and i will kill you                 0.0   
3     im coming on borderlands and i will murder you                 0.0   
4  im getting on borderlands 2 and i will murder ...                 0.0   

  sentiment_label  
0        Negative  
1        Negative  
2        Negative  
3        Negative  
4        Negative  


### Save Processed Data

In [12]:
data.to_csv('processed_tweets.csv', index=False)