# Loading the McDonald's Reviews Dataset

In [3]:
import pandas as pd

# Load the reviews CSV (decoded as latin1) into `data` and show a quick overview.
print('------ Data Head ------')
try:
    data = pd.read_csv('McDonald_s_Reviews.csv', encoding='latin1')
except Exception as e:
    # Report the failure, then re-raise: the statements below all need `data`,
    # so continuing would only surface as a confusing NameError.
    print("Error reading file with latin1 encoding:", e)
    raise
print(data.head())

print('\n')
print('------ Data Info ------')
# DataFrame.info() writes its report to stdout itself and returns None,
# so wrapping it in print() would add a stray "None" line.
data.info()
print('\n')
print('------ Descriptive Statistics ------')
print(data.describe())


------ Data Head ------
   reviewer_id  store_name              category  \
0            1  McDonald's  Fast food restaurant   
1            2  McDonald's  Fast food restaurant   
2            3  McDonald's  Fast food restaurant   
3            4  McDonald's  Fast food restaurant   
4            5  McDonald's  Fast food restaurant   

                                       store_address  latitude   longitude  \
0  13749 US-183 Hwy, Austin, TX 78750, United States  30.460718 -97.792874   
1  13749 US-183 Hwy, Austin, TX 78750, United States  30.460718 -97.792874   
2  13749 US-183 Hwy, Austin, TX 78750, United States  30.460718 -97.792874   
3  13749 US-183 Hwy, Austin, TX 78750, United States  30.460718 -97.792874   
4  13749 US-183 Hwy, Austin, TX 78750, United States  30.460718 -97.792874   

  rating_count   review_time  \
0        1,240  3 months ago   
1        1,240    5 days ago   
2        1,240    5 days ago   
3        1,240   a month ago   
4        1,240  2 months ago   

 

## Data Cleaning

In [6]:
# Trim leading/trailing spaces from column names
data.columns = data.columns.str.strip()

# Show the resulting column names so they can be checked by eye
print("Corrected Columns in DataFrame:", data.columns)

# Fill missing coordinates with each column's median.
# The result is assigned back instead of calling fillna(..., inplace=True) on
# data[col]: that form is a chained in-place update on a column selection,
# which pandas 2.x warns about and which does not modify `data` when
# Copy-on-Write is enabled.
for coord in ('latitude', 'longitude'):
    data[coord] = data[coord].fillna(data[coord].median())


Corrected Columns in DataFrame: Index(['reviewer_id', 'store_name', 'category', 'store_address', 'latitude',
       'longitude', 'rating_count', 'review_time', 'review', 'rating'],
      dtype='object')


## Sentiment Analysis

In [9]:
from nltk.sentiment import SentimentIntensityAnalyzer
import nltk
# Fetch the VADER lexicon that the analyzer relies on
nltk.download('vader_lexicon')

# Build the sentiment scorer
sia = SentimentIntensityAnalyzer()

# Demo on the first five reviews only: each review is mapped to the dict
# returned by sia.polarity_scores
data['review'].head().apply(sia.polarity_scores)

[nltk_data] Downloading package vader_lexicon to
[nltk_data]     /Users/kanishkaw/nltk_data...


0    {'neg': 0.027, 'neu': 0.879, 'pos': 0.094, 'co...
1    {'neg': 0.0, 'neu': 0.791, 'pos': 0.209, 'comp...
2    {'neg': 0.051, 'neu': 0.949, 'pos': 0.0, 'comp...
3    {'neg': 0.0, 'neu': 1.0, 'pos': 0.0, 'compound...
4    {'neg': 0.143, 'neu': 0.857, 'pos': 0.0, 'comp...
Name: review, dtype: object