# Instructor Do: Intro to VADER Sentiment

In [1]:
# Initial imports
import os
from pathlib import Path
import pandas as pd
from newsapi import NewsApiClient
import nltk
from nltk.sentiment.vader import SentimentIntensityAnalyzer
from dotenv import load_dotenv
# Load variables from a local .env file into the process environment so the
# News API key can be read with os.getenv in a later cell.
load_dotenv()


True

In [2]:
# Download/Update the VADER Lexicon (the word list the analyzer scores against)
nltk.download('vader_lexicon')

# Initialize the VADER sentiment analyzer; this single instance is reused to
# score every article further down.
analyzer = SentimentIntensityAnalyzer()



[nltk_data] Downloading package vader_lexicon to
[nltk_data]     /Users/laanuadeyeye/nltk_data...


In [3]:
# Read the News API key from the "news_api" environment variable.
# NOTE(review): os.getenv returns None when the variable is not set and nothing
# here checks for that -- confirm the key is defined before running the notebook.
api_key = os.getenv("news_api")



In [4]:
# Create a newsapi client that authenticates with the key held in api_key
newsapi = NewsApiClient(api_key=api_key)



In [5]:
# Fetch English-language news articles matching "pinduoduo", sorted by relevancy.
# NOTE(review): a single page of at most 100 articles is requested, so the
# reported "totalResults" can be larger than the number of articles returned.
PDD_headlines = newsapi.get_everything(
    q="pinduoduo",
    language="en",
    page_size=100,
    sort_by="relevancy"
)

# Print the total number of matching articles reported by the API
print(f"Total articles about PDD : {PDD_headlines['totalResults']}")

# Show the first returned article as a sample of the response structure
PDD_headlines["articles"][0]



Total articles about PDD : 265


{'source': {'id': 'techcrunch', 'name': 'TechCrunch'},
 'author': 'Rita Liao',
 'title': 'Why Alibaba rival Pinduoduo is investing in agritech',
 'description': 'Back in 2018, Pinduoduo sent shock waves through the investor community when it raised $1.6 billion from a Nasdaq listing as a three-year-old company. Online shoppers in China were excited to see its rise as an alternative to long-time market dominators Aliba…',
 'url': 'http://techcrunch.com/2020/12/16/pinduoduo-agritech/',
 'urlToImage': 'https://techcrunch.com/wp-content/uploads/2020/12/Screen-Shot-2020-12-16-at-6.11.42-PM.png?w=607',
 'publishedAt': '2020-12-16T10:36:23Z',
 'content': 'Back in 2018, Pinduoduo sent shock waves through the investor community when it raised $1.6 billion from a Nasdaq listing as a three-year-old company. Online shoppers in China were excited to see its… [+5346 chars]'}

In [10]:
# Build the Pinduoduo sentiment scores DataFrame: one row per article holding
# its publication date, its text, and the VADER polarity scores of that text.
PDD_sentiments = []

for article in PDD_headlines["articles"]:
    text = article["content"]

    # Some articles come back without usable text content. Skip those
    # explicitly rather than wrapping the whole loop body in
    # `except AttributeError: pass`, which would also hide unrelated bugs.
    if not isinstance(text, str):
        continue

    # polarity_scores returns a dict with "compound", "pos", "neu" and "neg"
    sentiment = analyzer.polarity_scores(text)

    PDD_sentiments.append({
        "text": text,
        # "publishedAt" is an ISO timestamp; keep only the YYYY-MM-DD prefix
        "date": article["publishedAt"][:10],
        "compound": sentiment["compound"],
        "positive": sentiment["pos"],
        "negative": sentiment["neg"],
        "neutral": sentiment["neu"],
    })

# Column order for the final DataFrame
cols = ["date", "text", "compound", "positive", "negative", "neutral"]

# Create the DataFrame with the columns already in the desired order.
# Passing `columns=` (instead of indexing with PDD_df[cols] afterwards) also
# yields a correctly shaped empty frame when no article could be scored,
# where the column selection would otherwise raise a KeyError.
PDD_df = pd.DataFrame(PDD_sentiments, columns=cols)

PDD_df.head()



Unnamed: 0,date,text,compound,positive,negative,neutral
0,2020-12-16,"Back in 2018, Pinduoduo sent shock waves throu...",-0.0516,0.067,0.072,0.861
1,2021-01-12,"Pinduoduo, a rapidly growing Chinese e-commerc...",-0.7269,0.047,0.186,0.767
2,2020-12-24,China’s market regulator has opened an investi...,0.0,0.0,0.0,1.0
3,2021-01-04,"By Reuters Staff\r\nBEIJING, Jan 4 (Reuters) -...",-0.5994,0.0,0.126,0.874
4,2021-01-04,By Reuters Staff\r\nBEIJING (Reuters) - Local ...,-0.5994,0.0,0.122,0.878


In [11]:
# Get descriptive stats (count, mean, std, min, quartiles, max) for the
# numeric sentiment score columns of the DataFrame
PDD_df.describe()



Unnamed: 0,compound,positive,negative,neutral
count,93.0,93.0,93.0,93.0
mean,0.057874,0.056505,0.040559,0.902968
std,0.424809,0.060502,0.057876,0.073809
min,-0.8402,0.0,0.0,0.75
25%,-0.2023,0.0,0.0,0.861
50%,0.0,0.051,0.0,0.898
75%,0.4404,0.102,0.072,0.96
max,0.7717,0.209,0.225,1.0
