# 뉴스 감성 분석

In [82]:
# pip install transformers
# pip install tf-keras
# pip install vaderSentiment

In [1]:
from transformers import pipeline
from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer
import pandas as pd
import tensorflow
import re

In [2]:
# Load the ProQuest article export into a DataFrame
NEWS_ARTICLES_CSV = "../데이터셋/ProQuest_Articles_ALL4.csv"
news_df = pd.read_csv(NEWS_ARTICLES_CSV)

In [3]:
# The 'Source' column is not used downstream; remove it from the frame in place
news_df.drop(columns=['Source'], inplace=True)

In [4]:
# Date extraction helper (normalises to YYYY-MM-DD)
def extract_date(text):
    """Find the first recognisable date in ``text`` and return it as 'YYYY-MM-DD'.

    Two layouts are recognised:
      * numeric, year-first dates separated by '-', '/' or '.', e.g. '2010-11-18'
      * day-first textual dates with an optional ordinal suffix, e.g.
        '18 November 2010', '21st March 2020' or '1st March 2020'

    Parameters
    ----------
    text : str or missing value
        Raw cell content. Anything that is not a string (NaN, None, numbers)
        is treated as "no date".

    Returns
    -------
    str or None
        The date formatted as 'YYYY-MM-DD', or None when no pattern matches or
        the matched text is not a valid calendar date.
    """
    # Missing cells arrive as float NaN / None; re.search would raise TypeError on them.
    if not isinstance(text, str):
        return None

    match = re.search(
        r'(\d{4}[-/.]\d{2}[-/.]\d{2})|(\d{1,2}(?:st|nd|rd|th)?\s\w+\s\d{4})',
        text,
    )
    if match is None:
        return None  # no date-like pattern in the text

    # Strip an ordinal suffix ('21st' -> '21') so parsing does not depend on
    # the parser's tolerance for suffixes.
    date_str = re.sub(r'^(\d{1,2})(?:st|nd|rd|th)\b', r'\1', match.group(0))

    try:
        parsed_date = pd.to_datetime(date_str, errors='coerce', dayfirst=False)
    except (ValueError, TypeError, OverflowError):
        return None  # conversion failed

    # errors='coerce' yields NaT for impossible dates such as month 13;
    # NaT cannot be formatted with strftime.
    if pd.isna(parsed_date):
        return None
    return parsed_date.strftime('%Y-%m-%d')

# Convert the 'Date' column: run extract_date over every cell, then write the
# result back over the raw values
normalized_dates = news_df['Date'].apply(extract_date)
news_df['Date'] = normalized_dates


In [5]:
# Sanity check: show the rows whose 'Date' is missing
missing_date_mask = news_df['Date'].isna()
news_df[missing_date_mask]

Unnamed: 0,Title,Date


In [6]:
# Drop rows whose date is missing (None).
# Selecting by missing value instead of by hard-coded index labels keeps this
# cell correct when the dataset changes and safe to re-run: dropping labels
# that are already gone raises KeyError, dropna simply finds nothing to drop.
news_df.dropna(subset=['Date'], inplace=True)

In [7]:
# Load the sentiment-analysis model (Hugging Face DistilBERT checkpoint fine-tuned on SST-2)
SENTIMENT_MODEL_NAME = "distilbert-base-uncased-finetuned-sst-2-english"
sentiment_pipeline = pipeline(task="sentiment-analysis", model=SENTIMENT_MODEL_NAME)

# Sentiment scoring helper
def get_bert_sentiment(text):
    """Return a signed sentiment score for ``text`` using ``sentiment_pipeline``.

    Parameters
    ----------
    text : str or missing value
        Text to classify. Non-string values (NaN, None, numbers) and
        empty/whitespace-only strings are treated as neutral.

    Returns
    -------
    float
        ``+score`` when the top label is "POSITIVE", ``-score`` otherwise,
        where ``score`` is the value the pipeline reports for its top label.
        ``0.0`` for missing or blank input.
    """
    # Non-strings cover the NaN/None that pandas uses for missing cells;
    # calling .strip() on them would raise AttributeError.
    if not isinstance(text, str) or not text.strip():
        return 0.0  # blank input is scored as neutral

    # Let the tokenizer truncate at 512 *tokens*. Slicing text[:512] cuts
    # characters instead, which does not correspond to the token limit.
    result = sentiment_pipeline(text, truncation=True, max_length=512)
    top = result[0]
    score = top['score']
    # Positive label -> positive number, anything else -> negative number
    return score if top['label'] == "POSITIVE" else -score

# Score every headline ('Title' column) and store the result in 'Sentiment_Score'
title_scores = news_df['Title'].apply(get_bert_sentiment)
news_df['Sentiment_Score'] = title_scores





All PyTorch model weights were used when initializing TFDistilBertForSequenceClassification.

All the weights of TFDistilBertForSequenceClassification were initialized from the PyTorch model.
If your task is similar to the task the model of the checkpoint was trained on, you can already use TFDistilBertForSequenceClassification for predictions without further training.
Device set to use 0


In [8]:
# Display the frame, which now includes the 'Sentiment_Score' column
news_df

Unnamed: 0,Title,Date,Sentiment_Score
0,Tory senators kill climate change bill; NDP ou...,2010-11-18,-0.993788
1,Trump: Blame green policy,2014-09-20,-0.991146
2,Bill to trump local bans on fracking advances,2015-12-03,-0.884010
3,House OKs bill to trump local bans on fracking,2016-01-28,-0.621351
4,Trump Stresses Fossil Fuel Agenda In Energy Po...,2016-09-23,-0.964925
...,...,...,...
381,India should double down on climate action in ...,2025-02-12,-0.973281
382,"In Modi-Trump statement, climate mitigation an...",2025-02-14,-0.911531
383,Trump hits roadblock on offshore drilling: Bid...,2025-02-16,-0.983906
384,How Trump’s Paris Climate Agreement withdrawal...,2025-02-18,-0.968012


In [9]:
# Write the scored frame to CSV, leaving out the DataFrame index
news_df.to_csv(path_or_buf='../데이터셋/news_sentiment_analysis.csv', index=False)