In [1]:
# Mount Google Drive into the Colab filesystem at /content/drive.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [2]:
import os

# Use the absolute path under the Drive mount point (/content/drive) so this
# cell is idempotent: a relative path only resolves from the initial working
# directory and fails when the cell is run a second time.
os.chdir('/content/drive/MyDrive/twitter_sentiment')

## Install Packages

In [None]:
!pip install transformers

## Import Packages

In [4]:
import pandas as pd
from tqdm.notebook import tqdm
# Register tqdm with pandas so that `progress_apply` (used later when scoring
# the tweets) is available and shows a progress bar.
tqdm.pandas()

## Load Data

In [5]:
# Read the collected tweets into a DataFrame and display it.
df = pd.read_csv(
    'data/Yapay_Zeka_Stratejisi_2021_08_20_2021_08_25_df.csv',
    engine='python',
    encoding='utf-8',
)
df

Unnamed: 0.1,Unnamed: 0,id,created_at,date,time,tweet
0,0,1430303911487762438,2021-08-24 23:00:00+00:00,2021-08-24,23:00:00,Türkiye'nin Yapay Zeka Stratejisi... Ulusal Ya...
1,1,1430296179934470146,2021-08-24 22:29:17+00:00,2021-08-24,22:29:17,Açıklanan Türkiye Yapay Zeka Stratejisi ile il...
2,2,1430291373366857736,2021-08-24 22:10:11+00:00,2021-08-24,22:10:11,@ZB6868 @firstjedii @varank Bence biraz absürd...
3,3,1430289767976116229,2021-08-24 22:03:48+00:00,2021-08-24,22:03:48,Türkiye’nin İlk Ulusal Yapay Zeka Stratejisi a...
4,4,1430279189106675716,2021-08-24 21:21:46+00:00,2021-08-24,21:21:46,Türkiye'nin İlk Ulusal Yapay Zeka Stratejisi a...
...,...,...,...,...,...,...
657,657,1428589924807217157,2021-08-20 05:29:14+00:00,2021-08-20,05:29:14,Bugünkü resmi gazete ile “Ulusal Yapay Zeka S...
658,658,1428583261966684161,2021-08-20 05:02:46+00:00,2021-08-20,05:02:46,Bugünkü Resmi Gazete'de (20.08.2021 T-31574 sa...
659,659,1428574478536151042,2021-08-20 04:27:51+00:00,2021-08-20,04:27:51,Ulusal Yapay Zekâ Stratejisi (2021-2025) ile İ...
660,660,1428569701056122883,2021-08-20 04:08:52+00:00,2021-08-20,04:08:52,Yapay zeka alanında çalışan arkadaşlar için ön...


## Load Transformers Model

In [6]:
from transformers import AutoModelForSequenceClassification, AutoTokenizer, pipeline

# Checkpoint shared by the model and its tokenizer. The first load downloads a
# large weights file, so this cell takes a while.
MODEL_NAME = "savasy/bert-base-turkish-sentiment-cased"
model = AutoModelForSequenceClassification.from_pretrained(MODEL_NAME)
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)

# Bundle model and tokenizer into a callable sentiment-analysis pipeline.
sa = pipeline("sentiment-analysis", model=model, tokenizer=tokenizer)

Downloading:   0%|          | 0.00/596 [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/443M [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/39.0 [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/263k [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/112 [00:00<?, ?B/s]

## PreProcess

In [7]:
import re
import sys
from nltk.stem.porter import PorterStemmer


def preprocess_word(word):
    """Normalize a single token.

    Runs of the same character are squeezed down to exactly two
    (``funnnnny`` -> ``funny``), then every hyphen and apostrophe is removed.

    Args:
        word: The token to clean.

    Returns:
        The cleaned token as a string.
    """
    # Any character followed by one or more copies of itself becomes a pair.
    squeezed = re.sub(r'(.)\1+', r'\1\1', word)
    # Strip "-" and "'" wherever they occur in the token.
    return re.sub(r'(-|\')', '', squeezed)


def is_valid_word(word):
    """Return True if ``word`` starts with a letter and otherwise contains
    only letters, digits, dots or underscores.

    The check is Unicode-aware so that Turkish letters (ç, ğ, ı, ö, ş, ü, İ)
    are accepted; an ASCII-only character class would reject most words of a
    Turkish tweet.

    Args:
        word: The token to validate.

    Returns:
        bool: Whether the token is a valid word.
    """
    # [^\W\d_] == "a word character that is neither a digit nor an underscore",
    # i.e. any Unicode letter. [\w.] covers letters, digits, "_" and ".".
    return re.search(r'^[^\W\d_][\w.]*$', word) is not None


def handle_emojis(tweet):
    """Replace text emoticons with the tokens `` EMO_POS `` / `` EMO_NEG ``.

    The rules are applied one after another, in the order listed, each one
    operating on the output of the previous rule.

    Args:
        tweet: The tweet text.

    Returns:
        The text with every matched emoticon replaced by a sentiment token
        (padded with one space on each side).
    """
    emoticon_rules = (
        # Smile -- :), : ), :-), (:, ( :, (-:, :')
        (r'(:\s?\)|:-\)|\(\s?:|\(-:|:\'\))', ' EMO_POS '),
        # Laugh -- :D, : D, :-D, xD, x-D, XD, X-D
        (r'(:\s?D|:-D|x-?D|X-?D)', ' EMO_POS '),
        # Love -- <3, :*
        (r'(<3|:\*)', ' EMO_POS '),
        # Wink -- ;-), ;), ;-D, ;D, (;,  (-; and the winking-face emoji
        (r'(;-?\)|;-?D|\(-?;)|😉', ' EMO_POS '),
        # Sad -- :-(, : (, :(, ):, )-:
        (r'(:\s?\(|:-\(|\)\s?:|\)-:)', ' EMO_NEG '),
        # Cry -- :,(, :'(, :"(
        (r'(:,\(|:\'\(|:"\()', ' EMO_NEG '),
    )
    for pattern, token in emoticon_rules:
        tweet = re.sub(pattern, token, tweet)
    return tweet


def preprocess_tweet(tweet):
    """Clean a raw tweet for sentiment analysis.

    Steps, in order: lower-case the text, drop standalone numbers, remove
    URLs, @handles, hashtags and the retweet marker, turn runs of dots into a
    space, strip surrounding spaces/quotes, and finally normalize every word
    with ``preprocess_word``.

    Args:
        tweet: The raw tweet text (a ``str``).

    Returns:
        The cleaned tweet: the processed words joined by single spaces.
    """
    # str.lower() turns the Turkish capital "İ" into "i" followed by a
    # combining dot (U+0307); map it to a plain "i" first so that no stray
    # combining character ends up in the text.
    # NOTE(review): a plain "I" is still lowered to "i" rather than the Turkish
    # "ı"; left as is because tweets may also contain non-Turkish words.
    tweet = tweet.replace('İ', 'i').lower()

    # Drop standalone numbers. Lookarounds are used instead of consuming the
    # neighbouring whitespace so that adjacent numbers ("1 2 3") and a tweet
    # that is only a number are removed as well.
    tweet = re.sub(r'(?<!\S)\d+(?!\S)', ' ', tweet)

    # Remove URLs
    tweet = re.sub(r'((www\.[\S]+)|(https?://[\S]+))', '', tweet)

    # Remove @handles
    tweet = re.sub(r'@[\S]+', '', tweet)

    # Remove hashtags (the whole "#tag", not only the "#")
    tweet = re.sub(r'#(\S+)', '', tweet)

    # Remove RT (retweet)
    tweet = re.sub(r'\brt\b', '', tweet)

    # Replace 2+ dots with space
    tweet = re.sub(r'\.{2,}', ' ', tweet)

    # Strip space, " and ' from tweet
    tweet = tweet.strip(' "\'')

    # split() already treats any run of whitespace as a single separator, so
    # the words come out normalized without a separate whitespace pass.
    return ' '.join(preprocess_word(word) for word in tweet.split())

## Sentiment Analysis

In [24]:
# Minimum score a prediction needs in order to be kept by the filters below.
THRESHOLD = 0.95


def get_sentiment_analysis(tweet):
    """Run the sentiment pipeline on one tweet.

    Args:
        tweet: The tweet text; it is converted with ``str()`` before scoring.

    Returns:
        pd.Series: Two values taken from the first prediction returned by
        ``sa`` -- its label, and its score rounded to 4 decimal places.
    """
    top_prediction = sa(str(tweet))[0]
    label = top_prediction["label"]
    score = round(top_prediction["score"], 4)
    return pd.Series([label, score])

In [13]:
# Create the two result columns with placeholder values first (presumably so
# that the two-column assignment below has existing targets).
df["sentiment_label"] = "-"
df["sentiment_score"] = -1
# Score every tweet; the label and score returned for each row are written to
# the two columns in order.
df[["sentiment_label", "sentiment_score"]] = df["tweet"].progress_apply(get_sentiment_analysis)

# Store the float conversion back on the frame (its result was previously
# computed and then discarded), then display the scores.
df["sentiment_score"] = df["sentiment_score"].astype(float)
df["sentiment_score"]

  0%|          | 0/662 [00:00<?, ?it/s]

0      0.6704
1      0.9017
2      0.9990
3      0.5813
4      0.6870
        ...  
657    0.7368
658    0.8500
659    0.7597
660    0.9103
661    0.8053
Name: sentiment_score, Length: 662, dtype: float64

In [14]:
# Display the frame, which now includes sentiment_label and sentiment_score.
df

Unnamed: 0.1,Unnamed: 0,id,created_at,date,time,tweet,sentiment_label,sentiment_score
0,0,1430303911487762438,2021-08-24 23:00:00+00:00,2021-08-24,23:00:00,Türkiye'nin Yapay Zeka Stratejisi... Ulusal Ya...,negative,0.6704
1,1,1430296179934470146,2021-08-24 22:29:17+00:00,2021-08-24,22:29:17,Açıklanan Türkiye Yapay Zeka Stratejisi ile il...,positive,0.9017
2,2,1430291373366857736,2021-08-24 22:10:11+00:00,2021-08-24,22:10:11,@ZB6868 @firstjedii @varank Bence biraz absürd...,positive,0.9990
3,3,1430289767976116229,2021-08-24 22:03:48+00:00,2021-08-24,22:03:48,Türkiye’nin İlk Ulusal Yapay Zeka Stratejisi a...,positive,0.5813
4,4,1430279189106675716,2021-08-24 21:21:46+00:00,2021-08-24,21:21:46,Türkiye'nin İlk Ulusal Yapay Zeka Stratejisi a...,negative,0.6870
...,...,...,...,...,...,...,...,...
657,657,1428589924807217157,2021-08-20 05:29:14+00:00,2021-08-20,05:29:14,Bugünkü resmi gazete ile “Ulusal Yapay Zeka S...,negative,0.7368
658,658,1428583261966684161,2021-08-20 05:02:46+00:00,2021-08-20,05:02:46,Bugünkü Resmi Gazete'de (20.08.2021 T-31574 sa...,negative,0.8500
659,659,1428574478536151042,2021-08-20 04:27:51+00:00,2021-08-20,04:27:51,Ulusal Yapay Zekâ Stratejisi (2021-2025) ile İ...,negative,0.7597
660,660,1428569701056122883,2021-08-20 04:08:52+00:00,2021-08-20,04:08:52,Yapay zeka alanında çalışan arkadaşlar için ön...,positive,0.9103


In [20]:
# index=False: the row index only repeats the position already stored in the
# "Unnamed: 0" columns; writing it again adds one more such column every time
# the file is saved and reloaded.
df.to_csv("data/Yapay_Zeka_Stratejisi_2021_08_20_2021_08_25_df_sentiment_analysis_all.csv", index=False)

## Filter Only Positive Tweets Above Threshold

In [25]:
# Keep the rows labelled "positive" whose score is strictly above THRESHOLD.
df_positive_t = df.loc[
    lambda frame: frame["sentiment_label"].eq("positive")
    & frame["sentiment_score"].gt(THRESHOLD)
]
df_positive_t

Unnamed: 0.1,Unnamed: 0,id,created_at,date,time,tweet,sentiment_label,sentiment_score
2,2,1430291373366857736,2021-08-24 22:10:11+00:00,2021-08-24,22:10:11,@ZB6868 @firstjedii @varank Bence biraz absürd...,positive,0.9990
11,11,1430266493892276225,2021-08-24 20:31:19+00:00,2021-08-24,20:31:19,"Türkiye, yapay zekâ stratejisini yayımlayan ül...",positive,0.9763
16,16,1430249798922346500,2021-08-24 19:24:59+00:00,2021-08-24,19:24:59,"Ulusal Yapay Zeka Stratejisi Programı, savunma...",positive,0.9746
20,20,1430246235554951171,2021-08-24 19:10:49+00:00,2021-08-24,19:10:49,#YapayZekaÇağı başlıyor 📣 🇹🇷 @udijital ve @TC...,positive,0.9616
26,26,1430234962431729669,2021-08-24 18:26:02+00:00,2021-08-24,18:26:02,YAPAY ZEKA ÇAĞI @dijital ile @TCSanayi işbir...,positive,0.9614
...,...,...,...,...,...,...,...,...
581,581,1428626061311594500,2021-08-20 07:52:50+00:00,2021-08-20,07:52:50,🇹🇷Türkiye'nin 2021-2025 Ulusal Yapay Zeka Stra...,positive,0.9887
586,586,1428622499676397577,2021-08-20 07:38:41+00:00,2021-08-20,07:38:41,🧠 Ulusal Yapay Zeka Stratejisi 24 Ağustos'ta Y...,positive,0.9581
595,595,1428617623156273158,2021-08-20 07:19:18+00:00,2021-08-20,07:19:18,Kısa fıkra: Türkiye'nin yapay zeka stratejisi...,positive,0.9728
614,614,1428609046337228800,2021-08-20 06:45:13+00:00,2021-08-20,06:45:13,Türkiye'nin yapay zeka stratejisi belirlendi ...,positive,0.9842


In [26]:
# index=False: the row index only repeats the original row position already
# stored in the "Unnamed: 0" columns; writing it again adds one more such
# column every time the file is saved and reloaded.
df_positive_t.to_csv("data/Yapay_Zeka_Stratejisi_2021_08_20_2021_08_25_df_sentiment_analysis_positive.csv", index=False)

## Filter Only Negative Tweets Above Threshold

In [27]:
# Keep the rows labelled "negative" whose score is strictly above THRESHOLD.
df_negative_t = df.loc[
    lambda frame: frame["sentiment_label"].eq("negative")
    & frame["sentiment_score"].gt(THRESHOLD)
]
df_negative_t

Unnamed: 0.1,Unnamed: 0,id,created_at,date,time,tweet,sentiment_label,sentiment_score
5,5,1430277484822421504,2021-08-24 21:15:00+00:00,2021-08-24,21:15:00,Füsun Sarp Nebil yazdı: Yapay zekâ stratejisi...,negative,0.9978
14,14,1430255053424693256,2021-08-24 19:45:52+00:00,2021-08-24,19:45:52,Sabırla dinledim konuşmaları ve çok sığ buldum...,negative,0.9994
35,35,1430212749519429635,2021-08-24 16:57:46+00:00,2021-08-24,16:57:46,1 Yıl Önce de 1 Milyon Yazılımcı Yetiştireceği...,negative,0.9987
59,59,1430173539391451138,2021-08-24 14:21:57+00:00,2021-08-24,14:21:57,Yapay zeka stratejisi dokümanına bakıyorduk. Y...,negative,0.9912
105,105,1430127840532570115,2021-08-24 11:20:22+00:00,2021-08-24,11:20:22,Yapay zeka stratejisi güzel bir girişim olabil...,negative,0.9887
219,219,1430086544736538663,2021-08-24 08:36:16+00:00,2021-08-24,08:36:16,"Sizden, 'Ulusal yapay zeka' degil, ancak, 'Ulu...",negative,0.9867
221,221,1430085795017330688,2021-08-24 08:33:17+00:00,2021-08-24,08:33:17,Ulusal Yapay Zeka Stratejisi/ İthal Yapay Zeka...,negative,0.9813
223,223,1430084739973664779,2021-08-24 08:29:06+00:00,2021-08-24,08:29:06,#YapayZekaÇağı na Hazırlanan ve amacı öncüleri...,negative,0.9688
250,250,1430077077462659072,2021-08-24 07:58:39+00:00,2021-08-24,07:58:39,Ulusal Yapay Zeka Stratejisi diye bakan Varank...,negative,0.9989
255,255,1430075226814128136,2021-08-24 07:51:18+00:00,2021-08-24,07:51:18,@erdl1971 Tribüne oynama temalı yapay zeka str...,negative,0.9905


In [28]:
# index=False: the row index only repeats the original row position already
# stored in the "Unnamed: 0" columns; writing it again adds one more such
# column every time the file is saved and reloaded.
df_negative_t.to_csv("data/Yapay_Zeka_Stratejisi_2021_08_20_2021_08_25_df_sentiment_analysis_negative.csv", index=False)