# Influencer Selected using Text Analytics

### Install Library

In [2]:
# !pip install numpy
# !pip install pandas
# !pip install matplotlib
# !pip install seaborn
# !pip install beautifulsoup4
# !pip install scikit-learn
# !pip install tweepy
# !pip install pythainlp
# !pip install tqdm
# !pip install imblearn
# !pip install emoji

# '''If run this notebook on Colab'''

# !pip install ortools
# !pip -q install torch==1.5.0 torchtext==0.4.0 torchvision==0.6.0 pytorch-lightning==1.3.0
# !pip -q install transformers==3.5.0 thai2transformers==0.1.2
# !pip install tensorflow_text

Collecting ortools
  Downloading ortools-9.0.9048-cp37-cp37m-manylinux1_x86_64.whl (14.4 MB)
[K     |████████████████████████████████| 14.4 MB 96 kB/s 
Installing collected packages: ortools
Successfully installed ortools-9.0.9048
[K     |████████████████████████████████| 752.0 MB 9.8 kB/s 
[K     |████████████████████████████████| 53 kB 1.6 MB/s 
[K     |████████████████████████████████| 6.6 MB 18.0 MB/s 
[K     |████████████████████████████████| 804 kB 45.8 MB/s 
[K     |████████████████████████████████| 282 kB 51.2 MB/s 
[K     |████████████████████████████████| 829 kB 46.3 MB/s 
[K     |████████████████████████████████| 119 kB 54.4 MB/s 
[K     |████████████████████████████████| 636 kB 45.5 MB/s 
[K     |████████████████████████████████| 1.3 MB 38.8 MB/s 
[K     |████████████████████████████████| 294 kB 43.5 MB/s 
[K     |████████████████████████████████| 142 kB 54.9 MB/s 
[?25h  Building wheel for future (setup.py) ... [?25l[?25hdone
[K     |███████████████████████

### Import Library

In [3]:
# Data imputation
import pandas as pd
import numpy as np

#Twitter API
import tweepy
import re
import emoji

import torch

#datasets
from datasets import load_dataset

#transformers
from transformers import (
    AutoConfig,
    RobertaConfig,
    AutoModelForMaskedLM,
    AutoTokenizer,
    AutoModelForSequenceClassification,
    CamembertTokenizer,
    pipeline,
    Trainer,
    TrainingArguments,
    DataCollatorWithPadding
)

#thai2transformers
import thai2transformers
from thai2transformers.preprocess import process_transformers, replace_rep_after, rm_brackets, rm_useless_spaces
from thai2transformers.metrics import classification_metrics

# Extra import for preparing dataset
from imblearn.under_sampling import RandomUnderSampler
from collections import Counter

from tqdm.auto import tqdm

# For USE
import tensorflow_hub as hub
import tensorflow_text
import tensorflow as tf
from sklearn.metrics.pairwise import cosine_similarity

from sklearn.metrics import classification_report, confusion_matrix

from ortools.linear_solver import pywraplp

pd.set_option('display.max_rows', 500)



## Prepare Function for Tweet

### Get User Information

In [4]:
## Twitter Statistics
# Follower Count
def follower_c(account_name):
    return api.get_user(account_name).followers_count

# Following Count
def following_c(account_name):
    return api.get_user(account_name).friends_count

# Status Count
def status_c(account_name):
    return api.get_user(account_name).statuses_count

# Create Date
def create_acc(account_name):
    return api.get_user(account_name).created_at

### Get Tweet

In [5]:
# From User

def get_tweets(account_name):
    all_tweets = []
    tweets = api.user_timeline(screen_name = account_name, count=200, tweet_mode='extended')
    all_tweets.extend(tweets)
    while len(tweets) > 0:
        previous_tweet = all_tweets[-1].id - 1
        print("getting tweets before %s" % (previous_tweet))
        tweets = api.user_timeline(screen_name = account_name, count=200, max_id=previous_tweet, tweet_mode='extended')
        all_tweets.extend(tweets)
        print("...%s tweets downloaded so far" % (len(all_tweets)))
    outtweets = [[tweet.user.screen_name,tweet.id_str, tweet.created_at,tweet.retweet_count,
                    tweet.favorite_count, tweet.full_text, tweet.is_quote_status, tweet.retweeted, 
                    tweet.in_reply_to_status_id_str, tweet.in_reply_to_screen_name,
                    tweet.entities, tweet.display_text_range] for tweet in all_tweets]
    tweet_df = pd.DataFrame.from_records(outtweets, columns = ['account', 'tweet_id',
                'tweet_create_date', 'tweet_retweet_count', 'tweet_favorite_count',
                'tweet_full', 'tweet_is_quote', 'tweet_is_retweet', 'tweet_reply_id',
                'tweet_reply_name', 'tweet_entities', 'tweet_text_long'])
    return tweet_df

# From Hashtag

def get_tweets_hashtag(selected_hashtag, limit = 5,result_type = "mixed"):
    all_tweets = []
    tweets = api.search(q = selected_hashtag, count = 100, result_type = result_type, tweet_mode='extended')
    all_tweets.extend(tweets)
    # while (len(tweets) > 0 & len(tweets) < limit):
    i = 1
    while i < limit:
        previous_tweet = all_tweets[-1].id - 1
        print("getting tweets before %s" % (previous_tweet))
        tweets = api.search(q = selected_hashtag, count = 100, max_id = previous_tweet, result_type = result_type, tweet_mode='extended')
        all_tweets.extend(tweets)
        print("...%s tweets downloaded so far" % (len(all_tweets)))
        i += 1
    outtweets = [[tweet.user.screen_name,tweet.id_str, tweet.created_at,tweet.retweet_count,
                    tweet.favorite_count, tweet.full_text, tweet.is_quote_status, tweet.retweeted, 
                    tweet.in_reply_to_status_id_str, tweet.in_reply_to_screen_name,
                    tweet.entities, tweet.display_text_range] for tweet in all_tweets]
    tweet_df = pd.DataFrame.from_records(outtweets, columns = ['account', 'tweet_id',
                'tweet_create_date', 'tweet_retweet_count', 'tweet_favorite_count',
                'tweet_full', 'tweet_is_quote', 'tweet_is_retweet', 'tweet_reply_id',
                'tweet_reply_name', 'tweet_entities', 'tweet_text_long'])
    return tweet_df

### Tweet Information

In [6]:
## Get Hashtag
def hashtag_extract(entities_list):
    hashtag_list = []
    try:
        if entities_list['hashtags'] == []:
            pass
        else:
            for tag in entities_list['hashtags']:
                hashtag_list.append(tag['text'])
    except:
        pass
    return hashtag_list

## Get User Mention
def user_mention_extract(entities_list):
    user_list = []
    try:
        if entities_list['user_mentions'] == []:
            pass
        else:
            for user in entities_list['user_mentions']:
                user_list.append(user['screen_name'])
    except:
        pass
    return user_list    

## Get URL
def url_extract(entities_list):
    url_list = []
    try:
        if entities_list['urls'] == []:
            pass
        else:
            for url in entities_list['urls']:
                url_list.append(url['expanded_url'])
    except:
        pass
    return url_list   

## Get Media
def media_extract(entities_list):
    media_list = []
    try:
        if entities_list['media'] == []:
            pass
        else:
            for pic in entities_list['media']:
                media_list.append(pic['media_url'])
    except:
        pass
    return media_list   

def retweet_check(text):
    return text.startswith("RT")

## Get Tweet Type
'''
- รีทวิต : retweeted
- โควททวิต : is_quote_status
- เริ่มเมนชั่นไปหาใครซักคน : in_reply_to_screen_name แต่ไม่มี in_reply_to_status_id
- ตอบเมนชั่นคนอื่น : มี in_reply_to_screen_name และ in_reply_to_status_id
- สเตตัสที่ติดเมนชั่นมา : ไม่มีทั้ง in_reply_to_screen_name และ in_reply_to_status_id แต่มี entities['user_mentions']
- สเตตัสทั่วไปไม่เมนชั่นใคร : ไม่มีอะไรซักอย่าง
'''
def tweet_type(tweet_record):
    twt_type = None
    if (tweet_record['tweet_is_retweet'] == True) or (tweet_record['retweet_check'] == True):
        twt_type = 'Retweet'
    elif tweet_record['tweet_is_quote'] == True:
        twt_type = 'Retweet - Quote'
    elif tweet_record['tweet_reply_name'] != None:
        if tweet_record['tweet_reply_name'] == tweet_record['account']:
            twt_type = 'Reply - Owned'
        else:
            twt_type = 'Reply - Mention'
    elif tweet_record['tweet_user_mention'] != []:
        twt_type = 'Start - Mention'
    else:
        twt_type = 'Normal Tweet'
    return twt_type 

### Clean Text

In [7]:
def give_emoji_free_text(text):
    allchars = [str for str in text]
    emoji_list = [c for c in allchars if c in emoji.UNICODE_EMOJI]
    clean_text = ' '.join([str for str in text.split() if not any(i in str for i in emoji_list)])
    return clean_text

def remove_emoji(text):
    return emoji.get_emoji_regexp().sub(u'', text)

def cleaner(tweet):
    # word_list = ["ม็อบ1สิงหา","ตบช่วยชาติCheerFromHome", "วันครีษมายัน", "ร้านกาแฟ", "การลงทุน", "ของดีบอกต่อ", "ซึมเศร้า", "ดูดวง", "บังทัน", "สหภาพไรเดอร์", "TCAS65", "JS100", "บอลยูโร", "ก้อยนัตตี้ดรีม", "โรงเรียนมัธยมย่านรางน้ํา", "ประเทศกูมี", "ประชุมสภา", "สามกีบ", "ธนาธร", "ปิยบุตร", "โทนี่", "ประยุทธ์ออกไป", "โรงรับจำนำ", "รัฐธรรมนูญ", "ผนงรจตกม", "ประชาธิปไตย", "การเมือง", "ราษฎร"]

    tweet = re.sub("@[A-Za-z0-9_]+","",tweet) #Remove @ sign
    tweet = re.sub(r"RT : ", "", tweet) #Remove RT
    tweet = re.sub(r"(?:\@|http?\://|https?\://|www)\S+", "", tweet) #Remove http links
    tweet = re.sub(r"\n", "", tweet) #Remove newline
    tweet = re.sub(r"[A-Za-z]+", "", tweet) #Remove English
    tweet = re.sub(r"[0-9-!$%^&*#()_+|~=`{}\[\]:\"@;'<>,.\\\/]+", "", tweet) #Remove Number and special character
    tweet = remove_emoji(tweet)
    # for i in word_list:
    #     tweet = tweet.replace(i,"")
    tweet = give_emoji_free_text(tweet)
    tweet = " ".join(tweet.split())
    # tweet = ''.join(c for c in tweet if c not in word_list)
    tweet = ''.join(c for c in tweet if c not in emoji.UNICODE_EMOJI) #Remove Emojis
    tweet = tweet.replace("#", "").replace("_", " ") #Remove hashtag sign but keep the text
    return tweet

def cleaner_use(tweet):
    #  word_list = ["ม็อบ1สิงหา","ตบช่วยชาติCheerFromHome", "วันครีษมายัน", "ร้านกาแฟ", "การลงทุน", "ของดีบอกต่อ", "ซึมเศร้า", "ดูดวง", "บังทัน", "สหภาพไรเดอร์", "TCAS65", "JS100", "บอลยูโร", "ก้อยนัตตี้ดรีม", "โรงเรียนมัธยมย่านรางน้ํา", "ประเทศกูมี", "ประชุมสภา", "สามกีบ", "ธนาธร", "ปิยบุตร", "โทนี่", "ประยุทธ์ออกไป", "โรงรับจำนำ", "รัฐธรรมนูญ", "ผนงรจตกม", "ประชาธิปไตย", "การเมือง", "ราษฎร"]

    tweet = re.sub("@[A-Za-z0-9_]+","",tweet) #Remove @ sign
    tweet = re.sub(r"(?:\@|http?\://|https?\://|www)\S+", "", tweet) #Remove http links
    tweet = re.sub(r"RT : ", "", tweet) #Remove http links
    tweet = re.sub(r"\n", "", tweet) #Remove http links
    tweet = re.sub(r"[0-9-!$%^&*#()_+|~=`{}\[\]:\"@;'<>,.\\\/]+", "", tweet) #Remove Number and special character
    tweet = remove_emoji(tweet)
    tweet = rm_brackets(tweet)
    tweet = rm_useless_spaces(tweet)
    tweet = replace_rep_after(tweet) # Remove Repeat characters
    # for i in word_list:
    #     tweet = tweet.replace(i,"")
    tweet = give_emoji_free_text(tweet)
    tweet = " ".join(tweet.split())
    # tweet = ''.join(c for c in tweet if c not in word_list)
    tweet = ''.join(c for c in tweet if c not in emoji.UNICODE_EMOJI) #Remove Emojis
    tweet = tweet.replace("#", "").replace("_", " ") #Remove hashtag sign but keep the text
    return tweet

## Twitter API

In [8]:
consumer_key = 'Your_consumer_key'
consumer_secret = 'Your_consumer_secret'
access_token = 'Your_access_token'
access_token_secret = 'Your_access_token_secret'

auth = tweepy.OAuthHandler(consumer_key, consumer_secret)
auth.set_access_token(access_token, access_token_secret)

api = tweepy.API(auth)

### Get twitter data from user

#### Selected Attribute

In [9]:
# List of twitter user (influencer)
twt_list = ["belldelagua", "fhay_cotton_h", "KaofangArea", "pikulham","WiJupiter", "pingponghime6", "nuengst_", "pungiireviewx"]

# List of focus sentiment
focus_sentiment = ['pos']

# List of focus topic
set_category = [
                'ลดน้ำหนัก', 'อาหารสุขภาพ', 'เครื่องสำอางค์'
                ]

# Set Threshold for focus topic
threshold_topic = 0.10

# List of focus politics
focus_politics = ['politics']

# cost rate for calcurate cost per post
cost_rate = 0.35

# set sentiment weight
sentiment_weight = 0.1

# set politics weight
politics_weight = 0.1

# set topic weight
topic_weight = 2

# set_budget
set_budget = 70000

#### Get data

In [10]:
df_twt_info = pd.DataFrame(twt_list,columns=["account"])
df_twt_info["no_follower"] = df_twt_info["account"].apply(follower_c)
df_twt_info["no_following"] = df_twt_info["account"].apply(following_c)
df_twt_info["no_status"] = df_twt_info["account"].apply(status_c)
df_twt_info["create_date"] = df_twt_info["account"].apply(create_acc)

In [11]:
df_twt_info

Unnamed: 0,account,no_follower,no_following,no_status,create_date
0,belldelagua,102724,824,107727,2009-03-27 08:53:52
1,fhay_cotton_h,144767,229,17947,2016-02-22 16:06:06
2,KaofangArea,62182,246,64834,2018-02-19 08:58:22
3,pikulham,52235,856,26852,2016-06-19 14:37:31
4,WiJupiter,21302,1762,135968,2018-11-21 14:22:33
5,pingponghime6,14206,981,79631,2010-07-04 11:12:21
6,nuengst_,4661,117,31318,2011-01-20 10:10:22
7,pungiireviewx,2043,283,9901,2020-04-24 16:37:00


In [12]:
df_tweet_data = None
for twt_account in twt_list:
  if df_tweet_data is None:
    df_tweet_data = get_tweets(twt_account)
  else:
    df_temp = get_tweets(twt_account)
    df_tweet_data = pd.concat([df_tweet_data,df_temp])

getting tweets before 1432367612340494335
...399 tweets downloaded so far
getting tweets before 1430964396709191681
...599 tweets downloaded so far
getting tweets before 1429898894201147395
...799 tweets downloaded so far
getting tweets before 1428682704103510017
...998 tweets downloaded so far
getting tweets before 1427555830480601092
...1198 tweets downloaded so far
getting tweets before 1425852460581539843
...1398 tweets downloaded so far
getting tweets before 1424473622417076225
...1598 tweets downloaded so far
getting tweets before 1422982758578614271
...1798 tweets downloaded so far
getting tweets before 1421824967096684543
...1998 tweets downloaded so far
getting tweets before 1420769925518422015
...2198 tweets downloaded so far
getting tweets before 1419002528880943107
...2398 tweets downloaded so far
getting tweets before 1417762177654484993
...2598 tweets downloaded so far
getting tweets before 1415379316213387270
...2798 tweets downloaded so far
getting tweets before 1413162

In [13]:
df_tweet_data['tweet_hashtag'] = df_tweet_data['tweet_entities'].apply(hashtag_extract)
df_tweet_data['tweet_user_mention'] = df_tweet_data['tweet_entities'].apply(user_mention_extract)
df_tweet_data['tweet_url'] = df_tweet_data['tweet_entities'].apply(url_extract)
df_tweet_data['tweet_media'] = df_tweet_data['tweet_entities'].apply(media_extract)
df_tweet_data['retweet_check'] = df_tweet_data["tweet_full"].apply(retweet_check)
df_tweet_data['tweet_full_clean'] = df_tweet_data["tweet_full"].apply(cleaner)
df_tweet_data['tweet_full_use'] = df_tweet_data["tweet_full"].apply(cleaner_use)
df_tweet_data['tweet_type'] = df_tweet_data.apply(tweet_type,axis = 1)

In [14]:
df_tweet_selected = df_tweet_data[(df_tweet_data['tweet_type'] == 'Normal Tweet') | (df_tweet_data['tweet_type'] == 'Reply - Owned')]

In [15]:
df_tweet_selected = df_tweet_selected[df_tweet_selected['tweet_full_use'].apply(lambda x : len(x) >= 15)]

## Analytics Model

### WangchanBERTa (Prepare)

In [16]:
model_name = "wangchanberta-base-att-spm-uncased"

#create tokenizer
tokenizer = AutoTokenizer.from_pretrained(
                f'airesearch/{model_name}',
                revision='main',
                model_max_length=416)

Downloading:   0%|          | 0.00/546 [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/905k [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/282 [00:00<?, ?B/s]

### Sentiment Analysis (Pre-trained from WangchanBERTa)

In [17]:
dataset_name = "wisesight_sentiment"

classify_multiclass = pipeline(task='sentiment-analysis',
         tokenizer=tokenizer,
         model = f'airesearch/{model_name}',
         revision = f'finetuned@{dataset_name}')

Downloading:   0%|          | 0.00/423M [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/716 [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/421M [00:00<?, ?B/s]

In [18]:
df_tweet_selected['sentiment_class'] = df_tweet_selected['tweet_full_clean'].apply(process_transformers).apply(classify_multiclass)

In [19]:
df_tweet_selected = df_tweet_selected.explode('sentiment_class')
df_tweet_selected.reset_index(drop = True, inplace = True)
df_sentiment_temp = df_tweet_selected['sentiment_class']
df_tweet_selected = df_tweet_selected.join(pd.json_normalize(df_sentiment_temp))

In [20]:
df_tweet_selected.rename(columns={"label":"sentiment_label","score":"sentiment_score"}, inplace = True)

In [21]:
df_tweet_selected['sentiment_check'] = df_tweet_selected['sentiment_label'].apply(lambda x: x in focus_sentiment)

### Topic Modeling (USE)

In [22]:
# !wget http://opus.nlpl.eu/download.php?f=OpenSubtitles/v2018/moses/en-th.txt.zip
# !unzip download.php?f=OpenSubtitles%2Fv2018%2Fmoses%2Fen-th.txt.zip -d data

In [23]:
# create USE embedding by using tensorflow hub
embed = hub.load("https://tfhub.dev/google/universal-sentence-encoder-multilingual/3")

INFO:absl:Using /tmp/tfhub_modules to cache modules.
INFO:absl:Downloading TF-Hub Module 'https://tfhub.dev/google/universal-sentence-encoder-multilingual/3'.
INFO:absl:Downloaded https://tfhub.dev/google/universal-sentence-encoder-multilingual/3, Total size: 266.88MB
INFO:absl:Downloaded TF-Hub Module 'https://tfhub.dev/google/universal-sentence-encoder-multilingual/3'.


In [24]:
# Encode / Embed df_tweet_selected
emb_tweet_full = embed(df_tweet_selected['tweet_full_use'].values).numpy()
df_emb_tweet_full = pd.DataFrame(
                                    emb_tweet_full,
                                    index = df_tweet_selected['tweet_full_use'].values
                                  )

In [25]:
# Create Category for Categorizing ( category df )
# create df which contains embedded vector of selected category
df_emb_category = pd.DataFrame(
                                embed(set_category).numpy(), 
                                index = set_category
                               )

In [26]:
# Categorize and Compare with Original Text

argmax_cal_cosine = df_emb_tweet_full.dot(df_emb_category.T)
argmax_df_cosine = argmax_cal_cosine.reset_index()

In [27]:
lst_topic_col_names = []
for column in set_category:
  topic_col_names = None
  topic_col_names = "topic_"+column
  lst_topic_col_names.append(topic_col_names)
  argmax_df_cosine.rename(columns={column:topic_col_names},inplace = True)

In [28]:
df_tweet_selected = df_tweet_selected.join(argmax_df_cosine[lst_topic_col_names])

In [29]:
for column in lst_topic_col_names:
  topic_chk_col_names = column +"_check"
  df_tweet_selected[topic_chk_col_names] = df_tweet_selected[column].apply(lambda x : True if x > threshold_topic else False)

In [30]:
df_tweet_selected

Unnamed: 0,account,tweet_id,tweet_create_date,tweet_retweet_count,tweet_favorite_count,tweet_full,tweet_is_quote,tweet_is_retweet,tweet_reply_id,tweet_reply_name,tweet_entities,tweet_text_long,tweet_hashtag,tweet_user_mention,tweet_url,tweet_media,retweet_check,tweet_full_clean,tweet_full_use,tweet_type,sentiment_class,sentiment_label,sentiment_score,sentiment_check,topic_ลดน้ำหนัก,topic_อาหารสุขภาพ,topic_เครื่องสำอางค์,topic_ลดน้ำหนัก_check,topic_อาหารสุขภาพ_check,topic_เครื่องสำอางค์_check
0,belldelagua,1433830150446084096,2021-09-03 16:32:01,1,4,@ipedd_ ของโรงแรมแกรนด์ไฮแอทฮะ สั่ง Grab เหมีย...,False,False,1433829764234579972,belldelagua,"{'hashtags': [], 'symbols': [], 'user_mentions...","[8, 75]",[],[ipedd_],[],[http://pbs.twimg.com/media/E-X862BVEAAabBB.jpg],False,ของโรงแรมแกรนด์ไฮแอทฮะ สั่ง เหมียนกัน เสิร์ช ได้,ของโรงแรมแกรนด์ไฮแอทฮะ สั่ง Grab เหมียนกัน เสิ...,Reply - Owned,"{'label': 'neu', 'score': 0.916154146194458}",neu,0.916154,False,0.003251,0.101631,-0.028716,False,True,False
1,belldelagua,1433809839868682243,2021-09-03 15:11:19,0,1,นั่งไล่ตอบข้อมูลฉีดวัคซีนเรื่องวัคซีนใต้โพสในเ...,False,False,,,"{'hashtags': [], 'symbols': [], 'user_mentions...","[0, 142]",[],[],[],[],False,นั่งไล่ตอบข้อมูลฉีดวัคซีนเรื่องวัคซีนใต้โพสในเ...,นั่งไล่ตอบข้อมูลฉีดวัคซีนเรื่องวัคซีนใต้โพสในเ...,Normal Tweet,"{'label': 'neg', 'score': 0.9412005543708801}",neg,0.941201,False,-0.020036,0.061541,-0.021106,False,False,False
2,belldelagua,1433705078091763712,2021-09-03 08:15:02,0,1,แพม... เธอบอกว่า Neck Friendly แต่เปิดมาเจอท่า...,False,False,,,"{'hashtags': [], 'symbols': [], 'user_mentions...","[0, 91]",[],[],[https://www.youtube.com/watch?v=l2AHpn2i7_Y&t...,[],False,แพม เธอบอกว่า แต่เปิดมาเจอท่าเกร็งคอรัวๆ คืออะไร๊,แพม เธอบอกว่า Neck Friendly แต่เปิดมาเจอท่าเกร...,Normal Tweet,"{'label': 'neu', 'score': 0.5171377658843994}",neu,0.517138,False,-0.006441,0.044438,-0.007059,False,False,False
3,belldelagua,1433480607087009803,2021-09-02 17:23:04,11,21,"ล่าสุดแม่ค้าจำได้ พูดกลาง live ขายของว่า ""อ่ะต...",False,False,,,"{'hashtags': [], 'symbols': [], 'user_mentions...","[0, 107]",[],[],[],[],False,ล่าสุดแม่ค้าจำได้ พูดกลาง ขายของว่า อ่ะตัวนี้ไ...,ล่าสุดแม่ค้าจำได้ พูดกลาง live ขายของว่า อ่ะตั...,Normal Tweet,"{'label': 'neu', 'score': 0.589180052280426}",neu,0.589180,False,-0.039389,-0.029716,0.060642,False,False,False
4,belldelagua,1433467786051739648,2021-09-02 16:32:07,4,2,เคยเจอเหมือนกัน เลิกคบไปแล้ว ทุกวันนี้ชียังไม่...,False,False,1433467506648248321,belldelagua,"{'hashtags': [], 'symbols': [], 'user_mentions...","[0, 152]",[],[],[],[],False,เคยเจอเหมือนกัน เลิกคบไปแล้ว ทุกวันนี้ชียังไม่...,เคยเจอเหมือนกัน เลิกคบไปแล้ว ทุกวันนี้ชียังไม่...,Reply - Owned,"{'label': 'neg', 'score': 0.5820262432098389}",neg,0.582026,False,-0.084420,-0.015289,-0.050272,False,False,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3477,pungiireviewx,1334437192932843520,2020-12-03 09:59:54,4,7,ขวด2จะหมดแล้วคุมแม๊!\nลูกรักสุดๆอ่ะคนนี้ ใช้แล...,False,False,,,{'hashtags': [{'text': 'ไว้รีวิวห้ามขายของโว้ย...,"[0, 254]",[ไว้รีวิวห้ามขายของโว้ยย],[],[],[http://pbs.twimg.com/media/EoTfjLUU4AIJ4AE.jpg],False,ขวดจะหมดแล้วคุมแม๊ลูกรักสุดๆอ่ะคนนี้ ใช้แล้วรู...,ขวดจะหมดแล้วคุมแม๊ลูกรักสุดๆอ่ะคนนี้ ใช้แล้วรู...,Normal Tweet,"{'label': 'pos', 'score': 0.7957203984260559}",pos,0.795720,True,0.025966,-0.007398,0.179707,False,False,True
3478,pungiireviewx,1333070422640893956,2020-11-29 15:28:50,0,1,ความรักนายแอปเปิ้ลเขียววันนี้ไม่ไหวมาก แม่! ง้...,False,False,,,"{'hashtags': [], 'symbols': [], 'user_mentions...","[0, 60]",[],[],[],[],False,ความรักนายแอปเปิ้ลเขียววันนี้ไม่ไหวมาก แม่ ง้า...,ความรักนายแอปเปิ้ลเขียววันนี้ไม่ไหวมาก แม่ ง้าก,Normal Tweet,"{'label': 'neg', 'score': 0.9772095084190369}",neg,0.977210,False,-0.060171,-0.008140,-0.077172,False,False,False
3479,pungiireviewx,1332871790385790977,2020-11-29 02:19:33,46,14,ใช้งานร่างกายหนักก็ต้องบำรุงตัวเองซะหน่อยย บอก...,False,False,,,{'hashtags': [{'text': 'ไว้รีวิวห้ามขายของโว้ย...,"[0, 271]",[ไว้รีวิวห้ามขายของโว้ยย],[],[],[http://pbs.twimg.com/media/En9P0tNUUAADiLu.jpg],False,ใช้งานร่างกายหนักก็ต้องบำรุงตัวเองซะหน่อยย บอก...,ใช้งานร่างกายหนักก็ต้องบำรุงตัวเองซะหน่อยย บอก...,Normal Tweet,"{'label': 'pos', 'score': 0.7659270167350769}",pos,0.765927,True,0.230149,0.080744,0.089114,True,False,False
3480,pungiireviewx,1332729652847071234,2020-11-28 16:54:44,0,1,ประกาศค้าบบ ขอให้ได้ของขวัญจากคูมซานต้านะค้าา ...,False,False,1325735517493755905,pungiireviewx,"{'hashtags': [], 'symbols': [], 'user_mentions...","[0, 88]",[],[ananbutter],[],[],False,ประกาศค้าบบ ขอให้ได้ของขวัญจากคูมซานต้านะค้าา ...,ประกาศค้าบบ ขอให้ได้ของขวัญจากคูมซานต้านะค้าา ...,Reply - Owned,"{'label': 'neu', 'score': 0.8668104410171509}",neu,0.866810,False,-0.008252,0.027218,0.006322,False,False,False


### Politics Model

#### Prepare Dataset for Politics Model

##### Scrape Data

In [31]:
# def get_tweets_hashtag(selected_hashtag, limit = 5,result_type = "mixed"):
#     all_tweets = []
#     tweets = api.search(q = selected_hashtag, count = 100, result_type = result_type, tweet_mode='extended')
#     all_tweets.extend(tweets)
#     # while (len(tweets) > 0 & len(tweets) < limit):
#     i = 1
#     while i < limit:
#         previous_tweet = all_tweets[-1].id - 1
#         print("getting tweets before %s" % (previous_tweet))
#         tweets = api.search(q = selected_hashtag, count = 100, max_id = previous_tweet, result_type = result_type, tweet_mode='extended')
#         all_tweets.extend(tweets)
#         print("...%s tweets downloaded so far" % (len(all_tweets)))
#         i += 1
#     outtweets = [[tweet.user.screen_name,tweet.id_str, tweet.created_at,tweet.retweet_count,
#                     tweet.favorite_count, tweet.full_text, tweet.is_quote_status, tweet.retweeted, 
#                     tweet.in_reply_to_status_id_str, tweet.in_reply_to_screen_name,
#                     tweet.entities, tweet.display_text_range] for tweet in all_tweets]
#     tweet_df = pd.DataFrame.from_records(outtweets, columns = ['account', 'tweet_id',
#                 'tweet_create_date', 'tweet_retweet_count', 'tweet_favorite_count',
#                 'tweet_full', 'tweet_is_quote', 'tweet_is_retweet', 'tweet_reply_id',
#                 'tweet_reply_name', 'tweet_entities', 'tweet_text_long'])
#     return tweet_df

##### Process Data

In [32]:
# file_name = '/Users/yo/Documents/02 NIDA/IS/is-twt-inf/politics_tweets/politics_{}.csv'
# df_politics = pd.concat([pd.read_csv(file_name.format(i)) for i in range(1, 11)])
# del df_politics['Unnamed: 0']
# df_politics['tweet_full'] = df_politics['tweet_full'].map(lambda x: cleaner(x))
# df_politics.drop_duplicates(subset = 'tweet_full', inplace = True)

In [33]:
# file_name = '/Users/yo/Documents/02 NIDA/IS/is-twt-inf/no_politics_tweets/no_politics_{}.csv'
# df_no_politics = pd.concat([pd.read_csv(file_name.format(i)) for i in range(1, 11)])
# del df_no_politics['Unnamed: 0']
# df_no_politics['tweet_full'] = df_no_politics['tweet_full'].map(lambda x: cleaner(x))
# df_no_politics.drop_duplicates(subset = 'tweet_full', inplace = True)

In [34]:
# file_name = '/content/drive/MyDrive/IS/politics_dataset.csv'
# df_selected_politics = pd.read_csv(file_name)
# del df_selected_politics['Unnamed: 0']

In [35]:
# df_selected_politics.fillna(" ", inplace = True)

In [36]:
# df_politics["politics_sentiment"] = 1
# df_no_politics["politics_sentiment"] = 0
# df_politics_selected = df_politics[["tweet_full","politics_sentiment"]]
# df_no_politics_selected = df_no_politics[["tweet_full","politics_sentiment"]]
# df_selected = df_politics_selected.append(df_no_politics_selected)
# df_selected.reset_index(inplace = True)
# df_selected = df_selected[pd.notna(df_selected['tweet_full'])]

In [37]:
# from sklearn.model_selection import train_test_split

In [38]:
# X = df_selected_politics[df_selected_politics["tweet_full"] != "NaN"]["tweet_full"].values.reshape(-1, 1)
# y = df_selected_politics["politics_sentiment"].values
# os = RandomUnderSampler(sampling_strategy=0.6,random_state=29)
# X_new, y_new = os.fit_resample(X, y)
# print('Original dataset shape {}'.format(Counter(y)))
# print('Resampled dataset shape {}'.format(Counter(y_new)))

In [39]:
# X_train, X_val, y_train, y_val = train_test_split(X_new, y_new, test_size=0.1, random_state=29, stratify = y_new)
# X_train, X_test, y_train, y_test = train_test_split(X_train, y_train, test_size=0.1, random_state=29, stratify = y_train)

# y_train_1 = y_train.reshape(-1, 1)
# y_val_1 = y_val.reshape(-1, 1)
# y_test_1 = y_test.reshape(-1, 1)

In [40]:
# df_train_1 = pd.DataFrame(data = X_train, columns = ["tweet_full"])
# df_train_2 = pd.DataFrame(data = y_train, columns = ["politics_sentiment"])
# df_train = df_train_1.merge(df_train_2,left_index=True, right_index=True)

# df_test_1 = pd.DataFrame(data = X_test, columns = ["tweet_full"])
# df_test_2 = pd.DataFrame(data = y_test, columns = ["politics_sentiment"])
# df_test = df_test_1.merge(df_test_2,left_index=True, right_index=True)

# df_val_1 = pd.DataFrame(data = X_val, columns = ["tweet_full"])
# df_val_2 = pd.DataFrame(data = y_val, columns = ["politics_sentiment"])
# df_val = df_val_1.merge(df_val_2,left_index=True, right_index=True)

In [41]:
# dataset_train = Dataset.from_pandas(df_train,split = "train")
# dataset_test = Dataset.from_pandas(df_test,split = "test")
# dataset_val = Dataset.from_pandas(df_val,split = "validation")

#### Train Model with WangchanBERTa

In [42]:
# label_encoder = LabelEncoder()
# label_encoder.fit(y_train)
# label_encoder.classes_

In [43]:
# feature_col = 'tweet_full'
# label_col = 'politics_sentiment'
# N_LABELS = 2

In [44]:
# enc = OneHotEncoder(handle_unknown="ignore")
# y_train_1 = enc.fit_transform(np.array(y_train)[:, None])
# y_valid_1 = enc.transform(np.array(y_val)[:, None])
# y_test_1 = enc.transform(np.array(y_test)[:, None])

# id2label = {
#     0:'Non-Politics',
#     1:'Politics'
# }

In [45]:
# tokenizer = AutoTokenizer.from_pretrained('airesearch/wangchanberta-base-att-spm-uncased',
#                                           revision='main')

In [46]:
# dataset_preprocessed = { split_name[0]: SequenceClassificationDataset.from_dataset(
#                             Task.MULTICLASS_CLS,
#                             tokenizer,
#                             split_name[1],
#                             feature_col,
#                             label_col,
#                             max_length=416,
#                             bs=1000,
#                             preprocessor=process_transformers,
#                             prepare_for_tokenization=True,
#                             label_encoder=label_encoder) for split_name in [('train',dataset_train), ('validation',dataset_val), ('test',dataset_test)]
#                         }

In [47]:
# config = AutoConfig.from_pretrained(
#         'airesearch/wangchanberta-base-att-spm-uncased',
#          revision='main',
#          num_labels=N_LABELS
#     )


# model_seq_cls = AutoModelForSequenceClassification.from_pretrained('airesearch/wangchanberta-base-att-spm-uncased',
#                                                         revision='main',
#                                                         config=config)

In [48]:
# training_args = TrainingArguments(
#                         # num_train_epochs=3,
#                         max_steps=1000,
#                         per_device_train_batch_size=4,
#                         per_device_eval_batch_size=4,
#                         gradient_accumulation_steps=32,
#                         learning_rate=3e-5,
#                         warmup_steps=75,
#                         weight_decay=0.01,
#                         adam_epsilon=1e-08,
#                         max_grad_norm=1.0,
#                         #checkpoint
#                         output_dir='./checkpoints/wangchanberta-base-att-spm-uncased/finetuned/politics/',
#                         overwrite_output_dir=True,
#                         #logs
#                         logging_dir='./logs/wangchanberta-base-att-spm-uncased/finetuned/politics/',
#                         logging_first_step=False,
#                         logging_steps=5,
#                         #eval
#                         evaluation_strategy='steps',
#                         eval_steps=100,
#                         load_best_model_at_end=True,
#                         #others
#                         seed=2929,
#                         dataloader_drop_last=False,
#                         no_cuda=False,
#                         metric_for_best_model='f1_micro',
#                         prediction_loss_only=False
#                 )

# # training_args = TrainingArguments(
# #                         # num_train_epochs=3,
# #                         max_steps=400,
# #                         per_device_train_batch_size=2,
# #                         per_device_eval_batch_size=2,
# #                         gradient_accumulation_steps=16,
# #                         learning_rate=3e-5,
# #                         warmup_steps=75,
# #                         weight_decay=0.1,
# #                         adam_epsilon=1e-08,
# #                         max_grad_norm=1.0,
# #                         #checkpoint
# #                         output_dir='./checkpoints/wangchanberta-base-att-spm-uncased/finetuned/ws/',
# #                         overwrite_output_dir=True,
# #                         #logs
# #                         logging_dir='./logs/wangchanberta-base-att-spm-uncased/finetuned/ws/',
# #                         logging_first_step=False,
# #                         logging_steps=5,
# #                         #eval
# #                         evaluation_strategy='steps',
# #                         eval_steps=200,
# #                         load_best_model_at_end=True,
# #                         #others
# #                         seed=2020,
# #                         dataloader_drop_last=False,
# #                         no_cuda=False,
# #                         metric_for_best_model='f1_micro',
# #                         prediction_loss_only=False
# #                 )

# data_collator = DataCollatorWithPadding(tokenizer, padding=True)

# trainer = Trainer(
#         model=model_seq_cls,
#         args=training_args,
#         compute_metrics=classification_metrics,
#         train_dataset=dataset_preprocessed['train'],
#         eval_dataset=dataset_preprocessed['validation'],
#         data_collator=data_collator
#     )

In [49]:
# !rm -r checkpoints logs

In [50]:
# print('\nBegin model finetuning.')
# trainer.train()
# print('Done.\n')

In [51]:
# %tensorboard --logdir ./logs/wangchanberta-base-att-spm-uncased/finetuned/politics/

In [52]:
# torch.save(model_seq_cls, '/content/drive/MyDrive/IS/Model/politics_model.pt')

#### Load model and predict

In [53]:
politics_model = torch.load('/content/drive/MyDrive/IS/Model/politics_model.pt')#,map_location=torch.device('cpu'))

In [54]:
classify_politics = pipeline(task='sentiment-analysis',
         tokenizer=tokenizer,
         model = politics_model,
         device=0)

df_tweet_selected['politics_class'] = df_tweet_selected['tweet_full_clean'].apply(process_transformers).apply(classify_politics)

In [55]:
df_tweet_selected = df_tweet_selected.explode('politics_class')
df_tweet_selected.reset_index(drop = True, inplace = True)
df_politics_temp = df_tweet_selected['politics_class']
df_tweet_selected = df_tweet_selected.join(pd.json_normalize(df_politics_temp))

In [56]:
df_tweet_selected.rename(columns={"label":"politics_label","score":"politics_score"},inplace=True)
df_tweet_selected.replace({"LABEL_0": "non politics", "LABEL_1": "politics"},inplace=True)

In [57]:
df_tweet_selected['politics_check'] = df_tweet_selected['politics_label'].apply(lambda x: x in focus_politics)

### Analytics Scoring

In [58]:
df_twt_main_result = df_tweet_selected.groupby(by = ['account']).agg(number_tweets = ('tweet_id', 'count'),
                                                          sentiment_score = ('sentiment_check', 'sum'),
                                                          politics_score = ('politics_check', 'sum'))

df_twt_topic_result = None
for topic_col_name in lst_topic_col_names:
  df_twt_topic_temp = None
  topic_check_col_name = topic_col_name+"_check"
  topic_score_col_name = topic_col_name+"_score"
  df_twt_topic_temp = df_tweet_selected.groupby(by = ['account']).agg({topic_check_col_name: 'sum'})
  df_twt_topic_temp.rename(columns = {topic_check_col_name:topic_score_col_name}, inplace = True)
  if df_twt_topic_result is None :
    df_twt_topic_result = df_twt_topic_temp.copy()
  else :
    df_twt_topic_result = df_twt_topic_result.join(df_twt_topic_temp)
df_twt_main_result = df_twt_main_result.join(df_twt_topic_result)

In [59]:
df_twt_main_result = df_twt_main_result.iloc[:,1:].div(df_twt_main_result['number_tweets'], axis=0)

In [60]:
for result_col_name in df_twt_main_result.columns:
  rank_col_name = result_col_name+"_rank"
  df_twt_main_result[rank_col_name] = df_twt_main_result[result_col_name].rank(ascending=False)

In [61]:
df_twt_main_result["sum_rank"] = df_twt_main_result.loc[:, df_twt_main_result.columns.str.endswith("_rank")].sum(axis = 1)
df_twt_main_result

Unnamed: 0_level_0,sentiment_score,politics_score,topic_ลดน้ำหนัก_score,topic_อาหารสุขภาพ_score,topic_เครื่องสำอางค์_score,sentiment_score_rank,politics_score_rank,topic_ลดน้ำหนัก_score_rank,topic_อาหารสุขภาพ_score_rank,topic_เครื่องสำอางค์_score_rank,sum_rank
account,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
KaofangArea,0.305195,0.183442,0.100649,0.056818,0.248377,5.0,2.0,5.0,4.5,5.0,21.5
WiJupiter,0.555556,0.055556,0.194444,0.194444,0.361111,1.0,5.0,1.0,1.0,2.0,10.0
belldelagua,0.081761,0.309748,0.100629,0.084906,0.062893,8.0,1.0,6.0,2.0,8.0,25.0
fhay_cotton_h,0.469203,0.07971,0.081522,0.059783,0.362319,2.0,4.0,8.0,3.0,1.0,18.0
nuengst_,0.277273,0.052273,0.117045,0.055682,0.264773,7.0,7.0,2.0,6.0,4.0,26.0
pikulham,0.464945,0.053506,0.107011,0.051661,0.330258,3.0,6.0,4.0,8.0,3.0,24.0
pingponghime6,0.295455,0.113636,0.113636,0.05303,0.227273,6.0,3.0,3.0,7.0,7.0,26.0
pungiireviewx,0.306818,0.034091,0.090909,0.056818,0.238636,4.0,8.0,7.0,4.5,6.0,29.5


### Engagement Scoring

In [62]:
def check_count(col):
  return 1 if col == True else 0

In [63]:
df_tweet_selected['sentiment_engagement'] = df_tweet_selected['sentiment_check']*(df_tweet_selected['tweet_retweet_count']+df_tweet_selected['tweet_favorite_count'])*sentiment_weight
df_tweet_selected['sentiment_count'] = df_tweet_selected['sentiment_check']*sentiment_weight
df_tweet_selected['politics_engagement'] = df_tweet_selected['politics_check']*(df_tweet_selected['tweet_retweet_count']+df_tweet_selected['tweet_favorite_count'])*politics_weight
df_tweet_selected['politics_count'] = df_tweet_selected['politics_check']*politics_weight
for topic_col_name in lst_topic_col_names:
  topic_check_col_name = topic_col_name+"_check"
  topic_engage_col_name = topic_col_name+"_engagement"
  topic_count_col_name = topic_col_name+"_count"
  df_tweet_selected[topic_engage_col_name] = df_tweet_selected[topic_check_col_name]*(df_tweet_selected['tweet_retweet_count']+df_tweet_selected['tweet_favorite_count'])*topic_weight
  df_tweet_selected[topic_count_col_name] = df_tweet_selected[topic_check_col_name]*topic_weight

### Cost per post

calculate from no. follower

In [64]:
lst_topic_col_names_count = [t + "_count" for t in lst_topic_col_names]
lst_topic_col_names_engage = [t + "_engagement" for t in lst_topic_col_names]
lst_topic_col_names_all = ['sentiment_engagement', 'sentiment_count', 'politics_engagement', 'politics_count'] + lst_topic_col_names_count + lst_topic_col_names_engage
lst_topic_col_names_all_check = ['sentiment_count', 'politics_count'] +lst_topic_col_names_count
lst_topic_col_names_for_cal = ['sentiment', 'politics'] + lst_topic_col_names 

In [65]:
v = 'sum'
dict_for_agg = {k:v for k in lst_topic_col_names_all}

In [66]:
df_tweet_selected_for_engage = df_tweet_selected.groupby(by = ['account']).agg(dict_for_agg)
df_tweet_selected_for_engage['total_engagement_score'] = 0
for col in lst_topic_col_names_for_cal:
  col_eng = col + '_engagement'
  col_cou = col + '_count'
  col_total = col + '_total'
  df_tweet_selected_for_engage[col_total] = df_tweet_selected_for_engage[col_eng]/df_tweet_selected_for_engage[col_cou]
  df_tweet_selected_for_engage['total_engagement_score'] = df_tweet_selected_for_engage['total_engagement_score']+df_tweet_selected_for_engage[col_total]
df_tweet_selected_for_engage['total_engagement_score'] = df_tweet_selected_for_engage['total_engagement_score']/len(lst_topic_col_names_for_cal)

In [67]:
df_twt_info['cost_per_post'] = df_twt_info['no_follower']*cost_rate
df_twt_info

Unnamed: 0,account,no_follower,no_following,no_status,create_date,cost_per_post
0,belldelagua,102724,824,107727,2009-03-27 08:53:52,35953.4
1,fhay_cotton_h,144767,229,17947,2016-02-22 16:06:06,50668.45
2,KaofangArea,62182,246,64834,2018-02-19 08:58:22,21763.7
3,pikulham,52235,856,26852,2016-06-19 14:37:31,18282.25
4,WiJupiter,21302,1762,135968,2018-11-21 14:22:33,7455.7
5,pingponghime6,14206,981,79631,2010-07-04 11:12:21,4972.1
6,nuengst_,4661,117,31318,2011-01-20 10:10:22,1631.35
7,pungiireviewx,2043,283,9901,2020-04-24 16:37:00,715.05


### Optimize your budget

In [68]:
df_twt_final_info = df_twt_info.merge(right = df_tweet_selected_for_engage, on = 'account')

In [69]:
df_twt_opm = df_twt_final_info[['account', 'cost_per_post','total_engagement_score']]
df_twt_opm['total_engagement_score'] = round(df_twt_opm['total_engagement_score']).astype(float)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  


In [70]:
opm_data = {}
opm_data['constraint_coeffs'] = [df_twt_opm['cost_per_post'].tolist()]
opm_data['bounds'] = [set_budget]
opm_data['obj_coeffs'] = df_twt_opm['total_engagement_score'].tolist()
opm_data['num_vars'] = len(df_twt_opm['cost_per_post'].tolist())
opm_data['num_constraints'] = len(opm_data['constraint_coeffs'])

In [71]:
solver = pywraplp.Solver.CreateSolver('SCIP')

In [72]:
# Define Variables
x = {}
for j in range(opm_data['num_vars']):
    x[j] = solver.IntVar(0, 1, 'x[%i]' % j)
print('Number of variables =', solver.NumVariables())

Number of variables = 8


In [73]:
# Define Constraints
for i in range(opm_data['num_constraints']):
    constraint = solver.RowConstraint(0, opm_data['bounds'][i], '')
    for j in range(opm_data['num_vars']):
        constraint.SetCoefficient(x[j], opm_data['constraint_coeffs'][i][j])
print('Number of constraints =', solver.NumConstraints())

Number of constraints = 1


In [74]:
# Define Objective
objective = solver.Objective()
for j in range(opm_data['num_vars']):
  objective.SetCoefficient(x[j], opm_data['obj_coeffs'][j])
objective.SetMaximization()

In [75]:
status = solver.Solve()
solution_list = []
if status == pywraplp.Solver.OPTIMAL:
    print('Objective value =', solver.Objective().Value())
    for j in range(opm_data['num_vars']):
        print(x[j].name(), ' = ', x[j].solution_value())
        solution_list.append(x[j].solution_value())
    print()
    print('Problem solved in %f milliseconds' % solver.wall_time())
    print('Problem solved in %d iterations' % solver.iterations())
    print('Problem solved in %d branch-and-bound nodes' % solver.nodes())
else:
    print('The problem does not have an optimal solution.')

Objective value = 2603.9999999999995
x[0]  =  0.0
x[1]  =  1.0
x[2]  =  0.0
x[3]  =  0.0
x[4]  =  1.0
x[5]  =  1.0
x[6]  =  1.0
x[7]  =  1.0

Problem solved in 210.000000 milliseconds
Problem solved in 3 iterations
Problem solved in 1 branch-and-bound nodes


In [76]:
df_twt_final_info['selected_influencer'] = solution_list
df_twt_final_info['solution_engagemnet'] = df_twt_final_info['total_engagement_score'] * df_twt_final_info['selected_influencer']
df_twt_final_info['total_cost'] = df_twt_final_info['cost_per_post'] * df_twt_final_info['selected_influencer']
df_twt_final_info

Unnamed: 0,account,no_follower,no_following,no_status,create_date,cost_per_post,sentiment_engagement,sentiment_count,politics_engagement,politics_count,topic_ลดน้ำหนัก_count,topic_อาหารสุขภาพ_count,topic_เครื่องสำอางค์_count,topic_ลดน้ำหนัก_engagement,topic_อาหารสุขภาพ_engagement,topic_เครื่องสำอางค์_engagement,total_engagement_score,sentiment_total,politics_total,topic_ลดน้ำหนัก_total,topic_อาหารสุขภาพ_total,topic_เครื่องสำอางค์_total,selected_influencer,solution_engagemnet,total_cost
0,belldelagua,102724,824,107727,2009-03-27 08:53:52,35953.4,1258.2,5.2,5471.9,19.7,128,108,80,44350,46546,17518,303.232763,241.961538,277.761421,346.484375,430.981481,218.975,0.0,0.0,0.0
1,fhay_cotton_h,144767,229,17947,2016-02-22 16:06:06,50668.45,67006.2,25.9,2034.8,4.4,90,66,400,25534,319384,543494,1906.232828,2587.111969,462.454545,283.711111,4839.151515,1358.735,1.0,1906.232828,50668.45
2,KaofangArea,62182,246,64834,2018-02-19 08:58:22,21763.7,9231.0,18.8,11008.1,11.3,124,70,306,204768,12706,69230,704.857947,491.010638,974.168142,1651.354839,181.514286,226.24183,0.0,0.0,0.0
3,pikulham,52235,856,26852,2016-06-19 14:37:31,18282.25,4655.7,25.2,34.2,2.9,116,56,358,12682,2860,50410,99.550435,184.75,11.793103,109.327586,51.071429,140.810056,0.0,0.0,0.0
4,WiJupiter,21302,1762,135968,2018-11-21 14:22:33,7455.7,460.2,2.0,10.8,0.2,14,14,26,2606,3614,26076,346.261758,230.1,54.0,186.142857,258.142857,1002.923077,1.0,346.261758,7455.7
5,pingponghime6,14206,981,79631,2010-07-04 11:12:21,4972.1,384.9,3.9,26.0,1.5,30,14,60,1490,1050,5770,67.371795,98.692308,17.333333,49.666667,75.0,96.166667,1.0,67.371795,4972.1
6,nuengst_,4661,117,31318,2011-01-20 10:10:22,1631.35,1656.3,24.4,10.4,4.6,206,98,466,2568,97188,124658,268.365752,67.881148,2.26087,12.466019,991.714286,267.506438,1.0,268.365752,1631.35
7,pungiireviewx,2043,283,9901,2020-04-24 16:37:00,715.05,84.9,2.7,0.1,0.3,16,10,42,218,112,1118,16.644365,31.444444,0.333333,13.625,11.2,26.619048,1.0,16.644365,715.05
