In [None]:
'''
Using LLM to extract sentiment from Twitch chat messages
Twitch is a livestreaming platform where people can go to watch gaming or IRL content
It could be useful to discern the sentiment of Twitch chat messages to determine how 
people respond to things in-stream advertisements, e-sports events, or marketing. 
'''

In [1]:
%pip install  torch -qq
%pip install  accelerate  transformers datasets evaluate tensorboard -qq

Note: you may need to restart the kernel to use updated packages.
Note: you may need to restart the kernel to use updated packages.


In [2]:
%pip install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cu121

Looking in indexes: https://download.pytorch.org/whl/cu121
Note: you may need to restart the kernel to use updated packages.


In [3]:
from huggingface_hub import login
from transformers import AutoTokenizer
import transformers
import torch
import time
import json

  from .autonotebook import tqdm as notebook_tqdm


In [4]:
# Reading in HF token here, logging into HF for access of LLama-2
with open("secrets.json", "r") as file:
    secrets = json.load(file)
login(
  token=secrets["hf_token"], 
  add_to_git_credential=True
)

model = "meta-llama/Llama-2-7b-chat-hf" # this is the smallest (7 billion weights) LLaMa model, it fits on colab T4 GPU. See here for other models: https://huggingface.co/meta-llama



Token is valid (permission: write).
Your token has been saved in your configured git credential helpers (manager-core).
Your token has been saved to C:\Users\lukeu\.cache\huggingface\token
Login successful


In [5]:
# Loading model
t0=time.time()
tokenizer = AutoTokenizer.from_pretrained(model)
pipeline = transformers.pipeline(
    "text-generation",
    model=model,
    torch_dtype=torch.float16,
    device_map="auto",
)
t1=time.time();
print("Took %d seconds"%(t1-t0))

Loading checkpoint shards: 100%|██████████| 2/2 [00:02<00:00,  1.16s/it]


Took 3 seconds


In [6]:
import socket
from collections import Counter

# Reading in live comments from a Twitch chat to determine their respective sentiments
# Twitch IRC server information
SERVER = "irc.twitch.tv"
PORT = 6667
BOT_TOKEN = secrets["twitch_oath"]
BOT_NICK = secrets["twitch_nick"]
CHANNEL= '#zackrawrr'

# Connect to Twitch IRC server
sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
sock.connect((SERVER, PORT))

# Authenticate and join the channel
sock.send(f"PASS {BOT_TOKEN}\r\n".encode("utf-8"))
sock.send(f"NICK {BOT_NICK}\r\n".encode("utf-8"))
sock.send(f"JOIN {CHANNEL}\r\n".encode("utf-8"))

17

In [7]:
import time
import re

# Ignoring non IRC Twitch chat messages
                        # any Twitch username       # IRC server  # PRIVMSG   #Twitch channel        # Message
pattern = re.compile(r":([a-zA-Z0-9_]{3,25})!\1@\1.tmi.twitch.tv\sPRIVMSG\s#([a-zA-Z0-9_]{3,25})\s:(.*)")

# Keep receiving messages from IRC server until responses [] has len num_messages
num_messages = 25
responses = []
while len(responses) < num_messages:
  resp = sock.recv(2048).decode('utf-8')
  if pattern.match(resp): # If the message is a comment
    groups = pattern.search(resp)
    username = groups.group(1)
    if username == 'fossabot': # Ignoring comments from bots (known neutral sentiment)
        continue
    channel = groups.group(2)
    message = groups.group(3)
    if len(message.split(' ')) > 7: # Arbitrary 7 word limit; makes messages more diverse (usually people just spam the same thing)
      print(f'{username}: {message}')
      responses.append(message)

# If you cannot tell people are talking about World of Warcraft

oopsifeededagain: are you gna be playin poe season on the 8th
bitlegions: Wait till you get a pet and u instant tag and pet abuse them lol
tatatatatatatat: idk if blizzard has a balance team, they'd def play warlock
lazilyenraged: But you get a shittier victory rush as a warrior lololol
rma_90: bro its like you playing a hunter so many pets around u
spergler_: it looks like you’ve never played before and you’re 10 years old just having an amazing time casting spells
lebrewski: warlock was the original Heroic Class before DK
bitlegions: who needs health when you have a PET lol
prophecy_78: Think you'll check out new PoE league as well?
tazzasaurus: have to complete bfd to get metamorph -.- :(
imkrox_: Hearing stuff like "are you going tank lock" is INSANE to me
bdidddles: bro your only lvl 3 its not op yet
morningglory747: Do you think classic is going in the right direction after playing SOD? @zackrawrr
matil_m: what server is he playing, I still can not understand >?
infuze27: ur mana

In [16]:
# Using LLM to discern sentiment from Twitch Messages

def remove_prefix(text, prefix):
    if text.startswith(prefix):
        return text[len(prefix):]
    else:
        return text
llama_responses = []
print("Discerning sentiment from Twitch Chat messages!\n")
system = "For each of these Twitch Chat messages, simply classify the message as POSITIVE, NEGATIVE, or NEUTRAL.\nExample: Wow that's so cool!\nOutput: POSITIVE. Only write one word."
user = responses[:11] # Cutting off responses to fulfill character limit
prompt = f"<s>[INST] <<SYS>>\\n{system}\\n<</SYS>>\\n\\n{user}[/INST]"
sequences = pipeline(
    prompt,
    do_sample=True,
    top_k=10,
    num_return_sequences=1,
    eos_token_id=tokenizer.eos_token_id,
    max_length=512,
)
print(sequences)
response = sequences[0]['generated_text']
response = remove_prefix(response, prompt)
response = re.sub(r'\s+', ' ', response.replace('\n', ' ')).strip()
print(response)

llama_responses.append(response)

Discerning sentiment from Twitch Chat messages!

[{'generated_text': '<s>[INST] <<SYS>>\\nFor each of these Twitch Chat messages, simply classify the message as POSITIVE, NEGATIVE, or NEUTRAL.\nExample: Wow that\'s so cool!\nOutput: POSITIVE. Only write one word.\\n<</SYS>>\\n\\n[\'are you gna be playin poe season on the 8th\\r\', \'Wait till you get a pet and u instant tag and pet abuse them lol\\r\', "idk if blizzard has a balance team, they\'d def play warlock\\r", \'But you get a shittier victory rush as a warrior lololol\\r\', \'bro its like you playing a hunter so many pets around u\\r\', \'it looks like you’ve never played before and you’re 10 years old just having an amazing time casting spells\\r\', \'warlock was the original Heroic Class before DK\\r\', \'who needs health when you have a PET lol\\r\', "Think you\'ll check out new PoE league as well?\\r", \'have to complete bfd to get metamorph -.- :(\\r\', \'Hearing stuff like "are you going tank lock" is INSANE to me\\r\'][/

In [17]:
# Doing same thing to get more responses from Llama
print("Discerning sentiment from Twitch Chat messages!\n")
system = "For each of these Twitch Chat messages, simply classify the message as POSITIVE, NEGATIVE, or NEUTRAL.\nExample: Wow that's so cool!\nOutput: POSITIVE. Only write one word."
user = responses[11:22]
prompt = f"<s>[INST] <<SYS>>\\n{system}\\n<</SYS>>\\n\\n{user}[/INST]"
sequences = pipeline(
    prompt,
    do_sample=True,
    top_k=10,
    num_return_sequences=1,
    eos_token_id=tokenizer.eos_token_id,
    max_length=512,
)
print(sequences)
response = sequences[0]['generated_text']
response = remove_prefix(response, prompt)
response = re.sub(r'\s+', ' ', response.replace('\n', ' ')).strip()
print(response)
llama_responses.append(response)

Discerning sentiment from Twitch Chat messages!

[{'generated_text': "<s>[INST] <<SYS>>\\nFor each of these Twitch Chat messages, simply classify the message as POSITIVE, NEGATIVE, or NEUTRAL.\nExample: Wow that's so cool!\nOutput: POSITIVE. Only write one word.\\n<</SYS>>\\n\\n['bro your only lvl 3 its not op yet\\r', 'Do you think classic is going in the right direction after playing SOD? @zackrawrr\\r', 'what server is he playing, I still can not understand >?\\r', 'ur mana problems is the healers mana probnlem\\r', 'long time priest here, leveling a priest , i have not TOUCHED my wand\\r', '0 mana problems, get master channeling rune and just keep drain life up 100%\\r', 'Use the wand and it will be clear :)\\r', 'lifestap drainlife/drain mana, wand is fine but you rarely go oom\\r', 'haunt heal can be nice for a quick tap\\r', 'as someone who has made it to 21 on a lock you are in for a rude awakening\\r', 'chatters saying life tap sucks are trash at the game LUL\\r'][/INST]  Sure

In [18]:
llama_responses

['Sure! Here are the classifications for each of the Twitch chat messages you provided: * \'are you gna be playin poe season on the 8th\' - NEUTRAL * \'Wait till you get a pet and u instant tag and pet abuse them lol\' - NEGATIVE * "idk if blizzard has a balance team, they\'d def play warlock" - POSITIVE * \'But you get a shittier victory rush as a warrior lololol\' - NEGATIVE * \'bro its like you playing a hunter so many pets around u\' - POSITIVE * \'it looks like you’ve never played before and you’re 10 years old just having an amazing time casting spells\' - NEUTRAL * \'warlock was the original Heroic Class before DK\' - POSITIVE * \'who needs health when you',
 "Sure! Here are the classifications for each of the Twitch Chat messages you provided: * 'bro your only lvl 3 its not op yet' - NEGATIVE * 'Do you think classic is going in the right direction after playing SOD? @zackrawrr' - NEUTRAL * 'what server is he playing, I still can not understand >?' - NEUTRAL * 'ur mana problems 

In [20]:
# Forming dictionary for each response and sentiment to clean up sentiment decisions
total_responses = llama_responses[0] + llama_responses[1]

# Extracting only necessary information from chatbot responses
                    # Extract msg     # Extract sentiment
sentiment_pattern = r'(\'.*\').*(POSITIVE|NEGATIVE|NEUTRAL)'
sentiments = ['POSITIVE', 'NEGATIVE', 'NEUTRAL']
filtered = ''

# Attempt to add new line when sentiment decision reached 
# Easier for counting of sentiments
for word in total_responses.split(' '):
    if word in sentiments:
        filtered += word+'\n'
    else:
        filtered += word+' '
filtered_lines = filtered.splitlines()

# Add each msg + sentiment combo to dict for prettier printing + access to sentiment decisions
sentiments_dict = {}
for line in filtered_lines:
    pat_match = re.search(sentiment_pattern, line)
    if pat_match is not None:
        message = pat_match.group(1)
        sent = pat_match.group(2)
        sentiments_dict[message] = sent

# Counting sentiment proportionality; determine what subset of chat messages might be like (positive? negative? neutral?)
negative = 0
positive = 0
neutral = 0
for val in sentiments_dict.values():
    if val == 'NEGATIVE':
        negative += 1
    elif val == 'POSITIVE':
        positive += 1
    elif val == 'NEUTRAL':
        neutral += 1
total = negative + positive + neutral

# Printing results
print(f'n = {total}')
print(f'Negative: {negative / total * 100}%')
print(f'Positive: {positive / total * 100}%')
print(f'Neutral: {neutral / total * 100}%')
sentiments_dict

n = 13
Negative: 30.76923076923077%
Positive: 38.46153846153847%
Neutral: 30.76923076923077%


{"'are you gna be playin poe season on the 8th'": 'NEUTRAL',
 "'Wait till you get a pet and u instant tag and pet abuse them lol'": 'NEGATIVE',
 "'But you get a shittier victory rush as a warrior lololol'": 'NEGATIVE',
 "'bro its like you playing a hunter so many pets around u'": 'POSITIVE',
 "'it looks like you’ve never played before and you’re 10 years old just having an amazing time casting spells'": 'NEUTRAL',
 "'warlock was the original Heroic Class before DK'": 'POSITIVE',
 "'who needs health when youSure! Here are the classifications for each of the Twitch Chat messages you provided: * 'bro your only lvl 3 its not op yet'": 'NEGATIVE',
 "'Do you think classic is going in the right direction after playing SOD? @zackrawrr'": 'NEUTRAL',
 "'what server is he playing, I still can not understand >?'": 'NEUTRAL',
 "'ur mana problems is the healers mana probnlem'": 'NEGATIVE',
 "'long time priest here, leveling a priest, i have not TOUCHED my wand'": 'POSITIVE',
 "'0 mana problems, get 