In [None]:
import json
import os
import re
import socket
import threading
import time
from datetime import datetime

import requests
from google.cloud import bigquery

# GCP configuration. write_to_bigquery() joins these three identifiers into the
# fully-qualified destination table "<PROJECT_ID>.<DATASET_ID>.<TABLE_ID>".
PROJECT_ID = "twitch-realtime-chats"
DATASET_ID = "202503200809"
# NOTE(review): "twich" looks like a typo of "twitch", but it presumably matches
# the name of the table that already exists -- confirm before renaming.
TABLE_ID = "twich_chat_log_partitioned"
# CREDENTIALS_PATH = "/content/twitch-realtime-chats-e6edd4d2d2ab.json"

# Create the BigQuery client once, at import time; write_to_bigquery() reuses it
# for every batch. No credentials or project are passed explicitly, so the
# client library's default lookup applies.
bq_client = bigquery.Client()

# Twitch configuration.
# Credentials may be supplied through the environment variables
# TWITCH_CLIENT_ID, TWITCH_OAUTH_TOKEN and TWITCH_USERNAME so that a deployment
# does not have to edit this file. An unset or empty variable falls back to the
# literal below, which keeps existing setups working unchanged.
# SECURITY(review): the fallback literals are secrets committed to source.
# Rotate them and remove the fallbacks once the environment variables are set.
CLIENT_ID = os.environ.get("TWITCH_CLIENT_ID") or "gp762nuuoqcoxypju8c569th9wz7q5"
# Sent verbatim after "PASS " when logging in to IRC, so it carries the "oauth:" prefix.
OAUTH_TOKEN = os.environ.get("TWITCH_OAUTH_TOKEN") or "oauth:mb053vdgfqc0u2m7folpig78vgxdke"
# Sent verbatim after "NICK " when logging in to IRC.
USERNAME = os.environ.get("TWITCH_USERNAME") or "danieljia36"
# Helix REST endpoints for live streams and for game metadata.
API_URL = "https://api.twitch.tv/helix/streams"
GAMES_API_URL = "https://api.twitch.tv/helix/games"


# Games whose live channels are tracked. update_channels() resolves these names
# to game ids, so each entry must be spelled exactly as the Twitch category is.
# Commented-out entries are games that are currently disabled.
GAME_LIST = [
    "League of Legends",
    "Counter-Strike",
    # "Grand Theft Auto V",
    # "Assassin's Creed Shadows",
    "Fortnite",
    "VALORANT",
    "Marvel Rivals",  # was misspelled "Marval Rivals", which cannot match the category
]

# Twitch IRC server info: host and port that connect() opens its socket to.
# NOTE(review): 6667 is presumably the plain-text (non-TLS) IRC port, in which
# case the OAuth token travels unencrypted -- confirm against the Twitch docs.
IRC_SERVER = "irc.chat.twitch.tv"
IRC_PORT = 6667

# Maps a channel name to the game it is streaming. listen_chat() looks incoming
# messages up here and falls back to "Unknown Game" when the channel is absent.
# Presumably filled in by connect_to_channel(), which is not visible here.
joined_channels = {}


def connect():
    """Open a connection to the Twitch IRC server and send the login lines.

    Retries forever, sleeping 10 seconds between attempts, until the TCP
    connection is established and the PASS/NICK lines have been written.

    Returns:
        socket.socket: a connected socket with a 60-second timeout.
    """
    while True:
        sock = None
        try:
            sock = socket.socket()
            sock.settimeout(60)
            sock.connect((IRC_SERVER, IRC_PORT))
            # sendall() keeps writing until the whole line is out, whereas
            # send() may transmit only part of it. IRC lines end with CRLF.
            sock.sendall(f"PASS {OAUTH_TOKEN}\r\n".encode("utf-8"))
            sock.sendall(f"NICK {USERNAME}\r\n".encode("utf-8"))
            print("✅ Connected to Twitch")
            return sock
        except OSError as e:
            # Release the failed socket before retrying; otherwise every
            # failed attempt would leak a file descriptor.
            if sock is not None:
                sock.close()
            print(f"⚠ Connection failure，retry after 10 seconds... Error: {e}")
            time.sleep(10)


def write_to_bigquery(rows):
    """Stream a batch of chat rows into the configured BigQuery table.

    Args:
        rows: list of dicts, one per chat message, each with a "timestamp"
            key. A ``datetime`` timestamp is sent as a "%Y-%m-%d %H:%M:%S"
            string; any other value is sent as-is. The caller's dicts are
            left unmodified.

    Returns:
        None. The outcome (row count or the reported errors) is printed.
    """
    if not rows:
        # Nothing to insert: skip the round trip instead of sending a request
        # that carries no rows.
        return

    # Build the payload from copies so the caller's rows are not mutated.
    payload = [
        {**row, "timestamp": row["timestamp"].strftime("%Y-%m-%d %H:%M:%S")}
        if isinstance(row["timestamp"], datetime)
        else row
        for row in rows
    ]

    table_ref = f"{PROJECT_ID}.{DATASET_ID}.{TABLE_ID}"
    errors = bq_client.insert_rows_json(table_ref, payload)

    if errors:
        print(f"❌ BigQuery writing error: {errors}")
    else:
        print(f"✅ Data loaded into BigQuery: {len(payload)} rows")


# Matches one complete PRIVMSG line: optional tags token, sender prefix,
# channel name and message text.
_PRIVMSG_PATTERN = re.compile(r"^(?:@\S+ )?:(\w+)!\S+ PRIVMSG #(\w+) :(.*)$")


def _irc_command(line):
    """Return the command word of an IRC line, skipping tags/prefix tokens."""
    for token in line.split(" "):
        if token and token[0] not in "@:":
            return token
    return ""


def _reconnect(stale_sock):
    """Close *stale_sock* (best effort) and return a fresh connection."""
    try:
        stale_sock.close()
    except OSError:
        # The socket is being discarded anyway; a failing close is harmless.
        pass
    return connect()


def listen_chat(sock):
    """Read chat from the IRC socket forever and batch it into BigQuery.

    Incoming bytes are accumulated and split into complete IRC lines, so a
    single recv() carrying several messages yields several rows and a line
    split across two recv() calls is reassembled. PING lines are answered
    with PONG, JOIN lines are logged, and PRIVMSG lines are buffered and
    flushed via write_to_bigquery() every 20 rows or once more than 5
    seconds have passed since the last flush (checked when a message arrives).

    On a closed connection, a 60-second read timeout or any socket error
    the old socket is closed and a new connection is opened.

    NOTE(review): the replacement connection only logs in; no channel is
    re-joined on it from here.
    NOTE(review): an exception raised by write_to_bigquery() is not caught
    and ends this loop.

    Args:
        sock: connected socket as returned by connect().
    """
    chat_buffer = []
    last_write_time = time.time()
    pending = b""  # tail of the stream whose line terminator has not arrived yet

    while True:
        try:
            data = sock.recv(2048)

            # 🚀 print the raw chunk and its length to make empty reads visible.
            resp = data.decode("utf-8", errors="replace").strip()
            print(f"📩 Meta data ({len(resp)} byte): {resp}")

            if not data:
                # recv() returning no bytes means the peer closed the connection.
                print("⚠ Empty message, reconnecting...")
                sock = _reconnect(sock)
                pending = b""
                continue

            # Keep the unterminated tail for the next recv(); decode only
            # complete lines so a multi-byte character is never cut in half.
            pending += data
            *complete_lines, pending = pending.split(b"\n")

            for raw_line in complete_lines:
                line = raw_line.decode("utf-8", errors="replace").strip()
                command = _irc_command(line)

                if command == "PING":
                    # answer Twitch PING to keep the connection alive
                    sock.sendall("PONG :tmi.twitch.tv\r\n".encode("utf-8"))
                    print("✅ sending PONG")

                elif command == "JOIN":
                    # joined channel successfully
                    print(f"✅ Channel joined: {line}")

                elif command == "PRIVMSG":
                    parsed = _PRIVMSG_PATTERN.match(line)
                    if parsed is None:
                        continue  # malformed chat line; skip it

                    username, channel, chat_message = parsed.groups()
                    # UTC wall-clock time, same format datetime.utcnow() produced.
                    timestamp = time.strftime("%Y-%m-%d %H:%M:%S", time.gmtime())
                    game_name = joined_channels.get(channel, "Unknown Game")

                    print(f"💬 [{timestamp}] [{game_name}] [{channel}] [{username}]: {chat_message}")

                    chat_buffer.append({
                        "timestamp": timestamp,
                        "game_name": game_name,
                        "channel": channel,
                        "user_name": username,
                        "message": chat_message
                    })

                    # write to BigQuery every 20 rows or five seconds
                    if len(chat_buffer) >= 20 or time.time() - last_write_time > 5:
                        write_to_bigquery(chat_buffer)
                        chat_buffer.clear()
                        last_write_time = time.time()

        except socket.timeout:
            print("⚠ 60s no new messages，reconnecting to Twitch")
            sock = _reconnect(sock)
            pending = b""

        except OSError as e:
            # Covers BrokenPipeError and every other socket-level failure.
            print(f"⚠ Connecction lost, reconnecting... Error: {e}")
            sock = _reconnect(sock)
            pending = b""


def update_channels(sock):
    """Repeatedly join the chat of every live channel for the tracked games.

    Each cycle resolves GAME_LIST to game ids, fetches the live channels per
    game and joins them one by one, pausing 2 seconds after each successful
    join. A socket error during a join triggers a reconnect and one retry of
    that channel. After a full pass the loop sleeps 60 seconds; any other
    error is printed and the cycle restarts after 10 seconds.

    Args:
        sock: connected socket as returned by connect().
    """
    while True:
        try:
            live_by_game = get_live_channels(get_game_ids(GAME_LIST))

            # Flatten {game: [channels]} into (game, channel) pairs, lazily and
            # in the mapping's own order.
            targets = (
                (game, name)
                for game, names in live_by_game.items()
                for name in names
            )

            for game, name in targets:
                try:
                    connect_to_channel(sock, name, game)
                    # Pause between joins to stay under Twitch's join limits.
                    time.sleep(2)
                except OSError:
                    print("⚠ connectiong lost, reconnecting...")
                    sock = connect()
                    connect_to_channel(sock, name, game)

            time.sleep(60)

        except Exception as e:
            print(f"⚠ Error: {e}")
            # Back off for 10 seconds before starting the next cycle.
            time.sleep(10)

if __name__ == "__main__":
    # A single IRC connection is handed to both the listener and the updater.
    sock = connect()

    # Background reader: a daemon thread, so it never keeps the process alive.
    listen_thread = threading.Thread(
        target=listen_chat,
        args=(sock,),
        daemon=True,
    )
    listen_thread.start()

    # Foreground loop: keeps joining newly live channels.
    update_channels(sock)

    # Wait on the listener should the updater ever return.
    listen_thread.join()


流式输出内容被截断，只能显示最后 5000 行内容。 (Streaming output was truncated; only the last 5000 lines are shown.)
📩 Meta data (155 byte): :bracizo!bracizo@bracizo.tmi.twitch.tv PRIVMSG #mictia00 :Jajjss
:rkurupt!rkurupt@rkurupt.tmi.twitch.tv PRIVMSG #valorant_northamerica :we playing or nah?
💬 [2025-03-20 22:51:39] [VALORANT] [mictia00] [bracizo]: Jajjss
✅ Data loaded into BigQuery: 20 rows
📩 Meta data (402 byte): :askirt_kura!askirt_kura@askirt_kura.tmi.twitch.tv PRIVMSG #noway4u_sir :full crit gehen ohne gauntlet weiß ich ja nicht HmmmOK
:nightbot!nightbot@nightbot.tmi.twitch.tv PRIVMSG #k1ng :xD
:miozor_ya!miozor_ya@miozor_ya.tmi.twitch.tv PRIVMSG #lonely__me :если лера покажет настройки, надо будет сделать команду настроек с клипом
:leamoremii!leamoremii@leamoremii.tmi.twitch.tv PRIVMSG #olesha :654321
💬 [2025-03-20 22:51:39] [League of Legends] [noway4u_sir] [askirt_kura]: full crit gehen ohne gauntlet weiß ich ja nicht HmmmOK
📩 Meta data (578 byte): :alckasher!alckasher@alckasher.tmi.twitch.tv PRIVMSG #olesha :654321
:nathanjzn!nathanjzn@nathanjzn.tmi