# Review statistics from logs

In [1]:
from pathlib import Path
from datetime import datetime
import ast
import json
import pandas as pd
import re

In [2]:
def extract_session_info(p2log, cleanup=True):
    
    new_session = re.compile(r"^New Session: (?P<timestamp_str>\d\d\d\d-\d\d-\d\d \d\d:\d\d:\d\d)$")
    game_info = re.compile(r"^(?P<time_stamp>\d\d\d\d-\d\d-\d\d \d\d:\d\d:\d\d):   update info for gameidx: \d+ and name (?P<game_name>.*)$")
    host_info = re.compile(r"^(?P<yr>\d\d\d\d)-(?P<mth>\d\d)-(?P<day>\d\d) \d\d:\d\d:\d\d:    Host Index: \d+ Host Name: (?P<host_name>.*)$")
    cast_info = re.compile(r"^(?P<yr>\d\d\d\d)-(?P<mth>\d\d)-(?P<day>\d\d) \d\d:\d\d:\d\d:    player idxs: .* players: (?P<players>.*), audience: \d+$")

    cols = ['session', 'session_ts', 'game', 'host', 'players']
    df = pd.DataFrame(columns=cols)

    with p2log.open() as f:
        i = 1
        session = 0
        new_row = pd.DataFrame(columns=cols)

        while True:
            line = f.readline()
            i += 1                   
            # search for new session
            m = new_session.search(line)
            if m:
                session += 1
                session_ts = m.group('timestamp_str')
                continue
            
            # search for game and cast info
            m = game_info.search(line)
            if m:
                # append last game info to df, unless it is empty
                if not new_row.empty:
                    df = new_row if df.empty else pd.concat([df, new_row], ignore_index=False) 
                
                game_name = m.group('game_name')
                game_ts = m.group('time_stamp')
                new_row = pd.DataFrame(
                    data={ 'session': [session], 'session_ts': [session_ts], 'game': [game_name], 'host': [None], 'players': [None]},
                    index=[datetime.fromisoformat(game_ts)]
                    )
                continue
            m = host_info.search(line)
            if m:
                host_name = m.group('host_name')
                new_row['host'] = host_name
                continue
            m = cast_info.search(line)
            if m:
                players = ast.literal_eval(m.group('players'))
                new_row.iloc[0,4] = ','.join(players)
                continue
            # test for EOF
            if line == '': break
            
    if cleanup:
        print('cleaning up too short sessions')
        sessions = df.session.unique()
        for s in sessions:
            session_df = df.loc[df.session == s]
            session_idxs = session_df.index
            first, last = session_idxs[0], session_idxs[-1]
            session_duration = (last - first).total_seconds()
            if session_duration < 10 * 60:
                df = df.drop(session_idxs)
        
    return df

In [3]:
p2log = Path('..') / 'logs/_short.log'
assert p2log.exists()
df = extract_session_info(p2log)
sessions = df.session.unique()
print(f"{len(sessions)} valid sessions")
print(f"{len(df)} games")
df.tail(5)


cleaning up too short sessions
3 valid sessions
55 games


Unnamed: 0,session,session_ts,game,host,players
2025-04-14 20:36:59,28,2025-04-14 19:58:14,Everyone Dies,Etienne,"Alex,Chris,Yirong"
2025-04-14 20:43:56,28,2025-04-14 19:58:14,Oscar Winning Moment,Don,"Etienne,Knick,Yirong"
2025-04-14 20:47:46,28,2025-04-14 19:58:14,Super Heroes,Grace,"Alex,Don,Haili,Xu,Yirong"
2025-04-14 20:53:33,28,2025-04-14 19:58:14,Two lines,Haili,"Etienne,Grace,Xu"
2025-04-14 20:56:25,28,2025-04-14 19:58:14,Chain Death,Yirong,"Chris,Don,Grace,Knick"


In [20]:
session_idx = -2
coi = ['host', 'players']
selected_session = df.session.unique()[session_idx]
selected_session_df = df.loc[df.session == selected_session]
selected_session_df.index = selected_session_df.game
selected_session_df[coi]

Unnamed: 0_level_0,host,players
game,Unnamed: 1_level_1,Unnamed: 2_level_1
Emotional Floorplan,Punk,"Yirong,Don,Knick"
One Three Five,Knick,"Haili,Etienne,Punk"
Blind Freeze,Chris,"Alex,Grace"
Park Bench,Don,"Haili,Alex,Chris,Grace,Punk"
Forward Reverse,Etienne,"Chris,Don,Knick"
Everyone Dies,Haili,"Yirong,Etienne,Grace"
Puppets,Grace,"Punk,Knick"
Super Heroes,Alex,"Haili,Etienne,Chris,Don,Punk"
Chain Death,Yirong,"Haili,Chris,Grace,Knick"
Irish Drinking Song,Yirong,"Haili,Alex,Etienne,Chris,Grace,Don,Punk,Knick"


In [21]:
selected_session_df.host.value_counts()

host
Don        3
Punk       2
Knick      2
Chris      2
Etienne    2
Haili      2
Grace      2
Alex       2
Yirong     2
Name: count, dtype: int64

In [22]:
cast_counts = selected_session_df.players.str.split(',', expand=True).stack().value_counts()
cast_counts

Knick      10
Alex       10
Punk        9
Haili       8
Etienne     8
Chris       8
Yirong      7
Grace       7
Don         6
Name: count, dtype: int64

In [23]:
# cast = ["Alex","Eddie","Etienne","Grace","Knick","Punk","Xu","Yirong", "Fulvio"]
cast2idx = {c:i for i,c in enumerate(cast_counts.index)}
cast2idx

{'Knick': 0,
 'Alex': 1,
 'Punk': 2,
 'Haili': 3,
 'Etienne': 4,
 'Chris': 5,
 'Yirong': 6,
 'Grace': 7,
 'Don': 8}

In [24]:
import numpy as np

session_cast_stats = pd.DataFrame(columns=cast2idx.keys())
for idx, row in selected_session_df[coi].iterrows():
    players = row.players.split(',')
    row = pd.DataFrame(np.zeros((1, len(cast2idx))), columns=cast2idx.keys(), index=[idx])
    for p in players:
        row.iloc[0,cast2idx[p]] = 1
    # display(row)
    session_cast_stats = row if session_cast_stats.empty else pd.concat([session_cast_stats, row], ignore_index=False)

session_cast_stats.astype(int)

Unnamed: 0,Knick,Alex,Punk,Haili,Etienne,Chris,Yirong,Grace,Don
Emotional Floorplan,1,0,0,0,0,0,1,0,1
One Three Five,0,0,1,1,1,0,0,0,0
Blind Freeze,0,1,0,0,0,0,0,1,0
Park Bench,0,1,1,1,0,1,0,1,0
Forward Reverse,1,0,0,0,0,1,0,0,1
Everyone Dies,0,0,0,0,1,0,1,1,0
Puppets,1,0,1,0,0,0,0,0,0
Super Heroes,0,0,1,1,1,1,0,0,1
Chain Death,1,0,0,1,0,1,0,1,0
Irish Drinking Song,1,1,1,1,1,1,0,1,1


In [25]:
session_cast_stats.sum(axis=0)

Knick      10.0
Alex       10.0
Punk        9.0
Haili       8.0
Etienne     8.0
Chris       8.0
Yirong      7.0
Grace       7.0
Don         6.0
dtype: float64