# Review statistics from logs

In [1]:
from pathlib import Path
from datetime import datetime
import ast
import json
import pandas as pd
import re

In [2]:
def extract_session_info(p2log, cleanup=True):
    
    new_session = re.compile(r"^New Session: (?P<timestamp_str>\d\d\d\d-\d\d-\d\d \d\d:\d\d:\d\d)$")
    game_info = re.compile(r"^(?P<time_stamp>\d\d\d\d-\d\d-\d\d \d\d:\d\d:\d\d):   update info for gameidx: \d+ and name (?P<game_name>.*)$")
    host_info = re.compile(r"^(?P<yr>\d\d\d\d)-(?P<mth>\d\d)-(?P<day>\d\d) \d\d:\d\d:\d\d:    Host Index: \d+ Host Name: (?P<host_name>.*)$")
    cast_info = re.compile(r"^(?P<yr>\d\d\d\d)-(?P<mth>\d\d)-(?P<day>\d\d) \d\d:\d\d:\d\d:    player idxs: .* players: (?P<players>.*), audience: \d+$")

    cols = ['session', 'session_ts', 'game', 'host', 'players']
    df = pd.DataFrame(columns=cols)

    with p2log.open() as f:
        i = 1
        session = 0
        new_row = pd.DataFrame(columns=cols)

        while True:
            line = f.readline()
            i += 1                   
            # search for new session
            m = new_session.search(line)
            if m:
                session += 1
                session_ts = m.group('timestamp_str')
                continue
            
            # search for game and cast info
            m = game_info.search(line)
            if m:
                # append last game info to df, unless it is empty
                if not new_row.empty:
                    df = new_row if df.empty else pd.concat([df, new_row], ignore_index=False) 
                
                game_name = m.group('game_name')
                game_ts = m.group('time_stamp')
                new_row = pd.DataFrame(
                    data={ 'session': [session], 'session_ts': [session_ts], 'game': [game_name], 'host': [None], 'players': [None]},
                    index=[datetime.fromisoformat(game_ts)]
                    )
                continue
            m = host_info.search(line)
            if m:
                host_name = m.group('host_name')
                new_row['host'] = host_name
                continue
            m = cast_info.search(line)
            if m:
                players = ast.literal_eval(m.group('players'))
                new_row.iloc[0,4] = ','.join(players)
                continue
            # test for EOF
            if line == '': break
            
    if cleanup:
        print('cleaning up too short sessions')
        sessions = df.session.unique()
        for s in sessions:
            session_df = df.loc[df.session == s]
            session_idxs = session_df.index
            first, last = session_idxs[0], session_idxs[-1]
            session_duration = (last - first).total_seconds()
            if session_duration < 10 * 60:
                df = df.drop(session_idxs)
        
    return df

In [3]:
p2log = Path('..') / 'logs/_short.log'
assert p2log.exists()
df = extract_session_info(p2log)
sessions = df.session.unique()
print(f"{len(sessions)} valid sessions")
print(f"{len(df)} games")
df.tail(5)


cleaning up too short sessions
17 valid sessions
233 games


Unnamed: 0,session,session_ts,game,host,players
2024-11-16 20:49:21,137,2024-11-16 19:49:26,Questions Only,Eddie,"Alex,Elva,Grace,Haili,Meg,Punk,Stacey,Yirong"
2024-11-16 20:53:25,137,2024-11-16 19:49:26,Forward Reverse,Haili,"Charles,Eddie,Etienne"
2024-11-16 20:57:53,137,2024-11-16 19:49:26,Pillars,Alex,"Harry,Jennel,Knick"
2024-11-16 21:02:23,137,2024-11-16 19:49:26,One Three Five,Xu,"Elva,Harry,Meg"
2024-11-16 21:06:37,137,2024-11-16 19:49:26,Emotion Roller Coaster,Elva,"Charles,Grace,Haili,Knick"


In [4]:
session_idx = -1
coi = ['host', 'players']
selected_session = df.session.unique()[session_idx]
selected_session_df = df.loc[df.session == selected_session]
selected_session_df.index = selected_session_df.game
selected_session_df[coi]

Unnamed: 0_level_0,host,players
game,Unnamed: 1_level_1,Unnamed: 2_level_1
Emotional Floorplan,Grace,"Etienne,Harry,Knick"
TV Box,Punk,"Eddie,Fulvio,Xu,Yirong"
Musical Paper Chase,Etienne,"Fulvio,Jennel,Stacey"
Chain Death,Knick,"Charles,Elva,Grace,Punk"
Stunt Doubles,Fulvio,"Alex,Haili,Meg,Xu"
Questions Only,Eddie,"Alex,Elva,Grace,Haili,Meg,Punk,Stacey,Yirong"
Forward Reverse,Haili,"Charles,Eddie,Etienne"
Pillars,Alex,"Harry,Jennel,Knick"
One Three Five,Xu,"Elva,Harry,Meg"
Emotion Roller Coaster,Elva,"Charles,Grace,Haili,Knick"


In [5]:
selected_session_df.host.value_counts()

host
Grace      1
Punk       1
Etienne    1
Knick      1
Fulvio     1
Eddie      1
Haili      1
Alex       1
Xu         1
Elva       1
Name: count, dtype: int64

In [12]:
cast_counts = selected_session_df.players.str.split(',', expand=True).stack().value_counts()
cast_counts

Harry      3
Knick      3
Charles    3
Elva       3
Grace      3
Haili      3
Meg        3
Etienne    2
Eddie      2
Fulvio     2
Xu         2
Yirong     2
Jennel     2
Stacey     2
Punk       2
Alex       2
Name: count, dtype: int64

In [13]:
# cast = ["Alex","Eddie","Etienne","Grace","Knick","Punk","Xu","Yirong", "Fulvio"]
cast2idx = {c:i for i,c in enumerate(cast_counts.index)}
cast2idx

{'Harry': 0,
 'Knick': 1,
 'Charles': 2,
 'Elva': 3,
 'Grace': 4,
 'Haili': 5,
 'Meg': 6,
 'Etienne': 7,
 'Eddie': 8,
 'Fulvio': 9,
 'Xu': 10,
 'Yirong': 11,
 'Jennel': 12,
 'Stacey': 13,
 'Punk': 14,
 'Alex': 15}

In [14]:
import numpy as np

session_cast_stats = pd.DataFrame(columns=cast2idx.keys())
for idx, row in selected_session_df[coi].iterrows():
    players = row.players.split(',')
    row = pd.DataFrame(np.zeros((1, len(cast2idx))), columns=cast2idx.keys(), index=[idx])
    for p in players:
        row.iloc[0,cast2idx[p]] = 1
    # display(row)
    session_cast_stats = row if session_cast_stats.empty else pd.concat([session_cast_stats, row], ignore_index=False)

session_cast_stats

Unnamed: 0,Harry,Knick,Charles,Elva,Grace,Haili,Meg,Etienne,Eddie,Fulvio,Xu,Yirong,Jennel,Stacey,Punk,Alex
Emotional Floorplan,1.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
TV Box,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0,1.0,1.0,0.0,0.0,0.0,0.0
Musical Paper Chase,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,1.0,0.0,0.0
Chain Death,0.0,0.0,1.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0
Stunt Doubles,0.0,0.0,0.0,0.0,0.0,1.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0
Questions Only,0.0,0.0,0.0,1.0,1.0,1.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,1.0,1.0
Forward Reverse,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
Pillars,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0
One Three Five,1.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
Emotion Roller Coaster,0.0,1.0,1.0,0.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [15]:
session_cast_stats.sum(axis=0)

Harry      3.0
Knick      3.0
Charles    3.0
Elva       3.0
Grace      3.0
Haili      3.0
Meg        3.0
Etienne    2.0
Eddie      2.0
Fulvio     2.0
Xu         2.0
Yirong     2.0
Jennel     2.0
Stacey     2.0
Punk       2.0
Alex       2.0
dtype: float64