# Review game duration statistics

In [19]:
from pathlib import Path
from datetime import datetime
import json
import pandas as pd
import re

In [20]:
def retrieve_game_duration(
    p2log: Path,                # Path to the log file
    min_duration: float = 0.5   # Minimum duration of a game in minutes
    ) -> pd.DataFrame:          # Return a DataFrame with columns: session, game, duration
    """Extract the duration of each played game from a log file.

    The log is scanned line by line for two kinds of entries:

    * ``New Session: 2024-06-03 19:23:05`` starts a new session and resets the
      game currently being tracked.
    * ``2023-10-21 20:02:28   update info for gameidx: 20 and name Sound Effects``
      marks the start of a game.

    A game's duration is the time elapsed between its start line and the start
    line of the next game in the same session. The last game of a session has
    no following start line, so no duration is recorded for it.

    Returns a DataFrame with columns ``session`` (session start datetime),
    ``game`` (game name) and ``duration`` (minutes), restricted to rows whose
    duration is strictly greater than `min_duration`. When the log contains no
    completed game, an empty DataFrame with those three columns is returned.
    """
    columns = ['session', 'game', 'duration']
    dt_format = '%Y-%m-%d %H:%M:%S'

    # Compile both patterns once instead of on every line.
    # New session pattern: New Session: 2024-06-03 19:23:05
    session_re = re.compile(
        r"^New Session: (?P<dt>\d\d\d\d-\d\d-\d\d\s\d\d:\d\d:\d\d)\s*$"
    )
    # New game pattern:
    # 2023-10-21 20:02:28   update info for gameidx: 20 and name Sound Effects
    # 2023-10-21 20:06:14   update info for gameidx: 6 and name Pillars
    game_re = re.compile(
        r"^(?P<dt>\d\d\d\d-\d\d-\d\d\s\d\d:\d\d:\d\d)\s*update\sinfo\sfor\sgameidx\:\s\d*\sand\sname\s(?P<game_name>[a-zA-Z\s]*)$",
        flags=re.MULTILINE,
    )

    # Initialised up front so that a game line appearing before any
    # "New Session" header does not hit an unbound variable.
    session_dt = None
    previous_game_name = None
    previous_game_start = None

    # Rows are collected in a list and turned into a DataFrame once at the end.
    records = []

    with open(p2log, 'r') as f:
        for line in f:
            matches = session_re.match(line)
            if matches:
                # A new session interrupts whatever game was in progress.
                previous_game_name = None
                previous_game_start = None
                session_dt = datetime.strptime(matches.group('dt'), dt_format)
                continue

            matches = game_re.match(line)
            if not matches:
                continue

            # The name group also captures the trailing newline (and any
            # surrounding blanks), so strip all whitespace.
            new_game_name = matches.group('game_name').strip()
            new_game_start = datetime.strptime(matches.group('dt'), dt_format)

            # The start of a new game closes the previous one: duration in minutes.
            if previous_game_name is not None:
                duration = (new_game_start - previous_game_start).total_seconds() / 60
                records.append(
                    {'session': session_dt, 'game': previous_game_name, 'duration': duration}
                )
            previous_game_name = new_game_name
            previous_game_start = new_game_start

    if not records:
        return pd.DataFrame(columns=columns)

    df = pd.DataFrame(records, columns=columns)
    # Return all games with duration > min_duration
    return df.loc[df['duration'] > min_duration, :]

In [21]:
# Parse every matching log file and combine the per-game durations.
log_dir = Path('../logs')
frames = []
# A plain `for` loop is kept on purpose: `p2log` stays bound after the loop and
# a later cell uses `p2log.parent` to decide where to write its output.
# `sorted` makes the processing order deterministic.
for p2log in sorted(log_dir.glob('_short*.log')):
    print(f'Processing {p2log}')
    frames.append(retrieve_game_duration(p2log))

if not frames:
    # Fail with an explicit message instead of an AttributeError on None below.
    raise FileNotFoundError(f"No log file matching '_short*.log' found in {log_dir}")

# Concatenate once, leaving out empty frames; fall back to the first (empty)
# frame when no log produced any game so the columns are still present.
non_empty_frames = [frame for frame in frames if not frame.empty]
df = pd.concat(non_empty_frames, ignore_index=True) if non_empty_frames else frames[0]

print(f"{df.shape[0]} games with duration > minimum duration")  
df = df.sort_values(by='game', ascending=True)
df

Processing ..\logs\_short.log
Processing ..\logs\_short_2023.log
30 games with duration > minimum duration


Unnamed: 0,session,game,duration
4,2024-06-03 20:00:50,A Date with Me,3.533333
8,2024-06-17 20:05:10,Da Doo Ron Ron,7.3
18,2023-10-21 20:02:19,Deaf Replay,7.833333
27,2023-11-13 20:15:13,Double Reverse Alphabet,6.583333
9,2024-06-17 20:05:10,Emotion Roller Coaster,4.133333
24,2023-11-13 20:15:13,Everybody Dies,8.183333
16,2023-10-21 20:02:19,Everybody Dies,17.25
0,2024-06-03 20:00:50,Everyone Dies,10.333333
2,2024-06-03 20:00:50,Forward Reverse,7.733333
5,2024-06-17 20:05:10,Forward Reverse,3.783333


In [22]:
# Names as they appear in the logs (keys) mapped to the name they should be
# reported under (values); any name not listed here is left unchanged.
name_changes = {
    'Highlander': 'Last Comic Standing',
    'Da Doo Ron Ron': 'Da Doo Rap Rap',
    'Everyone Dies': 'Everybody Dies',
}

df['game'] = df['game'].map(lambda name: name_changes.get(name, name))
df

Unnamed: 0,session,game,duration
4,2024-06-03 20:00:50,A Date with Me,3.533333
8,2024-06-17 20:05:10,Da Doo Rap Rap,7.3
18,2023-10-21 20:02:19,Deaf Replay,7.833333
27,2023-11-13 20:15:13,Double Reverse Alphabet,6.583333
9,2024-06-17 20:05:10,Emotion Roller Coaster,4.133333
24,2023-11-13 20:15:13,Everybody Dies,8.183333
16,2023-10-21 20:02:19,Everybody Dies,17.25
0,2024-06-03 20:00:50,Everybody Dies,10.333333
2,2024-06-03 20:00:50,Forward Reverse,7.733333
5,2024-06-17 20:05:10,Forward Reverse,3.783333


In [23]:
# Mean duration (minutes) per game, as a one-column DataFrame indexed by game.
df[['game', 'duration']].groupby('game').mean()

Unnamed: 0_level_0,duration
game,Unnamed: 1_level_1
A Date with Me,3.533333
Da Doo Rap Rap,7.3
Deaf Replay,7.833333
Double Reverse Alphabet,6.583333
Emotion Roller Coaster,4.133333
Everybody Dies,11.922222
Forward Reverse,5.758333
Genres,8.25
Last Comic Standing,13.833333
More or Less,3.95


In [24]:
# Same per-game means as a plain {game name: mean duration in minutes} dict.
df.groupby('game')['duration'].mean().to_dict()

{'A Date with Me': 3.533333333333333,
 'Da Doo Rap Rap': 7.3,
 'Deaf Replay': 7.833333333333333,
 'Double Reverse Alphabet': 6.583333333333333,
 'Emotion Roller Coaster': 4.133333333333334,
 'Everybody Dies': 11.922222222222222,
 'Forward Reverse': 5.758333333333333,
 'Genres': 8.25,
 'Last Comic Standing': 13.833333333333332,
 'More or Less': 3.95,
 'No Laughing': 4.641666666666667,
 'One Three Five': 4.383333333333334,
 'Pillars': 5.294444444444444,
 'Puppets': 3.3583333333333334,
 'Questions Only': 6.225,
 'Sound Effects': 3.7666666666666666,
 'Stunt Doubles': 6.883333333333334,
 'Suitcase': 11.15,
 'Sure Ding': 8.375}

In [25]:
# Write the {game: mean duration in minutes} mapping as JSON next to the logs.
# `p2log` is the loop variable left over from the log-processing cell, so the
# file is created in the directory of the last processed log.
p2game_durations = p2log.parent / 'game_durations.json'
mean_duration_by_game = df.groupby('game')[['duration']].mean().to_dict()['duration']

with open(p2game_durations, 'w') as f:
    json.dump(mean_duration_by_game, f, indent=4)

In [27]:
# Mean duration (minutes) per game, then the unweighted mean over all games.
d = df.groupby('game')[['duration']].mean().to_dict()['duration']
durations = list(d.values())
mean_duration = sum(durations) / len(durations)

# Length of a session, in minutes.
session_duration = 45

# Games per session: whole games that fit, plus one, computed for the
# longest, shortest and average game duration respectively.
fewest_games = session_duration // max(durations) + 1
most_games = session_duration // min(durations) + 1
typical_games = session_duration // mean_duration + 1

print(f"Average game duration: {mean_duration:.2f} minutes")
print(f"Between {fewest_games:.0f} and {most_games:.0f} games per session, with {typical_games:.0f} in average for a {session_duration} minute session.")

Average game duration: 6.69 minutes
Between 4 and 14 games per session, with 7 in average for a 45 minute session.
