# Review game duration statistics

In [8]:
from pathlib import Path
from datetime import datetime
import json
import pandas as pd
import re

In [23]:
def retrieve_game_duration(
    p2log: Path,            # Path to the log file
    min_duration:int=0.5    # Minumum duration of a game in minutes
    )-> pd.DataFrame:       # Return a DataFrame with columns: session, game name, duration 

    with open(p2log, 'r') as f:
        df = None
        while True:
            line = f.readline()
            if line == '':
                break
            if 'update info' in line: print(line)
            # New session: pattern: New Session: 2024-06-03 19:23:05   
            regex = "^New Session: (?P<dt>\d\d\d\d-\d\d-\d\d\s\d\d:\d\d:\d\d)\s*$"
            matches = re.match(regex, line)
            if matches:
                previous_game_name = None
                previous_game_start = None
                session_dt = datetime.strptime(matches.group('dt'), '%Y-%m-%d %H:%M:%S')
            
            # Get new game name and start time 
            # New game pattern:
            # 2023-10-21 20:02:28   update info for gameidx: 20 and name Sound Effects
            # 2023-10-21 20:06:14   update info for gameidx: 6 and name Pillars
            regex = r"^(?P<dt>\d\d\d\d-\d\d-\d\d\s\d\d:\d\d:\d\d)\s*update\sinfo\sfor\sgameidx\:\s\d*\sand\sname\s(?P<game_name>[a-zA-Z\s]*)$"
            matches = re.match(regex, line, flags=re.MULTILINE)
            print(">",matches)
            if matches:
                print("got match")
                new_game_name = matches.group('game_name').strip('\n')
                new_game_start = datetime.strptime(matches.group('dt'), '%Y-%m-%d %H:%M:%S')
                # Get finished game duration in miinutes
                if previous_game_name is not None:
                    duration = (new_game_start - previous_game_start).total_seconds()/60
                    row = pd.DataFrame({'session': session_dt, 'game': previous_game_name, 'duration': duration}, index=[0])
                    display(row)
                    df = row if df is None else pd.concat([df, row], ignore_index=True)
                previous_game_name = new_game_name
                previous_game_start = new_game_start
    # Return all games with duration > min_duration
    display(df)
    return df.loc[df['duration'] > min_duration, :]

In [24]:
df = None
for p2log in Path('../logs').glob('_short*.log'):
    print(f'Processing {p2log}')
    new_df = retrieve_game_duration(p2log)
    df = new_df if df is None else pd.concat([df, new_df], ignore_index=True)

print(f"{df.shape[0]} games with duration > minimum duration")  
df = df.sort_values(by='game', ascending=True)
df

Processing ..\logs\_short.log
> None
> None
> None
> None
> None
> None
> None
> None
> None
> None
> None
> None
> None
> None
> None
> None
> None
> None
> None
> None
> None
> None
> None
> None
> None
> None
> None
> None
> None
> None
> None
> None
> None
> None
> None
> None
> None
> None
> None
> None
> None
> None
> None
> None
> None
> None
> None
> None
> None
> None
> None
> None
> None
> None
> None
> None
> None
> None
> None
> None
> None
> None
> None
> None
> None
> None
> None
> None
> None
> None
> None
> None
> None
> None
> None
> None
> None
> None
> None
> None
> None
> None
> None
> None
> None
> None
> None
> None
> None
> None
> None
> None
> None
> None
> None
> None
> None
> None
> None
> None
> None
> None
> None
> None
> None
> None
> None
> None
> None
> None
> None
> None
> None
> None
> None
> None
> None
2025-02-24 20:46:10:   update info for gameidx: 7 and name Genres

> None
> None
> None
> None
> None
> None
> None
> None
> None
> None
> None
> None


None

AttributeError: 'NoneType' object has no attribute 'loc'

In [4]:
name_changes = {
    'Highlander': 'Last Comic Standing',
    'Da Doo Ron Ron': 'Da Doo Rap Rap',
    'Everyone Dies': 'Everybody Dies',
}

df.game = df.game.apply(lambda x: name_changes.get(x,x))
df

AttributeError: 'NoneType' object has no attribute 'game'

In [5]:
df.groupby('game')[['duration']].mean()

AttributeError: 'NoneType' object has no attribute 'groupby'

In [7]:
df.groupby('game')[['duration']].mean().to_dict()['duration']

AttributeError: 'NoneType' object has no attribute 'groupby'

In [25]:
p2game_durations = p2log.parent / 'game_durations.json'

with open(p2game_durations, 'w') as f:
    json.dump(df.groupby('game')[['duration']].mean().to_dict()['duration'], f, indent=4)

In [27]:
d = df.groupby('game')[['duration']].mean().to_dict()['duration']
mean_duration = sum(d.values())/len(d)

session_duration = 45

print(f"Average game duration: {mean_duration:.2f} minutes")
print(f"Between {session_duration//max(d.values())+1:.0f} and {session_duration//min(d.values())+1:.0f} games per session, with {session_duration//mean_duration+1:.0f} in average for a {session_duration} minute session.")

Average game duration: 6.69 minutes
Between 4 and 14 games per session, with 7 in average for a 45 minute session.
