# Stepwise Implementation Plan: Dashboard Summary API Test (Pure Functionality)
This notebook demonstrates the step-by-step process of the dashboard summary pipeline, from request construction to response handling, using only pure Python functions (no actual API calls). Each cell represents a logical step in the process.

In [47]:
# Step 1: Import Required Libraries and Utilities
import sys
from pathlib import Path
# ensure that the project root is on PYTHONPATH so that `app` can be imported
sys.path.insert(0, str(Path.cwd().parents[1]))

import pandas as pd
import numpy as np
from app.util.football_data_manager import FootballDataManager
from app.services.metrics_engine import MetricsEngine
import json

In [48]:
# Step 2: Set Parameters for the Dashboard Summary
competition_id = 9
season_id = 281
team_id = 904

In [49]:
# Step 3: Load Data Using FootballDataManager
fdm = FootballDataManager()
matches_df = fdm.get_matches_for_team(competition_id, season_id, team_id)
if matches_df is None or matches_df.empty:
    raise ValueError("No matches found for team in this competition/season.")



In [50]:
# Debug: Inspect matches_df
print("matches_df shape:", matches_df.shape)
display(matches_df.head())

matches_df shape: (34, 44)


Unnamed: 0,match_id,match_date,kick_off,competition_id,competition_name,country_name,season_id,season_name,home_team_id,home_team_name,...,competition_stage_id,competition_stage_name,stadium_id,stadium_name,stadium_country_id,stadium_country_name,referee_id,referee_name,referee_country_id,referee_country_name
0,3895302,2024-04-14,17:30:00.000,9,1. Bundesliga,Germany,281,2023/2024,904,Bayer Leverkusen,...,1,Regular Season,377,BayArena,85,Germany,234,Harm Osmers,85,Germany
1,3895292,2024-04-06,15:30:00.000,9,1. Bundesliga,Germany,281,2023/2024,190,Union Berlin,...,1,Regular Season,560,Stadion An der Alten Försterei,85,Germany,235,Benjamin Brand,85,Germany
2,3895333,2024-05-05,18:30:00.000,9,1. Bundesliga,Germany,281,2023/2024,184,Eintracht Frankfurt,...,1,Regular Season,116458,Deutsche Bank Park,85,Germany,237,Christian Dingert,85,Germany
3,3895340,2024-05-12,20:30:00.000,9,1. Bundesliga,Germany,281,2023/2024,868,Bochum,...,1,Regular Season,550,Vonovia Ruhrstadion,85,Germany,235,Benjamin Brand,85,Germany
4,3895348,2024-05-18,16:30:00.000,9,1. Bundesliga,Germany,281,2023/2024,904,Bayer Leverkusen,...,1,Regular Season,377,BayArena,85,Germany,837,Matthias Jöllenbeck,85,Germany


In [51]:
# Step 4: Load All Events for the Team's Matches
all_events = [fdm.get_events(match['match_id']) for _, match in matches_df.iterrows()]
if not all_events:
    raise ValueError("No event data found for competition/season/team.")
events = pd.concat(all_events)



In [52]:
# Debug: Inspect events DataFrame
print("events shape:", events.shape)
print("events columns:", events.columns.tolist())
display(events.info())

events shape: (137765, 27)
events columns: ['index', 'period', 'timestamp', 'minute', 'second', 'type_id', 'type_name', 'possession', 'possession_team_id', 'possession_team_name', 'play_pattern_id', 'play_pattern_name', 'team_id', 'team_name', 'player_id', 'player_name', 'location', 'duration', 'match_id', 'tactics_formation', 'tactics_lineup', 'tactics_lineup_json', 'pass_end_location', 'pass_outcome', 'shot_end_location', 'shot_outcome', 'shot_statsbomb_xg']
<class 'pandas.core.frame.DataFrame'>
Index: 137765 entries, 794ec549-5288-4d1a-93e8-0fc6d3968784 to cfb23993-b9e4-41ff-b075-ce80e49bbd8d
Data columns (total 27 columns):
 #   Column                Non-Null Count   Dtype  
---  ------                --------------   -----  
 0   index                 137765 non-null  int64  
 1   period                137765 non-null  int64  
 2   timestamp             137765 non-null  object 
 3   minute                137765 non-null  int64  
 4   second                137765 non-null  int64  


None

In [53]:
# Step 5: Filter Events for the Team
team_events = events[events['team_id'] == team_id]
# Step 5: Filter Events for the Team
team_events = events[events['team_id'] == team_id]
if team_events.empty:
    raise ValueError("No events found for the specified team.")

# Also filter the opposition events
opposition_events = events[events['team_id'] != team_id]
if opposition_events.empty:
    raise ValueError("No opposition events found.")

print(f"Team events: {team_events.shape[0]} rows")
print(f"Opposition events: {opposition_events.shape[0]} rows")

opposition_events = opposition_events['team_id'].unique()
print(f"Unique opposition teams: {len(opposition_events)}")


Team events: 81440 rows
Opposition events: 56325 rows
Unique opposition teams: 17


In [54]:
# Debug: Inspect team_events DataFrame
print("team_events shape:", team_events.shape)
display(team_events.head())

team_events shape: (81440, 27)


Unnamed: 0_level_0,index,period,timestamp,minute,second,type_id,type_name,possession,possession_team_id,possession_team_name,...,duration,match_id,tactics_formation,tactics_lineup,tactics_lineup_json,pass_end_location,pass_outcome,shot_end_location,shot_outcome,shot_statsbomb_xg
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
794ec549-5288-4d1a-93e8-0fc6d3968784,1,1,00:00:00.000,0,0,35,Starting XI,1,904,Bayer Leverkusen,...,0.0,3895302,3421.0,"[{'player': {'id': 8667, 'name': 'Lukáš Hrádec...","[{""player"": {""id"": 8667, ""name"": ""Luk\u00e1\u0...",,,,,
1f956a2b-ece0-475b-8802-e82ed922c307,3,1,00:00:00.000,0,0,18,Half Start,1,904,Bayer Leverkusen,...,0.0,3895302,,,,,,,,
cfa1f5e1-4e8e-4fc2-bbc8-4df5fdef8283,14,1,00:00:09.637,0,9,17,Pressure,2,176,Werder Bremen,...,0.709739,3895302,,,,,,,,
a43fa179-91f5-4b22-9f74-7f20af0570e4,19,1,00:00:12.832,0,12,17,Pressure,2,176,Werder Bremen,...,0.572638,3895302,,,,,,,,
533ffee0-b9fc-4065-a9db-cb729523d926,27,1,00:00:15.552,0,15,17,Pressure,2,176,Werder Bremen,...,0.575431,3895302,,,,,,,,


In [61]:
from app.util.metrics.ppda import calculate_ppda as ppda_calculator  # Import the comprehensive PPDA calculator


def calculate_ppda(events_df, team, opposition_half_only=True):
    """Calculate PPDA (Passes Per Defensive Action) for a team.

    This is a wrapper around the more comprehensive implementation from ppda.py.

    Args:
        events_df: DataFrame of match events
        team: Team to calculate PPDA for
        opposition_half_only: If True, only consider defensive actions in the opposition half
        
    Returns:
        PPDA value
    """
    # Get opposition team
    teams = events_df['team_name'].unique()

    print(f"Calculating PPDA for team: {team}, opposition_half_only: {opposition_half_only}")

    print(f"Teams found in events: {teams}")
    opposition_team = [t for t in teams if t != team][0] if len(teams) > 1 else None
    
    if not opposition_team:
        return float('inf')  # Return infinity if we can't find the opposition team

    # Call the comprehensive PPDA calculator
    ppda_results = ppda_calculator(events_df, opposition_thirds=opposition_half_only)

    # Determine if the team is "home" or "away" in the PPDA results
    # Note: In the comprehensive implementation, the first team is considered "home"
    is_home = teams[0] == team if len(teams) > 0 else True
    team_key = "home" if is_home else "away"

    # Return just the PPDA value for the specified team
    return ppda_results[team_key]["ppda"]



def calculate_team_summary(events, team_id):
    matches_played = events['match_id'].nunique()
    goals = len(events[(events['type_name'] == 'Shot') & (events['shot_outcome'] == 'Goal')]) / max(1, matches_played)
    xg = events['shot_statsbomb_xg'].sum() / max(1, matches_played) if 'shot_statsbomb_xg' in events else 0
    shots = len(events[events['type_name'] == 'Shot']) / max(1, matches_played)
    passes = events[events['type_name'] == 'Pass']
    completed_passes = passes[passes['pass_outcome'].isna()]
    pass_accuracy = len(completed_passes) / max(1, len(passes)) * 100 if len(passes) > 0 else 0
    possessions = len(events[events['possession_team_id'] == team_id]['possession'].unique())
    total_possessions = len(events['possession'].unique())
    possession = possessions / max(1, total_possessions) * 100 if total_possessions > 0 else 0
    # Calculate PPDA for this team (use all events for the match, but extract the correct team)
    try:
        ppda_result = calculate_ppda(events)
        # Find which key (home/away) matches this team
        teams = events['team_name'].dropna().unique()
        team_name = None
        if hasattr(events, 'team_name') and team_id in events['team_id'].values:
            team_name = events[events['team_id'] == team_id]['team_name'].iloc[0]
        if team_name is not None and team_name in teams:
            if team_name == teams[0]:
                ppda_val = ppda_result['home']['ppda']
            elif team_name == teams[1]:
                ppda_val = ppda_result['away']['ppda']
            else:
                ppda_val = None
        else:
            ppda_val = None
        if ppda_val is not None and (np.isnan(ppda_val) or np.isinf(ppda_val)):
            ppda_val = None
    except Exception as e:
        ppda_val = None
    return {
        "goals_per_game": goals,
        "xg_per_game": xg,
        "shots_per_game": shots,
        "possession": possession,
        "pass_accuracy": pass_accuracy,
        "ppda": ppda_val
    }
# Calculate summary metrics using the full events DataFrame for correct PPDA calculation
summary_metrics = calculate_team_summary(events, team_id)
pd.DataFrame([summary_metrics])

Unnamed: 0,goals_per_game,xg_per_game,shots_per_game,possession,pass_accuracy,ppda
0,3.235294,2.955959,26.941176,98.412698,85.000255,


**Note:** To ensure PPDA is calculated correctly, we now pass the full events DataFrame (not just team_events) to the summary metrics function. This allows the PPDA calculation to access both team and opposition events as required.

In [56]:
# Debug: Inspect summary_metrics
display(summary_metrics)
print(json.dumps(summary_metrics, indent=2))

{'goals_per_game': 3.235294117647059,
 'xg_per_game': np.float64(2.9559587344294123),
 'shots_per_game': 26.941176470588236,
 'possession': 98.4126984126984,
 'pass_accuracy': 85.00025501096547,
 'ppda': None}

{
  "goals_per_game": 3.235294117647059,
  "xg_per_game": 2.9559587344294123,
  "shots_per_game": 26.941176470588236,
  "possession": 98.4126984126984,
  "pass_accuracy": 85.00025501096547,
  "ppda": null
}


In [57]:
# Step 7: (Optional) Calculate League Benchmarks for Comparison (now with PPDA)
def calculate_league_benchmarks(events):
    team_ids = events['team_id'].unique()
    benchmarks = {}
    for tid in team_ids:
        metrics = calculate_team_summary(events[events['team_id'] == tid], tid)
        benchmarks[tid] = metrics
    return pd.DataFrame.from_dict(benchmarks, orient='index')
league_benchmarks = calculate_league_benchmarks(events)
league_benchmarks.describe()

Unnamed: 0,goals_per_game,xg_per_game,shots_per_game,possession,pass_accuracy
count,18.0,18.0,18.0,18.0,18.0
mean,0.80719,0.891208,9.156863,72.165495,79.809772
std,0.754201,0.551608,4.031022,7.361592,4.387725
min,0.0,0.291992,4.0,62.420382,72.054381
25%,0.125,0.505627,6.5,69.361126,77.447677
50%,0.5,0.750393,8.5,71.054341,79.825307
75%,1.375,1.034383,11.25,73.341018,82.8692
max,2.529412,2.138097,18.323529,98.412698,88.067151


In [58]:
# Debug: Inspect league_benchmarks DataFrame
print("league_benchmarks shape:", league_benchmarks.shape)
display(league_benchmarks.head())

league_benchmarks shape: (18, 6)


Unnamed: 0,goals_per_game,xg_per_game,shots_per_game,possession,pass_accuracy,ppda
904,2.529412,2.138097,18.323529,98.412698,88.067151,
176,0.0,0.43484,4.5,70.967742,82.703214,
190,0.0,0.291992,5.5,73.939394,77.260982,
184,0.5,0.813224,10.5,66.666667,82.924528,
868,0.0,0.300886,4.5,70.481928,72.054381,


In [59]:
# Step 8: (Optional) Visualize or Further Analyze the Results
# You can use pandas, matplotlib, seaborn, or plotly for visualization here.

In [60]:
# Step 9: Convert Team Summary Metrics to JSON (now with PPDA)
# Ensure all values are JSON serializable (replace inf/nan with None)
def safe_jsonify(obj):
    def safe_value(val):
        if isinstance(val, float) and (np.isnan(val) or np.isinf(val)):
            return None
        return val
    if isinstance(obj, dict):
        return {k: safe_value(v) for k, v in obj.items()}
    return obj

summary_json = json.dumps(safe_jsonify(summary_metrics), indent=2)
print(summary_json)

{
  "goals_per_game": 3.235294117647059,
  "xg_per_game": 2.9559587344294123,
  "shots_per_game": 26.941176470588236,
  "possession": 98.4126984126984,
  "pass_accuracy": 85.00025501096547,
  "ppda": null
}
