In [104]:
import pandas as pd
import numpy as np
import plotly.graph_objects as go
import plotly.express as px
from plotly.subplots import make_subplots
import warnings
warnings.filterwarnings('ignore')

In [105]:
# Set Plotly template for professional appearance
import plotly.io as pio
pio.templates.default = "plotly_white"

In [106]:
# =============================================================================
# CUSTOM PITCH CREATION FOR 0-100 COORDINATE SYSTEM
# =============================================================================

def create_custom_pitch(pitch_color='#2E7D32', line_color='white', stripe_color='#388E3C', stripe_pattern=True):
    """
    Create a professional and realistic football pitch using Plotly,
    scaled to a 0-100 coordinate system.
    """
    fig = go.Figure()

    # Pitch layout
    fig.update_layout(
        xaxis=dict(range=[-5, 105], visible=False),
        yaxis=dict(range=[-5, 105], visible=False),
        plot_bgcolor=pitch_color,
        height=700,
        width=1050,
        margin=dict(l=20, r=20, t=50, b=20),
        showlegend=False
    )

    # Shapes for pitch markings
    pitch_shapes = []

    # Outer lines
    pitch_shapes.append(dict(type='rect', x0=0, y0=0, x1=100, y1=100, line=dict(color=line_color, width=2)))
    # Center line
    pitch_shapes.append(dict(type='line', x0=50, y0=0, x1=50, y1=100, line=dict(color=line_color, width=2)))
    
    # Penalty areas
    pitch_shapes.append(dict(type='rect', x0=0, y0=21.1, x1=16.5, y1=78.9, line=dict(color=line_color, width=2)))
    pitch_shapes.append(dict(type='rect', x0=83.5, y0=21.1, x1=100, y1=78.9, line=dict(color=line_color, width=2)))
    
    # Goal areas
    pitch_shapes.append(dict(type='rect', x0=0, y0=36.8, x1=5.5, y1=63.2, line=dict(color=line_color, width=2)))
    pitch_shapes.append(dict(type='rect', x0=94.5, y0=36.8, x1=100, y1=63.2, line=dict(color=line_color, width=2)))

    # Center circle
    pitch_shapes.append(dict(type='circle', x0=40.85, y0=40.85, x1=59.15, y1=59.15, line=dict(color=line_color, width=2)))
    
    # Penalty arcs
    pitch_shapes.append(dict(type='path', path='M 16.5,38.7 C 23.5,43.7, 23.5,56.3, 16.5,61.3', line=dict(color=line_color, width=2)))
    pitch_shapes.append(dict(type='path', path='M 83.5,38.7 C 76.5,43.7, 76.5,56.3, 83.5,61.3', line=dict(color=line_color, width=2)))

    # Corner arcs
    pitch_shapes.append(dict(type='path', path='M 0,1 A 1,1 0 0,1 1,0', line=dict(color=line_color, width=2)))
    pitch_shapes.append(dict(type='path', path='M 0,99 A 1,1 0 0,0 1,100', line=dict(color=line_color, width=2)))
    pitch_shapes.append(dict(type='path', path='M 100,1 A 1,1 0 0,0 99,0', line=dict(color=line_color, width=2)))
    pitch_shapes.append(dict(type='path', path='M 100,99 A 1,1 0 0,1 99,100', line=dict(color=line_color, width=2)))

    # Add stripes if enabled
    if stripe_pattern:
        for i in range(0, 100, 10):
            pitch_shapes.append(dict(
                type='rect',
                x0=i, y0=0, x1=i+5, y1=100,
                fillcolor=stripe_color,
                layer='below',
                line_width=0,
                opacity=0.3
            ))

    fig.update_layout(shapes=pitch_shapes)

    # Scatter plots for spots and goals
    # Center spot
    fig.add_trace(go.Scatter(x=[50], y=[50], mode='markers', marker=dict(color=line_color, size=8), hoverinfo='none'))
    # Penalty spots
    fig.add_trace(go.Scatter(x=[11, 89], y=[50, 50], mode='markers', marker=dict(color=line_color, size=8), hoverinfo='none'))
    
    # Goals
    fig.add_trace(go.Scatter(x=[-1, -1], y=[45.2, 54.8], mode='lines', line=dict(color=line_color, width=4), hoverinfo='none'))
    fig.add_trace(go.Scatter(x=[101, 101], y=[45.2, 54.8], mode='lines', line=dict(color=line_color, width=4), hoverinfo='none'))

    return fig

In [107]:
# Display the new pitch design
pitch_fig = create_custom_pitch()
pitch_fig.show()

In [108]:
# =============================================================================
# STEP 1: SETUP AND DATA PREPARATION
# =============================================================================

def load_and_prepare_data():
    """
    Load and prepare the dataset for set-piece analysis
    """
    print("=== STEP 1: DATA LOADING AND PREPARATION ===")
    
    try:
        # Load the dataset
        df = pd.read_csv('Dinamo_Bucuresti_2024_2025_events.csv', low_memory=False)
        print(f"✓ Dataset loaded successfully: {len(df)} events")
        
        # Basic dataset info
        print(f"✓ Dataset shape: {df.shape}")
        print(f"✓ Total matches: {df['matchId'].nunique()}")
        print(f"✓ Date range: {df['date'].min()} to {df['date'].max()}")
        
        return df
        
    except FileNotFoundError:
        print("❌ Error: Dataset file not found!")
        return None

In [109]:
def filter_set_pieces(df):
    """
    Filter the dataset for set-piece events only
    """
    print("\n--- Filtering for Set-Piece Events ---")
    
    # Define set-piece event types
    set_piece_types = ['corner', 'free_kick', 'throw_in', 'goal_kick']
    
    # Filter for set-piece events
    set_piece_events = df[df['type.primary'].isin(set_piece_types)].copy()
    
    # Add set-piece classification
    set_piece_events['set_piece_category'] = set_piece_events['type.primary'].map({
        'corner': 'Corner Kick',
        'free_kick': 'Free Kick', 
        'throw_in': 'Throw In',
        'goal_kick': 'Goal Kick'
    })
    
    # Add team context
    set_piece_events['is_dinamo'] = set_piece_events['team.name'] == 'Dinamo Bucureşti'
    set_piece_events['context'] = set_piece_events['is_dinamo'].map({
        True: 'Dinamo Attacking',
        False: 'Dinamo Defending'
    })
    
    print(f"✓ Total set-piece events identified: {len(set_piece_events)}")
    print(f"✓ Set-piece breakdown:")
    for sp_type in set_piece_types:
        count = len(set_piece_events[set_piece_events['type.primary'] == sp_type])
        print(f"  • {sp_type.title()}: {count}")
    
    return set_piece_events

In [110]:
def clean_set_piece_data(set_piece_events):
    """
    Clean and prepare set-piece data for analysis
    """
    print("\n--- Cleaning Set-Piece Data ---")
    
    # Remove events with missing location data
    initial_count = len(set_piece_events)
    set_piece_events = set_piece_events.dropna(subset=['location.x', 'location.y'])
    
    # Remove events with missing player data
    set_piece_events = set_piece_events.dropna(subset=['player.name'])
    
    # Ensure numeric columns are properly typed
    numeric_columns = ['location.x', 'location.y', 'possession.duration', 'possession.attack.xg']
    for col in numeric_columns:
        if col in set_piece_events.columns:
            set_piece_events[col] = pd.to_numeric(set_piece_events[col], errors='coerce')
    
    final_count = len(set_piece_events)
    print(f"✓ Data cleaned: {initial_count - final_count} events removed due to missing data")
    print(f"✓ Final set-piece dataset: {final_count} events")
    
    return set_piece_events

In [111]:
# =============================================================================
# STEP 2: OFFENSIVE SET-PIECE ANALYSIS (DINAMO ATTACKING)
# =============================================================================

def analyze_offensive_set_pieces(set_piece_events):
    """
    Analyze Dinamo's offensive set-piece strategies
    """
    print("\n=== STEP 2: OFFENSIVE SET-PIECE ANALYSIS ===")
    
    # Filter for Dinamo's attacking set-pieces
    dinamo_attacking = set_piece_events[
        (set_piece_events['is_dinamo'] == True) & 
        (set_piece_events['type.primary'].isin(['corner', 'free_kick']))
    ].copy()
    
    print(f"✓ Dinamo attacking set-pieces analyzed: {len(dinamo_attacking)}")
    
    return dinamo_attacking

In [112]:
def identify_main_takers(dinamo_attacking):
    """
    Identify the primary set-piece takers for Dinamo with position-based analysis
    """
    print("\n--- Identifying Main Set-Piece Takers ---")
    
    # Analyze corner takers
    corner_takers = dinamo_attacking[dinamo_attacking['type.primary'] == 'corner']['player.name'].value_counts()
    
    # Analyze free-kick takers with position classification
    free_kicks = dinamo_attacking[dinamo_attacking['type.primary'] == 'free_kick'].copy()
    
    # Classify free-kicks by position and destination
    free_kicks['free_kick_type'] = 'Other'
    
    for idx, fk in free_kicks.iterrows():
        x_pos = fk['location.x']
        
        # Check if it's an attacking free-kick (in attacking third)
        if x_pos > 66:
            # Check if it leads to a shot or goal
            if pd.notna(fk['possession.attack.withShot']) and fk['possession.attack.withShot']:
                free_kicks.loc[idx, 'free_kick_type'] = 'Attacking (Dangerous)'
            else:
                free_kicks.loc[idx, 'free_kick_type'] = 'Attacking (Non-Dangerous)'
        elif x_pos > 33:
            free_kicks.loc[idx, 'free_kick_type'] = 'Middle Third'
        else:
            free_kicks.loc[idx, 'free_kick_type'] = 'Defensive Third'
    
    # Get takers by free-kick type
    attacking_dangerous_takers = free_kicks[free_kicks['free_kick_type'] == 'Attacking (Dangerous)']['player.name'].value_counts()
    attacking_non_dangerous_takers = free_kicks[free_kicks['free_kick_type'] == 'Attacking (Non-Dangerous)']['player.name'].value_counts()
    other_free_kick_takers = free_kicks[free_kicks['free_kick_type'].isin(['Middle Third', 'Defensive Third'])]['player.name'].value_counts()
    
    print("🏃 CORNER TAKERS (Top 5):")
    for i, (player, count) in enumerate(corner_takers.head().items(), 1):
        print(f"  {i}. {player}: {count} corners")
    
    print("\n⚽ ATTACKING FREE-KICK TAKERS (Dangerous - Top 5):")
    for i, (player, count) in enumerate(attacking_dangerous_takers.head().items(), 1):
        print(f"  {i}. {player}: {count} dangerous attacking free-kicks")
    
    print("\n⚽ ATTACKING FREE-KICK TAKERS (Non-Dangerous - Top 5):")
    for i, (player, count) in enumerate(attacking_non_dangerous_takers.head().items(), 1):
        print(f"  {i}. {player}: {count} non-dangerous attacking free-kicks")
    
    print("\n⚽ OTHER FREE-KICK TAKERS (Top 5):")
    for i, (player, count) in enumerate(other_free_kick_takers.head().items(), 1):
        print(f"  {i}. {player}: {count} other free-kicks")
    
    # Create comprehensive visualization using Plotly
    fig = make_subplots(
        rows=2, cols=2,
        subplot_titles=('Top 5 Corner Takers', 'Top 5 Dangerous Attacking Free-Kick Takers',
                       'Top 5 Non-Dangerous Attacking Free-Kick Takers', 'Top 5 Other Free-Kick Takers'),
        specs=[[{"type": "bar"}, {"type": "bar"}],
               [{"type": "bar"}, {"type": "bar"}]]
    )
    
    # Corner takers chart
    if len(corner_takers) > 0:
        fig.add_trace(
            go.Bar(
                x=corner_takers.head(5).index,
                y=corner_takers.head(5).values,
                name='Corner Takers',
                marker_color='skyblue',
                marker_line_color='navy',
                marker_line_width=1
            ),
            row=1, col=1
        )
    
    # Dangerous attacking free-kick takers
    if len(attacking_dangerous_takers) > 0:
        fig.add_trace(
            go.Bar(
                x=attacking_dangerous_takers.head(5).index,
                y=attacking_dangerous_takers.head(5).values,
                name='Dangerous Attacking Free-Kicks',
                marker_color='red',
                marker_line_color='darkred',
                marker_line_width=1
            ),
            row=1, col=2
        )
    
    # Non-dangerous attacking free-kick takers
    if len(attacking_non_dangerous_takers) > 0:
        fig.add_trace(
            go.Bar(
                x=attacking_non_dangerous_takers.head(5).index,
                y=attacking_non_dangerous_takers.head(5).values,
                name='Non-Dangerous Attacking Free-Kicks',
                marker_color='orange',
                marker_line_color='darkorange',
                marker_line_width=1
            ),
            row=2, col=1
        )
    
    # Other free-kick takers
    if len(other_free_kick_takers) > 0:
        fig.add_trace(
            go.Bar(
                x=other_free_kick_takers.head(5).index,
                y=other_free_kick_takers.head(5).values,
                name='Other Free-Kicks',
                marker_color='lightcoral',
                marker_line_color='darkred',
                marker_line_width=1
            ),
            row=2, col=2
        )
    
    # Update layout
    fig.update_layout(
        title_text="Comprehensive Set-Piece Takers Analysis",
        title_x=0.5,
        height=800,
        showlegend=False
    )
    
    # Update x-axis labels for better readability
    fig.update_xaxes(tickangle=45)
    
    # Save and show
    fig.write_html('set_piece_viz_html/set_piece_takers_comprehensive.html')
    fig.show()
    
    return corner_takers, attacking_dangerous_takers, attacking_non_dangerous_takers, other_free_kick_takers

In [113]:
def analyze_delivery_zones(dinamo_attacking):
    """
    Analyze set-piece delivery zones using contour heatmaps for frequency
    and xG-scaled points for danger.
    """
    print("\n--- Analyzing Delivery Zones & Danger ---")

    # --- CORNER KICK ANALYSIS ---
    corners = dinamo_attacking[dinamo_attacking['type.primary'] == 'corner'].copy()
    corners.dropna(subset=['location.x', 'location.y', 'pass.endLocation.x', 'pass.endLocation.y'], inplace=True)
    corners['possession.attack.xg'].fillna(0, inplace=True)
    
    if not corners.empty:
        print(f"Analyzing {len(corners)} corner kicks...")
        
        fig_corners = create_custom_pitch()
        fig_corners.update_layout(
            title=dict(text="<b>Corner Kick Analysis: Delivery Zones & Danger (xG)</b>", x=0.5, font=dict(size=20, color='white')),
            legend=dict(yanchor="top", y=0.95, xanchor="left", x=0.01, bgcolor='rgba(0,0,0,0.5)', font=dict(color='white'))
        )

        # Add contour heatmap for delivery frequency
        fig_corners.add_trace(go.Histogram2dContour(
            x=corners['pass.endLocation.x'],
            y=corners['pass.endLocation.y'],
            colorscale='Reds',
            showscale=False,
            name='Delivery Frequency',
            contours=dict(coloring='heatmap'),
            opacity=0.5
        ))

        # Add scatter points for each delivery, sized and colored by xG
        fig_corners.add_trace(go.Scatter(
            x=corners['pass.endLocation.x'],
            y=corners['pass.endLocation.y'],
            mode='markers',
            marker=dict(
                color=corners['possession.attack.xg'],
                colorscale='YlOrRd',
                size=corners['possession.attack.xg'] * 70 + 10,
                sizemode='diameter',
                showscale=True,
                colorbar=dict(title='xG', x=0.95),
                opacity=0.8,
                line=dict(color='white', width=1)
            ),
            name='Delivery Danger (xG)',
            hoverinfo='text',
            text=[f"xG: {xg:.3f}" for xg in corners['possession.attack.xg']]
        ))

        fig_corners.write_html('set_piece_viz_html/corner_delivery_zones_heatmap.html')
        fig_corners.show()
        print("✓ Corner delivery analysis created. High xG values indicate dangerous deliveries.")

    # --- ATTACKING FREE KICK ANALYSIS ---
    attacking_fks = dinamo_attacking[
        (dinamo_attacking['type.primary'] == 'free_kick') & 
        (dinamo_attacking['location.x'] > 50)
    ].copy()
    attacking_fks.dropna(subset=['location.x', 'location.y', 'pass.endLocation.x', 'pass.endLocation.y'], inplace=True)
    attacking_fks['possession.attack.xg'].fillna(0, inplace=True)

    if not attacking_fks.empty:
        print(f"\nAnalyzing {len(attacking_fks)} attacking free kicks...")

        fig_fks = create_custom_pitch()
        fig_fks.update_layout(
            title=dict(text="<b>Attacking Free Kick Analysis: Delivery Zones & Danger (xG)</b>", x=0.5, font=dict(size=20, color='white')),
            legend=dict(yanchor="top", y=0.95, xanchor="left", x=0.01, bgcolor='rgba(0,0,0,0.5)', font=dict(color='white'))
        )

        # Add contour heatmap for delivery frequency
        fig_fks.add_trace(go.Histogram2dContour(
            x=attacking_fks['pass.endLocation.x'],
            y=attacking_fks['pass.endLocation.y'],
            colorscale='Blues',
            showscale=False,
            name='Delivery Frequency',
            contours=dict(coloring='heatmap'),
            opacity=0.5
        ))

        # Add scatter points for each delivery, sized and colored by xG
        fig_fks.add_trace(go.Scatter(
            x=attacking_fks['pass.endLocation.x'],
            y=attacking_fks['pass.endLocation.y'],
            mode='markers',
            marker=dict(
                color=attacking_fks['possession.attack.xg'],
                colorscale='Cividis',
                size=attacking_fks['possession.attack.xg'] * 70 + 10,
                sizemode='diameter',
                showscale=True,
                colorbar=dict(title='xG', x=0.95),
                opacity=0.8,
                line=dict(color='white', width=1)
            ),
            name='Delivery Danger (xG)',
            hoverinfo='text',
            text=[f"xG: {xg:.3f}" for xg in attacking_fks['possession.attack.xg']]
        ))
        
        # Mark the location of the free kicks
        fig_fks.add_trace(go.Scatter(
            x=attacking_fks['location.x'],
            y=attacking_fks['location.y'],
            mode='markers',
            marker=dict(color='cyan', size=8, symbol='x'),
            name='Free Kick Location'
        ))

        fig_fks.write_html('set_piece_viz_html/attacking_free_kick_delivery_zones_heatmap.html')
        fig_fks.show()
        print("✓ Attacking free kick analysis created. High xG values indicate dangerous deliveries.")

In [114]:
def detect_attacking_patterns(dinamo_attacking):
    """
    Detect common attacking routines and patterns
    """
    print("\n--- Detecting Attacking Patterns ---")
    
    # Analyze short corners vs direct deliveries
    corners = dinamo_attacking[dinamo_attacking['type.primary'] == 'corner']
    
    if len(corners) > 0:
        # Identify short corners (delivery to nearby teammate)
        short_corners = 0
        direct_deliveries = 0
        
        for _, corner in corners.iterrows():
            # Check if there's a pass recipient nearby (within 20 units)
            if pd.notna(corner['pass.recipient.id']):
                recipient_x = corner['pass.endLocation.x']
                recipient_y = corner['pass.endLocation.y']
                corner_x = corner['location.x']
                corner_y = corner['location.y']
                
                if pd.notna(recipient_x) and pd.notna(recipient_y):
                    distance = np.sqrt((recipient_x - corner_x)**2 + (recipient_y - corner_y)**2)
                    if distance < 20:
                        short_corners += 1
                    else:
                        direct_deliveries += 1
                else:
                    direct_deliveries += 1
            else:
                direct_deliveries += 1
        
        # Create visualization using Plotly
        fig = go.Figure()
        
        categories = ['Short Corners', 'Direct Deliveries']
        values = [short_corners, direct_deliveries]
        colors = ['lightgreen', 'lightcoral']
        
        fig.add_trace(go.Bar(
            x=categories,
            y=values,
            marker_color=colors,
            marker_line_color='black',
            marker_line_width=2,
            text=values,
            textposition='auto',
            textfont=dict(size=14, color='black')
        ))
        
        fig.update_layout(
            title="Corner Delivery Strategy Analysis",
            title_x=0.5,
            xaxis_title="Strategy Type",
            yaxis_title="Number of Corners",
            height=500,
            showlegend=False
        )
        
        # Save and show
        fig.write_html('set_piece_viz_html/corner_strategy_analysis.html')
        fig.show()
        
        print(f"✓ Corner strategy analysis completed:")
        print(f"  • Short corners: {short_corners} ({short_corners/len(corners)*100:.1f}%)")
        print(f"  • Direct deliveries: {direct_deliveries} ({direct_deliveries/len(corners)*100:.1f}%)")
    
    # Analyze set-piece sequences
    print("\n--- Set-Piece Sequence Analysis ---")
    
    # Count set-pieces leading to shots
    set_pieces_with_shots = dinamo_attacking[
        dinamo_attacking['possession.attack.withShot'] == True
    ]
    
    set_pieces_with_goals = dinamo_attacking[
        dinamo_attacking['possession.attack.withGoal'] == True
    ]
    
    print(f"✓ Set-pieces leading to shots: {len(set_pieces_with_shots)} ({len(set_pieces_with_shots)/len(dinamo_attacking)*100:.1f}%)")
    print(f"✓ Set-pieces leading to goals: {len(set_pieces_with_goals)} ({len(set_pieces_with_goals)/len(dinamo_attacking)*100:.1f}%)")
    
    # Create visualization for set-piece effectiveness using Plotly
    fig = go.Figure()
    
    categories = ['All Set-Pieces', 'Leading to Shots', 'Leading to Goals']
    values = [len(dinamo_attacking), len(set_pieces_with_shots), len(set_pieces_with_goals)]
    colors = ['lightblue', 'orange', 'red']
    
    # Calculate percentages for text labels
    percentages = []
    for i, value in enumerate(values):
        if i == 0:
            percentages.append('100%')
        else:
            percentages.append(f'{value/len(dinamo_attacking)*100:.1f}%')
    
    fig.add_trace(go.Bar(
        x=categories,
        y=values,
        marker_color=colors,
        marker_line_color='black',
        marker_line_width=2,
        text=[f'{v}<br>({p})' for v, p in zip(values, percentages)],
        textposition='auto',
        textfont=dict(size=12, color='black')
    ))
    
    fig.update_layout(
        title="Set-Piece Effectiveness Analysis",
        title_x=0.5,
        xaxis_title="Event Type",
        yaxis_title="Number of Events",
        height=500,
        showlegend=False
    )
    
    # Save and show
    fig.write_html('set_piece_viz_html/set_piece_effectiveness.html')
    fig.show()

In [115]:
def identify_dangerous_players(dinamo_attacking):
    """
    Identify the most dangerous players in set-piece situations, split by type
    """
    print("\n--- Identifying Dangerous Players by Set-Piece Type ---")
    
    # Split set-pieces by type
    corners = dinamo_attacking[dinamo_attacking['type.primary'] == 'corner']
    attacking_free_kicks = dinamo_attacking[
        (dinamo_attacking['type.primary'] == 'free_kick') & 
        (dinamo_attacking['location.x'] > 66)
    ]
    other_free_kicks = dinamo_attacking[
        (dinamo_attacking['type.primary'] == 'free_kick') & 
        (dinamo_attacking['location.x'] <= 66)
    ]
    
    # Analyze targets by set-piece type
    corner_targets = corners[pd.notna(corners['pass.recipient.name'])]['pass.recipient.name'].value_counts()
    attacking_fk_targets = attacking_free_kicks[pd.notna(attacking_free_kicks['pass.recipient.name'])]['pass.recipient.name'].value_counts()
    other_fk_targets = other_free_kicks[pd.notna(other_free_kicks['pass.recipient.name'])]['pass.recipient.name'].value_counts()
    
    print("🎯 CORNER TARGETS (Top 5):")
    for i, (player, count) in enumerate(corner_targets.head().items(), 1):
        print(f"  {i}. {player}: {count} receptions")
    
    print("\n🎯 ATTACKING FREE-KICK TARGETS (Top 5):")
    for i, (player, count) in enumerate(attacking_fk_targets.head().items(), 1):
        print(f"  {i}. {player}: {count} receptions")
    
    print("\n🎯 OTHER FREE-KICK TARGETS (Top 5):")
    for i, (player, count) in enumerate(other_fk_targets.head().items(), 1):
        print(f"  {i}. {player}: {count} receptions")
    
    # Create comprehensive visualization using Plotly
    fig = make_subplots(
        rows=1, cols=3,
        subplot_titles=('Top Corner Targets', 'Top Attacking Free-Kick Targets', 'Top Other Free-Kick Targets'),
        specs=[[{"type": "bar"}, {"type": "bar"}, {"type": "bar"}]]
    )
    
    # Corner targets
    if len(corner_targets) > 0:
        fig.add_trace(
            go.Bar(
                x=corner_targets.head(8).index,
                y=corner_targets.head(8).values,
                name='Corner Targets',
                marker_color='gold',
                marker_line_color='orange',
                marker_line_width=1,
                text=corner_targets.head(8).values,
                textposition='auto',
                textfont=dict(size=10)
            ),
            row=1, col=1
        )
    
    # Attacking free-kick targets
    if len(attacking_fk_targets) > 0:
        fig.add_trace(
            go.Bar(
                x=attacking_fk_targets.head(8).index,
                y=attacking_fk_targets.head(8).values,
                name='Attacking Free-Kick Targets',
                marker_color='red',
                marker_line_color='darkred',
                marker_line_width=1,
                text=attacking_fk_targets.head(8).values,
                textposition='auto',
                textfont=dict(size=10)
            ),
            row=1, col=2
        )
    
    # Other free-kick targets
    if len(other_fk_targets) > 0:
        fig.add_trace(
            go.Bar(
                x=other_fk_targets.head(8).index,
                y=other_fk_targets.head(8).values,
                name='Other Free-Kick Targets',
                marker_color='blue',
                marker_line_color='darkblue',
                marker_line_width=1,
                text=other_fk_targets.head(8).values,
                textposition='auto',
                textfont=dict(size=10)
            ),
            row=1, col=3
        )
    
    # Update layout
    fig.update_layout(
        title_text="Set-Piece Targets by Type",
        title_x=0.5,
        height=500,
        showlegend=False
    )
    
    # Update x-axis labels for better readability
    fig.update_xaxes(tickangle=45)
    
    # Save and show
    fig.write_html('set_piece_viz_html/set_piece_targets_by_type.html')
    fig.show()
    
    return corner_targets, attacking_fk_targets, other_fk_targets

In [116]:
# =============================================================================
# STEP 3: DEFENSIVE SET-PIECE ANALYSIS (DINAMO DEFENDING)
# =============================================================================

def analyze_defensive_set_pieces(set_piece_events):
    """
    Analyze Dinamo's defensive set-piece vulnerabilities
    """
    print("\n=== STEP 3: DEFENSIVE SET-PIECE ANALYSIS ===")
    
    # Filter for opponent set-pieces (Dinamo defending)
    dinamo_defending = set_piece_events[
        (set_piece_events['is_dinamo'] == False) & 
        (set_piece_events['type.primary'].isin(['corner', 'free_kick']))
    ].copy()
    
    print(f"✓ Dinamo defensive set-pieces analyzed: {len(dinamo_defending)}")
    
    return dinamo_defending

In [117]:
def analyze_defensive_vulnerabilities(dinamo_defending, df):
    """
    Analyze defensive system and vulnerabilities using a professional pitch visualization.
    This function creates a heatmap of conceded shots and sizes them by xG.
    """
    print("\n--- Analyzing Defensive Vulnerabilities ---")
    
    # Get all shots conceded from set-pieces
    set_piece_shots_conceded = []
    
    for _, set_piece in dinamo_defending.iterrows():
        possession_id = set_piece['possession.id']
        if pd.notna(possession_id):
            # Find shots in this possession by the opposition
            possession_shots = df[
                (df['possession.id'] == possession_id) & 
                (df['type.primary'] == 'shot') &
                (df['team.name'] != 'Dinamo Bucureşti')
            ]
            
            for _, shot in possession_shots.iterrows():
                set_piece_shots_conceded.append({
                    'x': shot['location.x'],
                    'y': shot['location.y'],
                    'is_goal': shot['shot.isGoal'],
                    'xg': shot['shot.xg'] if pd.notna(shot['shot.xg']) else 0,
                    'set_piece_type': set_piece['type.primary']
                })
    
    if set_piece_shots_conceded:
        shots_df = pd.DataFrame(set_piece_shots_conceded)
        
        # Create the pitch using our new function
        fig = create_custom_pitch()
        fig.update_layout(
            title=dict(text="<b>Dinamo București - Defensive Vulnerabilities from Set-Pieces</b>", x=0.5, font=dict(size=20, color='white')),
            legend=dict(yanchor="top", y=0.95, xanchor="left", x=0.01, bgcolor='rgba(0,0,0,0.5)', font=dict(color='white'))
        )

        # Add a heatmap for shot density
        fig.add_trace(go.Histogram2d(
            x=shots_df['x'],
            y=shots_df['y'],
            colorscale='Reds',
            showscale=False,
            name='Shot Hotspots',
            opacity=0.5
        ))

        # Separate goals from other shots
        goals = shots_df[shots_df['is_goal'] == True]
        non_goals = shots_df[shots_df['is_goal'] == False]

        # Plot non-goal shots, sized by xG
        fig.add_trace(go.Scatter(
            x=non_goals['x'],
            y=non_goals['y'],
            mode='markers',
            marker=dict(
                color='orange',
                size=non_goals['xg'] * 50 + 5,  # Scale marker size by xG
                sizemode='diameter',
                opacity=0.8,
                line=dict(color='white', width=1)
            ),
            name='Shots Conceded',
            hoverinfo='text',
            text=[f"xG: {xg:.2f}" for xg in non_goals['xg']]
        ))

        # Plot goals, sized by xG, with a distinct marker
        fig.add_trace(go.Scatter(
            x=goals['x'],
            y=goals['y'],
            mode='markers',
            marker=dict(
                symbol='star',
                color='red',
                size=goals['xg'] * 50 + 10, # Make goals slightly larger
                sizemode='diameter',
                opacity=1.0,
                line=dict(color='white', width=2)
            ),
            name='Goals Conceded',
            hoverinfo='text',
            text=[f"GOAL! (xG: {xg:.2f})" for xg in goals['xg']]
        ))
        
        # Save and show
        fig.write_html('set_piece_viz_html/defensive_vulnerabilities_heatmap.html')
        fig.show()
        
        print(f"✓ Defensive vulnerabilities analyzed:")
        print(f"  • Total shots conceded from set-pieces: {len(shots_df)}")
        print(f"  • Goals conceded from set-pieces: {shots_df['is_goal'].sum()}")
        print(f"  • Total xG conceded: {shots_df['xg'].sum():.3f}")
        
        # Zone analysis (based on x-coordinate, assuming attack towards the right)
        six_yard_box_shots = len(shots_df[shots_df['x'] > 94.5])
        penalty_area_shots = len(shots_df[(shots_df['x'] > 83.5) & (shots_df['x'] <= 94.5)])
        outside_box_shots = len(shots_df[shots_df['x'] <= 83.5])
        
        print(f"\n  Shot locations:")
        print(f"  • Inside Six-Yard Box: {six_yard_box_shots}")
        print(f"  • Inside Penalty Area (excl. 6-yard box): {penalty_area_shots}")
        print(f"  • Outside Penalty Area: {outside_box_shots}")
    else:
        print("No shots conceded from set-pieces to analyze.")
    
    return set_piece_shots_conceded

In [118]:
def analyze_aerial_duels(dinamo_defending, df):
    """
    Analyze defensive aerial duels during opponent set-pieces.
    This version normalizes all duel locations to a single defensive half
    for accurate tactical visualization and excludes unknown outcomes.

    Contract
    - Inputs: dinamo_defending (DataFrame of opponent corners/free-kicks), df (full events)
    - Output: list of duel dicts [{player, team, x, y, outcome}]
    - Side effects: saves HTML to set_piece_viz_html/aerial_duels_analysis.html and shows it; prints insights
    """
    print("\n--- Analyzing Aerial Duels ---")

    if df is None or len(df) == 0:
        print("No events available to analyze.")
        return []

    # Helper: unified boolean mask builder for aerial duels
    def aerial_mask(frame):
        masks = []
        if 'type.secondary' in frame.columns:
            masks.append(frame['type.secondary'].str.contains('aerial', case=False, na=False))
        if 'aerialDuel' in frame.columns:
            masks.append(frame['aerialDuel'].notna() & (frame['aerialDuel'] != ''))
        if not masks:
            return frame['type.primary'].astype(str).str.lower().eq('duel')
        m = masks[0]
        for extra in masks[1:]:
            m = m | extra
        return m

    # Helper: outcome mapping per-row
    def outcome_for_row(row):
        if 'type.secondary' in row and isinstance(row['type.secondary'], str):
            if 'win' in row['type.secondary'].lower() or 'won' in row['type.secondary'].lower():
                return 'win'
            if 'loss' in row['type.secondary'].lower() or 'lost' in row['type.secondary'].lower():
                return 'loss'
        for col in ['duel.outcome', 'duel.isWon', 'duel.success', 'duel.won', 'outcome', 'result']:
            if col in row and pd.notna(row[col]):
                val = row[col]
                if isinstance(val, (bool, np.bool_)): return 'win' if bool(val) else 'loss'
                if isinstance(val, (int, float, np.integer, np.floating)): return 'win' if val > 0 else 'loss'
                if isinstance(val, str):
                    v = val.strip().lower()
                    if v in {'win', 'won', 'success', 'successful', 'yes', 'true'}: return 'win'
                    if v in {'loss', 'lost', 'fail', 'failed', 'unsuccessful', 'no', 'false'}: return 'loss'
        return 'unknown'

    has_time = all(c in df.columns for c in ['minute', 'second', 'matchId'])
    aerial_duels = []
    total_possessions_checked = 0
    possession_hits = 0

    is_duel = df['type.primary'].astype(str).str.lower().eq('duel') if 'type.primary' in df.columns else pd.Series(False, index=df.index)
    df_duels = df[is_duel].copy()
    if len(df_duels) == 0:
        print("No duel events present in the dataset.")
        return []
    df_duels['__is_aerial'] = aerial_mask(df_duels)

    for _, sp in dinamo_defending.iterrows():
        total_possessions_checked += 1
        pos_id = sp.get('possession.id', np.nan)
        match_id = sp.get('matchId', None)
        candidates = pd.DataFrame()

        if pd.notna(pos_id) and 'possession.id' in df_duels.columns:
            candidates = df_duels[(df_duels['possession.id'] == pos_id) & (df_duels['__is_aerial'])]
            if len(candidates) > 0:
                possession_hits += 1

        if len(candidates) == 0 and has_time and pd.notna(sp.get('minute')) and pd.notna(sp.get('second')) and match_id is not None:
            sp_time = float(sp['minute']) * 60.0 + float(sp['second'])
            df_m = df_duels[df_duels['matchId'] == match_id] if 'matchId' in df_duels.columns else df_duels
            tseries = df_m['minute'].astype(float) * 60.0 + df_m['second'].astype(float)
            win_mask = (tseries >= sp_time) & (tseries <= sp_time + 20.0)
            candidates = df_m[win_mask & df_m['__is_aerial']]

        if len(candidates) == 0:
            continue

        for _, duel in candidates.iterrows():
            oc = outcome_for_row(duel)
            if oc == 'unknown':
                continue  # Skip unknown outcomes as requested

            is_dinamo = str(duel.get('team.name', '')).strip() == 'Dinamo Bucureşti'
            dinamo_outcome = oc if is_dinamo else ('loss' if oc == 'win' else 'win')
            
            # --- Coordinate Normalization ---
            # Standardize all defensive actions to the left side of the pitch (defending goal at x=0)
            x, y = duel.get('location.x', np.nan), duel.get('location.y', np.nan)
            if pd.notna(x) and x > 50:
                x = 100 - x
                y = 100 - y

            aerial_duels.append({
                'player': duel.get('player.name', 'Unknown'),
                'x': x,
                'y': y,
                'outcome': dinamo_outcome
            })

    if not aerial_duels:
        print("No decisive (win/loss) aerial duels found around defensive set-pieces to analyze.")
        return []

    duels_df = pd.DataFrame(aerial_duels).dropna(subset=['x', 'y'])
    lost_duels = duels_df[duels_df['outcome'] == 'loss']
    won_duels = duels_df[duels_df['outcome'] == 'win']

    # --- Create Visualization ---
    fig = create_custom_pitch()
    title_text = f"<b>Defensive Aerial Duels (Normalized)</b> — Total: {len(duels_df)} | Won: {len(won_duels)} | Lost: {len(lost_duels)}"
    fig.update_layout(
        title=dict(text=title_text, x=0.5, font=dict(size=20, color='white')),
        legend=dict(orientation="h", yanchor="bottom", y=1.02, xanchor="right", x=1, bgcolor='rgba(0,0,0,0.5)', font=dict(color='white'))
    )

    if len(lost_duels) > 0:
        fig.add_trace(go.Scatter(
            x=lost_duels['x'], y=lost_duels['y'], mode='markers',
            marker=dict(color='red', symbol='x', size=11, line=dict(width=2)),
            name='Lost', hoverinfo='text',
            text=[f"Lost by: {p}" for p in lost_duels['player']]
        ))
    if len(won_duels) > 0:
        fig.add_trace(go.Scatter(
            x=won_duels['x'], y=won_duels['y'], mode='markers',
            marker=dict(color='lime', symbol='circle', size=9, opacity=0.75, line=dict(width=0.5, color='white')),
            name='Won', hoverinfo='text',
            text=[f"Won by: {p}" for p in won_duels['player']]
        ))
    
    fig.write_html('set_piece_viz_html/aerial_duels_analysis.html')
    fig.show()

    # --- Print Insights ---
    print("✓ Aerial duel analysis completed.")
    print(f"  • Total decisive aerial duels considered: {len(duels_df)}")
    if len(duels_df) > 0:
        win_percentage = (len(won_duels) / len(duels_df)) * 100
        loss_percentage = (len(lost_duels) / len(duels_df)) * 100
        print(f"  • Duels Won: {len(won_duels)} ({win_percentage:.1f}%)")
        print(f"  • Duels Lost: {len(lost_duels)} ({loss_percentage:.1f}%)")

    if len(lost_duels) > 0:
        player_duels_lost = lost_duels['player'].value_counts()
        print("\n🥊 Players most often losing aerial duels (defensive set-pieces):")
        for i, (player, count) in enumerate(player_duels_lost.head(5).items(), 1):
            print(f"  {i}. {player}: {count}")

    return aerial_duels

In [119]:
def analyze_second_ball_reaction(dinamo_defending, df):
    """
    Analyze second ball reaction and possession recovery
    """
    print("\n--- Analyzing Second Ball Reaction ---")
    
    # Analyze how quickly Dinamo regains possession after clearing set-pieces
    second_ball_analysis = []
    
    for _, set_piece in dinamo_defending.iterrows():
        possession_id = set_piece['possession.id']
        if pd.notna(possession_id):
            # Find the next possession after this set-piece
            set_piece_time = set_piece['minute'] * 60 + set_piece['second']
            
            # Look for Dinamo possession events shortly after
            subsequent_events = df[
                (df['team.name'] == 'Dinamo Bucureşti') &
                (df['minute'] * 60 + df['second'] > set_piece_time) &
                (df['minute'] * 60 + df['second'] <= set_piece_time + 30)  # Within 30 seconds
            ]
            
            if len(subsequent_events) > 0:
                time_to_recovery = (subsequent_events.iloc[0]['minute'] * 60 + 
                                  subsequent_events.iloc[0]['second']) - set_piece_time
                
                second_ball_analysis.append({
                    'set_piece_type': set_piece['type.primary'],
                    'time_to_recovery': time_to_recovery,
                    'recovery_event': subsequent_events.iloc[0]['type.primary']
                })
    
    if second_ball_analysis:
        recovery_df = pd.DataFrame(second_ball_analysis)
        
        print(f"✓ Second ball reaction analysis completed:")
        print(f"  • Average time to recovery: {recovery_df['time_to_recovery'].mean():.1f} seconds")
        print(f"  • Recovery events analyzed: {len(recovery_df)}")
        
        # Recovery time distribution
        fast_recovery = len(recovery_df[recovery_df['time_to_recovery'] <= 10])
        medium_recovery = len(recovery_df[(recovery_df['time_to_recovery'] > 10) & 
                                        (recovery_df['time_to_recovery'] <= 20)])
        slow_recovery = len(recovery_df[recovery_df['time_to_recovery'] > 20])
        
        print(f"  • Fast recovery (≤10s): {fast_recovery} ({fast_recovery/len(recovery_df)*100:.1f}%)")
        print(f"  • Medium recovery (11-20s): {medium_recovery} ({medium_recovery/len(recovery_df)*100:.1f}%)")
        print(f"  • Slow recovery (>20s): {slow_recovery} ({slow_recovery/len(recovery_df)*100:.1f}%)")
        
        # Create visualization using Plotly
        fig = go.Figure()
        
        recovery_categories = ['Fast (≤10s)', 'Medium (11-20s)', 'Slow (>20s)']
        recovery_counts = [fast_recovery, medium_recovery, slow_recovery]
        colors = ['green', 'yellow', 'red']
        
        fig.add_trace(go.Bar(
            x=recovery_categories,
            y=recovery_counts,
            marker_color=colors,
            marker_line_color='black',
            marker_line_width=2,
            text=recovery_counts,
            textposition='auto',
            textfont=dict(size=14, color='black')
        ))
        
        fig.update_layout(
            title="Second Ball Recovery Speed Analysis",
            title_x=0.5,
            xaxis_title="Recovery Speed",
            yaxis_title="Number of Recoveries",
            height=500,
            showlegend=False
        )
        
        # Save and show
        fig.write_html('set_piece_viz_html/second_ball_recovery.html')
        fig.show()
    
    return second_ball_analysis

In [120]:
print("🏆 DINAMO BUCUREȘTI SET-PIECE TACTICAL ANALYSIS")
print("=" * 60)
print("Analyzing set-piece strategies for the 2024/25 season...")
print("=" * 60)

# Step 1: Data Preparation
df = load_and_prepare_data()
if df is None:
    raise RuntimeError("Data loading failed.")

set_piece_events = filter_set_pieces(df)
set_piece_events = clean_set_piece_data(set_piece_events)

# Step 2: Offensive Analysis
print("\n" + "="*60)
dinamo_attacking = analyze_offensive_set_pieces(set_piece_events)
if not dinamo_attacking.empty:
    identify_main_takers(dinamo_attacking)
    analyze_delivery_zones(dinamo_attacking)
    detect_attacking_patterns(dinamo_attacking)
    identify_dangerous_players(dinamo_attacking)
else:
    print("No attacking set-pieces for Dinamo to analyze.")

# Step 3: Defensive Analysis
print("\n" + "="*60)
dinamo_defending = analyze_defensive_set_pieces(set_piece_events)
if not dinamo_defending.empty:
    analyze_defensive_vulnerabilities(dinamo_defending, df)
    analyze_aerial_duels(dinamo_defending, df)
    analyze_second_ball_reaction(dinamo_defending, df)
else:
    print("No defending set-pieces for Dinamo to analyze.")

print("\n" + "="*60)
print("🎯 ANALYSIS COMPLETE - PROFESSIONAL REPORT READY")
print("="*60)
print("✓ All visualizations have been saved as interactive HTML files.")
print("✓ Comprehensive set-piece analysis completed.")
print("✓ Professional football pitch visualizations generated.")
print("✓ Data-driven insights ready for coaching staff review.")

🏆 DINAMO BUCUREȘTI SET-PIECE TACTICAL ANALYSIS
Analyzing set-piece strategies for the 2024/25 season...
=== STEP 1: DATA LOADING AND PREPARATION ===
✓ Dataset loaded successfully: 65683 events
✓ Dataset shape: (65683, 74)
✓ Total matches: 40
✓ Date range: 2024-07-14 18:00:00 to 2025-05-24 20:00:00

--- Filtering for Set-Piece Events ---
✓ Total set-piece events identified: 3874
✓ Set-piece breakdown:
  • Corner: 362
  • Free_Kick: 1038
  • Throw_In: 1858
  • Goal_Kick: 616

--- Cleaning Set-Piece Data ---
✓ Data cleaned: 412 events removed due to missing data
✓ Final set-piece dataset: 3462 events


=== STEP 2: OFFENSIVE SET-PIECE ANALYSIS ===
✓ Dinamo attacking set-pieces analyzed: 548

--- Identifying Main Set-Piece Takers ---
🏃 CORNER TAKERS (Top 5):
  1. G. Milanov: 64 corners
  2. P. Olsen: 45 corners
  3. C. Cîrjan: 43 corners
  4. A. Pop: 7 corners
  5. A. Bani: 6 corners

⚽ ATTACKING FREE-KICK TAKERS (Dangerous - Top 5):
  1. G. Milanov: 13 dangerous attacking free-kicks
  2. P


--- Analyzing Delivery Zones & Danger ---
Analyzing 178 corner kicks...


✓ Corner delivery analysis created. High xG values indicate dangerous deliveries.

Analyzing 133 attacking free kicks...


✓ Attacking free kick analysis created. High xG values indicate dangerous deliveries.

--- Detecting Attacking Patterns ---


✓ Corner strategy analysis completed:
  • Short corners: 12 (6.7%)
  • Direct deliveries: 166 (93.3%)

--- Set-Piece Sequence Analysis ---
✓ Set-pieces leading to shots: 102 (18.6%)
✓ Set-pieces leading to goals: 7 (1.3%)



--- Identifying Dangerous Players by Set-Piece Type ---
🎯 CORNER TARGETS (Top 5):
  1. K. Boateng: 20 receptions
  2. C. Cîrjan: 13 receptions
  3. J. Homawoo: 11 receptions
  4. P. Olsen: 9 receptions
  5. G. Milanov: 7 receptions

🎯 ATTACKING FREE-KICK TARGETS (Top 5):
  1. K. Boateng: 7 receptions
  2. C. Cîrjan: 4 receptions
  3. A. Marginean: 4 receptions
  4. A. Selmani: 4 receptions
  5. E. Gnahoré: 4 receptions

🎯 OTHER FREE-KICK TARGETS (Top 5):
  1. C. Cîrjan: 38 receptions
  2. E. Gnahoré: 33 receptions
  3. J. Homawoo: 32 receptions
  4. K. Boateng: 21 receptions
  5. P. Olsen: 16 receptions




=== STEP 3: DEFENSIVE SET-PIECE ANALYSIS ===
✓ Dinamo defensive set-pieces analyzed: 535

--- Analyzing Defensive Vulnerabilities ---


✓ Defensive vulnerabilities analyzed:
  • Total shots conceded from set-pieces: 90
  • Goals conceded from set-pieces: 8
  • Total xG conceded: 10.266

  Shot locations:
  • Inside Six-Yard Box: 12
  • Inside Penalty Area (excl. 6-yard box): 45
  • Outside Penalty Area: 33

--- Analyzing Aerial Duels ---


✓ Aerial duel analysis completed.
  • Total decisive aerial duels considered: 141
  • Duels Won: 91 (64.5%)
  • Duels Lost: 50 (35.5%)

🥊 Players most often losing aerial duels (defensive set-pieces):
  1. A. Marginean: 7
  2. J. Homawoo: 6
  3. K. Boateng: 6
  4. R. Patriche: 4
  5. G. Milanov: 4

--- Analyzing Second Ball Reaction ---
✓ Second ball reaction analysis completed:
  • Average time to recovery: 10.0 seconds
  • Recovery events analyzed: 535
  • Fast recovery (≤10s): 324 (60.6%)
  • Medium recovery (11-20s): 123 (23.0%)
  • Slow recovery (>20s): 88 (16.4%)
✓ Second ball reaction analysis completed:
  • Average time to recovery: 10.0 seconds
  • Recovery events analyzed: 535
  • Fast recovery (≤10s): 324 (60.6%)
  • Medium recovery (11-20s): 123 (23.0%)
  • Slow recovery (>20s): 88 (16.4%)



🎯 ANALYSIS COMPLETE - PROFESSIONAL REPORT READY
✓ All visualizations have been saved as interactive HTML files.
✓ Comprehensive set-piece analysis completed.
✓ Professional football pitch visualizations generated.
✓ Data-driven insights ready for coaching staff review.
