In [11]:
import pandas as pd

# Load the data
file_path = '/Users/vrishfish/Mens-Soccer-Stats-1/player-combined-data.csv'  
data = pd.read_csv(file_path)

# List of offensive features
offensive_features = [
    'goals', 'shots', 'shots_on_target', 'assists', 'total_passes', 'total_passes_completed',
    'long_passes', 'long_passes_completed', 'crosses', 'accurate_crosses',
    'xg', 'dribbles', 'successful_dribbles',
    'fouls_drawn', 'offensive_duels'
]

# Filter the dataset to include only relevant features
offensive_data = data[['player_name', 'Position'] + offensive_features]

# Calculate Goal Accuracy
offensive_data['Goal Accuracy (%)'] = (offensive_data['goals'] / offensive_data['shots_on_target']) * 100
offensive_data['Goal Accuracy (%)'] = offensive_data['Goal Accuracy (%)'].fillna(0)  # Handle division by zero

# Group by player and position to aggregate offensive features
player_position_offense = offensive_data.groupby(['player_name', 'Position']).sum().reset_index()

# Recalculate Goal Accuracy after grouping
player_position_offense['Goal Accuracy (%)'] = (
    player_position_offense['goals'] / player_position_offense['shots_on_target']
) * 100
player_position_offense['Goal Accuracy (%)'] = player_position_offense['Goal Accuracy (%)'].fillna(0)

# Calculate 25th and 75th percentiles for each offensive feature across all players
quartiles = offensive_data[offensive_features + ['Goal Accuracy (%)']].quantile([0.25, 0.75]).T
quartiles.columns = ['25th_percentile', '75th_percentile']

# Identify strengths and weaknesses
def evaluate_strengths_weaknesses(row):
    strengths = []
    weaknesses = []
    
    for feature in offensive_features + ['Goal Accuracy (%)']:
        if row[feature] >= quartiles.loc[feature, '75th_percentile']:
            strengths.append(feature)
        elif row[feature] <= quartiles.loc[feature, '25th_percentile']:
            weaknesses.append(feature)
    
    return pd.Series([', '.join(strengths), ', '.join(weaknesses)], index=['Strengths', 'Weaknesses'])

# Apply the evaluation function to each player and position
player_position_offense[['Strengths', 'Weaknesses']] = player_position_offense.apply(evaluate_strengths_weaknesses, axis=1)

# Save the table to a CSV file
player_position_offense.to_csv('player_offensive_quartile_with_goal_accuracy.csv', index=False)

# Display a preview of the results
player_position_offense[['player_name', 'Position', 'Goal Accuracy (%)', 'Strengths', 'Weaknesses']].head()


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  offensive_data['Goal Accuracy (%)'] = (offensive_data['goals'] / offensive_data['shots_on_target']) * 100
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  offensive_data['Goal Accuracy (%)'] = offensive_data['Goal Accuracy (%)'].fillna(0)  # Handle division by zero


Unnamed: 0,player_name,Position,Goal Accuracy (%),Strengths,Weaknesses
0,A. Adalsteinsson,CB,0.0,"goals, shots, shots_on_target, assists, total_...","dribbles, successful_dribbles, fouls_drawn, of..."
1,A. Adalsteinsson,"CB, LCB",0.0,"goals, shots_on_target, assists, long_passes, ...","shots, crosses, xg, dribbles, successful_dribb..."
2,A. Adalsteinsson,"CB, RCB",0.0,"goals, shots_on_target, assists, total_passes,...","shots, xg, dribbles, successful_dribbles, foul..."
3,A. Adalsteinsson,LCB,0.0,"goals, shots, shots_on_target, assists, total_...","crosses, fouls_drawn"
4,A. Adalsteinsson,"LCB, RCB",0.0,"goals, shots_on_target, assists, long_passes, ...","shots, crosses, xg, dribbles, successful_dribb..."


In [None]:
import pandas as pd

# Load the data
file_path = '/Users/vrishfish/Mens-Soccer-Stats-1/player-combined-data.csv'  
data = pd.read_csv(file_path)

# Calculate accuracy percentages
data['Pass Accuracy (%)'] = (data['total_passes_completed'] / data['total_passes']) * 100
data['Long Pass Accuracy (%)'] = (data['long_passes_completed'] / data['long_passes']) * 100
data['Cross Accuracy (%)'] = (data['accurate_crosses'] / data['crosses']) * 100
data['Dribble Accuracy (%)'] = (data['successful_dribbles'] / data['dribbles']) * 100
data['Foul Percentage (%)'] = (data['fouls_drawn'] / data['offensive_duels']) * 100
data['Offensive Duels Won (%)'] = (data['offensive_duels_won'] / data['offensive_duels']) * 100

# Handle division by zero or NaN
accuracy_features = [
    'Pass Accuracy (%)', 'Long Pass Accuracy (%)', 'Cross Accuracy (%)',
    'Dribble Accuracy (%)', 'Foul Percentage (%)', 'Offensive Duels Won (%)'
]
data[accuracy_features] = data[accuracy_features].fillna(0)

# Group by player and position
player_position_stats = data.groupby(['player_name', 'Position']).mean().reset_index()

# Calculate quartiles for accuracy metrics
quartiles = player_position_stats[accuracy_features].quantile([0.25, 0.75]).T
quartiles.columns = ['25th_percentile', '75th_percentile']

# Identify strengths and weaknesses
def evaluate_strengths_weaknesses(row):
    strengths = []
    weaknesses = []
    
    for feature in accuracy_features:
        if row[feature] >= quartiles.loc[feature, '75th_percentile']:
            strengths.append(feature)
        elif row[feature] <= quartiles.loc[feature, '25th_percentile']:
            weaknesses.append(feature)
    
    return pd.Series([', '.join(strengths), ', '.join(weaknesses)], index=['Strengths', 'Weaknesses'])

# Apply evaluation to determine strengths and weaknesses
player_position_stats[['Strengths', 'Weaknesses']] = player_position_stats.apply(evaluate_strengths_weaknesses, axis=1)

# Save results to a CSV
output_file = 'player_strengths_weaknesses.csv'
player_position_stats.to_csv(output_file, index=False)

# Display a preview of the results
from tabulate import tabulate
print(tabulate(player_position_stats[['player_name', 'Position', 'Strengths', 'Weaknesses']], headers='keys', tablefmt='grid', showindex=False))
