In [6]:
# calculate the total number of shots
# divide it by total set number
# get the average number of shots per set

import pandas as pd

print("cauculate th average number of shots per set, using rally data only")
print("use 'rally_shot_count' to calculate the total number of shots")
print("use unique 'set_id' to calculate the total number of sets")

original_rally = pd.read_csv('rally_0108.csv')

columns_to_keep = ["rally_id", "set_id", "rally_number", "rally_shot_count"]
rally_data = original_rally[columns_to_keep]
rally_data = rally_data.dropna(subset=columns_to_keep)

total_shots = rally_data["rally_shot_count"].sum()
total_sets = rally_data["set_id"].nunique()

average_shots_per_set = total_shots / total_sets if total_sets > 0 else 0

print(f"Total number of shots: {total_shots}")
print(f"Total number of unique sets: {total_sets}")
print(f"Average number of shots per set: {average_shots_per_set:.2f}")
print("---------------------------------")

# lets change to another approach
print("cauculate th average number of shots per set, combine shots data and rally data")
print("count how many rows in shot data to calculate the total number of shots")
print("use unique 'set_id' to calculate the total number of sets")

original_shots = pd.read_csv('convert_shot.csv')
original_rally = pd.read_csv('rally_0108.csv')

columns_to_keep = ["shot_id", "rally_id"]
shot_data = original_shots[columns_to_keep]
shot_data = shot_data.dropna(subset=columns_to_keep)

columns_to_keep = ["rally_id", "set_id"]
rally_data = original_rally[columns_to_keep]
rally_data = rally_data.dropna(subset=columns_to_keep)

merged_data = pd.merge(shot_data, rally_data, on="rally_id", how="inner")
total_shots = merged_data["shot_id"].nunique()
total_sets = merged_data["set_id"].nunique()

average_shots_per_set = total_shots / total_sets if total_sets > 0 else 0

print(f"Total number of shots: {total_shots}")
print(f"Total number of unique sets: {total_sets}")
print(f"Average number of shots per set: {average_shots_per_set:.2f}")

cauculate th average number of shots per set, using rally data only
use 'rally_shot_count' to calculate the total number of shots
use unique 'set_id' to calculate the total number of sets
Total number of shots: 200013.0
Total number of unique sets: 565
Average number of shots per set: 354.01
---------------------------------
cauculate th average number of shots per set, combine shots data and rally data
count how many rows in shot data to calculate the total number of shots
use unique 'set_id' to calculate the total number of sets
Total number of shots: 194106
Total number of unique sets: 539
Average number of shots per set: 360.12


In [7]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
# this section is to drop unwanted columns and rows with NaN values

# convert_shot 是惠晴給我的
original_shots = pd.read_csv('convert_shot.csv')
original_rally = pd.read_csv('rally_0108.csv')
original_set = pd.read_csv('set_0108.csv')

# print("Original shots data columns: ")
# print(original_shots.columns)
# print("---------------------------------")

# nan_count = original_shots.isna().sum()
# print("Number of NaN values in each column: ")
# print(nan_count)
# print("---------------------------------")

# drop nan values in shots data
print("drop nan values in shots data")
columns_to_check = ["shot_id", "shot_type",
                    "rally_id", "shot_number", "frame_num", "end_frame_num" ]
drop_nan_shots = original_shots.dropna(subset=columns_to_check)
# print("Number of NaN values in each column after dropping NaN values: ")
# print(drop_nan_shots.isna().sum())
# print("data after dropping NaN values: ", len(drop_nan_shots))

# drop columns unrelated in shots data
print("drop columns unrelated in shots data")
columns_to_drop = original_shots.columns.difference(columns_to_check)
drop_nan_and_unrelated_shots = drop_nan_shots.drop(columns=columns_to_drop)
print("---------------------------------")

# drop nan values in rally data
print("drop nan values in rally data")
columns_to_check = ["rally_id", "set_id", "rally_number", "rally_current_winner_score",
                    "rally_current_loser_score", "rally_shot_count"]
drop_nan_rally = original_rally.dropna(subset=columns_to_check)
print("data after dropping NaN values: drop_nan_rally")

# drop columns unrelated in rally data
print("drop columns unrelated in rally data")
columns_to_drop = drop_nan_rally.columns.difference(columns_to_check)
drop_nan_and_unrelated_rally = drop_nan_rally.drop(columns=columns_to_drop)

# drop nan values in set data
print("drop nan values in set data")
columns_to_check = ["set_id", "match_id", "set_number"]
drop_nan_set = original_set.dropna(subset=columns_to_check)

# drop columns unrelated in set data
print("drop columns unrelated in set data")
columns_to_drop = drop_nan_set.columns.difference(columns_to_check)
drop_nan_and_unrelated_set = drop_nan_set.drop(columns=columns_to_drop)

print("merge shots and rally and set data")
shots_merged = pd.merge(drop_nan_and_unrelated_shots, drop_nan_and_unrelated_rally, on="rally_id", how="inner")
shots_merged = pd.merge(shots_merged, drop_nan_and_unrelated_set, on="set_id", how="inner")
print("length of data after merging: ", len(shots_merged))

# print("columns of data after merging: ")
# print(shots_with_set_id.columns)
# print("---------------------------------")

# print("output data to csv file")
# shots_with_set_id.to_csv('shots_with_set_id.csv', index=False)
# print("file name: shots_with_set_id.csv")
# print("done")
# print("---------------------------------")

# print("print how many 0 in end_frame_num")
# print(shots_with_set_id["end_frame_num"].value_counts())
least_unique_numbers = shots_merged["end_frame_num"].dropna().unique()  # Get unique values
least_unique_numbers.sort()  # Sort them in ascending order

# Select the first 10 smallest values
top_10_least_numbers = least_unique_numbers[:10]
print("10 smallest unique numbers in 'end_frame_num':")
print(top_10_least_numbers)

# print("print the first 20 rows of the data")
# print(shots_with_set_id.head(30))


print("columns of data after merging: ")
print(shots_merged.columns)


drop nan values in shots data
drop columns unrelated in shots data
---------------------------------
drop nan values in rally data
data after dropping NaN values: drop_nan_rally
drop columns unrelated in rally data
drop nan values in set data
drop columns unrelated in set data
merge shots and rally and set data
length of data after merging:  170419
10 smallest unique numbers in 'end_frame_num':
[  0.  85. 105. 124. 160. 188. 218. 244. 250. 264.]
columns of data after merging: 
Index(['shot_id', 'rally_id', 'shot_number', 'shot_type', 'frame_num',
       'end_frame_num', 'set_id', 'rally_number', 'rally_current_winner_score',
       'rally_current_loser_score', 'rally_shot_count', 'match_id',
       'set_number'],
      dtype='object')


In [8]:
import pandas as pd

mapping = {
    '過度切球': '切球',
    '防守回抽': '平球',
    '後場抽平球': '平球',
    '防守回挑': '挑球',
    '推球': '推撲球',
    '撲球': '推撲球',
    '擋小球': '網前小球',
    '勾球': '網前小球',
    '放小球': '網前小球',
    '小平球': '網前小球',
    '點扣': '殺球'
}
shots_merged['shot_type'] = shots_merged['shot_type'].replace(mapping)

def analyze_balltypes_detailed(df):
    if 'shot_type' not in df.columns:
        return "Error: 'shot_type_' column not found in the DataFrame"
    
    # Get value counts
    shot_type__counts = df['shot_type'].value_counts()
    total_rows = len(df)
    
    print(f"Analysis of ball types in the DataFrame")
    print(f"Total rows in DataFrame: {total_rows}")
    print(f"Total unique ball types: {len(shot_type__counts)}")
    print("\nBreakdown by ball type:")
    print("-" * 40)
    
    for shot_type, count in shot_type__counts.items():
        percentage = (count / total_rows) * 100
        print(f"Ball Type: {shot_type}")
        print(f"Count: {count} rows")
        print("-" * 40)
    
    return shot_type__counts.to_dict()

# Example usage:
result = analyze_balltypes_detailed(shots_merged)
print(result)

Analysis of ball types in the DataFrame
Total rows in DataFrame: 170419
Total unique ball types: 10

Breakdown by ball type:
----------------------------------------
Ball Type: 網前小球
Count: 49119 rows
----------------------------------------
Ball Type: 挑球
Count: 30926 rows
----------------------------------------
Ball Type: 殺球
Count: 20745 rows
----------------------------------------
Ball Type: 長球
Count: 17896 rows
----------------------------------------
Ball Type: 切球
Count: 16880 rows
----------------------------------------
Ball Type: 推撲球
Count: 10862 rows
----------------------------------------
Ball Type: 發短球
Count: 9890 rows
----------------------------------------
Ball Type: 平球
Count: 6734 rows
----------------------------------------
Ball Type: 發長球
Count: 4367 rows
----------------------------------------
Ball Type: 未知球種
Count: 3000 rows
----------------------------------------
{'網前小球': 49119, '挑球': 30926, '殺球': 20745, '長球': 17896, '切球': 16880, '推撲球': 10862, '發短球': 9890, '平球': 

In [9]:
import pandas as pd

def process_shots(df):
    # Create new columns to store results
    df['shot_duration'] = 0
    df['is_second_half'] = False
    df['set_shot_counter'] = 0
    
    # Track shots per set and rally
    set_shot_counts = {}  # Dictionary to track shots per set
    current_rally_id = None
    rally_shot_counter = 0
    
    # Iterate through each row
    for index, row in df.iterrows():
        # 1. Calculate shot duration
        if row['end_frame_num'] < row['frame_num']:
            df.at[index, 'shot_duration'] = -1
        else:
            df.at[index, 'shot_duration'] = row['end_frame_num'] - row['frame_num']
            
        # 2. Check if shot is in second half of set
        set_id = row['set_id']
        
        # Initialize set counter if not exists
        if set_id not in set_shot_counts:
            set_shot_counts[set_id] = 0
            
        # Rally tracking
        if current_rally_id != row['rally_id']:
            # When rally changes, verify shot count
            if current_rally_id is not None:
                # Get the rally_shot_count for the previous rally
                prev_rally_data = df[df['rally_id'] == current_rally_id].iloc[0]
                expected_count = prev_rally_data['rally_shot_count']
                
                # If actual count doesn't match expected, adjust set count
                if rally_shot_counter != expected_count:
                    set_shot_counts[set_id] -= rally_shot_counter  # Remove incorrect count
                    set_shot_counts[set_id] += expected_count     # Add correct count
            
            # Reset for new rally
            current_rally_id = row['rally_id']
            rally_shot_counter = 0
            
        # Increment counters
        rally_shot_counter += 1
        set_shot_counts[set_id] += 1
        
        # Determine if in second half (after 178 shots)
        df.at[index, 'is_second_half'] = set_shot_counts[set_id] > 178
        df.at[index, 'set_shot_counter'] = set_shot_counts[set_id]
    
    # Final rally verification (for the last rally in the dataframe)
    if current_rally_id is not None:
        last_rally_data = df[df['rally_id'] == current_rally_id].iloc[0]
        expected_count = last_rally_data['rally_shot_count']
        if rally_shot_counter != expected_count:
            set_id = last_rally_data['set_id']
            set_shot_counts[set_id] -= rally_shot_counter
            set_shot_counts[set_id] += expected_count
            
            # Update is_second_half for all shots in this last rally
            for idx in df[df['rally_id'] == current_rally_id].index:
                df.at[idx, 'is_second_half'] = set_shot_counts[set_id] > 178
    
    return df

# Example usage:
shots_processed = process_shots(shots_merged)
# save the processed data to a new csv file
shots_processed.to_csv('shots_processed_for_ball_speed_analysis.csv', index=False)
print("how many rows in the processed data: ", len(shots_processed))
print("how many shot is in the first half of the set: ", len(shots_processed[shots_processed['is_second_half'] == False]))
print("how many shot is in the second half of the set: ", len(shots_processed[shots_processed['is_second_half'] == True]))

how many rows in the processed data:  170419
how many shot is in the first half of the set:  86980
how many shot is in the second half of the set:  83439


In [10]:
import pandas as pd

def analyze_shot_type_by_half_existing(df):
    # Check required columns
    if 'shot_type' not in df.columns or 'is_second_half' not in df.columns:
        return "Error: Required columns 'balltype' or 'is_second_half' not found"
    
    # Split into first and second half
    first_half = df[~df['is_second_half']]
    second_half = df[df['is_second_half']]
    
    # Get counts
    first_half_counts = first_half['shot_type'].value_counts()
    second_half_counts = second_half['shot_type'].value_counts()
    
    # Create summary DataFrame
    summary = pd.DataFrame({
        'First_Half': first_half_counts,
        'Second_Half': second_half_counts
    }).fillna(0).astype(int)
    
    # Add percentages
    first_half_total = len(first_half)
    second_half_total = len(second_half)
    
    summary['First_Half_%'] = (summary['First_Half'] / first_half_total * 100).round(2)
    summary['Second_Half_%'] = (summary['Second_Half'] / second_half_total * 100).round(2)
    
    # Print results
    print(f"Ball Type Distribution Analysis")
    print(f"Total shots - First Half: {first_half_total}, Second Half: {second_half_total}")
    print("\nSummary Table:")
    print(summary)
    
    return summary

# Usage with your existing DataFrame that has 'is_second_half':
result = analyze_shot_type_by_half_existing(shots_merged)
# print(result)

Ball Type Distribution Analysis
Total shots - First Half: 86980, Second Half: 83439

Summary Table:
           First_Half  Second_Half  First_Half_%  Second_Half_%
shot_type                                                      
網前小球            24442        24677         28.10          29.57
挑球              15668        15258         18.01          18.29
殺球              10470        10275         12.04          12.31
長球               9293         8603         10.68          10.31
切球               8713         8167         10.02           9.79
推撲球              5518         5344          6.34           6.40
發短球              5309         4581          6.10           5.49
平球               3338         3396          3.84           4.07
發長球              2557         1810          2.94           2.17
未知球種             1672         1328          1.92           1.59


In [11]:
import pandas as pd

def analyze_balltype_durations(df):
    # Check required columns
    required_cols = ['shot_type', 'is_second_half', 'shot_duration']
    if not all(col in df.columns for col in required_cols):
        return "Error: Required columns not found"
    
    # Split into first and second half
    first_half = df[~df['is_second_half']]
    second_half = df[df['is_second_half']]
    
    # Calculate stats for valid durations (excluding -1)
    first_half_valid = first_half[first_half['shot_duration'] != -1]
    second_half_valid = second_half[second_half['shot_duration'] != -1]
    
    # Calculate statistics per shot type
    first_half_stats = first_half_valid.groupby('shot_type')['shot_duration'].agg(
        mean='mean',
        q25=lambda x: x.quantile(0.25),
        median='median',
        q75=lambda x: x.quantile(0.75)
    ).round(2)
    
    second_half_stats = second_half_valid.groupby('shot_type')['shot_duration'].agg(
        mean='mean',
        q25=lambda x: x.quantile(0.25),
        median='median',
        q75=lambda x: x.quantile(0.75)
    ).round(2)
    
    # Create summary DataFrame
    summary = pd.DataFrame({
        'First_Half_Avg': first_half_stats['mean'],
        'First_Half_Q25': first_half_stats['q25'],
        'First_Half_Median': first_half_stats['median'],
        'First_Half_Q75': first_half_stats['q75'],
        'Second_Half_Avg': second_half_stats['mean'],
        'Second_Half_Q25': second_half_stats['q25'],
        'Second_Half_Median': second_half_stats['median'],
        'Second_Half_Q75': second_half_stats['q75']
    }).fillna(0)
    
    # Print results
    print(f"Shot Type Duration Analysis")
    print(f"Total valid shots - First Half: {len(first_half_valid)}, Second Half: {len(second_half_valid)}")
    print("(Durations in frames, -1 values excluded)")
    print("\nSummary:")
    
    # Print stats for each shot type
    for shot_type in summary.index:
        print(f"\nShot Type: {shot_type}")
        print(f"First Half - Avg: {summary.loc[shot_type, 'First_Half_Avg']}, "
              f"Q25: {summary.loc[shot_type, 'First_Half_Q25']}, "
              f"Median: {summary.loc[shot_type, 'First_Half_Median']}, "
              f"Q75: {summary.loc[shot_type, 'First_Half_Q75']}")
        print(f"Second Half - Avg: {summary.loc[shot_type, 'Second_Half_Avg']}, "
              f"Q25: {summary.loc[shot_type, 'Second_Half_Q25']}, "
              f"Median: {summary.loc[shot_type, 'Second_Half_Median']}, "
              f"Q75: {summary.loc[shot_type, 'Second_Half_Q75']}")
    
    # Print observations for average duration changes
    print("\nObservations (Average Duration Changes):")
    for shot_type in summary.index:
        first_avg = summary.loc[shot_type, 'First_Half_Avg']
        second_avg = summary.loc[shot_type, 'Second_Half_Avg']
        if first_avg > 0 and second_avg > 0:  # Only compare if both exist
            diff = second_avg - first_avg
            if abs(diff) > 5:  # Highlight significant changes (>5 frames)
                direction = "increased" if diff > 0 else "decreased"
                print(f"- {shot_type}: Average duration {direction} by {abs(diff):.2f} frames from first to second half")
    
    return summary

# Usage with your existing DataFrame:
result = analyze_balltype_durations(shots_merged)

Shot Type Duration Analysis
Total valid shots - First Half: 85795, Second Half: 82533
(Durations in frames, -1 values excluded)

Summary:

Shot Type: 切球
First Half - Avg: 189.76, Q25: 22.0, Median: 25.0, Q75: 28.0
Second Half - Avg: 24.61, Q25: 21.0, Median: 25.0, Q75: 28.0

Shot Type: 平球
First Half - Avg: 18.87, Q25: 14.0, Median: 17.0, Q75: 22.0
Second Half - Avg: 32.04, Q25: 14.0, Median: 17.0, Q75: 22.0

Shot Type: 挑球
First Half - Avg: 39.24, Q25: 33.0, Median: 38.0, Q75: 44.0
Second Half - Avg: 39.31, Q25: 33.0, Median: 38.0, Q75: 44.0

Shot Type: 推撲球
First Half - Avg: 72.93, Q25: 17.0, Median: 27.0, Q75: 33.0
Second Half - Avg: 24.7, Q25: 16.0, Median: 25.0, Q75: 33.0

Shot Type: 未知球種
First Half - Avg: 83.54, Q25: 22.0, Median: 36.0, Q75: 70.0
Second Half - Avg: 84.44, Q25: 24.0, Median: 38.0, Q75: 74.0

Shot Type: 殺球
First Half - Avg: 19.92, Q25: 13.0, Median: 16.0, Q75: 19.0
Second Half - Avg: 19.67, Q25: 13.0, Median: 15.0, Q75: 18.0

Shot Type: 發短球
First Half - Avg: 22.17, Q2

In [17]:
import pandas as pd

def analyze_balltype_durations(df):
    # Check required columns
    required_cols = ['shot_type', 'is_second_half', 'shot_duration']
    if not all(col in df.columns for col in required_cols):
        return "Error: Required columns not found"
    
    # Split into first and second half
    first_half = df[~df['is_second_half']]
    second_half = df[df['is_second_half']]
    
    # Exclude invalid durations (-1)
    first_half_valid = first_half[first_half['shot_duration'] != -1]
    second_half_valid = second_half[second_half['shot_duration'] != -1]
    
    # Function to remove outliers and calculate bounds
    def process_group(group):
        Q1 = group['shot_duration'].quantile(0.25)
        Q3 = group['shot_duration'].quantile(0.75)
        IQR = Q3 - Q1
        lower_bound = Q1 - 2 * IQR
        upper_bound = Q3 + 2 * IQR
        # Remove outliers
        cleaned = group[(group['shot_duration'] >= lower_bound) & (group['shot_duration'] <= upper_bound)]
        # Calculate stats on cleaned data
        stats = {
            'mean': cleaned['shot_duration'].mean(),
            'q25': cleaned['shot_duration'].quantile(0.25),
            'median': cleaned['shot_duration'].median(),
            'q75': cleaned['shot_duration'].quantile(0.75),
            'iqr': IQR,
            'lower_bound': lower_bound,
            'upper_bound': upper_bound
        }
        return pd.Series(stats)
    
    # Process data
    first_half_stats = first_half_valid.groupby('shot_type').apply(process_group).round(2)
    second_half_stats = second_half_valid.groupby('shot_type').apply(process_group).round(2)
    
    # Count shots after cleaning
    first_half_clean = first_half_valid.groupby('shot_type').apply(
        lambda g: g[(g['shot_duration'] >= g['shot_duration'].quantile(0.25) - 2 * (g['shot_duration'].quantile(0.75) - g['shot_duration'].quantile(0.25))) &
                    (g['shot_duration'] <= g['shot_duration'].quantile(0.75) + 2 * (g['shot_duration'].quantile(0.75) - g['shot_duration'].quantile(0.25)))]
    ).reset_index(drop=True)
    second_half_clean = second_half_valid.groupby('shot_type').apply(
        lambda g: g[(g['shot_duration'] >= g['shot_duration'].quantile(0.25) - 2 * (g['shot_duration'].quantile(0.75) - g['shot_duration'].quantile(0.25))) &
                    (g['shot_duration'] <= g['shot_duration'].quantile(0.75) + 2 * (g['shot_duration'].quantile(0.75) - g['shot_duration'].quantile(0.25)))]
    ).reset_index(drop=True)
    
    # Create summary DataFrame
    summary = pd.DataFrame({
        'First_Half_Avg': first_half_stats['mean'],
        'First_Half_Q25': first_half_stats['q25'],
        'First_Half_Median': first_half_stats['median'],
        'First_Half_Q75': first_half_stats['q75'],
        'First_Half_IQR': first_half_stats['iqr'],
        'First_Half_Lower_Bound': first_half_stats['lower_bound'],
        'First_Half_Upper_Bound': first_half_stats['upper_bound'],
        'Second_Half_Avg': second_half_stats['mean'],
        'Second_Half_Q25': second_half_stats['q25'],
        'Second_Half_Median': second_half_stats['median'],
        'Second_Half_Q75': second_half_stats['q75'],
        'Second_Half_IQR': second_half_stats['iqr'],
        'Second_Half_Lower_Bound': second_half_stats['lower_bound'],
        'Second_Half_Upper_Bound': second_half_stats['upper_bound']
    }).fillna(0)
    
    # Print results
    print(f"Shot Type Duration Analysis (Outliers Removed with 2*IQR Rule)")
    print(f"Total shots before cleaning - First Half: {len(first_half_valid)}, Second Half: {len(second_half_valid)}")
    print(f"Total shots after cleaning - First Half: {len(first_half_clean)}, Second Half: {len(second_half_clean)}")
    print("(Durations in frames, -1 values and outliers excluded)")
    print("\nSummary:")
    
    # Print stats for each shot type
    for shot_type in summary.index:
        print(f"\nShot Type: {shot_type}")
        print(f"First Half - Avg: {summary.loc[shot_type, 'First_Half_Avg']}, "
              f"Q25: {summary.loc[shot_type, 'First_Half_Q25']}, "
              f"Median: {summary.loc[shot_type, 'First_Half_Median']}, "
              f"Q75: {summary.loc[shot_type, 'First_Half_Q75']}, "
              f"IQR: {summary.loc[shot_type, 'First_Half_IQR']}, "
              f"Lower Bound: {summary.loc[shot_type, 'First_Half_Lower_Bound']}, "
              f"Upper Bound: {summary.loc[shot_type, 'First_Half_Upper_Bound']}")
        print(f"Second Half - Avg: {summary.loc[shot_type, 'Second_Half_Avg']}, "
              f"Q25: {summary.loc[shot_type, 'Second_Half_Q25']}, "
              f"Median: {summary.loc[shot_type, 'Second_Half_Median']}, "
              f"Q75: {summary.loc[shot_type, 'Second_Half_Q75']}, "
              f"IQR: {summary.loc[shot_type, 'Second_Half_IQR']}, "
              f"Lower Bound: {summary.loc[shot_type, 'Second_Half_Lower_Bound']}, "
              f"Upper Bound: {summary.loc[shot_type, 'Second_Half_Upper_Bound']}")
    
    # Print observations for average duration changes
    print("\nObservations (Average Duration Changes):")
    for shot_type in summary.index:
        first_avg = summary.loc[shot_type, 'First_Half_Avg']
        second_avg = summary.loc[shot_type, 'Second_Half_Avg']
        if first_avg > 0 and second_avg > 0:  # Only compare if both exist
            diff = second_avg - first_avg
            if abs(diff) > 5:  # Highlight significant changes (>5 frames)
                direction = "increased" if diff > 0 else "decreased"
                print(f"- {shot_type}: Average duration {direction} by {abs(diff):.2f} frames from first to second half")
    
    return summary

# Usage with your existing DataFrame:
result = analyze_balltype_durations(shots_merged)

Shot Type Duration Analysis (Outliers Removed with 2*IQR Rule)
Total shots before cleaning - First Half: 85795, Second Half: 82533
Total shots after cleaning - First Half: 84052, Second Half: 81413
(Durations in frames, -1 values and outliers excluded)

Summary:

Shot Type: 切球
First Half - Avg: 24.7, Q25: 22.0, Median: 25.0, Q75: 28.0, IQR: 6.0, Lower Bound: 10.0, Upper Bound: 40.0
Second Half - Avg: 24.53, Q25: 21.0, Median: 25.0, Q75: 27.0, IQR: 7.0, Lower Bound: 7.0, Upper Bound: 42.0

Shot Type: 平球
First Half - Avg: 18.45, Q25: 14.0, Median: 17.0, Q75: 22.0, IQR: 8.0, Lower Bound: -2.0, Upper Bound: 38.0
Second Half - Avg: 18.14, Q25: 14.0, Median: 17.0, Q75: 21.0, IQR: 8.0, Lower Bound: -2.0, Upper Bound: 38.0

Shot Type: 挑球
First Half - Avg: 38.82, Q25: 33.0, Median: 37.0, Q75: 44.0, IQR: 11.0, Lower Bound: 11.0, Upper Bound: 66.0
Second Half - Avg: 38.88, Q25: 33.0, Median: 38.0, Q75: 44.0, IQR: 11.0, Lower Bound: 11.0, Upper Bound: 66.0

Shot Type: 推撲球
First Half - Avg: 25.54, 