In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import ast
import numpy as np
import seaborn as sns
import plotly.express as px
from math import sqrt
import re

In [None]:
# Read every input table; each CSV's first column is used as the index.
bat, events, summary, scores, sample = (
    pd.read_csv(csv_name, index_col=0)
    for csv_name in (
        'bat.csv',
        'events.csv',
        'summary_acts.csv',
        'summary_score.csv',
        'sample.csv',
    )
)
# Drop the sample rows whose 'event' value is exactly 'No'.
sample_df = sample.loc[sample['event'].ne('No')]

In [None]:
# Inner-join events to the summary table: events.eventId == summary.hit_eventId.
events_df = events.merge(
    summary,
    how='inner',
    left_on='eventId',
    right_on='hit_eventId',
)

In [None]:
# Column renames applied to the merged frame: each listed column gets a
# '_hit' suffix.
new_column_names = dict(
    time='time_hit',
    head='head_hit',
    handle='handle_hit',
)

In [None]:
# Inner-join the bat table onto the event/summary frame by pitch_eventId.
events_df2 = events_df.merge(bat, how='inner', on='pitch_eventId')
# NOTE(review): the rename runs after the merge; if `bat` also carries
# time/head/handle columns, pandas would suffix them (_x/_y) and these
# renames would match nothing -- confirm against the CSV schemas.
df = events_df2.rename(columns=new_column_names)

In [None]:
# Keep only the pitches that also appear in the filtered sample table.
main_df = df.merge(sample_df, how='inner', on='pitch_eventId')

In [None]:
# Attach the score columns by pitch_eventId; `df` is rebound to this final frame.
df = main_df.merge(scores, how='inner', on='pitch_eventId')

In [None]:
def euclidean_distance(pos1, pos2):
    """Return the straight-line distance between two coordinate sequences.

    Coordinates are paired positionally with ``zip``, so any extra trailing
    components of the longer sequence are ignored.
    """
    squared_gaps = []
    for first, second in zip(pos1, pos2):
        squared_gaps.append((first - second) ** 2)
    return sqrt(sum(squared_gaps))

def calculate_sweet_spot(head_positions, handle_positions, distance=0.5):
    """Locate a point on each bat a fixed distance back from the head.

    For every (head, handle) pair the point lies on the line from the head
    towards the handle, ``distance`` away from the head.

    Parameters
    ----------
    head_positions, handle_positions : sequence of coordinate sequences
        One row per frame; both must have the same number of rows and
        coordinates.
    distance : float, default 0.5
        Offset from the head along the handle->head direction, in the same
        units as the positions.

    Returns
    -------
    list of list of float
        One position per input row. An empty input yields an empty list.
        A row whose head and handle coincide has no defined direction and
        comes back as NaN coordinates (0/0 division).
    """
    head_positions = np.asarray(head_positions, dtype=float)
    handle_positions = np.asarray(handle_positions, dtype=float)

    # An empty input becomes a 1-D array, for which norm(axis=1) would raise;
    # there is nothing to compute, so return early.
    if head_positions.size == 0:
        return []

    vectors = head_positions - handle_positions
    # keepdims keeps the norms as a column so they broadcast across each row.
    lengths = np.linalg.norm(vectors, axis=1, keepdims=True)
    unit_vectors = vectors / lengths
    sweet_spot_positions = head_positions - distance * unit_vectors
    return sweet_spot_positions.tolist()

def calculate_total_distance(row, time_hit, target_time=0.25, distance=0.5):
    """Sum the path length travelled by the bat's sweet spot during a swing.

    ``row['total_bat']`` is parsed with ``ast.literal_eval`` and is expected to
    be a list of frame dicts shaped like
    ``{'time': t, 'head': {'pos': [...]}, 'handle': {'pos': [...]}}``.
    The path starts at the frame whose time is closest to ``target_time`` and
    runs up to and including the first frame whose time is >= ``time_hit``
    (or to the last frame if none qualifies).

    Parameters
    ----------
    row : mapping
        Must provide a ``'total_bat'`` entry holding the serialized frames.
    time_hit : float
        Time at which accumulation stops.
    target_time : float, default 0.25
        Time used to choose the starting frame.
    distance : float, default 0.5
        Passed through to ``calculate_sweet_spot``.

    Returns
    -------
    number or None
        The accumulated distance (0 if no usable frames exist), or ``None``
        when ``row['total_bat']`` cannot be parsed into a list of frames.
    """
    try:
        frames = ast.literal_eval(row['total_bat'])
    except (ValueError, SyntaxError):
        print("Error evaluating row:", row['total_bat'])
        return None
    if not isinstance(frames, (list, tuple)):
        # Parsed fine but is not a sequence of frames; treat like bad input.
        print("Error evaluating row:", row['total_bat'])
        return None

    # Keep only frames that carry a time AND both bat endpoints, so that times
    # and sweet-spot positions share one index. Filtering head and handle
    # independently would shift the indices whenever a frame lacks one of them.
    times, head_positions, handle_positions = [], [], []
    for frame in frames:
        if not isinstance(frame, dict):
            continue
        stamp = frame.get('time')
        head = (frame.get('head') or {}).get('pos')
        handle = (frame.get('handle') or {}).get('pos')
        if stamp is None or head is None or handle is None:
            continue
        times.append(stamp)
        head_positions.append(head)
        handle_positions.append(handle)

    total_distance = 0
    if not times:
        return total_distance

    sweet_spot_positions = calculate_sweet_spot(head_positions, handle_positions, distance)

    # First frame whose timestamp is nearest to target_time.
    start_index = min(range(len(times)), key=lambda i: abs(times[i] - target_time))

    last_pos = None
    for stamp, current_pos in zip(times[start_index:], sweet_spot_positions[start_index:]):
        if last_pos is not None:
            total_distance += euclidean_distance(last_pos, current_pos)
        last_pos = current_pos
        # The frame at/after the hit is included in the path, then we stop.
        if stamp >= time_hit:
            break

    return total_distance


# Row-wise: measure each swing's path length up to that row's own hit time.
df['swing_length'] = df.apply(
    lambda swing: calculate_total_distance(swing, time_hit=swing['time_hit']),
    axis=1,
)

In [None]:
# bat_speed = (hit_speed_mph - 0.2 * pitch_speed_mph) / 1.2
# NOTE(review): 0.2 and 1.2 look like coefficients of a collision model
# relating exit speed to bat and pitch speed -- confirm their source.
df['bat_speed'] = (
    df['hit_speed_mph']
    .sub(df['pitch_speed_mph'].mul(0.2))
    .div(1.2)
)