# Shota Imanaga: MLB 1st-2nd Year Pitching Analysis (2024-2025)

**カブス今永昇太の投球スタイル変化を追う**

- 2024（ルーキー）vs 2025前半 vs 2025後半 の3期間比較
- 球種配分の変化
- 球速・スピンレートの推移
- イニング別球速低下（スタミナパターン）
- 空振り率（Whiff Rate）の変化
- 2ストライク時の決め球戦略

---

In [None]:
!pip install pybaseball duckdb -q

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from pybaseball import statcast_pitcher
import duckdb

# Plot defaults shared by every figure below: ggplot theme, 12x6 figures, 12 pt text.
plt.style.use('ggplot')
plt.rcParams.update({
    'figure.figsize': (12, 6),
    'font.size': 12,
})

# ====== Settings ======
# MLBAM player id handed to statcast_pitcher (Shota Imanaga).
PITCHER_ID = 684007
# Seasons to download; one request is made per entry.
YEARS = [2024, 2025]
# game_type value kept by the filter query (regular season only).
GAME_TYPE = 'R'
# All-Star Break cutoff: 2025 games before this date are labelled 1H, the rest 2H.
ASB_DATE = '2025-07-15'
# ======================

## 1. Data Acquisition

In [None]:
# Download each season separately (March 1 - December 31) and tag every
# pitch with the season it was requested for.
dfs = []
for year in YEARS:
    print(f'Fetching {year}...')
    season_start, season_end = f'{year}-03-01', f'{year}-12-31'
    df_year = statcast_pitcher(season_start, season_end, PITCHER_ID)
    df_year['season'] = year
    dfs.append(df_year)
    print(f'  {year}: {len(df_year):,} pitches')

df_raw = pd.concat(dfs, ignore_index=True)
print(f'\nTotal (raw): {len(df_raw):,} pitches')

# Keep only rows whose game_type matches GAME_TYPE and label each pitch with
# its analysis period. Any row that is neither 2024 nor 2025-before-ASB_DATE
# falls through to '2025-2H'.
con = duckdb.connect()
period_sql = f"""
    SELECT *,
        CASE
            WHEN season = 2024 THEN '2024'
            WHEN season = 2025 AND game_date < '{ASB_DATE}' THEN '2025-1H'
            ELSE '2025-2H'
        END as period
    FROM df_raw
    WHERE game_type = '{GAME_TYPE}'
"""
df = con.execute(period_sql).df()

print(f'Total (regular season): {len(df):,} pitches')
print(f'\nPeriod breakdown:')
# Count once, then look each period up (0 when a period has no pitches).
pitches_per_period = df['period'].value_counts()
for period in ['2024', '2025-1H', '2025-2H']:
    n = int(pitches_per_period.get(period, 0))
    print(f'  {period}: {n:,} pitches')

## 2. Data Summary

In [None]:
# Chronological order of the three analysis windows; later cells iterate over it.
PERIODS = ['2024', '2025-1H', '2025-2H']

# One row per period: pitch count, number of distinct game dates ("games"),
# average / maximum velocity, average spin and number of distinct pitch types.
summary_sql = """
    SELECT
        period,
        COUNT(*) as pitches,
        COUNT(DISTINCT game_date) as games,
        ROUND(AVG(release_speed), 1) as avg_velo,
        ROUND(MAX(release_speed), 1) as max_velo,
        ROUND(AVG(release_spin_rate), 0) as avg_spin,
        COUNT(DISTINCT pitch_type) as pitch_types
    FROM df
    GROUP BY period
    ORDER BY period
"""
summary = con.execute(summary_sql).df()

print('=== Period Overview ===')
print(summary.to_string(index=False))
total_pitches = len(df)
print(f'\nTotal: {total_pitches:,} pitches')

## 3. Pitch Arsenal Overview

In [None]:
# Usage count, share of the period's pitches (pct), average velocity and
# average spin for every (period, pitch type) pair, most-used pitch first.
arsenal_sql = """
    SELECT
        period,
        pitch_type,
        COUNT(*) as count,
        ROUND(100.0 * COUNT(*) / SUM(COUNT(*)) OVER(PARTITION BY period), 1) as pct,
        ROUND(AVG(release_speed), 1) as avg_velo,
        ROUND(AVG(release_spin_rate), 0) as avg_spin
    FROM df
    WHERE pitch_type IS NOT NULL
    GROUP BY period, pitch_type
    ORDER BY period, count DESC
"""
arsenal = con.execute(arsenal_sql).df()

print('=== Pitch Arsenal by Period ===')
arsenal_cols = ['pitch_type', 'count', 'pct', 'avg_velo', 'avg_spin']
for period in PERIODS:
    print(f'\n--- {period} ---')
    data = arsenal.loc[arsenal['period'] == period, arsenal_cols]
    print(data.to_string(index=False))

## 4. Pitch Mix Evolution

3期間で球種の使い分けがどう変わったか

In [None]:
# Wide table: one row per period (in PERIODS order), one column per pitch
# type, cells hold usage %. Pitch types absent from a period are filled with 0.
mix_pivot = (
    arsenal
    .pivot_table(index='period', columns='pitch_type', values='pct', fill_value=0)
    .reindex(PERIODS)
)

# Stacked bar per period showing how usage is split across pitch types.
mix_ax = mix_pivot.plot(kind='bar', stacked=True, figsize=(12, 7), colormap='Set3')
mix_ax.set_title('Shota Imanaga - Pitch Mix by Period')
mix_ax.set_xlabel('Period')
mix_ax.set_ylabel('Usage %')
mix_ax.legend(title='Pitch Type', bbox_to_anchor=(1.05, 1))
plt.tight_layout()
plt.show()

# === Text Summary ===
print('\n=== Pitch Mix (% usage) ===')
print(mix_pivot.round(1).to_string())

# 2024 vs 2025-2H: list every pitch whose usage moved by at least one point.
if {'2024', '2025-2H'} <= set(mix_pivot.index):
    rookie_mix, latest_mix = mix_pivot.loc['2024'], mix_pivot.loc['2025-2H']
    delta = (latest_mix - rookie_mix).sort_values()
    print(f'\n=== Biggest Changes (2024 → 2025-2H) ===')
    for pitch, change in delta[delta.abs() >= 1.0].items():
        direction = '↑' if change > 0 else '↓'
        print(f'  {pitch}: {rookie_mix[pitch]:.1f}% → {latest_mix[pitch]:.1f}% ({direction}{abs(change):.1f}%)')

## 5. Velocity Trends by Period

In [None]:
# Average velocity, average spin and pitch count for every
# (period, pitch type) pair.
velo_by_period = con.execute("""
    SELECT
        period,
        pitch_type,
        ROUND(AVG(release_speed), 1) as avg_velo,
        ROUND(AVG(release_spin_rate), 0) as avg_spin,
        COUNT(*) as count
    FROM df
    WHERE pitch_type IS NOT NULL
    GROUP BY period, pitch_type
    ORDER BY period
""").df()

# The four most-thrown pitch types over the whole sample.
top_pitches = con.execute("""
    SELECT pitch_type FROM df
    WHERE pitch_type IS NOT NULL
    GROUP BY pitch_type
    ORDER BY COUNT(*) DESC
    LIMIT 4
""").df()['pitch_type'].tolist()

fig, axes = plt.subplots(1, 2, figsize=(14, 5))

# Plot on the numeric position of each period instead of the period string.
# With string x values Matplotlib lays categories out in order of first
# appearance, so a first series that lacks a period would scramble the axis.
period_order = {p: i for i, p in enumerate(PERIODS)}
for pitch in top_pitches:
    data = velo_by_period[velo_by_period['pitch_type'] == pitch].copy()
    data['period_idx'] = data['period'].map(period_order)
    data = data.sort_values('period_idx')
    axes[0].plot(data['period_idx'], data['avg_velo'], marker='o', label=pitch, linewidth=2)
    axes[1].plot(data['period_idx'], data['avg_spin'], marker='o', label=pitch, linewidth=2)

# Label the numeric positions with the period names, always in PERIODS order.
for trend_ax in axes:
    trend_ax.set_xticks(range(len(PERIODS)))
    trend_ax.set_xticklabels(PERIODS)

axes[0].set_title('Average Velocity by Period')
axes[0].set_ylabel('Velocity (mph)')
axes[0].legend()

axes[1].set_title('Average Spin Rate by Period')
axes[1].set_ylabel('Spin Rate (rpm)')
axes[1].legend()

plt.suptitle('Shota Imanaga - Velocity & Spin Trends')
plt.tight_layout()
plt.show()

# === Text Summary ===
print('\n=== Velocity & Spin by Period (Top Pitches) ===')
for pitch in top_pitches:
    data = velo_by_period[velo_by_period['pitch_type'] == pitch]
    print(f'\n{pitch}:')
    print(data[['period', 'avg_velo', 'avg_spin', 'count']].to_string(index=False))

## 6. Monthly Velocity Trend (2025 Focus)

2025年の月別球速推移で、前半→後半の変化を細かく見る

In [None]:
# Average velocity per (season, calendar month, pitch type) for the three
# most-thrown pitch types; groups with fewer than 10 pitches are dropped.
monthly_sql = """
    SELECT
        season,
        EXTRACT(MONTH FROM game_date::DATE) as month,
        pitch_type,
        ROUND(AVG(release_speed), 1) as avg_velo,
        COUNT(*) as pitches
    FROM df
    WHERE pitch_type IN (SELECT pitch_type FROM df GROUP BY pitch_type ORDER BY COUNT(*) DESC LIMIT 3)
    GROUP BY season, month, pitch_type
    HAVING COUNT(*) >= 10
    ORDER BY season, month
"""
monthly_velo = con.execute(monthly_sql).df()

# Plot 2025 monthly trend: one line per pitch type, in order of first appearance.
fig, ax = plt.subplots(figsize=(12, 6))
df_2025_monthly = monthly_velo[monthly_velo['season'] == 2025]
for pitch, data in df_2025_monthly.groupby('pitch_type', sort=False):
    ax.plot(data['month'], data['avg_velo'], marker='o', label=pitch, linewidth=2)

# Dashed marker at month 7 (July), labelled as the All-Star Break.
ax.axvline(x=7, color='gray', linestyle='--', alpha=0.5, label='ASB')
ax.set_title('Shota Imanaga - 2025 Monthly Velocity Trend')
ax.set_xlabel('Month')
ax.set_ylabel('Velocity (mph)')
ax.set_xticks(range(3, 11))
ax.set_xticklabels(['Mar', 'Apr', 'May', 'Jun', 'Jul', 'Aug', 'Sep', 'Oct'])
ax.legend()
plt.tight_layout()
plt.show()

# === Text Summary ===
# Same table layout for both seasons: 2025 first, then 2024 for comparison.
df_2024_monthly = monthly_velo[monthly_velo['season'] == 2024]
monthly_reports = (
    ('=== 2025 Monthly Velocity ===', df_2025_monthly),
    ('\n=== 2024 Monthly Velocity ===', df_2024_monthly),
)
for header, season_rows in monthly_reports:
    print(header)
    for pitch, data in season_rows.groupby('pitch_type', sort=False):
        print(f'\n{pitch}:')
        print(data[['month', 'avg_velo', 'pitches']].to_string(index=False))

## 7. Fatigue Pattern: Inning-by-Inning Velocity

In [None]:
# Track the four-seam fastball ('FF') when it is among the top pitches,
# otherwise fall back to the most-thrown pitch.
ff_type = top_pitches[0] if 'FF' not in top_pitches else 'FF'

# Average velocity of that pitch per (period, inning), innings 1-8 only,
# ignoring period/inning cells with fewer than 5 pitches.
fatigue_sql = f"""
    SELECT
        period,
        inning,
        ROUND(AVG(release_speed), 1) as avg_velo,
        COUNT(*) as pitches
    FROM df
    WHERE pitch_type = '{ff_type}' AND inning <= 8
    GROUP BY period, inning
    HAVING COUNT(*) >= 5
    ORDER BY period, inning
"""
fatigue = con.execute(fatigue_sql).df()

# One line per period; periods without qualifying rows are left off the chart.
fig, ax = plt.subplots(figsize=(12, 6))
for period in PERIODS:
    data = fatigue[fatigue['period'] == period]
    if data.empty:
        continue
    ax.plot(data['inning'], data['avg_velo'], marker='o', label=period, linewidth=2)

ax.set_title(f'Shota Imanaga - {ff_type} Velocity by Inning')
ax.set_xlabel('Inning')
ax.set_ylabel(f'{ff_type} Velocity (mph)')
ax.set_xticks(range(1, 9))
ax.legend()
plt.tight_layout()
plt.show()

# === Text Summary ===
print(f'\n=== {ff_type} Velocity by Inning ===')
fatigue_pivot = fatigue.pivot_table(index='inning', columns='period', values='avg_velo')
if not fatigue_pivot.columns.empty:
    # Put the period columns in chronological order.
    fatigue_pivot = fatigue_pivot.reindex(columns=PERIODS)
print(fatigue_pivot.to_string())

# Compare the earliest and latest qualifying inning of each period
# (rows are already ordered by inning within a period).
print(f'\n=== Velocity Drop (1st inning → last inning) ===')
for period in PERIODS:
    data = fatigue[fatigue['period'] == period]
    if len(data) < 2:
        continue
    opening_row, closing_row = data.iloc[0], data.iloc[-1]
    first_velo = opening_row['avg_velo']
    last_velo = closing_row['avg_velo']
    last_inn = int(closing_row['inning'])
    drop = last_velo - first_velo
    print(f'  {period}: {first_velo} → {last_velo} (inn {last_inn}) = {drop:+.1f} mph')

## 8. Whiff Rate (空振り率)

空振り率 = 空振り数 / スイング総数

In [None]:
# Whiff rate per (period, pitch type): swinging strikes divided by all swings
# (swinging strikes, fouls, foul tips, foul bunts and balls hit into play).
# NULLIF turns a zero swing count into NULL so the rate is NULL instead of a
# division by zero.
whiff = con.execute("""
    SELECT
        period,
        pitch_type,
        COUNT(*) as total_pitches,
        SUM(CASE WHEN description IN (
            'swinging_strike', 'swinging_strike_blocked'
        ) THEN 1 ELSE 0 END) as whiffs,
        SUM(CASE WHEN description IN (
            'swinging_strike', 'swinging_strike_blocked',
            'foul', 'foul_tip', 'foul_bunt',
            'hit_into_play', 'hit_into_play_no_out', 'hit_into_play_score'
        ) THEN 1 ELSE 0 END) as total_swings,
        ROUND(100.0 * SUM(CASE WHEN description IN (
            'swinging_strike', 'swinging_strike_blocked'
        ) THEN 1 ELSE 0 END) /
        NULLIF(SUM(CASE WHEN description IN (
            'swinging_strike', 'swinging_strike_blocked',
            'foul', 'foul_tip', 'foul_bunt',
            'hit_into_play', 'hit_into_play_no_out', 'hit_into_play_score'
        ) THEN 1 ELSE 0 END), 0), 1) as whiff_rate
    FROM df
    WHERE pitch_type IS NOT NULL
    GROUP BY period, pitch_type
    ORDER BY period, total_pitches DESC
""").df()

fig, ax = plt.subplots(figsize=(12, 6))
# Plot on the numeric position of each period instead of the period string.
# With string x values Matplotlib lays categories out in order of first
# appearance, so a first series that lacks a period would scramble the axis.
period_order = {p: i for i, p in enumerate(PERIODS)}
for pitch in top_pitches:
    data = whiff[whiff['pitch_type'] == pitch].copy()
    data['period_idx'] = data['period'].map(period_order)
    data = data.sort_values('period_idx')
    if len(data) > 0:
        ax.plot(data['period_idx'], data['whiff_rate'], marker='o', label=pitch, linewidth=2)

# Label the numeric positions with the period names, always in PERIODS order.
ax.set_xticks(range(len(PERIODS)))
ax.set_xticklabels(PERIODS)
ax.set_ylabel('Whiff Rate (%)')
ax.set_title('Shota Imanaga - Whiff Rate by Pitch Type')
ax.legend()
plt.tight_layout()
plt.show()

# === Text Summary ===
print('\n=== Whiff Rate by Pitch Type ===')
whiff_pivot = whiff.pivot_table(index='pitch_type', columns='period', values='whiff_rate')
if len(whiff_pivot.columns) > 0:
    # Put the period columns in chronological order.
    whiff_pivot = whiff_pivot.reindex(columns=PERIODS)
print(whiff_pivot.round(1).to_string())

## 9. Two-Strike Strategy

2ストライクからの「決め球」がどう変わったか

In [None]:
# Pitch selection with two strikes: per (period, pitch type) the pitch count,
# its share of the period's two-strike pitches and its whiff rate
# (swinging strikes / swings, NULL when there were no swings).
two_strike_sql = """
    SELECT
        period,
        pitch_type,
        COUNT(*) as pitches,
        ROUND(100.0 * COUNT(*) / SUM(COUNT(*)) OVER(PARTITION BY period), 1) as pct,
        ROUND(100.0 * SUM(CASE WHEN description IN (
            'swinging_strike', 'swinging_strike_blocked'
        ) THEN 1 ELSE 0 END) /
        NULLIF(SUM(CASE WHEN description IN (
            'swinging_strike', 'swinging_strike_blocked',
            'foul', 'foul_tip', 'foul_bunt',
            'hit_into_play', 'hit_into_play_no_out', 'hit_into_play_score'
        ) THEN 1 ELSE 0 END), 0), 1) as whiff_rate
    FROM df
    WHERE strikes = 2 AND pitch_type IS NOT NULL
    GROUP BY period, pitch_type
    ORDER BY period, pitches DESC
"""
two_strike = con.execute(two_strike_sql).df()

print('=== Two-Strike Pitch Selection ===')
two_strike_cols = ['pitch_type', 'pitches', 'pct', 'whiff_rate']
for period in PERIODS:
    print(f'\n--- {period} ---')
    # Rows are ordered by usage within a period, so head(5) keeps the five
    # most-thrown two-strike pitches.
    data = two_strike.loc[two_strike['period'] == period].head(5)
    print(data[two_strike_cols].to_string(index=False))

## 10. Count Situation Analysis

In [None]:
# Pitch usage by count situation. Each pitch is bucketed as 'Full Count' (3-2),
# 'Behind' (more balls than strikes), 'Ahead' (more strikes than balls) or
# 'Even'; pct is the pitch type's share within its (period, situation) bucket.
count_sql = """
    SELECT
        period,
        CASE
            WHEN balls = 3 AND strikes = 2 THEN 'Full Count'
            WHEN balls > strikes THEN 'Behind'
            WHEN strikes > balls THEN 'Ahead'
            ELSE 'Even'
        END as count_situation,
        pitch_type,
        COUNT(*) as pitches,
        ROUND(100.0 * COUNT(*) / SUM(COUNT(*)) OVER(
            PARTITION BY period,
            CASE
                WHEN balls = 3 AND strikes = 2 THEN 'Full Count'
                WHEN balls > strikes THEN 'Behind'
                WHEN strikes > balls THEN 'Ahead'
                ELSE 'Even'
            END
        ), 1) as pct
    FROM df
    WHERE pitch_type IS NOT NULL
    GROUP BY period, count_situation, pitch_type
    ORDER BY period, count_situation, pitches DESC
"""
count_analysis = con.execute(count_sql).df()

print('=== Pitch Selection by Count Situation ===')
situations = ['Ahead', 'Even', 'Behind', 'Full Count']
for period in PERIODS:
    print(f'\n=== {period} ===')
    period_rows = count_analysis[count_analysis['period'] == period]
    for situation in situations:
        # Top three pitch types for this situation (rows are usage-ordered).
        data = period_rows[period_rows['count_situation'] == situation].head(3)
        if data.empty:
            continue
        top_str = ', '.join(
            f"{pitch} {pct}%" for pitch, pct in zip(data['pitch_type'], data['pct'])
        )
        print(f'  {situation}: {top_str}')

## 11. Batted Ball Results (被打球)

打たれた時の結果を期間別に比較

In [None]:
# Contact quality per period: batted-ball count, average exit velocity and
# launch angle, hard-hit share (exit velocity >= 95 mph) and average xBA / xwOBA.
# Only balls put in play are counted. Statcast also reports launch_speed for
# tracked foul balls, so filtering on launch_speed alone would mix fouls into
# every metric; the description filter restricts rows to in-play results.
batted = con.execute("""
    SELECT
        period,
        COUNT(*) as batted_balls,
        ROUND(AVG(launch_speed), 1) as avg_exit_velo,
        ROUND(AVG(launch_angle), 1) as avg_launch_angle,
        ROUND(100.0 * SUM(CASE WHEN launch_speed >= 95 THEN 1 ELSE 0 END) / COUNT(*), 1) as hard_hit_pct,
        ROUND(AVG(estimated_ba_using_speedangle), 3) as avg_xBA,
        ROUND(AVG(estimated_woba_using_speedangle), 3) as avg_xwOBA
    FROM df
    WHERE launch_speed IS NOT NULL
      AND description IN ('hit_into_play', 'hit_into_play_no_out', 'hit_into_play_score')
    GROUP BY period
    ORDER BY period
""").df()

print('=== Batted Ball Results by Period ===')
print(batted.to_string(index=False))

# By pitch type: same in-play filter, keeping only (period, pitch type)
# groups with at least 10 balls in play.
batted_by_pitch = con.execute("""
    SELECT
        period,
        pitch_type,
        COUNT(*) as batted_balls,
        ROUND(AVG(launch_speed), 1) as avg_exit_velo,
        ROUND(AVG(estimated_ba_using_speedangle), 3) as avg_xBA
    FROM df
    WHERE launch_speed IS NOT NULL AND pitch_type IS NOT NULL
      AND description IN ('hit_into_play', 'hit_into_play_no_out', 'hit_into_play_score')
    GROUP BY period, pitch_type
    HAVING COUNT(*) >= 10
    ORDER BY period, batted_balls DESC
""").df()

print('\n=== Batted Ball by Pitch Type (min 10 BIP) ===')
for period in PERIODS:
    data = batted_by_pitch[batted_by_pitch['period'] == period]
    print(f'\n--- {period} ---')
    print(data[['pitch_type', 'batted_balls', 'avg_exit_velo', 'avg_xBA']].to_string(index=False))

## 12. Summary

In [None]:
# Final recap built from the tables computed in the earlier cells
# (summary, mix_pivot, velo_by_period, whiff, batted).
banner = '=' * 60
print(banner)
print('SHOTA IMANAGA 2024-2025 ANALYSIS SUMMARY')
print(banner)

# Games & pitches per period
print('\n[Workload]')
for _, row in summary.iterrows():
    games, pitches = int(row["games"]), int(row["pitches"])
    print(f'  {row["period"]}: {games} games, {pitches:,} pitches, avg {row["avg_velo"]} mph')

# Pitch mix changes between consecutive periods (moves of 2+ points only)
print(f'\n[Pitch Mix Changes]')
for p1, p2 in [('2024', '2025-1H'), ('2025-1H', '2025-2H')]:
    if p1 not in mix_pivot.index or p2 not in mix_pivot.index:
        continue
    before, after = mix_pivot.loc[p1], mix_pivot.loc[p2]
    shift = (after - before).sort_values()
    notable = shift[shift.abs() >= 2.0]
    if notable.empty:
        continue
    print(f'  {p1} → {p2}:')
    for pitch, change in notable.items():
        direction = '↑' if change > 0 else '↓'
        print(f'    {pitch}: {before[pitch]:.1f}% → {after[pitch]:.1f}% ({direction}{abs(change):.1f}%)')

# Velocity of the pitch tracked in the fatigue section (ff_type)
print(f'\n[Fastball Velocity]')
ff_data = velo_by_period.loc[velo_by_period['pitch_type'] == ff_type]
for _, row in ff_data.iterrows():
    print(f'  {row["period"]}: {row["avg_velo"]} mph ({int(row["count"])} pitches)')

# Best whiff pitch per period, among pitch types with at least 20 swings
print(f'\n[Best Whiff Rate Pitch]')
enough_swings = whiff['total_swings'] >= 20
for period in PERIODS:
    period_whiff = whiff[(whiff['period'] == period) & enough_swings]
    if period_whiff.empty:
        continue
    best = period_whiff.loc[period_whiff['whiff_rate'].idxmax()]
    print(f'  {period}: {best["pitch_type"]} ({best["whiff_rate"]}%)')

# Batted ball quality per period
print(f'\n[Batted Ball Quality]')
for _, row in batted.iterrows():
    print(f'  {row["period"]}: xwOBA {row["avg_xwOBA"]}, Hard Hit {row["hard_hit_pct"]}%, Exit Velo {row["avg_exit_velo"]} mph')

print('\n' + banner)