In [1]:
import pandas as pd
import numpy as np
from datetime import datetime, timedelta
import sqlalchemy
import os
import time

In [7]:
# Load the exported query results into a DataFrame.
# The location defaults to the original ~/Desktop/sql.csv but can be overridden
# with the SQL_CSV_PATH environment variable, so the notebook is not tied to a
# single machine's Desktop folder.
# NOTE(review): the displayed frame has an 'Unnamed: 0' column, which suggests
# the CSV was written with its index — consider index_col=0 if that column is
# not needed downstream.
sql_csv_path = os.path.expanduser(os.environ.get('SQL_CSV_PATH', '~/Desktop/sql.csv'))
df = pd.read_csv(sql_csv_path)


FileNotFoundError: [Errno 2] No such file or directory: '/Users/tomwattley/Desktop/sql.csv'

In [6]:
# Show the loaded results frame as this cell's output.
df

Unnamed: 0,horse_name,age,horse_sex,draw,headgear,weight_carried,weight_carried_lbs,extra_weight,jockey_claim,finishing_position,...,trainer,jockey,data_type,rating,speed_figure,number_of_runs,days_since_last_ran,weeks_since_last_ran,collateral_form_type,distance_difference
0,Im A Gambler,3,Gelding,5,,9-7,133,,,2,...,Charlie Mark Johnston,Joe Fanning,historical,86,78,11,166,24,race_form,-8.0
1,Im A Gambler,3,Gelding,1,,9-9,135,,,1,...,Charlie Mark Johnston,Ben Curtis,historical,95,82,12,17,2,collateral,-8.0
2,Im A Gambler,3,Gelding,6,,9-6,132,6.0,,9,...,Charlie Mark Johnston,Joe Fanning,historical,52,46,13,6,1,collateral,-8.0
3,Matty Too,3,Gelding,3,tongue tie,8-9,121,,,4,...,Tim Easterby,David Allan,historical,60,52,10,166,24,race_form,-3.0
4,Matty Too,3,Gelding,6,,8-7,119,,,7,...,Tim Easterby,Duran Fentiman,historical,60,28,11,20,3,collateral,-3.0
5,Ana Gold,3,Filly,2,,8-11,123,,,3,...,Richard Fahey,Paul Hanagan,historical,70,62,5,208,30,race_form,-5.75
6,Evocative Spark,3,Colt,1,tongue tie,9-0,126,,,1,...,George Boughey,Ben Curtis,historical,95,87,6,8,1,race_form,-19.0


In [None]:
-- Enhanced query to analyze horse of interest vs competition ratings.
--
-- Hard-coded inputs (edit in place to analyse a different horse/race):
--   race_id  807569  : the reference race
--   horse_id 168976  : the horse of interest (target horse)
--   '2022-04-21'     : exclusive lower bound for collateral runs
--                      (presumably the date of race 807569 -- TODO confirm)
--   '2022-05-20'     : todays_race_date; upper bound for ratings (inclusive)
--                      and for collateral runs (exclusive)
--
-- Output: one row per opponent in the reference race ('race_form'), followed by
-- one row per later run of those opponents inside the date window ('collateral').
-- Both branches of the UNION ALL expose the same 18 columns in the same order.
WITH race_form_data AS (
    -- Every runner in the reference race, tagged as 'race_form'.
    -- total_distance_beaten is matched against a numeric pattern; anything that
    -- does not look like a number is replaced by the sentinel value 999.
    SELECT
        pd.*,
        'race_form'::character varying AS collateral_form_type,
        CASE
            WHEN pd.total_distance_beaten ~ '^-?[0-9]*\.?[0-9]+([eE][+-]?[0-9]+)?$'
            THEN CAST(pd.total_distance_beaten AS NUMERIC)
            ELSE 999
        END AS float_distance_beaten
    FROM
        public.unioned_results_data pd
    WHERE
        pd.race_id = 807569
),
target_horse_info AS (
    -- The target horse's own rating, position and distance beaten in that race.
    SELECT
        horse_id,
        horse_name,
        official_rating AS target_horse_rating_then,
        finishing_position AS target_horse_position,
        float_distance_beaten AS target_horse_distance_beaten
    FROM race_form_data
    WHERE horse_id = 168976
),
distance_differences AS (
    -- Each runner compared with the target horse:
    --   distance_difference    > 0  => beaten further than the target horse
    --   rating_difference_then > 0  => rated higher than the target horse at the time
    SELECT
        rf.*,
        rf.float_distance_beaten - thi.target_horse_distance_beaten AS distance_difference,
        rf.official_rating - thi.target_horse_rating_then AS rating_difference_then,
        thi.target_horse_rating_then,
        thi.target_horse_position,
        thi.target_horse_distance_beaten
    FROM race_form_data rf
    CROSS JOIN target_horse_info thi
),
current_ratings AS (
    -- Most recent non-null official rating per horse, up to todays_race_date.
    -- FIRST_VALUE yields the same value for every row of a partition, so
    -- DISTINCT collapses the result to one row per horse_id.
    SELECT DISTINCT
        horse_id,
        FIRST_VALUE(official_rating) OVER (
            PARTITION BY horse_id
            ORDER BY race_date DESC, race_id DESC
        ) AS current_rating
    FROM public.unioned_results_data
    WHERE horse_id IN (SELECT horse_id FROM race_form_data)
      AND official_rating IS NOT NULL
      AND race_date <= '2022-05-20'  -- Up to todays_race_date
),
collateral_data AS (
    -- Runs made by the opponents inside the date window, carrying along the
    -- comparison figures computed for the reference race.
    SELECT
        pd.*,
        'collateral'::character varying AS collateral_form_type,
        CASE
            WHEN pd.total_distance_beaten ~ '^-?[0-9]*\.?[0-9]+([eE][+-]?[0-9]+)?$'
            THEN CAST(pd.total_distance_beaten AS NUMERIC)
            ELSE 999
        END AS float_distance_beaten,
        dd.distance_difference,
        dd.rating_difference_then,
        dd.target_horse_rating_then,
        cr.current_rating,
        cr.current_rating - dd.target_horse_rating_then AS rating_difference_now
    FROM
        public.unioned_results_data pd
    INNER JOIN distance_differences dd ON pd.horse_id = dd.horse_id
    LEFT JOIN current_ratings cr ON pd.horse_id = cr.horse_id
    WHERE
        pd.horse_id IN (SELECT horse_id FROM race_form_data)
        AND pd.race_date > '2022-04-21'
        AND pd.race_date < '2022-05-20'
        AND pd.horse_id != 168976
)
-- Final result combining race form and collateral data.
-- Race-form branch: opponents in the reference race itself.
SELECT
    dd.horse_name,
    dd.horse_id,
    dd.finishing_position,
    dd.total_distance_beaten,
    dd.betfair_win_sp,
    dd.official_rating AS rating_at_time,
    dd.target_horse_rating_then,
    dd.rating_difference_then,
    -- current_ratings holds one row per horse, so a single LEFT JOIN replaces
    -- the two identical correlated sub-selects; horses without any non-null
    -- rating still get NULL here.
    cr.current_rating,
    cr.current_rating - dd.target_horse_rating_then AS rating_difference_now,
    dd.race_id,
    dd.race_date,
    dd.race_class,
    dd.distance,
    dd.surface,
    dd.collateral_form_type,
    dd.distance_difference,
    -- Every row in this branch is race form, so the only way to miss the three
    -- comparisons is a NULL rating difference (a missing official rating).
    CASE
        WHEN dd.rating_difference_then > 0 THEN 'Higher rated opponent'
        WHEN dd.rating_difference_then < 0 THEN 'Lower rated opponent'
        WHEN dd.rating_difference_then = 0 THEN 'Same rating'
        ELSE 'Rating unavailable'
    END AS opponent_analysis
FROM distance_differences dd
LEFT JOIN current_ratings cr ON cr.horse_id = dd.horse_id
WHERE dd.horse_id != 168976

UNION ALL

-- Collateral branch: columns must line up positionally with the branch above.
SELECT
    horse_name,
    horse_id,
    finishing_position,
    total_distance_beaten,
    betfair_win_sp,
    official_rating AS rating_at_time,
    target_horse_rating_then,
    rating_difference_then,  -- was rating_difference_now, which duplicated the column two below
    current_rating,
    rating_difference_now,
    race_id,
    race_date,
    race_class,
    distance,
    surface,
    collateral_form_type,
    distance_difference,
    'Collateral performance' AS opponent_analysis
FROM collateral_data

ORDER BY
    collateral_form_type DESC,  -- 'race_form' sorts after 'collateral', so DESC lists race form first
    distance_difference ASC,
    horse_id DESC;

## Enhanced Collateral Form Analysis

This query provides a comprehensive analysis of your horse of interest (168976) compared to its competition:

### Key Additions:

1. **Rating Comparisons Then vs Now**:
   - `target_horse_rating_then`: Horse 168976's rating at the time of race 807569
   - `rating_difference_then`: How each opponent's rating compared to horse 168976 at race time
   - `current_rating`: Most recent rating for each horse (up to 2022-05-20)
   - `rating_difference_now`: How each horse's current rating compares to horse 168976's rating then

2. **Opponent Analysis**:
   - Categorizes opponents as "Higher rated", "Lower rated", or "Same rating"
   - Shows both historical and current rating contexts

3. **Performance Context**:
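   - Distance differences: each horse's distance beaten minus horse 168976's distance beaten in race 807569 (positive = beaten further than horse 168976, negative = finished closer to the winner); non-numeric distances are treated as 999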
   - Collateral form performances since the race
   - Current form and rating changes

This helps you understand:
- Which horses were better/worse rated when they raced against your horse
- How those horses have performed since (rating changes)
- The quality of opposition your horse faced
- How horses that beat/lost to your horse are rated now