In [18]:
# --- standard library ---
import copy
import sys

# --- third-party ---
import numpy as np
import pandas as pd
import tqdm

# --- project-local ---
# '../../' is appended to the module search path before the `common` package
# is imported (presumably that is where the package lives -- verify).
sys.path.append('../../')
import common
from common import const, utils, calc_methods as calc

# CSV holding the refund (payout) records that the betting simulations read.
target_refund_file_path = 'D://work//Programming//data//race_refund_datas//2024-04_2024-05.csv'
# Disabled: path of a pre-computed prediction file.
# target_predict_file_path='D://work//Programming//predict//LGBMRanker02//predict.csv'

refund = pd.read_csv(filepath_or_buffer=target_refund_file_path)
# Disabled: loading of the pre-computed prediction file.
# predict = pd.read_csv(target_predict_file_path)


In [19]:
# Load the race entries that are going to be scored by the model.
race_file_path = 'D://work//Programming//predict//LGBMRanker02.5//2024-04_2024-05.csv'
race_data = pd.read_csv(filepath_or_buffer=race_file_path)

In [20]:
# Label-encode the categorical feature columns with scikit-learn's LabelEncoder.
from sklearn.preprocessing import LabelEncoder

# Columns that are replaced by integer codes.
encode_tar_cols = ['性', '斤量', '芝かダートか', '回り', '馬場状態', '天気']

# Encode a deep copy so that `race_data` keeps the values as loaded.
encoded = race_data.copy(deep=True)

# A single encoder object is refitted for each column in turn, so after the
# loop `le` only remembers the classes of the last column.
# NOTE(review): the encoder is fitted on this prediction data; confirm the
# resulting codes match the ones the model was trained with.
le = LabelEncoder()
for col in encode_tar_cols:
    column_values = encoded[col].values
    encoded[col] = le.fit_transform(column_values)

In [21]:
# Some feature columns come out as dtype `object`; cast them to numeric types.
# Identifier-like columns become int64, every other object column becomes float.
_int64_cols = {'レースID', '馬番', '枠番', '齢', '馬のID', '騎手のID', '距離', '競馬場ID'}

# The last entry of const.VARIABLE is skipped.
for col in const.VARIABLE[:-1]:
    if encoded[col].dtype != 'object':
        continue
    encoded[col] = encoded[col].astype('int64' if col in _int64_cols else float)

In [22]:
# Prepare the TrueSkill environment and the per-race grouping of the encoded
# race data that the rating loop iterates over.
import trueskill

# TrueSkill parameters; sigma, beta and tau are all derived from mu.
mu = 25.0
sigma = mu / 3.0
beta = sigma / 2.0
tau = sigma / 100.0
draw_probability = 0.001
backend = None

_trueskill_params = dict(
    mu=mu,
    sigma=sigma,
    beta=beta,
    tau=tau,
    draw_probability=draw_probability,
    backend=backend,
)
env = trueskill.TrueSkill(**_trueskill_params)

# One group per race ID.
grouped_data = encoded.groupby(by='レースID')

In [23]:
# Load the already-rated race history and keep one row per horse: the row
# whose '日付' value is the maximum for that horse.
# NOTE: this rebinds `race_data` (previously the prediction input) to the
# rated history; `encoded` was copied from the old frame and is unaffected.
rank_file_path = 'D://work//Programming//predict//LGBMRanker02.5//ranked_race_data.csv'
race_data = pd.read_csv(filepath_or_buffer=rank_file_path)

# Group by horse ID and look up the row label of each horse's maximum '日付'.
check = race_data.groupby('馬のID')
_latest_row_labels = check['日付'].idxmax()
sorted_by_rate = race_data.loc[_latest_row_labels, :].copy(deep=True)


In [24]:
# Seed one TrueSkill rating per horse from the rated history: the stored
# '最新レート' value is used as mu; sigma is not passed to create_rating.
all_horse_rate_dict = {
    horse_id: env.create_rating(mu=latest_rate)
    for horse_id, latest_rate in zip(sorted_by_rate['馬のID'], sorted_by_rate['最新レート'])
}

# Distinct horse IDs that appear in the data to be predicted.
uma_list = list(encoded['馬のID'].unique())

In [25]:
# Horses that appear only in the prediction data have no stored rating yet;
# give them the environment's default rating. Existing entries are untouched.
for uma_id in uma_list:
    all_horse_rate_dict.setdefault(uma_id, env.create_rating())

In [26]:
# Replay every race group through TrueSkill and attach two columns per runner:
#   '直前レート' - exposed rating before the race
#   '最新レート' - exposed rating after the race
# The per-race frames are collected in a list and concatenated once at the end.
# (The previous version built the initial frame from `list.append(...)`, which
# evaluates to None, and re-concatenated the growing frame on every iteration.)
rated_races = []
for race_id, df_race in tqdm.tqdm(grouped_data):
    # Look the runners up by column name rather than by positional index.
    horse_ids = df_race['馬のID'].tolist()

    rate_before = [env.expose(all_horse_rate_dict[horse_id]) for horse_id in horse_ids]

    # One single-horse team per runner.
    teams = [(all_horse_rate_dict[horse_id],) for horse_id in horse_ids]
    # Ranks passed to TrueSkill are the finishing positions minus one.
    teams = env.rate(teams, ranks=(df_race['着順'] - 1).tolist())
    rate_after = [env.expose(team[0]) for team in teams]

    # Keep whichever of the old and the newly computed rating compares greater
    # (ordering as defined by trueskill's Rating objects).
    for horse_id, (new_rating,) in zip(horse_ids, teams):
        all_horse_rate_dict[horse_id] = max(all_horse_rate_dict[horse_id], new_rating)

    # assign() returns a new frame, so the groupby slice itself is not mutated.
    rated_races.append(
        df_race.assign(**{'直前レート': rate_before, '最新レート': rate_after})
    )

if rated_races:
    result = pd.concat(rated_races)
else:
    # No races at all: keep an empty frame with the expected columns.
    result = pd.DataFrame(columns=[*encoded.columns, '直前レート', '最新レート'])

100%|██████████| 552/552 [00:03<00:00, 182.72it/s]


In [27]:
import pickle

# Location of the pickled model file.
file_dir = const.BASE_DIR+'predict\\LGBMRanker02.5\\LGBMRanker02.5.pickle'

# Feature list for prediction: const.VARIABLE plus '直前レート', minus '着順関連度'.
# A deep copy is modified so that const.VARIABLE itself stays unchanged.
target_variables = copy.deepcopy(const.VARIABLE)
target_variables.append('直前レート')
target_variables.remove('着順関連度')
X_test_predict = result[target_variables]

# Load the model; the context manager closes the file handle afterwards.
# WARNING: pickle.load can execute arbitrary code -- only load trusted files.
with open(file_dir, 'rb') as model_file:
    model = pickle.load(model_file)

# Score every runner with the model's best iteration.
_y_pred = model.predict(X_test_predict, num_iteration=model.best_iteration_)

# Collect the identifying columns together with the predicted score.
predict = pd.DataFrame({
    'レースID': X_test_predict['レースID'],
    '馬番': X_test_predict['馬番'],
    '馬のID': X_test_predict['馬のID'],
    '予想スコア':  _y_pred,
})

In [28]:
# Return rate when a 100-yen win (tansho) ticket is bought on the
# highest-scored horse of every race.
#
# Previously recorded result:
#   purchased:   55200
#   returned:    42990.0
#   return rate: 0.778804347826087

purchase_amaount = 0
return_amount = 0
purchase_count = 0
hit_count = 0

for race_id, race_info in predict.groupby('レースID'):
    # Highest predicted score first; keep only the top row.
    target_horse = race_info.sort_values(by='予想スコア', ascending=False).head(1)
    top_umaban = target_horse['馬番'].values[0]
    _return = calc.calculate_return_tansho(refund, race_id, 100, top_umaban)

    # The ticket is bought for every race.
    purchase_amaount += 100
    purchase_count += 1

    # A return of exactly 0 is counted as a miss.
    if _return == 0:
        continue
    return_amount += _return
    hit_count += 1

print(f'購入金額:{purchase_amaount}')
print(f'返還金額:{return_amount}')
print(f'的中率{hit_count/purchase_count}')
print(f'回収率:{return_amount/purchase_amaount}')

購入金額:55200
返還金額:42270.0
的中率0.05434782608695652
回収率:0.7657608695652174


In [29]:
# Return rate when a 100-yen win (tansho) ticket is bought only for races
# whose highest-scored horse reaches a score threshold.
#
# Previously recorded result:
#   purchased:   42700
#   returned:    35020.0
#   return rate: 0.8201405152224824
#
# NOTE(review): the output saved with this cell shows 55200 purchased, the same
# total as betting on every race, i.e. no race was skipped in that run --
# confirm the threshold still filters as intended.

# Threshold: mean of all strictly positive predicted scores.
threshold = predict.loc[predict['予想スコア'] > 0, '予想スコア'].mean()

purchase_amaount = 0
return_amount = 0
purchase_count = 0
hit_count = 0

for race_id, race_info in predict.groupby('レースID'):
    target_horse = race_info.sort_values(by='予想スコア', ascending=False)[0:1]
    # Skip races whose best score is below the threshold.
    if target_horse['予想スコア'].values[0] < threshold:
        continue
    _return = calc.calculate_return_tansho(refund, race_id, 100, target_horse['馬番'].values[0])
    # A non-zero return counts as a hit.
    if _return != 0:
        return_amount += _return
        hit_count += 1
    purchase_amaount += 100
    purchase_count += 1

# If every race was filtered out there is nothing to divide by; report NaN
# instead of raising ZeroDivisionError.
hit_rate = hit_count / purchase_count if purchase_count else float('nan')
return_rate = return_amount / purchase_amaount if purchase_amaount else float('nan')

print('購入金額:{}'.format(purchase_amaount))
print('返還金額:{}'.format(return_amount))
print('的中率{}'.format(hit_rate))
print('回収率:{}'.format(return_rate))

購入金額:55200
返還金額:42270.0
的中率0.05434782608695652
回収率:0.7657608695652174


In [30]:
# Return rate when a 100-yen place (hukusho) ticket is bought on the
# highest-scored horse of every race.
#
# Previously recorded result:
#   purchased:   55200
#   returned:    47000.0
#   return rate: 0.8514492753623188

purchase_amaount = 0
return_amount = 0
purchase_count = 0
hit_count = 0

for race_id, race_info in predict.groupby('レースID'):
    # Highest predicted score first; keep only the top row.
    target_horse = race_info.sort_values(by='予想スコア', ascending=False).head(1)
    top_umaban = target_horse['馬番'].values[0]
    _return = calc.calculate_return_hukusho(refund, race_id, 100, top_umaban)

    # The ticket is bought for every race.
    purchase_amaount += 100
    purchase_count += 1

    # A return of exactly 0 is counted as a miss.
    if _return == 0:
        continue
    return_amount += _return
    hit_count += 1

print(f'購入金額:{purchase_amaount}')
print(f'返還金額:{return_amount}')
print(f'的中率{hit_count/purchase_count}')
print(f'回収率:{return_amount/purchase_amaount}')

購入金額:55200
返還金額:38780.0
的中率0.19021739130434784
回収率:0.702536231884058


In [31]:
# Return rate when a 100-yen wide ticket is bought on the two highest-scored
# horses of every race.
#
# Previously recorded result:
#   purchased:   55200
#   returned:    43820.0
#   return rate: 0.7938405797101449

purchase_amaount = 0
return_amount = 0
purchase_count = 0
hit_count = 0

for race_id, race_info in predict.groupby('レースID'):
    # Highest predicted score first; keep the top two rows.
    target_horses = race_info.sort_values(by='予想スコア', ascending=False).head(2)
    top_two_umaban = target_horses['馬番'].values
    _return = calc.calculate_return_wide(refund, race_id, 100, top_two_umaban)

    # The ticket is bought for every race.
    purchase_amaount += 100
    purchase_count += 1

    # A return of exactly 0 is counted as a miss.
    if _return == 0:
        continue
    return_amount += _return
    hit_count += 1

print(f'購入金額:{purchase_amaount}')
print(f'返還金額:{return_amount}')
print(f'的中率{hit_count/purchase_count}')
print(f'回収率:{return_amount/purchase_amaount}')

購入金額:55200
返還金額:35220.0
的中率0.030797101449275364
回収率:0.6380434782608696


In [32]:
# Return rate when the top fifth of each race (by predicted score) is bought
# as a trio (3renpuku) box, 100 yen per combination.
#
# Previously recorded result:
#   purchased:   31300
#   returned:    28610.0
#   return rate: 0.9140575079872204
#
# NOTE(review): the output saved with this cell shows zero returns although the
# previously recorded result was non-zero -- verify that the horse numbers
# passed to calc.calculate_return_3renpuku have the type it expects.

from itertools import combinations

purchase_amaount = 0
return_amount = 0
purchase_count = 0
hit_count = 0

for race_id, race_info in predict.groupby('レースID'):
    # Buy one fifth of the field, rounded down.
    target_num = len(race_info) // 5
    target_horses = race_info.sort_values(by='予想スコア', ascending=False)[0:target_num]
    # Every unordered triple of the selected horses (empty when fewer than 3).
    combinations_list = list(combinations(target_horses['馬番'].values, 3))
    for combination in combinations_list:
        _return = calc.calculate_return_3renpuku(refund, race_id, 100, list(combination))
        # A non-zero return counts as a hit.
        if _return != 0:
            return_amount += _return
            hit_count += 1
        purchase_amaount += 100
        purchase_count += 1

# Races with fewer than 15 runners yield no tickets; if that is true for every
# race, report NaN instead of raising ZeroDivisionError.
hit_rate = hit_count / purchase_count if purchase_count else float('nan')
return_rate = return_amount / purchase_amaount if purchase_amaount else float('nan')

print('購入金額:{}'.format(purchase_amaount))
print('返還金額:{}'.format(return_amount))
print('的中率{}'.format(hit_rate))
print('回収率:{}'.format(return_rate))

購入金額:31300
返還金額:0
的中率0.0
回収率:0.0


In [33]:
# Return rate when the top sixth of each race (by predicted score) is bought
# as a trio (3renpuku) box, 100 yen per combination.
# First configuration whose recorded return rate exceeded 100%.
#
# Previously recorded result:
#   purchased:   3400
#   returned:    4059.9999999999995
#   hit rate:    0.08823529411764706
#   return rate: 1.1941176470588235
#
# NOTE(review): the output saved with this cell shows zero returns although the
# previously recorded result was non-zero -- verify that the horse numbers
# passed to calc.calculate_return_3renpuku have the type it expects.

from itertools import combinations

purchase_amaount = 0
return_amount = 0
purchase_count = 0
hit_count = 0

for race_id, race_info in predict.groupby('レースID'):
    # Buy one sixth of the field, rounded down.
    target_num = len(race_info) // 6
    target_horses = race_info.sort_values(by='予想スコア', ascending=False)[0:target_num]
    # Every unordered triple of the selected horses (empty when fewer than 3).
    combinations_list = list(combinations(target_horses['馬番'].values, 3))
    for combination in combinations_list:
        _return = calc.calculate_return_3renpuku(refund, race_id, 100, list(combination))
        # A non-zero return counts as a hit.
        if _return != 0:
            return_amount += _return
            hit_count += 1
        purchase_amaount += 100
        purchase_count += 1

# Races with fewer than 18 runners yield no tickets; if that is true for every
# race, report NaN instead of raising ZeroDivisionError.
hit_rate = hit_count / purchase_count if purchase_count else float('nan')
return_rate = return_amount / purchase_amaount if purchase_amaount else float('nan')

print('購入金額:{}'.format(purchase_amaount))
print('返還金額:{}'.format(return_amount))
print('的中率{}'.format(hit_rate))
print('回収率:{}'.format(return_rate))

購入金額:3400
返還金額:0
的中率0.0
回収率:0.0


In [34]:
# Return rate when the top fifth of each race (by predicted score) is bought
# as a trifecta (3rentan) box, 100 yen per ordered triple.
#
# Previously recorded result:
#   purchased:   187800
#   returned:    177490.0
#   return rate: 0.945101171458999
#
# NOTE(review): the output saved with this cell shows zero returns although the
# previously recorded result was non-zero -- verify that the horse numbers
# passed to calc.calculate_return_3rentan have the type it expects.

from itertools import permutations

purchase_amaount = 0
return_amount = 0
purchase_count = 0
hit_count = 0

for race_id, race_info in predict.groupby('レースID'):
    # Buy one fifth of the field, rounded down.
    target_num = len(race_info) // 5
    target_horses = race_info.sort_values(by='予想スコア', ascending=False)[0:target_num]
    # Every ordered triple of the selected horses (empty when fewer than 3).
    combinations_list = list(permutations(target_horses['馬番'].values, 3))
    for combination in combinations_list:
        _return = calc.calculate_return_3rentan(refund, race_id, 100, list(combination))
        # A non-zero return counts as a hit.
        if _return != 0:
            return_amount += _return
            hit_count += 1
        purchase_amaount += 100
        purchase_count += 1

# Races with fewer than 15 runners yield no tickets; if that is true for every
# race, report NaN instead of raising ZeroDivisionError.
hit_rate = hit_count / purchase_count if purchase_count else float('nan')
return_rate = return_amount / purchase_amaount if purchase_amaount else float('nan')

print('購入金額:{}'.format(purchase_amaount))
print('返還金額:{}'.format(return_amount))
print('的中率{}'.format(hit_rate))
print('回収率:{}'.format(return_rate))

購入金額:187800
返還金額:0
的中率0.0
回収率:0.0
