In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import classification_report

from joblib import load

In [2]:
# Read the player-level table from its parquet file.
player_df_dropped_row = pd.read_parquet(
    path='parquet_data/player_df_missing_handled.parquet'
)
# Show only the first five rows as a sanity check.
player_df_dropped_row.head(5)

Unnamed: 0,Date,Opponent,Result,S,Kills,Errors,Total Attacks,Hit Pct,Assists,SErr,Digs,Block Assists,PTS,name,team_name
0,09/17/2016,@ Yale,L 1 - 3,4.0,14.0,3.0,37.0,0.297,3.0,1.0,5.0,3.0,16.5,"Carlson, Morgan",A&M-Corpus Christi (Southland)
1,10/15/2016,Lamar University,W 3 - 0,3.0,8.0,3.0,20.0,0.25,3.0,1.0,3.0,4.0,10.0,"Carlson, Morgan",A&M-Corpus Christi (Southland)
2,10/20/2016,@ New Orleans,W 3 - 1,4.0,9.0,3.0,21.0,0.286,1.0,2.0,5.0,3.0,11.5,"Carlson, Morgan",A&M-Corpus Christi (Southland)
3,11/03/2016,McNeese,W 3 - 0,3.0,9.0,1.0,17.0,0.471,1.0,1.0,6.0,2.0,10.0,"Carlson, Morgan",A&M-Corpus Christi (Southland)
4,11/12/2016,@ Houston Baptist,W 3 - 1,4.0,7.0,6.0,21.0,0.048,1.0,2.0,8.0,4.0,9.0,"Carlson, Morgan",A&M-Corpus Christi (Southland)


In [3]:
# Keep the per-game numeric statistics plus the player name used later for grouping.
X_player = player_df_dropped_row.loc[
    :,
    [
        'S', 'Kills', 'Errors', 'Total Attacks', 'Hit Pct',
        'Assists', 'SErr', 'Digs', 'Block Assists', 'PTS',
        'name',
    ],
]

X_player

Unnamed: 0,S,Kills,Errors,Total Attacks,Hit Pct,Assists,SErr,Digs,Block Assists,PTS,name
0,4.0,14.0,3.0,37.0,0.297,3.0,1.0,5.0,3.0,16.5,"Carlson, Morgan"
1,3.0,8.0,3.0,20.0,0.250,3.0,1.0,3.0,4.0,10.0,"Carlson, Morgan"
2,4.0,9.0,3.0,21.0,0.286,1.0,2.0,5.0,3.0,11.5,"Carlson, Morgan"
3,3.0,9.0,1.0,17.0,0.471,1.0,1.0,6.0,2.0,10.0,"Carlson, Morgan"
4,4.0,7.0,6.0,21.0,0.048,1.0,2.0,8.0,4.0,9.0,"Carlson, Morgan"
...,...,...,...,...,...,...,...,...,...,...,...
30796,3.0,9.0,8.0,32.0,0.031,1.0,2.0,12.0,3.0,13.5,"Pe'a, Brooklen"
30797,3.0,10.0,3.0,38.0,0.184,1.0,3.0,12.0,1.0,10.5,"Thompson, Margaux"
30798,5.0,17.0,6.0,37.0,0.297,1.0,5.0,21.0,1.0,20.5,"Thompson, Margaux"
30799,3.0,3.0,7.0,26.0,-0.154,1.0,2.0,12.0,1.0,3.5,"Thompson, Margaux"


In [4]:
# Split the identifier column from the numeric feature columns.
names = X_player['name']
numeric_df = X_player.drop(columns=['name'])

# Standardize every feature column (StandardScaler: zero mean, unit variance).
# NOTE(review): the scaler is re-fit here on the player rows; confirm this matches
# the preprocessing used when the loaded model was trained.
scaler = StandardScaler()
normalized_data = scaler.fit_transform(numeric_df)

# Rebuild a DataFrame from the scaled array and re-attach 'name'.
# The original index is reused on purpose: assigning `names` aligns on index labels,
# so a fresh 0..n-1 index would mis-assign names whenever the source frame's index
# is not a default RangeIndex (e.g. after rows were dropped).
normalized_df = pd.DataFrame(
    normalized_data,
    columns=numeric_df.columns,
    index=numeric_df.index,
)
normalized_df['name'] = names
normalized_df

Unnamed: 0,S,Kills,Errors,Total Attacks,Hit Pct,Assists,SErr,Digs,Block Assists,PTS,name
0,0.134070,1.035165,-0.043822,0.942644,0.234254,-0.627642,-0.782683,-0.686138,0.573142,1.032393,"Carlson, Morgan"
1,-1.096044,0.003278,-0.043822,-0.146944,0.039906,-0.627642,-0.782683,-1.098645,1.272673,-0.026967,"Carlson, Morgan"
2,0.134070,0.175259,-0.043822,-0.082851,0.188768,-0.736927,0.278430,-0.686138,0.573142,0.217500,"Carlson, Morgan"
3,-1.096044,0.175259,-0.746456,-0.339224,0.953754,-0.736927,-0.782683,-0.479885,-0.126388,-0.026967,"Carlson, Morgan"
4,0.134070,-0.168704,1.010130,-0.082851,-0.795376,-0.736927,0.278430,-0.067378,1.272673,-0.189946,"Carlson, Morgan"
...,...,...,...,...,...,...,...,...,...,...,...
30796,-1.096044,0.175259,1.712764,0.622177,-0.865672,-0.736927,0.278430,0.757635,0.573142,0.543457,"Pe'a, Brooklen"
30797,-1.096044,0.347240,-0.043822,1.006737,-0.233008,-0.736927,1.339542,0.757635,-0.825919,0.054522,"Thompson, Margaux"
30798,1.364185,1.551109,1.010130,0.942644,0.234254,-0.736927,3.461767,2.613914,-0.825919,1.684306,"Thompson, Margaux"
30799,-1.096044,-0.856629,1.361447,0.237617,-1.630658,-0.736927,0.278430,0.757635,-0.825919,-1.086327,"Thompson, Margaux"


In [5]:
# One row per player name: the mean of every numeric (standardized) column.
# Grouping is by name only, so rows from all games sharing a name are pooled.
averaged_df = normalized_df.groupby('name', as_index=False).mean(numeric_only=True)

# Result
averaged_df

Unnamed: 0,name,S,Kills,Errors,Total Attacks,Hit Pct,Assists,SErr,Digs,Block Assists,PTS
0,"Abalos, Jordan",-0.173458,1.271640,1.712764,1.311181,-0.224738,-0.716436,-0.119488,0.706071,0.048494,1.134254
1,"Abbott, Maddy",-1.096044,-1.372573,-0.043822,-1.108345,-3.474894,1.284848,-0.782683,-1.098645,-0.825919,-1.249306
2,"Abbott, Symone",0.022242,1.363493,1.105944,1.432084,-0.126248,-0.702154,0.760753,-0.123629,-0.380763,1.128698
3,"Abelman, Makenzi",-0.357976,-1.063006,-1.027510,-1.095526,0.479049,1.175563,-0.782683,0.221376,-0.126388,-1.135221
4,"Abram, Dree'Ana",0.031561,0.103600,-0.102375,-0.040122,0.079189,-0.736927,-0.428979,-1.218959,0.748025,0.237873
...,...,...,...,...,...,...,...,...,...,...,...
4145,"de Jesus, Sharlissa",-0.010649,0.691203,1.092793,0.863470,-0.359249,-0.698356,0.652940,0.430056,-0.496728,0.725610
4146,"de Lara, Yasmin",-0.275968,-0.054050,0.307495,0.045336,-0.559677,-0.736927,-0.782683,-0.411134,-0.592742,-0.189946
4147,"van Heijningen, Eline",1.364185,-0.598657,-0.570798,-0.819925,0.163958,-0.736927,0.278430,-1.304898,0.223377,-0.556647
4148,"van der Biezen, Taylor",0.072564,-0.985615,-1.027510,-1.053866,0.500138,1.385937,-0.092960,0.221376,-0.371224,-0.805189


In [6]:
from joblib import load

# Load the previously saved model.
# NOTE(review): joblib.load unpickles the file, which can execute arbitrary code;
# only load model files from a trusted source.
loaded_model = load(filename='logistic_regression_model_team.joblib')


In [53]:
def test_model(selected_players):
    """Predict a class for a roster from the mean of its players' averaged features.

    Rows of the notebook-level ``averaged_df`` whose ``name`` is in
    ``selected_players`` are averaged column-wise (numeric columns only) and the
    resulting single feature vector is passed to ``loaded_model.predict``.

    Parameters
    ----------
    selected_players : list-like of str
        Player names to look up in ``averaged_df['name']``. Names that are not
        present are ignored without notice.

    Returns
    -------
    The first (and only) element of the model's prediction array.

    Raises
    ------
    ValueError
        If none of the given names is found in ``averaged_df``. Previously this
        case produced an all-NaN feature vector and failed inside the model.
    """
    filtered_df = averaged_df[averaged_df['name'].isin(selected_players)]

    # Without at least one matching player the mean is all-NaN; fail early and clearly.
    if filtered_df.empty:
        raise ValueError(
            "test_model: none of the selected players were found in averaged_df"
        )

    # Column-wise mean of the selected players' features.
    averages = filtered_df.mean(numeric_only=True)

    # The model expects a 2-D array with a single row (1 sample, n features).
    features = averages.to_numpy().reshape(1, -1)
    new_predictions = loaded_model.predict(features)
    return new_predictions[0]

Baylor takımı yine kazanmış: https://pvpanthers.com/sports/womens-volleyball/stats/2016/baylor/boxscore/2972

In [54]:
# Predict the class for this roster and print it as a plain int.
# NOTE(review): this roster is identical to the one used in the later
# "Houston Baptist" cell — confirm which team these players belong to.
result = test_model(
    selected_players=[
        "Wooten, Jessica",
        "Banks, Bailey",
        "O'Brien, Madison",
        "Gillard, Blair",
        "Erickson, Hailey",
        "Davenport, Kayla",
        "Richey, Ashtyn",
        "Lewis, Hannah",
        "Davis, Makenna",
        "Kankel, Courtney",
        "Vivens, Mikayla",
    ]
)
print("model output: ", int(result))

model output:  1


Prairie View takımı, Houston Baptist'e karşı kaybetmiş takım

In [55]:
# Predict the class for this ten-player roster and print the raw model output.
result = test_model(
    selected_players=[
        "Morton, Hailee",
        "Franklin, Mykaeli",
        "Brown, Latrice",
        "Pearson, Victoria",
        "Goudeau, Taylor",
        "Martinez, Larissa",
        "Augustine, Christen",
        "Jimenez, Erica",
        "Smith, Destiny",
        "Mitchell, Britny",
    ]
)
print("model output: ", result)

model output:  0


Prairie View, Baylor'a karşı kaybetmiş takım

In [56]:
# Predict the class for this eleven-player roster and print the raw model output.
result = test_model(
    selected_players=[
        "Jimenez, Erica",
        "Hensch, Calli-Jordan",
        "Walker, Sydney",
        "Franklin, Mykaeli",
        "Tapaevalu, Asena",
        "Martinez, Larissa",
        "Augustine, Christen",
        "Kirkham, Kira",
        "Harris, Emonie",
        "Gonzales, Sommer",
        "Martinez, Marina",
    ]
)
print("model output: ", result)

model output:  0


Houston Baptist takımı kazanmış: https://pvpanthers.com/sports/womens-volleyball/stats/2017/houston-baptist/boxscore/3493

In [57]:
# Predict the class for this roster and print the raw model output.
# NOTE(review): same eleven names as the earlier cell labelled "Baylor" — confirm
# which of the two labels is correct.
result = test_model(
    selected_players=[
        "Wooten, Jessica",
        "Banks, Bailey",
        "O'Brien, Madison",
        "Gillard, Blair",
        "Erickson, Hailey",
        "Davenport, Kayla",
        "Richey, Ashtyn",
        "Lewis, Hannah",
        "Davis, Makenna",
        "Kankel, Courtney",
        "Vivens, Mikayla",
    ]
)
print("model output: ", result)

model output:  1


Boise State kazanmış: https://broncosports.com/news/2017/8/25/Broncos_Sweep_NDSU_in_Season_Opener

In [58]:
# Predict the class for this nine-player roster and print the raw model output.
result = test_model(
    selected_players=[
        "Nobley, Sierra",
        "Roberts, Sabryn",
        "Walley, Janell",
        "Lewis, Kiley",
        "Hayes, Laney",
        "Bulda, Jaymee-Lee",
        "Christensen, Celine",
        "Simon, Cassedi",
        "Osburn, Maddi",
    ]
)
print("model output: ", result)

model output:  1


North Dakota State kaybetmiş: https://broncosports.com/news/2017/8/25/Broncos_Sweep_NDSU_in_Season_Opener

In [59]:
# Predict the class for this nine-player roster and print the raw model output.
result = test_model(
    selected_players=[
        "Burke, McKenzie",
        "Gelzinyte, Erika",
        "Mauch, Allie",
        "Lien, Bella",
        "Halverson, Emily",
        "Purnell, Mikaela",
        "Erickson, Alex",
        "Klos, Abbi",
        "Rasmusson, Brianna",
    ]
)
print("model output: ", result)

model output:  0


Şimdi tek tek takımların kadrosuna bakıp modeli denemek çok zor olacak; o yüzden şöyle yapacağız: her takımın kazanma-kaybetme ortalamasına bakacağız, hangisi yüksekse, o takımın oyuncularının ortalamasını alıp modele verdiğimizde o çıktının gelmesini bekleyeceğiz.

Önce bütün takımların kazanma-kaybetme oranını hesaplayalım:

In [60]:
# Load the team data
# Read the team-level table from its parquet file.
team_df_dropped_row = pd.read_parquet(
    path='parquet_data/team_df_missing_handled.parquet'
)
team_df_dropped_row

Unnamed: 0,Date,Opponent,Result,S,Kills,Errors,Total Attacks,Hit Pct,Assists,Aces,SErr,Digs,RErr,Block Assists,PTS,name
0,08/26/2016,"Prairie View @ Waco, Texas",W 3 - 0,3.0,46.0,13.0,107.0,0.308,40.0,6.0,11.0,51.0,3.0,2.0,53.0,A&M-Corpus Christi (Southland)
1,08/27/2016,@ Baylor,L 1 - 3,4.0,37.0,20.0,119.0,0.143,32.0,5.0,6.0,46.0,5.0,18.0,53.0,A&M-Corpus Christi (Southland)
2,09/03/2016,"San Diego @ Madison, Wis.",L 0 - 3,3.0,30.0,16.0,94.0,0.149,29.0,1.0,3.0,32.0,1.0,15.0,42.5,A&M-Corpus Christi (Southland)
3,09/06/2016,UTRGV,W 3 - 1,4.0,53.0,20.0,158.0,0.209,49.0,3.0,9.0,66.0,2.0,16.0,66.0,A&M-Corpus Christi (Southland)
4,09/09/2016,UNLV,L 1 - 3,4.0,44.0,30.0,166.0,0.084,42.0,1.0,3.0,66.0,2.0,8.0,52.0,A&M-Corpus Christi (Southland)
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
35350,11/01/2019,Cleveland St.,L 0 - 3,3.0,27.0,17.0,101.0,0.099,27.0,3.0,3.0,38.0,5.0,8.0,34.0,Youngstown St. (Horizon)
35351,11/03/2019,@ Oakland,L 0 - 3,3.0,26.0,22.0,119.0,0.034,24.0,5.0,7.0,40.0,4.0,6.0,35.0,Youngstown St. (Horizon)
35352,11/06/2019,IUPUI,L 2 - 3,5.0,56.0,20.0,149.0,0.242,53.0,8.0,8.0,63.0,8.0,12.0,71.0,Youngstown St. (Horizon)
35353,11/15/2019,Wright St.,L 0 - 3,3.0,28.0,19.0,113.0,0.080,27.0,1.0,4.0,43.0,6.0,6.0,32.0,Youngstown St. (Horizon)


In [61]:
# Encode the match result from its first character: 'W...' -> 1, 'L...' -> 0.
# Anything else (non-string values, other prefixes, empty strings) becomes None.
team_df_dropped_row['Success'] = team_df_dropped_row['Result'].apply(
    lambda x: {'W': 1, 'L': 0}.get(x[:1]) if isinstance(x, str) else None
)

# Parse the date column so the calendar year can be extracted.
team_df_dropped_row['Date'] = pd.to_datetime(team_df_dropped_row['Date'])

# Create the year column.
team_df_dropped_row['Year'] = team_df_dropped_row['Date'].dt.year
team_df_dropped_row

Unnamed: 0,Date,Opponent,Result,S,Kills,Errors,Total Attacks,Hit Pct,Assists,Aces,SErr,Digs,RErr,Block Assists,PTS,name,Success,Year
0,2016-08-26,"Prairie View @ Waco, Texas",W 3 - 0,3.0,46.0,13.0,107.0,0.308,40.0,6.0,11.0,51.0,3.0,2.0,53.0,A&M-Corpus Christi (Southland),1,2016
1,2016-08-27,@ Baylor,L 1 - 3,4.0,37.0,20.0,119.0,0.143,32.0,5.0,6.0,46.0,5.0,18.0,53.0,A&M-Corpus Christi (Southland),0,2016
2,2016-09-03,"San Diego @ Madison, Wis.",L 0 - 3,3.0,30.0,16.0,94.0,0.149,29.0,1.0,3.0,32.0,1.0,15.0,42.5,A&M-Corpus Christi (Southland),0,2016
3,2016-09-06,UTRGV,W 3 - 1,4.0,53.0,20.0,158.0,0.209,49.0,3.0,9.0,66.0,2.0,16.0,66.0,A&M-Corpus Christi (Southland),1,2016
4,2016-09-09,UNLV,L 1 - 3,4.0,44.0,30.0,166.0,0.084,42.0,1.0,3.0,66.0,2.0,8.0,52.0,A&M-Corpus Christi (Southland),0,2016
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
35350,2019-11-01,Cleveland St.,L 0 - 3,3.0,27.0,17.0,101.0,0.099,27.0,3.0,3.0,38.0,5.0,8.0,34.0,Youngstown St. (Horizon),0,2019
35351,2019-11-03,@ Oakland,L 0 - 3,3.0,26.0,22.0,119.0,0.034,24.0,5.0,7.0,40.0,4.0,6.0,35.0,Youngstown St. (Horizon),0,2019
35352,2019-11-06,IUPUI,L 2 - 3,5.0,56.0,20.0,149.0,0.242,53.0,8.0,8.0,63.0,8.0,12.0,71.0,Youngstown St. (Horizon),0,2019
35353,2019-11-15,Wright St.,L 0 - 3,3.0,28.0,19.0,113.0,0.080,27.0,1.0,4.0,43.0,6.0,6.0,32.0,Youngstown St. (Horizon),0,2019


In [62]:
# Keep only the columns needed to compute a per-team, per-year win rate.
team_df = team_df_dropped_row.loc[:, ['name', 'Success', 'Year']]
team_df

Unnamed: 0,name,Success,Year
0,A&M-Corpus Christi (Southland),1,2016
1,A&M-Corpus Christi (Southland),0,2016
2,A&M-Corpus Christi (Southland),0,2016
3,A&M-Corpus Christi (Southland),1,2016
4,A&M-Corpus Christi (Southland),0,2016
...,...,...,...
35350,Youngstown St. (Horizon),0,2019
35351,Youngstown St. (Horizon),0,2019
35352,Youngstown St. (Horizon),0,2019
35353,Youngstown St. (Horizon),0,2019


In [63]:
# Mean of the 0/1 Success flag per (team, year), i.e. the share of matches won.
team_average_success = team_df.groupby(['name', 'Year'], as_index=False)['Success'].mean()
team_average_success

Unnamed: 0,name,Year,Success
0,A&M-Corpus Christi (Southland),2016,0.766667
1,A&M-Corpus Christi (Southland),2017,0.600000
2,A&M-Corpus Christi (Southland),2018,0.300000
3,A&M-Corpus Christi (Southland),2019,0.640000
4,Abilene Christian (Southland),2017,0.392857
...,...,...,...
1322,Yale (Ivy League),2019,0.714286
1323,Youngstown St. (Horizon),2016,0.222222
1324,Youngstown St. (Horizon),2017,0.285714
1325,Youngstown St. (Horizon),2018,0.269231


In [64]:
# Work on an independent copy so the win-rate table itself stays unchanged.
team_years = team_average_success.copy(deep=True)

Her takımın yıllara göre ortalama başarısını aldık; şimdi takımların her yıla ait oyuncu bilgileriyle modeli test edeceğiz.

In [103]:
# Binarize the win rate: strictly above 0.5 -> 1, otherwise (including exactly 0.5) -> 0.
team_years['Success'] = team_years['Success'].gt(0.5).astype('int64')
team_years

Unnamed: 0,name,Year,Success
0,A&M-Corpus Christi (Southland),2016,1
1,A&M-Corpus Christi (Southland),2017,1
2,A&M-Corpus Christi (Southland),2018,0
3,A&M-Corpus Christi (Southland),2019,1
4,Abilene Christian (Southland),2017,0
...,...,...,...
1322,Yale (Ivy League),2019,1
1323,Youngstown St. (Horizon),2016,0
1324,Youngstown St. (Horizon),2017,0
1325,Youngstown St. (Horizon),2018,0


In [104]:
# Labels for the 2016 season only.
team_2016 = team_years.loc[team_years['Year'].eq(2016)]
team_2016

Unnamed: 0,name,Year,Success
0,A&M-Corpus Christi (Southland),2016,1
9,Air Force (Mountain West),2016,0
11,Akron (MAC),2016,0
15,Alabama (SEC),2016,1
19,Alabama A&M (SWAC),2016,0
...,...,...,...
1307,Wright St. (Horizon),2016,0
1313,Wyoming (Mountain West),2016,1
1315,Xavier (Big East),2016,1
1319,Yale (Ivy League),2016,1


In [105]:
# Labels for each of the remaining seasons.
team_2017 = team_years.loc[team_years['Year'].eq(2017)]
team_2018 = team_years.loc[team_years['Year'].eq(2018)]
team_2019 = team_years.loc[team_years['Year'].eq(2019)]


Her takımı yıllara göre ayırdık. Takım, yıl ve başarıya göre bir df elde ettik.

Şimdi karmaşık yere geldik.

In [106]:
import os

In [107]:

def accuracy_team(root_folder, team_df):
    """Score the model's roster predictions against per-team season labels.

    Every sub-directory of ``root_folder`` is treated as one team: the directory
    name is looked up in ``team_df['name']`` and the names of the ``.csv`` files
    inside it (without extension) are passed to ``test_model`` as the roster.
    The integer prediction is compared with the team's ``Success`` value.

    Parameters
    ----------
    root_folder : str
        Directory containing one sub-directory per team. A four-digit path
        component, if present, is used as the season label in the report;
        otherwise the second path component is used.
    team_df : pandas.DataFrame
        Must have ``name`` and ``Success`` columns. If a team appears more than
        once, its first row is used.

    Returns
    -------
    float
        Fraction of evaluated teams where prediction and label agree, or ``nan``
        when no team could be evaluated. The same value is printed. Because the
        value is now returned, a bare call as the last line of a cell also
        displays it.

    Notes
    -----
    Team folders with no CSV files, no matching row in ``team_df``, or a missing
    label are skipped and reported instead of aborting the whole run.
    """
    # Season label for the report: prefer a 4-digit path component so the result
    # does not depend on how many directories precede it or on the path separator.
    parts = os.path.normpath(root_folder).split(os.sep)
    year = next((p for p in parts if len(p) == 4 and p.isdigit()), None)
    if year is None:
        year = parts[1] if len(parts) > 1 else root_folder

    right = 0
    wrong = 0
    skipped = []
    for folder_name in sorted(os.listdir(root_folder)):
        folder_path = os.path.join(root_folder, folder_name)

        # Only work with directories.
        if not os.path.isdir(folder_path):
            continue

        # Names of the CSV files in this directory, with the extension removed.
        csv_names = [
            os.path.splitext(f)[0]
            for f in os.listdir(folder_path)
            if f.endswith('.csv')
        ]
        labels = team_df.loc[team_df['name'] == folder_name, 'Success']

        # Nothing to predict from, or nothing to compare against.
        if not csv_names or labels.empty or pd.isna(labels.iloc[0]):
            skipped.append(folder_name)
            continue

        model_result = int(test_model(csv_names))
        team_result = int(labels.iloc[0])

        if model_result == team_result:
            right += 1
        else:
            wrong += 1

    if skipped:
        print(f"skipped {len(skipped)} team folder(s) without CSV files or a usable label: {skipped}")

    total = right + wrong
    if total == 0:
        # Avoid ZeroDivisionError when there was nothing to evaluate.
        print(f"accuracy for {year} : no teams could be evaluated")
        return float('nan')

    acc = right / total
    print(f"accuracy for {year} : {acc}")
    return acc
        

In [108]:
# Per-season directories holding the per-team player CSV folders.
(
    root_folder_2016,
    root_folder_2017,
    root_folder_2018,
    root_folder_2019,
) = (
    "volleyball_csvs/2016/player_game_wise",
    "volleyball_csvs/2017/player_game_wise",
    "volleyball_csvs/2018/player_game_wise",
    "volleyball_csvs/2019/player_game_wise",
)

In [109]:
# Evaluate the 2016 season on its own (the next cell repeats it with the other seasons).
accuracy_team(root_folder=root_folder_2016, team_df=team_2016)

accuracy for 2016 : 0.7160493827160493


In [110]:
# Evaluate every season in chronological order; each call prints its own accuracy line.
for season_root, season_labels in (
    (root_folder_2016, team_2016),
    (root_folder_2017, team_2017),
    (root_folder_2018, team_2018),
    (root_folder_2019, team_2019),
):
    accuracy_team(season_root, season_labels)

accuracy for 2016 : 0.7160493827160493
accuracy for 2017 : 0.6737160120845922
accuracy for 2018 : 0.6716867469879518
accuracy for 2019 : 0.641566265060241


Burada yaptığım şey şu: takımın o yılki kadrosundaki oyuncuların ortalama istatistiklerini lojistik regresyon modeline verdim ve modelin çıktısını, takımın o yılki galibiyet oranına (0,5 eşiği) bakarak kendi belirlediğim etiketle karşılaştırdım. Accuracy değerleri yukarıda gördüğünüz gibi çıktı.