In [86]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from os import listdir
from datetime import date

In [87]:
FOLDER_PATH = "Spanish_League_Data"
COLUMNS_TO_MANTAIN = [
                      "Div", "Date", "HomeTeam", "AwayTeam", 
                      "FTHG", "FTAG", "FTR", "HTHG", "HTAG",
                      "HTR", "HS", "AS", "HST", "AST", "HF",
                      "AF", "HC", "AC", "HY", "AY", "HR", "AR",
                      "B365H", "B365D", "B365A", "BWH", "BWD",
                      "BWA", "IWH", "IWD", "IWA",
                      "WHH", "WHD", "WHA", "VCH", "VCD", "VCA"
                     ]
READABLE_HEADER = [
                   "League Division", "Match Date", "Home Team", "Away Team", 
                   "Home Team Goals", "Away Team Goals", "Match Result", 
                   "Half Time Home Team Goals", "Half Time Away Team Goals", 
                   "Half Time Result", "Home Team Shots", "Away Team Shots", 
                   "Home Team Shots Target", "Away Team Shots Target",
                   "Home Team Faults Commited", "Away Team Faults Commited",
                   "Home Team Corners", "Away Team Corners", "Home Team Yellow Cards", 
                   "Away Team Yellow Cards", "Home Team Red Cards", "Away Team Red Cards",
                   "Bet365 Home Win Odds", "Bet365 Draw Odds", "Bet365 Away Win Odds", 
                   "Bet&Win Home Win Odds", "Bet&Win Draw Odds", "Bet&Win Away Win Odds",
                   "Interwetten Home Win Odds", "Interwetten Draw Odds", "Interwetten Away Win Odds",  
                   "William Hill Home Win Odds", "William Hill Draw Odds", "William Hill Away Win Odds",
                   "VC Bet Home Win Odds", "VC Bet Draw Odds", "VC Bet Away Win Odds"
                  ]

In [88]:
def match_result_to_numeric(x):
    if x == 'H':
        return 1
    if x == 'D':
        return 2
    if x == 'A':
        return 3

In [89]:
def fill_na_values(data):
    features_to_fill = [
                        "Interwetten Home Win Odds", "Interwetten Away Win Odds", "Interwetten Draw Odds", 
                        "William Hill Home Win Odds", "William Hill Away Win Odds", "William Hill Draw Odds", 
                        "VC Bet Home Win Odds", "VC Bet Away Win Odds", "VC Bet Draw Odds"
                       ]
    
    for feature in features_to_fill:
        temp_mean = data[feature].mean()
        data[feature] = data[feature].fillna(temp_mean)
    
    return data

In [90]:
def initialize_accumulated_statistics(data):
    features_to_initialize = [
                    "Home Team Accumulated Scored Goals", "Away Team Accumulated Scored Goals",
                    "Home Team Accumulated Received Goals", "Away Team Accumulated Received Goals",
                    "Home Team Accumulated Yellow Cards", "Away Team Accumulated Yellow Cards",
                    "Home Team Accumulated Red Cards", "Away Team Accumulated Red Cards",
                    "Home Team Accumulated Thrown Shots", "Away Team Accumulated Thrown Shots",
                    "Home Team Accumulated Received Shots", "Away Team Accumulated Received Shots",
                    "Home Team Accumulated Thrown Shots Target", "Away Team Accumulated Thrown Shots Target",
                    "Home Team Accumulated Received Shots Target", "Away Team Accumulated Received Shots Target",
                    "Home Team Accumulated Thrown Corners", "Away Team Accumulated Thrown Corners",
                    "Home Team Accumulated Received Corners", "Away Team Accumulated Received Corners",
                    "Home Team Accumulated Commited Faults", "Away Team Accumulated Commited Faults", 
                    "Home Team Accumulated Received Faults", "Away Team Accumulated Received Faults",
                    "Home Team Accumulated Won Matches", "Away Team Accumulated Won Matches",
                    "Home Team Accumulated Draw Matches", "Away Team Accumulated Draw Matches",
                    "Home Team Accumulated Lost Matches", "Away Team Accumulated Lost Matches",
                   ]
    for feature in features_to_initialize:
        data[feature] = 0
        
    return data

In [91]:
def compute_accumulated_received_faults(statistics_per_team, data):
    for i in range(len(statistics_per_team)):
        for j in range(len(data)):
            if(data.loc[j, "Home Team"] == statistics_per_team[i][0]):
                if(len(statistics_per_team[i][12]) == 0):
                    statistics_per_team[i][12].append(0)
                else:
                    statistics_per_team[i][12].append(data.loc[j, "Away Team Faults Commited"]+statistics_per_team[i][12][-1])
            if(data.loc[j, "Away Team"] == statistics_per_team[i][0]):
                if(len(statistics_per_team[i][12]) == 0):
                    statistics_per_team[i][12].append(0)
                else:
                    statistics_per_team[i][12].append(data.loc[j, "Home Team Faults Commited"]+statistics_per_team[i][12][-1])
    return data, statistics_per_team

def compute_accumulated_commited_faults(statistics_per_team, data):
    for i in range(len(statistics_per_team)):
        for j in range(len(data)):
            if(data.loc[j, "Home Team"] == statistics_per_team[i][0]):
                if(len(statistics_per_team[i][11]) == 0):
                    statistics_per_team[i][11].append(0)
                else:
                    statistics_per_team[i][11].append(data.loc[j, "Home Team Faults Commited"]+statistics_per_team[i][11][-1])
            if(data.loc[j, "Away Team"] == statistics_per_team[i][0]):
                if(len(statistics_per_team[i][11]) == 0):
                    statistics_per_team[i][11].append(0)
                else:
                    statistics_per_team[i][11].append(data.loc[j, "Away Team Faults Commited"]+statistics_per_team[i][11][-1])
    return data, statistics_per_team

def compute_accumulated_received_corners(statistics_per_team, data):
    for i in range(len(statistics_per_team)):
        for j in range(len(data)):
            if(data.loc[j, "Home Team"] == statistics_per_team[i][0]):
                if(len(statistics_per_team[i][10]) == 0):
                    statistics_per_team[i][10].append(0)
                else:
                    statistics_per_team[i][10].append(data.loc[j, "Away Team Corners"]+statistics_per_team[i][10][-1])
            if(data.loc[j, "Away Team"] == statistics_per_team[i][0]):
                if(len(statistics_per_team[i][10]) == 0):
                    statistics_per_team[i][10].append(0)
                else:
                    statistics_per_team[i][10].append(data.loc[j, "Home Team Corners"]+statistics_per_team[i][10][-1])
    return data, statistics_per_team

def compute_accumulated_thrown_corners(statistics_per_team, data):
    for i in range(len(statistics_per_team)):
        for j in range(len(data)):
            if(data.loc[j, "Home Team"] == statistics_per_team[i][0]):
                if(len(statistics_per_team[i][9]) == 0):
                    statistics_per_team[i][9].append(0)
                else:
                    statistics_per_team[i][9].append(data.loc[j, "Home Team Corners"]+statistics_per_team[i][9][-1])
            if(data.loc[j, "Away Team"] == statistics_per_team[i][0]):
                if(len(statistics_per_team[i][9]) == 0):
                    statistics_per_team[i][9].append(0)
                else:
                    statistics_per_team[i][9].append(data.loc[j, "Away Team Corners"]+statistics_per_team[i][9][-1])
    return data, statistics_per_team

def compute_accumulated_received_shots_target(statistics_per_team, data):
    for i in range(len(statistics_per_team)):
        for j in range(len(data)):
            if(data.loc[j, "Home Team"] == statistics_per_team[i][0]):
                if(len(statistics_per_team[i][8]) == 0):
                    statistics_per_team[i][8].append(0)
                else:
                    statistics_per_team[i][8].append(data.loc[j, "Away Team Shots Target"]+statistics_per_team[i][8][-1])
            if(data.loc[j, "Away Team"] == statistics_per_team[i][0]):
                if(len(statistics_per_team[i][8]) == 0):
                    statistics_per_team[i][8].append(0)
                else:
                    statistics_per_team[i][8].append(data.loc[j, "Home Team Shots Target"]+statistics_per_team[i][8][-1])
    return data, statistics_per_team

def compute_accumulated_thrown_shots_target(statistics_per_team, data):
    for i in range(len(statistics_per_team)):
        for j in range(len(data)):
            if(data.loc[j, "Home Team"] == statistics_per_team[i][0]):
                if(len(statistics_per_team[i][7]) == 0):
                    statistics_per_team[i][7].append(0)
                else:
                    statistics_per_team[i][7].append(data.loc[j, "Home Team Shots Target"]+statistics_per_team[i][7][-1])
            if(data.loc[j, "Away Team"] == statistics_per_team[i][0]):
                if(len(statistics_per_team[i][7]) == 0):
                    statistics_per_team[i][7].append(0)
                else:
                    statistics_per_team[i][7].append(data.loc[j, "Away Team Shots Target"]+statistics_per_team[i][7][-1])
    return data, statistics_per_team

def compute_accumulated_received_shots(statistics_per_team, data):
    for i in range(len(statistics_per_team)):
        for j in range(len(data)):
            if(data.loc[j, "Home Team"] == statistics_per_team[i][0]):
                if(len(statistics_per_team[i][6]) == 0):
                    statistics_per_team[i][6].append(0)
                else:
                    statistics_per_team[i][6].append(data.loc[j, "Away Team Shots"]+statistics_per_team[i][6][-1])
            if(data.loc[j, "Away Team"] == statistics_per_team[i][0]):
                if(len(statistics_per_team[i][6]) == 0):
                    statistics_per_team[i][6].append(0)
                else:
                    statistics_per_team[i][6].append(data.loc[j, "Home Team Shots"]+statistics_per_team[i][6][-1])
    return data, statistics_per_team

def compute_accumulated_thrown_shots(statistics_per_team, data):
    for i in range(len(statistics_per_team)):
        for j in range(len(data)):
            if(data.loc[j, "Home Team"] == statistics_per_team[i][0]):
                if(len(statistics_per_team[i][5]) == 0):
                    statistics_per_team[i][5].append(0)
                else:
                    statistics_per_team[i][5].append(data.loc[j, "Home Team Shots"]+statistics_per_team[i][5][-1])
            if(data.loc[j, "Away Team"] == statistics_per_team[i][0]):
                if(len(statistics_per_team[i][5]) == 0):
                    statistics_per_team[i][5].append(0)
                else:
                    statistics_per_team[i][5].append(data.loc[j, "Away Team Shots"]+statistics_per_team[i][5][-1])
    return data, statistics_per_team

def compute_accumulated_received_red_cards(statistics_per_team, data):
    for i in range(len(statistics_per_team)):
        for j in range(len(data)):
            if(data.loc[j, "Home Team"] == statistics_per_team[i][0]):
                if(len(statistics_per_team[i][4]) == 0):
                    statistics_per_team[i][4].append(0)
                else:
                    statistics_per_team[i][4].append(data.loc[j, "Home Team Red Cards"]+statistics_per_team[i][4][-1])
            if(data.loc[j, "Away Team"] == statistics_per_team[i][0]):
                if(len(statistics_per_team[i][4]) == 0):
                    statistics_per_team[i][4].append(0)
                else:
                    statistics_per_team[i][4].append(data.loc[j, "Away Team Red Cards"]+statistics_per_team[i][4][-1])
    return data, statistics_per_team

def compute_accumulated_received_yellow_cards(statistics_per_team, data):
    for i in range(len(statistics_per_team)):
        for j in range(len(data)):
            if(data.loc[j, "Home Team"] == statistics_per_team[i][0]):
                if(len(statistics_per_team[i][3]) == 0):
                    statistics_per_team[i][3].append(0)
                else:
                    statistics_per_team[i][3].append(data.loc[j, "Home Team Yellow Cards"]+statistics_per_team[i][3][-1])
            if(data.loc[j, "Away Team"] == statistics_per_team[i][0]):
                if(len(statistics_per_team[i][3]) == 0):
                    statistics_per_team[i][3].append(0)
                else:
                    statistics_per_team[i][3].append(data.loc[j, "Away Team Yellow Cards"]+statistics_per_team[i][3][-1])
    return data, statistics_per_team

def compute_accumulated_received_goals(statistics_per_team, data):
    for i in range(len(statistics_per_team)):
        for j in range(len(data)):
            if(data.loc[j, "Home Team"] == statistics_per_team[i][0]):
                if(len(statistics_per_team[i][2]) == 0):
                    statistics_per_team[i][2].append(0)
                else:
                    statistics_per_team[i][2].append(data.loc[j, "Away Team Goals"]+statistics_per_team[i][2][-1])
            if(data.loc[j, "Away Team"] == statistics_per_team[i][0]):
                if(len(statistics_per_team[i][2]) == 0):
                    statistics_per_team[i][2].append(0)
                else:
                    statistics_per_team[i][2].append(data.loc[j, "Home Team Goals"]+statistics_per_team[i][2][-1])
    return data, statistics_per_team

def compute_accumulated_scored_goals(statistics_per_team, data):
    for i in range(len(statistics_per_team)):
        for j in range(len(data)):
            if(data.loc[j, "Home Team"] == statistics_per_team[i][0]):
                if(len(statistics_per_team[i][1]) == 0):
                    statistics_per_team[i][1].append(0)
                else:
                    statistics_per_team[i][1].append(data.loc[j, "Home Team Goals"]+statistics_per_team[i][1][-1])
            if(data.loc[j, "Away Team"] == statistics_per_team[i][0]):
                if(len(statistics_per_team[i][1]) == 0):
                    statistics_per_team[i][1].append(0)
                else:
                    statistics_per_team[i][1].append(data.loc[j, "Away Team Goals"]+statistics_per_team[i][1][-1])
    return data, statistics_per_team

def compute_accumulated_won_matches(statistics_per_team, data):
    for i in range(len(statistics_per_team)):
        for j in range(len(data)):
            if(data.loc[j, "Home Team"] == statistics_per_team[i][0]):
                if(len(statistics_per_team[i][13]) == 0):
                    statistics_per_team[i][13].append(0)
                else:
                    if(data.loc[j, "Match Result"] == 1):
                        statistics_per_team[i][13].append(1+statistics_per_team[i][13][-1])
                    else:
                        statistics_per_team[i][13].append(statistics_per_team[i][13][-1])
            if(data.loc[j, "Away Team"] == statistics_per_team[i][0]):
                if(len(statistics_per_team[i][13]) == 0):
                    statistics_per_team[i][13].append(0)
                else:
                    if(data.loc[j, "Match Result"] == 3):
                        statistics_per_team[i][13].append(1+statistics_per_team[i][13][-1])
                    else:
                        statistics_per_team[i][13].append(statistics_per_team[i][13][-1])
    return data, statistics_per_team

def compute_accumulated_draw_matches(statistics_per_team, data):
    for i in range(len(statistics_per_team)):
        for j in range(len(data)):
            if(data.loc[j, "Home Team"] == statistics_per_team[i][0]):
                if(len(statistics_per_team[i][14]) == 0):
                    statistics_per_team[i][14].append(0)
                else:
                    if(data.loc[j, "Match Result"] == 2):
                        statistics_per_team[i][14].append(1+statistics_per_team[i][14][-1])
                    else:
                        statistics_per_team[i][14].append(statistics_per_team[i][14][-1])
            if(data.loc[j, "Away Team"] == statistics_per_team[i][0]):
                if(len(statistics_per_team[i][14]) == 0):
                    statistics_per_team[i][14].append(0)
                else:
                    if(data.loc[j, "Match Result"] == 2):
                        statistics_per_team[i][14].append(1+statistics_per_team[i][14][-1])
                    else:
                        statistics_per_team[i][14].append(statistics_per_team[i][14][-1])
    return data, statistics_per_team

def compute_accumulated_lost_matches(statistics_per_team, data):
    for i in range(len(statistics_per_team)):
        for j in range(len(data)):
            if(data.loc[j, "Home Team"] == statistics_per_team[i][0]):
                if(len(statistics_per_team[i][15]) == 0):
                    statistics_per_team[i][15].append(0)
                else:
                    if(data.loc[j, "Match Result"] == 3):
                        statistics_per_team[i][15].append(1+statistics_per_team[i][15][-1])
                    else:
                        statistics_per_team[i][15].append(statistics_per_team[i][15][-1])
            if(data.loc[j, "Away Team"] == statistics_per_team[i][0]):
                if(len(statistics_per_team[i][15]) == 0):
                    statistics_per_team[i][15].append(0)
                else:
                    if(data.loc[j, "Match Result"] == 1):
                        statistics_per_team[i][15].append(1+statistics_per_team[i][15][-1])
                    else:
                        statistics_per_team[i][15].append(statistics_per_team[i][15][-1])
    return data, statistics_per_team

In [107]:
def get_team_statistics_season(data):
    statistics_per_team = [] #Temporal cube for each team [["Alaves", [0,3,5]], ["Barcelona", [0,3,9]]]
    #Team name, goals scored, goals received, yellow cards, red cards, shots thrown, shots received,
    #shots target thron, shots target received, corners thrown, corners received, faults commited, faults received
    teams = []
    #data["Accumulated Total Goals Home"] = 0
    unique_teams = list(dict.fromkeys(data["Home Team"].values)) # Get unique teams
    
    for i in range(len(unique_teams)):
        teams.append(unique_teams[i])
        statistics_per_team.append(teams)
        #Initializing the arrays with 0 accumulated values
        statistics_per_team[i].append([0]) #Goals scored --> 1 index
        statistics_per_team[i].append([0]) #Goals received --> 2 index
        statistics_per_team[i].append([0]) #Yellow cards --> 3 index
        statistics_per_team[i].append([0]) #Red cards --> 4 index
        statistics_per_team[i].append([0]) #Shots thrown --> 5 index
        statistics_per_team[i].append([0]) #Shots received --> 6 index
        statistics_per_team[i].append([0]) #Shots target thrown --> 7 index
        statistics_per_team[i].append([0]) #Shots target received --> 8 index
        statistics_per_team[i].append([0]) #Corners thrown --> 9 index
        statistics_per_team[i].append([0]) #Corners received --> 10 index
        statistics_per_team[i].append([0]) #Faults commited --> 11 index
        statistics_per_team[i].append([0]) #Faults received --> 12 index
        statistics_per_team[i].append([0]) #Won Matches --> 13 index
        statistics_per_team[i].append([0]) #Draw Matches --> 14 index
        statistics_per_team[i].append([0]) #Lost Matches --> 15 index
        teams = []
        
    data, statistics_per_team = compute_accumulated_scored_goals(statistics_per_team, data)
    data, statistics_per_team = compute_accumulated_received_goals(statistics_per_team, data)
    data, statistics_per_team = compute_accumulated_received_yellow_cards(statistics_per_team, data)
    data, statistics_per_team = compute_accumulated_received_red_cards(statistics_per_team, data)
    data, statistics_per_team = compute_accumulated_thrown_shots(statistics_per_team, data)
    data, statistics_per_team = compute_accumulated_received_shots(statistics_per_team, data)
    data, statistics_per_team = compute_accumulated_thrown_shots_target(statistics_per_team, data)
    data, statistics_per_team = compute_accumulated_received_shots_target(statistics_per_team, data)
    data, statistics_per_team = compute_accumulated_thrown_corners(statistics_per_team, data)
    data, statistics_per_team = compute_accumulated_received_corners(statistics_per_team, data)
    data, statistics_per_team = compute_accumulated_commited_faults(statistics_per_team, data)
    data, statistics_per_team = compute_accumulated_received_faults(statistics_per_team, data)
    data, statistics_per_team = compute_accumulated_won_matches(statistics_per_team, data)
    data, statistics_per_team = compute_accumulated_draw_matches(statistics_per_team, data)
    data, statistics_per_team = compute_accumulated_lost_matches(statistics_per_team, data)

                    
    return data, statistics_per_team
    

In [93]:
def integrate_computed_accumulated_data_per_team(statistics_per_team, data):
    ACCUMULATED_COLUMNS_HOME_TEAM = ["Home Team Accumulated Scored Goals", "Home Team Accumulated Received Goals",
                                     "Home Team Accumulated Yellow Cards", "Home Team Accumulated Red Cards",
                                     "Home Team Accumulated Thrown Shots", "Home Team Accumulated Received Shots",
                                     "Home Team Accumulated Thrown Shots Target",
                                     "Home Team Accumulated Received Shots Target",
                                     "Home Team Accumulated Thrown Corners",
                                     "Home Team Accumulated Received Corners",
                                     "Home Team Accumulated Commited Faults",
                                     "Home Team Accumulated Received Faults",
                                     "Home Team Accumulated Won Matches",
                                     "Home Team Accumulated Draw Matches",
                                     "Home Team Accumulated Lost Matches"]
    ACCUMULATED_COLUMNS_AWAY_TEAM = ["Away Team Accumulated Scored Goals", "Away Team Accumulated Received Goals",
                                     "Away Team Accumulated Yellow Cards", "Away Team Accumulated Red Cards",
                                     "Away Team Accumulated Thrown Shots", "Away Team Accumulated Received Shots",
                                     "Away Team Accumulated Thrown Shots Target",
                                     "Away Team Accumulated Received Shots Target",
                                     "Away Team Accumulated Thrown Corners",
                                     "Away Team Accumulated Received Corners",
                                     "Away Team Accumulated Commited Faults",
                                     "Away Team Accumulated Received Faults",
                                     "Away Team Accumulated Won Matches",
                                     "Away Team Accumulated Draw Matches",
                                     "Away Team Accumulated Lost Matches"]
    counter_index = 0
    counter = 0
    start_counting = False
    for i in range(len(data)):
        for team_statistics in statistics_per_team:
            if(data.at[i, "Home Team"] == team_statistics[0]):
                feature_index = 1
                for accumulated_column_home_team in ACCUMULATED_COLUMNS_HOME_TEAM:
                    data.at[i, accumulated_column_home_team] = team_statistics[feature_index][counter_index]
                    feature_index += 1
            if(data.at[i, "Away Team"] == team_statistics[0]):
                feature_index = 1
                for accumulated_column_away_team in ACCUMULATED_COLUMNS_AWAY_TEAM:
                    data.at[i, accumulated_column_away_team] = team_statistics[feature_index][counter_index]
                    feature_index += 1
        counter += 1
        if counter == 9:
            start_counting = True
        if start_counting:
            if i%10 == 0:
                counter_index += 1
            
    return data

In [100]:
def generate_advanced_statistics(data):
    #OVERALL STATISTICS
    #DEFENSE
    #GAMES WON PERCENTAGE = GAMES WON HOME / (GAMES WON HOME + GAMES LOST HOME + GAMES DRAW HOME)
    data["Home Team Games Won Percentage"] = np.where(
        (data["Home Team Accumulated Won Matches"] + data["Home Team Accumulated Lost Matches"] + data["Home Team Accumulated Draw Matches"]) == 0,
        0,
        data["Home Team Accumulated Won Matches"] / (data["Home Team Accumulated Won Matches"] + data["Home Team Accumulated Lost Matches"] + data["Home Team Accumulated Draw Matches"]))
    data["Away Team Games Won Percentage"] = np.where(
        (data["Away Team Accumulated Won Matches"] + data["Away Team Accumulated Lost Matches"] + data["Away Team Accumulated Draw Matches"]) == 0,
        0,
        data["Away Team Accumulated Won Matches"] / (data["Away Team Accumulated Won Matches"] + data["Away Team Accumulated Lost Matches"] + data["Away Team Accumulated Draw Matches"]))
    #GAMES DRAW PERCENTAGE = GAMES DRAW HOME / (GAMES WON HOME + GAMES LOST HOME + GAMES DRAW HOME)
    data["Home Team Games Draw Percentage"] = np.where(
        (data["Home Team Accumulated Won Matches"] + data["Home Team Accumulated Lost Matches"] + data["Home Team Accumulated Draw Matches"]) == 0,
        0,
        data["Home Team Accumulated Draw Matches"] / (data["Home Team Accumulated Won Matches"] + data["Home Team Accumulated Lost Matches"] + data["Home Team Accumulated Draw Matches"]))
    data["Away Team Games Draw Percentage"] = np.where(
        (data["Away Team Accumulated Won Matches"] + data["Away Team Accumulated Lost Matches"] + data["Away Team Accumulated Draw Matches"]) == 0,
        0,
        data["Away Team Accumulated Draw Matches"] / (data["Away Team Accumulated Won Matches"] + data["Away Team Accumulated Lost Matches"] + data["Away Team Accumulated Draw Matches"]))
    #GAMES LOST PERCENTAGE = GAMES DRAW HOME / (GAMES WON HOME + GAMES LOST HOME + GAMES DRAW HOME)
    data["Home Team Games Lost Percentage"] = np.where(
        (data["Home Team Accumulated Won Matches"] + data["Home Team Accumulated Lost Matches"] + data["Home Team Accumulated Draw Matches"]) == 0,
        0,
        data["Home Team Accumulated Lost Matches"] / (data["Home Team Accumulated Won Matches"] + data["Home Team Accumulated Lost Matches"] + data["Home Team Accumulated Draw Matches"]))
    data["Away Team Games Lost Percentage"] = np.where(
        (data["Away Team Accumulated Won Matches"] + data["Away Team Accumulated Lost Matches"] + data["Away Team Accumulated Draw Matches"]) == 0,
        0,
        data["Away Team Accumulated Lost Matches"] / (data["Away Team Accumulated Won Matches"] + data["Away Team Accumulated Lost Matches"] + data["Away Team Accumulated Draw Matches"]))
    #SAVES GOALKEEPER PERCENTAGE HOME = (SHOTS TARGET RECEIVED HOME - GOALS RECEIVED HOME) / SHOTS TARGET RECEIVED HOME
    data["Home Team Saves Goalkeeper Percentage"] = np.where(
        data["Home Team Accumulated Received Shots Target"] == 0,
        0, 
        ((data["Home Team Accumulated Received Shots Target"] - data["Home Team Accumulated Received Goals"]) / data["Home Team Accumulated Received Shots Target"]))
    data["Away Team Saves Goalkeeper Percentage"] = np.where(
        data["Away Team Accumulated Received Shots Target"] == 0,
        0, 
        ((data["Away Team Accumulated Received Shots Target"] - data["Away Team Accumulated Received Goals"]) / data["Away Team Accumulated Received Shots Target"]))
    #SAVES GOALKEEPER RATIO HOME = SHOTS TARGET RECEIVED HOME / (SHOTS TARGET RECEIVED HOME - GOALS RECEIVED HOME)
    data["Home Team Saves Goalkeeper Ratio"] = np.where(
        (data["Home Team Accumulated Received Shots Target"] - data["Home Team Accumulated Received Goals"]) == 0,
        0,
        data["Home Team Accumulated Received Shots Target"] / (data["Home Team Accumulated Received Shots Target"] - data["Home Team Accumulated Received Goals"]))
    data["Away Team Saves Goalkeeper Ratio"] = np.where(
        (data["Away Team Accumulated Received Shots Target"] - data["Away Team Accumulated Received Goals"]) == 0,
        0,
        data["Away Team Accumulated Received Shots Target"] / (data["Away Team Accumulated Received Shots Target"] - data["Away Team Accumulated Received Goals"]))
    #OFFENSE
    #SCORING PERCENTAGE HOME = GOALS SCORED HOME / SHOTS THROWN HOME
    data["Home Team Scoring Percentage"] = np.where(
        data["Home Team Accumulated Thrown Shots"] == 0,
        0,
        data["Home Team Accumulated Scored Goals"] / data["Home Team Accumulated Thrown Shots"])
    data["Away Team Scoring Percentage"] = np.where(
        data["Away Team Accumulated Thrown Shots"] == 0,
        0,
        data["Away Team Accumulated Scored Goals"] / data["Away Team Accumulated Thrown Shots"])
    #SCORING RATIO HOME = SHOTS TARGET THROWN HOME / GOALS SCORED HOME
    data["Home Team Scoring Ratio"] = np.where(
        data["Home Team Accumulated Scored Goals"] == 0,
        0,
        data["Home Team Accumulated Thrown Shots Target"] /  data["Home Team Accumulated Scored Goals"])
    data["Away Team Scoring Ratio"] = np.where(
        data["Away Team Accumulated Scored Goals"] == 0,
        0,
        data["Away Team Accumulated Thrown Shots Target"] /  data["Away Team Accumulated Scored Goals"])
    #ACCURACY SHOTS THROWN HOME = SHOTS TARGET THROWN HOME / SHOTS THROWN HOME
    data["Home Team Accuracy Thrown Shots"] = np.where(
        data["Home Team Accumulated Thrown Shots"] == 0,
        0,
        data["Home Team Accumulated Thrown Shots Target"] / data["Home Team Accumulated Thrown Shots"])
    data["Away Team Accuracy Thrown Shots"] = np.where(
        data["Away Team Accumulated Thrown Shots"] == 0,
        0,
        data["Away Team Accumulated Thrown Shots Target"] / data["Away Team Accumulated Thrown Shots"])
    #OTHER STATS
    #GOAL AVERAGE RATIO HOME = GOALS SCORED HOME / GOALS RECEIVED HOME
    data["Home Team Goal Average Ratio"] = np.where(
        data["Home Team Accumulated Received Goals"] == 0,
        0,
        data["Home Team Accumulated Scored Goals"] / data["Home Team Accumulated Received Goals"])
    data["Away Team Goal Average Ratio"] = np.where(
        data["Away Team Accumulated Received Goals"] == 0,
        0,
        data["Away Team Accumulated Scored Goals"] / data["Away Team Accumulated Received Goals"])
    #SHOTS TARGET RATIO AVERAGE HOME = SHOTS TARGET THROWN HOME / SHOTS TARGET RECEIVE HOME
    data["Home Team Shots Target Average Ratio"] = np.where(
        data["Home Team Accumulated Received Shots Target"] == 0,
        0,
        data["Home Team Accumulated Thrown Shots Target"] / data["Home Team Accumulated Received Shots Target"])
    data["Away Team Shots Target Average Ratio"] = np.where(
        data["Away Team Accumulated Received Shots Target"] == 0,
        0,
        data["Away Team Accumulated Thrown Shots Target"] / data["Away Team Accumulated Received Shots Target"])
    #SHOTS AVERAGE RATIO HOME = SHOTS THROWN HOME / SHOTS RECEIVE HOME
    data["Home Team Shots Average Ratio"] = np.where(
        data["Home Team Accumulated Received Shots"] == 0,
        0,
        data["Home Team Accumulated Thrown Shots"] / data["Home Team Accumulated Received Shots"])
    data["Away Team Shots Average Ratio"] = np.where(
        data["Away Team Accumulated Received Shots"] == 0,
        0,
        data["Away Team Accumulated Thrown Shots"] / data["Away Team Accumulated Received Shots"])
    
    return data

In [101]:
def preprocess_data(data):
    data = data[COLUMNS_TO_MANTAIN]
    data.columns = READABLE_HEADER
    data["Match Result"] = data["Match Result"].apply(match_result_to_numeric)
    data = fill_na_values(data)
    data = initialize_accumulated_statistics(data)
    data, statistics_per_team = get_team_statistics_season(data)
    data = integrate_computed_accumulated_data_per_team(statistics_per_team, data)
    data = generate_advanced_statistics(data)
    return data

In [102]:
def read_preprocess_folder(folder):
    pd.set_option('display.max_columns', None)
    files = [f for f in listdir(folder)]
    sorted_files = []
    sorted_files = files.sort()
    count = 0
    lista_data = []
    data = pd.read_csv(folder + "/" + files[0])
    data = preprocess_data(data)
    for file in files:
        if count > 0:
            temp_data = pd.read_csv(FOLDER_PATH + "/" + file)
            temp_data = preprocess_data(temp_data)
            data = pd.concat([data, temp_data], axis = 0)
        count += 1
    
    return data

In [103]:
def main():
    folder = "Spanish_League_Data"
    data = read_preprocess_folder(folder)
    actual_date = date.today().strftime("%b-%d-%Y")
    data.to_csv("Preprocessed-Data/"+actual_date, index=False)
    return data

In [104]:
data = main()
data.describe()

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  after removing the cwd from sys.path.
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  # Remove the CWD from sys.path while we load stuff.
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


[['Alaves', [0, 0, 1, 4, 4, 4, 6, 7, 7, 8, 8, 9, 11, 13, 13, 14, 14, 14, 14, 16, 16, 19, 21, 24, 24, 26, 28, 30, 30, 31, 31, 31, 32, 32, 32, 32, 34, 34, 35], [0, 0, 2, 6, 6, 9, 10, 11, 13, 14, 14, 16, 16, 19, 22, 23, 24, 27, 29, 29, 31, 32, 34, 36, 39, 41, 44, 45, 45, 45, 45, 45, 47, 47, 48, 51, 51, 54, 54], [0, 0, 0, 2, 4, 5, 6, 8, 15, 18, 19, 21, 24, 26, 30, 32, 37, 40, 40, 42, 44, 46, 48, 51, 54, 57, 57, 61, 67, 69, 72, 74, 77, 78, 81, 81, 84, 86, 88], [0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 3, 3, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4], [0, 5, 13, 29, 35, 52, 61, 73, 81, 93, 103, 112, 123, 130, 148, 159, 171, 183, 198, 210, 213, 225, 231, 242, 254, 260, 263, 278, 288, 306, 317, 328, 348, 357, 365, 374, 382, 392, 408], [0, 17, 27, 38, 51, 71, 80, 89, 102, 114, 121, 135, 153, 168, 181, 202, 216, 231, 246, 274, 299, 319, 337, 347, 359, 371, 386, 396, 411, 421, 428, 432, 440, 457, 466, 480, 498, 513, 524], [0, 0, 3, 8, 9, 15, 18, 22, 25, 34, 40, 42, 49, 

[['Valencia', [0, 2, 3, 5, 6, 10, 12, 13, 13, 14, 15, 15, 15, 15, 19, 20, 23, 24, 27, 28, 29, 32, 32, 34, 35, 36, 37, 37, 40, 40, 42, 43, 45, 47, 49, 50, 52, 54, 57], [0, 1, 1, 1, 2, 2, 5, 5, 6, 7, 8, 11, 12, 14, 14, 14, 15, 15, 15, 15, 17, 18, 21, 22, 23, 23, 24, 26, 28, 29, 29, 31, 31, 32, 32, 32, 35, 39, 42], [0, 3, 6, 9, 12, 14, 17, 17, 17, 20, 21, 23, 25, 28, 28, 33, 38, 40, 43, 45, 47, 48, 50, 53, 55, 61, 66, 69, 74, 77, 78, 80, 84, 87, 89, 91, 95, 98, 99], [0, 0, 1, 1, 1, 1, 2, 3, 4, 4, 4, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 9, 9], [0, 14, 22, 36, 42, 58, 70, 79, 84, 93, 102, 110, 128, 139, 156, 163, 175, 185, 198, 209, 225, 237, 250, 256, 267, 272, 284, 299, 310, 321, 336, 351, 368, 377, 396, 404, 418, 424, 440], [0, 8, 30, 43, 64, 74, 90, 100, 107, 126, 139, 156, 165, 172, 178, 191, 203, 213, 221, 232, 245, 262, 278, 291, 301, 316, 326, 336, 347, 355, 368, 378, 386, 396, 409, 418, 430, 447, 460], [0, 4, 9, 16, 18, 25, 31, 33, 34, 37, 4

[['Real Madrid', [0, 2, 7, 10, 11, 13, 14, 16, 17, 20, 25, 25, 29, 30, 33, 34, 36, 37, 39, 41, 43, 46, 46, 53, 54, 54, 57, 59, 59, 61, 64, 65, 66, 68, 71, 73, 77, 79, 84], [0, 1, 1, 2, 3, 3, 3, 3, 5, 6, 7, 9, 12, 13, 14, 14, 14, 14, 14, 14, 14, 16, 18, 18, 20, 21, 23, 24, 25, 28, 29, 30, 30, 30, 30, 31, 32, 34, 36], [0, 3, 3, 6, 7, 8, 11, 12, 15, 19, 20, 25, 27, 29, 31, 33, 35, 36, 38, 38, 42, 43, 47, 48, 53, 56, 58, 59, 61, 64, 68, 70, 74, 75, 77, 81, 84, 88, 89], [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 4, 5, 5, 5, 6, 6, 6, 6], [0, 20, 32, 43, 54, 69, 83, 96, 105, 125, 139, 144, 159, 166, 179, 194, 210, 221, 235, 250, 258, 276, 292, 308, 316, 336, 345, 364, 373, 400, 417, 431, 448, 463, 490, 503, 522, 540, 555], [0, 20, 37, 58, 72, 85, 111, 125, 141, 152, 161, 177, 186, 201, 212, 230, 241, 257, 281, 296, 315, 330, 341, 356, 370, 380, 392, 401, 407, 420, 433, 446, 452, 467, 487, 502, 516, 550, 565], [0, 7, 13, 19, 22, 30, 35, 39, 43

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  if sys.path[0] == '':


[['Espanol', [0, 1, 2, 3, 3, 4, 6, 6, 6, 7, 7, 10, 10, 10, 11, 12, 14, 15, 17, 17, 18, 19, 20, 20, 22, 22, 22, 25, 25, 28, 28, 29, 32, 34, 37, 39, 40, 43, 46], [0, 0, 0, 1, 3, 5, 7, 7, 10, 10, 11, 15, 18, 19, 20, 22, 25, 26, 28, 32, 33, 34, 35, 37, 38, 40, 41, 44, 45, 46, 46, 46, 46, 46, 46, 49, 49, 49, 49], [0, 3, 6, 9, 15, 18, 19, 22, 24, 25, 27, 29, 32, 36, 39, 43, 45, 50, 55, 57, 61, 63, 66, 68, 74, 76, 76, 79, 82, 84, 86, 87, 87, 88, 89, 95, 98, 99, 100], [0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 3, 4, 5, 5, 5, 5, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6], [0, 10, 14, 22, 27, 32, 42, 50, 59, 69, 79, 89, 98, 116, 130, 137, 158, 168, 191, 207, 220, 230, 237, 249, 253, 262, 276, 284, 292, 304, 322, 335, 343, 360, 378, 386, 396, 409, 422], [0, 11, 19, 36, 56, 89, 113, 127, 140, 154, 168, 179, 197, 207, 233, 255, 268, 284, 293, 303, 325, 339, 368, 388, 408, 419, 429, 446, 455, 466, 475, 483, 495, 508, 522, 542, 551, 565, 577], [0, 2, 3, 5, 6, 9, 13, 16, 19, 22, 26, 31, 

[['Real Madrid', [0, 3, 6, 11, 13, 16, 17, 21, 21, 23, 26, 27, 27, 31, 34, 40, 40, 42, 42, 44, 47, 50, 53, 59, 64, 67, 71, 74, 78, 81, 83, 83, 85, 87, 89, 92, 96, 101, 102], [0, 2, 2, 2, 2, 2, 4, 6, 6, 6, 8, 8, 9, 11, 13, 13, 13, 13, 14, 14, 15, 15, 15, 17, 18, 20, 21, 22, 24, 26, 26, 28, 29, 29, 30, 32, 33, 34, 35], [0, 2, 4, 4, 7, 8, 11, 13, 17, 19, 23, 25, 29, 29, 31, 33, 36, 36, 40, 40, 40, 42, 45, 48, 49, 53, 56, 58, 60, 63, 64, 68, 72, 74, 78, 79, 81, 84, 88], [0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 2, 2, 3, 4, 4, 4, 4, 4, 4, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5], [0, 28, 45, 68, 85, 103, 120, 139, 152, 175, 190, 210, 226, 263, 277, 297, 316, 332, 363, 379, 396, 422, 444, 461, 475, 509, 530, 558, 579, 611, 627, 643, 675, 699, 718, 749, 763, 797, 816], [0, 9, 21, 32, 42, 62, 86, 105, 115, 132, 150, 158, 167, 176, 188, 194, 200, 206, 217, 223, 234, 247, 252, 261, 275, 277, 288, 297, 307, 317, 322, 330, 340, 349, 356, 373, 379, 385, 393], [0, 11, 17, 30, 36, 43, 46, 52,

[['Hercules', [0, 0, 2, 3, 3, 5, 5, 7, 8, 9, 9, 11, 11, 14, 14, 18, 18, 18, 22, 22, 22, 22, 22, 24, 24, 24, 24, 25, 25, 25, 28, 28, 29, 30, 31, 33, 35, 36, 36], [0, 1, 1, 3, 3, 3, 6, 8, 9, 12, 15, 16, 19, 20, 21, 22, 22, 25, 26, 28, 31, 34, 36, 37, 38, 38, 39, 41, 43, 47, 48, 48, 50, 50, 53, 56, 58, 60, 60], [0, 2, 4, 6, 8, 10, 12, 17, 20, 20, 24, 25, 26, 28, 32, 37, 39, 43, 44, 49, 53, 55, 57, 58, 61, 62, 64, 67, 68, 70, 75, 77, 80, 82, 85, 86, 88, 91, 92], [0, 1, 1, 1, 1, 1, 2, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4, 4, 5, 5, 5, 5, 6, 6, 6, 6, 6, 6, 6, 6, 6, 7, 8, 8, 8, 8], [0, 9, 18, 31, 36, 43, 52, 61, 70, 77, 85, 99, 111, 129, 136, 155, 163, 174, 184, 189, 196, 200, 207, 214, 221, 232, 238, 251, 260, 278, 291, 299, 308, 322, 333, 354, 369, 377, 394], [0, 14, 31, 39, 59, 74, 94, 117, 130, 155, 171, 189, 196, 205, 217, 225, 233, 253, 275, 283, 295, 317, 332, 343, 360, 377, 395, 403, 425, 438, 457, 460, 476, 485, 497, 509, 518, 537, 546], [0, 3, 10, 17, 21, 24, 25, 30, 34, 36, 37, 42,

[['Granada', [0, 0, 0, 1, 1, 2, 2, 2, 2, 2, 4, 4, 5, 6, 8, 8, 10, 11, 12, 12, 14, 16, 17, 21, 22, 22, 22, 24, 27, 27, 28, 28, 30, 30, 31, 32, 34, 35, 35], [0, 1, 5, 5, 6, 7, 8, 8, 10, 11, 12, 12, 12, 12, 14, 15, 16, 21, 23, 26, 27, 28, 31, 32, 34, 35, 37, 38, 43, 46, 46, 46, 48, 49, 49, 52, 53, 55, 56], [0, 2, 4, 11, 13, 16, 16, 19, 20, 23, 24, 28, 32, 34, 38, 40, 41, 42, 47, 50, 56, 60, 62, 64, 68, 72, 74, 74, 81, 85, 87, 88, 90, 93, 95, 98, 98, 102, 106], [0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 4, 5, 5, 5, 6, 7, 7, 7, 7, 7, 8, 8, 8, 8, 8, 10, 10], [0, 11, 23, 31, 46, 56, 70, 88, 107, 109, 124, 140, 153, 164, 177, 184, 201, 213, 228, 242, 254, 270, 284, 304, 320, 328, 338, 351, 357, 377, 393, 412, 421, 431, 442, 451, 468, 480, 492], [0, 18, 38, 52, 66, 86, 109, 129, 145, 162, 193, 207, 225, 243, 254, 273, 286, 321, 333, 349, 365, 374, 386, 394, 409, 414, 428, 436, 454, 475, 482, 495, 514, 522, 529, 538, 547, 561, 579], [0, 2, 6, 8, 12, 14, 19, 22, 27, 27, 31

[['Celta', [0, 0, 1, 3, 4, 6, 7, 9, 9, 10, 11, 13, 14, 15, 16, 16, 16, 16, 19, 19, 20, 21, 21, 21, 22, 24, 25, 26, 27, 29, 29, 29, 31, 32, 33, 34, 34, 36, 37], [0, 1, 3, 3, 5, 6, 8, 8, 10, 11, 14, 17, 18, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 31, 32, 36, 38, 41, 43, 45, 46, 47, 47, 48, 51, 52, 52, 52], [0, 2, 4, 5, 7, 11, 14, 16, 18, 20, 22, 26, 27, 28, 31, 34, 36, 40, 42, 43, 48, 52, 55, 57, 58, 61, 62, 64, 67, 71, 76, 77, 83, 88, 89, 92, 95, 97, 98], [0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4], [0, 17, 31, 43, 58, 72, 91, 102, 109, 121, 132, 139, 154, 162, 180, 191, 203, 208, 222, 234, 245, 256, 268, 274, 279, 292, 306, 325, 335, 343, 357, 365, 381, 390, 411, 424, 428, 445, 457], [0, 16, 31, 38, 53, 59, 66, 77, 101, 113, 126, 155, 170, 184, 192, 202, 213, 225, 237, 251, 262, 279, 296, 311, 326, 335, 347, 370, 387, 395, 406, 420, 430, 441, 449, 459, 475, 488, 496], [0, 3, 7, 11, 15, 21, 30, 35, 38, 40, 44,

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


[['Sociedad', [0, 2, 3, 4, 4, 4, 5, 6, 6, 8, 11, 13, 18, 19, 23, 25, 30, 33, 35, 36, 38, 42, 42, 42, 43, 46, 46, 48, 49, 52, 53, 54, 54, 56, 58, 59, 60, 61, 62], [0, 0, 1, 3, 3, 3, 7, 8, 9, 10, 10, 12, 12, 17, 20, 21, 22, 23, 23, 28, 30, 30, 34, 34, 34, 35, 36, 39, 39, 43, 43, 44, 48, 50, 51, 51, 52, 53, 55], [0, 1, 4, 6, 7, 10, 11, 11, 13, 14, 15, 16, 18, 19, 21, 24, 28, 29, 31, 35, 37, 38, 40, 42, 42, 44, 48, 48, 50, 53, 56, 61, 64, 69, 70, 70, 71, 73, 75], [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1], [0, 16, 27, 37, 50, 60, 69, 86, 104, 112, 122, 138, 155, 163, 186, 201, 221, 232, 240, 248, 257, 264, 274, 292, 299, 314, 325, 340, 349, 372, 386, 396, 409, 428, 447, 456, 477, 490, 503], [0, 15, 27, 43, 56, 67, 90, 100, 111, 127, 142, 160, 165, 182, 192, 205, 220, 243, 256, 278, 294, 312, 324, 330, 344, 356, 366, 381, 393, 407, 416, 431, 452, 467, 475, 489, 504, 516, 531], [0, 6, 10, 14, 17, 19, 24, 29, 35, 38, 4

[['Almeria', [0, 1, 1, 2, 4, 4, 5, 7, 7, 7, 8, 9, 9, 9, 11, 12, 13, 15, 15, 17, 17, 18, 20, 22, 22, 22, 22, 22, 23, 24, 24, 27, 27, 29, 29, 31, 32, 33, 35], [0, 1, 2, 3, 4, 5, 5, 7, 9, 10, 12, 14, 14, 15, 20, 24, 24, 25, 27, 30, 33, 33, 34, 36, 39, 39, 40, 40, 42, 46, 50, 50, 52, 52, 55, 57, 59, 61, 64], [0, 3, 5, 9, 15, 18, 22, 24, 26, 28, 30, 32, 32, 36, 39, 43, 47, 51, 52, 54, 55, 58, 63, 68, 69, 71, 71, 75, 79, 83, 84, 88, 90, 94, 94, 96, 99, 103, 105], [0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 3, 3, 3, 3, 4, 4, 4, 4, 6, 6, 7, 8, 9, 9, 9, 9, 9, 9, 10, 10, 11, 11], [0, 19, 30, 42, 55, 61, 76, 92, 102, 109, 123, 129, 132, 143, 157, 173, 177, 183, 202, 210, 219, 231, 239, 248, 257, 267, 276, 286, 293, 302, 308, 325, 333, 342, 349, 361, 371, 385, 395], [0, 12, 24, 35, 57, 76, 84, 90, 107, 113, 124, 142, 166, 177, 194, 209, 228, 242, 254, 269, 287, 299, 313, 320, 329, 354, 369, 387, 406, 421, 440, 455, 468, 476, 494, 501, 515, 530, 539], [0, 6, 10, 12, 17, 18, 24, 27, 30, 33, 

[['Malaga', [0, 0, 0, 0, 0, 0, 0, 3, 3, 5, 5, 5, 5, 7, 7, 9, 10, 11, 13, 14, 15, 16, 18, 21, 21, 22, 23, 24, 27, 28, 29, 30, 30, 30, 31, 31, 34, 34, 38], [0, 0, 1, 1, 2, 3, 3, 4, 7, 7, 8, 9, 11, 13, 13, 14, 14, 14, 14, 15, 17, 19, 20, 20, 21, 22, 23, 25, 28, 28, 28, 29, 29, 30, 31, 32, 33, 34, 35], [0, 3, 6, 8, 10, 12, 16, 19, 22, 22, 25, 27, 29, 31, 34, 36, 39, 44, 48, 52, 54, 57, 60, 63, 64, 69, 74, 80, 82, 84, 86, 88, 91, 94, 95, 98, 100, 102, 103], [0, 0, 0, 0, 0, 0, 1, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5], [0, 25, 29, 39, 52, 67, 79, 90, 100, 109, 122, 134, 148, 160, 167, 182, 199, 208, 222, 233, 247, 262, 269, 277, 284, 295, 303, 313, 318, 331, 344, 355, 364, 381, 393, 400, 421, 434, 448], [0, 10, 32, 39, 49, 58, 89, 102, 108, 118, 126, 133, 140, 151, 162, 175, 181, 188, 196, 201, 209, 221, 232, 238, 250, 264, 275, 286, 300, 309, 325, 330, 339, 345, 356, 370, 381, 393, 405], [0, 5, 6, 10, 13, 17, 20, 27, 30, 34, 39, 44, 4

[['La Coruna', [0, 2, 2, 2, 2, 3, 3, 5, 5, 6, 7, 8, 10, 13, 18, 20, 22, 23, 23, 24, 25, 26, 26, 26, 27, 28, 29, 31, 31, 31, 31, 33, 35, 35, 37, 39, 40, 40, 43], [0, 1, 1, 2, 2, 4, 5, 6, 10, 14, 15, 16, 19, 23, 24, 27, 27, 28, 28, 29, 32, 34, 35, 39, 40, 40, 41, 42, 43, 46, 46, 50, 50, 51, 57, 59, 61, 61, 61], [0, 3, 5, 7, 11, 11, 13, 13, 15, 20, 21, 23, 26, 27, 28, 32, 33, 34, 34, 35, 37, 39, 44, 49, 51, 56, 60, 62, 66, 67, 68, 71, 74, 77, 80, 83, 84, 88, 92], [0, 0, 0, 0, 0, 0, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4], [0, 11, 23, 39, 55, 67, 75, 96, 103, 114, 130, 143, 147, 171, 186, 190, 198, 211, 224, 231, 251, 259, 275, 286, 299, 306, 321, 335, 344, 357, 369, 378, 389, 394, 413, 428, 449, 452, 461], [0, 12, 27, 34, 43, 51, 72, 79, 104, 113, 123, 137, 157, 167, 181, 198, 211, 225, 229, 238, 250, 263, 269, 276, 286, 295, 300, 316, 325, 339, 349, 368, 379, 396, 418, 434, 443, 464, 477], [0, 6, 7, 10, 11, 16, 17, 23, 23, 24, 28

[['Leganes', [0, 1, 2, 3, 3, 3, 5, 5, 7, 8, 9, 9, 9, 9, 12, 12, 12, 13, 15, 17, 20, 20, 20, 20, 21, 21, 21, 23, 23, 25, 25, 26, 27, 28, 28, 29, 29, 31, 34], [0, 0, 0, 2, 3, 3, 3, 3, 3, 3, 5, 8, 11, 12, 13, 14, 14, 14, 17, 19, 21, 21, 22, 25, 28, 28, 32, 32, 34, 35, 36, 39, 39, 41, 41, 43, 46, 49, 51], [0, 0, 3, 4, 9, 11, 14, 16, 19, 22, 25, 27, 28, 31, 33, 37, 43, 43, 46, 49, 51, 57, 61, 66, 69, 73, 76, 77, 82, 84, 87, 91, 94, 95, 95, 96, 101, 103, 105], [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 3, 4, 4, 5], [0, 16, 28, 39, 44, 56, 65, 72, 91, 106, 116, 129, 147, 156, 168, 171, 181, 189, 199, 207, 218, 220, 222, 232, 239, 250, 259, 269, 281, 293, 301, 312, 331, 337, 350, 366, 379, 388, 396], [0, 6, 23, 34, 44, 54, 62, 73, 86, 93, 102, 115, 126, 140, 152, 167, 177, 186, 202, 216, 228, 239, 258, 268, 281, 285, 300, 304, 317, 327, 330, 342, 347, 364, 376, 388, 398, 409, 425], [0, 9, 17, 24, 26, 27, 30, 33, 42, 46, 47, 53, 60,

[['Betis', [0, 0, 0, 1, 1, 3, 4, 5, 5, 5, 5, 8, 12, 13, 14, 16, 19, 20, 21, 22, 25, 25, 26, 26, 27, 29, 30, 31, 32, 33, 34, 36, 38, 39, 39, 40, 40, 42, 44], [0, 3, 3, 3, 3, 5, 5, 5, 6, 7, 9, 12, 15, 17, 17, 17, 18, 19, 21, 23, 25, 26, 26, 29, 30, 30, 32, 32, 36, 37, 39, 40, 43, 45, 49, 50, 51, 52, 52], [0, 0, 1, 4, 7, 9, 12, 12, 15, 15, 17, 21, 23, 24, 28, 30, 34, 36, 38, 40, 42, 44, 48, 50, 54, 56, 58, 58, 59, 62, 65, 69, 72, 76, 79, 83, 85, 88, 89], [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1], [0, 22, 35, 46, 57, 72, 86, 103, 108, 128, 135, 149, 164, 177, 183, 191, 203, 207, 214, 223, 235, 243, 250, 253, 272, 278, 284, 298, 311, 318, 329, 346, 358, 381, 388, 406, 418, 435, 444], [0, 6, 22, 30, 42, 49, 57, 68, 81, 87, 103, 117, 137, 150, 156, 168, 178, 193, 204, 218, 226, 237, 250, 266, 272, 282, 293, 300, 316, 336, 351, 361, 372, 376, 382, 392, 406, 415, 424], [0, 8, 13, 16, 22, 25, 31, 35, 35, 39, 42, 48, 56, 

[['Ath Bilbao', [0, 1, 2, 4, 4, 6, 7, 7, 7, 8, 8, 11, 11, 13, 15, 17, 19, 19, 19, 20, 21, 22, 22, 23, 23, 24, 25, 29], [0, 0, 1, 1, 1, 1, 2, 3, 4, 5, 7, 7, 7, 8, 9, 9, 12, 12, 12, 13, 14, 15, 17, 19, 20, 22, 22, 23], [0, 1, 2, 4, 7, 8, 9, 11, 16, 18, 20, 20, 20, 23, 26, 31, 35, 38, 38, 42, 46, 47, 49, 51, 55, 59, 65, 67], [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1], [0, 11, 15, 30, 41, 54, 63, 77, 83, 102, 111, 118, 130, 153, 159, 173, 181, 189, 197, 203, 225, 236, 244, 250, 262, 275, 293, 303], [0, 11, 17, 19, 29, 34, 41, 51, 64, 73, 82, 94, 105, 113, 125, 133, 143, 151, 171, 180, 187, 196, 205, 219, 225, 229, 239, 258], [0, 5, 7, 12, 17, 21, 23, 26, 28, 35, 37, 42, 44, 54, 58, 64, 66, 69, 71, 72, 78, 81, 83, 86, 88, 93, 99, 104], [0, 2, 4, 6, 7, 8, 11, 17, 19, 22, 24, 29, 36, 38, 45, 46, 52, 54, 62, 64, 67, 70, 73, 77, 79, 83, 86, 88], [0, 3, 9, 12, 18, 21, 26, 33, 37, 40, 47, 49, 59, 69, 71, 75, 78, 84, 88, 89, 102, 105, 110, 114, 118, 127, 1

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user

Unnamed: 0,Home Team Goals,Away Team Goals,Match Result,Half Time Home Team Goals,Half Time Away Team Goals,Home Team Shots,Away Team Shots,Home Team Shots Target,Away Team Shots Target,Home Team Faults Commited,Away Team Faults Commited,Home Team Corners,Away Team Corners,Home Team Yellow Cards,Away Team Yellow Cards,Home Team Red Cards,Away Team Red Cards,Bet365 Home Win Odds,Bet365 Draw Odds,Bet365 Away Win Odds,Bet&Win Home Win Odds,Bet&Win Draw Odds,Bet&Win Away Win Odds,Interwetten Home Win Odds,Interwetten Draw Odds,Interwetten Away Win Odds,William Hill Home Win Odds,William Hill Draw Odds,William Hill Away Win Odds,VC Bet Home Win Odds,VC Bet Draw Odds,VC Bet Away Win Odds,Home Team Accumulated Scored Goals,Away Team Accumulated Scored Goals,Home Team Accumulated Received Goals,Away Team Accumulated Received Goals,Home Team Accumulated Yellow Cards,Away Team Accumulated Yellow Cards,Home Team Accumulated Red Cards,Away Team Accumulated Red Cards,Home Team Accumulated Thrown Shots,Away Team Accumulated Thrown Shots,Home Team Accumulated Received Shots,Away Team Accumulated Received Shots,Home Team Accumulated Thrown Shots Target,Away Team Accumulated Thrown Shots Target,Home Team Accumulated Received Shots Target,Away Team Accumulated Received Shots Target,Home Team Accumulated Thrown Corners,Away Team Accumulated Thrown Corners,Home Team Accumulated Received Corners,Away Team Accumulated Received Corners,Home Team Accumulated Commited Faults,Away Team Accumulated Commited Faults,Home Team Accumulated Received Faults,Away Team Accumulated Received Faults,Home Team Accumulated Won Matches,Away Team Accumulated Won Matches,Home Team Accumulated Draw Matches,Away Team Accumulated Draw Matches,Home Team Accumulated Lost Matches,Away Team Accumulated Lost Matches,Home Team Games Won Percentage,Away Team Games Won Percentage,Home Team Games Draw Percentage,Away Team Games Draw Percentage,Home Team Games Lost Percentage,Away Team Games Lost Percentage,Home Team Saves Goalkeeper Percentage,Away Team Saves Goalkeeper Percentage,Home Team Saves Goalkeeper Ratio,Away Team Saves Goalkeeper Ratio,Home Team Scoring Percentage,Away Team Scoring Percentage,Home Team Scoring Ratio,Away Team Scoring Ratio,Home Team Accuracy Thrown Shots,Away Team Accuracy Thrown Shots,Home Team Goal Average Ratio,Away Team Goal Average Ratio,Home Team Shots Target Average Ratio,Away Team Shots Target Average Ratio,Home Team Shots Average Ratio,Away Team Shots Average Ratio
count,5590.0,5590.0,5590.0,5590.0,5590.0,5590.0,5590.0,5590.0,5590.0,5590.0,5590.0,5590.0,5590.0,5590.0,5590.0,5590.0,5590.0,5590.0,5590.0,5590.0,5590.0,5590.0,5590.0,5590.0,5590.0,5590.0,5590.0,5590.0,5590.0,5590.0,5590.0,5590.0,5590.0,5590.0,5590.0,5590.0,5590.0,5590.0,5590.0,5590.0,5590.0,5590.0,5590.0,5590.0,5590.0,5590.0,5590.0,5590.0,5590.0,5590.0,5590.0,5590.0,5590.0,5590.0,5590.0,5590.0,5590.0,5590.0,5590.0,5590.0,5590.0,5590.0,5590.0,5590.0,5590.0,5590.0,5590.0,5590.0,5590.0,5590.0,5590.0,5590.0,5590.0,5590.0,5590.0,5590.0,5590.0,5590.0,5590.0,5590.0,5590.0,5590.0,5590.0,5590.0
mean,1.5678,1.137209,1.806798,0.691771,0.498211,13.915206,10.926297,5.01288,3.850447,15.290877,15.176208,5.877818,4.386583,2.439177,2.721825,0.13864,0.176565,2.668369,4.031478,5.019998,2.620708,3.967628,4.814259,2.552682,3.809351,4.543031,2.624901,3.846992,4.871198,2.701887,4.120946,5.232189,24.195349,24.396601,24.377996,24.203041,46.983184,46.85975,2.877102,2.859571,224.688193,226.097317,225.980322,224.727013,80.079428,80.626476,80.584436,80.12093,92.955456,93.597317,93.546691,92.961002,279.132737,279.052594,279.140966,279.113953,6.786583,6.873524,4.478175,4.478354,6.872272,6.785152,0.359163,0.366099,0.245015,0.245408,0.366304,0.358976,0.677677,0.678596,1.405157,1.402955,0.102887,0.102726,3.434123,3.452616,0.342995,0.342931,1.134431,1.15303,1.055621,1.089125,1.019166,1.04523
std,1.336671,1.142532,0.848872,0.859198,0.715031,5.014308,4.450753,2.640677,2.24525,4.866458,4.822738,2.945061,2.524937,1.479436,1.497803,0.377757,0.426936,2.193598,1.643058,4.778062,2.054616,1.553329,4.390387,1.865496,1.2171,3.823901,2.09226,1.425357,4.771138,2.399718,1.905799,5.785115,18.294262,18.399329,16.54486,16.44337,30.078815,30.066617,2.522916,2.524979,142.568534,143.320022,141.878923,140.997653,53.568274,53.858931,51.409647,51.110387,58.953475,59.289297,59.425145,59.204096,177.268017,177.122324,175.389629,175.642532,5.460514,5.499225,3.292146,3.300181,5.183186,5.1354,0.215,0.217289,0.154749,0.15668,0.201025,0.198313,0.147308,0.150072,0.327487,0.348312,0.044939,0.042335,1.409675,1.40419,0.084752,0.084108,0.989105,1.012697,0.637335,0.749532,0.450576,0.506673
min,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.02,2.5,1.08,1.03,2.4,1.09,1.03,2.5,1.1,1.02,2.38,1.08,1.03,2.5,1.08,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
25%,1.0,0.0,1.0,0.0,0.0,10.0,8.0,3.0,2.0,12.0,12.0,4.0,3.0,1.0,2.0,0.0,0.0,1.67,3.25,2.63,1.65,3.25,2.6,1.655,3.2,2.6,1.67,3.2,2.62,1.67,3.25,2.62,10.0,10.0,11.0,10.0,21.0,21.0,1.0,1.0,105.0,106.0,106.0,105.0,36.0,36.0,37.0,37.0,43.0,43.0,43.0,43.0,130.0,127.0,130.0,129.25,3.0,3.0,2.0,2.0,2.0,2.0,0.225806,0.230769,0.153846,0.153846,0.25,0.235294,0.650485,0.650794,1.333333,1.333333,0.080854,0.080893,2.818182,2.818182,0.318182,0.318182,0.608696,0.621622,0.722222,0.734227,0.787234,0.800737
50%,1.0,1.0,2.0,0.0,0.0,13.0,10.0,5.0,4.0,15.0,15.0,6.0,4.0,2.0,3.0,0.0,0.0,2.1,3.4,3.6,2.05,3.4,3.5,2.1,3.4,3.4,2.1,3.3,3.5,2.1,3.5,3.6,22.0,22.0,23.0,23.0,46.0,46.0,2.0,2.0,219.0,219.0,218.0,220.0,77.0,77.0,77.0,77.0,90.0,90.0,90.0,89.0,270.0,270.0,274.0,271.5,6.0,6.0,4.0,4.0,6.0,6.0,0.333333,0.333333,0.241379,0.24,0.384615,0.378844,0.699029,0.7,1.41791,1.416667,0.099562,0.099678,3.35,3.357143,0.34689,0.346705,0.857143,0.875,0.92193,0.937742,0.963325,0.976303
75%,2.0,2.0,3.0,1.0,1.0,17.0,14.0,7.0,5.0,18.0,18.0,8.0,6.0,3.0,4.0,0.0,0.0,2.63,4.0,5.25,2.65,3.9,5.25,2.6,3.8,4.9,2.62,3.8,5.0,2.65,4.0,5.25,34.0,35.0,36.0,36.0,70.0,70.0,4.0,4.0,333.0,335.0,337.0,335.0,116.0,117.0,120.0,119.0,138.0,140.0,141.0,139.0,414.0,414.0,416.0,417.0,10.0,10.0,7.0,7.0,10.0,10.0,0.5,0.5,0.333333,0.333333,0.5,0.5,0.742857,0.743902,1.520833,1.516395,0.125,0.125,3.956522,3.965517,0.383178,0.382622,1.285714,1.307692,1.181818,1.203502,1.152309,1.170521
max,10.0,8.0,3.0,6.0,5.0,37.0,39.0,20.0,16.0,37.0,36.0,20.0,17.0,8.0,9.0,3.0,4.0,26.0,17.0,41.0,34.0,19.5,51.0,20.0,16.0,42.0,26.0,17.0,51.0,36.0,26.0,67.0,117.0,115.0,84.0,89.0,138.0,141.0,14.0,14.0,763.0,797.0,675.0,661.0,302.0,317.0,239.0,236.0,269.0,273.0,260.0,264.0,835.0,870.0,779.0,785.0,31.0,31.0,18.0,18.0,27.0,26.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,6.0,8.0,0.75,0.428571,22.0,17.0,0.857143,1.0,14.0,14.0,9.0,19.0,6.5,13.5


In [105]:
data.tail()

Unnamed: 0,League Division,Match Date,Home Team,Away Team,Home Team Goals,Away Team Goals,Match Result,Half Time Home Team Goals,Half Time Away Team Goals,Half Time Result,Home Team Shots,Away Team Shots,Home Team Shots Target,Away Team Shots Target,Home Team Faults Commited,Away Team Faults Commited,Home Team Corners,Away Team Corners,Home Team Yellow Cards,Away Team Yellow Cards,Home Team Red Cards,Away Team Red Cards,Bet365 Home Win Odds,Bet365 Draw Odds,Bet365 Away Win Odds,Bet&Win Home Win Odds,Bet&Win Draw Odds,Bet&Win Away Win Odds,Interwetten Home Win Odds,Interwetten Draw Odds,Interwetten Away Win Odds,William Hill Home Win Odds,William Hill Draw Odds,William Hill Away Win Odds,VC Bet Home Win Odds,VC Bet Draw Odds,VC Bet Away Win Odds,Home Team Accumulated Scored Goals,Away Team Accumulated Scored Goals,Home Team Accumulated Received Goals,Away Team Accumulated Received Goals,Home Team Accumulated Yellow Cards,Away Team Accumulated Yellow Cards,Home Team Accumulated Red Cards,Away Team Accumulated Red Cards,Home Team Accumulated Thrown Shots,Away Team Accumulated Thrown Shots,Home Team Accumulated Received Shots,Away Team Accumulated Received Shots,Home Team Accumulated Thrown Shots Target,Away Team Accumulated Thrown Shots Target,Home Team Accumulated Received Shots Target,Away Team Accumulated Received Shots Target,Home Team Accumulated Thrown Corners,Away Team Accumulated Thrown Corners,Home Team Accumulated Received Corners,Away Team Accumulated Received Corners,Home Team Accumulated Commited Faults,Away Team Accumulated Commited Faults,Home Team Accumulated Received Faults,Away Team Accumulated Received Faults,Home Team Accumulated Won Matches,Away Team Accumulated Won Matches,Home Team Accumulated Draw Matches,Away Team Accumulated Draw Matches,Home Team Accumulated Lost Matches,Away Team Accumulated Lost Matches,Home Team Games Won Percentage,Away Team Games Won Percentage,Home Team Games Draw Percentage,Away Team Games Draw Percentage,Home Team Games Lost Percentage,Away Team Games Lost Percentage,Home Team Saves Goalkeeper Percentage,Away Team Saves Goalkeeper Percentage,Home Team Saves Goalkeeper Ratio,Away Team Saves Goalkeeper Ratio,Home Team Scoring Percentage,Away Team Scoring Percentage,Home Team Scoring Ratio,Away Team Scoring Ratio,Home Team Accuracy Thrown Shots,Away Team Accuracy Thrown Shots,Home Team Goal Average Ratio,Away Team Goal Average Ratio,Home Team Shots Target Average Ratio,Away Team Shots Target Average Ratio,Home Team Shots Average Ratio,Away Team Shots Average Ratio
265,SP1,08/03/2020,Valladolid,Ath Bilbao,1,4,3,0,2,A,19,10,2,5,9,13,8,1,1,2,0,0,3.0,2.8,2.75,2.95,2.85,2.75,3.0,2.8,2.8,3.0,2.88,2.75,2.9,2.88,2.7,22,25,29,22,58,65,0,1,247,293,339,239,81,99,99,86,108,132,137,113,324,359,397,353,6,8,11,10,9,8,0.230769,0.307692,0.423077,0.384615,0.346154,0.307692,0.707071,0.744186,1.414286,1.34375,0.089069,0.085324,3.681818,3.96,0.327935,0.337884,0.758621,1.136364,0.818182,1.151163,0.728614,1.225941
266,SP1,08/03/2020,Levante,Granada,1,1,2,1,0,H,11,10,3,7,21,13,6,4,3,3,0,0,2.05,3.4,3.75,2.05,3.5,3.6,2.05,3.4,3.65,2.05,3.5,3.6,2.05,3.5,3.5,31,32,39,31,60,77,3,1,298,267,426,312,98,75,157,109,131,116,159,124,302,408,353,367,10,11,2,4,14,11,0.384615,0.423077,0.076923,0.153846,0.538462,0.423077,0.751592,0.715596,1.330508,1.397436,0.104027,0.11985,3.16129,2.34375,0.328859,0.280899,0.794872,1.032258,0.624204,0.688073,0.699531,0.855769
267,SP1,08/03/2020,Villarreal,Leganes,1,2,3,1,0,H,15,12,5,7,10,11,2,5,4,3,0,0,1.75,3.6,5.0,1.67,3.8,5.25,1.75,3.7,4.8,1.73,3.7,5.0,1.7,3.7,5.0,43,19,36,38,57,87,0,3,366,292,352,247,125,107,114,99,114,115,170,117,331,406,310,375,11,4,5,8,10,14,0.423077,0.153846,0.192308,0.307692,0.384615,0.538462,0.684211,0.616162,1.461538,1.622951,0.117486,0.065068,2.906977,5.631579,0.34153,0.366438,1.194444,0.5,1.096491,1.080808,1.039773,1.182186
268,SP1,08/03/2020,Betis,Real Madrid,2,1,1,1,1,D,14,13,3,4,8,11,6,2,1,1,0,0,4.5,4.0,1.72,4.33,4.1,1.75,4.15,4.0,1.77,4.33,4.0,1.75,4.1,4.1,1.75,36,48,42,17,75,59,7,4,318,420,327,217,106,164,113,69,121,148,144,108,322,337,423,339,7,16,9,8,10,2,0.269231,0.615385,0.346154,0.307692,0.384615,0.076923,0.628319,0.753623,1.591549,1.326923,0.113208,0.114286,2.944444,3.416667,0.333333,0.390476,0.857143,2.823529,0.938053,2.376812,0.972477,1.935484
269,SP1,10/03/2020,Eibar,Sociedad,1,2,3,0,1,A,13,6,4,2,12,10,3,1,3,2,0,0,3.4,3.3,2.2,3.3,3.3,2.25,3.3,3.3,2.25,3.4,3.3,2.2,3.4,3.3,2.2,26,43,39,32,69,51,3,1,286,308,263,293,90,104,100,103,125,128,125,109,377,359,320,343,7,13,6,4,13,9,0.269231,0.5,0.230769,0.153846,0.5,0.346154,0.61,0.68932,1.639344,1.450704,0.090909,0.13961,3.461538,2.418605,0.314685,0.337662,0.666667,1.34375,0.9,1.009709,1.087452,1.051195


In [106]:
data.describe()

Unnamed: 0,Home Team Goals,Away Team Goals,Match Result,Half Time Home Team Goals,Half Time Away Team Goals,Home Team Shots,Away Team Shots,Home Team Shots Target,Away Team Shots Target,Home Team Faults Commited,Away Team Faults Commited,Home Team Corners,Away Team Corners,Home Team Yellow Cards,Away Team Yellow Cards,Home Team Red Cards,Away Team Red Cards,Bet365 Home Win Odds,Bet365 Draw Odds,Bet365 Away Win Odds,Bet&Win Home Win Odds,Bet&Win Draw Odds,Bet&Win Away Win Odds,Interwetten Home Win Odds,Interwetten Draw Odds,Interwetten Away Win Odds,William Hill Home Win Odds,William Hill Draw Odds,William Hill Away Win Odds,VC Bet Home Win Odds,VC Bet Draw Odds,VC Bet Away Win Odds,Home Team Accumulated Scored Goals,Away Team Accumulated Scored Goals,Home Team Accumulated Received Goals,Away Team Accumulated Received Goals,Home Team Accumulated Yellow Cards,Away Team Accumulated Yellow Cards,Home Team Accumulated Red Cards,Away Team Accumulated Red Cards,Home Team Accumulated Thrown Shots,Away Team Accumulated Thrown Shots,Home Team Accumulated Received Shots,Away Team Accumulated Received Shots,Home Team Accumulated Thrown Shots Target,Away Team Accumulated Thrown Shots Target,Home Team Accumulated Received Shots Target,Away Team Accumulated Received Shots Target,Home Team Accumulated Thrown Corners,Away Team Accumulated Thrown Corners,Home Team Accumulated Received Corners,Away Team Accumulated Received Corners,Home Team Accumulated Commited Faults,Away Team Accumulated Commited Faults,Home Team Accumulated Received Faults,Away Team Accumulated Received Faults,Home Team Accumulated Won Matches,Away Team Accumulated Won Matches,Home Team Accumulated Draw Matches,Away Team Accumulated Draw Matches,Home Team Accumulated Lost Matches,Away Team Accumulated Lost Matches,Home Team Games Won Percentage,Away Team Games Won Percentage,Home Team Games Draw Percentage,Away Team Games Draw Percentage,Home Team Games Lost Percentage,Away Team Games Lost Percentage,Home Team Saves Goalkeeper Percentage,Away Team Saves Goalkeeper Percentage,Home Team Saves Goalkeeper Ratio,Away Team Saves Goalkeeper Ratio,Home Team Scoring Percentage,Away Team Scoring Percentage,Home Team Scoring Ratio,Away Team Scoring Ratio,Home Team Accuracy Thrown Shots,Away Team Accuracy Thrown Shots,Home Team Goal Average Ratio,Away Team Goal Average Ratio,Home Team Shots Target Average Ratio,Away Team Shots Target Average Ratio,Home Team Shots Average Ratio,Away Team Shots Average Ratio
count,5590.0,5590.0,5590.0,5590.0,5590.0,5590.0,5590.0,5590.0,5590.0,5590.0,5590.0,5590.0,5590.0,5590.0,5590.0,5590.0,5590.0,5590.0,5590.0,5590.0,5590.0,5590.0,5590.0,5590.0,5590.0,5590.0,5590.0,5590.0,5590.0,5590.0,5590.0,5590.0,5590.0,5590.0,5590.0,5590.0,5590.0,5590.0,5590.0,5590.0,5590.0,5590.0,5590.0,5590.0,5590.0,5590.0,5590.0,5590.0,5590.0,5590.0,5590.0,5590.0,5590.0,5590.0,5590.0,5590.0,5590.0,5590.0,5590.0,5590.0,5590.0,5590.0,5590.0,5590.0,5590.0,5590.0,5590.0,5590.0,5590.0,5590.0,5590.0,5590.0,5590.0,5590.0,5590.0,5590.0,5590.0,5590.0,5590.0,5590.0,5590.0,5590.0,5590.0,5590.0
mean,1.5678,1.137209,1.806798,0.691771,0.498211,13.915206,10.926297,5.01288,3.850447,15.290877,15.176208,5.877818,4.386583,2.439177,2.721825,0.13864,0.176565,2.668369,4.031478,5.019998,2.620708,3.967628,4.814259,2.552682,3.809351,4.543031,2.624901,3.846992,4.871198,2.701887,4.120946,5.232189,24.195349,24.396601,24.377996,24.203041,46.983184,46.85975,2.877102,2.859571,224.688193,226.097317,225.980322,224.727013,80.079428,80.626476,80.584436,80.12093,92.955456,93.597317,93.546691,92.961002,279.132737,279.052594,279.140966,279.113953,6.786583,6.873524,4.478175,4.478354,6.872272,6.785152,0.359163,0.366099,0.245015,0.245408,0.366304,0.358976,0.677677,0.678596,1.405157,1.402955,0.102887,0.102726,3.434123,3.452616,0.342995,0.342931,1.134431,1.15303,1.055621,1.089125,1.019166,1.04523
std,1.336671,1.142532,0.848872,0.859198,0.715031,5.014308,4.450753,2.640677,2.24525,4.866458,4.822738,2.945061,2.524937,1.479436,1.497803,0.377757,0.426936,2.193598,1.643058,4.778062,2.054616,1.553329,4.390387,1.865496,1.2171,3.823901,2.09226,1.425357,4.771138,2.399718,1.905799,5.785115,18.294262,18.399329,16.54486,16.44337,30.078815,30.066617,2.522916,2.524979,142.568534,143.320022,141.878923,140.997653,53.568274,53.858931,51.409647,51.110387,58.953475,59.289297,59.425145,59.204096,177.268017,177.122324,175.389629,175.642532,5.460514,5.499225,3.292146,3.300181,5.183186,5.1354,0.215,0.217289,0.154749,0.15668,0.201025,0.198313,0.147308,0.150072,0.327487,0.348312,0.044939,0.042335,1.409675,1.40419,0.084752,0.084108,0.989105,1.012697,0.637335,0.749532,0.450576,0.506673
min,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.02,2.5,1.08,1.03,2.4,1.09,1.03,2.5,1.1,1.02,2.38,1.08,1.03,2.5,1.08,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
25%,1.0,0.0,1.0,0.0,0.0,10.0,8.0,3.0,2.0,12.0,12.0,4.0,3.0,1.0,2.0,0.0,0.0,1.67,3.25,2.63,1.65,3.25,2.6,1.655,3.2,2.6,1.67,3.2,2.62,1.67,3.25,2.62,10.0,10.0,11.0,10.0,21.0,21.0,1.0,1.0,105.0,106.0,106.0,105.0,36.0,36.0,37.0,37.0,43.0,43.0,43.0,43.0,130.0,127.0,130.0,129.25,3.0,3.0,2.0,2.0,2.0,2.0,0.225806,0.230769,0.153846,0.153846,0.25,0.235294,0.650485,0.650794,1.333333,1.333333,0.080854,0.080893,2.818182,2.818182,0.318182,0.318182,0.608696,0.621622,0.722222,0.734227,0.787234,0.800737
50%,1.0,1.0,2.0,0.0,0.0,13.0,10.0,5.0,4.0,15.0,15.0,6.0,4.0,2.0,3.0,0.0,0.0,2.1,3.4,3.6,2.05,3.4,3.5,2.1,3.4,3.4,2.1,3.3,3.5,2.1,3.5,3.6,22.0,22.0,23.0,23.0,46.0,46.0,2.0,2.0,219.0,219.0,218.0,220.0,77.0,77.0,77.0,77.0,90.0,90.0,90.0,89.0,270.0,270.0,274.0,271.5,6.0,6.0,4.0,4.0,6.0,6.0,0.333333,0.333333,0.241379,0.24,0.384615,0.378844,0.699029,0.7,1.41791,1.416667,0.099562,0.099678,3.35,3.357143,0.34689,0.346705,0.857143,0.875,0.92193,0.937742,0.963325,0.976303
75%,2.0,2.0,3.0,1.0,1.0,17.0,14.0,7.0,5.0,18.0,18.0,8.0,6.0,3.0,4.0,0.0,0.0,2.63,4.0,5.25,2.65,3.9,5.25,2.6,3.8,4.9,2.62,3.8,5.0,2.65,4.0,5.25,34.0,35.0,36.0,36.0,70.0,70.0,4.0,4.0,333.0,335.0,337.0,335.0,116.0,117.0,120.0,119.0,138.0,140.0,141.0,139.0,414.0,414.0,416.0,417.0,10.0,10.0,7.0,7.0,10.0,10.0,0.5,0.5,0.333333,0.333333,0.5,0.5,0.742857,0.743902,1.520833,1.516395,0.125,0.125,3.956522,3.965517,0.383178,0.382622,1.285714,1.307692,1.181818,1.203502,1.152309,1.170521
max,10.0,8.0,3.0,6.0,5.0,37.0,39.0,20.0,16.0,37.0,36.0,20.0,17.0,8.0,9.0,3.0,4.0,26.0,17.0,41.0,34.0,19.5,51.0,20.0,16.0,42.0,26.0,17.0,51.0,36.0,26.0,67.0,117.0,115.0,84.0,89.0,138.0,141.0,14.0,14.0,763.0,797.0,675.0,661.0,302.0,317.0,239.0,236.0,269.0,273.0,260.0,264.0,835.0,870.0,779.0,785.0,31.0,31.0,18.0,18.0,27.0,26.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,6.0,8.0,0.75,0.428571,22.0,17.0,0.857143,1.0,14.0,14.0,9.0,19.0,6.5,13.5
