In [67]:
%matplotlib inline

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import statsmodels.api as sm
import seaborn as sns 
import kagglehub
from kagglehub import KaggleDatasetAdapter

sns.set(style="darkgrid")

In [68]:
file_path = "PremierLeague.csv"

# Load the latest version
df = kagglehub.dataset_load(
  KaggleDatasetAdapter.PANDAS,
  "ajaxianazarenka/premier-league",
  file_path,
)

In [69]:
home = df.rename(columns={
    "HomeTeam": "Team",
    "AwayTeam": "Opponent",
    "FullTimeHomeTeamGoals": "GF",
    "FullTimeAwayTeamGoals": "GA"
})[["MatchID", "Season", "MatchWeek", "Team", "Opponent", "GF", "GA"]]

away = df.rename(columns={
    "AwayTeam": "Team",
    "HomeTeam": "Opponent",
    "FullTimeAwayTeamGoals": "GF",
    "FullTimeHomeTeamGoals": "GA"
})[["MatchID", "Season", "MatchWeek", "Team", "Opponent", "GF", "GA"]]

allMatches = pd.concat([home, away])
allMatches["GD"] = allMatches["GF"] - allMatches["GA"]
teamResults = allMatches.groupby(["Season", "Team"]).agg(
    GF = ("GF", "sum"),
    GA = ("GA", "sum"),
    G = ("MatchWeek", "count"),
    W = ("GD", lambda x: (x > 0).sum()),
    D = ("GD", lambda x: (x == 0).sum()),
)
teamResults["P"] = (3 * teamResults["W"] + 1 * teamResults["D"])
teamResults["PR"] = (3 * teamResults["W"] + 1 * teamResults["D"]) / (3 * teamResults["G"])
teamResults["GR"] = np.log((teamResults["GF"] / teamResults["GA"]))

In [70]:
X = sm.add_constant(teamResults["GR"])
y = teamResults["PR"]

fit = sm.GLM(
    y, X,
    family=sm.families.Binomial(), 
    freq_weights=teamResults["W"]
).fit(cov_type="HC1") 

alpha = fit.params["const"]
k = fit.params["GR"]

print(fit.summary())
print("alpha =", alpha)
print("k =", k)

                 Generalized Linear Model Regression Results                  
Dep. Variable:                     PR   No. Observations:                  644
Model:                            GLM   Df Residuals:                     9065
Model Family:                Binomial   Df Model:                            1
Link Function:                  Logit   Scale:                          1.0000
Method:                          IRLS   Log-Likelihood:                -3891.9
Date:                Mon, 26 Jan 2026   Deviance:                       69.010
Time:                        14:36:46   Pearson chi2:                     68.3
No. Iterations:                     4   Pseudo R-squ. (CS):             0.7048
Covariance Type:                  HC1                                         
                 coef    std err          z      P>|z|      [0.025      0.975]
------------------------------------------------------------------------------
const         -0.1469      0.002    -79.454      0.0

In [1]:
!git status

On branch main
All conflicts fixed but you are still merging.
  (use "git commit" to conclude merge)

Changes to be committed:
	[32mnew file:   .gitignore[m
	[32mnew file:   README.md[m

Changes not staged for commit:
  (use "git add <file>..." to update what will be committed)
  (use "git restore <file>..." to discard changes in working directory)
	[31mmodified:   Pythagorean Calculation.ipynb[m



In [3]:
!git add .

In [None]:
!git commit

<emier League Pythagorean Expectation/.git/COMMIT_EDITMSG" 26L, 885B[2;1Hâ–½[6n[2;1H  [3;1HPzz\[0%m[6n[3;1H           [1;1H[>c]11;?]10;?[1;1HMerge branch 'main' of https://github.com/william-w-zhao/premier-league-pythagorr[2;1Hean-expectation[2;16H[K[3;1H# Please enter a commit message to explain why this merge is necessary,[3;72H[K[4;1H# especially if it merges an updated upstream into a topic branch.
#
# Lines starting with '#' will be ignored, and an empty message aborts
# the commit.
#
# It looks like you may be committing a merge.
# If this is not correct, please run
#[7Cgit update-ref -d MERGE_HEAD
# and try again.


# Please enter the commit message for your changes. Lines starting
# with '#' will be ignored, and an empty message aborts the commit.
#
# Committer: William Zhao <williamzhao@dynamic-oit-ip4-wifirestricted03-10-48-399[19;1H-3.princeton.edu>
#
# On branch main
# All conflicts fixed but you are still merging.
#[1;1H[?25h[?4m