In [28]:
import pandas as pd

In [29]:
# Load the per-player match statistics from a CSV file in the working directory.
# Later cells read the `plus_minus`, `kd`, `kast`, `adr`, `rating` and `player` columns from this frame.
df = pd.read_csv("match_stats.csv")

In [30]:
# Sanity check: show the dtype and the first rows of `plus_minus`.
print(df["plus_minus"].dtype, df["plus_minus"].head(), sep="\n")


int64
0    9
1   -2
2   -6
3   -8
4   -8
Name: plus_minus, dtype: int64


In [31]:
# Split the "kills-deaths" string in `kd` (e.g. "37-28") into two integer columns.
# `kd` itself is never modified, so this step is safe to re-run.
df[["kills", "deaths"]] = df["kd"].str.split("-", expand=True).astype(int)

# Convert kast from a percentage string like '70.7%' into a fraction (0.707).
# Guarded on dtype so the cell is idempotent: once `kast` is numeric, `.str`
# would raise AttributeError and a second division by 100 would corrupt it.
# NOTE(review): this assumes a numeric `kast` has already been scaled by this
# cell — confirm the raw CSV always stores it as a '%'-suffixed string.
if not pd.api.types.is_numeric_dtype(df["kast"]):
    df["kast"] = df["kast"].str.replace("%", "", regex=False).astype(float) / 100

# Columns used as model inputs, and the column the models learn to predict
features = ["kills", "deaths", "adr", "kast"]
target = "rating"

df[features + [target]]


Unnamed: 0,kills,deaths,adr,kast,rating
0,37,28,91.1,0.707,1.24
1,28,30,70.6,0.756,0.97
2,24,30,69.3,0.659,0.92
3,22,30,71.1,0.659,0.87
4,22,30,63.5,0.610,0.74
...,...,...,...,...,...
12427,15,20,75.4,0.522,0.89
12428,12,19,55.2,0.696,0.74
12429,8,18,53.0,0.565,0.67
12430,9,18,53.8,0.522,0.64


In [32]:
from sklearn.linear_model import LinearRegression

# Feature matrix and target vector, taken from the prepared frame
X, y = df[features], df[target]

# Fit a linear regression on every available row
lin_reg = LinearRegression().fit(X, y)

# Report the learned weight of each feature, plus the intercept
coeffs = {name: weight for name, weight in zip(features, lin_reg.coef_)}
print("Coefficients:", coeffs)
print("Intercept:", lin_reg.intercept_)

# In-sample predictions (same rows the model was fitted on), kept on the frame
pred_linear = lin_reg.predict(X)
df["prediction_linear"] = pred_linear
df


Coefficients: {'kills': 0.010337313096696735, 'deaths': -0.011602705112321351, 'adr': 0.010046925016944867, 'kast': 0.881570654477619}
Intercept: -0.2943601658729815


Unnamed: 0,match_url,teamName,tableType,player,kd,plus_minus,adr,kast,rating,kills,deaths,prediction_linear
0,https://www.hltv.org/matches/2380124/liquid-vs...,Liquid,table totalstats,Justin 'jks' Savagejks,37-28,9,91.1,0.707,1.24,37,28,1.301790
1,https://www.hltv.org/matches/2380124/liquid-vs...,Liquid,table totalstats,Guy 'NertZ' IluzNertZ,28-30,-2,70.6,0.756,0.97,28,30,1.022784
2,https://www.hltv.org/matches/2380124/liquid-vs...,Liquid,table totalstats,Roland 'ultimate' Tomkowiakultimate,24-30,-6,69.3,0.659,0.92,24,30,0.882861
3,https://www.hltv.org/matches/2380124/liquid-vs...,Liquid,table totalstats,Russel 'Twistzz' Van DulkenTwistzz,22-30,-8,71.1,0.659,0.87,22,30,0.880271
4,https://www.hltv.org/matches/2380124/liquid-vs...,Liquid,table totalstats,Keith 'NAF' MarkovicNAF,22-30,-8,63.5,0.610,0.74,22,30,0.760717
...,...,...,...,...,...,...,...,...,...,...,...,...
12427,https://www.hltv.org/matches/2379427/imperial-...,Legacy,table tstats hidden,Eduardo 'dumau' Wolkmerdumau,15-20,-5,75.4,0.522,0.89,15,20,0.846363
12428,https://www.hltv.org/matches/2379427/imperial-...,Legacy,table tstats hidden,Bruno 'latto' Rebelattolatto,12-19,-7,55.2,0.696,0.74,12,19,0.777400
12429,https://www.hltv.org/matches/2379427/imperial-...,Legacy,table tstats hidden,Vinicius 'n1ssim' Pereiran1ssim,8-18,-10,53.0,0.565,0.67,8,18,0.610064
12430,https://www.hltv.org/matches/2379427/imperial-...,Legacy,table tstats hidden,Lucas 'lux' Meneghinilux,9-18,-9,53.8,0.522,0.64,9,18,0.590531


In [33]:
# Residual of the linear model: actual rating minus predicted rating
df["error_linear"] = df["rating"] - df["prediction_linear"]
# Size of each residual, ignoring its sign
df["abs_error_linear"] = df["error_linear"].abs()

# Actual vs. predicted rating, side by side, for every player row
df[["player", "rating", "prediction_linear", "error_linear", "abs_error_linear"]]


Unnamed: 0,player,rating,prediction_linear,error_linear,abs_error_linear
0,Justin 'jks' Savagejks,1.24,1.301790,-0.061790,0.061790
1,Guy 'NertZ' IluzNertZ,0.97,1.022784,-0.052784,0.052784
2,Roland 'ultimate' Tomkowiakultimate,0.92,0.882861,0.037139,0.037139
3,Russel 'Twistzz' Van DulkenTwistzz,0.87,0.880271,-0.010271,0.010271
4,Keith 'NAF' MarkovicNAF,0.74,0.760717,-0.020717,0.020717
...,...,...,...,...,...
12427,Eduardo 'dumau' Wolkmerdumau,0.89,0.846363,0.043637,0.043637
12428,Bruno 'latto' Rebelattolatto,0.74,0.777400,-0.037400,0.037400
12429,Vinicius 'n1ssim' Pereiran1ssim,0.67,0.610064,0.059936,0.059936
12430,Lucas 'lux' Meneghinilux,0.64,0.590531,0.049469,0.049469


In [34]:
# Summary statistics of the linear-model residuals
error_stats = df["error_linear"].describe()
print("Error Statistics:", error_stats, sep="\n")


Error Statistics:
count    1.243200e+04
mean     1.375991e-16
std      1.297725e-01
min     -9.457388e-01
25%     -7.871426e-02
50%     -5.265727e-05
75%      7.308377e-02
max      2.122932e+00
Name: error_linear, dtype: float64


In [35]:
from sklearn.ensemble import RandomForestRegressor, GradientBoostingRegressor

# Random forest with 30 trees (fixed seed), fitted and scored on the same rows
rf = RandomForestRegressor(n_estimators=30, random_state=5).fit(X, y)
pred_rf = rf.predict(X)

# Gradient boosting with 10 stages (fixed seed), fitted and scored on the same rows
gb = GradientBoostingRegressor(n_estimators=10, random_state=0).fit(X, y)
pred_gb = gb.predict(X)

# Store both sets of in-sample predictions next to the actual ratings
df["prediction_rf"] = pred_rf
df["prediction_gb"] = pred_gb

df


Unnamed: 0,match_url,teamName,tableType,player,kd,plus_minus,adr,kast,rating,kills,deaths,prediction_linear,error_linear,abs_error_linear,prediction_rf,prediction_gb
0,https://www.hltv.org/matches/2380124/liquid-vs...,Liquid,table totalstats,Justin 'jks' Savagejks,37-28,9,91.1,0.707,1.24,37,28,1.301790,-0.061790,0.061790,1.253000,1.162186
1,https://www.hltv.org/matches/2380124/liquid-vs...,Liquid,table totalstats,Guy 'NertZ' IluzNertZ,28-30,-2,70.6,0.756,0.97,28,30,1.022784,-0.052784,0.052784,0.995667,1.045190
2,https://www.hltv.org/matches/2380124/liquid-vs...,Liquid,table totalstats,Roland 'ultimate' Tomkowiakultimate,24-30,-6,69.3,0.659,0.92,24,30,0.882861,0.037139,0.037139,0.909333,0.952681
3,https://www.hltv.org/matches/2380124/liquid-vs...,Liquid,table totalstats,Russel 'Twistzz' Van DulkenTwistzz,22-30,-8,71.1,0.659,0.87,22,30,0.880271,-0.010271,0.010271,0.864667,0.952681
4,https://www.hltv.org/matches/2380124/liquid-vs...,Liquid,table totalstats,Keith 'NAF' MarkovicNAF,22-30,-8,63.5,0.610,0.74,22,30,0.760717,-0.020717,0.020717,0.757333,0.897175
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
12427,https://www.hltv.org/matches/2379427/imperial-...,Legacy,table tstats hidden,Eduardo 'dumau' Wolkmerdumau,15-20,-5,75.4,0.522,0.89,15,20,0.846363,0.043637,0.043637,0.871000,0.964405
12428,https://www.hltv.org/matches/2379427/imperial-...,Legacy,table tstats hidden,Bruno 'latto' Rebelattolatto,12-19,-7,55.2,0.696,0.74,12,19,0.777400,-0.037400,0.037400,0.732333,0.909805
12429,https://www.hltv.org/matches/2379427/imperial-...,Legacy,table tstats hidden,Vinicius 'n1ssim' Pereiran1ssim,8-18,-10,53.0,0.565,0.67,8,18,0.610064,0.059936,0.059936,0.641000,0.794860
12430,https://www.hltv.org/matches/2379427/imperial-...,Legacy,table tstats hidden,Lucas 'lux' Meneghinilux,9-18,-9,53.8,0.522,0.64,9,18,0.590531,0.049469,0.049469,0.626667,0.773121


In [36]:
# Residuals (actual rating minus prediction) for the two tree-based models
df["error_rf"] = df["rating"].sub(df["prediction_rf"])
df["error_gb"] = df["rating"].sub(df["prediction_gb"])

# Random forest residual summary
error_stats = df["error_rf"].describe()
print("Error Statistics:", error_stats, sep="\n")

# Gradient boosting residual summary (rebinds `error_stats`)
error_stats = df["error_gb"].describe()
print("Error Statistics:", error_stats, sep="\n")


Error Statistics:
count    12432.000000
mean        -0.000164
std          0.042551
min         -0.327000
25%         -0.024000
50%         -0.000667
75%          0.023000
max          0.668667
Name: error_rf, dtype: float64
Error Statistics:
count    1.243200e+04
mean    -3.907928e-17
std      1.947475e-01
min     -7.286053e-01
25%     -1.214544e-01
50%     -1.240295e-02
75%      1.033037e-01
max      2.868194e+00
Name: error_gb, dtype: float64


In [38]:
import pandas as pd

# Two hypothetical stat lines to score with the fitted random forest.
# Each list holds one value per new entry; `columns=features` pins the column
# order to the one used when the model was fitted.
new_data = pd.DataFrame(
    {
        "kills": [20, 30],
        "deaths": [15, 16],
        "adr": [80.8, 122],
        "kast": [0.875, 0.75],
    },
    columns=features,
)

pred_rf_n = rf.predict(new_data)
pred_rf_n

array([1.309     , 1.70933333])