In [1]:
import pandas as pd

from sklearn import (
    model_selection, linear_model, metrics, 
    feature_selection, pipeline, base,
)

# Score prediction

## loading data

In [5]:
df1 = 'AimoScore_WeakLink_big_scores.xls'
df1 = pd.read_excel(df1)
df1

Unnamed: 0,AimoScore,No_1_Angle_Deviation,No_2_Angle_Deviation,No_3_Angle_Deviation,No_4_Angle_Deviation,No_5_Angle_Deviation,No_6_Angle_Deviation,No_7_Angle_Deviation,No_8_Angle_Deviation,No_9_Angle_Deviation,...,No_19_NASM_Deviation,No_20_NASM_Deviation,No_21_NASM_Deviation,No_22_NASM_Deviation,No_23_NASM_Deviation,No_24_NASM_Deviation,No_25_NASM_Deviation,No_1_Time_Deviation,No_2_Time_Deviation,EstimatedScore
0,0.323667,0.538020,0.815878,0.346724,0.382114,0.302248,0.947872,0.275945,0.521760,0.457198,...,0.833094,0.656624,0.642276,0.552846,0.648972,0.578192,0.560019,0.821616,0.818747,0.209947
1,0.323699,0.443807,0.306552,0.823529,0.188905,0.497370,0.140124,0.664275,0.521760,0.729316,...,0.670971,0.721186,0.826399,0.805356,0.848876,0.889048,0.816834,0.307987,0.248207,0.457198
2,0.848327,0.603539,0.373984,0.346724,0.590626,0.341942,0.298900,0.276901,0.623625,0.658058,...,0.670971,0.656624,0.642276,0.690579,0.648972,0.578192,0.555715,0.218556,0.235294,0.107126
3,0.351332,0.484935,0.623625,0.380201,0.975132,0.509326,0.888570,0.363462,0.847441,0.237207,...,0.953611,0.656624,0.642276,0.552846,0.648972,0.578192,0.744620,0.458154,0.432807,0.612626
4,0.627181,0.860832,0.657580,0.745576,0.552846,0.375897,0.483022,0.388331,0.521760,0.387853,...,0.670971,0.656624,0.642276,0.552846,0.648972,0.578192,0.308943,0.805356,0.774271,0.153515
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2089,0.407197,0.279770,0.131038,0.346724,0.317073,0.945481,0.646581,0.913439,0.521760,0.570540,...,0.670971,0.945002,0.642276,0.596365,0.648972,0.749880,0.943568,0.684840,0.711621,0.877571
2090,0.965930,0.564802,0.311813,0.376375,0.188905,0.187948,0.347202,0.227642,0.521760,0.248685,...,0.670971,0.753228,0.642276,0.552846,0.648972,0.869440,0.316117,0.148733,0.151124,0.030129
2091,0.527640,0.404489,0.214422,0.822827,0.937440,0.739255,0.909265,0.681471,0.658071,0.822350,...,0.973257,0.886819,0.640401,0.525310,0.776982,0.822827,0.367717,0.584050,0.570201,0.613658
2092,0.407197,0.606972,0.863897,0.467526,0.225406,0.946036,0.706781,0.934097,0.303247,0.576409,...,0.941738,0.886819,0.640401,0.725883,0.757402,0.822827,0.943649,0.953200,0.956543,0.761700


## applying the train test split

In [6]:
X1 = df1.drop(['AimoScore','EstimatedScore'], axis= 1)
Y1 = df1['AimoScore']

trainX1, testX1, trainY1, testY1 = model_selection.train_test_split(X1, Y1, test_size= 0.2)

## training different models

### Linear Reggression

In [7]:
model = linear_model.SGDRegressor()
model.fit(trainX1,trainY1)

testP1 = model.predict(testX1)
metrics.mean_squared_error(testY1,testP1)

0.040609169234926766

### Linear Regression with feature selection

In [8]:
model = pipeline.Pipeline([
    ('1',feature_selection.VarianceThreshold(threshold= 0.01)),
    ('2',linear_model.SGDRegressor())
])

model.fit(trainX1,trainY1)

testP1 = model.predict(testX1)
metrics.mean_squared_error(testY1,testP1)

0.04357650881647673

### Weighted linear regression

In [9]:
class WeightApplicator(base.BaseEstimator, base.TransformerMixin):
    weights = [
        1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 
        2, 2, 2, 1, 1, 1, 2, 2, 1, 1, 
        1, 1, 2, 4, 4, 2, 2, 2, 2, 2, 
        1, 1, 1, 2, 2, 2, 2, 2, 1, 1,
    ]
    def fit(self, X, y=None):
        return self

    def transform(self, X, y=None):
        # Perform arbitary transformation
        X * self.weights
        return X
    


model = pipeline.Pipeline([
    ('1',WeightApplicator()),
    ('2',linear_model.SGDRegressor())
])

model.fit(trainX1,trainY1)

testP1 = model.predict(testX1)
metrics.mean_squared_error(testY1,testP1)

0.04034742334401832