In [1]:
import pandas as pd
import numpy as np
from sklearn.pipeline import Pipeline
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import RobustScaler
from sklearn.impute import KNNImputer
from sklearn.ensemble import GradientBoostingRegressor
from sklearn.feature_selection import SelectFromModel
from sklearn.linear_model import LassoCV

In [2]:
# Read the training table and the test inputs from their Excel files.
datos_train = pd.read_excel('train.xlsx')
datos_test = pd.read_excel('test_inputs.xlsx')

# The 'deseada' column is the regression target; every other column of the
# training table is used as a feature.
y_train = datos_train['deseada']
X_train = datos_train.drop(columns=['deseada'])

# The test table is handed to the model as-is (nothing is dropped from it).
X_test = datos_test

In [3]:
# Preprocessing applied to every feature: impute missing values with a
# 7-neighbour KNN imputer, then scale with RobustScaler.
prep = Pipeline(steps=[
    ('imputadorKNN', KNNImputer(n_neighbors=7)),
    ('escalador', RobustScaler()),
])

# Feature selection driven by a cross-validated Lasso: alpha is searched over
# 100 log-spaced values between 1e-4 and 1e1 with 5-fold CV, and
# threshold='median' is the cut-off SelectFromModel applies to the fitted
# model's feature importances.
selector_lasso = SelectFromModel(
    estimator=LassoCV(
        alphas=np.logspace(-4, 1, num=100),
        cv=5,
        max_iter=10000,
        random_state=42,
    ),
    threshold='median',
)

In [4]:
# End-to-end model: preprocessing -> Lasso-based feature selection -> gradient
# boosting regressor. All steps live in a single Pipeline, so the imputer,
# scaler and selector are fitted only on the data passed to `fit`.
#
# NOTE(review): the tuned values below presumably come from a hyper-parameter
# search that is not shown in this notebook -- confirm their origin.
pipeline = Pipeline(steps=[
    ('preprocessor', prep),
    ('selector', selector_lasso),
    ('model', GradientBoostingRegressor(
        # Plain float literals: no numpy scalar wrapper is needed here.
        learning_rate=0.017279572377986996,
        n_estimators=780,
        subsample=0.6703701010709381,
        max_depth=4,
        max_features=0.7428068767931133,
        min_samples_leaf=3,
        min_samples_split=17,
        random_state=42,
    )),
])

In [5]:
# Fit every pipeline step on the training data, then predict the test inputs.
# `fit` returns the fitted pipeline itself, so the two calls can be chained.
predicciones = pipeline.fit(X_train, y_train).predict(X_test)

In [6]:
# Print the full array of predictions produced for the test inputs.
print(predicciones)

[46.31363742 50.75403124 72.932358   36.17442015 13.44993525 43.61493493
 24.16289096 55.29516774 37.17618919 48.95589619 39.08112621 11.43868258
 43.18943315 45.9639074  28.48938526 23.5741378  36.48235116 35.78424354
 39.10419172 27.87421466 36.2163677  37.13367572 47.86201015 10.12453201
 33.4544904  20.69748268 12.51294295 45.33906825 52.11133471 14.16445851
 46.5846509  44.01788456 43.6733046  57.21887072 35.41230599 39.56330273
 36.36469153 42.93243264 11.35292038 50.29383268 15.37974038  5.91315906
 38.66189906 43.96046693 10.63633603 66.58014062 35.80583238 37.21421131
 26.09022217 11.72259253 49.58896317 32.36512837 27.58633232 19.45362018
 38.46790728 29.38839342 32.23189593 12.30216656 22.93044753 23.84777355
 35.84601275 14.22417561 32.45457241 52.1542492  27.46995618 24.21591108
 35.67876953 15.2526896  33.71271598 26.21240854 20.1646296  23.47340034
  9.28360028 39.15168687 28.01824434 14.93233583 49.37963489 57.24236066
 51.12760218 11.96536842 38.2851774  37.64162949 48

In [10]:
# Export the predictions as a one-column table named 'predicha'.
# to_excel is called with its defaults, so the DataFrame index is written too.
df_output = pd.DataFrame(predicciones, columns=['predicha'])
df_output.to_excel('Carlos_Zamora.xlsx')

# Kept as the cell's final expression so the notebook actually displays the
# number of predictions (a bare expression in the middle of a cell is discarded).
len(predicciones)