In [None]:
# 1. Create model to predict rating
# 2. Create model to predict box office earnings
# 3. For both models, try linear regression, then random forest regressor

In [49]:
import seaborn as sns
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd

df = pd.read_excel('../movies.xlsx', index_col=0)
titles = df.index.tolist()

omdb_df = pd.read_json('omdb_data.json')
omdb_df = omdb_df.set_index("Title")

trend_df = pd.read_json("./trends_data.json", orient='table')
trend_df.drop("isPartial", axis=1)
# Compute metrics for trend_df


avg_df = trend_df.groupby(level=0).mean()
std_df = trend_df.groupby(level=0).std()
max_df = trend_df.groupby(level=0).max()
min_df = trend_df.groupby(level=0).min()
# Use a 4 week rolling window
rolling_max_df = trend_df.groupby(level=0,group_keys=False).apply(lambda x: x.rolling(window=4).sum())
rolling_max_df = rolling_max_df.groupby(level=0,group_keys=False).max()


In [74]:
# Merge

df1 = omdb_df.copy()
df1 = df1.merge(avg_df['searches'], how='left', left_index=True, right_index=True)
df1 = df1.rename(columns={"searches": "searches_avg"})
df1 = df1.merge(std_df['searches'], how='left', left_index=True, right_index=True)
df1 = df1.rename(columns={"searches": "searches_std"})
df1 = df1.merge(max_df['searches'], how='left', left_index=True, right_index=True)
df1 = df1.rename(columns={"searches": "searches_max"})
df1 = df1.merge(min_df['searches'], how='left', left_index=True, right_index=True)
df1 = df1.rename(columns={"searches": "searches_min"})
df1 = df1.merge(rolling_max_df['searches'], how='left', left_index=True, right_index=True)
df1 = df1.rename(columns={"searches": "searches_rolling_max"})
df1.head()

Unnamed: 0_level_0,Year,Rated,Released,Runtime,Genre,Director,Writer,Actors,Plot,Language,...,DVD,BoxOffice,Production,Website,Response,searches_avg,searches_std,searches_max,searches_min,searches_rolling_max
Title,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
The Age of Adaline,2015,PG-13,24 Apr 2015,112 min,"Drama, Fantasy, Romance",Lee Toland Krieger,"J. Mills Goodloe, Salvador Paskowitz","Blake Lively, Michiel Huisman, Harrison Ford","A young woman, born at the turn of the 20th ce...","English, Portuguese, Italian",...,08 Sep 2015,"$42,629,776",,,True,24.641509,23.75119,100.0,5.0,345.0
Black Christmas,1974,R,20 Dec 1974,98 min,"Horror, Mystery, Thriller",Bob Clark,Roy Moore,"Olivia Hussey, Keir Dullea, Margot Kidder","During their Christmas break, a group of soror...",English,...,05 Dec 2006,,,,True,,,,,
London,2005,R,03 Jul 2021,92 min,"Drama, Romance",Hunter Richards,Hunter Richards,"Jessica Biel, Chris Evans, Jason Statham",London is a drug laden adventure that centers ...,English,...,23 May 2006,"$20,361",,,True,89.0,5.677215,100.0,74.0,391.0
Twisted,2004,R,27 Feb 2004,97 min,"Crime, Drama, Mystery",Philip Kaufman,Sarah Thorp,"Ashley Judd, Samuel L. Jackson, Andy Garcia","Jessica, whose father killed her mother and co...","English, Italian, Spanish",...,31 Aug 2004,"$25,198,598","Summit Entertainment, Paramount Pictures",,True,81.886792,8.863626,100.0,66.0,374.0
Friends with Kids,2011,R,16 Mar 2012,107 min,"Comedy, Drama, Romance",Jennifer Westfeldt,Jennifer Westfeldt,"Jennifer Westfeldt, Adam Scott, Maya Rudolph",Two best friends decide to have a child togeth...,English,...,17 Jul 2012,"$7,251,073",Roadside Attractions,,True,25.576923,17.938179,100.0,10.0,305.0


In [75]:
# drop columns with more than 50% null values

threshold = 0.5
df1 = df1.replace("N/A", pd.NA)
df1 = df1.dropna(thresh=len(df1) * threshold, axis=1)

In [76]:
# Get all possible genres and rated
genres = set()
rated = set()

for genre in df1['Genre']:
    t = genre.split(",")
    new = set([a.strip() for a in t])
    genres = genres.union(new)
print(genres)

rated = list(df1["Rated"].dropna().unique())
print(rated)

{'Romance', 'Adventure', 'Short', 'Music', 'Family', 'Sci-Fi', 'Animation', 'Crime', 'Documentary', 'Horror', 'Comedy', 'War', 'Biography', 'Sport', 'Drama', 'Thriller', 'Mystery', 'History', 'Action', 'Fantasy'}
['PG-13', 'R', 'Not Rated', 'PG', 'Unrated', 'G', 'TV-G', 'TV-MA', 'TV-14', 'Approved', 'TV-PG', 'NC-17']


In [77]:
df1.count()

Year                    150
Rated                   147
Released                150
Runtime                 150
Genre                   150
Director                150
Writer                  150
Actors                  150
Plot                    150
Language                149
Country                 150
Awards                  128
Poster                  150
Ratings                 150
Metascore               132
imdbRating              150
imdbVotes               150
imdbID                  150
Type                    150
DVD                     145
BoxOffice               115
Response                150
searches_avg            105
searches_std            105
searches_max            105
searches_min            105
searches_rolling_max    105
dtype: int64

In [102]:
# Process numerical data
df2 = df1.copy()
df2['Runtime'] = df2['Runtime'].apply(lambda x: int(x.split()[0]) if x is not pd.NA else np.NaN)
df2['imdbVotes'] = df2['imdbVotes'].apply(lambda x: int( x.replace(",", "")) if x is not pd.NA else np.NaN)
df2['BoxOffice'] = df2['BoxOffice'].apply(lambda x: int( x.replace(",", "")[1:]) if x is not pd.NA else np.NaN)
df2['Metascore'] = df2['Metascore'].apply(lambda x: int(x)/100 if x is not pd.NA else np.NaN)

In [103]:
# Process datetime data

df2['Year'] = pd.to_datetime(df2["Year"])
df2['Released'] = pd.to_datetime(df2['Released'])
df2['DVD']= pd.to_datetime(df2['DVD'])

In [104]:
# One hot encode genre

s_genres = ["Action", "Action", "Animation"]
s_rated = ["G", "PG", "PG-13"]

df2["s_genre"] = df2["Genre"].apply(lambda x: int(any([g in x for g in s_genres])) if x is not pd.NA else pd.NA)
df2["s_rated"] = df2["Rated"].apply(lambda x: int(any([g in x for g in s_rated])) if x is not pd.NA else pd.NA)


df2 = df2.drop("Genre", axis=1)

df2 = df2.drop("Rated", axis=1)

In [105]:
df2.head()

Unnamed: 0_level_0,Year,Released,Runtime,Director,Writer,Actors,Plot,Language,Country,Awards,...,DVD,BoxOffice,Response,searches_avg,searches_std,searches_max,searches_min,searches_rolling_max,s_genre,s_rated
Title,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
The Age of Adaline,1970-01-01 00:00:00.000002015,2015-04-24,112,Lee Toland Krieger,"J. Mills Goodloe, Salvador Paskowitz","Blake Lively, Michiel Huisman, Harrison Ford","A young woman, born at the turn of the 20th ce...","English, Portuguese, Italian","United States, Canada",1 win & 10 nominations,...,2015-09-08,42629776.0,True,24.641509,23.75119,100.0,5.0,345.0,False,True
Black Christmas,1970-01-01 00:00:00.000001974,1974-12-20,98,Bob Clark,Roy Moore,"Olivia Hussey, Keir Dullea, Margot Kidder","During their Christmas break, a group of soror...",English,Canada,3 wins & 2 nominations,...,2006-12-05,,True,,,,,,False,False
London,1970-01-01 00:00:00.000002005,2021-07-03,92,Hunter Richards,Hunter Richards,"Jessica Biel, Chris Evans, Jason Statham",London is a drug laden adventure that centers ...,English,"United Kingdom, United States",1 win & 1 nomination,...,2006-05-23,20361.0,True,89.0,5.677215,100.0,74.0,391.0,False,False
Twisted,1970-01-01 00:00:00.000002004,2004-02-27,97,Philip Kaufman,Sarah Thorp,"Ashley Judd, Samuel L. Jackson, Andy Garcia","Jessica, whose father killed her mother and co...","English, Italian, Spanish","United States, Germany",1 win,...,2004-08-31,25198598.0,True,81.886792,8.863626,100.0,66.0,374.0,False,False
Friends with Kids,1970-01-01 00:00:00.000002011,2012-03-16,107,Jennifer Westfeldt,Jennifer Westfeldt,"Jennifer Westfeldt, Adam Scott, Maya Rudolph",Two best friends decide to have a child togeth...,English,United States,2 nominations,...,2012-07-17,7251073.0,True,25.576923,17.938179,100.0,10.0,305.0,False,False


In [106]:
# Flatten ratings

def flatten_ratings(row):
    x = {s["Source"]: s["Value"] for s in row['Ratings']}
    
    if 'Rotten Tomatoes' in x:
        row['Rotten Tomatoes'] = float(x['Rotten Tomatoes'][:-1]) / 100
    else:
        row['Rotten Tomatoes'] = np.NaN 
    return row

df2 = df2.apply(flatten_ratings, axis = 1)


In [124]:
# Drop irrelevant columns
df3 = df2.copy()
cols_to_drop = ["DVD", "Year", "Released", "Runtime", "Director", "Actors", "Plot", "Language", "Country", "Awards", "Poster", "Ratings", "Response", "imdbID", "Type", "Writer", "searches_max"]
df3 = df3.drop(cols_to_drop, axis=1)
df3.info()


<class 'pandas.core.frame.DataFrame'>
Index: 150 entries, The Age of Adaline to What the Health
Data columns (total 11 columns):
 #   Column                Non-Null Count  Dtype  
---  ------                --------------  -----  
 0   Metascore             132 non-null    float64
 1   imdbRating            150 non-null    float64
 2   imdbVotes             150 non-null    int64  
 3   BoxOffice             115 non-null    float64
 4   searches_avg          105 non-null    float64
 5   searches_std          105 non-null    float64
 6   searches_min          105 non-null    float64
 7   searches_rolling_max  105 non-null    float64
 8   s_genre               150 non-null    bool   
 9   s_rated               147 non-null    object 
 10  Rotten Tomatoes       140 non-null    float64
dtypes: bool(1), float64(8), int64(1), object(1)
memory usage: 17.1+ KB


In [194]:
# Feature engineering
# QT numeric features
from sklearn.preprocessing import QuantileTransformer

df_numeric = df3.select_dtypes(include=["int64", "float64"])
df_numeric = df_numeric.fillna(df_numeric.mean())
# qt = QuantileTransformer(n_quantiles=10, random_state=0)
# df_numeric = pd.DataFrame(qt.fit_transform(df_numeric), columns=df_numeric.columns, index=df_numeric.index)

df_numeric.head()
df_numeric['log_imdbVotes'] = np.log(df_numeric['imdbVotes'] + 1)

In [196]:
df_numeric.corr()

Unnamed: 0,Metascore,imdbRating,imdbVotes,BoxOffice,searches_avg,searches_std,searches_min,searches_rolling_max,Rotten Tomatoes,log_imdbVotes
Metascore,1.0,0.714316,0.404899,0.142558,-0.028902,0.065173,-0.051495,0.043502,0.866893,0.363323
imdbRating,0.714316,1.0,0.548013,0.211559,-0.047,0.112588,-0.076031,0.083719,0.691358,0.611551
imdbVotes,0.404899,0.548013,1.0,0.526257,-0.133024,0.113598,-0.121578,-0.017587,0.297422,0.740609
BoxOffice,0.142558,0.211559,0.526257,1.0,-0.22006,0.1241,-0.18165,-0.100631,0.079918,0.465492
searches_avg,-0.028902,-0.047,-0.133024,-0.22006,1.0,-0.520379,0.932616,0.775455,-0.033739,-0.214719
searches_std,0.065173,0.112588,0.113598,0.1241,-0.520379,1.0,-0.753826,-0.089364,0.029551,0.16505
searches_min,-0.051495,-0.076031,-0.121578,-0.18165,0.932616,-0.753826,1.0,0.635681,-0.046933,-0.196713
searches_rolling_max,0.043502,0.083719,-0.017587,-0.100631,0.775455,-0.089364,0.635681,1.0,0.016225,-0.004751
Rotten Tomatoes,0.866893,0.691358,0.297422,0.079918,-0.033739,0.029551,-0.046933,0.016225,1.0,0.286066
log_imdbVotes,0.363323,0.611551,0.740609,0.465492,-0.214719,0.16505,-0.196713,-0.004751,0.286066,1.0


In [197]:
df_oh = df3[["s_rated", "s_genre"]]
df_oh = df_oh.fillna(False)
df_oh = df_oh.replace({True: 1, False: 0})
df_oh.count()
df_oh.tail()


Unnamed: 0_level_0,s_rated,s_genre
Title,Unnamed: 1_level_1,Unnamed: 2_level_1
National Treasure: Book of Secrets,1,1
I Am Mother,0,0
Awake,0,0
Book of Dragons,1,1
What the Health,0,0


In [198]:
X = df_numeric.merge(df_oh, how='inner', left_index=True, right_index=True)
y = X.pop("BoxOffice")

In [205]:
from sklearn.linear_model import LinearRegression, Ridge,Lasso 

reg = LinearRegression().fit(X_train, y_train)
reg.score(X_test, y_test)


0.4732722298178963

In [211]:
from sklearn.tree import DecisionTreeRegressor
from sklearn.model_selection import train_test_split

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
dt = DecisionTreeRegressor().fit(X_train, y_train)
dt.score(X_test, y_test)


-0.8049619505332395

In [214]:
from sklearn.ensemble import GradientBoostingRegressor
reg = GradientBoostingRegressor(random_state=0)
reg.fit(X_train, y_train)
reg.score(X_test, y_test)

-0.23002544927878854

In [231]:
from sklearn.ensemble import GradientBoostingRegressor
from sklearn.compose import TransformedTargetRegressor
from sklearn.metrics import r2_score, mean_absolute_error, mean_squared_error
import lightgbm as lgbm
import xgboost as xg

#generic function to fit model and return metrics for every algorithm
def models(x):
    #transforming target variable through quantile transformer
    regr_trans = TransformedTargetRegressor(regressor=x, func=np.log, inverse_func=np.exp)
    regr_trans.fit(X_train, y_train)
    yhat = regr_trans.predict(X_test)
    algoname= x.__class__.__name__
    return algoname, round(r2_score(y_test, yhat),3), round(mean_absolute_error(y_test, yhat),2), round(np.sqrt(mean_squared_error(y_test, yhat)),2)

algo=[GradientBoostingRegressor(), lgbm.LGBMRegressor(), xg.XGBRFRegressor(), DecisionTreeRegressor(), LinearRegression()]
score=[]
for a in algo:
    score.append(boost_models(a))

#Collate all scores in a table
pd.DataFrame(score, columns=['Model', 'Score', 'MAE', 'RMSE'])

Unnamed: 0,Model,Score,MAE,RMSE
0,GradientBoostingRegressor,-0.208,51943773.78,77504596.64
1,LGBMRegressor,0.407,39731373.03,54290531.24
2,XGBRFRegressor,0.18,45271512.28,63858674.22
3,DecisionTreeRegressor,-0.727,62947634.82,92668027.15
4,LinearRegression,0.473,41227019.09,51184044.22


In [232]:
def models(x):
    #transforming target variable through quantile transformer
    regr_trans = TransformedTargetRegressor(regressor=x, transformer=QuantileTransformer(output_distribution='normal'))
    regr_trans.fit(X_train, y_train)
    yhat = regr_trans.predict(X_test)
    algoname= x.__class__.__name__
    return algoname, round(r2_score(y_test, yhat),3), round(mean_absolute_error(y_test, yhat),2), round(np.sqrt(mean_squared_error(y_test, yhat)),2)

algo=[GradientBoostingRegressor(), lgbm.LGBMRegressor(), xg.XGBRFRegressor(), DecisionTreeRegressor(), LinearRegression()]
score=[]
for a in algo:
    score.append(boost_models(a))

#Collate all scores in a table
pd.DataFrame(score, columns=['Model', 'Score', 'MAE', 'RMSE'])

Unnamed: 0,Model,Score,MAE,RMSE
0,GradientBoostingRegressor,-0.196,51851614.72,77137779.16
1,LGBMRegressor,0.407,39731373.03,54290531.24
2,XGBRFRegressor,0.18,45271512.28,63858674.22
3,DecisionTreeRegressor,-0.801,67616487.35,94656034.14
4,LinearRegression,0.473,41227019.09,51184044.22


In [252]:
# from sklearn.model_selection import GridSearchCV

# param_grid = {'n_estimators': [100, 80, 60, 55, 51, 45, 20],  
#               'max_depth': [0, 20, 40],
#               'reg_lambda' :[0.26, 0.25, 0.2, 0]
#              }
                
# grid = GridSearchCV(lgbm.LGBMRegressor(), param_grid, refit = True, verbose = 3, n_jobs=-1)
# regr_trans = TransformedTargetRegressor(regressor=grid, transformer=QuantileTransformer(output_distribution='normal'))

# # fitting the model for grid search 
# grid_result=regr_trans.fit(X_train, y_train)
# best_params=grid_result.regressor_.best_params_
# print(best_params)

# #using best params to create and fit model
# best_model = lgbm.LGBMRegressor(max_depth=best_params["max_depth"], n_estimators=best_params["n_estimators"], reg_lambda=best_params["reg_lambda"])
# regr_trans = TransformedTargetRegressor(regressor=best_model, transformer=QuantileTransformer(output_distribution='normal'))
# regr_trans.fit(X_train, y_train)
# yhat = regr_trans.predict(X_test)

# #evaluate metrics
# r2_score(y_test, yhat), mean_absolute_error(y_test, yhat), np.sqrt(mean_squared_error(y_test, yhat))




Fitting 5 folds for each of 84 candidates, totalling 420 fits
[CV 5/5] END max_depth=40, n_estimators=45, reg_lambda=0.25;, score=0.222 total time=   0.0s
[CV 5/5] END max_depth=40, n_estimators=45, reg_lambda=0.2;, score=0.220 total time=   0.0s
[CV 3/5] END max_depth=40, n_estimators=45, reg_lambda=0;, score=0.657 total time=   0.0s
[CV 2/5] END max_depth=20, n_estimators=200, reg_lambda=0.25;, score=0.467 total time=   0.0s
[CV 5/5] END max_depth=20, n_estimators=200, reg_lambda=0.2;, score=0.237 total time=   0.0s
[CV 1/5] END max_depth=20, n_estimators=150, reg_lambda=0.26;, score=-0.040 total time=   0.0s
[CV 2/5] END max_depth=20, n_estimators=150, reg_lambda=0.26;, score=0.477 total time=   0.0s
[CV 1/5] END max_depth=20, n_estimators=150, reg_lambda=0.2;, score=-0.052 total time=   0.0s
[CV 2/5] END max_depth=20, n_estimators=150, reg_lambda=0.2;, score=0.486 total time=   0.0s
[CV 2/5] END max_depth=20, n_estimators=100, reg_lambda=0.25;, score=0.490 total time=   0.0s
[CV 3/

[CV 5/5] END max_depth=20, n_estimators=60, reg_lambda=0.25;, score=0.253 total time=   0.0s
[CV 1/5] END max_depth=20, n_estimators=60, reg_lambda=0.2;, score=0.095 total time=   0.0s
[CV 2/5] END max_depth=20, n_estimators=60, reg_lambda=0.2;, score=0.510 total time=   0.0s
[CV 1/5] END max_depth=20, n_estimators=51, reg_lambda=0.26;, score=0.094 total time=   0.0s
[CV 2/5] END max_depth=20, n_estimators=51, reg_lambda=0.26;, score=0.497 total time=   0.0s
[CV 3/5] END max_depth=20, n_estimators=51, reg_lambda=0.26;, score=0.646 total time=   0.0s
[CV 4/5] END max_depth=20, n_estimators=51, reg_lambda=0.26;, score=0.313 total time=   0.0s
[CV 5/5] END max_depth=20, n_estimators=51, reg_lambda=0.26;, score=0.254 total time=   0.0s
[CV 1/5] END max_depth=20, n_estimators=51, reg_lambda=0.25;, score=0.094 total time=   0.0s
[CV 2/5] END max_depth=20, n_estimators=51, reg_lambda=0.25;, score=0.501 total time=   0.0s
[CV 3/5] END max_depth=20, n_estimators=51, reg_lambda=0.25;, score=0.64



(0.17809865135210234, 43140066.68227263, 63936864.81497491)

[CV 3/5] END max_depth=20, n_estimators=45, reg_lambda=0.2;, score=0.667 total time=   0.0s
[CV 4/5] END max_depth=20, n_estimators=45, reg_lambda=0.2;, score=0.328 total time=   0.0s
[CV 5/5] END max_depth=20, n_estimators=45, reg_lambda=0.2;, score=0.257 total time=   0.0s
[CV 1/5] END max_depth=20, n_estimators=45, reg_lambda=0;, score=0.091 total time=   0.0s
[CV 3/5] END max_depth=40, n_estimators=51, reg_lambda=0.2;, score=0.654 total time=   0.0s
[CV 2/5] END max_depth=40, n_estimators=51, reg_lambda=0;, score=0.498 total time=   0.0s
[CV 4/5] END max_depth=40, n_estimators=51, reg_lambda=0;, score=0.309 total time=   0.0s
[CV 1/5] END max_depth=40, n_estimators=45, reg_lambda=0.26;, score=0.104 total time=   0.0s
[CV 4/5] END max_depth=40, n_estimators=45, reg_lambda=0.26;, score=0.315 total time=   0.0s
[CV 2/5] END max_depth=40, n_estimators=45, reg_lambda=0.25;, score=0.490 total time=   0.0s
[CV 1/5] END max_depth=40, n_estimators=45, reg_lambda=0.2;, score=0.103 total time

[CV 5/5] END max_depth=10, n_estimators=45, reg_lambda=0.26;, score=0.220 total time=   0.0s
[CV 1/5] END max_depth=10, n_estimators=45, reg_lambda=0.25;, score=-0.130 total time=   0.0s
[CV 2/5] END max_depth=10, n_estimators=45, reg_lambda=0.25;, score=0.421 total time=   0.0s
[CV 3/5] END max_depth=10, n_estimators=45, reg_lambda=0.25;, score=0.686 total time=   0.0s
[CV 3/5] END max_depth=20, n_estimators=100, reg_lambda=0.2;, score=0.707 total time=   0.0s
[CV 4/5] END max_depth=20, n_estimators=100, reg_lambda=0.2;, score=0.290 total time=   0.0s
[CV 5/5] END max_depth=20, n_estimators=100, reg_lambda=0.2;, score=0.213 total time=   0.0s
[CV 1/5] END max_depth=20, n_estimators=100, reg_lambda=0;, score=-0.224 total time=   0.0s
[CV 2/5] END max_depth=20, n_estimators=100, reg_lambda=0;, score=0.221 total time=   0.0s
[CV 3/5] END max_depth=20, n_estimators=100, reg_lambda=0;, score=0.686 total time=   0.0s
[CV 4/5] END max_depth=20, n_estimators=100, reg_lambda=0;, score=0.297 to

[CV 3/5] END max_depth=40, n_estimators=55, reg_lambda=0.2;, score=0.638 total time=   0.0s
[CV 4/5] END max_depth=40, n_estimators=55, reg_lambda=0.2;, score=0.326 total time=   0.0s
[CV 5/5] END max_depth=40, n_estimators=55, reg_lambda=0.2;, score=0.260 total time=   0.0s
[CV 1/5] END max_depth=40, n_estimators=55, reg_lambda=0;, score=0.082 total time=   0.0s
[CV 2/5] END max_depth=40, n_estimators=55, reg_lambda=0;, score=0.510 total time=   0.0s
[CV 3/5] END max_depth=40, n_estimators=55, reg_lambda=0;, score=0.643 total time=   0.0s
[CV 4/5] END max_depth=40, n_estimators=55, reg_lambda=0;, score=0.311 total time=   0.0s
[CV 5/5] END max_depth=40, n_estimators=55, reg_lambda=0;, score=0.258 total time=   0.0s
[CV 1/5] END max_depth=40, n_estimators=51, reg_lambda=0.26;, score=0.094 total time=   0.0s
[CV 2/5] END max_depth=40, n_estimators=51, reg_lambda=0.26;, score=0.497 total time=   0.0s
[CV 3/5] END max_depth=40, n_estimators=51, reg_lambda=0.26;, score=0.646 total time=   

[CV 4/5] END max_depth=20, n_estimators=20, reg_lambda=0;, score=0.311 total time=   0.0s
[CV 5/5] END max_depth=20, n_estimators=20, reg_lambda=0;, score=0.212 total time=   0.0s
[CV 1/5] END max_depth=40, n_estimators=100, reg_lambda=0.26;, score=0.043 total time=   0.0s
[CV 2/5] END max_depth=40, n_estimators=100, reg_lambda=0.26;, score=0.488 total time=   0.0s
[CV 3/5] END max_depth=40, n_estimators=100, reg_lambda=0.26;, score=0.622 total time=   0.0s
[CV 4/5] END max_depth=40, n_estimators=100, reg_lambda=0.26;, score=0.320 total time=   0.0s
[CV 5/5] END max_depth=40, n_estimators=100, reg_lambda=0.26;, score=0.251 total time=   0.0s
[CV 1/5] END max_depth=40, n_estimators=100, reg_lambda=0.25;, score=0.043 total time=   0.0s
[CV 2/5] END max_depth=40, n_estimators=100, reg_lambda=0.25;, score=0.490 total time=   0.0s
[CV 3/5] END max_depth=40, n_estimators=100, reg_lambda=0.25;, score=0.621 total time=   0.0s
[CV 3/5] END max_depth=40, n_estimators=51, reg_lambda=0;, score=0.6