In [1]:
import pandas as pd
import numpy as np
from pandas import Series,DataFrame
from sklearn.ensemble import RandomForestRegressor
import sklearn.preprocessing as preprocessing
from sklearn import linear_model
import matplotlib.pyplot as plt
from sklearn.model_selection import learning_curve, cross_val_score, train_test_split

### 处理缺失值

In [2]:
train_raw_data = pd.read_csv("raw_data/train.csv",parse_dates=['purchase_date','release_date'])
#处理NaN
train_raw_data['purchase_date'] = train_raw_data.purchase_date.fillna(method='backfill')
train_raw_data['total_positive_reviews'] = train_raw_data.total_positive_reviews.fillna(method='backfill')
train_raw_data['total_negative_reviews'] = train_raw_data.total_negative_reviews.fillna(method='backfill')

In [3]:
test_raw_data = pd.read_csv("raw_data/test.csv",parse_dates=['purchase_date','release_date'])
#处理NaN
test_raw_data['purchase_date'] = test_raw_data.purchase_date.fillna(method='backfill')
test_raw_data['total_positive_reviews'] = test_raw_data.total_positive_reviews.fillna(method='backfill')
test_raw_data['total_negative_reviews'] = test_raw_data.total_negative_reviews.fillna(method='backfill')

### 提取时间特征

In [4]:
#日期提取
def extract_date(df_copy,df,column):
    df_copy[column+'_year']=df[column].apply(lambda x: x.year)
    df_copy[column+'_month']=df[column].apply(lambda x: x.month)
    df_copy[column+'_day']=df[column].apply(lambda x: x.day)
    return df_copy

In [5]:
#提取purchase_date
train_raw_copy = train_raw_data.copy()
train_extract_purchase_date = extract_date(train_raw_copy, train_raw_data, 'purchase_date')

#提取release_date
train_extract_purchase_date_copy = train_extract_purchase_date.copy()
train_extract_date = extract_date(train_extract_purchase_date_copy, train_extract_purchase_date, 'release_date')

import datetime
train_extract_date['date_interval'] = (train_extract_date['purchase_date']-train_extract_date['release_date'])\
.apply(lambda x: x.days)

In [6]:
#提取purchase_date
test_raw_copy = test_raw_data.copy()
test_extract_purchase_date = extract_date(test_raw_copy, test_raw_data, 'purchase_date')

#提取release_date
test_extract_purchase_date_copy = test_extract_purchase_date.copy()
test_extract_date = extract_date(test_extract_purchase_date_copy, test_extract_purchase_date, 'release_date')

import datetime
test_extract_date['date_interval'] = (test_extract_date['purchase_date']-test_extract_date['release_date'])\
.apply(lambda x: x.days)

### 训练集字符串数据

In [7]:
#处理categories
train_categories_one_hot = train_raw_data["categories"].str.get_dummies(",") 
test_categories_one_hot = test_raw_data["categories"].str.get_dummies(",") 
categories_train_diff_test = train_categories_one_hot.columns.difference(test_categories_one_hot.columns)
categories_test_diff_train = test_categories_one_hot.columns.difference(train_categories_one_hot.columns)
print(categories_train_diff_test)
print(categories_test_diff_train)

Index(['Valve Anti-Cheat enabled'], dtype='object')
Index([], dtype='object')


In [8]:
test_categories_one_hot = pd.concat([test_categories_one_hot,pd.DataFrame(columns=list(categories_train_diff_test))],axis=1).fillna(0)

In [9]:
#处理genres
train_genres_one_hot = train_raw_data["genres"].str.get_dummies(",") 
test_genres_one_hot = test_raw_data["genres"].str.get_dummies(",") 
genres_train_diff_test = train_genres_one_hot.columns.difference(test_genres_one_hot.columns)
genres_test_diff_train = test_genres_one_hot.columns.difference(train_genres_one_hot.columns)
print(genres_train_diff_test)
print(genres_test_diff_train)

Index(['Animation & Modeling', 'Audio Production', 'Design & Illustration',
       'Racing', 'Sexual Content', 'Utilities'],
      dtype='object')
Index([], dtype='object')


In [10]:
test_genres_one_hot = pd.concat([test_genres_one_hot,pd.DataFrame(columns=list(genres_train_diff_test))],axis=1).fillna(0)

In [11]:
#处理tags
train_tags_one_hot = train_raw_data["tags"].str.get_dummies(",") 
test_tags_one_hot = test_raw_data["tags"].str.get_dummies(",") 
tags_train_diff_test = train_tags_one_hot.columns.difference(test_tags_one_hot.columns)
tags_test_diff_train = test_tags_one_hot.columns.difference(train_tags_one_hot.columns)
print(tags_train_diff_test)
print(tags_test_diff_train)

Index(['3D', 'ATV', 'Addictive', 'Animation & Modeling',
       'Artificial Intelligence', 'Audio Production', 'Automation', 'Batman',
       'Battle Royale', 'Bikes', 'Board Game', 'Bullet Hell', 'Capitalism',
       'Card Game', 'Cartoon', 'Cats', 'Character Action Game', 'Chess',
       'Choose Your Own Adventure', 'Clicker', 'Co-op Campaign', 'Comic Book',
       'Conspiracy', 'Dark Comedy', 'Design & Illustration', 'Documentary',
       'Dungeons & Dragons', 'Experience', 'Flight', 'Game Development',
       'God Game', 'Gothic', 'Gun Customization', 'Hidden Object', 'Horses',
       'Immersive Sim', 'Intentionally Awkward Controls', 'Investigation',
       'LGBTQ+', 'Lara Croft', 'Logic', 'MMORPG', 'Mars', 'Martial Arts',
       'Metroidvania', 'Motocross', 'Motorbike', 'Movie', 'Multiple Endings',
       'Naval', 'Offroad', 'Perma Death', 'Pirates', 'Programming', 'PvE',
       'Quick-Time Events', 'Racing', 'Rome', 'Sailing', 'Satire',
       'Score Attack', 'Sequel', 'Sniper',

In [12]:
test_tags_one_hot = pd.concat([test_tags_one_hot,pd.DataFrame(columns=list(tags_train_diff_test))],axis=1).fillna(0)
train_tags_one_hot = pd.concat([train_tags_one_hot,pd.DataFrame(columns=list(tags_test_diff_train))],axis=1).fillna(0)

### 添加特征到训练集/测试集

In [13]:
train = pd.concat([train_extract_date, train_categories_one_hot,train_genres_one_hot,train_tags_one_hot],axis=1)
test = pd.concat([test_extract_date, test_categories_one_hot,test_genres_one_hot,test_tags_one_hot],axis=1)
print('train shape',train.shape)
print('test shape',test.shape)

train shape (357, 384)
test shape (90, 383)


### 第一次预测

In [14]:
train_input = train.drop(['categories','genres','tags','purchase_date','release_date'],axis=1)
test_input = test.drop(['categories','genres','tags','purchase_date','release_date'],axis=1)

train_x = train_input.drop(['playtime_forever'],axis=1)
train_y = train_input[['playtime_forever']]

from sklearn.decomposition import PCA
pca = PCA(n_components=50)
train_x_pca = pca.fit_transform(train_x)
test_x_pca = pca.fit_transform(test_input)
print('train_x_pca shape',train_x_pca.shape)
print('test_x_pca shape',test_x_pca.shape)

train_x_pca shape (357, 50)
test_x_pca shape (90, 50)


In [16]:
####决策树回归####
from sklearn import tree
model_dt_1 = tree.DecisionTreeRegressor()
model_dt_1.fit(train_x_pca,train_y)
predictions_dt_1 = model_dt_1.predict(test_x_pca)
predictions_dt_df_1 = pd.DataFrame(predictions_dt_1)
predictions_dt_df_1.columns=['playtime_forever']
result_1 = pd.DataFrame({'id':test_raw_data['id'], 'playtime_forever':predictions_dt_df_1['playtime_forever']})

from sklearn.model_selection import cross_val_score
scores_dt_1 = np.sqrt(-cross_val_score(model_dt_1, train_x_pca, train_y, cv=80,scoring='neg_mean_squared_error'))
mean_scores_dt_1 = np.mean(scores_dt_1)
print(scores_dt_1)
print('mean_scores_dt_1:',mean_scores_dt_1)

[10.04024125  2.14645703  7.75199616  1.72106233  0.66524849  2.48713356
 26.67264204 25.41179735  5.80971409 11.28795572 19.15291101  2.2930935
  0.42071368  2.53795631  5.65350432  1.60168314  3.77682995  3.09910381
 15.02536744  0.68572751  1.21780768  2.08350666  0.74531872  1.40846646
  2.33427362  0.29068884 21.42004513  0.49446941  2.20592636  1.09427805
  2.51302164  5.0003111  24.05397288 10.90569933 10.0522883   2.22761706
  2.42355341 24.38206735  6.68938836 24.33019529 46.32170717  0.61508807
 10.54053354 55.55351587  6.97102137  4.88055239 16.44714176  9.86962582
  2.58642288 48.56365668  0.56056767  3.00779081 28.22193173  3.0594798
  1.50980591  1.76381452  0.54012601  6.62127254 38.9561978  45.73003055
  1.43507646 38.80833333  0.60190669  1.58933075  2.69537928  1.45043097
  0.52895337 11.28036027  2.15963603 16.01951718  1.06503782  0.97471506
  1.58440753  9.12129681  0.21294887  4.38379275  0.8903027  30.28307306
  1.38809422  0.125     ]
mean_scores_dt_1: 9.4128988

### 第二次预测（预测大值）

In [74]:
train_big_id = train_raw_data[train_raw_data['playtime_forever']>1]['id'].values
train_big_input = train_input[train_input['id'].isin(train_big_id)]

tset_big_id = result_1[result_1['playtime_forever']>1]['id'].values
test_big_input = test_input[test_input['id'].isin(tset_big_id)]

train_big_x = train_big_input.drop(['playtime_forever'],axis=1)
train_big_y = train_big_input[['playtime_forever']]

from sklearn.decomposition import PCA
pca = PCA(n_components=20)
train_big_x_pca = pca.fit_transform(train_big_x)
test_big_x_pca = pca.fit_transform(test_big_input)
print('train_big_x_pca shape',train_big_x_pca.shape)
print('test_big_x_pca shape',test_big_x_pca.shape)

train_big_x_pca shape (106, 20)
test_big_x_pca shape (47, 20)


In [75]:
####决策树回归####
from sklearn import tree
model_dt_2_big = tree.DecisionTreeRegressor()
model_dt_2_big.fit(train_big_x_pca,train_big_y)
predictions_dt_2_big = model_dt_2_big.predict(test_big_x_pca)
predictions_dt_df_2_big = pd.DataFrame(predictions_dt_2_big)
predictions_dt_df_2_big.columns=['playtime_forever']
result_2_big = pd.DataFrame({'id':tset_big_id, 'playtime_forever':predictions_dt_df_2_big['playtime_forever']})

from sklearn.model_selection import cross_val_score
scores_dt_2_big = np.sqrt(-cross_val_score(model_dt_2_big, train_big_x_pca, train_big_y, cv=50,scoring='neg_mean_squared_error'))
mean_scores_dt_2_big = np.mean(scores_dt_2_big)
print(scores_dt_2_big)
print('mean_scores_dt_2_big:',mean_scores_dt_2_big)

[65.70633954 35.94482655 10.57289511 30.24321167 31.79313489  6.76540081
  9.46646127  1.4615916   0.43461349 13.81396737  7.66336886  2.84724526
  7.36841985  1.36101474 19.29218682  0.44472214 31.55516554  2.2519436
  1.13804169  8.64473186 35.20342471  1.97642354  9.10447082  3.82740484
  6.30187362 62.48491929  7.20796242 63.67085501 47.52159889 13.68626643
 22.54423122 53.53322048  2.31531735 41.63365359 15.50113347  5.61774223
 41.92033781  1.33046567 14.43451208 20.73323285 75.87654447 41.89487505
 12.71104616 49.70589195  9.03531651  4.56268074 43.19555822 43.39004622
 34.98152886  9.35646687]
mean_scores_dt_2_big: 21.68056567963501


In [76]:
####随机森林回归####
from sklearn import ensemble
model_rf_2_big = ensemble.RandomForestRegressor(n_estimators=50)#这里使用20个决策树
model_rf_2_big.fit(train_big_x_pca,train_big_y)
predictions_rf_2_big = model_rf_2_big.predict(test_big_x_pca)
predictions_rf_df_2_big = pd.DataFrame(predictions_rf_2_big)
predictions_rf_df_2_big.columns=['playtime_forever']
result_2_big = pd.DataFrame({'id':tset_big_id, 'playtime_forever':predictions_rf_df_2_big['playtime_forever']})

from sklearn.model_selection import cross_val_score
scores_rf_2_big = np.sqrt(-cross_val_score(model_rf_2_big, train_big_x_pca, train_big_y, cv=50,scoring='neg_mean_squared_error'))
mean_scores_rf_2_big = np.mean(scores_rf_2_big)
print(scores_rf_2_big)
print('mean_scores_rf_2_big:',mean_scores_rf_2_big)

  after removing the cwd from sys.path.
  estimator.fit(X_train, y_train, **fit_params)
  estimator.fit(X_train, y_train, **fit_params)
  estimator.fit(X_train, y_train, **fit_params)
  estimator.fit(X_train, y_train, **fit_params)
  estimator.fit(X_train, y_train, **fit_params)
  estimator.fit(X_train, y_train, **fit_params)
  estimator.fit(X_train, y_train, **fit_params)
  estimator.fit(X_train, y_train, **fit_params)
  estimator.fit(X_train, y_train, **fit_params)
  estimator.fit(X_train, y_train, **fit_params)
  estimator.fit(X_train, y_train, **fit_params)
  estimator.fit(X_train, y_train, **fit_params)
  estimator.fit(X_train, y_train, **fit_params)
  estimator.fit(X_train, y_train, **fit_params)
  estimator.fit(X_train, y_train, **fit_params)
  estimator.fit(X_train, y_train, **fit_params)
  estimator.fit(X_train, y_train, **fit_params)
  estimator.fit(X_train, y_train, **fit_params)
  estimator.fit(X_train, y_train, **fit_params)
  estimator.fit(X_train, y_train, **fit_params)


  estimator.fit(X_train, y_train, **fit_params)
  estimator.fit(X_train, y_train, **fit_params)
  estimator.fit(X_train, y_train, **fit_params)
  estimator.fit(X_train, y_train, **fit_params)
  estimator.fit(X_train, y_train, **fit_params)
  estimator.fit(X_train, y_train, **fit_params)
  estimator.fit(X_train, y_train, **fit_params)
  estimator.fit(X_train, y_train, **fit_params)
  estimator.fit(X_train, y_train, **fit_params)
  estimator.fit(X_train, y_train, **fit_params)
  estimator.fit(X_train, y_train, **fit_params)
  estimator.fit(X_train, y_train, **fit_params)
  estimator.fit(X_train, y_train, **fit_params)
  estimator.fit(X_train, y_train, **fit_params)
  estimator.fit(X_train, y_train, **fit_params)
  estimator.fit(X_train, y_train, **fit_params)
  estimator.fit(X_train, y_train, **fit_params)
  estimator.fit(X_train, y_train, **fit_params)
  estimator.fit(X_train, y_train, **fit_params)
  estimator.fit(X_train, y_train, **fit_params)
  estimator.fit(X_train, y_train, **fit_

[28.41882273  9.44807521  8.58571667 17.77106727 31.05989674  4.19160012
  7.34237632 14.50231887  2.80393567  4.36679605  6.09204022  2.56016667
 11.34815551  6.6001783   8.98333989  7.30841744 22.67275128  6.33607936
  3.37048418  4.76464881 26.34608739  4.31072477  3.48694228  1.28612089
 19.61799771 57.25171712  3.38426737 22.97705958 16.43391323  9.742072
 20.69993777 10.57043631 23.6042448  11.72181651 16.30507501 15.22769646
  7.79686793  3.76065859 19.06891506 40.85317482 76.77518794 33.54006713
  6.21568672  7.17838052 22.73698025  4.33951617  8.46555531 29.24285466
 41.67788655  8.86878584]
mean_scores_rf_2_big: 15.640269879252623


In [77]:
####GBRT回归####
from sklearn import ensemble
model_gbrt_2_big = ensemble.GradientBoostingRegressor(n_estimators=100)#这里使用100个决策树
model_gbrt_2_big.fit(train_big_x_pca,train_big_y)
predictions_gbrt_2_big = model_gbrt_2_big.predict(test_big_x_pca)
predictions_gbrt_df_2_big = pd.DataFrame(predictions_gbrt_2_big)
predictions_gbrt_df_2_big.columns=['playtime_forever']
result_2_big = pd.DataFrame({'id':tset_big_id, 'playtime_forever':predictions_gbrt_df_2_big['playtime_forever']})

from sklearn.model_selection import cross_val_score
scores_gbrt_2_big = np.sqrt(-cross_val_score(model_gbrt_2_big, train_big_x_pca, train_big_y, cv=50,scoring='neg_mean_squared_error'))
mean_scores_gbrt_2_big = np.mean(scores_gbrt_2_big)
print(scores_gbrt_2_big)
print('mean_scores_gbrt_2_big:',mean_scores_gbrt_2_big)

  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = colu

  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)


[51.75006867 11.91342189  3.37414824 23.72190848 30.27517408  4.46357705
  7.8918936  16.53350937  1.11798036  4.84875841  8.55426406  4.29218987
 11.0168749   3.00932607 12.89176792  7.57069822 23.65419749  0.95237906
  5.3539672   9.2474477  35.85432458  7.8171015   6.56315682  1.61879316
 15.25426454 56.24359544  7.92662531 33.16642143 17.34419912  4.06746054
 19.58490383 35.48881349 20.89847461  7.70528288 18.54335133 10.59887246
 11.31702599  2.17657137 15.60792938 32.34233839 71.14817536 42.48935657
  7.05680302 36.20957276  5.18616314  8.00394967  5.33281807  6.82678261
 40.86515985 13.73812247]
mean_scores_gbrt_2_big: 16.788199246853683


  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)


In [None]:
'''
####KNN回归####
from sklearn import neighbors
model_knn_2_big = neighbors.KNeighborsRegressor(n_neighbors =2)
model_knn_2_big.fit(train_big_x_pca,train_big_y)
predictions_knn_2_big = model_knn_2_big.predict(test_big_x_pca)
predictions_knn_df_2_big = pd.DataFrame(predictions_knn_2_big)
predictions_knn_df_2_big.columns=['playtime_forever']
result_2_big = pd.DataFrame({'id':tset_big_id, 'playtime_forever':predictions_knn_df_2_big['playtime_forever']})
'''

In [50]:
result_2_big.sort_values("playtime_forever",inplace=False,ascending=False)

Unnamed: 0,id,playtime_forever
22,53,92.633333
35,89,78.7
21,52,78.7
33,87,78.7
5,10,78.7
31,83,78.7
24,63,78.7
17,46,63.8
16,45,56.666667
8,19,56.666667


### 第二次预测（预测小值）

In [52]:
train_small_id = train_raw_data[train_raw_data['playtime_forever']<=2]['id'].values
train_small_input = train_input[train_input['id'].isin(train_small_id)]

tset_small_id = result_1[result_1['playtime_forever']<=2]['id'].values
test_small_input = test_input[test_input['id'].isin(tset_small_id)]

train_small_x = train_small_input.drop(['playtime_forever'],axis=1)
train_small_y = train_small_input[['playtime_forever']]

from sklearn.decomposition import PCA
pca = PCA(n_components=25)
train_small_x_pca = pca.fit_transform(train_small_x)
test_small_x_pca = pca.fit_transform(test_small_input)
print('train_small_x_pca shape',train_small_x_pca.shape)
print('test_small_x_pca shape',test_small_x_pca.shape)

train_small_x_pca shape (279, 25)
test_small_x_pca shape (54, 25)


In [53]:
####决策树回归####
from sklearn import tree
model_dt_2_small = tree.DecisionTreeRegressor()
model_dt_2_small.fit(train_small_x_pca,train_small_y)
predictions_dt_2_small = model_dt_2_small.predict(test_small_x_pca)
predictions_dt_df_2_small = pd.DataFrame(predictions_dt_2_small)
predictions_dt_df_2_small.columns=['playtime_forever']
result_2_small = pd.DataFrame({'id':tset_small_id, 'playtime_forever':predictions_dt_df_2_small['playtime_forever']})

from sklearn.model_selection import cross_val_score
scores_dt_2_small = np.sqrt(-cross_val_score(model_dt_2_small, train_small_x_pca, train_small_y, cv=5,scoring='neg_mean_squared_error'))
mean_scores_dt_2_small = np.mean(scores_dt_2_small)
print(scores_dt_2_small)
print('mean_scores_dt_2_small:',mean_scores_dt_2_small)

[0.63103436 0.76245609 0.74801987 0.55316998 0.54824329]
mean_scores_dt_2_small: 0.6485847181997026


In [29]:
result_2_small.sort_values("playtime_forever",inplace=False,ascending=False)

Unnamed: 0,id,playtime_forever
55,58,49.383333
47,50,32.766667
57,60,24.933333
75,78,22.350000
10,10,14.550000
84,88,9.683333
22,22,9.016667
12,12,6.316667
3,3,5.033333
58,61,5.033333


In [56]:
result_2 = pd.concat([result_2_big, result_2_small])
result_2.sort_values("id",inplace=True)

In [57]:
result_2.sort_values(by='playtime_forever',inplace=False,ascending = False)

Unnamed: 0,id,playtime_forever
22,53,92.633333
35,89,78.700000
5,10,78.700000
33,87,78.700000
31,83,78.700000
24,63,78.700000
21,52,78.700000
17,46,63.800000
8,19,56.666667
16,45,56.666667


In [59]:
result_2.to_csv('result/submit_1110_1.csv',index=0,header=1)

In [None]:
'''
####决策树回归####
from sklearn import tree
model_DecisionTreeRegressor = tree.DecisionTreeRegressor()
####线性回归####
from sklearn import linear_model
model_LinearRegression = linear_model.LinearRegression()
####SVM回归####
from sklearn import svm
model_SVR = svm.SVR()
####KNN回归####
from sklearn import neighbors
model_KNeighborsRegressor = neighbors.KNeighborsRegressor()
####随机森林回归####
from sklearn import ensemble
model_RandomForestRegressor = ensemble.RandomForestRegressor(n_estimators=20)#这里使用20个决策树
####Adaboost回归####
from sklearn import ensemble
model_AdaBoostRegressor = ensemble.AdaBoostRegressor(n_estimators=50)#这里使用50个决策树
####GBRT回归####
from sklearn import ensemble
model_GradientBoostingRegressor = ensemble.GradientBoostingRegressor(n_estimators=100)#这里使用100个决策树
####Bagging回归####
from sklearn.ensemble import BaggingRegressor
model_BaggingRegressor = BaggingRegressor()
####ExtraTree极端随机树回归####
from sklearn.tree import ExtraTreeRegressor
model_ExtraTreeRegressor = ExtraTreeRegressor()
'''