In [1]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
from lightgbm import LGBMRegressor
from sklearn.linear_model import LinearRegression
import gresearch_crypto
from pathlib import Path
# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
for dirname, _, filenames in os.walk('/kaggle/input'):
    for filename in filenames:
        print(os.path.join(dirname, filename))

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

/kaggle/input/g-research-crypto-forecasting/example_sample_submission.csv
/kaggle/input/g-research-crypto-forecasting/asset_details.csv
/kaggle/input/g-research-crypto-forecasting/example_test.csv
/kaggle/input/g-research-crypto-forecasting/train.csv
/kaggle/input/g-research-crypto-forecasting/supplemental_train.csv
/kaggle/input/g-research-crypto-forecasting/gresearch_crypto/competition.cpython-37m-x86_64-linux-gnu.so
/kaggle/input/g-research-crypto-forecasting/gresearch_crypto/__init__.py


In [2]:
path = "/kaggle/input/g-research-crypto-forecasting/"
df_train = pd.read_csv(path + "train.csv")
df_test = pd.read_csv(path + "example_test.csv")
df_asset_details = pd.read_csv(path + "asset_details.csv")
df_supp_train = pd.read_csv(path + "supplemental_train.csv")

In [3]:
df_train.head()

Unnamed: 0,timestamp,Asset_ID,Count,Open,High,Low,Close,Volume,VWAP,Target
0,1514764860,2,40.0,2376.58,2399.5,2357.14,2374.59,19.233005,2373.116392,-0.004218
1,1514764860,0,5.0,8.53,8.53,8.53,8.53,78.38,8.53,-0.014399
2,1514764860,1,229.0,13835.194,14013.8,13666.11,13850.176,31.550062,13827.062093,-0.014643
3,1514764860,5,32.0,7.6596,7.6596,7.6567,7.6576,6626.71337,7.657713,-0.013922
4,1514764860,7,5.0,25.92,25.92,25.874,25.877,121.08731,25.891363,-0.008264


In [4]:
df_train.isnull().sum()

timestamp         0
Asset_ID          0
Count             0
Open              0
High              0
Low               0
Close             0
Volume            0
VWAP              9
Target       750338
dtype: int64

In [5]:
df_train.columns

Index(['timestamp', 'Asset_ID', 'Count', 'Open', 'High', 'Low', 'Close',
       'Volume', 'VWAP', 'Target'],
      dtype='object')

In [6]:
df_train['datetime']=df_train.timestamp.astype('datetime64[s]')
df_test['datetime']=df_test.timestamp.astype('datetime64[s]')
df_train['time'] = df_train['datetime'].dt.time
df_train['date'] = df_train['datetime'].dt.date
df_test['time'] = df_test['datetime'].dt.time
df_test['date'] = df_test['datetime'].dt.date

In [7]:
cols=['Count', 'Open', 'High', 'Low', 'Close',
       'Volume']
def addAnalytics(dtFrame,funType,cols):
  if funType=='roll5':
    for i in cols:
      dtFrame[i.replace(' ','')+'_roll5']=dtFrame.sort_values(by=['date'])\
                       .groupby(['time'])[i]\
                       .rolling(5, min_periods = 1).mean()\
                       .reset_index()[i].round(3)
  if funType=='roll30':
    for i in cols:
      dtFrame[i.replace(' ','')+'_roll30']=dtFrame.sort_values(by=['date'])\
                       .groupby(['time'])[i]\
                       .rolling(10, min_periods = 1).mean()\
                       .reset_index()[i].round(3)

In [8]:
df_train.dropna(inplace=True)

In [9]:
# df_test.drop(['timestamp','datetime','time','date'],axis=1)
df_test.head()
# df_test.columns

Unnamed: 0,timestamp,Asset_ID,Count,Open,High,Low,Close,Volume,VWAP,group_num,row_id,datetime,time,date
0,1623542400,3,1201.0,1.478556,1.48603,1.478,1.483681,654799.561103,1.481439,0,0,2021-06-13,00:00:00,2021-06-13
1,1623542400,2,1020.0,580.306667,583.89,579.91,582.276667,1227.988328,581.697038,0,1,2021-06-13,00:00:00,2021-06-13
2,1623542400,0,626.0,343.7895,345.108,343.64,344.598,1718.832569,344.441729,0,2,2021-06-13,00:00:00,2021-06-13
3,1623542400,1,2888.0,35554.289632,35652.46465,35502.67,35602.004286,163.811537,35583.469303,0,3,2021-06-13,00:00:00,2021-06-13
4,1623542400,4,433.0,0.312167,0.3126,0.31192,0.312208,585577.410442,0.312154,0,4,2021-06-13,00:00:00,2021-06-13


Reducing the data frame by choosing appropraite data types depending on the type of data not only decreases the modelling time also makes the use of computational resources appropriately. https://towardsdatascience.com/make-working-with-large-dataframes-easier-at-least-for-your-memory-6f52b5f4b5c4

In [10]:
def reduce_mem_usage(df, verbose=True):
    numerics = ['int8','int16', 'int32', 'int64', 'float16', 'float32', 'float64']
    start_mem = df.memory_usage().sum() / 1024**2    
    for col in df.columns:
        col_type = df[col].dtypes
        if col_type in numerics:
            c_min = df[col].min()
            c_max = df[col].max()
            if str(col_type)[:3] == 'int':
                if c_min > np.iinfo(np.int8).min and c_max < np.iinfo(np.int8).max:
                    df[col] = df[col].astype(np.int8)
                elif c_min > np.iinfo(np.int16).min and c_max < np.iinfo(np.int16).max:
                    df[col] = df[col].astype(np.int16)
                elif c_min > np.iinfo(np.int32).min and c_max < np.iinfo(np.int32).max:
                    df[col] = df[col].astype(np.int32)
                elif c_min > np.iinfo(np.int64).min and c_max < np.iinfo(np.int64).max:
                    df[col] = df[col].astype(np.int64)  
            else:
                if c_min > np.finfo(np.float16).min and c_max < np.finfo(np.float16).max:
                    df[col] = df[col].astype(np.float16)
                elif c_min > np.finfo(np.float32).min and c_max < np.finfo(np.float32).max:
                    df[col] = df[col].astype(np.float32)
                else:
                    df[col] = df[col].astype(np.float64)    
    end_mem = df.memory_usage().sum() / 1024**2
    if verbose: print('Mem. usage decreased to {:5.2f} Mb ({:.1f}% reduction)'.format(end_mem, 100 * (start_mem - end_mem) / start_mem))
    return df

In [11]:
def getDataByAsset(dataFrameTrain,dataFrameTest,assetId):
    dataFrameTrain=dataFrameTrain[dataFrameTrain["Asset_ID"]==assetId]
    X=reduce_mem_usage(dataFrameTrain.drop(['Target','VWAP'],axis=1))
    y=dataFrameTrain['Target']
#     addAnalytics(X,'roll5',cols)
#     addAnalytics(X,'roll30',cols)
    X.index=X.datetime
    X.drop(['timestamp','datetime','time','date'],axis=1,inplace=True)
    X_test=reduce_mem_usage(dataFrameTest)
#     addAnalytics(X_test,'roll5',cols)
#     addAnalytics(X_test,'roll30',cols)
    X_test.index=X_test.datetime
    X_test=reduce_mem_usage(X_test[X.columns])
    min_max_scaler = MinMaxScaler()
    X = min_max_scaler.fit_transform(X)
    X_test = min_max_scaler.transform(X_test)
    return X,y,X_test,min_max_scaler

In [12]:
from sklearn.multioutput import MultiOutputRegressor
import lightgbm as lgb
!pip install optuna
import optuna

from sklearn.metrics import make_scorer, mean_absolute_error
from sklearn.metrics import mean_squared_error as MSE
class Objective():

    def __init__(self):
        self.best_booster = None
        self._booster = None

    def __call__(self, trial):
        param = {
            "objective": "regression",
            "metric": "mse",
            "verbosity": -1,
            "boosting_type": "gbdt",
            "feature_fraction": trial.suggest_loguniform("feature_fraction", 0.5, 1.0),
            "min_gain_to_split": trial.suggest_loguniform("min_gain_to_split", 0.3, 1.0),
            "max_depth": trial.suggest_int("max_depth", 20, 100),
            "lambda_l1": trial.suggest_loguniform("lambda_l1", 1e-8, 10.0),
            "lambda_l2": trial.suggest_loguniform("lambda_l2", 1e-8, 10.0),
            "num_leaves": trial.suggest_int("num_leaves", 40, 220),
            "num_iterations": trial.suggest_int("num_iterations", 20, 400),
            "learning_rate":trial.suggest_loguniform("learning_rate", 0.001, 0.1)
        }
        # Add a callback for pruning.
        pruning_callback = optuna.integration.LightGBMPruningCallback(trial, "mse")
        model= lgb.LGBMRegressor()
        gbm = model.fit(X,y)

        self._booster = gbm

        preds = gbm.predict(X)
        pred_labels = np.rint(preds)
        accuracy = mean_absolute_error(y, pred_labels)
        return accuracy




In [13]:

from warnings import simplefilter
simplefilter("ignore", category=RuntimeWarning)
def getBooster():
    objective = Objective()
    optuna.logging.set_verbosity(optuna.logging.ERROR)
    study = optuna.create_study(
        pruner=optuna.pruners.MedianPruner(n_warmup_steps=10), direction="minimize"
    )
    study.optimize(objective, n_trials=50)

    print("Best trial:")
    trial_co = study.best_trial

    print("  Params: ")
    for key, value in trial_co.params.items():
        print("    {}: {}".format(key, value))

    return objective.best_booster

In [14]:
from sklearn.metrics import make_scorer, mean_absolute_error
from sklearn.metrics import mean_squared_error as MSE
from sklearn.preprocessing import MinMaxScaler

In [15]:
from scipy.stats import pearsonr
X_dict = {}
y_dict = {}
model_dict = {}
preds_dict={}
for i in df_train.Asset_ID.unique():
    X,y,X_test,min_max_scaler=getDataByAsset(df_train,df_test,i)
    X_dict[i]=X
    y_dict[i]=y
    params =  getBooster()
    model = LGBMRegressor(params)
    model_co = model.fit(X,y)
    preds=model_co.predict(X)
    corr, _ = pearsonr(preds, y)
    print(corr)
    model_dict[i]=model
    preds_dict[i]=preds

Mem. usage decreased to 91.06 Mb (49.0% reduction)
Mem. usage decreased to  0.00 Mb (56.9% reduction)
Mem. usage decreased to  0.00 Mb (0.0% reduction)


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  # This is added back by InteractiveShellApp.init_path()
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the docume

Best trial:
  Params: 
    feature_fraction: 0.6756692840489984
    min_gain_to_split: 0.41429126515368936
    max_depth: 33
    lambda_l1: 1.8060610574405617e-05
    lambda_l2: 1.6172359400389085
    num_leaves: 168
    num_iterations: 349
    learning_rate: 0.008048903396751765
0.2874417033260248
Mem. usage decreased to 93.83 Mb (46.9% reduction)
Mem. usage decreased to  0.00 Mb (0.0% reduction)
Mem. usage decreased to  0.00 Mb (0.0% reduction)


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  # This is added back by InteractiveShellApp.init_path()
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the docume

Best trial:
  Params: 
    feature_fraction: 0.9003239859979544
    min_gain_to_split: 0.34843094855499196
    max_depth: 30
    lambda_l1: 0.004007610120638784
    lambda_l2: 0.9714180319489194
    num_leaves: 91
    num_iterations: 197
    learning_rate: 0.008683748014376333
0.1681497472775368
Mem. usage decreased to 95.13 Mb (46.9% reduction)
Mem. usage decreased to  0.00 Mb (0.0% reduction)
Mem. usage decreased to  0.00 Mb (0.0% reduction)


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  # This is added back by InteractiveShellApp.init_path()
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the docume

Best trial:
  Params: 
    feature_fraction: 0.8551121023685185
    min_gain_to_split: 0.9227319453605158
    max_depth: 49
    lambda_l1: 5.915645330691594e-07
    lambda_l2: 0.07876375012393437
    num_leaves: 176
    num_iterations: 59
    learning_rate: 0.008272735866739322
0.15599206316053466
Mem. usage decreased to 98.71 Mb (44.8% reduction)
Mem. usage decreased to  0.00 Mb (0.0% reduction)
Mem. usage decreased to  0.00 Mb (0.0% reduction)


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  # This is added back by InteractiveShellApp.init_path()
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the docume

Best trial:
  Params: 
    feature_fraction: 0.5945872508573609
    min_gain_to_split: 0.7198667639769795
    max_depth: 36
    lambda_l1: 2.1255070652230755e-05
    lambda_l2: 6.420231335865724e-05
    num_leaves: 156
    num_iterations: 363
    learning_rate: 0.0013638532067827348
0.13874738666216596
Mem. usage decreased to 98.15 Mb (44.8% reduction)
Mem. usage decreased to  0.00 Mb (0.0% reduction)
Mem. usage decreased to  0.00 Mb (0.0% reduction)


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  # This is added back by InteractiveShellApp.init_path()
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the docume

Best trial:
  Params: 
    feature_fraction: 0.6127667409981742
    min_gain_to_split: 0.9651328615076978
    max_depth: 77
    lambda_l1: 1.8165105180106462e-05
    lambda_l2: 1.8222903599890097
    num_leaves: 182
    num_iterations: 57
    learning_rate: 0.0457892510019898
0.17810523389096936
Mem. usage decreased to 98.86 Mb (44.8% reduction)
Mem. usage decreased to  0.00 Mb (0.0% reduction)
Mem. usage decreased to  0.00 Mb (0.0% reduction)


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  # This is added back by InteractiveShellApp.init_path()
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the docume

Best trial:
  Params: 
    feature_fraction: 0.6352485383876779
    min_gain_to_split: 0.7821518837302749
    max_depth: 78
    lambda_l1: 0.0237879982278414
    lambda_l2: 0.00011191273219619198
    num_leaves: 156
    num_iterations: 288
    learning_rate: 0.078699865320015
0.14891804426010977
Mem. usage decreased to 95.11 Mb (46.9% reduction)
Mem. usage decreased to  0.00 Mb (0.0% reduction)
Mem. usage decreased to  0.00 Mb (0.0% reduction)


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  # This is added back by InteractiveShellApp.init_path()
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the docume

Best trial:
  Params: 
    feature_fraction: 0.9376842940208138
    min_gain_to_split: 0.9575967036975195
    max_depth: 66
    lambda_l1: 3.478515812027147e-08
    lambda_l2: 0.08488632841318439
    num_leaves: 205
    num_iterations: 313
    learning_rate: 0.03320605179784992
0.14095626391004829
Mem. usage decreased to 68.30 Mb (49.0% reduction)
Mem. usage decreased to  0.00 Mb (0.0% reduction)
Mem. usage decreased to  0.00 Mb (0.0% reduction)


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  # This is added back by InteractiveShellApp.init_path()
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the docume

Best trial:
  Params: 
    feature_fraction: 0.7018657510130585
    min_gain_to_split: 0.4896950712412561
    max_depth: 40
    lambda_l1: 0.00475732674135826
    lambda_l2: 5.5130535850927e-08
    num_leaves: 215
    num_iterations: 275
    learning_rate: 0.0010416522555783995
0.1425189722724705
Mem. usage decreased to 90.15 Mb (46.9% reduction)
Mem. usage decreased to  0.00 Mb (0.0% reduction)
Mem. usage decreased to  0.00 Mb (0.0% reduction)


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  # This is added back by InteractiveShellApp.init_path()
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the docume

Best trial:
  Params: 
    feature_fraction: 0.5555176437803118
    min_gain_to_split: 0.8183356046973684
    max_depth: 61
    lambda_l1: 0.526123623529915
    lambda_l2: 5.0008665571378665e-08
    num_leaves: 193
    num_iterations: 208
    learning_rate: 0.07636722689130082
0.15230432252817655
Mem. usage decreased to 83.51 Mb (46.9% reduction)
Mem. usage decreased to  0.00 Mb (0.0% reduction)
Mem. usage decreased to  0.00 Mb (0.0% reduction)


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  # This is added back by InteractiveShellApp.init_path()
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the docume

Best trial:
  Params: 
    feature_fraction: 0.6869351414751467
    min_gain_to_split: 0.6079858888314128
    max_depth: 48
    lambda_l1: 0.0004791762270186651
    lambda_l2: 0.02415538380756882
    num_leaves: 178
    num_iterations: 181
    learning_rate: 0.014081962403281216
0.17290619283479902
Mem. usage decreased to 89.62 Mb (44.8% reduction)
Mem. usage decreased to  0.00 Mb (0.0% reduction)
Mem. usage decreased to  0.00 Mb (0.0% reduction)


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  # This is added back by InteractiveShellApp.init_path()
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the docume

Best trial:
  Params: 
    feature_fraction: 0.65615274346652
    min_gain_to_split: 0.837649065758834
    max_depth: 65
    lambda_l1: 0.002240668840091261
    lambda_l2: 0.022885401762336614
    num_leaves: 122
    num_iterations: 96
    learning_rate: 0.019544202510925965
0.13455889928306766
Mem. usage decreased to 68.04 Mb (46.9% reduction)
Mem. usage decreased to  0.00 Mb (0.0% reduction)
Mem. usage decreased to  0.00 Mb (0.0% reduction)


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  # This is added back by InteractiveShellApp.init_path()
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the docume

Best trial:
  Params: 
    feature_fraction: 0.5110091242525115
    min_gain_to_split: 0.48676016993435184
    max_depth: 32
    lambda_l1: 5.443312434738262e-06
    lambda_l2: 1.6322177705408288e-07
    num_leaves: 168
    num_iterations: 356
    learning_rate: 0.009680747791992923
0.13777342235694678
Mem. usage decreased to 29.46 Mb (49.0% reduction)
Mem. usage decreased to  0.00 Mb (0.0% reduction)
Mem. usage decreased to  0.00 Mb (0.0% reduction)


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  # This is added back by InteractiveShellApp.init_path()
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the docume

Best trial:
  Params: 
    feature_fraction: 0.8857264461427196
    min_gain_to_split: 0.446188468421254
    max_depth: 49
    lambda_l1: 1.342263867198407e-05
    lambda_l2: 0.0011869253003603114
    num_leaves: 53
    num_iterations: 382
    learning_rate: 0.002801911077815866
0.20920284272901501
Mem. usage decreased to 51.15 Mb (44.8% reduction)
Mem. usage decreased to  0.00 Mb (0.0% reduction)
Mem. usage decreased to  0.00 Mb (0.0% reduction)


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  # This is added back by InteractiveShellApp.init_path()
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the docume

Best trial:
  Params: 
    feature_fraction: 0.5605326980096121
    min_gain_to_split: 0.8198206539767006
    max_depth: 96
    lambda_l1: 1.2058674753071064
    lambda_l2: 0.0021882935483696535
    num_leaves: 185
    num_iterations: 326
    learning_rate: 0.0011184701851387894
0.24009759065457423


In [16]:
# X_test
# x_test = get_features(2)
# model=model_dict[2]
# model.predict(pd.DataFrame(X_test))

In [17]:
def get_features(df):
    df_feat = df[['timestamp', 'Asset_ID', 'Count', 'Open', 'High', 'Low', 'Close','Volume']]
    df_feat['datetime']=df_feat.timestamp.astype('datetime64[s]')
    df_feat.index=df_feat.datetime
    df_feat.drop(['timestamp','datetime'],axis=1,inplace=True)
    df_feat = min_max_scaler.transform(df_feat)
    return df_feat

In [18]:
import gresearch_crypto
env = gresearch_crypto.make_env()
iter_test = env.iter_test()
for i, (df_test, df_pred) in enumerate(iter_test):
    for j , row in df_test.iterrows():  
        if model_dict[row['Asset_ID']] is not None:
            try:
                model = model_dict[row['Asset_ID']]
                x_test = row
                y_pred = model.predict(np.array(get_features(pd.DataFrame(x_test).T)))[0]
                df_pred.loc[df_pred['row_id'] == row['row_id'], 'Target'] = y_pred
            except:
                df_pred.loc[df_pred['row_id'] == row['row_id'], 'Target'] = 0
                traceback.print_exc()
        else: 
            df_pred.loc[df_pred['row_id'] == row['row_id'], 'Target'] = 0  
    
    env.predict(df_pred)

This version of the API is not optimized and should not be used to estimate the runtime of your code on the hidden test set.


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  This is separate from the ipykernel package so we can avoid doing imports until
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  errors=errors,
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  This is separate from the ipykernel package so we can avoid doing imports until
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in t

In [19]:
df_pred
# model.predict(np.array(pd.DataFrame([x_test])))
# model.predict(np.array(get_features(x_test)))[0]
# np.array(get_features(pd.DataFrame(x_test).T))
# X[0]
# x_test
# df_pred
# df_test[['timestamp', 'Asset_ID', 'Count', 'Open', 'High', 'Low', 'Close','Volume']]

Unnamed: 0,row_id,Target
0,42,-0.00029
1,43,0.001709
2,44,-0.007627
3,45,-0.000178
4,46,-3e-05
5,47,-0.001826
6,48,-0.007168
7,49,-0.000214
8,50,-0.000561
9,51,-0.000312


In [20]:
# # model_dict
# for j , row in df_test.iterrows(): 
#     print(pd.DataFrame(row).T)
#     x_test = pd.DataFrame(row).T
#     x_test