In [1]:
import pandas as pd
import numpy as np
from dateutil.relativedelta import relativedelta
pd.options.display.max_columns = 999
from sklearn.metrics import mean_absolute_error

In [16]:
data = pd.read_csv("/dsg/demand_anonymized_20170802.csv", sep=";", parse_dates=["Month"])
eval = pd.read_csv("/dsg/eval.csv", parse_dates=["date"])

In [17]:
# features
clients = data['Ship_To']
cvc = clients.value_counts()
corporate_clients = list(cvc[cvc > 500].index)
regular_clients = list(cvc[cvc <= 500].index)
data['is_corporate'] = data['Ship_To'].isin(corporate_clients)
data['is_regular'] = data['Ship_To'].isin(regular_clients)

In [18]:
aff_func = {"PL": 'first',
            "OrderQty": "sum", 
            "LT": "median", 
            "MOQ": "median", 
            "SafetyStk": "median", 
            "CBO_CBO_Qty_Shortage": "median",
            "Name_Of_Competitor": "median", 
            "Comp_reference_number": "median", 
            "COMP_PRICE_MIN": "median", 
            "COMP_PRICE_AVG": "median", 
            "COMP_PRICE_MAX": "median", 
            "PRICE": "median", 
            "NEAREST_COMP_PRICE_MIN": "median", 
            "NEAREST_COMP_PRICE_MAX": "median",
            'is_corporate' : 'sum', 
            'is_regular' : 'sum'
            }

final_df = data.groupby(["SalOrg", "Material", "Month"]).agg(aff_func).reset_index()

In [21]:
def agg_features(data, columns):
    functions = ["size", "min", "max", "sum", "std", "mean", "median"]
    temp = data.groupby(columns + ['Month'])["OrderQty"].agg(functions).reset_index()
    col = "_".join(columns)
    temp.columns = columns + ['Month'] + [col + func for func in functions]
    data = data.merge(temp, how='left')
    return data

In [22]:
final_df = agg_features(final_df, ["PL"])

In [23]:
def get_cathegorical_stats(df, column_name):
    final_df = df.groupby(['SalOrg', 'Material', 'Month', column_name])['OrderQty']
    final_df_sum = final_df.sum().unstack(column_name).add_prefix(column_name + 'sum_')
    final_df_count = final_df.size().unstack(column_name).add_prefix(column_name + 'count_')
    final_df_mean = final_df.mean().unstack(column_name).add_prefix(column_name + 'mean_')
    final_df_max = final_df.max().unstack(column_name).add_prefix(column_name + 'max_')
    final_df_min = final_df.min().unstack(column_name).add_prefix(column_name + 'min_')
    final_df = final_df_sum.reset_index().merge(final_df_count.reset_index(), on = ['SalOrg', 'Material', 'Month'], copy = False)
    final_df = final_df.merge(final_df_mean.reset_index(), on = ['SalOrg', 'Material', 'Month'], copy = False)
    final_df = final_df.merge(final_df_max.reset_index(), on = ['SalOrg', 'Material', 'Month'], copy = False)
    final_df = final_df.merge(final_df_min.reset_index(), on = ['SalOrg', 'Material', 'Month'], copy = False)
    final_df = final_df.fillna(0)
    return final_df

#data = get_cathegorical_stats(df, "DC")
#final_df = final_df.merge(data, how='left', on=["SalOrg", "Material", "Month"]).fillna(0)

In [24]:
final_df.columns[3:]

Index(['NEAREST_COMP_PRICE_MAX', 'PRICE', 'NEAREST_COMP_PRICE_MIN',
       'CBO_CBO_Qty_Shortage', 'COMP_PRICE_AVG', 'LT', 'is_corporate',
       'COMP_PRICE_MIN', 'is_regular', 'MOQ', 'Comp_reference_number', 'PL',
       'OrderQty', 'COMP_PRICE_MAX', 'Name_Of_Competitor', 'SafetyStk',
       'PLsize', 'PLmin', 'PLmax', 'PLsum', 'PLstd', 'PLmean', 'PLmedian'],
      dtype='object')

In [25]:
import itertools

eval_comb = eval[['Material', 'SalOrg']]
eval_comb = list(set([tuple(x) for x in eval_comb.values]))

comb = list(itertools.product(*[eval_comb, list(final_df['Month'].unique())]))
comb = [(t[0], t[1], m) for t, m in comb]

series2 = pd.DataFrame(comb, columns=['Material', 'SalOrg', 'Month'])
series2 = series2.sort_values(by=['Material', 'SalOrg', 'Month' ])
final_df = series2.merge(final_df, how='left')
final_df = final_df.fillna(0)

In [26]:
def get_correlation(df):
    df['LaggedOrderQty'] = df.OrderQty.shift(12)
    df = df.fillna(0)
    df = df.sort_values(by=['SalOrg', 'Material','Month'])
    df_corr = df.loc[(df["Month"] >= '2016-01-01') &
                     (df["Month"] < '2017-01-01')].groupby(['SalOrg', 'Material']).apply(lambda x: np.corrcoef(np.array(x['OrderQty']), np.array(x['LaggedOrderQty']))[0][1]).reset_index()
    df_corr.columns = ["SalOrg"] + ['Material'] + ['Corr']
    df_corr["Corr"].fillna(0, inplace=True)
    return df_corr

In [27]:
temp = get_correlation(final_df)
final_df = final_df.merge(temp, on=["SalOrg", "Material"], how='left').fillna(0)

  c /= stddev[:, None]
  c /= stddev[None, :]


In [29]:
final_df.drop(["LaggedOrderQty"], axis=1, inplace=True)

In [31]:
oil = pd.read_csv("/dsg/naf.csv", index_col=False, parse_dates=["time"])
oil.columns = ["Oil_price","Month"]

currency = pd.read_csv("/dsg/currency.csv", index_col=False, parse_dates=["period"])
currency.drop(["Unnamed: 0"], axis=1, inplace=True)
currency.columns = ["curr1", "Month", "curr2"]

external_data = oil.merge(currency)

In [32]:
final_df = final_df.merge(external_data)

In [33]:
def create_lag_features(data, target, indexes, lags, target_column):
    temp = pd.pivot_table(data.loc[(data["Month"] > data.loc[indexes, "Month"].min() - relativedelta(months=lags))&
                                   (data["Month"] < data.loc[indexes, "Month"].min())], 
                          index=["SalOrg", "Material"], values=[target_column], columns=["Month"]).reset_index()
    temp.columns = ["{}_{}".format(target_column, i)  if j[1] != "" else j[0] for i, j in enumerate(temp.columns)]
    target = target.merge(temp, how='left')
    return target

def create_agg_features(data, indexes, lags, columns, name):
    temp = data.loc[(data["Month"] > data.loc[indexes, "Month"].min() - relativedelta(months=lags))&
                    (data["Month"] < data.loc[indexes, "Month"].min())].groupby(columns)["OrderQty"].agg(["median", "mean", "std", "min", "max"]).reset_index()
    temp.columns = columns + [col + name + str(lags) for col in ["median", "mean", "std", "min", "max"]]
    return temp

def creating_features(df, indexes, lag_features):
    temp = df.loc[indexes].merge(create_agg_features(df, indexes, 4, ["SalOrg", "Material"], "_s_m_"), how='left', on=["SalOrg", "Material"])
    temp = temp.merge(create_agg_features(df, indexes, 7, ["SalOrg", "Material"], "_s_m_"), how='left', on=["SalOrg", "Material"])
    temp = temp.merge(create_agg_features(df, indexes, 13, ["SalOrg", "Material"], "_s_m_"), how='left', on=["SalOrg", "Material"])
    for col in lag_features:
        if col == "OrderQty":
            temp = create_lag_features(df, temp, indexes, 15, col)
        else:
            temp = create_lag_features(df, temp, indexes, 4, col)
    return temp

In [34]:
def create_validation(data, date, m):
    return data[(data["Month"] > pd.to_datetime(date) - relativedelta(months=m))&
                (data["Month"] < pd.to_datetime(date))].index, data[(data["Month"] >= pd.to_datetime(date)) & 
                (data["Month"] < pd.to_datetime(date) + relativedelta(months=3))].index
validation_months = ['2016-10-01' ,'2016-11-01', '2016-12-01', '2017-01-01']
#validation_months = ['2016-10-01' , '2017-01-01']


folds = []
for month in validation_months:
    folds.append(create_validation(final_df, month, 4))

In [35]:
for fold in folds:
    print("Train:", str(final_df.loc[fold[0], "Month"].min())[:10], "–", str(final_df.loc[fold[0], "Month"].max())[:10],
          "    Validation:", str(final_df.loc[fold[1], "Month"].min())[:10], "–", str(final_df.loc[fold[1], "Month"].max())[:10])

Train: 2016-07-01 – 2016-09-01     Validation: 2016-10-01 – 2016-12-01
Train: 2016-08-01 – 2016-10-01     Validation: 2016-11-01 – 2017-01-01
Train: 2016-09-01 – 2016-11-01     Validation: 2016-12-01 – 2017-02-01
Train: 2016-10-01 – 2016-12-01     Validation: 2017-01-01 – 2017-03-01


In [36]:
def replace_month(data):
    data["Month"] = data["Month"].astype(str)
    month_replace = {}
    for i, j in enumerate(sorted(list(data["Month"].unique()))):
        month_replace[j] = i

    data["Month"] = data["Month"].replace(month_replace)
    return data

In [37]:
final_df.columns

Index(['Material', 'SalOrg', 'Month', 'NEAREST_COMP_PRICE_MAX', 'PRICE',
       'NEAREST_COMP_PRICE_MIN', 'CBO_CBO_Qty_Shortage', 'COMP_PRICE_AVG',
       'LT', 'is_corporate', 'COMP_PRICE_MIN', 'is_regular', 'MOQ',
       'Comp_reference_number', 'PL', 'OrderQty', 'COMP_PRICE_MAX',
       'Name_Of_Competitor', 'SafetyStk', 'PLsize', 'PLmin', 'PLmax', 'PLsum',
       'PLstd', 'PLmean', 'PLmedian', 'Corr', 'Oil_price', 'curr1', 'curr2'],
      dtype='object')

In [38]:
import lightgbm as lgb
param = {}
param['application'] = 'regression_l2'
param['learning_rate'] = 0.1
param['feature_fraction'] = 0.4
param['bagging_fraction'] = 0.4
param['bagging_freq'] = 1
param['max_depth'] = 7
param['num_threads'] = 40
param["verbose"] = 0
param["metric"] = 'l1'

lag_features = ['OrderQty', 'Name_Of_Competitor',
               'COMP_PRICE_MAX', 'COMP_PRICE_AVG', 'Comp_reference_number',
               'COMP_PRICE_MIN', 'LT', 'NEAREST_COMP_PRICE_MIN', 'SafetyStk', 'MOQ',
               'CBO_CBO_Qty_Shortage', 'PRICE', 'NEAREST_COMP_PRICE_MAX', 'PLsize',
               'PLmin', 'PLmax', 'PLsum', 'PLstd', 'PLmean', 'PLmedian', 'Corr',
               'Oil_price', 'curr1', 'curr2', 'is_corporate', 'is_regular']

def lgb_smape(preds, df):
    labels = df.get_label()
    labels, preds = np.expm1(np.array(labels)), np.expm1(np.array(preds))
    return 'mae', mean_absolute_error(labels, preds), False

from sklearn.model_selection import ParameterGrid
grid = ParameterGrid({"feature_fraction": [0.3, 0.5, 0.7], 
                      "bagging_fraction": [0.3, 0.5, 0.7], 
                      "max_depth": [3, 5, 7]})

for params in grid:
    param.update(params)
    results = {"train": [], "val": [], "iteration": []}
    results = {"train": [], "val": [], "iteration": []}
    for small_train, small_val in folds:
        # TRAIN & VAL
        # create aggregated features

        fold_train = creating_features(final_df, small_train, lag_features)
        fold_val = creating_features(final_df, small_val, lag_features)

        fold_train = replace_month(fold_train)
        fold_val = replace_month(fold_val)

        # apply log to all numeric features
        numeric_features = list(fold_train.columns[28:])+['OrderQty']
        fold_train[numeric_features] = np.log1p(fold_train[numeric_features])
        fold_val[numeric_features] = np.log1p(fold_val[numeric_features])
        # preparing data from model
        train_features = list(fold_train.columns[24:]) + ["Month"]
        lgb_train = lgb.Dataset(fold_train[train_features], label=fold_train["OrderQty"], free_raw_data=False)
        lgb_val = lgb.Dataset(fold_val[train_features], label=fold_val["OrderQty"], free_raw_data=False, reference=lgb_train)

        # model train
        model = lgb.train(param, lgb_train, 1000, valid_sets=[lgb_train, lgb_val], early_stopping_rounds=10, feval=lgb_smape, verbose_eval=0)

        #save results
        results['train'].append(model.best_score['training']['mae'])
        results['val'].append(model.best_score['valid_1']['mae'])
        results['iteration'].append(int(model.best_iteration))
    print(params)
    print(results)

{'feature_fraction': 0.3, 'bagging_fraction': 0.3, 'max_depth': 3}
{'train': [7.8279992807908645, 8.7527638654835886, 11.387899103761532, 9.6054681839696787], 'val': [11.457966282620681, 10.267253369095082, 8.7698559137541121, 9.1847410507959957], 'iteration': [98, 86, 38, 47]}
{'feature_fraction': 0.3, 'bagging_fraction': 0.3, 'max_depth': 5}
{'train': [7.1524577429396716, 8.4377836310744083, 10.90666465252785, 8.9145382291424777], 'val': [11.608590544727157, 10.182717675629702, 8.5368515850343893, 9.1989848956115559], 'iteration': [103, 58, 32, 47]}
{'feature_fraction': 0.3, 'bagging_fraction': 0.3, 'max_depth': 7}
{'train': [7.074294239671425, 7.9260821200421923, 10.732445759106952, 9.1875868079754692], 'val': [11.499007452424161, 10.065893725794892, 8.5813670808510754, 9.2290491673158428], 'iteration': [80, 88, 32, 39]}
{'feature_fraction': 0.5, 'bagging_fraction': 0.3, 'max_depth': 3}
{'train': [7.8995810970175269, 8.7933871476870245, 10.382995132447164, 9.3805807839382123], 'val'

In [39]:
results

{'iteration': [72, 58, 37, 55],
 'train': [6.7333615200467651,
  7.8246668978700642,
  9.8562445258835805,
  8.1857079858414945],
 'val': [11.827766763085947,
  10.111658287661902,
  8.3763538099752992,
  9.0403624885513754]}

In [40]:
model.best_iteration

55

In [41]:
train_features


['PLmean',
 'PLmedian',
 'Corr',
 'Oil_price',
 'curr1',
 'curr2',
 'median_s_m_4',
 'mean_s_m_4',
 'std_s_m_4',
 'min_s_m_4',
 'max_s_m_4',
 'median_s_m_7',
 'mean_s_m_7',
 'std_s_m_7',
 'min_s_m_7',
 'max_s_m_7',
 'median_s_m_13',
 'mean_s_m_13',
 'std_s_m_13',
 'min_s_m_13',
 'max_s_m_13',
 'OrderQty_2',
 'OrderQty_3',
 'OrderQty_4',
 'OrderQty_5',
 'OrderQty_6',
 'OrderQty_7',
 'OrderQty_8',
 'OrderQty_9',
 'OrderQty_10',
 'OrderQty_11',
 'OrderQty_12',
 'OrderQty_13',
 'OrderQty_14',
 'OrderQty_15',
 'Name_Of_Competitor_2',
 'Name_Of_Competitor_3',
 'Name_Of_Competitor_4',
 'COMP_PRICE_MAX_2',
 'COMP_PRICE_MAX_3',
 'COMP_PRICE_MAX_4',
 'COMP_PRICE_AVG_2',
 'COMP_PRICE_AVG_3',
 'COMP_PRICE_AVG_4',
 'Comp_reference_number_2',
 'Comp_reference_number_3',
 'Comp_reference_number_4',
 'COMP_PRICE_MIN_2',
 'COMP_PRICE_MIN_3',
 'COMP_PRICE_MIN_4',
 'LT_2',
 'LT_3',
 'LT_4',
 'NEAREST_COMP_PRICE_MIN_2',
 'NEAREST_COMP_PRICE_MIN_3',
 'NEAREST_COMP_PRICE_MIN_4',
 'SafetyStk_2',
 'SafetyStk_

# Prediction

In [118]:
final_df.columns[3:]

Index(['PL', 'OrderQty', 'Name_Of_Competitor', 'COMP_PRICE_MAX',
       'COMP_PRICE_AVG', 'Comp_reference_number', 'COMP_PRICE_MIN', 'LT',
       'NEAREST_COMP_PRICE_MIN', 'SafetyStk', 'MOQ', 'CBO_CBO_Qty_Shortage',
       'PRICE', 'NEAREST_COMP_PRICE_MAX', 'PLsize', 'PLmin', 'PLmax', 'PLsum',
       'PLstd', 'PLmean', 'PLmedian', 'Corr', 'Oil_price', 'curr1', 'curr2'],
      dtype='object')

In [119]:
final_df["ID"] = -1
for col in final_df.columns[3:-1]:
    eval_df[col] = 0
eval_df.rename(columns={"date": "Month"}, inplace=True)

In [120]:
eval_df.head()

Unnamed: 0,ID,SalOrg,Material,Month,PL,OrderQty,Name_Of_Competitor,COMP_PRICE_MAX,COMP_PRICE_AVG,Comp_reference_number,COMP_PRICE_MIN,LT,NEAREST_COMP_PRICE_MIN,SafetyStk,MOQ,CBO_CBO_Qty_Shortage,PRICE,NEAREST_COMP_PRICE_MAX,PLsize,PLmin,PLmax,PLsum,PLstd,PLmean,PLmedian,Corr,Oil_price,curr1,curr2
0,0,97LK,00IYcj,2017-04-01,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
1,1,97LK,00lqzT,2017-04-01,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
2,2,97LK,00MFcK,2017-04-01,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
3,3,97LK,00mt9e,2017-04-01,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
4,4,97LK,00Ok8y,2017-04-01,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0


In [121]:
big_df = pd.concat([final_df, eval_df]).reset_index(drop=True)

In [122]:
def create_validation(data, date, m):
    return data[(data["Month"] > pd.to_datetime(date) - relativedelta(months=m))&
                (data["Month"] < pd.to_datetime(date))].index, data[(data["Month"] >= pd.to_datetime(date)) & 
                (data["Month"] < pd.to_datetime(date) + relativedelta(months=3))].index
validation_months = ['2017-04-01']
folds = []
for month in validation_months:
    folds.append(create_validation(big_df, month, 4))

In [123]:
for fold in folds:
    print("Train:", str(big_df.loc[fold[0], "Month"].min())[:10], "–", str(big_df.loc[fold[0], "Month"].max())[:10],
          "    Validation:", str(big_df.loc[fold[1], "Month"].min())[:10], "–", str(big_df.loc[fold[1], "Month"].max())[:10])

Train: 2017-01-01 – 2017-03-01     Validation: 2017-04-01 – 2017-06-01


In [124]:
import lightgbm as lgb
param = {}
param['application'] = 'regression_l2'
param['learning_rate'] = 0.1
param['feature_fraction'] = 0.4
param['bagging_fraction'] = 0.4
param['bagging_freq'] = 1
param['max_depth'] = 7
param['num_threads'] = 40
param["verbose"] = 0

def lgb_smape(preds, df):
    labels = df.get_label()
    labels, preds = np.expm1(np.array(labels)), np.expm1(np.array(preds))
    return 'mae', mean_absolute_error(labels, preds), False



for small_train, small_val in folds:
    # TRAIN & VAL
    # create aggregated features
    fold_train = creating_features(big_df, small_train, lag_features)
    fold_test = creating_features(big_df, small_val, lag_features)
    fold_train = replace_month(fold_train)
    fold_test = replace_month(fold_test)
    # apply log to all numeric features
    fold_train[numeric_features] = np.log1p(fold_train[numeric_features])
    fold_test[numeric_features] = np.log1p(fold_test[numeric_features])
    
    # preparing data from model
    lgb_train = lgb.Dataset(fold_train[train_features], label=fold_train["OrderQty"], free_raw_data=False)
    
    # model train
    model = lgb.train(param, lgb_train, 60, verbose_eval=0)

In [125]:
predictions = np.expm1(model.predict(fold_test[train_features]))

In [126]:
fold_test["OrderQty"] = predictions

In [127]:
submit = fold_test[["ID", "OrderQty"]]
submit.columns = ["ID", "demand"]
submit[["ID", "demand"]].to_csv("lightgbm_4.csv", index=False)

In [527]:
lighgbm1 = pd.read_csv("lightgbm.csv")
lighgbm2 = pd.read_csv("lightgbm_2.csv")
lighgbm3 = pd.read_csv("lightgbm_3.csv")

In [528]:
knn6 = pd.read_csv("knn6.csv")
knn8 = pd.read_csv("../../../dsg/knn8_9.62.csv")

In [538]:
linear = pd.read_csv("linear_models_11.98.csv")
linear2 = pd.read_csv("linear_models_elastic_11.92.csv")

In [539]:
np.corrcoef([lighgbm1.demand, lighgbm2.demand, lighgbm3.demand, knn6.demand, knn8.demand, linear.demand, linear2.demand])

array([[ 1.        ,  0.97497321,  0.97395888,  0.92382771,  0.92738676,
         0.9280783 ,  0.93132801],
       [ 0.97497321,  1.        ,  0.97471658,  0.90705455,  0.92309211,
         0.92605789,  0.92790435],
       [ 0.97395888,  0.97471658,  1.        ,  0.91534597,  0.92489863,
         0.92257918,  0.92566752],
       [ 0.92382771,  0.90705455,  0.91534597,  1.        ,  0.97585298,
         0.93258184,  0.93328377],
       [ 0.92738676,  0.92309211,  0.92489863,  0.97585298,  1.        ,
         0.93453808,  0.93853629],
       [ 0.9280783 ,  0.92605789,  0.92257918,  0.93258184,  0.93453808,
         1.        ,  0.98951746],
       [ 0.93132801,  0.92790435,  0.92566752,  0.93328377,  0.93853629,
         0.98951746,  1.        ]])

In [540]:
fold_test["OrderQty"] = (lighgbm1.demand + lighgbm2.demand + lighgbm3.demand) / 9. + (knn6.demand + knn8.demand) / 6. + (linear.demand + linear2.demand)/6.
submit = fold_test[["ID", "OrderQty"]]
submit.columns = ["ID", "demand"]
submit[["ID", "demand"]].to_csv("lightgbm_knn_linear_ensemble_4.csv", index=False)

In [186]:
list(fold_train.columns[24:])

['SalOrg',
 'median_s_m_2',
 'mean_s_m_2',
 'std_s_m_2',
 'min_s_m_2',
 'max_s_m_2',
 'OrderQty_2',
 'OrderQty_3',
 'OrderQty_4',
 'OrderQty_5',
 'OrderQty_6',
 'OrderQty_7',
 'OrderQty_8',
 'OrderQty_9',
 'OrderQty_10',
 'OrderQty_11',
 'OrderQty_12',
 'OrderQty_13',
 'OrderQty_14',
 'OrderQty_15',
 'OrderQty_16',
 'OrderQty_17',
 'OrderQty_18',
 'OrderQty_19',
 'OrderQty_20',
 'OrderQty_21',
 'OrderQty_22',
 'OrderQty_23',
 'OrderQty_24',
 'OrderQty_25',
 'OrderQty_26',
 'DCsum_1aVE_2',
 'DCsum_1aVE_3',
 'DCsum_1aVE_4',
 'DCsum_1aVE_5',
 'DCsum_1aVE_6',
 'DCsum_1aVE_7',
 'DCsum_1aVE_8',
 'DCsum_1aVE_9',
 'DCsum_1aVE_10',
 'DCsum_1aVE_11',
 'DCsum_1aVE_12',
 'DCsum_1aVE_13',
 'DCsum_1aVE_14',
 'DCsum_1aVE_15',
 'DCsum_1aVE_16',
 'DCsum_1aVE_17',
 'DCsum_1aVE_18',
 'DCsum_1aVE_19',
 'DCsum_1aVE_20',
 'DCsum_1aVE_21',
 'DCsum_1aVE_22',
 'DCsum_1aVE_23',
 'DCsum_1aVE_24',
 'DCsum_1aVE_25',
 'DCsum_1aVE_26',
 'DCsum_TUiR_2',
 'DCsum_TUiR_3',
 'DCsum_TUiR_4',
 'DCsum_TUiR_5',
 'DCsum_TUiR_

In [222]:
submit

Unnamed: 0,ID,demand
0,0,3.944233
1,1,1.909186
2,2,0.167849
3,3,1.030204
4,4,0.440303
5,5,0.271694
6,6,0.270254
7,7,0.112237
8,8,0.467501
9,9,199.765456
