# Random forest

random forest regressor: multiple inputs - multiple outputs
- input
  - user: 'Orig', 'Dest', 'depDay', 'arrDay' 
  - other attributes: 'con1', 'con2', 'op_flight1', 'op_flight2', 'op_flight3',
               'elaptime', 'detour', 'stops', 'cluster', 'real_dist',
               'total_time', 'connection_time', 'dep_hour', 'arr_hour'
- output: 'market_share', 'paxe', 'TOT_pax'

In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error
from sklearn.multioutput import MultiOutputRegressor

### 1. load data

In [11]:
# Load the preprocessed dataset from disk, discard the 'Unnamed: 0' column
# that comes with the CSV, and preview the first rows.
dataprep_csv = 'C:/GitHub/pdsp2023/backend/dataset/dataprep_v2.csv'
df = pd.read_csv(dataprep_csv)
df = df.drop(columns=['Unnamed: 0'])
display(df.head())

Unnamed: 0,Orig,con1,con2,Dest,op_flight1,op_flight2,op_flight3,depDay,elaptime,detour,...,stops,paxe,cluster,TOT_pax,market_share,real_dist,total_time,connection_time,dep_hour,arr_hour
0,26,64,239,181,732.0,2624,1101,2.0,535.0,1.02676,...,1.0,0.0,111.0,0.0,0.0,3899.12,535.0,0.0,13,22
1,26,64,239,181,732.0,2624,1101,4.0,535.0,1.02676,...,1.0,0.0,111.0,0.0,0.0,3899.12,535.0,0.0,13,22
2,26,64,239,181,732.0,1748,1101,4.0,775.0,1.02676,...,1.0,0.0,111.0,0.0,0.0,3899.12,775.0,0.0,13,2
3,26,64,239,181,732.0,2624,1101,5.0,535.0,1.02676,...,1.0,0.0,111.0,0.0,0.0,3899.12,535.0,0.0,13,22
4,26,64,239,181,732.0,2624,1101,6.0,535.0,1.02676,...,1.0,0.0,111.0,0.0,0.0,3899.12,535.0,0.0,13,22


In [4]:
# Copy the row labels into a regular 'index' column so each row keeps its
# original identifier after the frame is shuffled and split.
df['index'] = df.index.to_numpy()
df.head()

Unnamed: 0,Orig,con1,con2,Dest,op_flight1,op_flight2,op_flight3,depDay,elaptime,detour,...,paxe,cluster,TOT_pax,market_share,real_dist,total_time,connection_time,dep_hour,arr_hour,index
0,26,64,239,181,732.0,2624,1101,2.0,535.0,1.02676,...,0.0,111.0,0.0,0.0,3899.12,535.0,0.0,13,22,0
1,26,64,239,181,732.0,2624,1101,4.0,535.0,1.02676,...,0.0,111.0,0.0,0.0,3899.12,535.0,0.0,13,22,1
2,26,64,239,181,732.0,1748,1101,4.0,775.0,1.02676,...,0.0,111.0,0.0,0.0,3899.12,775.0,0.0,13,2,2
3,26,64,239,181,732.0,2624,1101,5.0,535.0,1.02676,...,0.0,111.0,0.0,0.0,3899.12,535.0,0.0,13,22,3
4,26,64,239,181,732.0,2624,1101,6.0,535.0,1.02676,...,0.0,111.0,0.0,0.0,3899.12,535.0,0.0,13,22,4


### 2. Splitting the dataset into training/test set

In [5]:
# Columns handed to the split as inputs. The trailing 'index' entry is the row
# identifier added earlier; it travels with X so rows can be traced afterwards.
# NOTE(review): because 'index' is part of X, any estimator fitted directly on
# X_train also sees it as a feature — confirm that is intended.
feature_cols = [
    'Orig', 'con1', 'con2', 'Dest', 'op_flight1', 'op_flight2', 'op_flight3',
    'depDay', 'elaptime', 'detour', 'arrDay', 'stops', 'cluster', 'real_dist',
    'total_time', 'connection_time', 'dep_hour', 'arr_hour', 'index',
]
# The three quantities predicted jointly.
target_cols = ['market_share', 'paxe', 'TOT_pax']

X = df[feature_cols]
y = df[target_cols]

# 80 % of the rows go to training; the fixed seed keeps the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, train_size=0.8, random_state=0
)

In [6]:
# Show the first rows of the feature frame followed by the target frame.
display(X.head(), y.head())

Unnamed: 0,Orig,con1,con2,Dest,op_flight1,op_flight2,op_flight3,depDay,elaptime,detour,arrDay,stops,cluster,real_dist,total_time,connection_time,dep_hour,arr_hour,index
0,26,64,239,181,732.0,2624,1101,2.0,535.0,1.02676,3.0,1.0,111.0,3899.12,535.0,0.0,13,22,0
1,26,64,239,181,732.0,2624,1101,4.0,535.0,1.02676,5.0,1.0,111.0,3899.12,535.0,0.0,13,22,1
2,26,64,239,181,732.0,1748,1101,4.0,775.0,1.02676,5.0,1.0,111.0,3899.12,775.0,0.0,13,2,2
3,26,64,239,181,732.0,2624,1101,5.0,535.0,1.02676,6.0,1.0,111.0,3899.12,535.0,0.0,13,22,3
4,26,64,239,181,732.0,2624,1101,6.0,535.0,1.02676,7.0,1.0,111.0,3899.12,535.0,0.0,13,22,4


Unnamed: 0,market_share,paxe,TOT_pax
0,0.0,0.0,0.0
1,0.0,0.0,0.0
2,0.0,0.0,0.0
3,0.0,0.0,0.0
4,0.0,0.0,0.0


In [5]:
# Standardise the features.
# The scaler is fitted on the training split ONLY; the test split is then
# transformed with those same training statistics. Calling fit_transform on
# X_test would re-estimate mean/variance from the test rows, which scales the
# two splits inconsistently and leaks test information into preprocessing.
# NOTE(review): X_train still contains the bookkeeping 'index' column, so it is
# scaled along with the real features — confirm whether it should be excluded.
sc_X = StandardScaler()
X_train_std = sc_X.fit_transform(X_train)
X_test_std = sc_X.transform(X_test)

### 3. Creating a random forest regression model and fitting it to the training data

In [6]:
# One random forest (1000 trees, depth capped at 60, fixed seed) is used as the
# base learner; MultiOutputRegressor fits a separate copy of it per target column.
base_forest = RandomForestRegressor(n_estimators=1000, max_depth=60, random_state=0)
regr_multirf = MultiOutputRegressor(base_forest)

# Train on the training split.
# NOTE(review): this fits on X_train as produced by the split cell, i.e. the
# unscaled frame that still includes the 'index' column — confirm both are intended.
regr_multirf.fit(X_train, y_train)

In [7]:
# Run the fitted model on both splits (training first, then test).
y_pred_train, y_pred_test = (
    regr_multirf.predict(split) for split in (X_train, X_test)
)

# Mean squared error per split; with several target columns the default
# setting averages the per-column errors into a single number.
train_mse = mean_squared_error(y_train, y_pred_train)
test_mse = mean_squared_error(y_test, y_pred_test)

# Report
print('Multioutput Regressor')
print('--------------------------------------------------------------------------------\n')
print("MSE (Training) = %.4f" % train_mse)
print("MSE (Testing) = %.4f" % test_mse)

Multioutput Regressor
--------------------------------------------------------------------------------

MSE (Training) = 2925.9722
MSE (Testing) = 59793.0069


### 4. Concatenating dataframes

Unnamed: 0,market_share_pred,paxe_pred,TOT_pax_pred
0,0.00018,0.006020,2.167342
1,0.00031,0.948551,594.908440
2,0.00000,0.000000,2207.438540
3,0.00000,0.000000,933.960000
4,0.00009,0.000860,54.500379
...,...,...,...
418239,0.00000,0.005160,3.433053
418240,0.00002,1.687707,924.560200
418241,0.00000,0.000000,13.514989
418242,0.00000,0.000000,10.762467


In [16]:
# train
# Put the training features, the true targets and the model predictions side
# by side in one frame (one row per training sample).
y_pred_train_df = pd.DataFrame(y_pred_train, columns=['market_share_pred', 'paxe_pred', 'TOT_pax_pred'])

# X_train comes out of train_test_split with its original (shuffled) row labels,
# whereas the target and prediction frames are labelled 0..n-1. pd.concat with
# axis=1 aligns on labels, so the feature frame must be re-labelled 0..n-1 as
# well; otherwise rows are paired with the wrong targets and padded with NaN.
# The column list leaves out the bookkeeping 'index' column on purpose.
X_train_df = pd.DataFrame(X_train, columns=['Orig', 'con1', 'con2', 'Dest', 'op_flight1', 'op_flight2', 'op_flight3',
               'depDay', 'elaptime', 'detour', 'arrDay', 'stops', 'cluster', 'real_dist',
               'total_time', 'connection_time', 'dep_hour', 'arr_hour']).reset_index(drop=True)
y_train_df = y_train.reset_index(drop=True)
train_df = pd.concat([X_train_df, y_train_df, y_pred_train_df], axis=1)

# Positional pairing must not add or lose rows.
assert len(train_df) == len(X_train_df) == len(y_train_df) == len(y_pred_train_df)
train_df

Unnamed: 0,Orig,con1,con2,Dest,op_flight1,op_flight2,op_flight3,depDay,elaptime,detour,...,total_time,connection_time,dep_hour,arr_hour,market_share,paxe,TOT_pax,market_share_pred,paxe_pred,TOT_pax_pred
0,0.364340,-0.470338,0.550821,0.778530,0.024904,-1.392203,0.471197,0.522154,-1.599781,0.137548,...,-1.599781,0.128039,-0.580777,0.588805,0.0,0.00,2.58,0.00018,0.006020,2.167342
1,0.483984,0.141899,0.550821,0.487790,-0.858116,-0.340277,0.471197,1.522176,-1.226662,2.014739,...,-1.226662,0.128039,-1.852801,-0.771738,0.0,0.86,575.34,0.00031,0.948551,594.908440
2,-0.186020,0.644809,0.550821,-0.941683,-0.700449,-0.959395,0.471197,0.022143,-0.892818,-0.738794,...,-0.892818,0.128039,-0.580777,1.171895,0.0,0.00,2209.34,0.00000,0.000000,2207.438540
3,-0.784238,0.382421,0.550821,-0.457116,-0.873318,-0.007788,0.471197,-0.477868,0.776400,-0.747726,...,0.776400,0.128039,1.256590,0.588805,0.0,0.00,933.96,0.00000,0.000000,933.960000
4,-0.784238,-1.016979,0.550821,-0.457116,-0.367308,1.420577,0.471197,-1.477890,0.163699,0.521480,...,0.163699,0.128039,-0.015434,-1.743554,0.0,0.00,55.04,0.00009,0.000860,54.500379
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
418239,-0.784238,-1.016979,0.550821,-0.457116,0.769803,1.420577,0.471197,-0.477868,0.619297,0.853420,...,0.619297,0.128039,-0.156770,-1.743554,0.0,0.00,4.30,0.00000,0.005160,3.433053
418240,-0.784238,2.197268,0.550821,-0.457116,-0.866802,-1.726603,0.471197,1.522176,-0.755353,-0.044807,...,-0.755353,0.128039,1.256590,-0.771738,0.0,1.72,933.96,0.00002,1.687707,924.560200
418241,0.172910,-0.142354,-2.078440,-0.457116,-0.783409,1.130127,-0.655111,0.522154,0.776400,0.580687,...,0.776400,0.297911,0.408574,-0.771738,0.0,0.00,9.46,0.00000,0.000000,13.514989
418242,2.302566,-0.470338,0.550821,0.221278,-0.774722,-1.305259,0.471197,-0.977879,-1.835436,1.083448,...,-1.835436,0.128039,-0.722113,0.394442,0.0,0.00,10.32,0.00000,0.000000,10.762467


In [17]:
# test
# Put the test features, the true targets and the model predictions side by
# side in one frame (one row per test sample).
y_pred_test_df = pd.DataFrame(y_pred_test, columns=['market_share_pred', 'paxe_pred', 'TOT_pax_pred'])

# X_test comes out of train_test_split with its original (shuffled) row labels,
# whereas the target and prediction frames are labelled 0..n-1. pd.concat with
# axis=1 aligns on labels, so the feature frame must be re-labelled 0..n-1 as
# well; otherwise rows are paired with the wrong targets and padded with NaN.
# The column list leaves out the bookkeeping 'index' column on purpose.
X_test_df = pd.DataFrame(X_test, columns=['Orig', 'con1', 'con2', 'Dest', 'op_flight1', 'op_flight2', 'op_flight3',
               'depDay', 'elaptime', 'detour', 'arrDay', 'stops', 'cluster', 'real_dist',
               'total_time', 'connection_time', 'dep_hour', 'arr_hour']).reset_index(drop=True)
y_test_df = y_test.reset_index(drop=True)
test_df = pd.concat([X_test_df, y_test_df, y_pred_test_df], axis=1)

# Positional pairing must not add or lose rows.
assert len(test_df) == len(X_test_df) == len(y_test_df) == len(y_pred_test_df)
test_df

Unnamed: 0,Orig,con1,con2,Dest,op_flight1,op_flight2,op_flight3,depDay,elaptime,detour,...,total_time,connection_time,dep_hour,arr_hour,market_share,paxe,TOT_pax,market_share_pred,paxe_pred,TOT_pax_pred
0,0.164767,-1.019843,0.55236,-0.461397,1.058909,-0.140953,0.472397,0.521473,1.271376,0.525408,...,1.271376,0.126542,-0.433723,-1.357516,0.00,0.00,19.78,0.006180,0.010320,26.149355
1,-0.621852,0.139703,0.55236,0.214190,2.031283,0.716543,0.472397,1.522221,-0.396498,-0.732675,...,-0.396498,0.126542,-0.574766,1.748284,0.05,0.86,18.92,0.038630,0.104060,47.380983
2,2.286256,1.977473,0.55236,0.214190,-0.578340,1.280231,0.472397,1.522221,0.702336,1.592421,...,0.702336,0.126542,-0.856852,2.136509,0.00,0.00,79.12,0.006555,0.018920,140.630830
3,0.236278,-0.472887,0.55236,2.047925,-0.633421,-0.838625,0.472397,0.021099,-1.514955,0.064206,...,-1.514955,0.126542,-0.856852,0.389497,0.00,0.00,24.94,0.006336,0.128570,30.917935
4,-0.788711,0.380363,0.55236,-0.461397,-0.785219,1.406559,0.472397,1.522221,-0.062923,-0.766101,...,-0.062923,0.126542,-1.844152,0.195384,0.00,0.00,577.06,0.002936,0.967858,303.522671
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
104557,-0.788711,-0.472887,0.55236,-0.461397,-0.791291,-0.716126,0.472397,1.021847,0.545360,-0.007387,...,0.545360,0.126542,0.976706,0.001272,0.00,0.00,452.36,0.017555,0.178020,324.559184
104558,-0.192787,2.218133,0.55236,3.278458,-0.444324,0.475372,0.472397,1.021847,-0.906671,-0.632221,...,-0.906671,0.126542,-0.151637,1.942397,0.00,0.00,762.82,0.001583,0.131867,739.821880
104559,0.474648,0.270972,0.55236,0.479599,0.355434,-0.629036,0.472397,0.521473,-1.887774,0.164008,...,-1.887774,0.126542,-0.433723,0.777722,0.00,0.00,125.56,0.013222,0.137170,464.334200
104560,-0.788711,-1.019843,0.55236,-0.461397,0.242670,0.830429,0.472397,-0.979649,1.055533,0.113455,...,1.055533,0.126542,-0.433723,-1.551628,0.00,0.00,2002.08,0.000200,0.063640,1441.359300


In [18]:
# whole
# Stack the training rows on top of the test rows and renumber the result 0..n-1.
df_new = pd.concat([train_df, test_df], axis=0, ignore_index=True)
df_new

Unnamed: 0,Orig,con1,con2,Dest,op_flight1,op_flight2,op_flight3,depDay,elaptime,detour,...,total_time,connection_time,dep_hour,arr_hour,market_share,paxe,TOT_pax,market_share_pred,paxe_pred,TOT_pax_pred
0,0.364340,-0.470338,0.550821,0.778530,0.024904,-1.392203,0.471197,0.522154,-1.599781,0.137548,...,-1.599781,0.128039,-0.580777,0.588805,0.0,0.00,2.58,0.000180,0.006020,2.167342
1,0.483984,0.141899,0.550821,0.487790,-0.858116,-0.340277,0.471197,1.522176,-1.226662,2.014739,...,-1.226662,0.128039,-1.852801,-0.771738,0.0,0.86,575.34,0.000310,0.948551,594.908440
2,-0.186020,0.644809,0.550821,-0.941683,-0.700449,-0.959395,0.471197,0.022143,-0.892818,-0.738794,...,-0.892818,0.128039,-0.580777,1.171895,0.0,0.00,2209.34,0.000000,0.000000,2207.438540
3,-0.784238,0.382421,0.550821,-0.457116,-0.873318,-0.007788,0.471197,-0.477868,0.776400,-0.747726,...,0.776400,0.128039,1.256590,0.588805,0.0,0.00,933.96,0.000000,0.000000,933.960000
4,-0.784238,-1.016979,0.550821,-0.457116,-0.367308,1.420577,0.471197,-1.477890,0.163699,0.521480,...,0.163699,0.128039,-0.015434,-1.743554,0.0,0.00,55.04,0.000090,0.000860,54.500379
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
522801,-0.788711,-0.472887,0.552360,-0.461397,-0.791291,-0.716126,0.472397,1.021847,0.545360,-0.007387,...,0.545360,0.126542,0.976706,0.001272,0.0,0.00,452.36,0.017555,0.178020,324.559184
522802,-0.192787,2.218133,0.552360,3.278458,-0.444324,0.475372,0.472397,1.021847,-0.906671,-0.632221,...,-0.906671,0.126542,-0.151637,1.942397,0.0,0.00,762.82,0.001583,0.131867,739.821880
522803,0.474648,0.270972,0.552360,0.479599,0.355434,-0.629036,0.472397,0.521473,-1.887774,0.164008,...,-1.887774,0.126542,-0.433723,0.777722,0.0,0.00,125.56,0.013222,0.137170,464.334200
522804,-0.788711,-1.019843,0.552360,-0.461397,0.242670,0.830429,0.472397,-0.979649,1.055533,0.113455,...,1.055533,0.126542,-0.433723,-1.551628,0.0,0.00,2002.08,0.000200,0.063640,1441.359300


In [19]:
# Persist the combined train+test table; the row index is not written to the file.
final_csv = './dataset/final.csv'
df_new.to_csv(final_csv, index=False)

## Concatenate with index and reproduce the dataset

In [12]:
# Reload the prediction table exported earlier in this notebook. The path is the
# same relative location that `df_new.to_csv` writes to, so the file read here
# is always the one this notebook produced, not a copy at a machine-specific
# absolute path.
df2 = pd.read_csv('./dataset/final.csv')
df2

Unnamed: 0,Orig,con1,con2,Dest,op_flight1,op_flight2,op_flight3,depDay,elaptime,detour,...,total_time,connection_time,dep_hour,arr_hour,market_share,paxe,TOT_pax,market_share_pred,paxe_pred,TOT_pax_pred
0,0.364340,-0.470338,0.550821,0.778530,0.024904,-1.392203,0.471197,0.522154,-1.599781,0.137548,...,-1.599781,0.128039,-0.580777,0.588805,0.0,0.00,2.58,0.000180,0.006020,2.167342
1,0.483984,0.141899,0.550821,0.487790,-0.858116,-0.340277,0.471197,1.522176,-1.226662,2.014739,...,-1.226662,0.128039,-1.852801,-0.771738,0.0,0.86,575.34,0.000310,0.948551,594.908440
2,-0.186020,0.644809,0.550821,-0.941683,-0.700449,-0.959395,0.471197,0.022143,-0.892818,-0.738794,...,-0.892818,0.128039,-0.580777,1.171895,0.0,0.00,2209.34,0.000000,0.000000,2207.438540
3,-0.784238,0.382421,0.550821,-0.457116,-0.873318,-0.007788,0.471197,-0.477868,0.776400,-0.747726,...,0.776400,0.128039,1.256590,0.588805,0.0,0.00,933.96,0.000000,0.000000,933.960000
4,-0.784238,-1.016979,0.550821,-0.457116,-0.367308,1.420577,0.471197,-1.477890,0.163699,0.521480,...,0.163699,0.128039,-0.015434,-1.743554,0.0,0.00,55.04,0.000090,0.000860,54.500379
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
522801,-0.788711,-0.472887,0.552360,-0.461397,-0.791291,-0.716126,0.472397,1.021847,0.545360,-0.007387,...,0.545360,0.126542,0.976706,0.001272,0.0,0.00,452.36,0.017555,0.178020,324.559184
522802,-0.192787,2.218133,0.552360,3.278458,-0.444324,0.475372,0.472397,1.021847,-0.906671,-0.632221,...,-0.906671,0.126542,-0.151637,1.942397,0.0,0.00,762.82,0.001583,0.131867,739.821880
522803,0.474648,0.270972,0.552360,0.479599,0.355434,-0.629036,0.472397,0.521473,-1.887774,0.164008,...,-1.887774,0.126542,-0.433723,0.777722,0.0,0.00,125.56,0.013222,0.137170,464.334200
522804,-0.788711,-1.019843,0.552360,-0.461397,0.242670,0.830429,0.472397,-0.979649,1.055533,0.113455,...,1.055533,0.126542,-0.433723,-1.551628,0.0,0.00,2002.08,0.000200,0.063640,1441.359300


In [21]:
# Take the training features together with the 'index' column that records
# each row's position in the original dataset.
train_keep_cols = [
    'Orig', 'con1', 'con2', 'Dest', 'op_flight1', 'op_flight2', 'op_flight3',
    'depDay', 'elaptime', 'detour', 'arrDay', 'stops', 'cluster', 'real_dist',
    'total_time', 'connection_time', 'dep_hour', 'arr_hour', 'index',
]
Xtrain_concat = X_train[train_keep_cols]
Xtrain_concat

Unnamed: 0,Orig,con1,con2,Dest,op_flight1,op_flight2,op_flight3,depDay,elaptime,detour,arrDay,stops,cluster,real_dist,total_time,connection_time,dep_hour,arr_hour,index
77410,49,26,239,77,2070.0,366,1101,5.0,390.0,1.15323,5.0,1.0,150000.0,2292.937,390.0,0.0,9,15,77410
50715,54,54,239,65,37.0,1467,1101,7.0,485.0,1.47060,7.0,1.0,1003.0,3104.810,485.0,0.0,0,8,50715
272237,26,77,239,6,400.0,819,1101,4.0,570.0,1.00507,4.0,1.0,111.0,4911.960,570.0,0.0,9,18,272237
116883,1,65,239,26,2.0,1815,1101,3.0,995.0,1.00356,4.0,1.0,1601.0,6054.930,995.0,0.0,22,15,116883
491044,1,1,239,26,1167.0,3310,1101,1.0,839.0,1.21814,2.0,1.0,1601.0,7976.131,839.0,0.0,13,3,491044
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
439107,1,1,239,26,3785.0,3310,1101,3.0,955.0,1.27426,4.0,1.0,1601.0,8045.040,955.0,0.0,12,3,439107
117952,1,148,239,26,17.0,16,1101,7.0,605.0,1.12240,1.0,1.0,1601.0,6749.470,605.0,0.0,22,8,117952
435829,41,41,26,26,209.0,3006,782,5.0,995.0,1.22815,6.0,2.0,1601.0,9059.447,995.0,276.0,16,8,435829
305711,130,26,239,54,229.0,457,1101,2.0,330.0,1.31315,2.0,1.0,710.0,2646.808,330.0,0.0,8,14,305711


In [22]:
# Take the test features together with the 'index' column that records each
# row's position in the original dataset.
test_keep_cols = [
    'Orig', 'con1', 'con2', 'Dest', 'op_flight1', 'op_flight2', 'op_flight3',
    'depDay', 'elaptime', 'detour', 'arrDay', 'stops', 'cluster', 'real_dist',
    'total_time', 'connection_time', 'dep_hour', 'arr_hour', 'index',
]
Xtest_concat = X_test[test_keep_cols]
Xtest_concat

Unnamed: 0,Orig,con1,con2,Dest,op_flight1,op_flight2,op_flight3,depDay,elaptime,detour,arrDay,stops,cluster,real_dist,total_time,connection_time,dep_hour,arr_hour,index
244654,41,1,239,26,4460.0,1679,1101,5.0,1120.0,1.21888,6.0,1.0,1601.0,7234.727,1120.0,0.0,10,5,244654
291389,8,54,239,54,6702.0,2575,1101,7.0,695.0,1.00660,1.0,1.0,510.0,6373.190,695.0,0.0,9,21,291389
301622,130,138,239,54,685.0,3164,1101,7.0,975.0,1.39892,1.0,1.0,710.0,3184.927,975.0,0.0,7,23,301622
13871,44,26,239,130,558.0,950,1101,4.0,410.0,1.14106,4.0,1.0,507.0,2296.728,410.0,0.0,7,14,13871
111418,1,65,239,26,208.0,3296,1101,7.0,780.0,1.00096,1.0,1.0,1601.0,7625.109,780.0,0.0,0,13,111418
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
225190,1,26,239,26,194.0,1078,1101,6.0,935.0,1.12898,7.0,1.0,1601.0,10060.892,935.0,0.0,20,12,225190
4540,26,149,239,181,994.0,2323,1101,6.0,565.0,1.02355,7.0,1.0,111.0,4227.720,565.0,0.0,12,22,4540
50738,54,60,239,65,2838.0,1169,1101,5.0,315.0,1.15790,5.0,1.0,1003.0,2429.483,315.0,0.0,10,16,50738
414084,1,1,239,26,2578.0,2694,1101,2.0,1065.0,1.14937,3.0,1.0,1601.0,7886.140,1065.0,0.0,10,4,414084


In [25]:
# Inspect the column labels of the reloaded table (df2) before picking the
# target and prediction columns out of it.
df2.columns

Index(['Orig', 'con1', 'con2', 'Dest', 'op_flight1', 'op_flight2',
       'op_flight3', 'depDay', 'elaptime', 'detour', 'arrDay', 'stops',
       'cluster', 'real_dist', 'total_time', 'connection_time', 'dep_hour',
       'arr_hour', 'market_share', 'paxe', 'TOT_pax', 'market_share_pred',
       'paxe_pred', 'TOT_pax_pred'],
      dtype='object')

In [29]:
# Target and prediction columns to carry over from the reloaded table.
outcome_cols = ['market_share', 'paxe', 'TOT_pax',
                'market_share_pred', 'paxe_pred', 'TOT_pax_pred']

# Stack the train features on top of the test features and renumber the rows
# 0..n-1, matching the train-then-test row order used when df2's source was built.
stacked_features = pd.concat([Xtrain_concat, Xtest_concat], axis=0)
Xorg_concat = stacked_features.reset_index(drop=True)

# Attach the outcome columns column-wise, then discard the helper 'index' column.
with_outcomes = pd.concat([Xorg_concat, df2[outcome_cols]], axis=1)
final = with_outcomes.drop(columns='index')
final

Unnamed: 0,Orig,con1,con2,Dest,op_flight1,op_flight2,op_flight3,depDay,elaptime,detour,...,total_time,connection_time,dep_hour,arr_hour,market_share,paxe,TOT_pax,market_share_pred,paxe_pred,TOT_pax_pred
0,49,26,239,77,2070.0,366,1101,5.0,390.0,1.15323,...,390.0,0.0,9,15,0.0,0.00,2.58,0.000180,0.006020,2.167342
1,54,54,239,65,37.0,1467,1101,7.0,485.0,1.47060,...,485.0,0.0,0,8,0.0,0.86,575.34,0.000310,0.948551,594.908440
2,26,77,239,6,400.0,819,1101,4.0,570.0,1.00507,...,570.0,0.0,9,18,0.0,0.00,2209.34,0.000000,0.000000,2207.438540
3,1,65,239,26,2.0,1815,1101,3.0,995.0,1.00356,...,995.0,0.0,22,15,0.0,0.00,933.96,0.000000,0.000000,933.960000
4,1,1,239,26,1167.0,3310,1101,1.0,839.0,1.21814,...,839.0,0.0,13,3,0.0,0.00,55.04,0.000090,0.000860,54.500379
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
522801,1,26,239,26,194.0,1078,1101,6.0,935.0,1.12898,...,935.0,0.0,20,12,0.0,0.00,452.36,0.017555,0.178020,324.559184
522802,26,149,239,181,994.0,2323,1101,6.0,565.0,1.02355,...,565.0,0.0,12,22,0.0,0.00,762.82,0.001583,0.131867,739.821880
522803,54,60,239,65,2838.0,1169,1101,5.0,315.0,1.15790,...,315.0,0.0,10,16,0.0,0.00,125.56,0.013222,0.137170,464.334200
522804,1,1,239,26,2578.0,2694,1101,2.0,1065.0,1.14937,...,1065.0,0.0,10,4,0.0,0.00,2002.08,0.000200,0.063640,1441.359300


In [30]:
# Write the reconstructed table to disk. No `index=False` is passed, so pandas
# also writes the row index as an unnamed leading column.
# NOTE(review): confirm downstream readers expect that extra column.
final_v2_csv = 'C:/GitHub/pdsp2023/backend/dataset/final_v2.csv'
final.to_csv(final_v2_csv)