- ## ****Product-level features****


 - ### ****product_reorder_rate****

In [None]:
# Per-product statistics over the prior orders:
#   product_reorder_rate    - mean of the `reordered` column
#   product_total_purchases - count of non-null `reordered` rows
# Both columns are downcast to 32-bit dtypes to reduce memory use.
product_features = (
    order_products_prior
    .groupby('product_id')['reordered']
    .agg(product_reorder_rate='mean', product_total_purchases='count')
    .reset_index()
    .astype({'product_reorder_rate': 'float32', 'product_total_purchases': 'int32'})
)

print(product_features.head())

   product_id  product_reorder_rate  product_total_purchases
0           1              0.613391                     1852
1           2              0.133333                       90
2           3              0.732852                      277
3           4              0.446809                      329
4           5              0.600000                       15


 - ### ****avg_pos_in_cart****

In [None]:
# Mean `add_to_cart_order` per product over the prior orders, stored as
# float32 to reduce memory use.
avg_pos = (
    order_products_prior
    .groupby('product_id')['add_to_cart_order']
    .mean()
    .astype('float32')
    .rename('avg_pos_in_cart')
    .reset_index()
)

# Attach the new column to the product feature table.
# - Dropping any existing `avg_pos_in_cart` first keeps the cell re-runnable:
#   without it a second run would silently create avg_pos_in_cart_x / _y.
# - validate='one_to_one' makes pandas raise if either side has a duplicated
#   product_id instead of quietly duplicating rows.
product_features = (
    product_features
    .drop(columns=['avg_pos_in_cart'], errors='ignore')
    .merge(avg_pos, on='product_id', how='left', validate='one_to_one')
)

# Release the intermediate frame.
del avg_pos
gc.collect()
print(product_features.head())

   product_id  product_reorder_rate  product_total_purchases  avg_pos_in_cart
0           1              0.613391                     1852         5.801836
1           2              0.133333                       90         9.888889
2           3              0.732852                      277         6.415163
3           4              0.446809                      329         9.507599
4           5              0.600000                       15         6.466667


 - ### ****popularity_over_time (product_avg_hour_of_day)****

In [None]:
# Temporary join: give every prior order line the hour of day of its order.
# Only the columns that are needed are selected on both sides.
product_time = order_products_prior[['order_id', 'product_id']].merge(
    orders[['order_id', 'order_hour_of_day']], on='order_id', how='left'
)

# Mean order hour per product, stored as float32 to reduce memory use.
# NOTE(review): this is a plain arithmetic mean of the hour value; if hours
# are 0-23, purchases on either side of midnight average toward midday.
avg_hour = (
    product_time
    .groupby('product_id')['order_hour_of_day']
    .mean()
    .astype('float32')
    .rename('product_avg_hour_of_day')
    .reset_index()
)

# Attach the new column to the product feature table.
# - Dropping any existing `product_avg_hour_of_day` first keeps the cell
#   re-runnable (no silent _x / _y suffixed duplicates on a second run).
# - validate='one_to_one' makes pandas raise if either side has a duplicated
#   product_id instead of quietly duplicating rows.
product_features = (
    product_features
    .drop(columns=['product_avg_hour_of_day'], errors='ignore')
    .merge(avg_hour, on='product_id', how='left', validate='one_to_one')
)

# Release the intermediate frames.
del product_time, avg_hour
gc.collect()
print(product_features.head())

   product_id  product_reorder_rate  product_total_purchases  avg_pos_in_cart  \
0           1              0.613391                     1852         5.801836   
1           2              0.133333                       90         9.888889   
2           3              0.732852                      277         6.415163   
3           4              0.446809                      329         9.507599   
4           5              0.600000                       15         6.466667   

   product_avg_hour_of_day  
0                13.238121  
1                13.277778  
2                12.104693  
3                13.714286  
4                10.666667  


 - ### ****Final integration with (My_Data)****

In [None]:
# Attach the per-product features to every row of the main table.
# validate='many_to_one' makes pandas raise if `product_features` contains a
# duplicated product_id, which would otherwise silently multiply rows of
# `My_Data` in this left join.
My_Data = My_Data.merge(
    product_features, on='product_id', how='left', validate='many_to_one'
)

# Delete the intermediate table to save space.
# Note: after this, the product-feature cells must be re-run before this cell
# can be executed again.
del product_features
gc.collect()
print(My_Data.shape)

(32640698, 73)


In [None]:
# Show the table dimensions followed by its first rows.
print(My_Data.shape, My_Data.head(), sep='\n')

(32640698, 73)
   order_id  user_id  order_number  days_since_prior_order  product_id  \
0   2539329        1     -1.015522               -1.260272         196   
1   2539329        1     -1.015522               -1.260272       14084   
2   2539329        1     -1.015522               -1.260272       12427   
3   2539329        1     -1.015522               -1.260272       26088   
4   2539329        1     -1.015522               -1.260272       26405   

   add_to_cart_order  reordered  product_name  aisle_id  eval_set_test  ...  \
0          -1.147241          0         35791        77              0  ...   
1          -0.982113          0         15935        91              0  ...   
2          -0.816986          0          6476        23              0  ...   
3          -0.651859          0          2523        23              0  ...   
4          -0.486731          0          1214        54              0  ...   

   aisle_id_kfold_te  user_total_orders  user_avg_basket_size  \


  has_large_values = (abs_vals > 1e6).any()
