- ## **User×Product interaction features:**


 - ### **up_purchase_count**

In [None]:
# up_purchase_count: number of prior order lines per (user, product) pair.
# The count is downcast to int32 before it is turned back into a frame,
# which keeps the feature table small in RAM.
up_features = (
    order_products_prior
    .groupby(['user_id', 'product_id'])['order_id']
    .count()
    .rename('up_purchase_count')
    .astype('int32')
    .reset_index()
)

print(up_features.head())

   user_id  product_id  up_purchase_count
0        1         196                 10
1        1       10258                  9
2        1       10326                  1
3        1       12427                 10
4        1       13032                  3


 - ### **up_days_since_last_purchase**

In [None]:
# Attach order_number to every prior order line. Only the two columns that
# are needed are taken from `orders`, to keep the merge light on RAM.
# The merge is skipped when the column is already there: running it a second
# time would produce order_number_x / order_number_y and the groupby below
# would fail with a KeyError.
if 'order_number' not in order_products_prior.columns:
    order_products_prior = order_products_prior.merge(
        orders[['order_id', 'order_number']], on='order_id', how='left'
    )

# Highest order_number in which each user bought each product
last_order_per_up = (
    order_products_prior
    .groupby(['user_id', 'product_id'])['order_number']
    .max()
    .reset_index()
)

# Look up days_since_prior_order of that (user_id, order_number).
# NOTE(review): the value stored is the days_since_prior_order of the order
# in which the product was last bought - presumably the gap between that
# order and the one before it, not the time elapsed since that purchase.
# Confirm this is the intended definition of the feature.
up_last_order = last_order_per_up.merge(
    orders[['user_id', 'order_number', 'days_since_prior_order']],
    on=['user_id', 'order_number'],
    how='left',
)

# Rename the column so the feature name is explicit
up_last_order.rename(
    columns={'days_since_prior_order': 'up_days_since_last_purchase'},
    inplace=True,
)

# Remove a copy of the feature left by a previous run of this cell, so the
# merge does not create *_x / *_y duplicates, then add the fresh values.
up_features = up_features.drop(columns=['up_days_since_last_purchase'], errors='ignore')
up_features = up_features.merge(
    up_last_order[['user_id', 'product_id', 'up_days_since_last_purchase']],
    on=['user_id', 'product_id'],
    how='left',
)

# Free the intermediate tables immediately
del last_order_per_up, up_last_order
gc.collect()

print(up_features.head())

   user_id  product_id  up_purchase_count  up_days_since_last_purchase
0        1         196                 10                         30.0
1        1       10258                  9                         30.0
2        1       10326                  1                         28.0
3        1       12427                 10                         30.0
4        1       13032                  3                         30.0


  has_large_values = (abs_vals > 1e6).any()


 - ### **up_reorder_probability**

In [None]:
# Rebuilds up_features from scratch with three features per (user, product):
# up_purchase_count, up_reorder_probability and up_days_since_last_purchase.
# Requires order_products_prior to already carry an order_number column.

# One row per distinct (user_id, user_total_orders) pair found in My_Data
user_orders_count = My_Data[['user_id', 'user_total_orders']].drop_duplicates()

# A single pass over the prior order lines yields the purchase count and the
# first / last order_number per pair (instead of two groupbys plus a merge).
up_features = (
    order_products_prior
    .groupby(['user_id', 'product_id'])
    .agg(
        up_purchase_count=('order_id', 'count'),
        first_order_number=('order_number', 'min'),
        last_order_number=('order_number', 'max'),
    )
    .reset_index()
)

# Same int32 downcast as used when the count is first built, to save RAM
up_features['up_purchase_count'] = up_features['up_purchase_count'].astype('int32')

up_features = up_features.merge(user_orders_count, on='user_id', how='left')

# Purchases divided by the number of orders from the first one containing
# the product up to user_total_orders (inclusive, hence the + 1).
orders_since_first = up_features['user_total_orders'] - up_features['first_order_number'] + 1
up_features['up_reorder_probability'] = (
    up_features['up_purchase_count'] / orders_since_first
).astype('float32')
del orders_since_first

# days_since_prior_order of the last order in which the product was bought
up_features = up_features.merge(
    orders[['user_id', 'order_number', 'days_since_prior_order']],
    left_on=['user_id', 'last_order_number'],
    right_on=['user_id', 'order_number'],
    how='left',
)
up_features.rename(
    columns={'days_since_prior_order': 'up_days_since_last_purchase'},
    inplace=True,
)

# Missing values (no days_since_prior_order on the joined order - presumably
# a user's first order, TODO confirm) are replaced by 0 before the downcast.
up_features['up_days_since_last_purchase'] = (
    up_features['up_days_since_last_purchase'].fillna(0).astype('float32')
)

# Helper columns are no longer needed once the features are computed
up_features.drop(
    ['first_order_number', 'last_order_number', 'order_number', 'user_total_orders'],
    axis=1,
    inplace=True,
)

del user_orders_count
gc.collect()
print(up_features.columns.tolist())
print(up_features.head())

['user_id', 'product_id', 'up_purchase_count', 'up_reorder_probability', 'up_days_since_last_purchase']
   user_id  product_id  up_purchase_count  up_reorder_probability  \
0        1         196                 10                0.909091   
1        1       10258                  9                0.900000   
2        1       10326                  1                0.142857   
3        1       12427                 10                0.909091   
4        1       13032                  3                0.300000   

   up_days_since_last_purchase  
0                         30.0  
1                         30.0  
2                         28.0  
3                         30.0  
4                         30.0  


 - ### **Final integration with My_Data**

In [None]:
# Bring the join keys to the same dtype on both sides before merging.
# float32 stores integers exactly only up to 2**24 (16,777,216); beyond that
# distinct ids round to the same float and the merge would silently match
# the wrong rows, so stop with an error instead of casting.
_float32_exact_max = 2 ** 24
for _frame_name, _frame in (('My_Data', My_Data), ('up_features', up_features)):
    for _key in ('user_id', 'product_id'):
        if _frame[_key].max() > _float32_exact_max:
            raise ValueError(
                f"{_frame_name}['{_key}'] contains ids above {_float32_exact_max}; "
                "casting to float32 would merge distinct ids together"
            )
        _frame[_key] = _frame[_key].astype('float32')

# Drop the loop names now: _frame still points at up_features and would
# keep it alive after the `del up_features` below.
del _float32_exact_max, _frame_name, _frame, _key

# Left join keeps every row of My_Data; pairs without features get NaN
My_Data = My_Data.merge(up_features, on=['user_id', 'product_id'], how='left')

del up_features
gc.collect()

print(My_Data.shape)
print(My_Data.columns.tolist())

(32640698, 76)
['order_id', 'user_id', 'order_number', 'days_since_prior_order', 'product_id', 'add_to_cart_order', 'reordered', 'product_name', 'aisle_id', 'eval_set_test', 'eval_set_train', 'order_dow_1', 'order_dow_2', 'order_dow_3', 'order_dow_4', 'order_dow_5', 'order_dow_6', 'department_id_1', 'department_id_2', 'department_id_3', 'department_id_4', 'department_id_5', 'department_id_6', 'department_id_7', 'department_id_8', 'department_id_9', 'department_id_10', 'department_id_11', 'department_id_12', 'department_id_13', 'department_id_14', 'department_id_15', 'department_id_16', 'department_id_17', 'department_id_18', 'department_id_19', 'department_id_20', 'department_id_21', 'order_hour_of_day_1', 'order_hour_of_day_2', 'order_hour_of_day_3', 'order_hour_of_day_4', 'order_hour_of_day_5', 'order_hour_of_day_6', 'order_hour_of_day_7', 'order_hour_of_day_8', 'order_hour_of_day_9', 'order_hour_of_day_10', 'order_hour_of_day_11', 'order_hour_of_day_12', 'order_hour_of_day_13', 'ord