- ## **Calculating the "last 3 orders" feature for each user and product**
  - #### **This tells us how many times the user purchased this product in their last 3 orders**

In [None]:
# Feature: up_last_3_purchase_count -- number of rows for each
# (user_id, product_id) pair that fall inside the user's most recent
# order-number window.

# Pre-cleaning: drop any earlier copy of the column so that re-running this
# cell does not produce `_x` / `_y` suffixed duplicates in the merge below
# (which would make the fillna line fail with a KeyError).
cols_to_drop = [c for c in My_Data.columns if 'up_last_3_purchase_count' in c]
if cols_to_drop:
    My_Data.drop(columns=cols_to_drop, inplace=True)

# Highest order number per user.
# NOTE(review): the maximum is taken from `orders`, while the rows being
# filtered come from `order_products_prior`. If `orders` contains orders that
# are absent from `order_products_prior`, the window below covers fewer than
# 3 of the orders actually present -- confirm this is intended.
user_last_orders = orders.groupby('user_id')['order_number'].max().reset_index()
user_last_orders.columns = ['user_id', 'max_order_number']

# Attach each user's maximum order number to every purchased-product row
# (requires `order_products_prior` to carry `user_id` and `order_number`).
last_3_orders_data = order_products_prior.merge(user_last_orders, on='user_id', how='left')

# Keep only rows whose order number is among the 3 highest for that user,
# i.e. order_number in {max - 2, max - 1, max}.
last_3_orders_data = last_3_orders_data[last_3_orders_data['order_number'] > (last_3_orders_data['max_order_number'] - 3)]

# Count the purchases of each product by each user inside that window.
up_last_3_count = last_3_orders_data.groupby(['user_id', 'product_id'])['order_id'].count().reset_index()
up_last_3_count.columns = ['user_id', 'product_id', 'up_last_3_purchase_count']

# Left-merge into My_Data; pairs with no purchase in the window get 0.
My_Data = My_Data.merge(up_last_3_count, on=['user_id', 'product_id'], how='left')
My_Data['up_last_3_purchase_count'] = My_Data['up_last_3_purchase_count'].fillna(0).astype('int8')

# Free the intermediate frames before the next feature is built.
del last_3_orders_data, up_last_3_count, user_last_orders
gc.collect()

print(My_Data[['user_id', 'product_id', 'up_last_3_purchase_count']].head())

   user_id  product_id  up_last_3_purchase_count
0      1.0       196.0                         2
1      1.0     14084.0                         0
2      1.0     12427.0                         2
3      1.0     26088.0                         0
4      1.0     26405.0                         0


- ## **Calculating the "Is In Last Order" feature**
  - #### **This is a very powerful feature: a 0/1 flag that tells the model whether the product was in the last basket purchased by the user.**

In [None]:
# Feature: is_in_last_order -- 1 if the (user_id, product_id) pair appears in
# the user's highest-numbered order, otherwise 0.

# Pre-cleaning: drop any existing versions of the column (including `_x` /
# `_y` suffixed copies from an earlier run) to avoid duplication errors.
cols_to_drop = [c for c in My_Data.columns if 'is_in_last_order' in c]
if cols_to_drop:
    My_Data.drop(columns=cols_to_drop, inplace=True)

# Highest order number per user; the column is named `order_number` so it can
# be used directly as a join key below.
user_last_order = orders.groupby('user_id')['order_number'].max().reset_index()
user_last_order.columns = ['user_id', 'order_number']

# Products that were part of that order.
# NOTE(review): this inner merge only matches when the user's highest-numbered
# order in `orders` is also present in `order_products_prior`; if it is not,
# the flag will be 0 for every row -- confirm against the data.
last_order_products = order_products_prior.merge(user_last_order, on=['user_id', 'order_number'], how='inner')
last_order_products['is_in_last_order'] = 1

# Left-merge the flag into My_Data; unmatched pairs get 0.
My_Data = My_Data.merge(last_order_products[['user_id', 'product_id', 'is_in_last_order']],
                        on=['user_id', 'product_id'], how='left')

# The flag is either 1 or NaN after the merge, so fillna + int8 is sufficient.
# (A previous extra line operated on an undefined variable `col`; it either
# raised NameError or silently re-cast an unrelated column, so it was removed.)
My_Data['is_in_last_order'] = My_Data['is_in_last_order'].fillna(0).astype('int8')

# Free the intermediate frames.
del user_last_order, last_order_products
gc.collect()

print(My_Data.head())

   order_id  user_id  order_number  days_since_prior_order  product_id  \
0   2539329      1.0     -1.015522                      -1       196.0   
1   2539329      1.0     -1.015522                      -1     14084.0   
2   2539329      1.0     -1.015522                      -1     12427.0   
3   2539329      1.0     -1.015522                      -1     26088.0   
4   2539329      1.0     -1.015522                      -1     26405.0   

   add_to_cart_order  reordered  product_name  aisle_id  eval_set_test  ...  \
0          -1.147241          0         35791        77              0  ...   
1          -0.982113          0         15935        91              0  ...   
2          -0.816986          0          6476        23              0  ...   
3          -0.651859          0          2523        23              0  ...   
4          -0.486731          0          1214        54              0  ...   

   product_avg_hour_of_day  up_purchase_count  up_reorder_probability  \
0      