In [1]:
import numpy as np
import pandas as pd
import random

from surprise import SVD
from surprise import SVDpp
from surprise import NMF
from surprise import KNNBasic
from surprise import KNNWithZScore
from surprise import Dataset
from surprise import Reader
from surprise.model_selection import cross_validate

from surprise.model_selection import GridSearchCV

from sklearn import preprocessing

In [2]:
# Reading the data sets into data frames
train_data = pd.read_csv('./Files_Folder/train_data.csv')
test_data = pd.read_csv('./Files_Folder/test_data.csv')
test_data_sub = pd.read_csv('./Files_Folder/test_data_sub.csv')
hold_data = pd.read_csv('./Files_Folder/hold_data.csv')
hold_data_sub = pd.read_csv('./Files_Folder/hold_data_sub.csv')

In [3]:
print ("train shape = ", train_data.shape)
print ("test shape = ", test_data.shape)
print ("test shape (No Purchase column) = ", test_data_sub.shape)
print ("holdout shape = ", hold_data.shape)
print ("holdout shape (No Purchase column) = ", hold_data_sub.shape)

train shape =  (495062, 12)
test shape =  (27503, 12)
test shape (No Purchase column) =  (27503, 11)
holdout shape =  (27503, 12)
holdout shape (No Purchase column) =  (27503, 11)


In [4]:
train_data.head(5)

Unnamed: 0,User_ID,Product_ID,Gender,Age,Occupation,City_Category,Stay_In_Current_City_Years,Marital_Status,Product_Category_1,Product_Category_2,Product_Category_3,Purchase
0,1000001,P00069042,F,0-17,10,A,2,0,3,,,8370
1,1000001,P00248942,F,0-17,10,A,2,0,1,6.0,14.0,15200
2,1000001,P00087842,F,0-17,10,A,2,0,12,,,1422
3,1000001,P00085442,F,0-17,10,A,2,0,12,14.0,,1057
4,1000002,P00285442,M,55+,16,C,4+,0,8,,,7969


In [5]:
train_data.describe()

Unnamed: 0,User_ID,Occupation,Marital_Status,Product_Category_1,Product_Category_2,Product_Category_3,Purchase
count,495062.0,495062.0,495062.0,495062.0,338906.0,150075.0,495062.0
mean,1003027.0,8.076748,0.40971,5.401764,9.844337,12.671438,9265.688885
std,1727.099,6.523106,0.491781,3.935342,5.087848,4.124031,5023.84293
min,1000001.0,0.0,0.0,1.0,2.0,3.0,12.0
25%,1001514.0,2.0,0.0,1.0,5.0,9.0,5823.0
50%,1003075.0,7.0,0.0,5.0,9.0,14.0,8048.0
75%,1004473.0,14.0,1.0,8.0,15.0,16.0,12054.0
max,1006040.0,20.0,1.0,20.0,18.0,18.0,23961.0


In [6]:
# Checking out features that have missing values for the train data set
train_data.isnull().sum()

User_ID                            0
Product_ID                         0
Gender                             0
Age                                0
Occupation                         0
City_Category                      0
Stay_In_Current_City_Years         0
Marital_Status                     0
Product_Category_1                 0
Product_Category_2            156156
Product_Category_3            344987
Purchase                           0
dtype: int64

In [7]:
# Checking out the statistical description of numeric features for the test data set
test_data_sub.describe()

Unnamed: 0,User_ID,Occupation,Marital_Status,Product_Category_1,Product_Category_2,Product_Category_3
count,27503.0,27503.0,27503.0,27503.0,18780.0,8352.0
mean,1003045.0,8.077592,0.40941,5.4145,9.811022,12.617816
std,1728.325,6.509187,0.491734,3.931152,5.084547,4.147387
min,1000001.0,0.0,0.0,1.0,2.0,3.0
25%,1001548.0,2.0,0.0,1.0,5.0,9.0
50%,1003103.0,7.0,0.0,5.0,9.0,14.0
75%,1004506.0,14.0,1.0,8.0,15.0,16.0
max,1006040.0,20.0,1.0,20.0,18.0,18.0


In [8]:
# Checking out the features that have missing values in the test data set
test_data_sub.isnull().sum()

User_ID                           0
Product_ID                        0
Gender                            0
Age                               0
Occupation                        0
City_Category                     0
Stay_In_Current_City_Years        0
Marital_Status                    0
Product_Category_1                0
Product_Category_2             8723
Product_Category_3            19151
dtype: int64

In [9]:
hold_data_sub.describe()

Unnamed: 0,User_ID,Occupation,Marital_Status,Product_Category_1,Product_Category_2,Product_Category_3
count,27503.0,27503.0,27503.0,27503.0,18744.0,8394.0
mean,1003047.0,8.075083,0.408864,5.439152,9.837388,12.661306
std,1735.572,6.52834,0.491633,3.956815,5.066007,4.126847
min,1000001.0,0.0,0.0,1.0,2.0,3.0
25%,1001532.0,2.0,0.0,1.0,5.0,9.0
50%,1003081.0,7.0,0.0,5.0,9.0,14.0
75%,1004510.0,14.0,1.0,8.0,15.0,16.0
max,1006040.0,20.0,1.0,20.0,18.0,18.0


In [10]:
# Checking out the features that have missing values in the hold out data set
hold_data_sub.isnull().sum()

User_ID                           0
Product_ID                        0
Gender                            0
Age                               0
Occupation                        0
City_Category                     0
Stay_In_Current_City_Years        0
Marital_Status                    0
Product_Category_1                0
Product_Category_2             8759
Product_Category_3            19109
dtype: int64

In [11]:
# Filling missing values with place holders
train_data.Product_Category_2.fillna('BB', inplace = True)
train_data.Product_Category_3.fillna('CC', inplace = True)
test_data_sub.Product_Category_2.fillna('BB', inplace = True)
test_data_sub.Product_Category_3.fillna('CC', inplace = True)
hold_data_sub.Product_Category_2.fillna('BB', inplace = True)
hold_data_sub.Product_Category_3.fillna('CC', inplace = True)

train_data.isnull().sum()

User_ID                       0
Product_ID                    0
Gender                        0
Age                           0
Occupation                    0
City_Category                 0
Stay_In_Current_City_Years    0
Marital_Status                0
Product_Category_1            0
Product_Category_2            0
Product_Category_3            0
Purchase                      0
dtype: int64

In [12]:
test_data_sub.isnull().sum()

User_ID                       0
Product_ID                    0
Gender                        0
Age                           0
Occupation                    0
City_Category                 0
Stay_In_Current_City_Years    0
Marital_Status                0
Product_Category_1            0
Product_Category_2            0
Product_Category_3            0
dtype: int64

In [13]:
hold_data_sub.isnull().sum()

User_ID                       0
Product_ID                    0
Gender                        0
Age                           0
Occupation                    0
City_Category                 0
Stay_In_Current_City_Years    0
Marital_Status                0
Product_Category_1            0
Product_Category_2            0
Product_Category_3            0
dtype: int64

In [49]:
# Checking the number of unique Product_IDs in the test, train & hold out data sets
train_Prod_IDs = list(set(train_data.Product_ID))
test_Prod_IDs = list(set(test_data_sub.Product_ID))
hold_Prod_IDs = list(set(hold_data_sub.Product_ID))
print (len(train_Prod_IDs))
print (len(test_Prod_IDs))
print (len(hold_Prod_IDs))

3615
2819
2824


In [15]:
nid = test_Prod_IDs
print ('length of test_IDs = ', len (nid))

nid.extend(hold_Prod_IDs)
print ('length of test_IDs & holdout_IDs = ', len(nid))

nid = set(nid)
print ('length of unique test_IDs & holdout_IDs = ', len(nid) )

length of test_IDs =  2819
length of test_IDs & holdout_IDs =  5643
length of unique test_IDs & holdout_IDs =  3122


In [16]:
New_Prod_IDs = []
for i in nid:
    if i not in train_Prod_IDs:
        New_Prod_IDs.append(i)

In [17]:
print (len(New_Prod_IDs))
New_Prod_IDs

16


['P00152842',
 'P00022042',
 'P00063742',
 'P00306942',
 'P00300742',
 'P00012342',
 'P00068742',
 'P00065942',
 'P00325342',
 'P00301942',
 'P00069742',
 'P00171142',
 'P00169842',
 'P00315142',
 'P00239442',
 'P00150242']

In [18]:
# Putting test and hold in a single dataframe
print (test_data_sub.shape)
print (hold_data_sub.shape)
frames = [test_data_sub, hold_data_sub]
test_hold_data_sub = pd.concat(frames)
print (test_hold_data_sub.shape)

(27503, 11)
(27503, 11)
(55006, 11)


In [19]:
# Getting the summary of the products in train data set that has the same product categories as 
# the new products in test data sets.
# New products in test data sets are products that were not in the train data sets but are found in
# test data sets

agg_mean = []
agg_median = []
agg_std = []
agg_count = []
agg_min = []
agg_max = []
agg_25 = []
agg_75 = []

for i in New_Prod_IDs:
    
    a = test_hold_data_sub[test_hold_data_sub.Product_ID == i].iloc[0]
    agg_mean.append(train_data[(train_data.Product_Category_1 == a.Product_Category_1) & (train_data.Product_Category_2 == a.Product_Category_2) & (train_data.Product_Category_3 == a.Product_Category_3)].describe().Purchase['mean'])
    agg_median.append(train_data[(train_data.Product_Category_1 == a.Product_Category_1) & (train_data.Product_Category_2 == a.Product_Category_2) & (train_data.Product_Category_3 == a.Product_Category_3)].describe().Purchase['50%'])
    agg_std.append(train_data[(train_data.Product_Category_1 == a.Product_Category_1) & (train_data.Product_Category_2 == a.Product_Category_2) & (train_data.Product_Category_3 == a.Product_Category_3)].describe().Purchase['std'])
    agg_count.append(train_data[(train_data.Product_Category_1 == a.Product_Category_1) & (train_data.Product_Category_2 == a.Product_Category_2) & (train_data.Product_Category_3 == a.Product_Category_3)].describe().Purchase['count'])
    agg_min.append(train_data[(train_data.Product_Category_1 == a.Product_Category_1) & (train_data.Product_Category_2 == a.Product_Category_2) & (train_data.Product_Category_3 == a.Product_Category_3)].describe().Purchase['min'])
    agg_max.append(train_data[(train_data.Product_Category_1 == a.Product_Category_1) & (train_data.Product_Category_2 == a.Product_Category_2) & (train_data.Product_Category_3 == a.Product_Category_3)].describe().Purchase['max'])
    agg_25.append(train_data[(train_data.Product_Category_1 == a.Product_Category_1) & (train_data.Product_Category_2 == a.Product_Category_2) & (train_data.Product_Category_3 == a.Product_Category_3)].describe().Purchase['25%'])
    agg_75.append(train_data[(train_data.Product_Category_1 == a.Product_Category_1) & (train_data.Product_Category_2 == a.Product_Category_2) & (train_data.Product_Category_3 == a.Product_Category_3)].describe().Purchase['75%'])

  

In [20]:
agg_dic = {'Product_ID':New_Prod_IDs, 'mean': agg_mean, '50%': agg_median, 'std': agg_std, 'count':agg_count, 'min':agg_min, 'max':agg_max, '25%': agg_25, '75%': agg_75}    

In [21]:
# Storing the details of similar products in a csv file
# This can be used as a look up table during the analysis.
# The Product_ID are products that are in the test data set but not in the train data set
# The other parameters like mean, 50% etc are the summary of purchases of similar products 
# found in the train data set
New_Product = pd.DataFrame (agg_dic)
New_Product.head()

Unnamed: 0,Product_ID,mean,50%,std,count,min,max,25%,75%
0,P00152842,7511.514088,7908.0,1990.488557,54586.0,1939.0,10082.0,6047.0,9715.0
1,P00022042,14777.288566,16294.0,4343.472147,8816.0,4036.0,20971.0,12357.0,16831.0
2,P00063742,7511.514088,7908.0,1990.488557,54586.0,1939.0,10082.0,6047.0,9715.0
3,P00306942,4677.189748,4609.0,1877.3103,11120.0,1472.0,7654.0,3119.0,6066.0
4,P00300742,6101.874236,6886.0,1960.053653,57290.0,1713.0,8907.0,5211.0,7139.0


In [22]:
New_Product.to_csv('./Files_Folder/New_Product_Details.csv', index = False)

In [23]:
train_data.head(2)

Unnamed: 0,User_ID,Product_ID,Gender,Age,Occupation,City_Category,Stay_In_Current_City_Years,Marital_Status,Product_Category_1,Product_Category_2,Product_Category_3,Purchase
0,1000001,P00069042,F,0-17,10,A,2,0,3,BB,CC,8370
1,1000001,P00248942,F,0-17,10,A,2,0,1,6,14,15200


In [24]:
# Creating a pivot Table from User & Product IDs
train_User_Prod = pd.pivot_table(train_data, values = 'Purchase', index = 'User_ID', columns = 'Product_ID')
train_User_Prod.head(2)

Product_ID,P00000142,P00000242,P00000342,P00000442,P00000542,P00000642,P00000742,P00000842,P00000942,P00001042,...,P0098942,P0099042,P0099142,P0099242,P0099342,P0099442,P0099642,P0099742,P0099842,P0099942
User_ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1000001,13650.0,,,,,,,,,,...,,,,,,,,,,
1000002,,,,,,,,,,,...,,,,,,,,,,


In [25]:
train_User_Prod.to_csv('./Files_Folder/tr_User_Prod_Original_Pivot.csv')

In [26]:
# The statistical description of each of the Product_ID
# The minimum and maximum for each Product_ID can be used for reversing the normalization step after prediction
tr_Us_Pr_statistics = train_User_Prod.describe()
tr_Us_Pr_statistics

Product_ID,P00000142,P00000242,P00000342,P00000442,P00000542,P00000642,P00000742,P00000842,P00000942,P00001042,...,P0098942,P0099042,P0099142,P0099242,P0099342,P0099442,P0099642,P0099742,P0099842,P0099942
count,1042.0,344.0,220.0,80.0,135.0,455.0,213.0,31.0,48.0,453.0,...,30.0,135.0,6.0,235.0,394.0,181.0,13.0,115.0,100.0,10.0
mean,11127.769674,10488.671512,5353.263636,4778.95,5412.511111,14921.810989,6051.098592,10004.322581,10611.416667,13694.812362,...,6917.866667,6188.933333,6018.666667,6778.174468,6986.515228,14298.375691,6439.230769,7929.956522,7269.55,5148.4
std,2305.016456,3188.308881,1844.410602,1649.117846,1689.678327,3626.883186,1617.500347,2878.013833,4411.512443,3392.092634,...,1677.481111,1728.052265,2075.220149,2180.072511,1597.193797,3622.093836,2108.412442,2941.448874,1541.688051,2210.058431
min,2725.0,3214.0,1727.0,1764.0,1730.0,3950.0,1723.0,3347.0,4022.0,3847.0,...,4071.0,1738.0,2319.0,2012.0,1803.0,4093.0,3903.0,2670.0,1912.0,1791.0
25%,10593.0,9559.5,3727.5,3599.75,4448.0,11900.0,5235.0,6756.5,7926.25,11634.0,...,5883.25,5314.5,5840.0,5871.0,5458.25,12190.0,4222.0,5630.5,6932.25,4110.75
50%,10925.5,10007.5,5293.0,5185.5,5374.0,15516.0,5457.0,9939.0,9921.5,15231.0,...,6951.0,6908.0,5995.5,6179.0,7041.0,15998.0,5972.0,8185.0,7129.0,5294.5
75%,13370.75,13023.0,6928.75,5384.75,6943.5,18991.5,7066.0,12919.0,15283.0,15695.0,...,8053.0,7099.5,7486.75,8066.5,8667.5,16406.0,7862.0,10632.5,8673.0,6549.25
max,13716.0,16497.0,8896.0,8891.0,8903.0,19705.0,8902.0,13322.0,19154.0,19670.0,...,10001.0,8882.0,8030.0,10076.0,8906.0,20684.0,10056.0,13661.0,8891.0,8833.0


In [27]:
# Saving the Train_User_Product_Statistics file as a csv
tr_Us_Pr_statistics.to_csv('./Files_Folder/train_User_Prod_stat.csv')

In [28]:
# Since all the counts add up to (90% of 500068), it imples that there is no product that was purchased 
# by two User_IDs.  
tr_Us_Pr_statistics.loc['count', :].sum()

495062.0

In [29]:
# Normalizing the Purchases
minmax = preprocessing.MinMaxScaler(feature_range=(1,5))
train_User_Prod_Scaled = minmax.fit_transform(train_User_Prod)

In [30]:
train_User_Prod_Scaled

array([[4.97598035,        nan,        nan, ...,        nan,        nan,
               nan],
       [       nan,        nan,        nan, ...,        nan,        nan,
               nan],
       [       nan,        nan,        nan, ...,        nan,        nan,
               nan],
       ...,
       [       nan,        nan,        nan, ...,        nan,        nan,
               nan],
       [       nan,        nan,        nan, ...,        nan,        nan,
               nan],
       [       nan,        nan,        nan, ...,        nan,        nan,
               nan]])

In [31]:
x_row = list(train_User_Prod.index)
x_col = list(train_User_Prod.columns)
train_User_Prod_Scaled_df = pd.DataFrame(train_User_Prod_Scaled, index = x_row, columns = x_col)

In [32]:
train_User_Prod_Scaled_df.head(3)

Unnamed: 0,P00000142,P00000242,P00000342,P00000442,P00000542,P00000642,P00000742,P00000842,P00000942,P00001042,...,P0098942,P0099042,P0099142,P0099242,P0099342,P0099442,P0099642,P0099742,P0099842,P0099942
1000001,4.97598,,,,,,,,,,...,,,,,,,,,,
1000002,,,,,,,,,,,...,,,,,,,,,,
1000003,,,,,,,,,,,...,,,,,,,,,,


In [33]:
train_User_Prod_Scaled_df.describe()

Unnamed: 0,P00000142,P00000242,P00000342,P00000442,P00000542,P00000642,P00000742,P00000842,P00000942,P00001042,...,P0098942,P0099042,P0099142,P0099242,P0099342,P0099442,P0099642,P0099742,P0099842,P0099942
count,1042.0,344.0,220.0,80.0,135.0,455.0,213.0,31.0,48.0,453.0,...,30.0,135.0,6.0,235.0,394.0,181.0,13.0,115.0,100.0,10.0
mean,4.058055,3.190671,3.023302,2.692129,3.05354,3.785607,3.411533,3.669603,2.74185,3.489493,...,2.920315,3.492124,3.591257,3.364174,3.919057,3.460461,2.648777,2.914278,4.070669,2.907072
std,0.838874,0.960117,1.029103,0.925561,0.942244,0.920821,0.90124,1.154091,1.166141,0.857509,...,1.131522,0.967554,1.45349,1.081385,0.899447,0.873267,1.370657,1.070494,0.883615,1.255358
min,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,...,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
25%,3.863434,2.910864,2.116195,2.030307,2.515684,3.018407,2.956818,2.367218,2.032051,2.968527,...,2.222428,3.00252,3.466118,2.914187,3.058426,2.952143,1.207379,2.077427,3.877346,2.317665
50%,3.984442,3.045773,2.989678,2.920303,3.032065,3.936465,3.080513,3.643409,2.559477,3.877836,...,2.942664,3.894737,3.575031,3.066964,3.94974,3.870231,2.345035,3.007097,3.990113,2.99006
75%,4.874352,3.953851,3.902357,3.032131,3.907291,4.818851,3.977016,4.838396,3.976738,3.995134,...,3.686003,4.00196,4.619506,4.003224,4.865691,3.968597,3.573704,3.897825,4.875054,3.702783
max,5.0,5.0,5.0,5.0,5.0,5.0,5.0,5.0,5.0,5.0,...,5.0,5.0,5.0,5.0,5.0,5.0,5.0,5.0,5.0,5.0


In [34]:
train_User_Prod_Scaled_df.head()

Unnamed: 0,P00000142,P00000242,P00000342,P00000442,P00000542,P00000642,P00000742,P00000842,P00000942,P00001042,...,P0098942,P0099042,P0099142,P0099242,P0099342,P0099442,P0099642,P0099742,P0099842,P0099942
1000001,4.97598,,,,,,,,,,...,,,,,,,,,,
1000002,,,,,,,,,,,...,,,,,,,,,,
1000003,,,,,,,,,,,...,,,,,,,,,,
1000004,,,,,,,,,,,...,,,,,,,,,,
1000005,,,,,,,,,,,...,,,,,,,,,,


In [35]:
train_User_Prod.head()

Product_ID,P00000142,P00000242,P00000342,P00000442,P00000542,P00000642,P00000742,P00000842,P00000942,P00001042,...,P0098942,P0099042,P0099142,P0099242,P0099342,P0099442,P0099642,P0099742,P0099842,P0099942
User_ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1000001,13650.0,,,,,,,,,,...,,,,,,,,,,
1000002,,,,,,,,,,,...,,,,,,,,,,
1000003,,,,,,,,,,,...,,,,,,,,,,
1000004,,,,,,,,,,,...,,,,,,,,,,
1000005,,,,,,,,,,,...,,,,,,,,,,


In [36]:
train_User_Prod_Scaled_df.to_csv('./Files_Folder/tr_User_Prod_Normalized_Pivot.csv')

In [37]:
# Preparing Data in the form acceptable by Surprise Package

In [38]:
train_data.head()

Unnamed: 0,User_ID,Product_ID,Gender,Age,Occupation,City_Category,Stay_In_Current_City_Years,Marital_Status,Product_Category_1,Product_Category_2,Product_Category_3,Purchase
0,1000001,P00069042,F,0-17,10,A,2,0,3,BB,CC,8370
1,1000001,P00248942,F,0-17,10,A,2,0,1,6,14,15200
2,1000001,P00087842,F,0-17,10,A,2,0,12,BB,CC,1422
3,1000001,P00085442,F,0-17,10,A,2,0,12,14,CC,1057
4,1000002,P00285442,M,55+,16,C,4+,0,8,BB,CC,7969


In [39]:
User_ID_list = list(train_data.User_ID)
Prod_ID_list = list(train_data.Product_ID)

In [40]:
# Creating a list of normalized purchases in the same order as listed in the train data set
Purc_list = []
for i in range(len(User_ID_list)):
    Purc_list.append(train_User_Prod_Scaled_df.loc[User_ID_list[i], Prod_ID_list[i]])

In [41]:
User_Prod_NormPurc = {'User_ID': User_ID_list, 'Product_ID': Prod_ID_list, 'Normalized_Purchase': Purc_list}

In [42]:
User_Prod_NormPurc_df = pd.DataFrame(User_Prod_NormPurc)

In [43]:
User_Prod_NormPurc_df.head()

Unnamed: 0,User_ID,Product_ID,Normalized_Purchase
0,1000001,P00069042,3.067944
1,1000001,P00248942,3.857687
2,1000001,P00087842,4.011863
3,1000001,P00085442,2.958953
4,1000002,P00285442,3.636497


In [44]:
User_Prod_NormPurc_df.describe()

Unnamed: 0,User_ID,Normalized_Purchase
count,495062.0,495062.0
mean,1003027.0,3.538755
std,1727.099,1.082961
min,1000001.0,1.0
25%,1001514.0,2.955803
50%,1003075.0,3.887335
75%,1004473.0,4.035739
max,1006040.0,5.0


In [45]:
User_Prod_NormPurc_df.to_csv('./Files_Folder/User_Prod_NormPurc.csv', index=False)