## Ecommerce customer segementation 
## Part I: Data processing and Feature Engineering

## 0) Setup

### 0.a) Import packages


In [1]:
import numpy as np
import pandas as pd
import seaborn as sns 
import matplotlib.pyplot as plt
from scipy.stats import norm 
import os

In [2]:
from IPython.core.interactiveshell import InteractiveShell
InteractiveShell.ast_node_interactivity = "all"

In [3]:
pd.set_option('display.max_rows', 500)
pd.set_option('display.max_columns', 500)
pd.set_option('display.width', 1000)

In [4]:
import boto3, os, sagemaker
from sagemaker import get_execution_role

### 0.b) Get Amazon IAM execution role and instance region

In [5]:
# Define IAM role- this will be necessary when defining your model
iam_role = get_execution_role()

# set the region of the instance and get a reference to the client
my_region = boto3.Session().region_name

# sagemaker session handle
sm_session = sagemaker.session.Session(boto3.Session())

print("Success - the SageMaker instance is in the " + my_region + " region")

Success - the SageMaker instance is in the us-west-2 region


## 1) Load and examine data

In [6]:
#bucket = "radybigdata"
bucket = "finalcustomersegmentation"
prefix = "ecommerce"

In [7]:
order =  pd.read_csv("s3://{}/{}/orders_dataset.csv".format(bucket,prefix))
review = pd.read_csv("s3://{}/{}/customer_reviews_dataset.csv".format(bucket,prefix))
payment = pd.read_csv("s3://{}/{}/order_payments_dataset.csv".format(bucket,prefix))
items = pd.read_csv("s3://{}/{}/order_items_dataset.csv".format(bucket,prefix))
product = pd.read_csv("s3://{}/{}/products_dataset.csv".format(bucket,prefix))
category = pd.read_csv("s3://{}/{}/product_category_name_translation.csv".format(bucket,prefix))
seller = pd.read_csv("s3://{}/{}/sellers_dataset.csv".format(bucket,prefix))
customer = pd.read_csv("s3://{}/{}/customers_dataset.csv".format(bucket,prefix))
geolocation = pd.read_csv("s3://{}/{}/geolocation_dataset.csv".format(bucket,prefix))

Check the dataset and schema 

In [8]:
order.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 99441 entries, 0 to 99440
Data columns (total 8 columns):
order_id                         99441 non-null object
customer_id                      99441 non-null object
order_status                     99441 non-null object
order_purchase_timestamp         99441 non-null object
order_approved_at                99281 non-null object
order_carrier_delivery_date      97658 non-null object
order_customer_delivery_date     96476 non-null object
order_estimated_delivery_date    99441 non-null object
dtypes: object(8)
memory usage: 6.1+ MB


In [9]:
order.head(1)

Unnamed: 0,order_id,customer_id,order_status,order_purchase_timestamp,order_approved_at,order_carrier_delivery_date,order_customer_delivery_date,order_estimated_delivery_date
0,e481f51cbdc54678b7cc49136f2d6af7,9ef432eb6251297304e76186b10a928d,delivered,2017-10-02 10:56:33,2017-10-02 11:07:15,2017-10-04 19:55:00,2017-10-10 21:25:13,2017-10-18 00:00:00


In [10]:
items.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 112650 entries, 0 to 112649
Data columns (total 7 columns):
order_id               112650 non-null object
order_item_id          112650 non-null int64
product_id             112650 non-null object
seller_id              112650 non-null object
shipping_limit_date    112650 non-null object
price                  112650 non-null float64
freight_value          112650 non-null float64
dtypes: float64(2), int64(1), object(4)
memory usage: 6.0+ MB


In [11]:
items.head(1)

Unnamed: 0,order_id,order_item_id,product_id,seller_id,shipping_limit_date,price,freight_value
0,00010242fe8c5a6d1ba2dd792cb16214,1,4244733e06e7ecb4970a6e2683c13e61,48436dade18ac8b2bce089ec2a041202,2017-09-19 09:45:35,58.9,13.29


In [12]:
review.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 100000 entries, 0 to 99999
Data columns (total 7 columns):
review_id                 100000 non-null object
order_id                  100000 non-null object
survey_score              100000 non-null int64
survey_review_title       11715 non-null object
survey_review_content     41753 non-null object
survey_send_date          100000 non-null object
survey_completion_date    100000 non-null object
dtypes: int64(1), object(6)
memory usage: 5.3+ MB


In [13]:
review.head(1)

Unnamed: 0,review_id,order_id,survey_score,survey_review_title,survey_review_content,survey_send_date,survey_completion_date
0,7bc2406110b926393aa56f80a40eba40,73fc7af87114b39712e6da79b0a377eb,4,,,2018-01-18 00:00:00,2018-01-18 21:46:59


In [14]:
customer.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 99441 entries, 0 to 99440
Data columns (total 5 columns):
customer_id                 99441 non-null object
customer_unique_id          99441 non-null object
customer_zip_code_prefix    99441 non-null int64
customer_city               99441 non-null object
customer_state              99441 non-null object
dtypes: int64(1), object(4)
memory usage: 3.8+ MB


In [15]:
customer.head(1)

Unnamed: 0,customer_id,customer_unique_id,customer_zip_code_prefix,customer_city,customer_state
0,06b8999e2fba1a1fbc88172c00ba8bc7,861eff4711a542e4b93843c6dd7febb0,14409,franca,SP


In [16]:
product.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 32951 entries, 0 to 32950
Data columns (total 9 columns):
product_id                    32951 non-null object
product_category_name         32341 non-null object
product_name_lenght           32341 non-null float64
product_description_lenght    32341 non-null float64
product_photos_qty            32341 non-null float64
product_weight_g              32949 non-null float64
product_length_cm             32949 non-null float64
product_height_cm             32949 non-null float64
product_width_cm              32949 non-null float64
dtypes: float64(7), object(2)
memory usage: 2.3+ MB


In [17]:
product.head(1)

Unnamed: 0,product_id,product_category_name,product_name_lenght,product_description_lenght,product_photos_qty,product_weight_g,product_length_cm,product_height_cm,product_width_cm
0,1e9e8ef04dbcff4541ed26657ea517e5,perfumaria,40.0,287.0,1.0,225.0,16.0,10.0,14.0


In [18]:
seller.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 3095 entries, 0 to 3094
Data columns (total 4 columns):
seller_id                 3095 non-null object
seller_zip_code_prefix    3095 non-null int64
seller_city               3095 non-null object
seller_state              3095 non-null object
dtypes: int64(1), object(3)
memory usage: 96.8+ KB


In [19]:
seller.head(1)

Unnamed: 0,seller_id,seller_zip_code_prefix,seller_city,seller_state
0,3442f8959a84dea7ee197c632cb2df15,13023,campinas,SP


In [20]:
payment.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 103886 entries, 0 to 103885
Data columns (total 5 columns):
order_id                103886 non-null object
payment_sequential      103886 non-null int64
payment_type            103886 non-null object
payment_installments    103886 non-null int64
payment_value           103886 non-null float64
dtypes: float64(1), int64(2), object(2)
memory usage: 4.0+ MB


In [21]:
payment.head(1)

Unnamed: 0,order_id,payment_sequential,payment_type,payment_installments,payment_value
0,b81ef226f3fe1789b1e8b2acac839d17,1,credit_card,8,99.33


In [22]:
geolocation.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1000163 entries, 0 to 1000162
Data columns (total 5 columns):
geo_zip_code_prefix    1000163 non-null int64
geo_lat                1000163 non-null float64
geo_lng                1000163 non-null float64
geo_city               1000163 non-null object
geo_state              1000163 non-null object
dtypes: float64(2), int64(1), object(2)
memory usage: 38.2+ MB


In [23]:
geolocation.head(1)

Unnamed: 0,geo_zip_code_prefix,geo_lat,geo_lng,geo_city,geo_state
0,1037,-23.545621,-46.639292,sao paulo,SP


In [24]:
category.head(1)

Unnamed: 0,product_category_name,product_category_name_english
0,beleza_saude,health_beauty


## 2) Merge data

In [25]:
order.columns
items.columns

Index(['order_id', 'customer_id', 'order_status', 'order_purchase_timestamp', 'order_approved_at', 'order_carrier_delivery_date', 'order_customer_delivery_date', 'order_estimated_delivery_date'], dtype='object')

Index(['order_id', 'order_item_id', 'product_id', 'seller_id', 'shipping_limit_date', 'price', 'freight_value'], dtype='object')

The pd.merge() function recognizes that each DataFrame has an "order_id" column, and automatically joins using this column as a key. The result of the merge is a new DataFrame that combines the information from the two inputs.

In [26]:
df = pd.merge(order[['order_id', 
                       'customer_id', 
                       'order_status', 
                       'order_purchase_timestamp']],
                items[['order_id', 'order_item_id', 
                       'product_id', 'seller_id', 
                       'price', 'freight_value']])

In [27]:
order.shape
items.shape
df.shape

(99441, 8)

(112650, 7)

(112650, 9)

In [28]:
product.columns

Index(['product_id', 'product_category_name', 'product_name_lenght', 'product_description_lenght', 'product_photos_qty', 'product_weight_g', 'product_length_cm', 'product_height_cm', 'product_width_cm'], dtype='object')

In [29]:
df = pd.merge(df, product[['product_id', 'product_category_name','product_photos_qty', 'product_weight_g', 'product_length_cm', 'product_height_cm', 'product_width_cm']])

In [30]:
payment.columns

Index(['order_id', 'payment_sequential', 'payment_type', 'payment_installments', 'payment_value'], dtype='object')

In [31]:
df = pd.merge(df, payment[['order_id', 'payment_type', 'payment_installments']])

In [32]:
seller.columns
#ignor seller

Index(['seller_id', 'seller_zip_code_prefix', 'seller_city', 'seller_state'], dtype='object')

In [33]:
customer.columns

Index(['customer_id', 'customer_unique_id', 'customer_zip_code_prefix', 'customer_city', 'customer_state'], dtype='object')

In [34]:
df = pd.merge(df, customer[['customer_id','customer_unique_id','customer_state']])

In [35]:
review.columns

Index(['review_id', 'order_id', 'survey_score', 'survey_review_title', 'survey_review_content', 'survey_send_date', 'survey_completion_date'], dtype='object')

In [36]:
df = pd.merge(df, review[['order_id', 'survey_score']])

In [37]:
category.columns

Index(['product_category_name', 'product_category_name_english'], dtype='object')

In [38]:
df = pd.merge(df, category[['product_category_name', 'product_category_name_english']])

In [39]:
df.head(5)

Unnamed: 0,order_id,customer_id,order_status,order_purchase_timestamp,order_item_id,product_id,seller_id,price,freight_value,product_category_name,product_photos_qty,product_weight_g,product_length_cm,product_height_cm,product_width_cm,payment_type,payment_installments,customer_unique_id,customer_state,survey_score,product_category_name_english
0,e481f51cbdc54678b7cc49136f2d6af7,9ef432eb6251297304e76186b10a928d,delivered,2017-10-02 10:56:33,1,87285b34884572647811a353c7ac498a,3504c0cb71d7fa48d967e0e4c94d59d9,29.99,8.72,utilidades_domesticas,4.0,500.0,19.0,8.0,13.0,credit_card,1,7c396fd4830fd04220f754e42b4e5bff,SP,4,housewares
1,e481f51cbdc54678b7cc49136f2d6af7,9ef432eb6251297304e76186b10a928d,delivered,2017-10-02 10:56:33,1,87285b34884572647811a353c7ac498a,3504c0cb71d7fa48d967e0e4c94d59d9,29.99,8.72,utilidades_domesticas,4.0,500.0,19.0,8.0,13.0,voucher,1,7c396fd4830fd04220f754e42b4e5bff,SP,4,housewares
2,e481f51cbdc54678b7cc49136f2d6af7,9ef432eb6251297304e76186b10a928d,delivered,2017-10-02 10:56:33,1,87285b34884572647811a353c7ac498a,3504c0cb71d7fa48d967e0e4c94d59d9,29.99,8.72,utilidades_domesticas,4.0,500.0,19.0,8.0,13.0,voucher,1,7c396fd4830fd04220f754e42b4e5bff,SP,4,housewares
3,128e10d95713541c87cd1a2e48201934,a20e8105f23924cd00833fd87daa0831,delivered,2017-08-15 18:29:31,1,87285b34884572647811a353c7ac498a,3504c0cb71d7fa48d967e0e4c94d59d9,29.99,7.78,utilidades_domesticas,4.0,500.0,19.0,8.0,13.0,credit_card,3,3a51803cc0d012c3b5dc8b7528cb05f7,SP,4,housewares
4,0e7e841ddf8f8f2de2bad69267ecfbcf,26c7ac168e1433912a51b924fbd34d34,delivered,2017-08-02 18:24:47,1,87285b34884572647811a353c7ac498a,3504c0cb71d7fa48d967e0e4c94d59d9,29.99,7.78,utilidades_domesticas,4.0,500.0,19.0,8.0,13.0,credit_card,1,ef0996a1a279c26e7ecbd737be23d235,SP,5,housewares


In [40]:
df.shape

(116581, 21)

## 3) Write merged dataset to S3 

In [41]:
from io import StringIO # python3; python2: BytesIO 

csv_buffer = StringIO()
df.to_csv(csv_buffer,index=False)
s3_resource = boto3.resource('s3')
s3_resource.Object(bucket, key = 'ecommerce/merged_orders.csv').put(Body=csv_buffer.getvalue())

{'ResponseMetadata': {'RequestId': '6538ADC3BC5BC667',
  'HostId': 'LzQ5PMocO+5GJwmJ0FK4YFofimvWjkbpbME0sW6YQ0/yAXinXnWw+lI1byn7LVERCekKMef3Kus=',
  'HTTPStatusCode': 200,
  'HTTPHeaders': {'x-amz-id-2': 'LzQ5PMocO+5GJwmJ0FK4YFofimvWjkbpbME0sW6YQ0/yAXinXnWw+lI1byn7LVERCekKMef3Kus=',
   'x-amz-request-id': '6538ADC3BC5BC667',
   'date': 'Wed, 18 Mar 2020 03:41:11 GMT',
   'etag': '"a6721ab2437a68c79edb6cd39503b870"',
   'content-length': '0',
   'server': 'AmazonS3'},
  'RetryAttempts': 0},
 'ETag': '"a6721ab2437a68c79edb6cd39503b870"'}

## 4) Create aggregate dataset and new feature generation

In [42]:
tt =  pd.read_csv("s3://{}/{}/merged_orders.csv".format(bucket,prefix))

In [43]:
tt.head(10)

Unnamed: 0,order_id,customer_id,order_status,order_purchase_timestamp,order_item_id,product_id,seller_id,price,freight_value,product_category_name,product_photos_qty,product_weight_g,product_length_cm,product_height_cm,product_width_cm,payment_type,payment_installments,customer_unique_id,customer_state,survey_score,product_category_name_english
0,e481f51cbdc54678b7cc49136f2d6af7,9ef432eb6251297304e76186b10a928d,delivered,2017-10-02 10:56:33,1,87285b34884572647811a353c7ac498a,3504c0cb71d7fa48d967e0e4c94d59d9,29.99,8.72,utilidades_domesticas,4.0,500.0,19.0,8.0,13.0,credit_card,1,7c396fd4830fd04220f754e42b4e5bff,SP,4,housewares
1,e481f51cbdc54678b7cc49136f2d6af7,9ef432eb6251297304e76186b10a928d,delivered,2017-10-02 10:56:33,1,87285b34884572647811a353c7ac498a,3504c0cb71d7fa48d967e0e4c94d59d9,29.99,8.72,utilidades_domesticas,4.0,500.0,19.0,8.0,13.0,voucher,1,7c396fd4830fd04220f754e42b4e5bff,SP,4,housewares
2,e481f51cbdc54678b7cc49136f2d6af7,9ef432eb6251297304e76186b10a928d,delivered,2017-10-02 10:56:33,1,87285b34884572647811a353c7ac498a,3504c0cb71d7fa48d967e0e4c94d59d9,29.99,8.72,utilidades_domesticas,4.0,500.0,19.0,8.0,13.0,voucher,1,7c396fd4830fd04220f754e42b4e5bff,SP,4,housewares
3,128e10d95713541c87cd1a2e48201934,a20e8105f23924cd00833fd87daa0831,delivered,2017-08-15 18:29:31,1,87285b34884572647811a353c7ac498a,3504c0cb71d7fa48d967e0e4c94d59d9,29.99,7.78,utilidades_domesticas,4.0,500.0,19.0,8.0,13.0,credit_card,3,3a51803cc0d012c3b5dc8b7528cb05f7,SP,4,housewares
4,0e7e841ddf8f8f2de2bad69267ecfbcf,26c7ac168e1433912a51b924fbd34d34,delivered,2017-08-02 18:24:47,1,87285b34884572647811a353c7ac498a,3504c0cb71d7fa48d967e0e4c94d59d9,29.99,7.78,utilidades_domesticas,4.0,500.0,19.0,8.0,13.0,credit_card,1,ef0996a1a279c26e7ecbd737be23d235,SP,5,housewares
5,bfc39df4f36c3693ff3b63fcbea9e90a,53904ddbea91e1e92b2b3f1d09a7af86,delivered,2017-10-23 23:26:46,1,87285b34884572647811a353c7ac498a,3504c0cb71d7fa48d967e0e4c94d59d9,29.99,14.1,utilidades_domesticas,4.0,500.0,19.0,8.0,13.0,boleto,1,e781fdcc107d13d865fc7698711cc572,SC,3,housewares
6,6ea2f835b4556291ffdc53fa0b3b95e8,c7340080e394356141681bd4c9b8fe31,delivered,2017-11-24 21:27:48,1,be021417a6acb56b9b50d3fd2714baa8,f5f46307a4d15880ca14fab4ad9dfc9b,339.0,17.12,utilidades_domesticas,6.0,14300.0,38.0,34.0,34.0,credit_card,10,3e4fd73f1e86b135b9b121d6abbe9597,SP,1,housewares
7,7b419fad2990895cf43c738f8e7d2d55,9ce014a42bfa0fe0ff389adde017bb24,delivered,2017-04-04 09:36:28,1,eac611ce220f095680e35c1c1124bcb3,dbb9b48c841a0e39e21f98e1a6b2ec3e,82.9,3.08,utilidades_domesticas,1.0,700.0,30.0,15.0,20.0,boleto,1,1383a85c693c3af1f959f635a1cf9c4f,RS,1,housewares
8,edf0c87086ae34eca8a6f72c8b31eeae,c5a4107bca600a202e2de778a540464a,delivered,2018-04-25 23:33:01,1,7fb7c9580222a2af9eb7a95a6ce85fc5,688756f717c462a206ad854c5027a64a,28.99,4.35,utilidades_domesticas,1.0,825.0,52.0,18.0,74.0,credit_card,2,5afbb1d2cc7c072e60088e682aa600a3,MG,2,housewares
9,3272770f15ff5da42dc4973b1b179552,e4eb839347c15a4998942e6ee3a032a2,delivered,2018-06-20 14:44:03,2,7a8dfec813ae58cfaa3e1ef9f534adab,4c18691b6037662be2df78a765d98ab5,36.5,11.17,utilidades_domesticas,2.0,1200.0,33.0,10.0,30.0,boleto,1,b496bda4b2d6934b9e731a96889fd89e,RS,5,housewares



+ Recency: How much time has elapsed since a customer’s last activity or transaction with the brand? Activity is usually a purchase, although variations are sometimes used, e.g., the last visit to a website or use of a mobile app. In most cases, the more recently a customer has interacted or transacted with a brand, the more likely that customer will be responsive to communications from the brand.
+ Frequency: How often has a customer transacted or interacted with the brand during a particular period of time? Clearly, customers with frequent activities are more engaged, and probably more loyal, than customers who rarely do so. And one-time-only customers are in a class of their own.
+ Monetary: Also referred to as “monetary value,” this factor reflects how much a customer has spent with the brand during a particular period of time. Big spenders should usually be treated differently than customers who spend little. Looking at monetary divided by frequency indicates the average purchase amount – an important secondary factor to consider when segmenting customers.

Others features to include:
+ survey_score: averaging customer rating scores 
+ payment_installments: average customer payment installments 

In [44]:
tt_agg = tt.groupby('customer_unique_id').agg({'order_purchase_timestamp': [('latest_purchase','max'),('first_purchase',"min")],
                                        'price': [('total_purchase','sum')],
                                        'customer_unique_id' : [('total_purchase_num','count')],
                                               "survey_score": [('rating','mean')],
                                               "payment_installments": [('installments','mean')]
                                              })

In [45]:
tt_agg.columns = tt_agg.columns.droplevel(0)

In [46]:
tt_agg = tt_agg.reset_index()

In [47]:
tt_agg.shape

(94087, 7)

In [48]:
tt_agg["latest_purchase"] = pd.to_datetime(tt_agg["latest_purchase"])
tt_agg["first_purchase"] = pd.to_datetime(tt_agg["first_purchase"])

In [49]:
tt_agg.dtypes

customer_unique_id            object
latest_purchase       datetime64[ns]
first_purchase        datetime64[ns]
total_purchase               float64
total_purchase_num             int64
rating                       float64
installments                 float64
dtype: object

In [50]:
maxtime = max(tt_agg["latest_purchase"])
maxtime

Timestamp('2018-09-03 09:06:57')

In [51]:
tt_agg["recency"] = maxtime - tt_agg["latest_purchase"] 

In [52]:
tt_agg["recency"]  = tt_agg["recency"].astype('timedelta64[D]')

Add feature:
+ Length: Just an addition of customer relation length (L) to RFM Model results in the extended reliability and accuracy of customer segments. By adopting the traditional RFM Model, companies cannot distinguish between the long-term and short-term customers, which becomes possible with the LRFM Model.

In [53]:
tt_agg["length"] =   tt_agg["latest_purchase"] - tt_agg["first_purchase"]

In [54]:
tt_agg["length"] = tt_agg["length"].astype('timedelta64[D]')

In [55]:
tt_agg.columns

Index(['customer_unique_id', 'latest_purchase', 'first_purchase', 'total_purchase', 'total_purchase_num', 'rating', 'installments', 'recency', 'length'], dtype='object')

In [56]:
tt_agg_final = tt_agg[['customer_unique_id', 
                       'total_purchase', 
                       'total_purchase_num', 
                       'rating', 
                       'installments', 
                       'recency', 
                       'length']]

In [57]:
tt_agg_final.rename(columns={'total_purchase': 'monetary',
                            'total_purchase_num': 'frequency',
                            'installments':'payment_installments'},inplace=True)

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  return super(DataFrame, self).rename(**kwargs)


In [58]:
tt_agg_final.head(3)

Unnamed: 0,customer_unique_id,monetary,frequency,rating,payment_installments,recency,length
0,0000366f3b9a7992bf8c76cfdf3221e2,129.9,1,5.0,8.0,115.0,0.0
1,0000b849f77a49e4a4ce2b2a4ca5be3f,18.9,1,4.0,1.0,118.0,0.0
2,0000f46a3911fa3c0805444483337064,69.0,1,3.0,8.0,541.0,0.0


In [59]:
tt_agg_final.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 94087 entries, 0 to 94086
Data columns (total 7 columns):
customer_unique_id      94087 non-null object
monetary                94087 non-null float64
frequency               94087 non-null int64
rating                  94087 non-null float64
payment_installments    94087 non-null float64
recency                 94087 non-null float64
length                  94087 non-null float64
dtypes: float64(5), int64(1), object(1)
memory usage: 5.0+ MB


#### Save to S3

In [60]:
from io import StringIO # python3; python2: BytesIO 

csv_buffer = StringIO()
tt_agg_final.to_csv(csv_buffer,index=False)
s3_resource = boto3.resource('s3')
s3_resource.Object(bucket, key = 'ecommerce/agg_info.csv').put(Body=csv_buffer.getvalue())

{'ResponseMetadata': {'RequestId': 'CC703560F85EE004',
  'HostId': '4BZJyYVDdXzz2tH09Baq3QDmR+nyYZUA6/lIT2nNg6FI1YUZzMrFWYMzQ/Iz6RIFYimsNp1AInA=',
  'HTTPStatusCode': 200,
  'HTTPHeaders': {'x-amz-id-2': '4BZJyYVDdXzz2tH09Baq3QDmR+nyYZUA6/lIT2nNg6FI1YUZzMrFWYMzQ/Iz6RIFYimsNp1AInA=',
   'x-amz-request-id': 'CC703560F85EE004',
   'date': 'Wed, 18 Mar 2020 03:41:37 GMT',
   'etag': '"60e22df950642469cff4ceee1ad21ab2"',
   'content-length': '0',
   'server': 'AmazonS3'},
  'RetryAttempts': 0},
 'ETag': '"60e22df950642469cff4ceee1ad21ab2"'}