# Part I: Preprocessing and EDA

## Problem 1

In [30]:
import numpy as np
import pandas as pd
import re
import plotly.express as px
import plotly.graph_objects as go

order = pd.read_csv('../data/Orders.csv') #, parse_dates = ['Order.Date'])
pd.set_option('display.max_columns', 100)

returns = pd.read_csv('../data/Returns.csv') #, parse_dates = ['Ship.Date'])
pd.set_option('display.max_columns', 100)

In [31]:
#Renaming columns for better usage
cols_order = order.columns.tolist()
cols_order = [i.replace('.', '_').lower() for i in cols_order]
order.columns = cols_order

#Changing types for columns
order.order_date = pd.to_datetime(order.order_date.astype(str), format = '%m/%d/%y')
order.ship_date = pd.to_datetime(order.ship_date.astype(str), format = '%m/%d/%y')
order['sales'] = order['sales'].str.replace('$', '').str.replace(',','').astype(float)
order['profit'] = order['profit'].str.replace('$', '').str.replace(',','').astype(float)

#Checking for missingness within the dataframe
#orders.isna().sum() #works the same as below
np.sum(order.isnull())

row_id                0
order_id              0
order_date            0
ship_date             0
ship_mode             0
customer_id           0
customer_name         0
segment               0
postal_code       41296
city                  0
state                 0
country               0
region                0
market                0
product_id            0
category              0
sub_category          0
product_name          0
sales                 0
quantity              0
discount              0
profit                0
shipping_cost         0
order_priority        0
dtype: int64

## Problem 2

### Adding Quarters for Seasonality

In [32]:
#Seeing how many years of data we have
order.order_date.describe()

count                   51290
unique                   1430
top       2015-06-18 00:00:00
freq                      135
first     2012-01-01 00:00:00
last      2015-12-31 00:00:00
Name: order_date, dtype: object

In [33]:
#Setting the index for easy time series manipulation
order = order.set_index('order_date')
order['quarter'] = order.index.to_period('Q')
order

Unnamed: 0_level_0,row_id,order_id,ship_date,ship_mode,customer_id,customer_name,segment,postal_code,city,state,country,region,market,product_id,category,sub_category,product_name,sales,quantity,discount,profit,shipping_cost,order_priority,quarter
order_date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1
2014-11-11,40098,CA-2014-AB10015140-41954,2014-11-13,First Class,AB-100151402,Aaron Bergman,Consumer,73120.0,Oklahoma City,Oklahoma,United States,Central US,USCA,TEC-PH-5816,Technology,Phones,Samsung Convoy 3,221.98,2,0.0,62.15,40.770,High,2014Q4
2014-02-05,26341,IN-2014-JR162107-41675,2014-02-07,Second Class,JR-162107,Justin Ritter,Corporate,,Wollongong,New South Wales,Australia,Oceania,Asia Pacific,FUR-CH-5379,Furniture,Chairs,"Novimex Executive Leather Armchair, Black",3709.40,9,0.1,-288.77,923.630,Critical,2014Q1
2014-10-17,25330,IN-2014-CR127307-41929,2014-10-18,First Class,CR-127307,Craig Reiter,Consumer,,Brisbane,Queensland,Australia,Oceania,Asia Pacific,TEC-PH-5356,Technology,Phones,"Nokia Smart Phone, with Caller ID",5175.17,9,0.1,919.97,915.490,Medium,2014Q4
2014-01-28,13524,ES-2014-KM1637548-41667,2014-01-30,First Class,KM-1637548,Katherine Murray,Home Office,,Berlin,Berlin,Germany,Western Europe,Europe,TEC-PH-5267,Technology,Phones,"Motorola Smart Phone, Cordless",2892.51,5,0.1,-96.54,910.160,Medium,2014Q1
2014-11-05,47221,SG-2014-RH9495111-41948,2014-11-06,Same Day,RH-9495111,Rick Hansen,Consumer,,Dakar,Dakar,Senegal,Western Africa,Africa,TEC-CO-6011,Technology,Copiers,"Sharp Wireless Fax, High-Speed",2832.96,8,0.0,311.52,903.040,Critical,2014Q4
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2015-06-19,29002,IN-2015-KE1642066-42174,2015-06-19,Same Day,KE-1642066,Katrina Edelman,Corporate,,Kure,Hiroshima,Japan,Eastern Asia,Asia Pacific,OFF-FA-3072,Office Supplies,Fasteners,"Advantus Thumb Tacks, 12 Pack",65.10,5,0.0,4.50,1.010,Medium,2015Q2
2014-05-06,34337,US-2014-ZD21925140-41765,2014-05-10,Standard Class,ZD-219251408,Zuschuss Donatelli,Consumer,37421.0,Chattanooga,Tennessee,United States,Southern US,USCA,FUR-FU-4070,Furniture,Furnishings,"Eldon Image Series Desk Accessories, Burgundy",16.72,5,0.2,3.34,1.930,High,2014Q2
2012-08-26,31315,CA-2012-ZD21925140-41147,2012-08-31,Second Class,ZD-219251404,Zuschuss Donatelli,Consumer,94109.0,San Francisco,California,United States,Western US,USCA,OFF-AR-5321,Office Supplies,Art,Newell 341,8.56,2,0.0,2.48,1.580,High,2012Q3
2013-02-17,9596,MX-2013-RB1979518-41322,2013-02-21,Standard Class,RB-1979518,Ross Baird,Home Office,,Valinhos,São Paulo,Brazil,South America,LATAM,OFF-BI-2919,Office Supplies,Binders,"Acco Index Tab, Economy",13.44,2,0.0,2.40,1.003,Medium,2013Q1


#### Sanity Check - what is quantity looking at?

In [34]:
#Sanity check - what is quantity actually counting? 
#Total quantity for order or quantity per item?
check = order.loc[order['order_id'] == 'CA-2014-AB10015140-41954']
check1 = order.loc[order['order_id'] == 'IN-2014-JR162107-41675']
check

Unnamed: 0_level_0,row_id,order_id,ship_date,ship_mode,customer_id,customer_name,segment,postal_code,city,state,country,region,market,product_id,category,sub_category,product_name,sales,quantity,discount,profit,shipping_cost,order_priority,quarter
order_date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1
2014-11-11,40098,CA-2014-AB10015140-41954,2014-11-13,First Class,AB-100151402,Aaron Bergman,Consumer,73120.0,Oklahoma City,Oklahoma,United States,Central US,USCA,TEC-PH-5816,Technology,Phones,Samsung Convoy 3,221.98,2,0.0,62.15,40.77,High,2014Q4
2014-11-11,40099,CA-2014-AB10015140-41954,2014-11-13,First Class,AB-100151402,Aaron Bergman,Consumer,73120.0,Oklahoma City,Oklahoma,United States,Central US,USCA,FUR-BO-5957,Furniture,Bookcases,"Sauder Facets Collection Library, Sky Alder Fi...",341.96,2,0.0,54.71,25.27,High,2014Q4


In [35]:
check1

Unnamed: 0_level_0,row_id,order_id,ship_date,ship_mode,customer_id,customer_name,segment,postal_code,city,state,country,region,market,product_id,category,sub_category,product_name,sales,quantity,discount,profit,shipping_cost,order_priority,quarter
order_date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1
2014-02-05,26341,IN-2014-JR162107-41675,2014-02-07,Second Class,JR-162107,Justin Ritter,Corporate,,Wollongong,New South Wales,Australia,Oceania,Asia Pacific,FUR-CH-5379,Furniture,Chairs,"Novimex Executive Leather Armchair, Black",3709.4,9,0.1,-288.77,923.63,Critical,2014Q1
2014-02-05,26339,IN-2014-JR162107-41675,2014-02-07,Second Class,JR-162107,Justin Ritter,Corporate,,Wollongong,New South Wales,Australia,Oceania,Asia Pacific,TEC-CO-3594,Technology,Copiers,"Brother Fax and Copier, Laser",344.68,2,0.1,34.42,65.35,Critical,2014Q1
2014-02-05,26340,IN-2014-JR162107-41675,2014-02-07,Second Class,JR-162107,Justin Ritter,Corporate,,Wollongong,New South Wales,Australia,Oceania,Asia Pacific,OFF-ST-5697,Office Supplies,Storage,"Rogers Folders, Wire Frame",133.92,5,0.1,-6.03,41.64,Critical,2014Q1
2014-02-05,26342,IN-2014-JR162107-41675,2014-02-07,Second Class,JR-162107,Justin Ritter,Corporate,,Wollongong,New South Wales,Australia,Oceania,Asia Pacific,TEC-AC-4152,Technology,Accessories,"Enermax Flash Drive, Erganomic",70.79,2,0.1,25.13,10.48,Critical,2014Q1


From the code above, we can see that the quantity column is actually looking at the unique quantities of items for each sub-category. With that, we can sum the quantity by segment / categories / sub-categories.

#### Seasonal Trend of Inventory

In [36]:
#Looking at total quantities per category - grouping by quarter to see if there are seasonality trends
category = order.groupby(['quarter', 'category'])['quantity'].agg('sum').to_frame(name = 'quantity').reset_index()
category

Unnamed: 0,quarter,category,quantity
0,2012Q1,Furniture,843
1,2012Q1,Office Supplies,2890
2,2012Q1,Technology,876
3,2012Q2,Furniture,1415
4,2012Q2,Office Supplies,4247
5,2012Q2,Technology,1441
6,2012Q3,Furniture,1639
7,2012Q3,Office Supplies,5291
8,2012Q3,Technology,1570
9,2012Q4,Furniture,2399


##### Plot below to show trends

In [37]:
#There are issues where JSON cannot read time series, hence putting it into a string
category.quarter = category.quarter.astype('str')

#Using plotly express to see if there are any trends for inventory
fig = px.line(category, x = 'quarter', y = 'quantity', color = 'category')

fig.update_layout(
    xaxis_title = 'Quarters',
    yaxis_title = 'Quantity',
    title={
        'text': "Looking at Inventory Seasonality",
        'y':0.9,
        'x':0.5,
        'xanchor': 'center',
        'yanchor': 'top'},
    font = {'family': 'Arial',
           'size': 12,
           'color': 'rgb(31,33,36)'},
    plot_bgcolor = 'rgba(0,0,0,0)',
    paper_bgcolor = 'rgba(0,0,0,0)',
)

fig.show()

#### Repeating for sub-category to see if there are any specific items that have more weight than others

In [38]:
sub_category = order.groupby(['quarter', 'sub_category'])[['quantity']]\
                .agg('sum').reset_index()

##### Plot below to show trends

In [39]:
#There are issues where JSON cannot read time series, hence putting it into a string
sub_category.quarter = sub_category.quarter.astype('str')

#Using plotly express to see if there are any trends for inventory
fig = px.line(sub_category, x = 'quarter', y = 'quantity', color = 'sub_category')

fig.update_layout(
    xaxis_title = 'Quarters',
    yaxis_title = 'Quantity',
    title={
        'text': "Looking at Inventory Seasonality per Item",
        'y':0.9,
        'x':0.5,
        'xanchor': 'center',
        'yanchor': 'top'},
    font = {'family': 'Arial',
           'size': 12,
           'color': 'rgb(31,33,36)'},
    plot_bgcolor = 'rgba(0,0,0,0)',
    paper_bgcolor = 'rgba(0,0,0,0)',
)

fig.show()

Generally speaking, there is a seasonality trend seen, particularly in Q4.

## Problem 3

#### Part a - Profit Lost due to Returns

In [59]:
order_returns = pd.merge(order.reset_index(), returns, on = 'order_id')
order_returns

Unnamed: 0,order_date,row_id,order_id,ship_date,ship_mode,customer_id,customer_name,segment,postal_code,city,state,country,region_x,market,product_id,category,sub_category,product_name,sales,quantity,discount,profit,shipping_cost,order_priority,quarter,returned,region_y
0,2012-12-16,30191,IN-2012-PB19210127-41259,2012-12-19,First Class,PB-19210127,Phillip Breyer,Corporate,,Taipei,Taipei City,Taiwan,Eastern Asia,Asia Pacific,FUR-TA-5060,Furniture,Tables,"Lesro Conference Table, with Bottom Storage",1715.16,2,0.0,720.36,725.570,Critical,2012Q4,Yes,Eastern Asia
1,2012-12-16,30190,IN-2012-PB19210127-41259,2012-12-19,First Class,PB-19210127,Phillip Breyer,Corporate,,Taipei,Taipei City,Taiwan,Eastern Asia,Asia Pacific,FUR-BO-5762,Furniture,Bookcases,"Safco Classic Bookcase, Pine",2197.50,5,0.0,153.75,627.270,Critical,2012Q4,Yes,Eastern Asia
2,2012-12-16,30187,IN-2012-PB19210127-41259,2012-12-19,First Class,PB-19210127,Phillip Breyer,Corporate,,Taipei,Taipei City,Taiwan,Eastern Asia,Asia Pacific,FUR-TA-5065,Furniture,Tables,"Lesro Round Table, with Bottom Storage",1356.03,3,0.0,311.85,458.970,Critical,2012Q4,Yes,Eastern Asia
3,2012-12-16,30193,IN-2012-PB19210127-41259,2012-12-19,First Class,PB-19210127,Phillip Breyer,Corporate,,Taipei,Taipei City,Taiwan,Eastern Asia,Asia Pacific,FUR-CH-5454,Furniture,Chairs,"Office Star Swivel Stool, Adjustable",882.15,5,0.0,114.60,203.920,Critical,2012Q4,Yes,Eastern Asia
4,2012-12-16,30189,IN-2012-PB19210127-41259,2012-12-19,First Class,PB-19210127,Phillip Breyer,Corporate,,Taipei,Taipei City,Taiwan,Eastern Asia,Asia Pacific,TEC-MA-5494,Technology,Machines,"Okidata Calculator, Red",148.32,3,0.0,68.22,25.480,Critical,2012Q4,Yes,Eastern Asia
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2215,2014-01-15,35040,CA-2014-WB21850140-41654,2014-01-21,Standard Class,WB-218501406,William Brown,Consumer,10011.0,New York City,New York,United States,Eastern US,USCA,OFF-BI-6098,Office Supplies,Binders,"Square Ring Data Binders, Rigid 75 Pt. Covers,...",49.54,3,0.2,17.34,2.670,Medium,2014Q1,Yes,Eastern US
2216,2014-01-15,35038,CA-2014-WB21850140-41654,2014-01-21,Standard Class,WB-218501406,William Brown,Consumer,10011.0,New York City,New York,United States,Eastern US,USCA,OFF-EN-2850,Office Supplies,Envelopes,"#10 Gummed Flap White Envelopes, 100/Box",16.52,4,0.0,7.60,1.690,Medium,2014Q1,Yes,Eastern US
2217,2013-09-26,25,US-2013-SJ2021582-41543,2013-09-29,First Class,SJ-2021582,Sarah Jordon,Consumer,,Hermosillo,Sonora,Mexico,Central America,LATAM,FUR-FU-3928,Furniture,Furnishings,"Deflect-O Clock, Black",181.12,9,0.4,-75.56,1.085,Medium,2013Q3,Yes,Central America
2218,2012-12-28,36974,CA-2012-ZC21910140-41271,2013-01-04,Standard Class,ZC-219101402,Zuschuss Carroll,Consumer,60610.0,Chicago,Illinois,United States,Central US,USCA,FUR-FU-6235,Furniture,Furnishings,Tenex Chairmats For Use with Hard Floors,38.98,3,0.6,-50.67,5.290,Low,2012Q4,Yes,Central US


In [40]:
profit_lost = abs(round(order.profit.agg(lambda x : x[x < 0].sum()),2))

print(f'Total profit lost on returns is ${profit_lost}')

Total profit lost on returns is $920649.86


#### Part b - Customers Returning >1 times

In [43]:
id_df = order.groupby(['order_id', 'customer_name'])['customer_name'].count().to_frame(name = 'count').reset_index().drop(columns = 'order_id')
cust_1 = len(id_df.loc[id_df['count']>1])
cust_5 = len(id_df.loc[id_df['count']>5])

print(f'Customers returning more than once: {cust_1}')
print(f'Customers returning more than 5 times: {cust_5}')

Customers returning more than once: 12793
Customers returning more than 5 times: 805


#### Part c - Regions Likely to Return Orders

In [44]:
#Renaming columns for better usage
cols_returns = returns.columns.tolist()
cols_returns = [i.replace(' ', '_').lower() for i in cols_returns]
returns.columns = cols_returns

#Sanity check - are all the values in returned 'Yes'?
if len(returns.returned.unique()) == 1:
    print('All values in returned are \'Yes\'.\n')

#Sanity check - are all order_id's unique?
if returns.shape[0] == len(returns.order_id.unique()):
    print('All order_id\'s are unique.\n')

#Grouping by Region and looking at top 5 regions with highest returns
returns.groupby('region')['returned'].count().sort_values(ascending = False)[:5]

All values in returned are 'Yes'.

All order_id's are unique.



region
Western Europe     121
Central America    117
Oceania             78
Western US          72
Eastern US          69
Name: returned, dtype: int64

#### Part d - Categories More Likely to be Returned

Noticed that region_y is created, even though both dataframes have the same column name. An inspection is done below to see whether all values between the two columns match.

In [49]:
(order_returns['region_x'] == order_returns['region_y']).value_counts()

True     2202
False      18
dtype: int64

A total of 18 rows have mismatched regions. Taking a closer look at this columns below.

In [123]:
order_returns.loc[order_returns['region_x'] != order_returns['region_y']]\
    [['region_x', 'region_y']]

Unnamed: 0,region_x,region_y
832,Canada,Eastern Canada
833,Canada,Eastern Canada
1290,Central US,Western US
1291,Central US,Western US
1473,Central US,Western US
1647,Canada,Eastern Canada
1945,Canada,Western Canada
1946,Canada,Western Canada
1947,Canada,Western Canada
1948,Canada,Western Canada


Judging by the Canadian return locations, the original **returns** dataframe has more divided regions. Additionally, the three rows with *Central US* in the **order** dataframe appear to be *Western US* in the **returns** dataframe. For the purpose of this exercise,  *region_y* will be used.

##### Top Category to be returned

In [56]:
cat_returns = order_returns.groupby('category').agg('sum')['quantity']
cat_returns.groupby('category').sum().sort_values(ascending = False)[:1]

category
Office Supplies    4605
Name: quantity, dtype: int64

##### Top 5 Subcategories to be returned

In [53]:
subcat_returns = order_returns.groupby('sub_category').agg('sum')['quantity']
subcat_returns.groupby('sub_category').sum().sort_values(ascending = False)[:5]

sub_category
Binders    912
Art        749
Storage    702
Paper      528
Phones     523
Name: quantity, dtype: int64

# Part II: Machine Learning and Business Use Case

## Problem 4

### Step 1

In [57]:
#Creating a merged dataframe where if an order was not returned, adding No as attribute
merged_df = pd.merge(order.reset_index(), returns, how = 'left', on = 'order_id')
merged_df['returned'] = merged_df['returned'].replace(np.nan, 'No')

### Step 2

In [170]:
#Creating the process_time column in units of days
merged_df['process_time'] = merged_df['ship_date'] - merged_df['order_date']
merged_df

Unnamed: 0,order_date,row_id,order_id,ship_date,ship_mode,customer_id,customer_name,segment,postal_code,city,state,country,region_x,market,product_id,category,sub_category,product_name,sales,quantity,discount,profit,shipping_cost,order_priority,quarter,returned,region_y,process_time
0,2014-11-11,40098,CA-2014-AB10015140-41954,2014-11-13,First Class,AB-100151402,Aaron Bergman,Consumer,73120.0,Oklahoma City,Oklahoma,United States,Central US,USCA,TEC-PH-5816,Technology,Phones,Samsung Convoy 3,221.98,2,0.0,62.15,40.770,High,2014Q4,No,,2 days
1,2014-02-05,26341,IN-2014-JR162107-41675,2014-02-07,Second Class,JR-162107,Justin Ritter,Corporate,,Wollongong,New South Wales,Australia,Oceania,Asia Pacific,FUR-CH-5379,Furniture,Chairs,"Novimex Executive Leather Armchair, Black",3709.40,9,0.1,-288.77,923.630,Critical,2014Q1,No,,2 days
2,2014-10-17,25330,IN-2014-CR127307-41929,2014-10-18,First Class,CR-127307,Craig Reiter,Consumer,,Brisbane,Queensland,Australia,Oceania,Asia Pacific,TEC-PH-5356,Technology,Phones,"Nokia Smart Phone, with Caller ID",5175.17,9,0.1,919.97,915.490,Medium,2014Q4,No,,1 days
3,2014-01-28,13524,ES-2014-KM1637548-41667,2014-01-30,First Class,KM-1637548,Katherine Murray,Home Office,,Berlin,Berlin,Germany,Western Europe,Europe,TEC-PH-5267,Technology,Phones,"Motorola Smart Phone, Cordless",2892.51,5,0.1,-96.54,910.160,Medium,2014Q1,No,,2 days
4,2014-11-05,47221,SG-2014-RH9495111-41948,2014-11-06,Same Day,RH-9495111,Rick Hansen,Consumer,,Dakar,Dakar,Senegal,Western Africa,Africa,TEC-CO-6011,Technology,Copiers,"Sharp Wireless Fax, High-Speed",2832.96,8,0.0,311.52,903.040,Critical,2014Q4,No,,1 days
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
51285,2015-06-19,29002,IN-2015-KE1642066-42174,2015-06-19,Same Day,KE-1642066,Katrina Edelman,Corporate,,Kure,Hiroshima,Japan,Eastern Asia,Asia Pacific,OFF-FA-3072,Office Supplies,Fasteners,"Advantus Thumb Tacks, 12 Pack",65.10,5,0.0,4.50,1.010,Medium,2015Q2,No,,0 days
51286,2014-05-06,34337,US-2014-ZD21925140-41765,2014-05-10,Standard Class,ZD-219251408,Zuschuss Donatelli,Consumer,37421.0,Chattanooga,Tennessee,United States,Southern US,USCA,FUR-FU-4070,Furniture,Furnishings,"Eldon Image Series Desk Accessories, Burgundy",16.72,5,0.2,3.34,1.930,High,2014Q2,No,,4 days
51287,2012-08-26,31315,CA-2012-ZD21925140-41147,2012-08-31,Second Class,ZD-219251404,Zuschuss Donatelli,Consumer,94109.0,San Francisco,California,United States,Western US,USCA,OFF-AR-5321,Office Supplies,Art,Newell 341,8.56,2,0.0,2.48,1.580,High,2012Q3,No,,5 days
51288,2013-02-17,9596,MX-2013-RB1979518-41322,2013-02-21,Standard Class,RB-1979518,Ross Baird,Home Office,,Valinhos,São Paulo,Brazil,South America,LATAM,OFF-BI-2919,Office Supplies,Binders,"Acco Index Tab, Economy",13.44,2,0.0,2.40,1.003,Medium,2013Q1,No,,4 days


### Step 3

In [62]:
order_returns.groupby(['product_id', 'returned'])['quantity'].value_counts()

product_id   returned  quantity
FUR-BO-3176  Yes       7           1
FUR-BO-3615  Yes       1           1
FUR-BO-3616  Yes       2           1
FUR-BO-3621  Yes       3           1
FUR-BO-3624  Yes       3           1
                                  ..
TEC-PH-6348  Yes       6           1
TEC-PH-6364  Yes       5           1
TEC-PH-6365  Yes       2           1
TEC-PH-6413  Yes       3           1
TEC-PH-6425  Yes       3           1
Name: quantity, Length: 2054, dtype: int64