# The Fruit company

In [1]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import MultiLabelBinarizer

```Python
data = {
    'Sales_ID': ['AU_1382578        !', 'TW_8312566       !', 'US_2289987       !', 'UK_1382574       !', 'TW_1382578       !', 
                 'AU_1335358       !', 'TW_6662597       !', 'TW_3352571       !', 'US_7683478       !', 'US_4523771       !'],
    'Sales_Branch': np.nan,
    'Product_Description': ['The Fruit, a company that sells fruits from all over the world, has branches in Australia, the United Kingdom, the United States, and Taiwan. Product: Apple Mango Banana Watermelon Orange Blueberry Banana Watermelon Kiwifruit', 
                            'The Fruit, a company that sells fruits from all over the world, has branches in Australia, the United Kingdom, the United States, and Taiwan. Product: Orange Pear Mango Papaya Peach Persimmon Lychee Guava', 
                            'The Fruit, a company that sells fruits from all over the world, has branches in Australia, the United Kingdom, the United States, and Taiwan. Product: Apple Orange Blueberry Lemon Cantaloupe Cherry', 
                            'The Fruit, a company that sells fruits from all over the world, has branches in Australia, the United Kingdom, the United States, and Taiwan. Product: Strawberry', 
                            'The Fruit, a company that sells fruits from all over the world, has branches in Australia, the United Kingdom, the United States, and Taiwan. Product: Mango Papaya Watermelon Lychee Guava Grape Longan', 
                            'The Fruit, a company that sells fruits from all over the world, has branches in Australia, the United Kingdom, the United States, and Taiwan. Product: Apple Orange Blueberry Mango Banana Watermelon Kiwifruit Stonefruit Avocado', 
                            'The Fruit, a company that sells fruits from all over the world, has branches in Australia, the United Kingdom, the United States, and Taiwan. Product: Apple Pomelo Mango Guava', 
                            'The Fruit, a company that sells fruits from all over the world, has branches in Australia, the United Kingdom, the United States, and Taiwan. Product: Pomelo Persimmon Roselle Tangerine Pineapple', 
                            'The Fruit, a company that sells fruits from all over the world, has branches in Australia, the United Kingdom, the United States, and Taiwan. Product: Raspberry Blueberry Pineapple Peach', 
                            'The Fruit, a company that sells fruits from all over the world, has branches in Australia, the United Kingdom, the United States, and Taiwan. Product: Blueberry Cherry Pear Blackberry'],
    'Recent_Sales_Date': ['2019/12/31 GMT+8 18:00', '2019/12/31 GMT+8 18:00', '2019/12/29 GMT+8 18:00', '2019/12/15 GMT+8 05:00', '2019/12/29 GMT+8 05:00', 
                          '2019/12/31 GMT+8 05:00', '2019/12/31 GMT+8 15:00', '2019/12/27 GMT+8 18:00', '2019/12/31 GMT+8 18:00', '2019/12/30 GMT+8 18:00'],
    'Customer_Count': [10, 27, 12, 1, 5, 10, 1, 3, 17, 51],
}

df = pd.DataFrame(data, columns=['Sales_ID', 'Sales_Branch', 'Product_Description', 'Recent_Sales_Date', 'Customer_Count'])
```

In [2]:
df = pd.read_csv('data/TheFruit_sales.csv')
df.head()

Unnamed: 0.1,Unnamed: 0,Sales_ID,Sales_Branch,Product_Description,Recent_Sales_Date,Customer_Count
0,0,AU_1382578 !,,"The Fruit, a company that sells fruits from al...",2019/12/31 GMT+8 18:00,10
1,1,TW_8312566 !,,"The Fruit, a company that sells fruits from al...",2019/12/31 GMT+8 18:00,27
2,2,US_2289987 !,,"The Fruit, a company that sells fruits from al...",2019/12/29 GMT+8 18:00,12
3,3,UK_1382574 !,,"The Fruit, a company that sells fruits from al...",2019/12/15 GMT+8 05:00,1
4,4,TW_1382578 !,,"The Fruit, a company that sells fruits from al...",2019/12/29 GMT+8 05:00,5


## Sales_ID

In [3]:
df[ 'Sales_ID'][0]

'AU_1382578        !'

In [4]:
df['Sales_ID'].str.strip('!').str.strip()[0]

'AU_1382578'

In [5]:
df['Sales_ID'].str.rstrip('!').str.rstrip()[0]

'AU_1382578'

In [6]:
df['Sales_ID'] = df['Sales_ID'].str.rstrip('!').str.strip()

In [7]:
df['Sales_ID']

0    AU_1382578
1    TW_8312566
2    US_2289987
3    UK_1382574
4    TW_1382578
5    AU_1335358
6    TW_6662597
7    TW_3352571
8    US_7683478
9    US_4523771
Name: Sales_ID, dtype: object

## Sales_Branch

In [8]:
df['Sales_Branch'] = df['Sales_ID'].str.slice(stop=2)

In [9]:
df['Sales_Branch'] 

0    AU
1    TW
2    US
3    UK
4    TW
5    AU
6    TW
7    TW
8    US
9    US
Name: Sales_Branch, dtype: object

## Product_Description

In [10]:
df['Product_Description'][0]

'The Fruit, a company that sells fruits from all over the world, has branches in Australia, the United Kingdom, the United States, and Taiwan. Product: Apple Mango Banana Watermelon Orange Blueberry Banana Watermelon Kiwifruit'

In [11]:
df['Product_Description'].str.split(' Product: ', expand=True)

Unnamed: 0,0,1
0,"The Fruit, a company that sells fruits from al...",Apple Mango Banana Watermelon Orange Blueberry...
1,"The Fruit, a company that sells fruits from al...",Orange Pear Mango Papaya Peach Persimmon Lyche...
2,"The Fruit, a company that sells fruits from al...",Apple Orange Blueberry Lemon Cantaloupe Cherry
3,"The Fruit, a company that sells fruits from al...",Strawberry
4,"The Fruit, a company that sells fruits from al...",Mango Papaya Watermelon Lychee Guava Grape Longan
5,"The Fruit, a company that sells fruits from al...",Apple Orange Blueberry Mango Banana Watermelon...
6,"The Fruit, a company that sells fruits from al...",Apple Pomelo Mango Guava
7,"The Fruit, a company that sells fruits from al...",Pomelo Persimmon Roselle Tangerine Pineapple
8,"The Fruit, a company that sells fruits from al...",Raspberry Blueberry Pineapple Peach
9,"The Fruit, a company that sells fruits from al...",Blueberry Cherry Pear Blackberry


In [12]:
df['Description'] = df['Product_Description'].str.split(': ', expand=True)[0]
df['Product'] = df['Product_Description'].str.split(': ', expand=True)[1]
df['Product'] = df['Product'].str.split().apply(sorted)

In [13]:
df[['Description', 'Product']] = df['Product_Description'].str.split(': ', expand=True)
df['Product'] = df['Product'].str.split().apply(sorted)

In [14]:
df[['Description', 'Product']]

Unnamed: 0,Description,Product
0,"The Fruit, a company that sells fruits from al...","[Apple, Banana, Banana, Blueberry, Kiwifruit, ..."
1,"The Fruit, a company that sells fruits from al...","[Guava, Lychee, Mango, Orange, Papaya, Peach, ..."
2,"The Fruit, a company that sells fruits from al...","[Apple, Blueberry, Cantaloupe, Cherry, Lemon, ..."
3,"The Fruit, a company that sells fruits from al...",[Strawberry]
4,"The Fruit, a company that sells fruits from al...","[Grape, Guava, Longan, Lychee, Mango, Papaya, ..."
5,"The Fruit, a company that sells fruits from al...","[Apple, Avocado, Banana, Blueberry, Kiwifruit,..."
6,"The Fruit, a company that sells fruits from al...","[Apple, Guava, Mango, Pomelo]"
7,"The Fruit, a company that sells fruits from al...","[Persimmon, Pineapple, Pomelo, Roselle, Tanger..."
8,"The Fruit, a company that sells fruits from al...","[Blueberry, Peach, Pineapple, Raspberry]"
9,"The Fruit, a company that sells fruits from al...","[Blackberry, Blueberry, Cherry, Pear]"


## Product_Count

In [15]:
df['Product_Count'] = df['Product'].str.len()

In [16]:
df[df['Product_Count'] == max(df['Product_Count'])]

Unnamed: 0.1,Unnamed: 0,Sales_ID,Sales_Branch,Product_Description,Recent_Sales_Date,Customer_Count,Description,Product,Product_Count
0,0,AU_1382578,AU,"The Fruit, a company that sells fruits from al...",2019/12/31 GMT+8 18:00,10,"The Fruit, a company that sells fruits from al...","[Apple, Banana, Banana, Blueberry, Kiwifruit, ...",9
5,5,AU_1335358,AU,"The Fruit, a company that sells fruits from al...",2019/12/31 GMT+8 05:00,10,"The Fruit, a company that sells fruits from al...","[Apple, Avocado, Banana, Blueberry, Kiwifruit,...",9


## Product

In [17]:
Products = df['Product'].apply(pd.Series)
Products

Unnamed: 0,0,1,2,3,4,5,6,7,8
0,Apple,Banana,Banana,Blueberry,Kiwifruit,Mango,Orange,Watermelon,Watermelon
1,Guava,Lychee,Mango,Orange,Papaya,Peach,Pear,Persimmon,
2,Apple,Blueberry,Cantaloupe,Cherry,Lemon,Orange,,,
3,Strawberry,,,,,,,,
4,Grape,Guava,Longan,Lychee,Mango,Papaya,Watermelon,,
5,Apple,Avocado,Banana,Blueberry,Kiwifruit,Mango,Orange,Stonefruit,Watermelon
6,Apple,Guava,Mango,Pomelo,,,,,
7,Persimmon,Pineapple,Pomelo,Roselle,Tangerine,,,,
8,Blueberry,Peach,Pineapple,Raspberry,,,,,
9,Blackberry,Blueberry,Cherry,Pear,,,,,


In [18]:
Products = Products.rename(columns=lambda x: 'Product_'+str(x))
Products

Unnamed: 0,Product_0,Product_1,Product_2,Product_3,Product_4,Product_5,Product_6,Product_7,Product_8
0,Apple,Banana,Banana,Blueberry,Kiwifruit,Mango,Orange,Watermelon,Watermelon
1,Guava,Lychee,Mango,Orange,Papaya,Peach,Pear,Persimmon,
2,Apple,Blueberry,Cantaloupe,Cherry,Lemon,Orange,,,
3,Strawberry,,,,,,,,
4,Grape,Guava,Longan,Lychee,Mango,Papaya,Watermelon,,
5,Apple,Avocado,Banana,Blueberry,Kiwifruit,Mango,Orange,Stonefruit,Watermelon
6,Apple,Guava,Mango,Pomelo,,,,,
7,Persimmon,Pineapple,Pomelo,Roselle,Tangerine,,,,
8,Blueberry,Peach,Pineapple,Raspberry,,,,,
9,Blackberry,Blueberry,Cherry,Pear,,,,,


In [19]:
pd.concat([df, Products], axis=1)

Unnamed: 0.1,Unnamed: 0,Sales_ID,Sales_Branch,Product_Description,Recent_Sales_Date,Customer_Count,Description,Product,Product_Count,Product_0,Product_1,Product_2,Product_3,Product_4,Product_5,Product_6,Product_7,Product_8
0,0,AU_1382578,AU,"The Fruit, a company that sells fruits from al...",2019/12/31 GMT+8 18:00,10,"The Fruit, a company that sells fruits from al...","[Apple, Banana, Banana, Blueberry, Kiwifruit, ...",9,Apple,Banana,Banana,Blueberry,Kiwifruit,Mango,Orange,Watermelon,Watermelon
1,1,TW_8312566,TW,"The Fruit, a company that sells fruits from al...",2019/12/31 GMT+8 18:00,27,"The Fruit, a company that sells fruits from al...","[Guava, Lychee, Mango, Orange, Papaya, Peach, ...",8,Guava,Lychee,Mango,Orange,Papaya,Peach,Pear,Persimmon,
2,2,US_2289987,US,"The Fruit, a company that sells fruits from al...",2019/12/29 GMT+8 18:00,12,"The Fruit, a company that sells fruits from al...","[Apple, Blueberry, Cantaloupe, Cherry, Lemon, ...",6,Apple,Blueberry,Cantaloupe,Cherry,Lemon,Orange,,,
3,3,UK_1382574,UK,"The Fruit, a company that sells fruits from al...",2019/12/15 GMT+8 05:00,1,"The Fruit, a company that sells fruits from al...",[Strawberry],1,Strawberry,,,,,,,,
4,4,TW_1382578,TW,"The Fruit, a company that sells fruits from al...",2019/12/29 GMT+8 05:00,5,"The Fruit, a company that sells fruits from al...","[Grape, Guava, Longan, Lychee, Mango, Papaya, ...",7,Grape,Guava,Longan,Lychee,Mango,Papaya,Watermelon,,
5,5,AU_1335358,AU,"The Fruit, a company that sells fruits from al...",2019/12/31 GMT+8 05:00,10,"The Fruit, a company that sells fruits from al...","[Apple, Avocado, Banana, Blueberry, Kiwifruit,...",9,Apple,Avocado,Banana,Blueberry,Kiwifruit,Mango,Orange,Stonefruit,Watermelon
6,6,TW_6662597,TW,"The Fruit, a company that sells fruits from al...",2019/12/31 GMT+8 15:00,1,"The Fruit, a company that sells fruits from al...","[Apple, Guava, Mango, Pomelo]",4,Apple,Guava,Mango,Pomelo,,,,,
7,7,TW_3352571,TW,"The Fruit, a company that sells fruits from al...",2019/12/27 GMT+8 18:00,3,"The Fruit, a company that sells fruits from al...","[Persimmon, Pineapple, Pomelo, Roselle, Tanger...",5,Persimmon,Pineapple,Pomelo,Roselle,Tangerine,,,,
8,8,US_7683478,US,"The Fruit, a company that sells fruits from al...",2019/12/31 GMT+8 18:00,17,"The Fruit, a company that sells fruits from al...","[Blueberry, Peach, Pineapple, Raspberry]",4,Blueberry,Peach,Pineapple,Raspberry,,,,,
9,9,US_4523771,US,"The Fruit, a company that sells fruits from al...",2019/12/30 GMT+8 18:00,51,"The Fruit, a company that sells fruits from al...","[Blackberry, Blueberry, Cherry, Pear]",4,Blackberry,Blueberry,Cherry,Pear,,,,,


In [20]:
pd.concat([df, Products], axis=1).shape

(10, 18)

In [21]:
df['Product_Description'].str.split(': ', expand=True)[1]

0    Apple Mango Banana Watermelon Orange Blueberry...
1    Orange Pear Mango Papaya Peach Persimmon Lyche...
2       Apple Orange Blueberry Lemon Cantaloupe Cherry
3                                           Strawberry
4    Mango Papaya Watermelon Lychee Guava Grape Longan
5    Apple Orange Blueberry Mango Banana Watermelon...
6                             Apple Pomelo Mango Guava
7         Pomelo Persimmon Roselle Tangerine Pineapple
8                  Raspberry Blueberry Pineapple Peach
9                     Blueberry Cherry Pear Blackberry
Name: 1, dtype: object

In [22]:
Products2 = df['Product_Description'].str.split(': ', expand=True)[1].str.get_dummies(' ')
Products2

Unnamed: 0,Apple,Avocado,Banana,Blackberry,Blueberry,Cantaloupe,Cherry,Grape,Guava,Kiwifruit,...,Pear,Persimmon,Pineapple,Pomelo,Raspberry,Roselle,Stonefruit,Strawberry,Tangerine,Watermelon
0,1,0,1,0,1,0,0,0,0,1,...,0,0,0,0,0,0,0,0,0,1
1,0,0,0,0,0,0,0,0,1,0,...,1,1,0,0,0,0,0,0,0,0
2,1,0,0,0,1,1,1,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,1,0,0
4,0,0,0,0,0,0,0,1,1,0,...,0,0,0,0,0,0,0,0,0,1
5,1,1,1,0,1,0,0,0,0,1,...,0,0,0,0,0,0,1,0,0,1
6,1,0,0,0,0,0,0,0,1,0,...,0,0,0,1,0,0,0,0,0,0
7,0,0,0,0,0,0,0,0,0,0,...,0,1,1,1,0,1,0,0,1,0
8,0,0,0,0,1,0,0,0,0,0,...,0,0,1,0,1,0,0,0,0,0
9,0,0,0,1,1,0,1,0,0,0,...,1,0,0,0,0,0,0,0,0,0


In [23]:
pd.concat([df, Products2], axis=1)

Unnamed: 0.1,Unnamed: 0,Sales_ID,Sales_Branch,Product_Description,Recent_Sales_Date,Customer_Count,Description,Product,Product_Count,Apple,...,Pear,Persimmon,Pineapple,Pomelo,Raspberry,Roselle,Stonefruit,Strawberry,Tangerine,Watermelon
0,0,AU_1382578,AU,"The Fruit, a company that sells fruits from al...",2019/12/31 GMT+8 18:00,10,"The Fruit, a company that sells fruits from al...","[Apple, Banana, Banana, Blueberry, Kiwifruit, ...",9,1,...,0,0,0,0,0,0,0,0,0,1
1,1,TW_8312566,TW,"The Fruit, a company that sells fruits from al...",2019/12/31 GMT+8 18:00,27,"The Fruit, a company that sells fruits from al...","[Guava, Lychee, Mango, Orange, Papaya, Peach, ...",8,0,...,1,1,0,0,0,0,0,0,0,0
2,2,US_2289987,US,"The Fruit, a company that sells fruits from al...",2019/12/29 GMT+8 18:00,12,"The Fruit, a company that sells fruits from al...","[Apple, Blueberry, Cantaloupe, Cherry, Lemon, ...",6,1,...,0,0,0,0,0,0,0,0,0,0
3,3,UK_1382574,UK,"The Fruit, a company that sells fruits from al...",2019/12/15 GMT+8 05:00,1,"The Fruit, a company that sells fruits from al...",[Strawberry],1,0,...,0,0,0,0,0,0,0,1,0,0
4,4,TW_1382578,TW,"The Fruit, a company that sells fruits from al...",2019/12/29 GMT+8 05:00,5,"The Fruit, a company that sells fruits from al...","[Grape, Guava, Longan, Lychee, Mango, Papaya, ...",7,0,...,0,0,0,0,0,0,0,0,0,1
5,5,AU_1335358,AU,"The Fruit, a company that sells fruits from al...",2019/12/31 GMT+8 05:00,10,"The Fruit, a company that sells fruits from al...","[Apple, Avocado, Banana, Blueberry, Kiwifruit,...",9,1,...,0,0,0,0,0,0,1,0,0,1
6,6,TW_6662597,TW,"The Fruit, a company that sells fruits from al...",2019/12/31 GMT+8 15:00,1,"The Fruit, a company that sells fruits from al...","[Apple, Guava, Mango, Pomelo]",4,1,...,0,0,0,1,0,0,0,0,0,0
7,7,TW_3352571,TW,"The Fruit, a company that sells fruits from al...",2019/12/27 GMT+8 18:00,3,"The Fruit, a company that sells fruits from al...","[Persimmon, Pineapple, Pomelo, Roselle, Tanger...",5,0,...,0,1,1,1,0,1,0,0,1,0
8,8,US_7683478,US,"The Fruit, a company that sells fruits from al...",2019/12/31 GMT+8 18:00,17,"The Fruit, a company that sells fruits from al...","[Blueberry, Peach, Pineapple, Raspberry]",4,0,...,0,0,1,0,1,0,0,0,0,0
9,9,US_4523771,US,"The Fruit, a company that sells fruits from al...",2019/12/30 GMT+8 18:00,51,"The Fruit, a company that sells fruits from al...","[Blackberry, Blueberry, Cherry, Pear]",4,0,...,1,0,0,0,0,0,0,0,0,0


In [24]:
mlb = MultiLabelBinarizer(sparse_output=True)
Products3 = pd.DataFrame.sparse.from_spmatrix(
                mlb.fit_transform(df['Product']),
                index=df.index,
                columns=mlb.classes_)
Products3

Unnamed: 0,Apple,Avocado,Banana,Blackberry,Blueberry,Cantaloupe,Cherry,Grape,Guava,Kiwifruit,...,Pear,Persimmon,Pineapple,Pomelo,Raspberry,Roselle,Stonefruit,Strawberry,Tangerine,Watermelon
0,1,0,1,0,1,0,0,0,0,1,...,0,0,0,0,0,0,0,0,0,1
1,0,0,0,0,0,0,0,0,1,0,...,1,1,0,0,0,0,0,0,0,0
2,1,0,0,0,1,1,1,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,1,0,0
4,0,0,0,0,0,0,0,1,1,0,...,0,0,0,0,0,0,0,0,0,1
5,1,1,1,0,1,0,0,0,0,1,...,0,0,0,0,0,0,1,0,0,1
6,1,0,0,0,0,0,0,0,1,0,...,0,0,0,1,0,0,0,0,0,0
7,0,0,0,0,0,0,0,0,0,0,...,0,1,1,1,0,1,0,0,1,0
8,0,0,0,0,1,0,0,0,0,0,...,0,0,1,0,1,0,0,0,0,0
9,0,0,0,1,1,0,1,0,0,0,...,1,0,0,0,0,0,0,0,0,0


In [25]:
pd.concat([df, Products3], axis=1)

Unnamed: 0.1,Unnamed: 0,Sales_ID,Sales_Branch,Product_Description,Recent_Sales_Date,Customer_Count,Description,Product,Product_Count,Apple,...,Pear,Persimmon,Pineapple,Pomelo,Raspberry,Roselle,Stonefruit,Strawberry,Tangerine,Watermelon
0,0,AU_1382578,AU,"The Fruit, a company that sells fruits from al...",2019/12/31 GMT+8 18:00,10,"The Fruit, a company that sells fruits from al...","[Apple, Banana, Banana, Blueberry, Kiwifruit, ...",9,1,...,0,0,0,0,0,0,0,0,0,1
1,1,TW_8312566,TW,"The Fruit, a company that sells fruits from al...",2019/12/31 GMT+8 18:00,27,"The Fruit, a company that sells fruits from al...","[Guava, Lychee, Mango, Orange, Papaya, Peach, ...",8,0,...,1,1,0,0,0,0,0,0,0,0
2,2,US_2289987,US,"The Fruit, a company that sells fruits from al...",2019/12/29 GMT+8 18:00,12,"The Fruit, a company that sells fruits from al...","[Apple, Blueberry, Cantaloupe, Cherry, Lemon, ...",6,1,...,0,0,0,0,0,0,0,0,0,0
3,3,UK_1382574,UK,"The Fruit, a company that sells fruits from al...",2019/12/15 GMT+8 05:00,1,"The Fruit, a company that sells fruits from al...",[Strawberry],1,0,...,0,0,0,0,0,0,0,1,0,0
4,4,TW_1382578,TW,"The Fruit, a company that sells fruits from al...",2019/12/29 GMT+8 05:00,5,"The Fruit, a company that sells fruits from al...","[Grape, Guava, Longan, Lychee, Mango, Papaya, ...",7,0,...,0,0,0,0,0,0,0,0,0,1
5,5,AU_1335358,AU,"The Fruit, a company that sells fruits from al...",2019/12/31 GMT+8 05:00,10,"The Fruit, a company that sells fruits from al...","[Apple, Avocado, Banana, Blueberry, Kiwifruit,...",9,1,...,0,0,0,0,0,0,1,0,0,1
6,6,TW_6662597,TW,"The Fruit, a company that sells fruits from al...",2019/12/31 GMT+8 15:00,1,"The Fruit, a company that sells fruits from al...","[Apple, Guava, Mango, Pomelo]",4,1,...,0,0,0,1,0,0,0,0,0,0
7,7,TW_3352571,TW,"The Fruit, a company that sells fruits from al...",2019/12/27 GMT+8 18:00,3,"The Fruit, a company that sells fruits from al...","[Persimmon, Pineapple, Pomelo, Roselle, Tanger...",5,0,...,0,1,1,1,0,1,0,0,1,0
8,8,US_7683478,US,"The Fruit, a company that sells fruits from al...",2019/12/31 GMT+8 18:00,17,"The Fruit, a company that sells fruits from al...","[Blueberry, Peach, Pineapple, Raspberry]",4,0,...,0,0,1,0,1,0,0,0,0,0
9,9,US_4523771,US,"The Fruit, a company that sells fruits from al...",2019/12/30 GMT+8 18:00,51,"The Fruit, a company that sells fruits from al...","[Blackberry, Blueberry, Cherry, Pear]",4,0,...,1,0,0,0,0,0,0,0,0,0


## Recent_Sales_Date

In [26]:
Dates = df['Recent_Sales_Date'].str[:10]
Dates

0    2019/12/31
1    2019/12/31
2    2019/12/29
3    2019/12/15
4    2019/12/29
5    2019/12/31
6    2019/12/31
7    2019/12/27
8    2019/12/31
9    2019/12/30
Name: Recent_Sales_Date, dtype: object

In [27]:
Dates2 = df['Recent_Sales_Date'].str.slice(start=0, stop=10)
Dates2

0    2019/12/31
1    2019/12/31
2    2019/12/29
3    2019/12/15
4    2019/12/29
5    2019/12/31
6    2019/12/31
7    2019/12/27
8    2019/12/31
9    2019/12/30
Name: Recent_Sales_Date, dtype: object

In [28]:
pd.to_datetime(Dates)

0   2019-12-31
1   2019-12-31
2   2019-12-29
3   2019-12-15
4   2019-12-29
5   2019-12-31
6   2019-12-31
7   2019-12-27
8   2019-12-31
9   2019-12-30
Name: Recent_Sales_Date, dtype: datetime64[ns]

In [29]:
pd.to_datetime(df['Recent_Sales_Date'].str.slice(stop=10)).dt.strftime('%m/%d/%Y')

0    12/31/2019
1    12/31/2019
2    12/29/2019
3    12/15/2019
4    12/29/2019
5    12/31/2019
6    12/31/2019
7    12/27/2019
8    12/31/2019
9    12/30/2019
Name: Recent_Sales_Date, dtype: object