In [0]:
from google.colab import drive
# Mount Google Drive at an absolute path so it matches the
# "/content/drive/My Drive/..." paths used by the later cells regardless of
# the kernel's current working directory (a relative 'drive' only resolves to
# the same place when the cwd happens to be /content).
drive.mount('/content/drive')

Go to this URL in a browser: https://accounts.google.com/o/oauth2/auth?client_id=947318989803-6bn6qk8qdgf4n4g3pfee6491hc0brc4i.apps.googleusercontent.com&redirect_uri=urn%3Aietf%3Awg%3Aoauth%3A2.0%3Aoob&scope=email%20https%3A%2F%2Fwww.googleapis.com%2Fauth%2Fdocs.test%20https%3A%2F%2Fwww.googleapis.com%2Fauth%2Fdrive%20https%3A%2F%2Fwww.googleapis.com%2Fauth%2Fdrive.photos.readonly%20https%3A%2F%2Fwww.googleapis.com%2Fauth%2Fpeopleapi.readonly&response_type=code

Enter your authorization code:
··········
Mounted at drive


In [0]:
import os

# Folder on the mounted Drive that holds the practice CSV files.
# NOTE: `dir` shadows the Python builtin of the same name; the name is kept
# because the next cell builds the full paths from it.
dir = "/content/drive/My Drive/Practice/"

# os.listdir returns bare file names (no directory part) in arbitrary order.
files = os.listdir(path=dir)
files

['user_order.csv', 'order_product.csv']

In [0]:
# Turn each bare file name into a full path. os.path.join inserts the path
# separator only when it is needed, so the result stays correct even if `dir`
# is later written without its trailing slash.
list_of_files = [os.path.join(dir, file_name) for file_name in files]
list_of_files

['/content/drive/My Drive/Practice/user_order.csv',
 '/content/drive/My Drive/Practice/order_product.csv']

In [0]:
# Load every CSV into its own DataFrame, then join the first two on order_id
import csv
import pandas as pd
import numpy as np

# One DataFrame per path, in the same order as list_of_files.
list_of_dfs = [pd.read_csv(csv_path) for csv_path in list_of_files]

# Default (inner) join: only order_ids present in both frames are kept.
df = list_of_dfs[0].merge(list_of_dfs[1], on='order_id')
print(df)


    user_id  order_id  order_dow  ...    product_name  product_price  product_type
0     20100         1     Monday  ...        Lingerie            250      Clothing
1     20268         2   Thursday  ...          Shirts            600      Clothing
2     20118         3   Saturday  ...        Bannanas            150     Groceries
3     20108         4     Sunday  ...         Notepad            250    Stationary
4     20100         5    Tuesday  ...          Cereal            240     Groceries
5     20269         6     Monday  ...         Stapler            200    Stationary
6     20120         7     Friday  ...            Tape            120    Stationary
7     20041         8  Wednesday  ...           Pants            400      Clothing
8     20268         9   Thursday  ...          Apples            180     Groceries
9     20100        10     Sunday  ...           Wheat            350     Groceries
10    20118        11  Wednesday  ...         Chicken            500     Groceries
11  

In [0]:
# Column names
# For a DataFrame, keys() is the column Index, i.e. the same thing as .columns
print(df.columns)

# .values exposes the column labels as a plain NumPy array
attributes = df.columns.values
attributes

Index(['user_id', 'order_id', 'order_dow', 'items_purchased', 'total_bill',
       'product_id', 'product_name', 'product_price', 'product_type'],
      dtype='object')


array(['user_id', 'order_id', 'order_dow', 'items_purchased',
       'total_bill', 'product_id', 'product_name', 'product_price',
       'product_type'], dtype=object)

In [0]:
# Set: the distinct user_ids that appear in the merged orders
customers = set(df['user_id'])

customers

{20006, 20041, 20056, 20100, 20108, 20118, 20120, 20268, 20269, 20284, 20285}

In [0]:
# Group By (it combines all rows that share the same key into one group)
# Each iteration yields the group key (a user_id) and the sub-frame holding
# only that user's rows.
for group, frame in df.groupby('user_id'):
  # Named `user_total` rather than `sum` so the builtin sum() is not
  # overwritten for every cell that runs after this one.
  user_total = np.sum(frame['total_bill'])
  print("Total sum for user "+ str(group) + " is :" + str(user_total))

Total sum for user 20006 is :160
Total sum for user 20041 is :1860
Total sum for user 20056 is :1320
Total sum for user 20100 is :2850
Total sum for user 20108 is :750
Total sum for user 20118 is :2080
Total sum for user 20120 is :2300
Total sum for user 20268 is :3110
Total sum for user 20269 is :2200
Total sum for user 20284 is :850
Total sum for user 20285 is :500


In [0]:
# Apply (a function you want to run over the DataFrame; with axis=1 it is called once per row)
def sumTotal(row):
  """Add a 'sum_total_price' entry to ``row`` and return the same row.

  ``row`` is one record supporting key access (a DataFrame row when used via
  ``df.apply(..., axis=1)``). The new entry is the product of the row's
  'items_purchased' and 'product_price' values. The row is modified in place.
  """
  line_total = row['items_purchased'] * row['product_price']
  row['sum_total_price'] = line_total
  return row

# axis='columns' (same as axis=1) hands sumTotal one row at a time; the rows it
# returns, each with the extra 'sum_total_price' entry, form the new frame.
df = df.apply(sumTotal, axis='columns')
print(df.head(5))

   user_id  order_id order_dow  ...  product_price  product_type  sum_total_price
0    20100         1    Monday  ...            250      Clothing             1000
1    20268         2  Thursday  ...            600      Clothing             1800
2    20118         3  Saturday  ...            150     Groceries              450
3    20108         4    Sunday  ...            250    Stationary              500
4    20100         5   Tuesday  ...            240     Groceries             1680

[5 rows x 10 columns]


In [0]:
# Add a Series after using apply & lambda
import copy  # not used by this cell: df.copy() below is the DataFrame method

# Row-wise apply on a copy of df; each row yields its total_bill scaled by 1.6,
# so `cdf` ends up as a Series with one value per row of df.
cdf = df.copy().apply(lambda order: order['total_bill'] * 1.6, axis=1)

# Attach the result as a new column (values are matched to rows by index).
df['addTax'] = pd.Series(cdf)
df.head()

Unnamed: 0,user_id,order_id,order_dow,items_purchased,total_bill,product_id,product_name,product_price,product_type,sum_total_price,addTax
0,20100,1,Monday,4,600,206,Lingerie,250,Clothing,1000,960.0
1,20268,2,Thursday,3,300,200,Shirts,600,Clothing,1800,480.0
2,20118,3,Saturday,3,700,102,Bannanas,150,Groceries,450,1120.0
3,20108,4,Sunday,2,150,301,Notepad,250,Stationary,500,240.0
4,20100,5,Tuesday,7,1000,104,Cereal,240,Groceries,1680,1600.0


In [0]:
# Using groupby and apply in the same expression
# Total tax each user paid
def onlyTax(df, withTax, withoutTax):
  """Return the summed difference between two columns of ``df``.

  ``withTax`` and ``withoutTax`` are column labels; the result is the total of
  ``df[withTax] - df[withoutTax]`` over all rows of the frame passed in.
  """
  tax_per_row = df[withTax] - df[withoutTax]
  return np.sum(tax_per_row)
        
# onlyTax runs once per user_id group; the extra positional arguments are
# forwarded to it as the withTax / withoutTax column names.
print(
    df.groupby('user_id').apply(onlyTax, 'addTax', 'total_bill')
)

user_id
20006      96.0
20041    1116.0
20056     792.0
20100    1710.0
20108     450.0
20118    1248.0
20120    1380.0
20268    1866.0
20269    1320.0
20284     510.0
20285     300.0
dtype: float64


In [0]:
# agg takes a dictionary: each key is the name of a column to aggregate and
# each value (after the colon) is the aggregation to run on that column.
# The result has one row per group (here: per day of week) and one column per
# dictionary key.
# The aggregation is given as the string 'sum', which selects pandas' own
# group-wise sum; it yields the same totals as passing np.sum, without the
# deprecation warning recent pandas versions raise for NumPy callables.
bdf = df.groupby('order_dow').agg({'total_bill': 'sum'})
bdf

Unnamed: 0_level_0,total_bill
order_dow,Unnamed: 1_level_1
Friday,3960
Monday,3310
Saturday,1190
Sunday,2100
Thursday,2430
Tuesday,2170
Wednesday,2820


In [0]:
# Another way to aggregate: here two statistics (sum and average) are computed
# for both total_bill and sum_total_price, per day of week.
# - The columns are selected with a list (double brackets); selecting several
#   columns with a bare 'a','b' tuple is rejected by current pandas.
# - Passing a {new_name: function} dict to agg for renaming is likewise no
#   longer supported, so each statistic is computed on its own and the results
#   are placed side by side under the top-level labels 'sum' and 'avg'.
per_dow = df.groupby('order_dow')[['total_bill', 'sum_total_price']]
pd.concat([per_dow.sum(), per_dow.mean()], axis=1, keys=['sum', 'avg'])

  return super(DataFrameGroupBy, self).aggregate(arg, *args, **kwargs)


Unnamed: 0_level_0,sum,sum,avg,avg
Unnamed: 0_level_1,total_bill,sum_total_price,total_bill,sum_total_price
order_dow,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2
Friday,3960,5530,792.0,1106.0
Monday,3310,3250,662.0,650.0
Saturday,1190,1000,396.666667,333.333333
Sunday,2100,4050,525.0,1012.5
Thursday,2430,5490,486.0,1098.0
Tuesday,2170,3220,542.5,805.0
Wednesday,2820,5480,705.0,1370.0
