## Example

In [12]:
import sys
import pandas as pd
import numpy as np

In [13]:
### load the sales data; the path is relative to the notebook's working directory
df_cs = pd.read_csv('ComputerSales.csv')

In [14]:
### list the column labels of the loaded frame
df_cs.columns

Index(['Sale ID', 'Contact', 'Sex', 'Age', 'State', 'Product ID',
       'Product Type', 'Sale Price', 'Profit', 'Lead', 'Month', 'Year'],
      dtype='object')

In [18]:
def split_name(df):
    """Add 'First Name' and 'Last Name' columns derived from df['Contact'].

    The first whitespace-separated word becomes the first name and the
    remainder of the string becomes the last name, so 'Mary Jo Smith'
    yields ('Mary', 'Jo Smith'). A single-word contact gets a missing last
    name instead of raising; unpacking ``full_name.split()`` into exactly
    two variables fails with ValueError for any name that is not exactly
    two words.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame with a string 'Contact' column. It is modified in place.

    Returns
    -------
    pandas.DataFrame
        The same object that was passed in, with the two columns added
        (or overwritten if they already exist).
    """
    # Strip first so surrounding blanks do not leak into the last name;
    # n=1 caps the split at two pieces.
    parts = df['Contact'].str.strip().str.split(n=1, expand=True)
    # expand=True yields only as many columns as the longest split, so force
    # exactly two; a missing piece becomes a missing value.
    parts = parts.reindex(columns=[0, 1])
    df['First Name'] = parts[0]
    df['Last Name'] = parts[1]

    return df

### adds 'First Name' / 'Last Name' to df_cs itself (split_name assigns into the frame it is given)
split_name(df_cs).head()

def crea_age_groups(df):
    """Add an 'Age Group' categorical column that buckets df['Age'].

    The groups agree with their labels:

    * '<30'   : 0 <= age < 30
    * '30-50' : 30 <= age <= 50
    * '>50'   : age > 50

    With pd.cut's default right-closed bins, age 30 was labelled '<30' and
    age 0 fell outside every bin; both are handled here. Negative ages
    match no bin and become missing.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame with a numeric 'Age' column. It is modified in place.

    Returns
    -------
    pandas.DataFrame
        The same object that was passed in, with 'Age Group' added
        (or overwritten if it already exists).
    """
    labels = ['<30', '30-50', '>50']
    # Left-closed bins: [0, 30), [30, just above 50), [just above 50, inf).
    # nextafter moves the edge one float step past 50 so that 50 itself
    # stays in '30-50' while anything larger lands in '>50'.
    bins = [0, 30, np.nextafter(50, np.inf), np.inf]
    df['Age Group'] = pd.cut(df['Age'], bins=bins, labels=labels, right=False)
    return df

### adds 'Age Group' to df_cs itself; as the cell's last expression, this head() is what gets displayed
crea_age_groups(df_cs).head()

Unnamed: 0,Sale ID,Contact,Sex,Age,State,Product ID,Product Type,Sale Price,Profit,Lead,Month,Year,First Name,Last Name,Age Group
0,1,Paul Thomas,M,43,OH,M01-F0024,Desktop,479.99,143.39,Website,January,2018,Paul,Thomas,30-50
1,2,Margo Simms,F,37,WV,GT13-0024,Desktop,1249.99,230.89,Flyer 4,January,2018,Margo,Simms,30-50
2,3,Sam Stine,M,26,PA,I3670,Desktop,649.99,118.64,Website,February,2018,Sam,Stine,<30
3,4,Moe Eggert,M,35,PA,I3593,Laptop,399.99,72.09,Website,March,2018,Moe,Eggert,30-50
4,5,Jessica Elk,F,55,PA,15M-ED,Laptop,699.99,98.09,Flyer 4,March,2018,Jessica,Elk,>50


In [16]:
### column labels after split_name / crea_age_groups added their columns in place
df_cs.columns

Index(['Sale ID', 'Contact', 'Sex', 'Age', 'State', 'Product ID',
       'Product Type', 'Sale Price', 'Profit', 'Lead', 'Month', 'Year',
       'First Name', 'Last Name', 'Age Group'],
      dtype='object')

In [19]:
### mean of the 'Profit' column over all rows
df_cs['Profit'].mean()


139.97461538461542

In [22]:
### column-wise maximum of each selected column, computed independently
### NOTE(review): the two values are unrelated -- 'Product ID' gets the largest string in sort order,
### not the product that earned the largest profit; use df_cs.loc[df_cs['Profit'].idxmax()] for that row
df_cs[['Product ID', 'Profit']].max(axis=0)

Product ID    Q526FA
Profit        230.89
dtype: object

In [23]:
### boolean-mask filter: keep only the rows whose 'State' is 'OH'
df_cs[df_cs['State'] == 'OH']

Unnamed: 0,Sale ID,Contact,Sex,Age,State,Product ID,Product Type,Sale Price,Profit,Lead,Month,Year,First Name,Last Name,Age Group
0,1,Paul Thomas,M,43,OH,M01-F0024,Desktop,479.99,143.39,Website,January,2018,Paul,Thomas,30-50
6,7,Michelle Samms,F,46,OH,GA401IV,Laptop,1349.99,180.34,Email,May,2018,Michelle,Samms,30-50
7,8,Mick Roberts,M,23,OH,MY2J2LL,Tablet,999.99,146.69,Website,July,2018,Mick,Roberts,<30
8,9,Ed Klondike,M,52,OH,81TC00,Laptop,649.99,122.34,Email,July,2018,Ed,Klondike,>50
11,12,Sue Etna,F,54,OH,GT13-0024,Desktop,1249.99,230.89,Flyer 2,November,2018,Sue,Etna,>50
14,15,Andy Sands,M,56,OH,MY2J2LL,Tablet,999.99,146.69,Flyer 1,December,2018,Andy,Sands,>50
16,17,Edna Sanders,F,46,OH,15M-ED,Laptop,699.99,98.09,Email,February,2019,Edna,Sanders,30-50
22,23,Paul Thomas,M,43,OH,81TC00,Laptop,649.99,122.34,Website,August,2019,Paul,Thomas,30-50
26,27,Ed Klondike,M,52,OH,Q526FA,Laptop,1049.99,143.09,Website,December,2019,Ed,Klondike,>50
31,32,Sue Etna,F,54,OH,GT13-0024,Desktop,1249.99,230.89,Flyer 2,February,2020,Sue,Etna,>50


In [27]:
df_cs[df_cs['State'] == 'OH'].count()   ### count everything: non-null count per column (not the last expression, so not displayed)
df_cs[df_cs['State'] == 'OH']['State'].count()   ### non-null count of 'State' among the OH rows


11

In [29]:
### number of sales in 2019
len(df_cs[df_cs['Year'] == 2019])   ### row count of the filtered frame (result not displayed)
len(df_cs[df_cs['Year'] == 2019].index)   ### same count taken from the index; this is the displayed value

14

In [31]:
### number of sales for each product ID
df_cs['Product ID'].value_counts()

Product ID
GA401IV      6
M01-F0024    5
I3593        5
81TC00       5
GT13-0024    4
15M-ED       4
MY2J2LL      4
I3670        3
Q526FA       3
Name: count, dtype: int64

In [32]:
### number of sales for each product type
df_cs['Product Type'].value_counts()

Product Type
Laptop     23
Desktop    12
Tablet      4
Name: count, dtype: int64

In [34]:
### customers who purchased a specific product
df_cs[df_cs['Product ID'] == 'M01-F0024']   ### full rows for that product (result not displayed)
df_cs[df_cs['Product ID'] == 'M01-F0024']['Contact']   ### only the buyers' names; a repeat customer appears once per sale

0     Paul Thomas
9      Phil Jones
20     Jason Case
29     Phil Jones
36     Jason Case
Name: Contact, dtype: object

In [43]:
### number of website-lead sales with a profit above 150, built up step by step
### (only the value of the last expression is displayed)
df_cs["Lead"]   ### the lead-source column
df_cs["Lead"] == 'Website'   ### boolean mask: True where the lead is 'Website'
### each comparison is parenthesised because & binds tighter than == and >
df_cs[(df_cs['Lead'] == 'Website') & (df_cs['Profit'] > 150)]   ### rows matching both conditions
df_cs[(df_cs['Lead'] == 'Website') & (df_cs['Profit'] > 150)]['Lead']   ### a single column of those rows
df_cs[(df_cs['Lead'] == 'Website') & (df_cs['Profit'] > 150)]['Lead'].count()   ### how many such rows there are

2

In [48]:
### how many product profits end with 89 cents
df_cs['Profit'].apply(lambda cents: str(cents))   ### each profit rendered as text (result not displayed)
df_cs['Profit'].apply(lambda cents: str(cents).split('.')[1] == '89')   ### True where the text after the decimal point is exactly '89'
### NOTE(review): comparing the text after '.' is fragile -- a value such as 98.9 renders as '98.9',
### so the same approach with '90' would miss it
df_cs['Profit'].apply(lambda cents: str(cents).split('.')[1] == '89').value_counts()

Profit
False    35
True      4
Name: count, dtype: int64