# Tips and Examples of Filtering DataFrames

In [1]:
# Import Pandas and Numpy
import pandas as pd
import numpy as np

# Download fictitious dataset 
- This is a fictitious dataset for an online bike shop that I created and donated to the Creative Commons domain
- Questions about this dataset can be seen here....

In [2]:
# Location of the fictitious online bike shop CSV on GitHub.
# The "fictitous" spelling is part of the actual URL path, so it must stay as-is.
DATASET_URL = 'https://github.com/troygreen/datasets/raw/master/fictitous-datasets/online_bikeshop.csv'

# Read the remote CSV straight into a DataFrame (needs network access).
df = pd.read_csv(DATASET_URL)

# Let's filter down to only the purchased items with a unit_price at or above 10

In [3]:
# Keep every row whose unit_price is at or above 10 as its own DataFrame.
# (Assigning `.head()` to `results` would store only the first five rows
# and display nothing.)
results = df[df['unit_price'] >= 10]

# Preview the first five matching rows; as the cell's last expression,
# this is what the notebook displays.
results.head()

# Now, let's pull all items purchased in California and in the "Bells" sub-category.

In [4]:
# Build one boolean mask per condition, then keep the rows where both hold.
in_california = df['state'] == 'California'
in_bells_sub_category = df['sub_category_dsc'] == 'Bells'

results = df[in_california & in_bells_sub_category]

# Only include customers whose last name contains the letters "an", and save the result as a new DataFrame

In [5]:
# Case-insensitive substring match on the customer's last name.
# na=False counts a missing last name as "no match"; without it, a NaN in the
# mask would make the boolean indexing raise instead of filtering.
results = df[df['customer_last_name'].str.contains("an", case=False, na=False)]

# Did you notice that the filter above used `case=False`?
- That makes the match case-insensitive, so the "an" can be upper case, lower case, or a mix (e.g. the "An" in "Anderson")
- Pull the first 20 records to see for yourself

In [6]:
# Show the first 20 last names that passed the filter.
results['customer_last_name'].head(20)

17      Anderson
22     Hernandez
43       Sanchez
72         Evans
73         Evans
93        Morgan
127      Sanders
148    Alexander
149    Alexander
150    Alexander
162       Bryant
193      Freeman
194      Freeman
246         Ryan
247         Ryan
248         Ryan
249    Fernandez
250    Fernandez
254      Daniels
255      Daniels
Name: customer_last_name, dtype: object

# Now, let's make the string value requirement case sensitive

In [7]:
# Case-sensitive substring match: only a lower-case "an" counts.
# na=False counts a missing last name as "no match"; without it, a NaN in the
# mask would make the boolean indexing raise instead of filtering.
results = df[df['customer_last_name'].str.contains("an", case=True, na=False)]

In [8]:
# Show the first 20 last names that passed the case-sensitive filter.
results['customer_last_name'].head(20)

22     Hernandez
43       Sanchez
72         Evans
73         Evans
93        Morgan
127      Sanders
148    Alexander
149    Alexander
150    Alexander
162       Bryant
193      Freeman
194      Freeman
246         Ryan
247         Ryan
248         Ryan
249    Fernandez
250    Fernandez
254      Daniels
255      Daniels
266         Tran
Name: customer_last_name, dtype: object