## Pandas DataFrame Useful Methods

### The .apply() Method

In [2]:
import pandas as pd
import numpy as np

In [3]:
df = pd.read_csv('tips.csv')

In [4]:
df.head()

Unnamed: 0,total_bill,tip,sex,smoker,day,time,size,price_per_person,Payer Name,CC Number,Payment ID
0,16.99,1.01,Female,No,Sun,Dinner,2,8.49,Christy Cunningham,3560325168603410,Sun2959
1,10.34,1.66,Male,No,Sun,Dinner,3,3.45,Douglas Tucker,4478071379779230,Sun4608
2,21.01,3.5,Male,No,Sun,Dinner,3,7.0,Travis Walters,6011812112971322,Sun4458
3,23.68,3.31,Male,No,Sun,Dinner,2,11.84,Nathaniel Harris,4676137647685994,Sun5260
4,24.59,3.61,Female,No,Sun,Dinner,4,6.15,Tonya Carter,4832732618637221,Sun2251


In [5]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 244 entries, 0 to 243
Data columns (total 11 columns):
 #   Column            Non-Null Count  Dtype  
---  ------            --------------  -----  
 0   total_bill        244 non-null    float64
 1   tip               244 non-null    float64
 2   sex               244 non-null    object 
 3   smoker            244 non-null    object 
 4   day               244 non-null    object 
 5   time              244 non-null    object 
 6   size              244 non-null    int64  
 7   price_per_person  244 non-null    float64
 8   Payer Name        244 non-null    object 
 9   CC Number         244 non-null    int64  
 10  Payment ID        244 non-null    object 
dtypes: float64(3), int64(2), object(6)
memory usage: 21.1+ KB


In [9]:
num = str(1234567890)
num[-4:]

'7890'

In [10]:
def last_four(num):
    return str(num)[-4:]

In [11]:
last_four(num)

'7890'

In [12]:
df['CC Number'][0]

3560325168603410

In [13]:
last_four(df['CC Number'][0])

'3410'

In [15]:
df['last_four'] = df['CC Number'].apply(last_four)

In [16]:
df.head()

Unnamed: 0,total_bill,tip,sex,smoker,day,time,size,price_per_person,Payer Name,CC Number,Payment ID,last_four
0,16.99,1.01,Female,No,Sun,Dinner,2,8.49,Christy Cunningham,3560325168603410,Sun2959,3410
1,10.34,1.66,Male,No,Sun,Dinner,3,3.45,Douglas Tucker,4478071379779230,Sun4608,9230
2,21.01,3.5,Male,No,Sun,Dinner,3,7.0,Travis Walters,6011812112971322,Sun4458,1322
3,23.68,3.31,Male,No,Sun,Dinner,2,11.84,Nathaniel Harris,4676137647685994,Sun5260,5994
4,24.59,3.61,Female,No,Sun,Dinner,4,6.15,Tonya Carter,4832732618637221,Sun2251,7221


### Using .apply with more complex functions

In [18]:
df['total_bill'].mean()

19.78594262295082

In [19]:
def yelp(price):
    if price < 10:
        return '$'
    elif price >= 10 and price < 30:
        return '$$'
    else:
        return '$$$'

In [22]:
yelp(65.5)

'$$$'

In [24]:
df['Expensive'] = df['total_bill'].apply(yelp)

In [25]:
df.head(20)

Unnamed: 0,total_bill,tip,sex,smoker,day,time,size,price_per_person,Payer Name,CC Number,Payment ID,last_four,Expensive
0,16.99,1.01,Female,No,Sun,Dinner,2,8.49,Christy Cunningham,3560325168603410,Sun2959,3410,$$
1,10.34,1.66,Male,No,Sun,Dinner,3,3.45,Douglas Tucker,4478071379779230,Sun4608,9230,$$
2,21.01,3.5,Male,No,Sun,Dinner,3,7.0,Travis Walters,6011812112971322,Sun4458,1322,$$
3,23.68,3.31,Male,No,Sun,Dinner,2,11.84,Nathaniel Harris,4676137647685994,Sun5260,5994,$$
4,24.59,3.61,Female,No,Sun,Dinner,4,6.15,Tonya Carter,4832732618637221,Sun2251,7221,$$
5,25.29,4.71,Male,No,Sun,Dinner,4,6.32,Erik Smith,213140353657882,Sun9679,7882,$$
6,8.77,2.0,Male,No,Sun,Dinner,2,4.38,Kristopher Johnson,2223727524230344,Sun5985,344,$
7,26.88,3.12,Male,No,Sun,Dinner,4,6.72,Robert Buck,3514785077705092,Sun8157,5092,$$
8,15.04,1.96,Male,No,Sun,Dinner,2,7.52,Joseph Mcdonald,3522866365840377,Sun6820,377,$$
9,14.78,3.23,Male,No,Sun,Dinner,2,7.39,Jerome Abbott,3532124519049786,Sun3775,9786,$$


### apply that uses multiple columns

In [26]:
df.head()

Unnamed: 0,total_bill,tip,sex,smoker,day,time,size,price_per_person,Payer Name,CC Number,Payment ID,last_four,Expensive
0,16.99,1.01,Female,No,Sun,Dinner,2,8.49,Christy Cunningham,3560325168603410,Sun2959,3410,$$
1,10.34,1.66,Male,No,Sun,Dinner,3,3.45,Douglas Tucker,4478071379779230,Sun4608,9230,$$
2,21.01,3.5,Male,No,Sun,Dinner,3,7.0,Travis Walters,6011812112971322,Sun4458,1322,$$
3,23.68,3.31,Male,No,Sun,Dinner,2,11.84,Nathaniel Harris,4676137647685994,Sun5260,5994,$$
4,24.59,3.61,Female,No,Sun,Dinner,4,6.15,Tonya Carter,4832732618637221,Sun2251,7221,$$


In [33]:
# def quality(total_bill, tip):
#     if tip/total_bill > 0.25:
#         return 'Generous'
#     else:
#         return 'Normal'

### apply with lambda

In [28]:
def func(num):
    return num * 2

In [29]:
lambda num: num*2

<function __main__.<lambda>(num)>

In [32]:
df['total_bill'].apply(lambda bill: bill*0.18)

0      3.0582
1      1.8612
2      3.7818
3      4.2624
4      4.4262
        ...  
239    5.2254
240    4.8924
241    4.0806
242    3.2076
243    3.3804
Name: total_bill, Length: 244, dtype: float64

In [35]:
def quality(total_bill, tip):
    if tip/total_bill > 0.25:
        return 'Generous'
    else:
        return 'Normal'

In [37]:
df['Tip Quality'] = df[['total_bill','tip']].apply(lambda df: quality(df['total_bill'],df['tip']), axis=1)

In [40]:
df.head()

Unnamed: 0,total_bill,tip,sex,smoker,day,time,size,price_per_person,Payer Name,CC Number,Payment ID,last_four,Expensive,Tip Quality
0,16.99,1.01,Female,No,Sun,Dinner,2,8.49,Christy Cunningham,3560325168603410,Sun2959,3410,$$,Normal
1,10.34,1.66,Male,No,Sun,Dinner,3,3.45,Douglas Tucker,4478071379779230,Sun4608,9230,$$,Normal
2,21.01,3.5,Male,No,Sun,Dinner,3,7.0,Travis Walters,6011812112971322,Sun4458,1322,$$,Normal
3,23.68,3.31,Male,No,Sun,Dinner,2,11.84,Nathaniel Harris,4676137647685994,Sun5260,5994,$$,Normal
4,24.59,3.61,Female,No,Sun,Dinner,4,6.15,Tonya Carter,4832732618637221,Sun2251,7221,$$,Normal


In [41]:
import numpy as np

In [42]:
df['Tip Quality'] = np.vectorize(quality)(df['total_bill'], df['tip'])

In [43]:
df.head()

Unnamed: 0,total_bill,tip,sex,smoker,day,time,size,price_per_person,Payer Name,CC Number,Payment ID,last_four,Expensive,Tip Quality
0,16.99,1.01,Female,No,Sun,Dinner,2,8.49,Christy Cunningham,3560325168603410,Sun2959,3410,$$,Normal
1,10.34,1.66,Male,No,Sun,Dinner,3,3.45,Douglas Tucker,4478071379779230,Sun4608,9230,$$,Normal
2,21.01,3.5,Male,No,Sun,Dinner,3,7.0,Travis Walters,6011812112971322,Sun4458,1322,$$,Normal
3,23.68,3.31,Male,No,Sun,Dinner,2,11.84,Nathaniel Harris,4676137647685994,Sun5260,5994,$$,Normal
4,24.59,3.61,Female,No,Sun,Dinner,4,6.15,Tonya Carter,4832732618637221,Sun2251,7221,$$,Normal


In [44]:
import timeit

In [45]:
setup = '''
import numpy as np
import pandas as pd
df = pd.read_csv('tips.csv')
def quality(total_bill,tip):
    if tip/total_bill  > 0.25:
        return "Generous"
    else:
        return "Other"
'''

In [46]:
stmt_one = ''' 
df['Tip Quality'] = df[['total_bill','tip']].apply(lambda df: quality(df['total_bill'],df['tip']),axis=1)
'''

stmt_two = '''
df['Tip Quality'] = np.vectorize(quality)(df['total_bill'], df['tip'])
'''

In [48]:
timeit.timeit(setup=setup, stmt= stmt_one, number= 1000)

8.52118799999971

In [49]:
timeit.timeit(setup=setup, stmt= stmt_two, number= 1000)

0.5854846000001999

### Statistical Methods for DataFrames

In [50]:
df.head()

Unnamed: 0,total_bill,tip,sex,smoker,day,time,size,price_per_person,Payer Name,CC Number,Payment ID,last_four,Expensive,Tip Quality
0,16.99,1.01,Female,No,Sun,Dinner,2,8.49,Christy Cunningham,3560325168603410,Sun2959,3410,$$,Normal
1,10.34,1.66,Male,No,Sun,Dinner,3,3.45,Douglas Tucker,4478071379779230,Sun4608,9230,$$,Normal
2,21.01,3.5,Male,No,Sun,Dinner,3,7.0,Travis Walters,6011812112971322,Sun4458,1322,$$,Normal
3,23.68,3.31,Male,No,Sun,Dinner,2,11.84,Nathaniel Harris,4676137647685994,Sun5260,5994,$$,Normal
4,24.59,3.61,Female,No,Sun,Dinner,4,6.15,Tonya Carter,4832732618637221,Sun2251,7221,$$,Normal


In [51]:
df.describe()

Unnamed: 0,total_bill,tip,size,price_per_person,CC Number
count,244.0,244.0,244.0,244.0,244.0
mean,19.785943,2.998279,2.569672,7.888197,2563496000000000.0
std,8.902412,1.383638,0.9511,2.914234,2369340000000000.0
min,3.07,1.0,1.0,2.88,60406790000.0
25%,13.3475,2.0,2.0,5.8,30407310000000.0
50%,17.795,2.9,2.0,7.255,3525318000000000.0
75%,24.1275,3.5625,3.0,9.39,4553675000000000.0
max,50.81,10.0,6.0,20.27,6596454000000000.0


In [52]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 244 entries, 0 to 243
Data columns (total 14 columns):
 #   Column            Non-Null Count  Dtype  
---  ------            --------------  -----  
 0   total_bill        244 non-null    float64
 1   tip               244 non-null    float64
 2   sex               244 non-null    object 
 3   smoker            244 non-null    object 
 4   day               244 non-null    object 
 5   time              244 non-null    object 
 6   size              244 non-null    int64  
 7   price_per_person  244 non-null    float64
 8   Payer Name        244 non-null    object 
 9   CC Number         244 non-null    int64  
 10  Payment ID        244 non-null    object 
 11  last_four         244 non-null    object 
 12  Expensive         244 non-null    object 
 13  Tip Quality       244 non-null    object 
dtypes: float64(3), int64(2), object(9)
memory usage: 26.8+ KB


In [53]:
df.describe().transpose()

Unnamed: 0,count,mean,std,min,25%,50%,75%,max
total_bill,244.0,19.78594,8.902412,3.07,13.3475,17.795,24.1275,50.81
tip,244.0,2.998279,1.383638,1.0,2.0,2.9,3.5625,10.0
size,244.0,2.569672,0.9510998,1.0,2.0,2.0,3.0,6.0
price_per_person,244.0,7.888197,2.914234,2.88,5.8,7.255,9.39,20.27
CC Number,244.0,2563496000000000.0,2369340000000000.0,60406790000.0,30407310000000.0,3525318000000000.0,4553675000000000.0,6596454000000000.0


In [54]:
df.head()

Unnamed: 0,total_bill,tip,sex,smoker,day,time,size,price_per_person,Payer Name,CC Number,Payment ID,last_four,Expensive,Tip Quality
0,16.99,1.01,Female,No,Sun,Dinner,2,8.49,Christy Cunningham,3560325168603410,Sun2959,3410,$$,Normal
1,10.34,1.66,Male,No,Sun,Dinner,3,3.45,Douglas Tucker,4478071379779230,Sun4608,9230,$$,Normal
2,21.01,3.5,Male,No,Sun,Dinner,3,7.0,Travis Walters,6011812112971322,Sun4458,1322,$$,Normal
3,23.68,3.31,Male,No,Sun,Dinner,2,11.84,Nathaniel Harris,4676137647685994,Sun5260,5994,$$,Normal
4,24.59,3.61,Female,No,Sun,Dinner,4,6.15,Tonya Carter,4832732618637221,Sun2251,7221,$$,Normal


In [55]:
df.sort_values('tip')

Unnamed: 0,total_bill,tip,sex,smoker,day,time,size,price_per_person,Payer Name,CC Number,Payment ID,last_four,Expensive,Tip Quality
67,3.07,1.00,Female,Yes,Sat,Dinner,1,3.07,Tiffany Brock,4359488526995267,Sat3455,5267,$,Generous
236,12.60,1.00,Male,Yes,Sat,Dinner,2,6.30,Matthew Myers,3543676378973965,Sat5032,3965,$$,Normal
92,5.75,1.00,Female,Yes,Fri,Dinner,2,2.88,Leah Ramirez,3508911676966392,Fri3780,6392,$,Normal
111,7.25,1.00,Female,No,Sat,Dinner,1,7.25,Terri Jones,3559221007826887,Sat4801,6887,$,Normal
0,16.99,1.01,Female,No,Sun,Dinner,2,8.49,Christy Cunningham,3560325168603410,Sun2959,3410,$$,Normal
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
141,34.30,6.70,Male,No,Thur,Lunch,6,5.72,Steven Carlson,3526515703718508,Thur1025,8508,$$$,Normal
59,48.27,6.73,Male,No,Sat,Dinner,4,12.07,Brian Ortiz,6596453823950595,Sat8139,0595,$$$,Normal
23,39.42,7.58,Male,No,Sat,Dinner,4,9.86,Lance Peterson,3542584061609808,Sat239,9808,$$$,Normal
212,48.33,9.00,Male,No,Sat,Dinner,4,12.08,Alex Williamson,676218815212,Sat4590,5212,$$$,Normal


In [56]:
df.sort_values(['tip','size'])

Unnamed: 0,total_bill,tip,sex,smoker,day,time,size,price_per_person,Payer Name,CC Number,Payment ID,last_four,Expensive,Tip Quality
67,3.07,1.00,Female,Yes,Sat,Dinner,1,3.07,Tiffany Brock,4359488526995267,Sat3455,5267,$,Generous
111,7.25,1.00,Female,No,Sat,Dinner,1,7.25,Terri Jones,3559221007826887,Sat4801,6887,$,Normal
92,5.75,1.00,Female,Yes,Fri,Dinner,2,2.88,Leah Ramirez,3508911676966392,Fri3780,6392,$,Normal
236,12.60,1.00,Male,Yes,Sat,Dinner,2,6.30,Matthew Myers,3543676378973965,Sat5032,3965,$$,Normal
0,16.99,1.01,Female,No,Sun,Dinner,2,8.49,Christy Cunningham,3560325168603410,Sun2959,3410,$$,Normal
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
141,34.30,6.70,Male,No,Thur,Lunch,6,5.72,Steven Carlson,3526515703718508,Thur1025,8508,$$$,Normal
59,48.27,6.73,Male,No,Sat,Dinner,4,12.07,Brian Ortiz,6596453823950595,Sat8139,0595,$$$,Normal
23,39.42,7.58,Male,No,Sat,Dinner,4,9.86,Lance Peterson,3542584061609808,Sat239,9808,$$$,Normal
212,48.33,9.00,Male,No,Sat,Dinner,4,12.08,Alex Williamson,676218815212,Sat4590,5212,$$$,Normal


In [57]:
df.head()

Unnamed: 0,total_bill,tip,sex,smoker,day,time,size,price_per_person,Payer Name,CC Number,Payment ID,last_four,Expensive,Tip Quality
0,16.99,1.01,Female,No,Sun,Dinner,2,8.49,Christy Cunningham,3560325168603410,Sun2959,3410,$$,Normal
1,10.34,1.66,Male,No,Sun,Dinner,3,3.45,Douglas Tucker,4478071379779230,Sun4608,9230,$$,Normal
2,21.01,3.5,Male,No,Sun,Dinner,3,7.0,Travis Walters,6011812112971322,Sun4458,1322,$$,Normal
3,23.68,3.31,Male,No,Sun,Dinner,2,11.84,Nathaniel Harris,4676137647685994,Sun5260,5994,$$,Normal
4,24.59,3.61,Female,No,Sun,Dinner,4,6.15,Tonya Carter,4832732618637221,Sun2251,7221,$$,Normal


In [58]:
df.corr()

Unnamed: 0,total_bill,tip,size,price_per_person,CC Number
total_bill,1.0,0.675734,0.598315,0.647554,0.104576
tip,0.675734,1.0,0.489299,0.347405,0.110857
size,0.598315,0.489299,1.0,-0.175359,-0.030239
price_per_person,0.647554,0.347405,-0.175359,1.0,0.13524
CC Number,0.104576,0.110857,-0.030239,0.13524,1.0


In [59]:
df[['total_bill','tip']].corr()

Unnamed: 0,total_bill,tip
total_bill,1.0,0.675734
tip,0.675734,1.0


### idxmin, idxmax

In [60]:
df.head()

Unnamed: 0,total_bill,tip,sex,smoker,day,time,size,price_per_person,Payer Name,CC Number,Payment ID,last_four,Expensive,Tip Quality
0,16.99,1.01,Female,No,Sun,Dinner,2,8.49,Christy Cunningham,3560325168603410,Sun2959,3410,$$,Normal
1,10.34,1.66,Male,No,Sun,Dinner,3,3.45,Douglas Tucker,4478071379779230,Sun4608,9230,$$,Normal
2,21.01,3.5,Male,No,Sun,Dinner,3,7.0,Travis Walters,6011812112971322,Sun4458,1322,$$,Normal
3,23.68,3.31,Male,No,Sun,Dinner,2,11.84,Nathaniel Harris,4676137647685994,Sun5260,5994,$$,Normal
4,24.59,3.61,Female,No,Sun,Dinner,4,6.15,Tonya Carter,4832732618637221,Sun2251,7221,$$,Normal


In [62]:
df['total_bill'].max()

50.81

In [63]:
df['total_bill'].idxmax()

170

In [64]:
df['total_bill'].idxmin()

67

In [65]:
df.iloc[170]

total_bill                     50.81
tip                             10.0
sex                             Male
smoker                           Yes
day                              Sat
time                          Dinner
size                               3
price_per_person               16.94
Payer Name             Gregory Clark
CC Number           5473850968388236
Payment ID                   Sat1954
last_four                       8236
Expensive                        $$$
Tip Quality                   Normal
Name: 170, dtype: object

In [66]:
df.iloc[67]

total_bill                      3.07
tip                              1.0
sex                           Female
smoker                           Yes
day                              Sat
time                          Dinner
size                               1
price_per_person                3.07
Payer Name             Tiffany Brock
CC Number           4359488526995267
Payment ID                   Sat3455
last_four                       5267
Expensive                          $
Tip Quality                 Generous
Name: 67, dtype: object

### value_count()

In [67]:
df.head()

Unnamed: 0,total_bill,tip,sex,smoker,day,time,size,price_per_person,Payer Name,CC Number,Payment ID,last_four,Expensive,Tip Quality
0,16.99,1.01,Female,No,Sun,Dinner,2,8.49,Christy Cunningham,3560325168603410,Sun2959,3410,$$,Normal
1,10.34,1.66,Male,No,Sun,Dinner,3,3.45,Douglas Tucker,4478071379779230,Sun4608,9230,$$,Normal
2,21.01,3.5,Male,No,Sun,Dinner,3,7.0,Travis Walters,6011812112971322,Sun4458,1322,$$,Normal
3,23.68,3.31,Male,No,Sun,Dinner,2,11.84,Nathaniel Harris,4676137647685994,Sun5260,5994,$$,Normal
4,24.59,3.61,Female,No,Sun,Dinner,4,6.15,Tonya Carter,4832732618637221,Sun2251,7221,$$,Normal


In [68]:
df['sex'].value_counts()

Male      157
Female     87
Name: sex, dtype: int64

In [69]:
df['time'].value_counts()

Dinner    176
Lunch      68
Name: time, dtype: int64

### replace()

In [70]:
df.head()

Unnamed: 0,total_bill,tip,sex,smoker,day,time,size,price_per_person,Payer Name,CC Number,Payment ID,last_four,Expensive,Tip Quality
0,16.99,1.01,Female,No,Sun,Dinner,2,8.49,Christy Cunningham,3560325168603410,Sun2959,3410,$$,Normal
1,10.34,1.66,Male,No,Sun,Dinner,3,3.45,Douglas Tucker,4478071379779230,Sun4608,9230,$$,Normal
2,21.01,3.5,Male,No,Sun,Dinner,3,7.0,Travis Walters,6011812112971322,Sun4458,1322,$$,Normal
3,23.68,3.31,Male,No,Sun,Dinner,2,11.84,Nathaniel Harris,4676137647685994,Sun5260,5994,$$,Normal
4,24.59,3.61,Female,No,Sun,Dinner,4,6.15,Tonya Carter,4832732618637221,Sun2251,7221,$$,Normal


In [71]:
df['sex'].replace(to_replace='Female', value='F')

0         F
1      Male
2      Male
3      Male
4         F
       ... 
239    Male
240       F
241    Male
242    Male
243       F
Name: sex, Length: 244, dtype: object

### unique(), nunique()

In [72]:
df.head()

Unnamed: 0,total_bill,tip,sex,smoker,day,time,size,price_per_person,Payer Name,CC Number,Payment ID,last_four,Expensive,Tip Quality
0,16.99,1.01,Female,No,Sun,Dinner,2,8.49,Christy Cunningham,3560325168603410,Sun2959,3410,$$,Normal
1,10.34,1.66,Male,No,Sun,Dinner,3,3.45,Douglas Tucker,4478071379779230,Sun4608,9230,$$,Normal
2,21.01,3.5,Male,No,Sun,Dinner,3,7.0,Travis Walters,6011812112971322,Sun4458,1322,$$,Normal
3,23.68,3.31,Male,No,Sun,Dinner,2,11.84,Nathaniel Harris,4676137647685994,Sun5260,5994,$$,Normal
4,24.59,3.61,Female,No,Sun,Dinner,4,6.15,Tonya Carter,4832732618637221,Sun2251,7221,$$,Normal


In [73]:
df['day'].unique()

array(['Sun', 'Sat', 'Thur', 'Fri'], dtype=object)

In [74]:
df['day'].nunique()

4

In [75]:
df['size'].unique()

array([2, 3, 4, 1, 6, 5], dtype=int64)

In [79]:
df['size'].nunique()

6

### map()

In [80]:
my_map = {'Dinner':'D', 'Lunch':'L'}

In [87]:
df['time'].map(my_map).tail(30)

214    D
215    D
216    D
217    D
218    D
219    D
220    L
221    L
222    L
223    L
224    L
225    L
226    L
227    D
228    D
229    D
230    D
231    D
232    D
233    D
234    D
235    D
236    D
237    D
238    D
239    D
240    D
241    D
242    D
243    D
Name: time, dtype: object

### Duplicated() 

In [88]:
df1 = pd.DataFrame([1,2,2],['a','b','c'])
df1

Unnamed: 0,0
a,1
b,2
c,2


In [89]:
df1.duplicated()

a    False
b    False
c     True
dtype: bool

In [90]:
df1.drop_duplicates()

Unnamed: 0,0
a,1
b,2


### between

In [91]:
df['total_bill'].between(10,20,inclusive=True)

  df['total_bill'].between(10,20,inclusive=True)


0       True
1       True
2      False
3      False
4      False
       ...  
239    False
240    False
241    False
242     True
243     True
Name: total_bill, Length: 244, dtype: bool

In [92]:
df[df['total_bill'].between(10,20,inclusive=True)]

  df[df['total_bill'].between(10,20,inclusive=True)]


Unnamed: 0,total_bill,tip,sex,smoker,day,time,size,price_per_person,Payer Name,CC Number,Payment ID,last_four,Expensive,Tip Quality
0,16.99,1.01,Female,No,Sun,Dinner,2,8.49,Christy Cunningham,3560325168603410,Sun2959,3410,$$,Normal
1,10.34,1.66,Male,No,Sun,Dinner,3,3.45,Douglas Tucker,4478071379779230,Sun4608,9230,$$,Normal
8,15.04,1.96,Male,No,Sun,Dinner,2,7.52,Joseph Mcdonald,3522866365840377,Sun6820,0377,$$,Normal
9,14.78,3.23,Male,No,Sun,Dinner,2,7.39,Jerome Abbott,3532124519049786,Sun3775,9786,$$,Normal
10,10.27,1.71,Male,No,Sun,Dinner,2,5.14,William Riley,566287581219,Sun2546,1219,$$,Normal
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
234,15.53,3.00,Male,Yes,Sat,Dinner,2,7.76,Tracy Douglas,4097938155941930,Sat7220,1930,$$,Normal
235,10.07,1.25,Male,No,Sat,Dinner,2,5.04,Sean Gonzalez,3534021246117605,Sat4615,7605,$$,Normal
236,12.60,1.00,Male,Yes,Sat,Dinner,2,6.30,Matthew Myers,3543676378973965,Sat5032,3965,$$,Normal
242,17.82,1.75,Male,No,Sat,Dinner,2,8.91,Dennis Dixon,4375220550950,Sat17,0950,$$,Normal


## Sample()

In [93]:
df.sample(10)

Unnamed: 0,total_bill,tip,sex,smoker,day,time,size,price_per_person,Payer Name,CC Number,Payment ID,last_four,Expensive,Tip Quality
112,38.07,4.0,Male,No,Sun,Dinner,3,12.69,Jeff Lopez,3572865915176463,Sun591,6463,$$$,Normal
170,50.81,10.0,Male,Yes,Sat,Dinner,3,16.94,Gregory Clark,5473850968388236,Sat1954,8236,$$$,Normal
8,15.04,1.96,Male,No,Sun,Dinner,2,7.52,Joseph Mcdonald,3522866365840377,Sun6820,377,$$,Normal
56,38.01,3.0,Male,Yes,Sat,Dinner,4,9.5,James Christensen DDS,349793629453226,Sat8903,3226,$$$,Normal
12,15.42,1.57,Male,No,Sun,Dinner,2,7.71,Chad Harrington,577040572932,Sun1300,2932,$$,Normal
191,19.81,4.19,Female,Yes,Thur,Lunch,2,9.9,Kristy Boyd,4317015327600068,Thur967,68,$$,Normal
21,20.29,2.75,Female,No,Sat,Dinner,2,10.14,Natalie Gardner,5448125351489749,Sat9618,9749,$$,Normal
3,23.68,3.31,Male,No,Sun,Dinner,2,11.84,Nathaniel Harris,4676137647685994,Sun5260,5994,$$,Normal
65,20.08,3.15,Male,No,Sat,Dinner,3,6.69,Justin Dixon,180021262464926,Sat6840,4926,$$,Normal
47,32.4,6.0,Male,No,Sun,Dinner,4,8.1,James Barnes,3552002592874186,Sun9677,4186,$$$,Normal


In [94]:
df.sample(frac=0.1)

Unnamed: 0,total_bill,tip,sex,smoker,day,time,size,price_per_person,Payer Name,CC Number,Payment ID,last_four,Expensive,Tip Quality
199,13.51,2.0,Male,Yes,Thur,Lunch,2,6.76,Joseph Murphy MD,6547218923471275,Thur2428,1275,$$,Normal
17,16.29,3.71,Male,No,Sun,Dinner,3,5.43,John Pittman,6521340257218708,Sun2998,8708,$$,Normal
36,16.31,2.0,Male,No,Sat,Dinner,3,5.44,William Ford,3527691170179398,Sat9139,9398,$$,Normal
50,12.54,2.5,Male,No,Sun,Dinner,2,6.27,Jeremiah Neal,2225400829691416,Sun2021,1416,$$,Normal
52,34.81,5.2,Female,No,Sun,Dinner,4,8.7,Emily Daniel,4291280793094374,Sun6165,4374,$$$,Normal
201,12.74,2.01,Female,Yes,Thur,Lunch,2,6.37,Abigail Parks,3586645396220590,Thur2544,590,$$,Normal
3,23.68,3.31,Male,No,Sun,Dinner,2,11.84,Nathaniel Harris,4676137647685994,Sun5260,5994,$$,Normal
114,25.71,4.0,Female,No,Sun,Dinner,3,8.57,Katie Smith,5400160161311292,Sun6492,1292,$$,Normal
64,17.59,2.64,Male,No,Sat,Dinner,3,5.86,Michael Johnson,2222114458088108,Sat1667,8108,$$,Normal
60,20.29,3.21,Male,Yes,Sat,Dinner,2,10.14,Anthony Mclean,347614304015027,Sat2353,5027,$$,Normal


### nlargest, nsmallest

In [95]:
df.nlargest(10,'tip')

Unnamed: 0,total_bill,tip,sex,smoker,day,time,size,price_per_person,Payer Name,CC Number,Payment ID,last_four,Expensive,Tip Quality
170,50.81,10.0,Male,Yes,Sat,Dinner,3,16.94,Gregory Clark,5473850968388236,Sat1954,8236,$$$,Normal
212,48.33,9.0,Male,No,Sat,Dinner,4,12.08,Alex Williamson,676218815212,Sat4590,5212,$$$,Normal
23,39.42,7.58,Male,No,Sat,Dinner,4,9.86,Lance Peterson,3542584061609808,Sat239,9808,$$$,Normal
59,48.27,6.73,Male,No,Sat,Dinner,4,12.07,Brian Ortiz,6596453823950595,Sat8139,595,$$$,Normal
141,34.3,6.7,Male,No,Thur,Lunch,6,5.72,Steven Carlson,3526515703718508,Thur1025,8508,$$$,Normal
183,23.17,6.5,Male,Yes,Sun,Dinner,4,5.79,Dr. Michael James,4718501859162,Sun6059,9162,$$,Generous
214,28.17,6.5,Female,Yes,Sat,Dinner,3,9.39,Marissa Jackson,4922302538691962,Sat3374,1962,$$,Normal
47,32.4,6.0,Male,No,Sun,Dinner,4,8.1,James Barnes,3552002592874186,Sun9677,4186,$$$,Normal
239,29.03,5.92,Male,No,Sat,Dinner,3,9.68,Michael Avila,5296068606052842,Sat2657,2842,$$,Normal
88,24.71,5.85,Male,No,Thur,Lunch,2,12.36,Roger Taylor,4410248629955,Thur9003,9955,$$,Normal


In [96]:
df.nsmallest(10,'tip')

Unnamed: 0,total_bill,tip,sex,smoker,day,time,size,price_per_person,Payer Name,CC Number,Payment ID,last_four,Expensive,Tip Quality
67,3.07,1.0,Female,Yes,Sat,Dinner,1,3.07,Tiffany Brock,4359488526995267,Sat3455,5267,$,Generous
92,5.75,1.0,Female,Yes,Fri,Dinner,2,2.88,Leah Ramirez,3508911676966392,Fri3780,6392,$,Normal
111,7.25,1.0,Female,No,Sat,Dinner,1,7.25,Terri Jones,3559221007826887,Sat4801,6887,$,Normal
236,12.6,1.0,Male,Yes,Sat,Dinner,2,6.3,Matthew Myers,3543676378973965,Sat5032,3965,$$,Normal
0,16.99,1.01,Female,No,Sun,Dinner,2,8.49,Christy Cunningham,3560325168603410,Sun2959,3410,$$,Normal
215,12.9,1.1,Female,Yes,Sat,Dinner,2,6.45,Jessica Owen,4726904879471,Sat6983,9471,$$,Normal
237,32.83,1.17,Male,Yes,Sat,Dinner,2,16.42,Thomas Brown,4284722681265508,Sat2929,5508,$$$,Normal
75,10.51,1.25,Male,No,Sat,Dinner,2,5.26,Kenneth Hayes,213142079731108,Sat5056,1108,$$,Normal
135,8.51,1.25,Female,No,Thur,Lunch,2,4.26,Rebecca Harris,4320272020376174,Thur6600,6174,$,Normal
235,10.07,1.25,Male,No,Sat,Dinner,2,5.04,Sean Gonzalez,3534021246117605,Sat4615,7605,$$,Normal
