In [1]:
import pandas as pd
import numpy as np

In [2]:
# pip install pandas --upgrade

In [3]:
pd.__version__

'1.4.4'

#### Section Intro

#### What Is A DataFrame?

In [4]:
# some python lists
names = ['Olga', 'Andrew', 'Brian', 'Telulah', 'Nicole', 'Tilda']
ages = [29, 21, 45, 23, 39, 46]
married = [False, True, True, True, False, True]

In [5]:
# pandas series
ser = pd.Series(names, name='name')

In [6]:
# pandas dataframe
df = pd.DataFrame({'name': names, 'age': ages, 'married': married})

In [7]:
df

Unnamed: 0,name,age,married
0,Olga,29,False
1,Andrew,21,True
2,Brian,45,True
3,Telulah,23,True
4,Nicole,39,False
5,Tilda,46,True


In [8]:
ser

0       Olga
1     Andrew
2      Brian
3    Telulah
4     Nicole
5      Tilda
Name: name, dtype: object

In [9]:
ser.iloc[2]

'Brian'

In [10]:
df.iloc[2,0]

'Brian'

In [11]:
df.iloc[2]

name       Brian
age           45
married     True
Name: 2, dtype: object

In [12]:
# .ndim

In [13]:
ser.ndim

1

In [14]:
df.ndim

2

In [15]:
# .shape

In [16]:
ser.shape

(6,)

In [17]:
df.shape

(6, 3)

In [18]:
# word of the lecture: collate - to collect in a given (or proper) order

In [19]:
ser

0       Olga
1     Andrew
2      Brian
3    Telulah
4     Nicole
5      Tilda
Name: name, dtype: object

In [20]:
df.name

0       Olga
1     Andrew
2      Brian
3    Telulah
4     Nicole
5      Tilda
Name: name, dtype: object

In [21]:
# df.dtype

In [22]:
df.dtypes

name       object
age         int64
married      bool
dtype: object

#### Creating A DataFrame

In [23]:
df

Unnamed: 0,name,age,married
0,Olga,29,False
1,Andrew,21,True
2,Brian,45,True
3,Telulah,23,True
4,Nicole,39,False
5,Tilda,46,True


In [24]:
names

['Olga', 'Andrew', 'Brian', 'Telulah', 'Nicole', 'Tilda']

In [25]:
ages

[29, 21, 45, 23, 39, 46]

In [26]:
married

[False, True, True, True, False, True]

In [27]:
pd.DataFrame({'name': names, 'age': ages, 'married': married})

Unnamed: 0,name,age,married
0,Olga,29,False
1,Andrew,21,True
2,Brian,45,True
3,Telulah,23,True
4,Nicole,39,False
5,Tilda,46,True


In [28]:
len(names)

6

In [29]:
new_names = names + ['Ryan']

In [30]:
new_names

['Olga', 'Andrew', 'Brian', 'Telulah', 'Nicole', 'Tilda', 'Ryan']

In [31]:
len(new_names)

7

In [32]:
# pd.DataFrame({'name': new_names, 'age': ages, 'married': married})

#### BONUS - Four More Ways To Build DataFrames

In [33]:
tuple_names = tuple(names)
tuple_ages = tuple(ages)
tuple_married = tuple(married)

In [34]:
tuple_names

('Olga', 'Andrew', 'Brian', 'Telulah', 'Nicole', 'Tilda')

In [35]:
names

['Olga', 'Andrew', 'Brian', 'Telulah', 'Nicole', 'Tilda']

In [36]:
pd.DataFrame({'name': tuple_names,
              'ages': tuple_ages,
              'married': tuple_married})

Unnamed: 0,name,ages,married
0,Olga,29,False
1,Andrew,21,True
2,Brian,45,True
3,Telulah,23,True
4,Nicole,39,False
5,Tilda,46,True


In [37]:
# 2 - dict of series

In [38]:
series_names = pd.Series(names)

In [39]:
series_ages = pd.Series(ages)

In [40]:
series_married = pd.Series(married)

In [41]:
pd.DataFrame({'name': series_names,
              'ages': series_ages,
              'married': series_married})

Unnamed: 0,name,ages,married
0,Olga,29,False
1,Andrew,21,True
2,Brian,45,True
3,Telulah,23,True
4,Nicole,39,False
5,Tilda,46,True


In [42]:
pd.DataFrame({'names': ['Olga', 'Andrew']})

Unnamed: 0,names
0,Olga
1,Andrew


In [43]:
pd.DataFrame({'names': {0:'Olga', 1:'Andrew'}})

Unnamed: 0,names
0,Olga
1,Andrew


In [44]:
# the enumerate function

In [45]:
enumerate(names)

<enumerate at 0x251b4ce0f80>

In [46]:
list(enumerate(names))

[(0, 'Olga'),
 (1, 'Andrew'),
 (2, 'Brian'),
 (3, 'Telulah'),
 (4, 'Nicole'),
 (5, 'Tilda')]

In [47]:
def convert_list_to_dict(l):
  return {k:v for k,v in enumerate(l)}

In [48]:
dict_names = convert_list_to_dict(names)

In [49]:
dict_names

{0: 'Olga', 1: 'Andrew', 2: 'Brian', 3: 'Telulah', 4: 'Nicole', 5: 'Tilda'}

In [50]:
dict_ages = convert_list_to_dict(ages)
dict_married = convert_list_to_dict(married)

In [51]:
pd.DataFrame({'name': dict_names,
              'ages': dict_ages,
              'married': dict_married})

Unnamed: 0,name,ages,married
0,Olga,29,False
1,Andrew,21,True
2,Brian,45,True
3,Telulah,23,True
4,Nicole,39,False
5,Tilda,46,True


In [52]:
# 4 - list of dicts

In [53]:
pd.DataFrame([{
    'name': 'Olga',
    'age': 29,
    'married': False
}])

Unnamed: 0,name,age,married
0,Olga,29,False


In [54]:
# the zip method

In [55]:
list(zip(names, ages, married))

[('Olga', 29, False),
 ('Andrew', 21, True),
 ('Brian', 45, True),
 ('Telulah', 23, True),
 ('Nicole', 39, False),
 ('Tilda', 46, True)]

In [56]:
rowwise = [{'name': name, 'age': ages, 'married': married} for name, ages, married in zip(names, ages, married)]

In [57]:
pd.DataFrame(rowwise)

Unnamed: 0,name,age,married
0,Olga,29,False
1,Andrew,21,True
2,Brian,45,True
3,Telulah,23,True
4,Nicole,39,False
5,Tilda,46,True


#### The info() Method

In [58]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 6 entries, 0 to 5
Data columns (total 3 columns):
 #   Column   Non-Null Count  Dtype 
---  ------   --------------  ----- 
 0   name     6 non-null      object
 1   age      6 non-null      int64 
 2   married  6 non-null      bool  
dtypes: bool(1), int64(1), object(1)
memory usage: 230.0+ bytes


In [59]:
df.info(verbose=True)

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 6 entries, 0 to 5
Data columns (total 3 columns):
 #   Column   Non-Null Count  Dtype 
---  ------   --------------  ----- 
 0   name     6 non-null      object
 1   age      6 non-null      int64 
 2   married  6 non-null      bool  
dtypes: bool(1), int64(1), object(1)
memory usage: 230.0+ bytes


In [60]:
df.info(max_cols=2)

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 6 entries, 0 to 5
Columns: 3 entries, name to married
dtypes: bool(1), int64(1), object(1)
memory usage: 230.0+ bytes


In [61]:
df.info(memory_usage='deep')

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 6 entries, 0 to 5
Data columns (total 3 columns):
 #   Column   Non-Null Count  Dtype 
---  ------   --------------  ----- 
 0   name     6 non-null      object
 1   age      6 non-null      int64 
 2   married  6 non-null      bool  
dtypes: bool(1), int64(1), object(1)
memory usage: 557.0 bytes


#### Reading In Nutrition Data

In [352]:
dataurl = 'https://andybek.com/pandas-nutrition'

In [353]:
nutrition = pd.read_csv(dataurl)

In [354]:
nutrition.head(10)

Unnamed: 0.1,Unnamed: 0,name,serving_size,calories,total_fat,saturated_fat,cholesterol,sodium,choline,folate,...,fat,saturated_fatty_acids,monounsaturated_fatty_acids,polyunsaturated_fatty_acids,fatty_acids_total_trans,alcohol,ash,caffeine,theobromine,water
0,0,Cornstarch,100 g,381,0.1g,,0,9.00 mg,0.4 mg,0.00 mcg,...,0.05 g,0.009 g,0.016 g,0.025 g,0.00 mg,0.0 g,0.09 g,0.00 mg,0.00 mg,8.32 g
1,1,"Nuts, pecans",100 g,691,72g,6.2g,0,0.00 mg,40.5 mg,22.00 mcg,...,71.97 g,6.180 g,40.801 g,21.614 g,0.00 mg,0.0 g,1.49 g,0.00 mg,0.00 mg,3.52 g
2,2,"Eggplant, raw",100 g,25,0.2g,,0,2.00 mg,6.9 mg,22.00 mcg,...,0.18 g,0.034 g,0.016 g,0.076 g,0.00 mg,0.0 g,0.66 g,0.00 mg,0.00 mg,92.30 g
3,3,"Teff, uncooked",100 g,367,2.4g,0.4g,0,12.00 mg,13.1 mg,0,...,2.38 g,0.449 g,0.589 g,1.071 g,0,0,2.37 g,0,0,8.82 g
4,4,"Sherbet, orange",100 g,144,2g,1.2g,1mg,46.00 mg,7.7 mg,4.00 mcg,...,2.00 g,1.160 g,0.530 g,0.080 g,1.00 mg,0.0 g,0.40 g,0.00 mg,0.00 mg,66.10 g
5,5,"Cauliflower, raw",100 g,25,0.3g,0.1g,0,30.00 mg,44.3 mg,57.00 mcg,...,0.28 g,0.130 g,0.034 g,0.031 g,0.00 mg,0.0 g,0.76 g,0.00 mg,0.00 mg,92.07 g
6,6,"Taro leaves, raw",100 g,42,0.7g,0.2g,0,3.00 mg,12.8 mg,126.00 mcg,...,0.74 g,0.151 g,0.060 g,0.307 g,0.00 mg,0.0 g,1.92 g,0.00 mg,0.00 mg,85.66 g
7,7,"Lamb, raw, ground",100 g,282,23g,10g,73mg,59.00 mg,69.3 mg,18.00 mcg,...,23.41 g,10.190 g,9.600 g,1.850 g,73.00 mg,0.0 g,0.87 g,0.00 mg,0.00 mg,59.47 g
8,8,"Cheese, camembert",100 g,300,24g,15g,72mg,842.00 mg,15.4 mg,62.00 mcg,...,24.26 g,15.259 g,7.023 g,0.724 g,72.00 mg,0.0 g,3.68 g,0.00 mg,0.00 mg,51.80 g
9,9,Vegetarian fillets,100 g,290,18g,2.8g,0,490.00 mg,82.0 mg,102.00 mcg,...,18.00 g,2.849 g,4.376 g,9.332 g,0.00 mg,0.0 g,5.00 g,0.00 mg,0.00 mg,45.00 g


In [355]:
nutrition.info(verbose=False, memory_usage='deep')

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 8789 entries, 0 to 8788
Columns: 77 entries, Unnamed: 0 to water
dtypes: int64(3), object(74)
memory usage: 39.2 MB


#### Some Cleanup: Removing The Duplicated Index

In [66]:
nutrition.head()

Unnamed: 0.1,Unnamed: 0,name,serving_size,calories,total_fat,saturated_fat,cholesterol,sodium,choline,folate,...,fat,saturated_fatty_acids,monounsaturated_fatty_acids,polyunsaturated_fatty_acids,fatty_acids_total_trans,alcohol,ash,caffeine,theobromine,water
0,0,Cornstarch,100 g,381,0.1g,,0,9.00 mg,0.4 mg,0.00 mcg,...,0.05 g,0.009 g,0.016 g,0.025 g,0.00 mg,0.0 g,0.09 g,0.00 mg,0.00 mg,8.32 g
1,1,"Nuts, pecans",100 g,691,72g,6.2g,0,0.00 mg,40.5 mg,22.00 mcg,...,71.97 g,6.180 g,40.801 g,21.614 g,0.00 mg,0.0 g,1.49 g,0.00 mg,0.00 mg,3.52 g
2,2,"Eggplant, raw",100 g,25,0.2g,,0,2.00 mg,6.9 mg,22.00 mcg,...,0.18 g,0.034 g,0.016 g,0.076 g,0.00 mg,0.0 g,0.66 g,0.00 mg,0.00 mg,92.30 g
3,3,"Teff, uncooked",100 g,367,2.4g,0.4g,0,12.00 mg,13.1 mg,0,...,2.38 g,0.449 g,0.589 g,1.071 g,0,0,2.37 g,0,0,8.82 g
4,4,"Sherbet, orange",100 g,144,2g,1.2g,1mg,46.00 mg,7.7 mg,4.00 mcg,...,2.00 g,1.160 g,0.530 g,0.080 g,1.00 mg,0.0 g,0.40 g,0.00 mg,0.00 mg,66.10 g


In [67]:
nutrition['Unnamed: 0']

0          0
1          1
2          2
3          3
4          4
        ... 
8784    8784
8785    8785
8786    8786
8787    8787
8788    8788
Name: Unnamed: 0, Length: 8789, dtype: int64

In [68]:
nutrition.drop('Unnamed: 0', axis=1)

Unnamed: 0,name,serving_size,calories,total_fat,saturated_fat,cholesterol,sodium,choline,folate,folic_acid,...,fat,saturated_fatty_acids,monounsaturated_fatty_acids,polyunsaturated_fatty_acids,fatty_acids_total_trans,alcohol,ash,caffeine,theobromine,water
0,Cornstarch,100 g,381,0.1g,,0,9.00 mg,0.4 mg,0.00 mcg,0.00 mcg,...,0.05 g,0.009 g,0.016 g,0.025 g,0.00 mg,0.0 g,0.09 g,0.00 mg,0.00 mg,8.32 g
1,"Nuts, pecans",100 g,691,72g,6.2g,0,0.00 mg,40.5 mg,22.00 mcg,0.00 mcg,...,71.97 g,6.180 g,40.801 g,21.614 g,0.00 mg,0.0 g,1.49 g,0.00 mg,0.00 mg,3.52 g
2,"Eggplant, raw",100 g,25,0.2g,,0,2.00 mg,6.9 mg,22.00 mcg,0.00 mcg,...,0.18 g,0.034 g,0.016 g,0.076 g,0.00 mg,0.0 g,0.66 g,0.00 mg,0.00 mg,92.30 g
3,"Teff, uncooked",100 g,367,2.4g,0.4g,0,12.00 mg,13.1 mg,0,0,...,2.38 g,0.449 g,0.589 g,1.071 g,0,0,2.37 g,0,0,8.82 g
4,"Sherbet, orange",100 g,144,2g,1.2g,1mg,46.00 mg,7.7 mg,4.00 mcg,0.00 mcg,...,2.00 g,1.160 g,0.530 g,0.080 g,1.00 mg,0.0 g,0.40 g,0.00 mg,0.00 mg,66.10 g
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
8784,"Beef, raw, all grades, trimmed to 0"" fat, sepa...",100 g,125,3.5g,1.4g,62mg,54.00 mg,64.5 mg,4.00 mcg,0.00 mcg,...,3.50 g,1.353 g,1.554 g,0.244 g,62.00 mg,0.0 g,1.11 g,0.00 mg,0.00 mg,72.51 g
8785,"Lamb, cooked, separable lean only, composite o...",100 g,206,8.9g,3.9g,109mg,50.00 mg,0,0.00 mcg,0.00 mcg,...,8.86 g,3.860 g,3.480 g,0.520 g,109.00 mg,0,1.60 g,0,0,59.95 g
8786,"Lamb, raw, separable lean and fat, composite o...",100 g,277,23g,12g,78mg,39.00 mg,0,1.00 mcg,0.00 mcg,...,22.74 g,11.570 g,8.720 g,0.980 g,78.00 mg,0,0.92 g,0,0,59.80 g
8787,"Beef, raw, all grades, trimmed to 0"" fat, sepa...",100 g,121,3g,1.1g,60mg,53.00 mg,64.2 mg,4.00 mcg,0.00 mcg,...,3.04 g,1.086 g,1.266 g,0.233 g,60.00 mg,0.0 g,1.10 g,0.00 mg,0.00 mg,73.43 g


In [336]:
nutrition.set_index('Unnamed: 0')

Unnamed: 0_level_0,name,serving_size,calories,total_fat,saturated_fat,cholesterol,sodium,choline,folate,folic_acid,...,fat,saturated_fatty_acids,monounsaturated_fatty_acids,polyunsaturated_fatty_acids,fatty_acids_total_trans,alcohol,ash,caffeine,theobromine,water
Unnamed: 0,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
0,Cornstarch,100 g,381,0.1g,,0,9.00 mg,0.4 mg,0.00 mcg,0.00 mcg,...,0.05 g,0.009 g,0.016 g,0.025 g,0.00 mg,0.0 g,0.09 g,0.00 mg,0.00 mg,8.32 g
1,"Nuts, pecans",100 g,691,72g,6.2g,0,0.00 mg,40.5 mg,22.00 mcg,0.00 mcg,...,71.97 g,6.180 g,40.801 g,21.614 g,0.00 mg,0.0 g,1.49 g,0.00 mg,0.00 mg,3.52 g
2,"Eggplant, raw",100 g,25,0.2g,,0,2.00 mg,6.9 mg,22.00 mcg,0.00 mcg,...,0.18 g,0.034 g,0.016 g,0.076 g,0.00 mg,0.0 g,0.66 g,0.00 mg,0.00 mg,92.30 g
3,"Teff, uncooked",100 g,367,2.4g,0.4g,0,12.00 mg,13.1 mg,0,0,...,2.38 g,0.449 g,0.589 g,1.071 g,0,0,2.37 g,0,0,8.82 g
4,"Sherbet, orange",100 g,144,2g,1.2g,1mg,46.00 mg,7.7 mg,4.00 mcg,0.00 mcg,...,2.00 g,1.160 g,0.530 g,0.080 g,1.00 mg,0.0 g,0.40 g,0.00 mg,0.00 mg,66.10 g
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
8784,"Beef, raw, all grades, trimmed to 0"" fat, sepa...",100 g,125,3.5g,1.4g,62mg,54.00 mg,64.5 mg,4.00 mcg,0.00 mcg,...,3.50 g,1.353 g,1.554 g,0.244 g,62.00 mg,0.0 g,1.11 g,0.00 mg,0.00 mg,72.51 g
8785,"Lamb, cooked, separable lean only, composite o...",100 g,206,8.9g,3.9g,109mg,50.00 mg,0,0.00 mcg,0.00 mcg,...,8.86 g,3.860 g,3.480 g,0.520 g,109.00 mg,0,1.60 g,0,0,59.95 g
8786,"Lamb, raw, separable lean and fat, composite o...",100 g,277,23g,12g,78mg,39.00 mg,0,1.00 mcg,0.00 mcg,...,22.74 g,11.570 g,8.720 g,0.980 g,78.00 mg,0,0.92 g,0,0,59.80 g
8787,"Beef, raw, all grades, trimmed to 0"" fat, sepa...",100 g,121,3g,1.1g,60mg,53.00 mg,64.2 mg,4.00 mcg,0.00 mcg,...,3.04 g,1.086 g,1.266 g,0.233 g,60.00 mg,0.0 g,1.10 g,0.00 mg,0.00 mg,73.43 g


In [337]:
nutrition.head()

Unnamed: 0.1,Unnamed: 0,name,serving_size,calories,total_fat,saturated_fat,cholesterol,sodium,choline,folate,...,fat,saturated_fatty_acids,monounsaturated_fatty_acids,polyunsaturated_fatty_acids,fatty_acids_total_trans,alcohol,ash,caffeine,theobromine,water
0,0,Cornstarch,100 g,381,0.1g,,0,9.00 mg,0.4 mg,0.00 mcg,...,0.05 g,0.009 g,0.016 g,0.025 g,0.00 mg,0.0 g,0.09 g,0.00 mg,0.00 mg,8.32 g
1,1,"Nuts, pecans",100 g,691,72g,6.2g,0,0.00 mg,40.5 mg,22.00 mcg,...,71.97 g,6.180 g,40.801 g,21.614 g,0.00 mg,0.0 g,1.49 g,0.00 mg,0.00 mg,3.52 g
2,2,"Eggplant, raw",100 g,25,0.2g,,0,2.00 mg,6.9 mg,22.00 mcg,...,0.18 g,0.034 g,0.016 g,0.076 g,0.00 mg,0.0 g,0.66 g,0.00 mg,0.00 mg,92.30 g
3,3,"Teff, uncooked",100 g,367,2.4g,0.4g,0,12.00 mg,13.1 mg,0,...,2.38 g,0.449 g,0.589 g,1.071 g,0,0,2.37 g,0,0,8.82 g
4,4,"Sherbet, orange",100 g,144,2g,1.2g,1mg,46.00 mg,7.7 mg,4.00 mcg,...,2.00 g,1.160 g,0.530 g,0.080 g,1.00 mg,0.0 g,0.40 g,0.00 mg,0.00 mg,66.10 g


In [71]:
dataurl

'https://andybek.com/pandas-nutrition'

In [72]:
nutrition = pd.read_csv(dataurl, index_col=[0])


In [73]:
type(nutrition.columns)

pandas.core.indexes.base.Index

In [338]:
nutrition.head()

Unnamed: 0.1,Unnamed: 0,name,serving_size,calories,total_fat,saturated_fat,cholesterol,sodium,choline,folate,...,fat,saturated_fatty_acids,monounsaturated_fatty_acids,polyunsaturated_fatty_acids,fatty_acids_total_trans,alcohol,ash,caffeine,theobromine,water
0,0,Cornstarch,100 g,381,0.1g,,0,9.00 mg,0.4 mg,0.00 mcg,...,0.05 g,0.009 g,0.016 g,0.025 g,0.00 mg,0.0 g,0.09 g,0.00 mg,0.00 mg,8.32 g
1,1,"Nuts, pecans",100 g,691,72g,6.2g,0,0.00 mg,40.5 mg,22.00 mcg,...,71.97 g,6.180 g,40.801 g,21.614 g,0.00 mg,0.0 g,1.49 g,0.00 mg,0.00 mg,3.52 g
2,2,"Eggplant, raw",100 g,25,0.2g,,0,2.00 mg,6.9 mg,22.00 mcg,...,0.18 g,0.034 g,0.016 g,0.076 g,0.00 mg,0.0 g,0.66 g,0.00 mg,0.00 mg,92.30 g
3,3,"Teff, uncooked",100 g,367,2.4g,0.4g,0,12.00 mg,13.1 mg,0,...,2.38 g,0.449 g,0.589 g,1.071 g,0,0,2.37 g,0,0,8.82 g
4,4,"Sherbet, orange",100 g,144,2g,1.2g,1mg,46.00 mg,7.7 mg,4.00 mcg,...,2.00 g,1.160 g,0.530 g,0.080 g,1.00 mg,0.0 g,0.40 g,0.00 mg,0.00 mg,66.10 g


#### The sample() Method

In [75]:
nutrition.sample()
#Return a random sample of items from an axis of object.

Unnamed: 0,name,serving_size,calories,total_fat,saturated_fat,cholesterol,sodium,choline,folate,folic_acid,...,fat,saturated_fatty_acids,monounsaturated_fatty_acids,polyunsaturated_fatty_acids,fatty_acids_total_trans,alcohol,ash,caffeine,theobromine,water
2519,"Mollusks, raw, wild, eastern, oyster",100 g,51,1.7g,0.5g,40mg,85.00 mg,65.0 mg,7.00 mcg,0.00 mcg,...,1.71 g,0.474 g,0.253 g,0.528 g,40.00 mg,0.0 g,0.82 g,0.00 mg,0.00 mg,89.04 g


In [76]:
nutrition.sample(random_state=1)
#random state is for ensuring same output is obtain everytime

Unnamed: 0,name,serving_size,calories,total_fat,saturated_fat,cholesterol,sodium,choline,folate,folic_acid,...,fat,saturated_fatty_acids,monounsaturated_fatty_acids,polyunsaturated_fatty_acids,fatty_acids_total_trans,alcohol,ash,caffeine,theobromine,water
8785,"Lamb, cooked, separable lean only, composite o...",100 g,206,8.9g,3.9g,109mg,50.00 mg,0,0.00 mcg,0.00 mcg,...,8.86 g,3.860 g,3.480 g,0.520 g,109.00 mg,0,1.60 g,0,0,59.95 g


In [77]:
nutrition.sample(n=3)

Unnamed: 0,name,serving_size,calories,total_fat,saturated_fat,cholesterol,sodium,choline,folate,folic_acid,...,fat,saturated_fatty_acids,monounsaturated_fatty_acids,polyunsaturated_fatty_acids,fatty_acids_total_trans,alcohol,ash,caffeine,theobromine,water
3184,"Seeds, dried, whole, sisymbrium sp. seeds",100 g,318,4.6g,0.9g,0,92.00 mg,0,95.00 mcg,0.00 mcg,...,4.60 g,0.902 g,1.486 g,2.023 g,0.00 mg,0,18.90 g,0,0,6.10 g
1375,"SPAGHETTIOS, SpaghettiOs A to Z's",100 g,67,0.4g,0.2g,2mg,238.00 mg,0,40.00 mcg,0,...,0.40 g,0.198 g,0.000 g,0.198 g,2.00 mg,0,1.93 g,0,0,81.40 g
6068,"Beef macaroni with tomato sauce, reduced fat, ...",100 g,113,2g,0.4g,10mg,178.00 mg,17.8 mg,31.00 mcg,23.00 mcg,...,1.96 g,0.443 g,0.816 g,0.228 g,10.00 mg,0.0 g,1.15 g,0.00 mg,0.00 mg,73.00 g


In [78]:
nutrition.sample(frac=0.01)
#frac is use to obtain a % of dataframe 1%=0.01
#total rows = 8789 and columns=76
#frac= 0.0.1 means a sample will be 1% of the data i.e. 88rows and 76 columns

Unnamed: 0,name,serving_size,calories,total_fat,saturated_fat,cholesterol,sodium,choline,folate,folic_acid,...,fat,saturated_fatty_acids,monounsaturated_fatty_acids,polyunsaturated_fatty_acids,fatty_acids_total_trans,alcohol,ash,caffeine,theobromine,water
2157,"WORTHINGTON FriPats, unprepared, frozen",100 g,209,9.1g,1.4g,2mg,517.00 mg,0,0,0,...,9.10 g,1.400 g,2.200 g,5.200 g,2.00 mg,0,2.40 g,0,0,56.80 g
8625,"Beef, braised, cooked, choice, trimmed to 1/8""...",100 g,254,13g,4.8g,101mg,42.00 mg,125.1 mg,10.00 mcg,0.00 mcg,...,12.56 g,4.766 g,5.355 g,0.484 g,101.00 mg,0.0 g,0.95 g,0.00 mg,0.00 mg,54.27 g
5237,"Jams, sweetened with fruit juice, marmalades, ...",100 g,212,0g,,0,0.00 mg,2.5 mg,7.00 mcg,0.00 mcg,...,0.00 g,0.000 g,0.000 g,0.000 g,0.00 mg,0.0 g,0.23 g,0.00 mg,0.00 mg,46.84 g
3453,"Babyfood, strained, garden vegetable, vegetables",100 g,32,0.2g,,0,31.00 mg,13.4 mg,40.00 mcg,0.00 mcg,...,0.20 g,0.035 g,0.011 g,0.090 g,0.00 mg,0.0 g,0.70 g,0.00 mg,0.00 mg,90.00 g
4833,"Yardlong beans, without salt, boiled, cooked, ...",100 g,118,0.5g,0.1g,0,5.00 mg,0,146.00 mcg,0.00 mcg,...,0.45 g,0.116 g,0.039 g,0.193 g,0.00 mg,0,1.37 g,0,0,68.80 g
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
6635,"Fish, dry heat, cooked, mixed species, Pacific...",100 g,201,10g,2.9g,60mg,110.00 mg,83.6 mg,2.00 mcg,0.00 mcg,...,10.12 g,2.881 g,3.371 g,2.488 g,60.00 mg,0.0 g,2.08 g,0.00 mg,0.00 mg,61.73 g
8314,"Beef, grilled, cooked, select, trimmed to 0"" f...",100 g,207,9.1g,3.7g,85mg,57.00 mg,66.5 mg,7.00 mcg,0.00 mcg,...,9.12 g,3.667 g,4.002 g,0.502 g,85.00 mg,0.0 g,1.14 g,0.00 mg,0.00 mg,61.71 g
4989,"Veal, grilled, cooked, separable lean and fat,...",100 g,198,9.5g,3.4g,79mg,86.00 mg,139.0 mg,6.00 mcg,0.00 mcg,...,9.48 g,3.378 g,4.246 g,0.465 g,79.00 mg,0.0 g,1.03 g,0.00 mg,0.00 mg,61.41 g
3317,"Fish, dry heat, cooked, european, turbot",100 g,122,3.8g,,62mg,192.00 mg,0,9.00 mcg,0.00 mcg,...,3.78 g,0,0,0,62.00 mg,0,2.69 g,0,0,70.45 g


In [79]:
nutrition.shape[0] * 0.01

87.89

In [80]:
# nutrition.sample(frac=0.01, n=10)

#### BONUS - Sampling With Replacement Or Weights

In [81]:
# with or without replacement

In [82]:
nutrition.sample(n=3, replace=True)
#sampling with replacement is for making it possible to pick a data multiple time while sampling
#i.e. once a data is pick by sample with replace it is possible to pick that sample again

Unnamed: 0,name,serving_size,calories,total_fat,saturated_fat,cholesterol,sodium,choline,folate,folic_acid,...,fat,saturated_fatty_acids,monounsaturated_fatty_acids,polyunsaturated_fatty_acids,fatty_acids_total_trans,alcohol,ash,caffeine,theobromine,water
191,"Ostrich, raw, oyster",100 g,125,3.7g,1.2g,73mg,83.00 mg,0,8.00 mcg,0.00 mcg,...,3.67 g,1.220 g,1.250 g,0.610 g,73.00 mg,0.0 g,0.62 g,0.00 mg,0.00 mg,75.62 g
3546,"Steelhead trout, flesh (Shoshone Bannock), dried",100 g,382,8.1g,0.8g,227mg,2850.00 mg,0,0,0,...,8.06 g,0.829 g,1.228 g,1.739 g,227.00 mg,0.0 g,10.58 g,0.00 mg,0.00 mg,6.49 g
1827,"Alcoholic beverage, light, wine",100 g,49,0g,,0,7.00 mg,5.0 mg,1.00 mcg,0.00 mcg,...,0.00 g,0.000 g,0.000 g,0.000 g,0.00 mg,6.4 g,0.13 g,0.00 mg,0.00 mg,92.23 g


In [83]:
# weighted sampling

In [84]:
weights = pd.Series(data=[10, 10, 10, 1, 2], index=[7, 17, 29, 5, 6])

In [85]:
weights

7     10
17    10
29    10
5      1
6      2
dtype: int64

In [86]:
nutrition.sample(n=3, weights=weights)


Unnamed: 0,name,serving_size,calories,total_fat,saturated_fat,cholesterol,sodium,choline,folate,folic_acid,...,fat,saturated_fatty_acids,monounsaturated_fatty_acids,polyunsaturated_fatty_acids,fatty_acids_total_trans,alcohol,ash,caffeine,theobromine,water
7,"Lamb, raw, ground",100 g,282,23g,10g,73mg,59.00 mg,69.3 mg,18.00 mcg,0.00 mcg,...,23.41 g,10.190 g,9.600 g,1.850 g,73.00 mg,0.0 g,0.87 g,0.00 mg,0.00 mg,59.47 g
17,"Peppers, raw, jalapeno",100 g,29,0.4g,0.1g,0,3.00 mg,7.5 mg,27.00 mcg,0.00 mcg,...,0.37 g,0.092 g,0.029 g,0.112 g,0.00 mg,0.0 g,0.53 g,0.00 mg,0.00 mg,91.69 g
29,"Nuts, dried, pine nuts",100 g,673,68g,4.9g,0,2.00 mg,55.8 mg,34.00 mcg,0.00 mcg,...,68.37 g,4.899 g,18.764 g,34.071 g,0.00 mg,0.0 g,2.59 g,0.00 mg,0.00 mg,2.28 g


#### BONUS - How Are Random Numbers Generated? 

In [87]:
nutrition.sample()

Unnamed: 0,name,serving_size,calories,total_fat,saturated_fat,cholesterol,sodium,choline,folate,folic_acid,...,fat,saturated_fatty_acids,monounsaturated_fatty_acids,polyunsaturated_fatty_acids,fatty_acids_total_trans,alcohol,ash,caffeine,theobromine,water
5219,"Pork, unheated, extra lean, boneless, steak, h...",100 g,122,4.3g,1.4g,45mg,1269.00 mg,0,4.00 mcg,0.00 mcg,...,4.25 g,1.440 g,1.960 g,0.470 g,45.00 mg,0,3.95 g,0,0,72.24 g


In [88]:
nutrition.sample(random_state=0)

Unnamed: 0,name,serving_size,calories,total_fat,saturated_fat,cholesterol,sodium,choline,folate,folic_acid,...,fat,saturated_fatty_acids,monounsaturated_fatty_acids,polyunsaturated_fatty_acids,fatty_acids_total_trans,alcohol,ash,caffeine,theobromine,water
5584,"Lamb, soaked and simmered, cooked, heart, impo...",100 g,161,6.2g,2g,186mg,67.00 mg,0,0,0,...,6.21 g,2.001 g,1.006 g,0.933 g,186.00 mg,0.0 g,0.94 g,0.00 mg,0.00 mg,66.65 g


#### DataFrame Axes

In [315]:
nutrition.head()

Unnamed: 0_level_0,serving_size_g,calories,total_fat_g,saturated_fat_g,cholesterol_mg,sodium_mg,choline_mg,folate_mcg,folic_acid_mcg,niacin_mg,...,fat_g,saturated_fatty_acids_g,monounsaturated_fatty_acids_g,polyunsaturated_fatty_acids_g,fatty_acids_total_trans_mg,alcohol_g,ash_g,caffeine_mg,theobromine_mg,water_g
name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
"Nuts, pecans",100.0,691.0,72.0,6.2,0.0,0.0,40.5,22.0,0.0,1.167,...,71.97,6.18,40.801,21.614,0.0,0.0,1.49,0.0,0.0,3.52
"Teff, uncooked",100.0,367.0,2.4,0.4,0.0,12.0,13.1,0.0,0.0,3.363,...,2.38,0.449,0.589,1.071,0.0,0.0,2.37,0.0,0.0,8.82
"Sherbet, orange",100.0,144.0,2.0,1.2,1.0,46.0,7.7,4.0,0.0,0.063,...,2.0,1.16,0.53,0.08,1.0,0.0,0.4,0.0,0.0,66.1
"Cauliflower, raw",100.0,25.0,0.3,0.1,0.0,30.0,44.3,57.0,0.0,0.507,...,0.28,0.13,0.034,0.031,0.0,0.0,0.76,0.0,0.0,92.07
"Taro leaves, raw",100.0,42.0,0.7,0.2,0.0,3.0,12.8,126.0,0.0,1.513,...,0.74,0.151,0.06,0.307,0.0,0.0,1.92,0.0,0.0,85.66


In [316]:
nutrition.axes

[Index(['Nuts, pecans', 'Teff, uncooked', 'Sherbet, orange', 'Cauliflower, raw',
        'Taro leaves, raw', 'Lamb, raw, ground', 'Cheese, camembert',
        'Vegetarian fillets', 'Crackers, rusk toast', 'Chicken, boiled, feet',
        ...
        'Beef, braised, cooked, all grades, trimmed to 1/8" fat, separable lean and fat, flat half, brisket',
        'Beef, raw, select, trimmed to 1/8" fat, separable lean only, lip-on, boneless, rib eye steak/roast',
        'Beef, raw, choice, trimmed to 1/8" fat, separable lean only, lip-on, boneless, rib eye steak/roast',
        'Oil, uses similar to 95 degree hard butter, confection fat, palm kernel (hydrogenated), industrial',
        'Beef, raw, all grades, trimmed to 0" fat, separable lean and fat, boneless, top round steak, round',
        'Beef, raw, all grades, trimmed to 0" fat, separable lean and fat, boneless, top round roast, round',
        'Lamb, cooked, separable lean only, composite of trimmed retail cuts, frozen, imported, Ne

In [317]:
nutrition.axes[0][3]

'Cauliflower, raw'

In [318]:
nutrition.index[3]

'Cauliflower, raw'

In [319]:
nutrition.axes[1][45]

'isoleucine_g'

In [320]:
nutrition.columns[69]

'fatty_acids_total_trans_mg'

In [321]:
# the axis param

In [322]:
nutrition.dropna(axis=0)

Unnamed: 0_level_0,serving_size_g,calories,total_fat_g,saturated_fat_g,cholesterol_mg,sodium_mg,choline_mg,folate_mcg,folic_acid_mcg,niacin_mg,...,fat_g,saturated_fatty_acids_g,monounsaturated_fatty_acids_g,polyunsaturated_fatty_acids_g,fatty_acids_total_trans_mg,alcohol_g,ash_g,caffeine_mg,theobromine_mg,water_g
name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
"Nuts, pecans",100.0,691.0,72.0,6.2,0.0,0.0,40.5,22.0,0.0,1.167,...,71.97,6.180,40.801,21.614,0.0,0.0,1.49,0.0,0.0,3.52
"Teff, uncooked",100.0,367.0,2.4,0.4,0.0,12.0,13.1,0.0,0.0,3.363,...,2.38,0.449,0.589,1.071,0.0,0.0,2.37,0.0,0.0,8.82
"Sherbet, orange",100.0,144.0,2.0,1.2,1.0,46.0,7.7,4.0,0.0,0.063,...,2.00,1.160,0.530,0.080,1.0,0.0,0.40,0.0,0.0,66.10
"Cauliflower, raw",100.0,25.0,0.3,0.1,0.0,30.0,44.3,57.0,0.0,0.507,...,0.28,0.130,0.034,0.031,0.0,0.0,0.76,0.0,0.0,92.07
"Taro leaves, raw",100.0,42.0,0.7,0.2,0.0,3.0,12.8,126.0,0.0,1.513,...,0.74,0.151,0.060,0.307,0.0,0.0,1.92,0.0,0.0,85.66
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
"Beef, raw, all grades, trimmed to 0"" fat, separable lean and fat, boneless, top round roast, round",100.0,125.0,3.5,1.4,62.0,54.0,64.5,4.0,0.0,6.422,...,3.50,1.353,1.554,0.244,62.0,0.0,1.11,0.0,0.0,72.51
"Lamb, cooked, separable lean only, composite of trimmed retail cuts, frozen, imported, New Zealand",100.0,206.0,8.9,3.9,109.0,50.0,0.0,0.0,0.0,7.680,...,8.86,3.860,3.480,0.520,109.0,0.0,1.60,0.0,0.0,59.95
"Lamb, raw, separable lean and fat, composite of trimmed retail cuts, frozen, imported, New Zealand",100.0,277.0,23.0,12.0,78.0,39.0,0.0,1.0,0.0,6.550,...,22.74,11.570,8.720,0.980,78.0,0.0,0.92,0.0,0.0,59.80
"Beef, raw, all grades, trimmed to 0"" fat, separable lean only, boneless, eye of round roast, round",100.0,121.0,3.0,1.1,60.0,53.0,64.2,4.0,0.0,6.720,...,3.04,1.086,1.266,0.233,60.0,0.0,1.10,0.0,0.0,73.43


In [323]:
# 0 => "rows"; 1 => "columns"

In [324]:
nutrition.axes[1]

Index(['serving_size_g', 'calories', 'total_fat_g', 'saturated_fat_g',
       'cholesterol_mg', 'sodium_mg', 'choline_mg', 'folate_mcg',
       'folic_acid_mcg', 'niacin_mg', 'pantothenic_acid_mg', 'riboflavin_mg',
       'thiamin_mg', 'vitamin_a_IU', 'vitamin_a_rae_mcg', 'carotene_alpha_mcg',
       'carotene_beta_mcg', 'cryptoxanthin_beta_mcg', 'lutein_zeaxanthin_mcg',
       'lucopene', 'vitamin_b12_mcg', 'vitamin_b6_mg', 'vitamin_c_mg',
       'vitamin_d_IU', 'vitamin_e_mg', 'tocopherol_alpha_mg', 'vitamin_k_mcg',
       'calcium_mg', 'copper_mg', 'irom_mg', 'magnesium_mg', 'manganese_mg',
       'phosphorous_mg', 'potassium_mg', 'selenium_mcg', 'zink_mg',
       'protein_g', 'alanine_g', 'arginine_g', 'aspartic_acid_g', 'cystine_g',
       'glutamic_acid_g', 'glycine_g', 'histidine_g', 'hydroxyproline',
       'isoleucine_g', 'leucine_g', 'lysine_g', 'methionine_g',
       'phenylalanine_g', 'proline_g', 'serine_g', 'threonine_g',
       'tryptophan_g', 'tyrosine_g', 'valine_g', '

#### Changing The Index

In [339]:
nutrition.head()

Unnamed: 0.1,Unnamed: 0,name,serving_size,calories,total_fat,saturated_fat,cholesterol,sodium,choline,folate,...,fat,saturated_fatty_acids,monounsaturated_fatty_acids,polyunsaturated_fatty_acids,fatty_acids_total_trans,alcohol,ash,caffeine,theobromine,water
0,0,Cornstarch,100 g,381,0.1g,,0,9.00 mg,0.4 mg,0.00 mcg,...,0.05 g,0.009 g,0.016 g,0.025 g,0.00 mg,0.0 g,0.09 g,0.00 mg,0.00 mg,8.32 g
1,1,"Nuts, pecans",100 g,691,72g,6.2g,0,0.00 mg,40.5 mg,22.00 mcg,...,71.97 g,6.180 g,40.801 g,21.614 g,0.00 mg,0.0 g,1.49 g,0.00 mg,0.00 mg,3.52 g
2,2,"Eggplant, raw",100 g,25,0.2g,,0,2.00 mg,6.9 mg,22.00 mcg,...,0.18 g,0.034 g,0.016 g,0.076 g,0.00 mg,0.0 g,0.66 g,0.00 mg,0.00 mg,92.30 g
3,3,"Teff, uncooked",100 g,367,2.4g,0.4g,0,12.00 mg,13.1 mg,0,...,2.38 g,0.449 g,0.589 g,1.071 g,0,0,2.37 g,0,0,8.82 g
4,4,"Sherbet, orange",100 g,144,2g,1.2g,1mg,46.00 mg,7.7 mg,4.00 mcg,...,2.00 g,1.160 g,0.530 g,0.080 g,1.00 mg,0.0 g,0.40 g,0.00 mg,0.00 mg,66.10 g


In [326]:
nutrition.index

Index(['Nuts, pecans', 'Teff, uncooked', 'Sherbet, orange', 'Cauliflower, raw',
       'Taro leaves, raw', 'Lamb, raw, ground', 'Cheese, camembert',
       'Vegetarian fillets', 'Crackers, rusk toast', 'Chicken, boiled, feet',
       ...
       'Beef, braised, cooked, all grades, trimmed to 1/8" fat, separable lean and fat, flat half, brisket',
       'Beef, raw, select, trimmed to 1/8" fat, separable lean only, lip-on, boneless, rib eye steak/roast',
       'Beef, raw, choice, trimmed to 1/8" fat, separable lean only, lip-on, boneless, rib eye steak/roast',
       'Oil, uses similar to 95 degree hard butter, confection fat, palm kernel (hydrogenated), industrial',
       'Beef, raw, all grades, trimmed to 0" fat, separable lean and fat, boneless, top round steak, round',
       'Beef, raw, all grades, trimmed to 0" fat, separable lean and fat, boneless, top round roast, round',
       'Lamb, cooked, separable lean only, composite of trimmed retail cuts, frozen, imported, New Zealand',

In [327]:
type(nutrition.index)

pandas.core.indexes.base.Index

In [340]:
nutrition.index = pd.RangeIndex(start=0, stop=8789, step=1)

In [361]:
type(nutrition.index)

pandas.core.indexes.base.Index

In [362]:
nutrition.set_index('name', inplace=True)

KeyError: "None of ['name'] are in the columns"

In [358]:
nutrition.head()

Unnamed: 0_level_0,Unnamed: 0,serving_size,calories,total_fat,saturated_fat,cholesterol,sodium,choline,folate,folic_acid,...,fat,saturated_fatty_acids,monounsaturated_fatty_acids,polyunsaturated_fatty_acids,fatty_acids_total_trans,alcohol,ash,caffeine,theobromine,water
name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
Cornstarch,0,100 g,381,0.1g,,0,9.00 mg,0.4 mg,0.00 mcg,0.00 mcg,...,0.05 g,0.009 g,0.016 g,0.025 g,0.00 mg,0.0 g,0.09 g,0.00 mg,0.00 mg,8.32 g
"Nuts, pecans",1,100 g,691,72g,6.2g,0,0.00 mg,40.5 mg,22.00 mcg,0.00 mcg,...,71.97 g,6.180 g,40.801 g,21.614 g,0.00 mg,0.0 g,1.49 g,0.00 mg,0.00 mg,3.52 g
"Eggplant, raw",2,100 g,25,0.2g,,0,2.00 mg,6.9 mg,22.00 mcg,0.00 mcg,...,0.18 g,0.034 g,0.016 g,0.076 g,0.00 mg,0.0 g,0.66 g,0.00 mg,0.00 mg,92.30 g
"Teff, uncooked",3,100 g,367,2.4g,0.4g,0,12.00 mg,13.1 mg,0,0,...,2.38 g,0.449 g,0.589 g,1.071 g,0,0,2.37 g,0,0,8.82 g
"Sherbet, orange",4,100 g,144,2g,1.2g,1mg,46.00 mg,7.7 mg,4.00 mcg,0.00 mcg,...,2.00 g,1.160 g,0.530 g,0.080 g,1.00 mg,0.0 g,0.40 g,0.00 mg,0.00 mg,66.10 g


In [344]:
nutrition.set_index('folic_acid', drop=False, append=True, verify_integrity=False).head()

#drop means if want to remove the column 'folic_acid' from the resulting data since its false we can see the column 
#is not removed, by default drop=True

#append=True means the new index will be appended to the existing index and the resultant will be dataframe will multi indexed dataframe

#verify_integrity is to check for uniqueness of the the index values. its by default is true

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 0,serving_size,calories,total_fat,saturated_fat,cholesterol,sodium,choline,folate,folic_acid,...,fat,saturated_fatty_acids,monounsaturated_fatty_acids,polyunsaturated_fatty_acids,fatty_acids_total_trans,alcohol,ash,caffeine,theobromine,water
name,folic_acid,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1
Cornstarch,0.00 mcg,0,100 g,381,0.1g,,0,9.00 mg,0.4 mg,0.00 mcg,0.00 mcg,...,0.05 g,0.009 g,0.016 g,0.025 g,0.00 mg,0.0 g,0.09 g,0.00 mg,0.00 mg,8.32 g
"Nuts, pecans",0.00 mcg,1,100 g,691,72g,6.2g,0,0.00 mg,40.5 mg,22.00 mcg,0.00 mcg,...,71.97 g,6.180 g,40.801 g,21.614 g,0.00 mg,0.0 g,1.49 g,0.00 mg,0.00 mg,3.52 g
"Eggplant, raw",0.00 mcg,2,100 g,25,0.2g,,0,2.00 mg,6.9 mg,22.00 mcg,0.00 mcg,...,0.18 g,0.034 g,0.016 g,0.076 g,0.00 mg,0.0 g,0.66 g,0.00 mg,0.00 mg,92.30 g
"Teff, uncooked",0,3,100 g,367,2.4g,0.4g,0,12.00 mg,13.1 mg,0,0,...,2.38 g,0.449 g,0.589 g,1.071 g,0,0,2.37 g,0,0,8.82 g
"Sherbet, orange",0.00 mcg,4,100 g,144,2g,1.2g,1mg,46.00 mg,7.7 mg,4.00 mcg,0.00 mcg,...,2.00 g,1.160 g,0.530 g,0.080 g,1.00 mg,0.0 g,0.40 g,0.00 mg,0.00 mg,66.10 g


In [345]:
nutrition.calories.value_counts()

884    78
47     45
56     43
0      39
50     38
       ..
549     1
643     1
556     1
584     1
605     1
Name: calories, Length: 671, dtype: int64

In [346]:
nutrition.set_index('calories', verify_integrity=False)

Unnamed: 0_level_0,Unnamed: 0,serving_size,total_fat,saturated_fat,cholesterol,sodium,choline,folate,folic_acid,niacin,...,fat,saturated_fatty_acids,monounsaturated_fatty_acids,polyunsaturated_fatty_acids,fatty_acids_total_trans,alcohol,ash,caffeine,theobromine,water
calories,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
381,0,100 g,0.1g,,0,9.00 mg,0.4 mg,0.00 mcg,0.00 mcg,0.000 mg,...,0.05 g,0.009 g,0.016 g,0.025 g,0.00 mg,0.0 g,0.09 g,0.00 mg,0.00 mg,8.32 g
691,1,100 g,72g,6.2g,0,0.00 mg,40.5 mg,22.00 mcg,0.00 mcg,1.167 mg,...,71.97 g,6.180 g,40.801 g,21.614 g,0.00 mg,0.0 g,1.49 g,0.00 mg,0.00 mg,3.52 g
25,2,100 g,0.2g,,0,2.00 mg,6.9 mg,22.00 mcg,0.00 mcg,0.649 mg,...,0.18 g,0.034 g,0.016 g,0.076 g,0.00 mg,0.0 g,0.66 g,0.00 mg,0.00 mg,92.30 g
367,3,100 g,2.4g,0.4g,0,12.00 mg,13.1 mg,0,0,3.363 mg,...,2.38 g,0.449 g,0.589 g,1.071 g,0,0,2.37 g,0,0,8.82 g
144,4,100 g,2g,1.2g,1mg,46.00 mg,7.7 mg,4.00 mcg,0.00 mcg,0.063 mg,...,2.00 g,1.160 g,0.530 g,0.080 g,1.00 mg,0.0 g,0.40 g,0.00 mg,0.00 mg,66.10 g
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
125,8784,100 g,3.5g,1.4g,62mg,54.00 mg,64.5 mg,4.00 mcg,0.00 mcg,6.422 mg,...,3.50 g,1.353 g,1.554 g,0.244 g,62.00 mg,0.0 g,1.11 g,0.00 mg,0.00 mg,72.51 g
206,8785,100 g,8.9g,3.9g,109mg,50.00 mg,0,0.00 mcg,0.00 mcg,7.680 mg,...,8.86 g,3.860 g,3.480 g,0.520 g,109.00 mg,0,1.60 g,0,0,59.95 g
277,8786,100 g,23g,12g,78mg,39.00 mg,0,1.00 mcg,0.00 mcg,6.550 mg,...,22.74 g,11.570 g,8.720 g,0.980 g,78.00 mg,0,0.92 g,0,0,59.80 g
121,8787,100 g,3g,1.1g,60mg,53.00 mg,64.2 mg,4.00 mcg,0.00 mcg,6.720 mg,...,3.04 g,1.086 g,1.266 g,0.233 g,60.00 mg,0.0 g,1.10 g,0.00 mg,0.00 mg,73.43 g


#### Extracting From DataFrames By Label

In [348]:
nutrition.head()

Unnamed: 0_level_0,Unnamed: 0,serving_size,calories,total_fat,saturated_fat,cholesterol,sodium,choline,folate,folic_acid,...,fat,saturated_fatty_acids,monounsaturated_fatty_acids,polyunsaturated_fatty_acids,fatty_acids_total_trans,alcohol,ash,caffeine,theobromine,water
name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
Cornstarch,0,100 g,381,0.1g,,0,9.00 mg,0.4 mg,0.00 mcg,0.00 mcg,...,0.05 g,0.009 g,0.016 g,0.025 g,0.00 mg,0.0 g,0.09 g,0.00 mg,0.00 mg,8.32 g
"Nuts, pecans",1,100 g,691,72g,6.2g,0,0.00 mg,40.5 mg,22.00 mcg,0.00 mcg,...,71.97 g,6.180 g,40.801 g,21.614 g,0.00 mg,0.0 g,1.49 g,0.00 mg,0.00 mg,3.52 g
"Eggplant, raw",2,100 g,25,0.2g,,0,2.00 mg,6.9 mg,22.00 mcg,0.00 mcg,...,0.18 g,0.034 g,0.016 g,0.076 g,0.00 mg,0.0 g,0.66 g,0.00 mg,0.00 mg,92.30 g
"Teff, uncooked",3,100 g,367,2.4g,0.4g,0,12.00 mg,13.1 mg,0,0,...,2.38 g,0.449 g,0.589 g,1.071 g,0,0,2.37 g,0,0,8.82 g
"Sherbet, orange",4,100 g,144,2g,1.2g,1mg,46.00 mg,7.7 mg,4.00 mcg,0.00 mcg,...,2.00 g,1.160 g,0.530 g,0.080 g,1.00 mg,0.0 g,0.40 g,0.00 mg,0.00 mg,66.10 g


In [359]:
nutrition.loc['Eggplant, raw']

Unnamed: 0             2
serving_size       100 g
calories              25
total_fat           0.2g
saturated_fat        NaN
                  ...   
alcohol            0.0 g
ash               0.66 g
caffeine         0.00 mg
theobromine      0.00 mg
water            92.30 g
Name: Eggplant, raw, Length: 76, dtype: object

In [360]:
type(nutrition.loc['Eggplant, raw'])

pandas.core.series.Series

In [112]:
nutrition.loc['Eggplant, raw']['calories']

25

In [113]:
nutrition.loc['Eggplant, raw', 'calories']

25

In [114]:
nutrition.loc['Eggplant, raw':'Sherbet, orange', 'calories':'cholesterol']

Unnamed: 0_level_0,calories,total_fat,saturated_fat,cholesterol
name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
"Eggplant, raw",25,0.2g,,0
"Teff, uncooked",367,2.4g,0.4g,0
"Sherbet, orange",144,2g,1.2g,1mg


In [115]:
nutrition.loc[
              ['Raspberries, raw', 'Blackberries, raw'],
              ['protein', 'vitamin_b6', 'water']
]

Unnamed: 0_level_0,protein,vitamin_b6,water
name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
"Raspberries, raw",1.20 g,0.055 mg,85.75 g
"Blackberries, raw",1.39 g,0.030 mg,88.15 g


#### DataFrame Extraction By Position

In [116]:
nutrition.head(10)

Unnamed: 0_level_0,serving_size,calories,total_fat,saturated_fat,cholesterol,sodium,choline,folate,folic_acid,niacin,...,fat,saturated_fatty_acids,monounsaturated_fatty_acids,polyunsaturated_fatty_acids,fatty_acids_total_trans,alcohol,ash,caffeine,theobromine,water
name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
Cornstarch,100 g,381,0.1g,,0,9.00 mg,0.4 mg,0.00 mcg,0.00 mcg,0.000 mg,...,0.05 g,0.009 g,0.016 g,0.025 g,0.00 mg,0.0 g,0.09 g,0.00 mg,0.00 mg,8.32 g
"Nuts, pecans",100 g,691,72g,6.2g,0,0.00 mg,40.5 mg,22.00 mcg,0.00 mcg,1.167 mg,...,71.97 g,6.180 g,40.801 g,21.614 g,0.00 mg,0.0 g,1.49 g,0.00 mg,0.00 mg,3.52 g
"Eggplant, raw",100 g,25,0.2g,,0,2.00 mg,6.9 mg,22.00 mcg,0.00 mcg,0.649 mg,...,0.18 g,0.034 g,0.016 g,0.076 g,0.00 mg,0.0 g,0.66 g,0.00 mg,0.00 mg,92.30 g
"Teff, uncooked",100 g,367,2.4g,0.4g,0,12.00 mg,13.1 mg,0,0,3.363 mg,...,2.38 g,0.449 g,0.589 g,1.071 g,0,0,2.37 g,0,0,8.82 g
"Sherbet, orange",100 g,144,2g,1.2g,1mg,46.00 mg,7.7 mg,4.00 mcg,0.00 mcg,0.063 mg,...,2.00 g,1.160 g,0.530 g,0.080 g,1.00 mg,0.0 g,0.40 g,0.00 mg,0.00 mg,66.10 g
"Cauliflower, raw",100 g,25,0.3g,0.1g,0,30.00 mg,44.3 mg,57.00 mcg,0.00 mcg,0.507 mg,...,0.28 g,0.130 g,0.034 g,0.031 g,0.00 mg,0.0 g,0.76 g,0.00 mg,0.00 mg,92.07 g
"Taro leaves, raw",100 g,42,0.7g,0.2g,0,3.00 mg,12.8 mg,126.00 mcg,0.00 mcg,1.513 mg,...,0.74 g,0.151 g,0.060 g,0.307 g,0.00 mg,0.0 g,1.92 g,0.00 mg,0.00 mg,85.66 g
"Lamb, raw, ground",100 g,282,23g,10g,73mg,59.00 mg,69.3 mg,18.00 mcg,0.00 mcg,5.960 mg,...,23.41 g,10.190 g,9.600 g,1.850 g,73.00 mg,0.0 g,0.87 g,0.00 mg,0.00 mg,59.47 g
"Cheese, camembert",100 g,300,24g,15g,72mg,842.00 mg,15.4 mg,62.00 mcg,0.00 mcg,0.630 mg,...,24.26 g,15.259 g,7.023 g,0.724 g,72.00 mg,0.0 g,3.68 g,0.00 mg,0.00 mg,51.80 g
Vegetarian fillets,100 g,290,18g,2.8g,0,490.00 mg,82.0 mg,102.00 mcg,0.00 mcg,12.000 mg,...,18.00 g,2.849 g,4.376 g,9.332 g,0.00 mg,0.0 g,5.00 g,0.00 mg,0.00 mg,45.00 g


In [117]:
nutrition.iloc[3]

serving_size      100 g
calories            367
total_fat          2.4g
saturated_fat      0.4g
cholesterol           0
                  ...  
alcohol               0
ash              2.37 g
caffeine              0
theobromine           0
water            8.82 g
Name: Teff, uncooked, Length: 75, dtype: object

In [118]:
nutrition.iloc[3, :]

serving_size      100 g
calories            367
total_fat          2.4g
saturated_fat      0.4g
cholesterol           0
                  ...  
alcohol               0
ash              2.37 g
caffeine              0
theobromine           0
water            8.82 g
Name: Teff, uncooked, Length: 75, dtype: object

In [119]:
nutrition.iloc[[4,6,9], :]

Unnamed: 0_level_0,serving_size,calories,total_fat,saturated_fat,cholesterol,sodium,choline,folate,folic_acid,niacin,...,fat,saturated_fatty_acids,monounsaturated_fatty_acids,polyunsaturated_fatty_acids,fatty_acids_total_trans,alcohol,ash,caffeine,theobromine,water
name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
"Sherbet, orange",100 g,144,2g,1.2g,1mg,46.00 mg,7.7 mg,4.00 mcg,0.00 mcg,0.063 mg,...,2.00 g,1.160 g,0.530 g,0.080 g,1.00 mg,0.0 g,0.40 g,0.00 mg,0.00 mg,66.10 g
"Taro leaves, raw",100 g,42,0.7g,0.2g,0,3.00 mg,12.8 mg,126.00 mcg,0.00 mcg,1.513 mg,...,0.74 g,0.151 g,0.060 g,0.307 g,0.00 mg,0.0 g,1.92 g,0.00 mg,0.00 mg,85.66 g
Vegetarian fillets,100 g,290,18g,2.8g,0,490.00 mg,82.0 mg,102.00 mcg,0.00 mcg,12.000 mg,...,18.00 g,2.849 g,4.376 g,9.332 g,0.00 mg,0.0 g,5.00 g,0.00 mg,0.00 mg,45.00 g


In [120]:
nutrition.iloc[[4,6,9]]

Unnamed: 0_level_0,serving_size,calories,total_fat,saturated_fat,cholesterol,sodium,choline,folate,folic_acid,niacin,...,fat,saturated_fatty_acids,monounsaturated_fatty_acids,polyunsaturated_fatty_acids,fatty_acids_total_trans,alcohol,ash,caffeine,theobromine,water
name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
"Sherbet, orange",100 g,144,2g,1.2g,1mg,46.00 mg,7.7 mg,4.00 mcg,0.00 mcg,0.063 mg,...,2.00 g,1.160 g,0.530 g,0.080 g,1.00 mg,0.0 g,0.40 g,0.00 mg,0.00 mg,66.10 g
"Taro leaves, raw",100 g,42,0.7g,0.2g,0,3.00 mg,12.8 mg,126.00 mcg,0.00 mcg,1.513 mg,...,0.74 g,0.151 g,0.060 g,0.307 g,0.00 mg,0.0 g,1.92 g,0.00 mg,0.00 mg,85.66 g
Vegetarian fillets,100 g,290,18g,2.8g,0,490.00 mg,82.0 mg,102.00 mcg,0.00 mcg,12.000 mg,...,18.00 g,2.849 g,4.376 g,9.332 g,0.00 mg,0.0 g,5.00 g,0.00 mg,0.00 mg,45.00 g


In [121]:
# nutrition.iloc[[4,6,9], 'total_fat']

In [122]:
nutrition.iloc[[4,6,9], 2]

name
Sherbet, orange         2g
Taro leaves, raw      0.7g
Vegetarian fillets     18g
Name: total_fat, dtype: object

In [123]:
nutrition.iloc[[4,6,9], 2:5]

Unnamed: 0_level_0,total_fat,saturated_fat,cholesterol
name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
"Sherbet, orange",2g,1.2g,1mg
"Taro leaves, raw",0.7g,0.2g,0
Vegetarian fillets,18g,2.8g,0


In [124]:
# boolean masks

In [125]:
new_nutr = nutrition.iloc[
               [True if i%2==0 else False for i in range(8789)],
               [True if i%2==0 else False for i in range(75)]
]

In [126]:
nutrition.shape # x rows and y cols

(8789, 75)

In [127]:
new_nutr.shape # x/2 rows and y/2 cols

(4395, 38)

In [128]:
nutrition.head(10)

Unnamed: 0_level_0,serving_size,calories,total_fat,saturated_fat,cholesterol,sodium,choline,folate,folic_acid,niacin,...,fat,saturated_fatty_acids,monounsaturated_fatty_acids,polyunsaturated_fatty_acids,fatty_acids_total_trans,alcohol,ash,caffeine,theobromine,water
name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
Cornstarch,100 g,381,0.1g,,0,9.00 mg,0.4 mg,0.00 mcg,0.00 mcg,0.000 mg,...,0.05 g,0.009 g,0.016 g,0.025 g,0.00 mg,0.0 g,0.09 g,0.00 mg,0.00 mg,8.32 g
"Nuts, pecans",100 g,691,72g,6.2g,0,0.00 mg,40.5 mg,22.00 mcg,0.00 mcg,1.167 mg,...,71.97 g,6.180 g,40.801 g,21.614 g,0.00 mg,0.0 g,1.49 g,0.00 mg,0.00 mg,3.52 g
"Eggplant, raw",100 g,25,0.2g,,0,2.00 mg,6.9 mg,22.00 mcg,0.00 mcg,0.649 mg,...,0.18 g,0.034 g,0.016 g,0.076 g,0.00 mg,0.0 g,0.66 g,0.00 mg,0.00 mg,92.30 g
"Teff, uncooked",100 g,367,2.4g,0.4g,0,12.00 mg,13.1 mg,0,0,3.363 mg,...,2.38 g,0.449 g,0.589 g,1.071 g,0,0,2.37 g,0,0,8.82 g
"Sherbet, orange",100 g,144,2g,1.2g,1mg,46.00 mg,7.7 mg,4.00 mcg,0.00 mcg,0.063 mg,...,2.00 g,1.160 g,0.530 g,0.080 g,1.00 mg,0.0 g,0.40 g,0.00 mg,0.00 mg,66.10 g
"Cauliflower, raw",100 g,25,0.3g,0.1g,0,30.00 mg,44.3 mg,57.00 mcg,0.00 mcg,0.507 mg,...,0.28 g,0.130 g,0.034 g,0.031 g,0.00 mg,0.0 g,0.76 g,0.00 mg,0.00 mg,92.07 g
"Taro leaves, raw",100 g,42,0.7g,0.2g,0,3.00 mg,12.8 mg,126.00 mcg,0.00 mcg,1.513 mg,...,0.74 g,0.151 g,0.060 g,0.307 g,0.00 mg,0.0 g,1.92 g,0.00 mg,0.00 mg,85.66 g
"Lamb, raw, ground",100 g,282,23g,10g,73mg,59.00 mg,69.3 mg,18.00 mcg,0.00 mcg,5.960 mg,...,23.41 g,10.190 g,9.600 g,1.850 g,73.00 mg,0.0 g,0.87 g,0.00 mg,0.00 mg,59.47 g
"Cheese, camembert",100 g,300,24g,15g,72mg,842.00 mg,15.4 mg,62.00 mcg,0.00 mcg,0.630 mg,...,24.26 g,15.259 g,7.023 g,0.724 g,72.00 mg,0.0 g,3.68 g,0.00 mg,0.00 mg,51.80 g
Vegetarian fillets,100 g,290,18g,2.8g,0,490.00 mg,82.0 mg,102.00 mcg,0.00 mcg,12.000 mg,...,18.00 g,2.849 g,4.376 g,9.332 g,0.00 mg,0.0 g,5.00 g,0.00 mg,0.00 mg,45.00 g


In [129]:
nutrition.iloc[9,1]

290

#### Single Value Access With .at And .iat

In [130]:
nutrition.head()

Unnamed: 0_level_0,serving_size,calories,total_fat,saturated_fat,cholesterol,sodium,choline,folate,folic_acid,niacin,...,fat,saturated_fatty_acids,monounsaturated_fatty_acids,polyunsaturated_fatty_acids,fatty_acids_total_trans,alcohol,ash,caffeine,theobromine,water
name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
Cornstarch,100 g,381,0.1g,,0,9.00 mg,0.4 mg,0.00 mcg,0.00 mcg,0.000 mg,...,0.05 g,0.009 g,0.016 g,0.025 g,0.00 mg,0.0 g,0.09 g,0.00 mg,0.00 mg,8.32 g
"Nuts, pecans",100 g,691,72g,6.2g,0,0.00 mg,40.5 mg,22.00 mcg,0.00 mcg,1.167 mg,...,71.97 g,6.180 g,40.801 g,21.614 g,0.00 mg,0.0 g,1.49 g,0.00 mg,0.00 mg,3.52 g
"Eggplant, raw",100 g,25,0.2g,,0,2.00 mg,6.9 mg,22.00 mcg,0.00 mcg,0.649 mg,...,0.18 g,0.034 g,0.016 g,0.076 g,0.00 mg,0.0 g,0.66 g,0.00 mg,0.00 mg,92.30 g
"Teff, uncooked",100 g,367,2.4g,0.4g,0,12.00 mg,13.1 mg,0,0,3.363 mg,...,2.38 g,0.449 g,0.589 g,1.071 g,0,0,2.37 g,0,0,8.82 g
"Sherbet, orange",100 g,144,2g,1.2g,1mg,46.00 mg,7.7 mg,4.00 mcg,0.00 mcg,0.063 mg,...,2.00 g,1.160 g,0.530 g,0.080 g,1.00 mg,0.0 g,0.40 g,0.00 mg,0.00 mg,66.10 g


In [131]:
nutrition.loc['Nuts, pecans', 'calories']

691

In [132]:
nutrition.iloc[1,1]

691

In [133]:
nutrition.at['Nuts, pecans', 'calories']

691

In [134]:
nutrition.iat[1,1]

691

In [135]:
%timeit nutrition.loc['Nuts, pecans', 'calories']

7.75 µs ± 144 ns per loop (mean ± std. dev. of 7 runs, 100000 loops each)


In [136]:
%timeit nutrition.at['Nuts, pecans', 'calories']

3.95 µs ± 145 ns per loop (mean ± std. dev. of 7 runs, 100000 loops each)


#### BONUS - The get_loc() Method

In [137]:
# column label: 'vitamin_k'
# index position: 2

In [138]:
# word of the lecture: pari passu -> kinda equal; on equal footing

In [139]:
# Approach #1 - get label from position

In [140]:
index_label = nutrition.index[2]

In [141]:
column_label = 'vitamin_k'

In [142]:
nutrition.loc[index_label, column_label]

'3.5 mcg'

In [143]:
nutrition.at[index_label, column_label]

'3.5 mcg'

In [144]:
# Approach #2 - get int location from label

In [145]:
nutrition.columns.get_loc('vitamin_k')

26

In [146]:
column_loc = 26

In [147]:
index_loc = 2

In [148]:
nutrition.iloc[index_loc, column_loc]

'3.5 mcg'

In [149]:
nutrition.iat[index_loc, column_loc]

'3.5 mcg'

#### Skill Challenge

###### 1.

Randomly select 10 food items and assign the resulting dataframe to a new variable called *nutr_mini*.

###### 2.

From *nutr_mini*, extract the **total_fat** and **cholesterol** columns for all rows. 

###### 3.

Extract all the columns from **vitamin_b12** to the end, for the first, second, and third rows.

###### 4.

Get the calories for the third food in *nutr_mini* using an attribute-based approach that is faster than .loc or .iloc. 

#### Solution

In [150]:
# 1

In [151]:
nutr_mini = nutrition.sample(10, axis=0)

In [152]:
nutr_mini.shape

(10, 75)

In [153]:
# 2

In [154]:
nutr_mini.loc[:, ['total_fat', 'cholesterol']]

Unnamed: 0_level_0,total_fat,cholesterol
name,Unnamed: 1_level_1,Unnamed: 2_level_1
"Alcoholic beverage, whiskey and powder mix, prepared with water, whiskey sour",0g,0
"Beverages, French Vanilla Cafe, instant, coffee, KRAFT",19g,0
"Gravy, classic chicken, HEINZ Home Style",2.6g,4mg
"Ginger root, raw",0.8g,0
"Plums, solids and liquids, light syrup pack, purple, canned",0.1g,0
"Babyfood, strained, turkey and rice, dinner",1.2g,5mg
"DOMINO'S 14"" Sausage Pizza, Crunchy Thin Crust",19g,35mg
KRAFT VELVEETA LIGHT Reduced Fat Pasteurized Process Cheese Product,11g,42mg
"Shortening, soybean (hydrogenated), special purpose for cakes and frostings",100g,0
"Babyfood, strained, blueberry yogurt, dessert",0.7g,4mg


In [155]:
# 3

In [156]:
b12_loc = nutr_mini.columns.get_loc('vitamin_b12')

In [157]:
nutr_mini.iloc[0:3, b12_loc:]

Unnamed: 0_level_0,vitamin_b12,vitamin_b6,vitamin_c,vitamin_d,vitamin_e,tocopherol_alpha,vitamin_k,calcium,copper,irom,...,fat,saturated_fatty_acids,monounsaturated_fatty_acids,polyunsaturated_fatty_acids,fatty_acids_total_trans,alcohol,ash,caffeine,theobromine,water
name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
"Alcoholic beverage, whiskey and powder mix, prepared with water, whiskey sour",0.00 mcg,0.000 mg,0.4 mg,0,0.00 mg,0.00 mg,0.0 mcg,46.00 mg,0.034 mg,0.08 mg,...,0.02 g,0.003 g,0.001 g,0.006 g,0.00 mg,14.6 g,0.27 g,0.00 mg,0.00 mg,69.15 g
"Beverages, French Vanilla Cafe, instant, coffee, KRAFT",0,0,0.0 mg,0,0,0,0,12.00 mg,0,0.33 mg,...,19.20 g,4.600 g,0,0,0.00 mg,0,2.20 g,0,0,1.50 g
"Gravy, classic chicken, HEINZ Home Style",0.00 mcg,0.027 mg,0,0,0.11 mg,0.11 mg,0.8 mcg,10.00 mg,0.019 mg,0.07 mg,...,2.57 g,0.656 g,1.105 g,0.429 g,4.00 mg,0,1.03 g,0,0,90.73 g


In [158]:
# 4

In [159]:
nutr_mini.iat[2,1]

46

#### More Cleanup: Going Numeric

In [160]:
nutrition.head()

Unnamed: 0_level_0,serving_size,calories,total_fat,saturated_fat,cholesterol,sodium,choline,folate,folic_acid,niacin,...,fat,saturated_fatty_acids,monounsaturated_fatty_acids,polyunsaturated_fatty_acids,fatty_acids_total_trans,alcohol,ash,caffeine,theobromine,water
name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
Cornstarch,100 g,381,0.1g,,0,9.00 mg,0.4 mg,0.00 mcg,0.00 mcg,0.000 mg,...,0.05 g,0.009 g,0.016 g,0.025 g,0.00 mg,0.0 g,0.09 g,0.00 mg,0.00 mg,8.32 g
"Nuts, pecans",100 g,691,72g,6.2g,0,0.00 mg,40.5 mg,22.00 mcg,0.00 mcg,1.167 mg,...,71.97 g,6.180 g,40.801 g,21.614 g,0.00 mg,0.0 g,1.49 g,0.00 mg,0.00 mg,3.52 g
"Eggplant, raw",100 g,25,0.2g,,0,2.00 mg,6.9 mg,22.00 mcg,0.00 mcg,0.649 mg,...,0.18 g,0.034 g,0.016 g,0.076 g,0.00 mg,0.0 g,0.66 g,0.00 mg,0.00 mg,92.30 g
"Teff, uncooked",100 g,367,2.4g,0.4g,0,12.00 mg,13.1 mg,0,0,3.363 mg,...,2.38 g,0.449 g,0.589 g,1.071 g,0,0,2.37 g,0,0,8.82 g
"Sherbet, orange",100 g,144,2g,1.2g,1mg,46.00 mg,7.7 mg,4.00 mcg,0.00 mcg,0.063 mg,...,2.00 g,1.160 g,0.530 g,0.080 g,1.00 mg,0.0 g,0.40 g,0.00 mg,0.00 mg,66.10 g


In [161]:
nutrition.total_fat.head()

name
Cornstarch         0.1g
Nuts, pecans        72g
Eggplant, raw      0.2g
Teff, uncooked     2.4g
Sherbet, orange      2g
Name: total_fat, dtype: object

In [162]:
nutrition.total_fat.sum()

'0.1g72g0.2g2.4g2g0.3g0.7g23g24g18g0g0.4g0.1g7.2g15g4.5g16g0.4g0.9g9.2g0.5g1.7g5.9g33g15g0g0g3g0.4g68g1.5g0.2g11g0.8g16g0.2g0.5g50g0.3g0.6g3g16g3.1g11g2.4g0g0.3g0.5g99g0.1g5.3g5.3g0.4g5.3g0.2g0.3g6.4g22g1g14g10g17g14g4.3g22g36g0.7g100g27g2.8g5.8g14g0.7g1.5g11g34g0.2g0.9g4.6g0g0.4g19g0.2g10g28g6.7g6.7g0.3g0.3g44g37g14g50g0.9g0.7g0.1g0.7g8g10g19g9.2g0g0.2g8.6g1.3g14g0.4g0.2g8.2g5.2g3.3g2.1g0.1g80g0.5g1.4g0.4g3.3g0.2g0.7g0.1g9.4g0.3g0.6g29g0.2g1.4g0.2g0.4g1.2g1.8g0g0.9g0.2g1.4g0.8g1.5g9.8g0g13g16g19g7.4g0.2g5.2g29g0.3g9.9g22g14g15g4.1g0.5g3.5g15g20g32g1g81g0.2g1.6g0g17g22g7.1g7.4g0.2g8.7g1.4g34g0.3g6.3g30g8.1g0.2g0.1g3.7g0.6g0.3g3.9g0.3g1.2g29g14g26g1.1g2g13g0g9g22g3.7g100g0.1g2.1g2.1g0.3g0.2g6.8g8.1g8.3g2.4g0.2g23g6.7g1g0.3g0.2g0.1g12g17g0.1g100g3.4g0.2g6.7g1.5g22g0g1g25g34g3.6g100g7.3g9g1.6g0.4g0.3g18g0.4g9.5g1.4g11g3.1g1.5g0g2.4g3.6g0g15g1.2g6.6g0.7g4.2g15g0.1g0.5g0.2g0.1g3.5g3g0.1g0.4g0.1g0.3g100g31g2.8g9.7g2.3g11g1.7g0.1g1.7g0.5g4.7g0.5g0.2g8.6g18g25g0.5g0.5g26g7.7g0.1g0.1g100g1.1g16

In [163]:
"Andy" + "Bek"

'AndyBek'

In [164]:
nutrition.total_fat.max()

'9g'

In [165]:
nutrition.info(verbose=False)

<class 'pandas.core.frame.DataFrame'>
Index: 8789 entries, Cornstarch to Beef, raw, all grades, trimmed to 0" fat, separable lean only, boneless, eye of round steak, round
Columns: 75 entries, serving_size to water
dtypes: int64(2), object(73)
memory usage: 5.3+ MB


#### The astype() Method

In [166]:
df = pd.DataFrame({
    'age': [12, 13, 14, 16], 
    'weight': [41.1, 34.5, 83.2, 90.1], 
    'height': ['1.72', '1.74', '1.91', '1.54']
    })

In [167]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 4 entries, 0 to 3
Data columns (total 3 columns):
 #   Column  Non-Null Count  Dtype  
---  ------  --------------  -----  
 0   age     4 non-null      int64  
 1   weight  4 non-null      float64
 2   height  4 non-null      object 
dtypes: float64(1), int64(1), object(1)
memory usage: 224.0+ bytes


In [168]:
df

Unnamed: 0,age,weight,height
0,12,41.1,1.72
1,13,34.5,1.74
2,14,83.2,1.91
3,16,90.1,1.54


In [169]:
df = df.astype(float)

In [170]:
df

Unnamed: 0,age,weight,height
0,12.0,41.1,1.72
1,13.0,34.5,1.74
2,14.0,83.2,1.91
3,16.0,90.1,1.54


In [171]:
df.astype({'age': int})

Unnamed: 0,age,weight,height
0,12,41.1,1.72
1,13,34.5,1.74
2,14,83.2,1.91
3,16,90.1,1.54


In [172]:
df.astype({'age': np.int16})

Unnamed: 0,age,weight,height
0,12,41.1,1.72
1,13,34.5,1.74
2,14,83.2,1.91
3,16,90.1,1.54


In [173]:
# nutrition.iloc[:4, :].astype(int)

In [174]:
float(714)

714.0

In [175]:
float('714')

714.0

In [176]:
# float('714g')

#### DataFrame replace() + A Glimpse At Regex

In [177]:
dfm = nutrition.iloc[:6, :1]

In [178]:
nutrition.iloc[:6, :1].info()

<class 'pandas.core.frame.DataFrame'>
Index: 6 entries, Cornstarch to Cauliflower, raw
Data columns (total 1 columns):
 #   Column        Non-Null Count  Dtype 
---  ------        --------------  ----- 
 0   serving_size  6 non-null      object
dtypes: object(1)
memory usage: 96.0+ bytes


In [179]:
dfm.replace('100 g', 100)

Unnamed: 0_level_0,serving_size
name,Unnamed: 1_level_1
Cornstarch,100
"Nuts, pecans",100
"Eggplant, raw",100
"Teff, uncooked",100
"Sherbet, orange",100
"Cauliflower, raw",100


In [180]:
dfm.replace('g', '')

Unnamed: 0_level_0,serving_size
name,Unnamed: 1_level_1
Cornstarch,100 g
"Nuts, pecans",100 g
"Eggplant, raw",100 g
"Teff, uncooked",100 g
"Sherbet, orange",100 g
"Cauliflower, raw",100 g


In [181]:
# regex -> regular expressions

In [182]:
dfm.replace('\sg', '', regex=True).astype(int)

Unnamed: 0_level_0,serving_size
name,Unnamed: 1_level_1
Cornstarch,100
"Nuts, pecans",100
"Eggplant, raw",100
"Teff, uncooked",100
"Sherbet, orange",100
"Cauliflower, raw",100


In [183]:
nutrition.head(10)

Unnamed: 0_level_0,serving_size,calories,total_fat,saturated_fat,cholesterol,sodium,choline,folate,folic_acid,niacin,...,fat,saturated_fatty_acids,monounsaturated_fatty_acids,polyunsaturated_fatty_acids,fatty_acids_total_trans,alcohol,ash,caffeine,theobromine,water
name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
Cornstarch,100 g,381,0.1g,,0,9.00 mg,0.4 mg,0.00 mcg,0.00 mcg,0.000 mg,...,0.05 g,0.009 g,0.016 g,0.025 g,0.00 mg,0.0 g,0.09 g,0.00 mg,0.00 mg,8.32 g
"Nuts, pecans",100 g,691,72g,6.2g,0,0.00 mg,40.5 mg,22.00 mcg,0.00 mcg,1.167 mg,...,71.97 g,6.180 g,40.801 g,21.614 g,0.00 mg,0.0 g,1.49 g,0.00 mg,0.00 mg,3.52 g
"Eggplant, raw",100 g,25,0.2g,,0,2.00 mg,6.9 mg,22.00 mcg,0.00 mcg,0.649 mg,...,0.18 g,0.034 g,0.016 g,0.076 g,0.00 mg,0.0 g,0.66 g,0.00 mg,0.00 mg,92.30 g
"Teff, uncooked",100 g,367,2.4g,0.4g,0,12.00 mg,13.1 mg,0,0,3.363 mg,...,2.38 g,0.449 g,0.589 g,1.071 g,0,0,2.37 g,0,0,8.82 g
"Sherbet, orange",100 g,144,2g,1.2g,1mg,46.00 mg,7.7 mg,4.00 mcg,0.00 mcg,0.063 mg,...,2.00 g,1.160 g,0.530 g,0.080 g,1.00 mg,0.0 g,0.40 g,0.00 mg,0.00 mg,66.10 g
"Cauliflower, raw",100 g,25,0.3g,0.1g,0,30.00 mg,44.3 mg,57.00 mcg,0.00 mcg,0.507 mg,...,0.28 g,0.130 g,0.034 g,0.031 g,0.00 mg,0.0 g,0.76 g,0.00 mg,0.00 mg,92.07 g
"Taro leaves, raw",100 g,42,0.7g,0.2g,0,3.00 mg,12.8 mg,126.00 mcg,0.00 mcg,1.513 mg,...,0.74 g,0.151 g,0.060 g,0.307 g,0.00 mg,0.0 g,1.92 g,0.00 mg,0.00 mg,85.66 g
"Lamb, raw, ground",100 g,282,23g,10g,73mg,59.00 mg,69.3 mg,18.00 mcg,0.00 mcg,5.960 mg,...,23.41 g,10.190 g,9.600 g,1.850 g,73.00 mg,0.0 g,0.87 g,0.00 mg,0.00 mg,59.47 g
"Cheese, camembert",100 g,300,24g,15g,72mg,842.00 mg,15.4 mg,62.00 mcg,0.00 mcg,0.630 mg,...,24.26 g,15.259 g,7.023 g,0.724 g,72.00 mg,0.0 g,3.68 g,0.00 mg,0.00 mg,51.80 g
Vegetarian fillets,100 g,290,18g,2.8g,0,490.00 mg,82.0 mg,102.00 mcg,0.00 mcg,12.000 mg,...,18.00 g,2.849 g,4.376 g,9.332 g,0.00 mg,0.0 g,5.00 g,0.00 mg,0.00 mg,45.00 g


#### Part I: Collecting the Units

In [184]:
nutrition.head()

Unnamed: 0_level_0,serving_size,calories,total_fat,saturated_fat,cholesterol,sodium,choline,folate,folic_acid,niacin,...,fat,saturated_fatty_acids,monounsaturated_fatty_acids,polyunsaturated_fatty_acids,fatty_acids_total_trans,alcohol,ash,caffeine,theobromine,water
name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
Cornstarch,100 g,381,0.1g,,0,9.00 mg,0.4 mg,0.00 mcg,0.00 mcg,0.000 mg,...,0.05 g,0.009 g,0.016 g,0.025 g,0.00 mg,0.0 g,0.09 g,0.00 mg,0.00 mg,8.32 g
"Nuts, pecans",100 g,691,72g,6.2g,0,0.00 mg,40.5 mg,22.00 mcg,0.00 mcg,1.167 mg,...,71.97 g,6.180 g,40.801 g,21.614 g,0.00 mg,0.0 g,1.49 g,0.00 mg,0.00 mg,3.52 g
"Eggplant, raw",100 g,25,0.2g,,0,2.00 mg,6.9 mg,22.00 mcg,0.00 mcg,0.649 mg,...,0.18 g,0.034 g,0.016 g,0.076 g,0.00 mg,0.0 g,0.66 g,0.00 mg,0.00 mg,92.30 g
"Teff, uncooked",100 g,367,2.4g,0.4g,0,12.00 mg,13.1 mg,0,0,3.363 mg,...,2.38 g,0.449 g,0.589 g,1.071 g,0,0,2.37 g,0,0,8.82 g
"Sherbet, orange",100 g,144,2g,1.2g,1mg,46.00 mg,7.7 mg,4.00 mcg,0.00 mcg,0.063 mg,...,2.00 g,1.160 g,0.530 g,0.080 g,1.00 mg,0.0 g,0.40 g,0.00 mg,0.00 mg,66.10 g


In [379]:
units = nutrition.astype(str).replace('[^a-zA-Z]', '', regex=True)

In [380]:
units

Unnamed: 0_level_0,Unnamed: 0,serving_size,calories,total_fat,saturated_fat,cholesterol,sodium,choline,folate,folic_acid,...,fat,saturated_fatty_acids,monounsaturated_fatty_acids,polyunsaturated_fatty_acids,fatty_acids_total_trans,alcohol,ash,caffeine,theobromine,water
name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
Cornstarch,,g,,g,,,mg,mg,mcg,mcg,...,g,g,g,g,mg,g,g,mg,mg,g
"Nuts, pecans",,g,,g,g,,mg,mg,mcg,mcg,...,g,g,g,g,mg,g,g,mg,mg,g
"Eggplant, raw",,g,,g,,,mg,mg,mcg,mcg,...,g,g,g,g,mg,g,g,mg,mg,g
"Teff, uncooked",,g,,g,g,,mg,mg,,,...,g,g,g,g,,,g,,,g
"Sherbet, orange",,g,,g,g,mg,mg,mg,mcg,mcg,...,g,g,g,g,mg,g,g,mg,mg,g
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
"Beef, raw, all grades, trimmed to 0"" fat, separable lean and fat, boneless, top round roast, round",,g,,g,g,mg,mg,mg,mcg,mcg,...,g,g,g,g,mg,g,g,mg,mg,g
"Lamb, cooked, separable lean only, composite of trimmed retail cuts, frozen, imported, New Zealand",,g,,g,g,mg,mg,,mcg,mcg,...,g,g,g,g,mg,,g,,,g
"Lamb, raw, separable lean and fat, composite of trimmed retail cuts, frozen, imported, New Zealand",,g,,g,g,mg,mg,,mcg,mcg,...,g,g,g,g,mg,,g,,,g
"Beef, raw, all grades, trimmed to 0"" fat, separable lean only, boneless, eye of round roast, round",,g,,g,g,mg,mg,mg,mcg,mcg,...,g,g,g,g,mg,g,g,mg,mg,g


In [365]:
nutrition.sample(20, axis=1).head()

Unnamed: 0_level_0,theobromine,selenium,saturated_fat,folate,fatty_acids_total_trans,isoleucine,lysine,caffeine,valine,tryptophan,saturated_fatty_acids,Unnamed: 0,niacin,carotene_beta,tocopherol_alpha,vitamin_b6,irom,polyunsaturated_fatty_acids,alanine,tyrosine
name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1
Cornstarch,0.00 mg,2.8 mcg,,0.00 mcg,0.00 mg,0.010 g,0.006 g,0.00 mg,0.014 g,0.001 g,0.009 g,0,0.000 mg,0.00 mcg,0.00 mg,0.000 mg,0.47 mg,0.025 g,0.019 g,0.010 g
"Nuts, pecans",0.00 mg,3.8 mcg,6.2g,22.00 mcg,0.00 mg,0.336 g,0.287 g,0.00 mg,0.411 g,0.093 g,6.180 g,1,1.167 mg,29.00 mcg,1.40 mg,0.210 mg,2.53 mg,21.614 g,0.397 g,0.215 g
"Eggplant, raw",0.00 mg,0.3 mcg,,22.00 mcg,0.00 mg,0.045 g,0.047 g,0.00 mg,0.053 g,0.009 g,0.034 g,2,0.649 mg,14.00 mcg,0.30 mg,0.084 mg,0.23 mg,0.076 g,0.051 g,0.027 g
"Teff, uncooked",0,4.4 mcg,0.4g,0,0,0.501 g,0.376 g,0,0.686 g,0.139 g,0.449 g,3,3.363 mg,5.00 mcg,0.08 mg,0.482 mg,7.63 mg,1.071 g,0.747 g,0.458 g
"Sherbet, orange",0.00 mg,1.5 mcg,1.2g,4.00 mcg,1.00 mg,0,0,0.00 mg,0,0,1.160 g,4,0.063 mg,1.00 mcg,0.01 mg,0.023 mg,0.14 mg,0.080 g,0,0


In [366]:
units.head()

Unnamed: 0_level_0,Unnamed: 0,serving_size,calories,total_fat,saturated_fat,cholesterol,sodium,choline,folate,folic_acid,...,fat,saturated_fatty_acids,monounsaturated_fatty_acids,polyunsaturated_fatty_acids,fatty_acids_total_trans,alcohol,ash,caffeine,theobromine,water
name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
Cornstarch,,g,,g,,,mg,mg,mcg,mcg,...,g,g,g,g,mg,g,g,mg,mg,g
"Nuts, pecans",,g,,g,g,,mg,mg,mcg,mcg,...,g,g,g,g,mg,g,g,mg,mg,g
"Eggplant, raw",,g,,g,,,mg,mg,mcg,mcg,...,g,g,g,g,mg,g,g,mg,mg,g
"Teff, uncooked",,g,,g,g,,mg,mg,,,...,g,g,g,g,,,g,,,g
"Sherbet, orange",,g,,g,g,mg,mg,mg,mcg,mcg,...,g,g,g,g,mg,g,g,mg,mg,g


In [367]:
units.saturated_fat.value_counts()

g      7199
nan    1590
Name: saturated_fat, dtype: int64

In [368]:
units.saturated_fat.mode()

0    g
Name: saturated_fat, dtype: object

In [369]:
units.mode()

Unnamed: 0.1,Unnamed: 0,serving_size,calories,total_fat,saturated_fat,cholesterol,sodium,choline,folate,folic_acid,...,fat,saturated_fatty_acids,monounsaturated_fatty_acids,polyunsaturated_fatty_acids,fatty_acids_total_trans,alcohol,ash,caffeine,theobromine,water
0,,g,,g,g,mg,mg,mg,mcg,mcg,...,g,g,g,g,mg,g,g,mg,mg,g


In [383]:
units = units.mode()

In [384]:
units

Unnamed: 0.1,Unnamed: 0,serving_size,calories,total_fat,saturated_fat,cholesterol,sodium,choline,folate,folic_acid,...,fat,saturated_fatty_acids,monounsaturated_fatty_acids,polyunsaturated_fatty_acids,fatty_acids_total_trans,alcohol,ash,caffeine,theobromine,water
0,,g,,g,g,mg,mg,mg,mcg,mcg,...,g,g,g,g,mg,g,g,mg,mg,g


#### The rename() Method

In [371]:
nutrition.head()

Unnamed: 0_level_0,Unnamed: 0,serving_size,calories,total_fat,saturated_fat,cholesterol,sodium,choline,folate,folic_acid,...,fat,saturated_fatty_acids,monounsaturated_fatty_acids,polyunsaturated_fatty_acids,fatty_acids_total_trans,alcohol,ash,caffeine,theobromine,water
name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
Cornstarch,0,100 g,381,0.1g,,0,9.00 mg,0.4 mg,0.00 mcg,0.00 mcg,...,0.05 g,0.009 g,0.016 g,0.025 g,0.00 mg,0.0 g,0.09 g,0.00 mg,0.00 mg,8.32 g
"Nuts, pecans",1,100 g,691,72g,6.2g,0,0.00 mg,40.5 mg,22.00 mcg,0.00 mcg,...,71.97 g,6.180 g,40.801 g,21.614 g,0.00 mg,0.0 g,1.49 g,0.00 mg,0.00 mg,3.52 g
"Eggplant, raw",2,100 g,25,0.2g,,0,2.00 mg,6.9 mg,22.00 mcg,0.00 mcg,...,0.18 g,0.034 g,0.016 g,0.076 g,0.00 mg,0.0 g,0.66 g,0.00 mg,0.00 mg,92.30 g
"Teff, uncooked",3,100 g,367,2.4g,0.4g,0,12.00 mg,13.1 mg,0,0,...,2.38 g,0.449 g,0.589 g,1.071 g,0,0,2.37 g,0,0,8.82 g
"Sherbet, orange",4,100 g,144,2g,1.2g,1mg,46.00 mg,7.7 mg,4.00 mcg,0.00 mcg,...,2.00 g,1.160 g,0.530 g,0.080 g,1.00 mg,0.0 g,0.40 g,0.00 mg,0.00 mg,66.10 g


In [372]:
df

Unnamed: 0,age,height,gender
0,12.0,1.72,M
1,,,F
2,14.0,1.91,
3,16.0,1.54,F


In [194]:
df.rename(index={0:'Pikachu'})

Unnamed: 0,age,weight,height
Pikachu,12.0,41.1,1.72
1,13.0,34.5,1.74
2,14.0,83.2,1.91
3,16.0,90.1,1.54


In [195]:
df.rename(index={0:'Pikachu', 1: 'Andy'})

Unnamed: 0,age,weight,height
Pikachu,12.0,41.1,1.72
Andy,13.0,34.5,1.74
2,14.0,83.2,1.91
3,16.0,90.1,1.54


In [196]:
df.rename(columns={'weight': 'Weight (kg)'})

Unnamed: 0,age,Weight (kg),height
0,12.0,41.1,1.72
1,13.0,34.5,1.74
2,14.0,83.2,1.91
3,16.0,90.1,1.54


In [197]:
df.rename(columns={'weight': 'Weight (kg)'}, index={0: 'Pikachu'}, inplace=False)

Unnamed: 0,age,Weight (kg),height
Pikachu,12.0,41.1,1.72
1,13.0,34.5,1.74
2,14.0,83.2,1.91
3,16.0,90.1,1.54


In [198]:
df

Unnamed: 0,age,weight,height
0,12.0,41.1,1.72
1,13.0,34.5,1.74
2,14.0,83.2,1.91
3,16.0,90.1,1.54


In [199]:
df.rename(mapper={'height': 'Height (m)'}, axis=0)

Unnamed: 0,age,weight,height
0,12.0,41.1,1.72
1,13.0,34.5,1.74
2,14.0,83.2,1.91
3,16.0,90.1,1.54


In [200]:
df.axes[1]

Index(['age', 'weight', 'height'], dtype='object')

In [201]:
df.axes[0]

RangeIndex(start=0, stop=4, step=1)

#### DataFrame dropna()

In [202]:
df

Unnamed: 0,age,weight,height
0,12.0,41.1,1.72
1,13.0,34.5,1.74
2,14.0,83.2,1.91
3,16.0,90.1,1.54


In [203]:
df.loc[2, 'weight'] = np.nan

In [204]:
df

Unnamed: 0,age,weight,height
0,12.0,41.1,1.72
1,13.0,34.5,1.74
2,14.0,,1.91
3,16.0,90.1,1.54


In [205]:
df.loc[1, :] = np.nan

In [206]:
df

Unnamed: 0,age,weight,height
0,12.0,41.1,1.72
1,,,
2,14.0,,1.91
3,16.0,90.1,1.54


In [207]:
df.dropna()

Unnamed: 0,age,weight,height
0,12.0,41.1,1.72
3,16.0,90.1,1.54


In [208]:
df.dropna(how='any', axis=0)

Unnamed: 0,age,weight,height
0,12.0,41.1,1.72
3,16.0,90.1,1.54


In [209]:
df.dropna(how='all', axis=0)

Unnamed: 0,age,weight,height
0,12.0,41.1,1.72
2,14.0,,1.91
3,16.0,90.1,1.54


In [210]:
# thresh

In [211]:
df.dropna(thresh=3, axis=0)

Unnamed: 0,age,weight,height
0,12.0,41.1,1.72
3,16.0,90.1,1.54


In [374]:
df.dropna(how='any', axis=0)

Unnamed: 0,age,height,gender
0,12.0,1.72,M
3,16.0,1.54,F


In [375]:
df.shape[1]

3

In [213]:
df.dropna(thresh=df.shape[1], axis=0)

Unnamed: 0,age,weight,height
0,12.0,41.1,1.72
3,16.0,90.1,1.54


In [214]:
df.shape

(4, 3)

In [215]:
df.dropna(axis=1)

0
1
2
3


In [216]:
df.dropna(how='any', axis=1)

0
1
2
3


In [377]:
df.dropna(how='all', axis=1)

Unnamed: 0,age,height,gender
0,12.0,1.72,M
1,,,F
2,14.0,1.91,
3,16.0,1.54,F


In [218]:
df.dropna(axis=1, thresh=3, inplace=True)

In [219]:
df

Unnamed: 0,age,height
0,12.0,1.72
1,,
2,14.0,1.91
3,16.0,1.54


#### BONUS - dropna() With Subset

In [220]:
df

Unnamed: 0,age,height
0,12.0,1.72
1,,
2,14.0,1.91
3,16.0,1.54


In [221]:
df['gender'] = ['M', 'F', np.nan, 'F']

In [222]:
df

Unnamed: 0,age,height,gender
0,12.0,1.72,M
1,,,F
2,14.0,1.91,
3,16.0,1.54,F


In [223]:
df.dropna()

Unnamed: 0,age,height,gender
0,12.0,1.72,M
3,16.0,1.54,F


In [224]:
df.dropna(axis=0, how='any')

Unnamed: 0,age,height,gender
0,12.0,1.72,M
3,16.0,1.54,F


In [225]:
# the subset param

In [226]:
df.dropna(axis=0, how='any', subset=['gender'])
#here the dropna mehtod is made limited only to a specific column in subset i.e. 'gender', therefore we see that the 
#row with NAN in the gender column is dropped


Unnamed: 0,age,height,gender
0,12.0,1.72,M
1,,,F
3,16.0,1.54,F


In [227]:
df.dropna(axis=0, how='any', subset=['age'])

Unnamed: 0,age,height,gender
0,12.0,1.72,M
2,14.0,1.91,
3,16.0,1.54,F


In [228]:
df

Unnamed: 0,age,height,gender
0,12.0,1.72,M
1,,,F
2,14.0,1.91,
3,16.0,1.54,F


In [229]:
df

Unnamed: 0,age,height,gender
0,12.0,1.72,M
1,,,F
2,14.0,1.91,
3,16.0,1.54,F


In [378]:
df.dropna(axis=1, how='any', subset=[0, 2])

Unnamed: 0,age,height
0,12.0,1.72
1,,
2,14.0,1.91
3,16.0,1.54


In [231]:
# word of the lecture: orthogonal -> at a right angle

#### Part II: Merging Units With Column Names

In [232]:
nutrition.head()

Unnamed: 0_level_0,serving_size,calories,total_fat,saturated_fat,cholesterol,sodium,choline,folate,folic_acid,niacin,...,fat,saturated_fatty_acids,monounsaturated_fatty_acids,polyunsaturated_fatty_acids,fatty_acids_total_trans,alcohol,ash,caffeine,theobromine,water
name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
Cornstarch,100 g,381,0.1g,,0,9.00 mg,0.4 mg,0.00 mcg,0.00 mcg,0.000 mg,...,0.05 g,0.009 g,0.016 g,0.025 g,0.00 mg,0.0 g,0.09 g,0.00 mg,0.00 mg,8.32 g
"Nuts, pecans",100 g,691,72g,6.2g,0,0.00 mg,40.5 mg,22.00 mcg,0.00 mcg,1.167 mg,...,71.97 g,6.180 g,40.801 g,21.614 g,0.00 mg,0.0 g,1.49 g,0.00 mg,0.00 mg,3.52 g
"Eggplant, raw",100 g,25,0.2g,,0,2.00 mg,6.9 mg,22.00 mcg,0.00 mcg,0.649 mg,...,0.18 g,0.034 g,0.016 g,0.076 g,0.00 mg,0.0 g,0.66 g,0.00 mg,0.00 mg,92.30 g
"Teff, uncooked",100 g,367,2.4g,0.4g,0,12.00 mg,13.1 mg,0,0,3.363 mg,...,2.38 g,0.449 g,0.589 g,1.071 g,0,0,2.37 g,0,0,8.82 g
"Sherbet, orange",100 g,144,2g,1.2g,1mg,46.00 mg,7.7 mg,4.00 mcg,0.00 mcg,0.063 mg,...,2.00 g,1.160 g,0.530 g,0.080 g,1.00 mg,0.0 g,0.40 g,0.00 mg,0.00 mg,66.10 g


In [233]:
units

Unnamed: 0,serving_size,calories,total_fat,saturated_fat,cholesterol,sodium,choline,folate,folic_acid,niacin,...,fat,saturated_fatty_acids,monounsaturated_fatty_acids,polyunsaturated_fatty_acids,fatty_acids_total_trans,alcohol,ash,caffeine,theobromine,water
0,g,,g,g,mg,mg,mg,mcg,mcg,mg,...,g,g,g,g,mg,g,g,mg,mg,g


In [234]:
# the rename method

In [235]:
### DETOUR: dataframe iteration

In [236]:
for k in units:
  print(k)

serving_size
calories
total_fat
saturated_fat
cholesterol
sodium
choline
folate
folic_acid
niacin
pantothenic_acid
riboflavin
thiamin
vitamin_a
vitamin_a_rae
carotene_alpha
carotene_beta
cryptoxanthin_beta
lutein_zeaxanthin
lucopene
vitamin_b12
vitamin_b6
vitamin_c
vitamin_d
vitamin_e
tocopherol_alpha
vitamin_k
calcium
copper
irom
magnesium
manganese
phosphorous
potassium
selenium
zink
protein
alanine
arginine
aspartic_acid
cystine
glutamic_acid
glycine
histidine
hydroxyproline
isoleucine
leucine
lysine
methionine
phenylalanine
proline
serine
threonine
tryptophan
tyrosine
valine
carbohydrate
fiber
sugars
fructose
galactose
glucose
lactose
maltose
sucrose
fat
saturated_fatty_acids
monounsaturated_fatty_acids
polyunsaturated_fatty_acids
fatty_acids_total_trans
alcohol
ash
caffeine
theobromine
water


In [237]:
for k in units:
  print(units[k].at[0])

g

g
g
mg
mg
mg
mcg
mcg
mg
mg
mg
mg
IU
mcg
mcg
mcg
mcg
mcg

mcg
mg
mg
IU
mg
mg
mcg
mg
mg
mg
mg
mg
mg
mg
mcg
mg
g
g
g
g
g
g
g
g

g
g
g
g
g
g
g
g
g
g
g
g
g
g






g
g
g
g
mg
g
g
mg
mg
g


In [238]:
units = units.replace('', np.nan).dropna(axis=1)

In [239]:
for k in units:
  print(units[k].at[0])

g
g
g
mg
mg
mg
mcg
mcg
mg
mg
mg
mg
IU
mcg
mcg
mcg
mcg
mcg
mcg
mg
mg
IU
mg
mg
mcg
mg
mg
mg
mg
mg
mg
mg
mcg
mg
g
g
g
g
g
g
g
g
g
g
g
g
g
g
g
g
g
g
g
g
g
g
g
g
g
g
mg
g
g
mg
mg
g


In [240]:
# {
#     'serving_size': 'serging_size_g',
#     'total_fat': 'total_fat_g'
#     ...
# }

In [241]:
mapper = {k: k + "_" + units[k].at[0] for k in units}

In [242]:
mapper

{'serving_size': 'serving_size_g',
 'total_fat': 'total_fat_g',
 'saturated_fat': 'saturated_fat_g',
 'cholesterol': 'cholesterol_mg',
 'sodium': 'sodium_mg',
 'choline': 'choline_mg',
 'folate': 'folate_mcg',
 'folic_acid': 'folic_acid_mcg',
 'niacin': 'niacin_mg',
 'pantothenic_acid': 'pantothenic_acid_mg',
 'riboflavin': 'riboflavin_mg',
 'thiamin': 'thiamin_mg',
 'vitamin_a': 'vitamin_a_IU',
 'vitamin_a_rae': 'vitamin_a_rae_mcg',
 'carotene_alpha': 'carotene_alpha_mcg',
 'carotene_beta': 'carotene_beta_mcg',
 'cryptoxanthin_beta': 'cryptoxanthin_beta_mcg',
 'lutein_zeaxanthin': 'lutein_zeaxanthin_mcg',
 'vitamin_b12': 'vitamin_b12_mcg',
 'vitamin_b6': 'vitamin_b6_mg',
 'vitamin_c': 'vitamin_c_mg',
 'vitamin_d': 'vitamin_d_IU',
 'vitamin_e': 'vitamin_e_mg',
 'tocopherol_alpha': 'tocopherol_alpha_mg',
 'vitamin_k': 'vitamin_k_mcg',
 'calcium': 'calcium_mg',
 'copper': 'copper_mg',
 'irom': 'irom_mg',
 'magnesium': 'magnesium_mg',
 'manganese': 'manganese_mg',
 'phosphorous': 'phospho

In [243]:
nutrition.rename(columns=mapper, inplace=True)

#### Part III: Removing Units From Values

In [244]:
nutrition.head()

Unnamed: 0_level_0,serving_size_g,calories,total_fat_g,saturated_fat_g,cholesterol_mg,sodium_mg,choline_mg,folate_mcg,folic_acid_mcg,niacin_mg,...,fat_g,saturated_fatty_acids_g,monounsaturated_fatty_acids_g,polyunsaturated_fatty_acids_g,fatty_acids_total_trans_mg,alcohol_g,ash_g,caffeine_mg,theobromine_mg,water_g
name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
Cornstarch,100 g,381,0.1g,,0,9.00 mg,0.4 mg,0.00 mcg,0.00 mcg,0.000 mg,...,0.05 g,0.009 g,0.016 g,0.025 g,0.00 mg,0.0 g,0.09 g,0.00 mg,0.00 mg,8.32 g
"Nuts, pecans",100 g,691,72g,6.2g,0,0.00 mg,40.5 mg,22.00 mcg,0.00 mcg,1.167 mg,...,71.97 g,6.180 g,40.801 g,21.614 g,0.00 mg,0.0 g,1.49 g,0.00 mg,0.00 mg,3.52 g
"Eggplant, raw",100 g,25,0.2g,,0,2.00 mg,6.9 mg,22.00 mcg,0.00 mcg,0.649 mg,...,0.18 g,0.034 g,0.016 g,0.076 g,0.00 mg,0.0 g,0.66 g,0.00 mg,0.00 mg,92.30 g
"Teff, uncooked",100 g,367,2.4g,0.4g,0,12.00 mg,13.1 mg,0,0,3.363 mg,...,2.38 g,0.449 g,0.589 g,1.071 g,0,0,2.37 g,0,0,8.82 g
"Sherbet, orange",100 g,144,2g,1.2g,1mg,46.00 mg,7.7 mg,4.00 mcg,0.00 mcg,0.063 mg,...,2.00 g,1.160 g,0.530 g,0.080 g,1.00 mg,0.0 g,0.40 g,0.00 mg,0.00 mg,66.10 g


In [245]:
nutrition.replace('[a-zA-Z]', '', regex=True, inplace=True)

In [246]:
nutrition.head()

Unnamed: 0_level_0,serving_size_g,calories,total_fat_g,saturated_fat_g,cholesterol_mg,sodium_mg,choline_mg,folate_mcg,folic_acid_mcg,niacin_mg,...,fat_g,saturated_fatty_acids_g,monounsaturated_fatty_acids_g,polyunsaturated_fatty_acids_g,fatty_acids_total_trans_mg,alcohol_g,ash_g,caffeine_mg,theobromine_mg,water_g
name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
Cornstarch,100,381,0.1,,0,9.0,0.4,0.0,0.0,0.0,...,0.05,0.009,0.016,0.025,0.0,0.0,0.09,0.0,0.0,8.32
"Nuts, pecans",100,691,72.0,6.2,0,0.0,40.5,22.0,0.0,1.167,...,71.97,6.18,40.801,21.614,0.0,0.0,1.49,0.0,0.0,3.52
"Eggplant, raw",100,25,0.2,,0,2.0,6.9,22.0,0.0,0.649,...,0.18,0.034,0.016,0.076,0.0,0.0,0.66,0.0,0.0,92.3
"Teff, uncooked",100,367,2.4,0.4,0,12.0,13.1,0.0,0.0,3.363,...,2.38,0.449,0.589,1.071,0.0,0.0,2.37,0.0,0.0,8.82
"Sherbet, orange",100,144,2.0,1.2,1,46.0,7.7,4.0,0.0,0.063,...,2.0,1.16,0.53,0.08,1.0,0.0,0.4,0.0,0.0,66.1


In [247]:
nutrition.dtypes.value_counts()

object    73
int64      2
dtype: int64

In [248]:
nutrition = nutrition.astype(float)

In [249]:
nutrition.total_fat_g.sum()

92784.20000000001

In [250]:
nutrition.info(verbose=False)

<class 'pandas.core.frame.DataFrame'>
Index: 8789 entries, Cornstarch to Beef, raw, all grades, trimmed to 0" fat, separable lean only, boneless, eye of round steak, round
Columns: 75 entries, serving_size_g to water_g
dtypes: float64(75)
memory usage: 5.3+ MB


#### Filtering In 2D

In [251]:
nutrition.shape

(8789, 75)

In [252]:
nutrition.head(3)

Unnamed: 0_level_0,serving_size_g,calories,total_fat_g,saturated_fat_g,cholesterol_mg,sodium_mg,choline_mg,folate_mcg,folic_acid_mcg,niacin_mg,...,fat_g,saturated_fatty_acids_g,monounsaturated_fatty_acids_g,polyunsaturated_fatty_acids_g,fatty_acids_total_trans_mg,alcohol_g,ash_g,caffeine_mg,theobromine_mg,water_g
name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
Cornstarch,100.0,381.0,0.1,,0.0,9.0,0.4,0.0,0.0,0.0,...,0.05,0.009,0.016,0.025,0.0,0.0,0.09,0.0,0.0,8.32
"Nuts, pecans",100.0,691.0,72.0,6.2,0.0,0.0,40.5,22.0,0.0,1.167,...,71.97,6.18,40.801,21.614,0.0,0.0,1.49,0.0,0.0,3.52
"Eggplant, raw",100.0,25.0,0.2,,0.0,2.0,6.9,22.0,0.0,0.649,...,0.18,0.034,0.016,0.076,0.0,0.0,0.66,0.0,0.0,92.3


In [253]:
nutrition.filter(like="octopus", axis=0)

Unnamed: 0_level_0,serving_size_g,calories,total_fat_g,saturated_fat_g,cholesterol_mg,sodium_mg,choline_mg,folate_mcg,folic_acid_mcg,niacin_mg,...,fat_g,saturated_fatty_acids_g,monounsaturated_fatty_acids_g,polyunsaturated_fatty_acids_g,fatty_acids_total_trans_mg,alcohol_g,ash_g,caffeine_mg,theobromine_mg,water_g
name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
"Mollusks, raw, common, octopus",100.0,82.0,1.0,0.2,48.0,230.0,65.0,16.0,0.0,2.1,...,1.04,0.227,0.162,0.239,48.0,0.0,1.6,0.0,0.0,80.25
"Mollusks, moist heat, cooked, common, octopus",100.0,164.0,2.1,0.5,96.0,460.0,81.0,24.0,0.0,3.78,...,2.08,0.453,0.324,0.477,96.0,0.0,3.2,0.0,0.0,60.5


In [254]:
# filter with regex

In [255]:
nutrition.filter(regex='octopus', axis=0)

Unnamed: 0_level_0,serving_size_g,calories,total_fat_g,saturated_fat_g,cholesterol_mg,sodium_mg,choline_mg,folate_mcg,folic_acid_mcg,niacin_mg,...,fat_g,saturated_fatty_acids_g,monounsaturated_fatty_acids_g,polyunsaturated_fatty_acids_g,fatty_acids_total_trans_mg,alcohol_g,ash_g,caffeine_mg,theobromine_mg,water_g
name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
"Mollusks, raw, common, octopus",100.0,82.0,1.0,0.2,48.0,230.0,65.0,16.0,0.0,2.1,...,1.04,0.227,0.162,0.239,48.0,0.0,1.6,0.0,0.0,80.25
"Mollusks, moist heat, cooked, common, octopus",100.0,164.0,2.1,0.5,96.0,460.0,81.0,24.0,0.0,3.78,...,2.08,0.453,0.324,0.477,96.0,0.0,3.2,0.0,0.0,60.5


In [256]:
nutrition.filter(regex='[Oo]ctopus', axis=0)

Unnamed: 0_level_0,serving_size_g,calories,total_fat_g,saturated_fat_g,cholesterol_mg,sodium_mg,choline_mg,folate_mcg,folic_acid_mcg,niacin_mg,...,fat_g,saturated_fatty_acids_g,monounsaturated_fatty_acids_g,polyunsaturated_fatty_acids_g,fatty_acids_total_trans_mg,alcohol_g,ash_g,caffeine_mg,theobromine_mg,water_g
name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
Octopus (Alaska Native),100.0,56.0,0.8,0.2,41.0,0.0,0.0,0.0,0.0,2.0,...,0.8,0.2,0.0,0.2,41.0,0.0,1.5,0.0,0.0,84.0
"Mollusks, raw, common, octopus",100.0,82.0,1.0,0.2,48.0,230.0,65.0,16.0,0.0,2.1,...,1.04,0.227,0.162,0.239,48.0,0.0,1.6,0.0,0.0,80.25
"Mollusks, moist heat, cooked, common, octopus",100.0,164.0,2.1,0.5,96.0,460.0,81.0,24.0,0.0,3.78,...,2.08,0.453,0.324,0.477,96.0,0.0,3.2,0.0,0.0,60.5


In [257]:
nutrition.filter(regex='(?i)octopus', axis=0)

Unnamed: 0_level_0,serving_size_g,calories,total_fat_g,saturated_fat_g,cholesterol_mg,sodium_mg,choline_mg,folate_mcg,folic_acid_mcg,niacin_mg,...,fat_g,saturated_fatty_acids_g,monounsaturated_fatty_acids_g,polyunsaturated_fatty_acids_g,fatty_acids_total_trans_mg,alcohol_g,ash_g,caffeine_mg,theobromine_mg,water_g
name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
Octopus (Alaska Native),100.0,56.0,0.8,0.2,41.0,0.0,0.0,0.0,0.0,2.0,...,0.8,0.2,0.0,0.2,41.0,0.0,1.5,0.0,0.0,84.0
"Mollusks, raw, common, octopus",100.0,82.0,1.0,0.2,48.0,230.0,65.0,16.0,0.0,2.1,...,1.04,0.227,0.162,0.239,48.0,0.0,1.6,0.0,0.0,80.25
"Mollusks, moist heat, cooked, common, octopus",100.0,164.0,2.1,0.5,96.0,460.0,81.0,24.0,0.0,3.78,...,2.08,0.453,0.324,0.477,96.0,0.0,3.2,0.0,0.0,60.5


In [258]:
# filter along both dims

In [387]:
nutrition.filter(regex='(?i)octopus', axis=0).filter(items=['cholesterol_mg', 'serving_size_g', 'calories'] ,axis=1)

Unnamed: 0_level_0,calories
name,Unnamed: 1_level_1
Octopus (Alaska Native),56
"Mollusks, raw, common, octopus",82
"Mollusks, moist heat, cooked, common, octopus",164


In [260]:
nutrition.filter(regex='(?i)octopus', axis=0)\
         .loc[:, ['cholesterol_mg', 'serving_size_g', 'calories']]

Unnamed: 0_level_0,cholesterol_mg,serving_size_g,calories
name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Octopus (Alaska Native),41.0,100.0,56.0
"Mollusks, raw, common, octopus",48.0,100.0,82.0
"Mollusks, moist heat, cooked, common, octopus",96.0,100.0,164.0


#### DataFrame Sorting

In [261]:
nutrition.head(3)

Unnamed: 0_level_0,serving_size_g,calories,total_fat_g,saturated_fat_g,cholesterol_mg,sodium_mg,choline_mg,folate_mcg,folic_acid_mcg,niacin_mg,...,fat_g,saturated_fatty_acids_g,monounsaturated_fatty_acids_g,polyunsaturated_fatty_acids_g,fatty_acids_total_trans_mg,alcohol_g,ash_g,caffeine_mg,theobromine_mg,water_g
name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
Cornstarch,100.0,381.0,0.1,,0.0,9.0,0.4,0.0,0.0,0.0,...,0.05,0.009,0.016,0.025,0.0,0.0,0.09,0.0,0.0,8.32
"Nuts, pecans",100.0,691.0,72.0,6.2,0.0,0.0,40.5,22.0,0.0,1.167,...,71.97,6.18,40.801,21.614,0.0,0.0,1.49,0.0,0.0,3.52
"Eggplant, raw",100.0,25.0,0.2,,0.0,2.0,6.9,22.0,0.0,0.649,...,0.18,0.034,0.016,0.076,0.0,0.0,0.66,0.0,0.0,92.3


In [262]:
nutrition.vitamin_b12_mcg

name
Cornstarch                                                                                            0.00
Nuts, pecans                                                                                          0.00
Eggplant, raw                                                                                         0.00
Teff, uncooked                                                                                        0.00
Sherbet, orange                                                                                       0.13
                                                                                                      ... 
Beef, raw, all grades, trimmed to 0" fat, separable lean and fat, boneless, top round roast, round    1.64
Lamb, cooked, separable lean only, composite of trimmed retail cuts, frozen, imported, New Zealand    2.95
Lamb, raw, separable lean and fat, composite of trimmed retail cuts, frozen, imported, New Zealand    2.42
Beef, raw, all grades, trimmed t

In [263]:
type(nutrition.vitamin_b12_mcg)

pandas.core.series.Series

In [264]:
nutrition.vitamin_b12_mcg.sort_values()

name
Cornstarch                                                                           0.00
Apricots, stewed, sulfured, dehydrated (low-moisture)                                0.00
Cocoa, processed with alkali, unsweetened, dry powder                                0.00
Tomato products, with herbs and cheese, sauce, canned                                0.00
Mothbeans, without salt, boiled, cooked, mature seeds                                0.00
                                                                                    ...  
Veal, braised, cooked, liver, variety meats and by-products                         84.60
Lamb, pan-fried, cooked, liver, variety meats and by-products                       85.70
Lamb, raw, liver, variety meats and by-products                                     90.05
Beef, boiled, cooked, variety meats and by-products liver, imported, New Zealand    96.00
Mollusks, moist heat, cooked, mixed species, clam                                   98.89
Name:

In [265]:
nutrition.sort_values(by=['cholesterol_mg', 'sodium_mg'], ascending=[False, True])

Unnamed: 0_level_0,serving_size_g,calories,total_fat_g,saturated_fat_g,cholesterol_mg,sodium_mg,choline_mg,folate_mcg,folic_acid_mcg,niacin_mg,...,fat_g,saturated_fatty_acids_g,monounsaturated_fatty_acids_g,polyunsaturated_fatty_acids_g,fatty_acids_total_trans_mg,alcohol_g,ash_g,caffeine_mg,theobromine_mg,water_g
name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
"Beef, simmered, cooked, brain, variety meats and by-products",100.0,151.0,11.0,2.4,3100.0,108.0,490.9,5.0,0.0,3.620,...,10.53,2.394,1.882,1.632,3100.0,0.0,1.46,0.0,0.0,74.86
"Veal, braised, cooked, brain, variety meats and by-products",100.0,136.0,9.6,2.2,3100.0,156.0,0.0,3.0,0.0,2.430,...,9.63,2.180,1.740,1.490,3100.0,0.0,1.40,0.0,0.0,76.89
"Beef, raw, brain, variety meats and by-products",100.0,143.0,10.0,2.3,3010.0,126.0,0.0,3.0,0.0,3.550,...,10.30,2.300,1.890,1.586,3010.0,0.0,1.51,0.0,0.0,76.29
"Lamb, soaked and fried, cooked, brains, imported, New Zealand",100.0,154.0,11.0,1.4,2559.0,101.0,0.0,0.0,0.0,2.995,...,10.92,1.365,4.168,0.999,2559.0,0.0,3.39,0.0,0.0,73.11
"Pork, braised, cooked, brain, variety meats and by-products, fresh",100.0,138.0,9.5,2.2,2552.0,91.0,0.0,4.0,0.0,3.330,...,9.51,2.150,1.720,1.470,2552.0,0.0,1.40,0.0,0.0,75.88
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
"Leavening agents, sodium aluminum sulfate, double-acting, baking powder",100.0,53.0,0.0,,0.0,10600.0,0.0,0.0,0.0,0.000,...,0.00,0.000,0.000,0.000,0.0,0.0,67.30,0.0,0.0,5.00
"Seasoning mix, coriander & annatto, sazon, dry",100.0,0.0,0.0,,0.0,17000.0,0.0,0.0,0.0,0.000,...,0.00,0.000,0.000,0.000,0.0,0.0,99.80,0.0,0.0,0.20
"Desserts, unsweetened, tablets, rennin",100.0,84.0,0.1,,0.0,26050.0,0.0,0.0,0.0,0.000,...,0.10,0.041,0.038,0.007,0.0,0.0,72.50,0.0,0.0,6.50
"Leavening agents, baking soda",100.0,0.0,0.0,,0.0,27360.0,0.0,0.0,0.0,0.000,...,0.00,0.000,0.000,0.000,0.0,0.0,36.90,0.0,0.0,0.20


In [266]:
# brain composition detour

In [267]:
nutrition.loc['Beef, simmered, cooked, brain, variety meats and by-products'].filter(like='_g').sort_values(ascending=False)

serving_size_g                   100.000
water_g                           74.860
protein_g                         11.670
total_fat_g                       11.000
fat_g                             10.530
saturated_fat_g                    2.400
saturated_fatty_acids_g            2.394
monounsaturated_fatty_acids_g      1.882
polyunsaturated_fatty_acids_g      1.632
carbohydrate_g                     1.480
ash_g                              1.460
threonine_g                        0.000
alcohol_g                          0.000
sugars_g                           0.000
fiber_g                            0.000
valine_g                           0.000
tyrosine_g                         0.000
tryptophan_g                       0.000
serine_g                           0.000
phenylalanine_g                    0.000
methionine_g                       0.000
lysine_g                           0.000
leucine_g                          0.000
isoleucine_g                       0.000
histidine_g     

#### Using Series between() With DataFrames

In [268]:
# nutrition.between()

In [269]:
nutrition.head()

Unnamed: 0_level_0,serving_size_g,calories,total_fat_g,saturated_fat_g,cholesterol_mg,sodium_mg,choline_mg,folate_mcg,folic_acid_mcg,niacin_mg,...,fat_g,saturated_fatty_acids_g,monounsaturated_fatty_acids_g,polyunsaturated_fatty_acids_g,fatty_acids_total_trans_mg,alcohol_g,ash_g,caffeine_mg,theobromine_mg,water_g
name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
Cornstarch,100.0,381.0,0.1,,0.0,9.0,0.4,0.0,0.0,0.0,...,0.05,0.009,0.016,0.025,0.0,0.0,0.09,0.0,0.0,8.32
"Nuts, pecans",100.0,691.0,72.0,6.2,0.0,0.0,40.5,22.0,0.0,1.167,...,71.97,6.18,40.801,21.614,0.0,0.0,1.49,0.0,0.0,3.52
"Eggplant, raw",100.0,25.0,0.2,,0.0,2.0,6.9,22.0,0.0,0.649,...,0.18,0.034,0.016,0.076,0.0,0.0,0.66,0.0,0.0,92.3
"Teff, uncooked",100.0,367.0,2.4,0.4,0.0,12.0,13.1,0.0,0.0,3.363,...,2.38,0.449,0.589,1.071,0.0,0.0,2.37,0.0,0.0,8.82
"Sherbet, orange",100.0,144.0,2.0,1.2,1.0,46.0,7.7,4.0,0.0,0.063,...,2.0,1.16,0.53,0.08,1.0,0.0,0.4,0.0,0.0,66.1


In [270]:
nutrition.calories.head(10)

name
Cornstarch            381.0
Nuts, pecans          691.0
Eggplant, raw          25.0
Teff, uncooked        367.0
Sherbet, orange       144.0
Cauliflower, raw       25.0
Taro leaves, raw       42.0
Lamb, raw, ground     282.0
Cheese, camembert     300.0
Vegetarian fillets    290.0
Name: calories, dtype: float64

In [271]:
nutrition.calories.shape

(8789,)

In [272]:
nutrition.shape

(8789, 75)

In [273]:
nutrition.calories.between(20, 60)

name
Cornstarch                                                                                            False
Nuts, pecans                                                                                          False
Eggplant, raw                                                                                          True
Teff, uncooked                                                                                        False
Sherbet, orange                                                                                       False
                                                                                                      ...  
Beef, raw, all grades, trimmed to 0" fat, separable lean and fat, boneless, top round roast, round    False
Lamb, cooked, separable lean only, composite of trimmed retail cuts, frozen, imported, New Zealand    False
Lamb, raw, separable lean and fat, composite of trimmed retail cuts, frozen, imported, New Zealand    False
Beef, raw, all grades, 

In [274]:
nutrition.calories.between(20, 60).shape

(8789,)

In [275]:
nutrition[nutrition.calories.between(20,60)].sample(4)

Unnamed: 0_level_0,serving_size_g,calories,total_fat_g,saturated_fat_g,cholesterol_mg,sodium_mg,choline_mg,folate_mcg,folic_acid_mcg,niacin_mg,...,fat_g,saturated_fatty_acids_g,monounsaturated_fatty_acids_g,polyunsaturated_fatty_acids_g,fatty_acids_total_trans_mg,alcohol_g,ash_g,caffeine_mg,theobromine_mg,water_g
name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
"Chrysanthemum leaves, raw",100.0,24.0,0.6,,0.0,118.0,0.0,177.0,0.0,0.531,...,0.56,0.0,0.0,0.0,0.0,0.0,1.67,0.0,0.0,91.4
"CAMPBELL'S CHUNKY Soups, Grilled Chicken & Sausage Gumbo Soup",100.0,57.0,1.2,0.6,8.0,347.0,0.0,0.0,0.0,0.0,...,1.22,0.612,0.0,0.0,8.0,0.0,1.54,0.0,0.0,85.6
"Beverages, without caffeine, lemon-lime, SPRITE, carbonated",100.0,40.0,0.0,,0.0,9.0,0.4,0.0,0.0,0.015,...,0.02,0.0,0.0,0.0,0.0,0.0,0.01,0.0,0.0,89.78
"CAMPBELL'S Soup on the Go, Chicken & Stars Soup",100.0,23.0,0.5,0.2,2.0,315.0,0.0,0.0,0.0,0.0,...,0.49,0.164,0.0,0.0,2.0,0.0,1.25,0.0,0.0,94.0


#### Min, Max And Idx[MinMax]

In [276]:
nutrition.head()

Unnamed: 0_level_0,serving_size_g,calories,total_fat_g,saturated_fat_g,cholesterol_mg,sodium_mg,choline_mg,folate_mcg,folic_acid_mcg,niacin_mg,...,fat_g,saturated_fatty_acids_g,monounsaturated_fatty_acids_g,polyunsaturated_fatty_acids_g,fatty_acids_total_trans_mg,alcohol_g,ash_g,caffeine_mg,theobromine_mg,water_g
name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
Cornstarch,100.0,381.0,0.1,,0.0,9.0,0.4,0.0,0.0,0.0,...,0.05,0.009,0.016,0.025,0.0,0.0,0.09,0.0,0.0,8.32
"Nuts, pecans",100.0,691.0,72.0,6.2,0.0,0.0,40.5,22.0,0.0,1.167,...,71.97,6.18,40.801,21.614,0.0,0.0,1.49,0.0,0.0,3.52
"Eggplant, raw",100.0,25.0,0.2,,0.0,2.0,6.9,22.0,0.0,0.649,...,0.18,0.034,0.016,0.076,0.0,0.0,0.66,0.0,0.0,92.3
"Teff, uncooked",100.0,367.0,2.4,0.4,0.0,12.0,13.1,0.0,0.0,3.363,...,2.38,0.449,0.589,1.071,0.0,0.0,2.37,0.0,0.0,8.82
"Sherbet, orange",100.0,144.0,2.0,1.2,1.0,46.0,7.7,4.0,0.0,0.063,...,2.0,1.16,0.53,0.08,1.0,0.0,0.4,0.0,0.0,66.1


In [277]:
nutrition.max(axis=1)

name
Cornstarch                                                                                            381.0
Nuts, pecans                                                                                          691.0
Eggplant, raw                                                                                         229.0
Teff, uncooked                                                                                        429.0
Sherbet, orange                                                                                       144.0
                                                                                                      ...  
Beef, raw, all grades, trimmed to 0" fat, separable lean and fat, boneless, top round roast, round    311.0
Lamb, cooked, separable lean only, composite of trimmed retail cuts, frozen, imported, New Zealand    246.0
Lamb, raw, separable lean and fat, composite of trimmed retail cuts, frozen, imported, New Zealand    277.0
Beef, raw, all grades, 

In [278]:
# what food has the most potassium

In [279]:
nutrition.potassium_mg.max()

16500.0

In [280]:
nutrition.potassium_mg.idxmax()

'Leavening agents, cream of tartar'

In [281]:
nutrition.potassium_mg.sort_values(ascending=False).head(10)

name
Leavening agents, cream of tartar                         16500.0
Leavening agents, low-sodium, baking powder               10100.0
Parsley, freeze-dried                                      6300.0
Beverages, unsweetened, decaffeinated, instant, tea        6040.0
Beverages, powder, unsweetened, instant, tea               6040.0
Spices, dried, chervil                                     4740.0
Spices, dried, coriander leaf                              4466.0
Celery flakes, dried                                       4388.0
Beverages, powder, regular, instant, coffee                3535.0
Beverages, half the caffeine, regular, instant, coffee     3535.0
Name: potassium_mg, dtype: float64

In [282]:
# targeting: potassium to sodium == 16

In [283]:
K_to_NA = (nutrition.potassium_mg.replace(0,1)/nutrition.sodium_mg.replace(0, 1))\
          .sort_values(ascending=False)

In [284]:
K_to_NA.head(10)

name
Peanut flour, low fat                                           1358.0
Nuts, raw, pistachio nuts                                       1025.0
Beverages, reduced calorie, with whitener, instant, coffee       909.0
Soybeans, raw, mature seeds                                      898.5
Soy meal, raw, defatted                                          830.0
Babyfood, dry, with bananas, rice, cereal                        769.0
Nuts, without salt added, dry roasted, hazelnuts or filberts     755.0
Soy protein concentrate, produced by alcohol extraction          734.0
Nuts, almonds                                                    733.0
Nuts, full fat, acorn flour                                      712.0
dtype: float64

In [285]:
K_to_NA[K_to_NA.between(14,18)].sample(20)

name
Radishes, raw, white icicle                                                                                        17.500000
Tomato products, without salt added, puree, canned                                                                 15.678571
Catsup, low sodium                                                                                                 14.050000
Corn, raw, white, sweet                                                                                            18.000000
Turnip greens, without salt, drained, boiled, cooked, frozen                                                       14.933333
Waterchestnuts, solids and liquids, canned, chinese                                                                14.750000
Grapes, solids and liquids, water pack, thompson seedless, canned                                                  17.833333
Corn, raw, yellow, sweet                                                                                           18.00

#### DataFrame nlargest() And nsmallest()

In [286]:
nutrition.potassium_mg.sort_values(ascending=False).head(10)

name
Leavening agents, cream of tartar                         16500.0
Leavening agents, low-sodium, baking powder               10100.0
Parsley, freeze-dried                                      6300.0
Beverages, unsweetened, decaffeinated, instant, tea        6040.0
Beverages, powder, unsweetened, instant, tea               6040.0
Spices, dried, chervil                                     4740.0
Spices, dried, coriander leaf                              4466.0
Celery flakes, dried                                       4388.0
Beverages, powder, regular, instant, coffee                3535.0
Beverages, half the caffeine, regular, instant, coffee     3535.0
Name: potassium_mg, dtype: float64

In [287]:
nutrition.nlargest(10, columns='potassium_mg').potassium_mg

name
Leavening agents, cream of tartar                         16500.0
Leavening agents, low-sodium, baking powder               10100.0
Parsley, freeze-dried                                      6300.0
Beverages, powder, unsweetened, instant, tea               6040.0
Beverages, unsweetened, decaffeinated, instant, tea        6040.0
Spices, dried, chervil                                     4740.0
Spices, dried, coriander leaf                              4466.0
Celery flakes, dried                                       4388.0
Beverages, powder, regular, instant, coffee                3535.0
Beverages, half the caffeine, regular, instant, coffee     3535.0
Name: potassium_mg, dtype: float64

In [288]:
nutrition.potassium_mg.nlargest(10)

name
Leavening agents, cream of tartar                         16500.0
Leavening agents, low-sodium, baking powder               10100.0
Parsley, freeze-dried                                      6300.0
Beverages, powder, unsweetened, instant, tea               6040.0
Beverages, unsweetened, decaffeinated, instant, tea        6040.0
Spices, dried, chervil                                     4740.0
Spices, dried, coriander leaf                              4466.0
Celery flakes, dried                                       4388.0
Beverages, powder, regular, instant, coffee                3535.0
Beverages, half the caffeine, regular, instant, coffee     3535.0
Name: potassium_mg, dtype: float64

In [289]:
nutrition.nsmallest(10, columns=['sodium_mg', 'calories', 'folate_mcg'])

Unnamed: 0_level_0,serving_size_g,calories,total_fat_g,saturated_fat_g,cholesterol_mg,sodium_mg,choline_mg,folate_mcg,folic_acid_mcg,niacin_mg,...,fat_g,saturated_fatty_acids_g,monounsaturated_fatty_acids_g,polyunsaturated_fatty_acids_g,fatty_acids_total_trans_mg,alcohol_g,ash_g,caffeine_mg,theobromine_mg,water_g
name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
"Beverages, well, tap, water",100.0,0.0,0.0,,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.1,0.0,0.0,99.9
"Water, NAYA, non-carbonated, bottled",100.0,0.0,0.0,,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.1,0.0,0.0,100.0
"Beverages, decaffeinated, brewed, green, tea",100.0,0.0,0.0,,0.0,0.0,0.0,0.0,0.0,0.03,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,99.93
"Beverages, EVIAN, non-carbonated, bottled, water",100.0,0.0,0.0,,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.05,0.0,0.0,99.97
"Beverages, CALISTOGA, non-carbonated, bottled, water",100.0,0.0,0.0,,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.1,0.0,0.0,100.0
"Beverages, DANNON, non-carbonated, bottled, water",100.0,0.0,0.0,,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,99.98
"Sweetener, herbal extract powder from Stevia leaf",100.0,0.0,0.0,,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
"Beverages, CRYSTAL GEYSER, non-carbonated, bottled, water",100.0,0.0,0.0,,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.1,0.0,0.0,100.0
"Babyfood, without added fluoride., GERBER, bottled, water",100.0,0.0,0.0,,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,99.9
"Beverages, AQUAFINA, PEPSI, non-carbonated, bottled, water",100.0,0.0,0.0,,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,99.97


#### Skill Challenge

###### 1.

Find the 10 foods that have the most Vitamin B12. What do they have in common?

###### 2.

Isolate the foods in the dataset that contain, or are based on, eggplant. Which of them has the most sodium?

###### 3. 

Select a slice of the dataframe that contains 4 random rows and 2 random columns.


#### Solution

In [290]:
# 1

In [291]:
# one appraoch

In [292]:
nutrition.loc[:, 'vitamin_b12_mcg'].nlargest(10)

name
Mollusks, moist heat, cooked, mixed species, clam                                   98.89
Beef, boiled, cooked, variety meats and by-products liver, imported, New Zealand    96.00
Lamb, raw, liver, variety meats and by-products                                     90.05
Lamb, pan-fried, cooked, liver, variety meats and by-products                       85.70
Veal, braised, cooked, liver, variety meats and by-products                         84.60
Beef, raw, liver, variety meats and by-products, imported, New Zealand              84.50
Beef, pan-fried, cooked, liver, variety meats and by-products                       83.13
Lamb, braised, cooked, kidneys, variety meats and by-products                       78.90
Lamb, braised, cooked, liver, variety meats and by-products                         76.50
Veal, pan-fried, cooked, liver, variety meats and by-products                       72.50
Name: vitamin_b12_mcg, dtype: float64

In [293]:
nutrition.vitamin_b12_mcg.nlargest(10)

name
Mollusks, moist heat, cooked, mixed species, clam                                   98.89
Beef, boiled, cooked, variety meats and by-products liver, imported, New Zealand    96.00
Lamb, raw, liver, variety meats and by-products                                     90.05
Lamb, pan-fried, cooked, liver, variety meats and by-products                       85.70
Veal, braised, cooked, liver, variety meats and by-products                         84.60
Beef, raw, liver, variety meats and by-products, imported, New Zealand              84.50
Beef, pan-fried, cooked, liver, variety meats and by-products                       83.13
Lamb, braised, cooked, kidneys, variety meats and by-products                       78.90
Lamb, braised, cooked, liver, variety meats and by-products                         76.50
Veal, pan-fried, cooked, liver, variety meats and by-products                       72.50
Name: vitamin_b12_mcg, dtype: float64

In [294]:
nutrition.nlargest(n=10, columns='vitamin_b12_mcg').vitamin_b12_mcg

name
Mollusks, moist heat, cooked, mixed species, clam                                   98.89
Beef, boiled, cooked, variety meats and by-products liver, imported, New Zealand    96.00
Lamb, raw, liver, variety meats and by-products                                     90.05
Lamb, pan-fried, cooked, liver, variety meats and by-products                       85.70
Veal, braised, cooked, liver, variety meats and by-products                         84.60
Beef, raw, liver, variety meats and by-products, imported, New Zealand              84.50
Beef, pan-fried, cooked, liver, variety meats and by-products                       83.13
Lamb, braised, cooked, kidneys, variety meats and by-products                       78.90
Lamb, braised, cooked, liver, variety meats and by-products                         76.50
Veal, pan-fried, cooked, liver, variety meats and by-products                       72.50
Name: vitamin_b12_mcg, dtype: float64

In [295]:
nutrition.sort_values(by='vitamin_b12_mcg', ascending=False).head(10).vitamin_b12_mcg

name
Mollusks, moist heat, cooked, mixed species, clam                                   98.89
Beef, boiled, cooked, variety meats and by-products liver, imported, New Zealand    96.00
Lamb, raw, liver, variety meats and by-products                                     90.05
Lamb, pan-fried, cooked, liver, variety meats and by-products                       85.70
Veal, braised, cooked, liver, variety meats and by-products                         84.60
Beef, raw, liver, variety meats and by-products, imported, New Zealand              84.50
Beef, pan-fried, cooked, liver, variety meats and by-products                       83.13
Lamb, braised, cooked, kidneys, variety meats and by-products                       78.90
Lamb, braised, cooked, liver, variety meats and by-products                         76.50
Veal, pan-fried, cooked, liver, variety meats and by-products                       72.50
Name: vitamin_b12_mcg, dtype: float64

In [296]:
# 2

In [297]:
nutrition.filter(regex='(?i)eggplant', axis=0).sodium_mg.nlargest(1)

name
Eggplant, pickled    1674.0
Name: sodium_mg, dtype: float64

In [298]:
# 3

In [299]:
nutrition.sample(4).sample(2, axis=1)

Unnamed: 0_level_0,vitamin_k_mcg,choline_mg
name,Unnamed: 1_level_1,Unnamed: 2_level_1
Tomato powder,48.8,0.0
"Cereals ready-to-eat, DORA THE EXPLORER, GENERAL MILLS",3.0,11.2
"Infant formula, with ARA and DHA, powder, AR LIPIL, ENFAMIL, MEAD JOHNSON",40.9,62.0
"Beverages, prepared with whole milk, powder, Strawberry-flavor beverage mix",0.0,0.0


#### Another Skill Challenge

###### **1**

Remove all the food items that contain at least one NaN. Do this in a way that modifies the dataframe, i.e. the changes stick.

How many food items remain after the exclusions?

###### **2**

From the remaining records, isolate those that have between 20 and 40 mg of Vitamin C per 100 g serving. Of these foods, which one is the least caloric, i.e. has the minimum calories? 

###### **3**

How many food items in the dataframe have Vitamin C levels of between 2 and 3 standard deviations (inclusive) above the mean?

#### Solution 

In [300]:
# 1

In [301]:
nutrition.shape

(8789, 75)

In [302]:
nutrition.dropna(axis=0, how='any', inplace=True)

In [303]:
nutrition.shape

(7199, 75)

In [304]:
# 2

In [305]:
nutrition[nutrition.vitamin_c_mg.between(20,40)].calories.nsmallest(1)

name
Asparagus, with salt, drained, boiled, cooked, frozen    18.0
Name: calories, dtype: float64

In [306]:
# 3

In [307]:
m = nutrition.vitamin_c_mg.mean()

In [308]:
m

5.553368523406037

In [309]:
mp2sd = m + nutrition.vitamin_c_mg.std() * 2

In [310]:
mp3sd = m + nutrition.vitamin_c_mg.std() * 3

In [311]:
print(m, mp2sd, mp3sd)

5.553368523406037 97.7621389681903 143.86652419058245


In [312]:
result_set = nutrition[nutrition.vitamin_c_mg.between(mp2sd, mp3sd)]

In [313]:
result_set.vitamin_c_mg.describe()

count     17.000000
mean     119.617647
std        9.190772
min      101.800000
25%      118.000000
50%      120.000000
75%      120.000000
max      138.000000
Name: vitamin_c_mg, dtype: float64

In [314]:
result_set.vitamin_c_mg.shape

(17,)