# DataFrame

A DataFrame is the two-dimensional (rows and columns) pandas data structure.

Create a DataFrame:

1. Using Series.
    - Each Series becomes one row
    - The keys (index labels) of each Series become the column headings
    - The `index` argument passed to `pd.DataFrame` supplies the row labels

In [73]:
import pandas as pd

# One Series per purchase record. The index labels of each Series
# ('Name', 'Item Purchased', 'Cost') become the DataFrame's column headings.
purchase_1 = pd.Series(['Chris', 'Dog Food', 22.50],
                       index=['Name', 'Item Purchased', 'Cost'])
purchase_2 = pd.Series(['Kevyn', 'Kitty Litter', 2.50],
                       index=['Name', 'Item Purchased', 'Cost'])
purchase_3 = pd.Series(['Vinod', 'Bird Seed', 5.00],
                       index=['Name', 'Item Purchased', 'Cost'])

In [74]:
# Stack the three purchase Series as rows; `index` supplies the row labels
# (note that 'Store 1' is used for two of the rows).
df = pd.DataFrame(
    data=[purchase_1, purchase_2, purchase_3],
    index=['Store 1', 'Store 1', 'Store 3'],
)

In [75]:
# Preview the first rows; head() shows at most 5 by default, so all 3 rows appear
df.head()

Unnamed: 0,Name,Item Purchased,Cost
Store 1,Chris,Dog Food,22.5
Store 1,Kevyn,Kitty Litter,2.5
Store 3,Vinod,Bird Seed,5.0


In [76]:
# Label-based row lookup: 'Store 1' occurs twice in the index, so both matching rows are returned
df.loc['Store 1']

Unnamed: 0,Name,Item Purchased,Cost
Store 1,Chris,Dog Food,22.5
Store 1,Kevyn,Kitty Litter,2.5


In [80]:
# Selecting a column by name returns that column as a Series
df['Name']

Store 1    Chris
Store 1    Kevyn
Store 3    Vinod
Name: Name, dtype: object

In [82]:
# df['Name'] is shorthand for df.loc[:, 'Name']:
# ':' selects every row and 'Name' picks the single column
df.loc[:,'Name']

Store 1    Chris
Store 1    Kevyn
Store 3    Vinod
Name: Name, dtype: object

In [84]:
# Pass a list of column names to select several columns at once.
# This is the same as df.loc[:, ['Name', 'Cost']]
df[['Name', 'Cost']]

Unnamed: 0,Name,Cost
Store 1,Chris,22.5
Store 1,Kevyn,2.5
Store 3,Vinod,5.0


In [86]:
# Explicit .loc form: ':' keeps every row, the list names the columns to keep
df.loc[:, ['Name', 'Cost']]

Unnamed: 0,Name,Cost
Store 1,Chris,22.5
Store 1,Kevyn,2.5
Store 3,Vinod,5.0


In [87]:
# Slicing rows: a plain slice on the DataFrame selects rows by position, not columns.
# df[0:2] is the same as df.iloc[0:2] (positions 0 and 1; the end position is excluded)
df[0:2]

Unnamed: 0,Name,Item Purchased,Cost
Store 1,Chris,Dog Food,22.5
Store 1,Kevyn,Kitty Litter,2.5


In [88]:
# Position-based slice with .iloc: the rows at positions 0 and 1
df.iloc[0:2]

Unnamed: 0,Name,Item Purchased,Cost
Store 1,Chris,Dog Food,22.5
Store 1,Kevyn,Kitty Litter,2.5


In [35]:
# Two rows share the label 'Store 1', so .loc returns a DataFrame here rather than a Series
type(df.loc['Store 1']) # check type

pandas.core.frame.DataFrame

In [36]:
# Select rows by label with df.loc[row_label].
# 'Store 1' is not unique in this index, so the result holds every matching row (two here).
df.loc['Store 1']

Unnamed: 0,Name,Item Purchased,Cost
Store 1,Chris,Dog Food,22.5
Store 1,Kevyn,Kitty Litter,2.5


In [37]:
# Row label and column label in one .loc call: the 'Cost' values of the rows labelled 'Store 1'
df.loc['Store 1', 'Cost']

Store 1    22.5
Store 1     2.5
Name: Cost, dtype: float64

In [91]:
# Get all values of a column (returned as a Series that keeps the row labels)
df['Item Purchased']

Store 1        Dog Food
Store 1    Kitty Litter
Store 3       Bird Seed
Name: Item Purchased, dtype: object

# Summary of retrieving data from rows/cols

### Selecting columns

- Single column A: `df['A']` is the same as `df.loc[:, 'A']`
- A list of columns: `df[['A', 'B', 'C']]` is the same as `df.loc[:, ['A', 'B', 'C']]`

### Selecting rows

- Single row: `df.loc[row_label]` (returns a Series when the label is unique, or a DataFrame when several rows share the label)
- A list of rows: `df.loc[[row_label1, row_label2]]`

### Slicing

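- Slicing rows: `df[1:3]` is the same as `df.iloc[1:3]` -> selects the rows at positions 1 and 2 (zero-based; the end position is excluded)
- Slicing columns: `df.loc[:, 'A':'C']` -> selects columns 'A' through 'C' (label-based slices include both endpoints)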

In [39]:
# Transpose the DataFrame: the rows become columns and the columns become rows
df.T

Unnamed: 0,Store 1,Store 1.1,Store 3
Name,Chris,Kevyn,Vinod
Item Purchased,Dog Food,Kitty Litter,Bird Seed
Cost,22.5,2.5,5


In [40]:
# .loc (label-based) and .iloc (position-based) take a row selector first and,
# optionally, a column selector second, e.g. df.loc[:, 'Name'].
# Plain indexing with a column name, df['Name'], is the shorthand for selecting a column.
df

Unnamed: 0,Name,Item Purchased,Cost
Store 1,Chris,Dog Food,22.5
Store 1,Kevyn,Kitty Litter,2.5
Store 3,Vinod,Bird Seed,5.0


In [41]:
# A label that occurs only once returns that row as a Series
# (the column names become the Series index)
df.loc['Store 3']

Name                  Vinod
Item Purchased    Bird Seed
Cost                      5
Name: Store 3, dtype: object

In [42]:
# Column selection by name
df['Name']

Store 1    Chris
Store 1    Kevyn
Store 3    Vinod
Name: Name, dtype: object

In [44]:
# Chained indexing -- df.loc['Store 1']['Cost'] -- yields the same values here, but it
# performs two separate lookups and the intermediate result may be a copy rather than
# a view, so an assignment made through it can silently fail to modify df.
# Prefer a single .loc call that takes both the row label and the column label:
df.loc['Store 1', 'Cost']

Store 1    22.5
Store 1     2.5
Name: Cost, dtype: float64

In [47]:
# .loc accepts a row selector and a column selector:
# ':' selects all rows, and the list selects only the 2 specified columns
df.loc[:, ['Name', 'Cost']] 

Unnamed: 0,Name,Cost
Store 1,Chris,22.5
Store 1,Kevyn,2.5
Store 3,Vinod,5.0


In [50]:
# drop() returns a copy without the rows labelled 'Store 1';
# the original DataFrame is not changed
df.drop('Store 1')

Unnamed: 0,Name,Item Purchased,Cost
Store 3,Vinod,Bird Seed,5.0


In [49]:
# The original DataFrame still contains every row
df

Unnamed: 0,Name,Item Purchased,Cost
Store 1,Chris,Dog Food,22.5
Store 1,Kevyn,Kitty Litter,2.5
Store 3,Vinod,Bird Seed,5.0


In [51]:
# Keep the result of drop() by assigning the returned copy to a new name
df_without_store1 = df.drop('Store 1')

In [52]:
# Only the 'Store 3' row remains in the copy
df_without_store1

Unnamed: 0,Name,Item Purchased,Cost
Store 3,Vinod,Bird Seed,5.0


In [53]:
# df itself is still intact: both 'Store 1' rows are present
df

Unnamed: 0,Name,Item Purchased,Cost
Store 1,Chris,Dog Food,22.5
Store 1,Kevyn,Kitty Litter,2.5
Store 3,Vinod,Bird Seed,5.0


In [54]:
# To see more on the drop function, append ? to its name.
# Note: the trailing ? is IPython/Jupyter syntax for showing a docstring; it is not plain Python.
df_without_store1.drop?

In [57]:
# del removes the column from this DataFrame in place (nothing is returned).
# Re-running this cell on its own raises KeyError, because 'Name' is already gone.
del df_without_store1['Name']
df_without_store1

Unnamed: 0,Item Purchased,Cost
Store 3,Bird Seed,5.0


In [60]:
# Assigning a scalar to a new column name adds that column,
# with the value (here None) repeated for every row
df['Location'] = None

In [61]:
# Display the frame with the new, empty 'Location' column
df

Unnamed: 0,Name,Item Purchased,Cost,Location
Store 1,Chris,Dog Food,22.5,
Store 1,Kevyn,Kitty Litter,2.5,
Store 3,Vinod,Bird Seed,5.0,


### Exercise

In [67]:
# Rebuild the three purchase records so the exercise starts from fresh data.
purchase_1, purchase_2, purchase_3 = (
    pd.Series({'Name': name, 'Item Purchased': item, 'Cost': cost})
    for name, item, cost in [
        ('Chris', 'Dog Food', 22.50),
        ('Kevyn', 'Kitty Litter', 2.50),
        ('Vinod', 'Bird Seed', 5.00),
    ]
)

# One row per purchase; the row labels name the store of each purchase.
df = pd.DataFrame(
    [purchase_1, purchase_2, purchase_3],
    index=['Store 1', 'Store 1', 'Store 2'],
)

# Apply a 20% discount to every store's cost, i.e. keep 80% of each price
df['Cost'] = df['Cost'] * 0.8

In [68]:
df['Cost']

Store 1    18.0
Store 1     2.0
Store 2     4.0
Name: Cost, dtype: float64