In [71]:
import pandas as pd

# Pandas Notes

In [100]:
# Build a small example frame to practise on: ten rows labelled A..J,
# a "numbers" column (10, 20, ... 100) and a "quarters" column (0.0, 0.25, ... 2.25).
dummy_data_df = (
    pd.DataFrame(
        {
            "letters": list("ABCDEFGHIJ"),
            "numbers": [10 * (step + 1) for step in range(10)],
            "quarters": [step * 0.25 for step in range(10)],
        }
    )
    # the letters column becomes the index, so rows can be looked up by letter
    .set_index("letters")
)
# display the finished frame
dummy_data_df

Unnamed: 0_level_0,numbers,quarters
letters,Unnamed: 1_level_1,Unnamed: 2_level_1
A,10,0.0
B,20,0.25
C,30,0.5
D,40,0.75
E,50,1.0
F,60,1.25
G,70,1.5
H,80,1.75
I,90,2.0
J,100,2.25


# DataFrames and Loc

You can use loc to grab data from dataframes at locations

Suppose you want the value in row "B" and column "numbers"; we would expect the output to be 20. We can use loc to ask the dataframe for that information by giving it a location (think of loc as shorthand for location).

In [55]:
# firstly, let's get a row in the dataframe
# here, we will grab the row "C"
# (a single index label inside loc[...] selects that one row)
print(dummy_data_df.loc["C"])

numbers     30.0
quarters     0.5
Name: C, dtype: float64


In [58]:
# we can see the data loc["C"] gave us is the C row
# if we substitute any letter within our dataframe we can
# get any row we want.

# NOTE: the index labels of this dataframe are strings,
# so we wrap them in " or ' characters. For example "A", 'B', "C", 'D'
print(dummy_data_df.loc['D'])

numbers     40.00
quarters     0.75
Name: D, dtype: float64


In [66]:
# the pattern to recognize: one row label inside the square brackets

#  dataframe
#    |
#    |    open square bracket
#    |           |
#    |           |     selected row
#    |           |      |
#    |           |      |     closed square bracket
#    |           |      |      |
dummy_data_df.loc[     'D'     ]

numbers     40.00
quarters     0.75
Name: D, dtype: float64

In [59]:
# next, we can also select by both the row AND the column like so
# (row label first, then column label)
print(dummy_data_df.loc["A","numbers"])

10


In [None]:
# the pattern to recognize: row label, separator, column label

#  dataframe
#    |
#    |    open square bracket
#    |           |
#    |           |     selected row
#    |           |      |
#    |           |      |     separator
#    |           |      |      |
#    |           |      |      |      selected column
#    |           |      |      |        |
#    |           |      |      |        |         closed square bracket
#    |           |      |      |        |          |
dummy_data_df.loc[     "A"     ,    "numbers"      ]

In [73]:
# what we see here is that we are just getting back the value at the
# intersection of row A and column numbers

# another example: same row, different column
print(dummy_data_df.loc["A","quarters"])

0.0


In [90]:
# But, what if you wanted to get more data, like, say, row A, B, and C?
# Looking the rows up one at a time works, but needs a separate loc call per label.
# (end="\n\n" leaves a blank line after a lookup, before the next command)
print("command: dummy_data_df.loc['A'] --->")
print(dummy_data_df.loc['A'], end="\n\n")
print("command: dummy_data_df.loc['B'] --->")
print(dummy_data_df.loc['B'], end="\n\n")
print("command: dummy_data_df.loc['C'] --->")
print(dummy_data_df.loc['C'])

command: dummy_data_df.loc['A'] --->
numbers     10.0
quarters     0.0
Name: A, dtype: float64

command: dummy_data_df.loc['B'] --->
numbers     20.00
quarters     0.25
Name: B, dtype: float64

command: dummy_data_df.loc['C'] --->
numbers     30.0
quarters     0.5
Name: C, dtype: float64


In [75]:
# We can use a list of labels to tell loc to grab multiple rows in one call, like so
# (note the inner square brackets: they are the list, the outer ones belong to loc)
print(dummy_data_df.loc[["A","B","C"]])

         numbers  quarters
letters                   
A             10      0.00
B             20      0.25
C             30      0.50


In [76]:
# the pattern to recognize: a list of row labels inside the square brackets

#  dataframe
#    |
#    |    open square bracket
#    |           |
#    |           |         selected rows list
#    |           |           |
#    |           |      _____|_____
#    |           |     |           \     closed square bracket
#    |           |     |           \      |
dummy_data_df.loc[     ["A","B","C"]      ]

Unnamed: 0_level_0,numbers,quarters
letters,Unnamed: 1_level_1,Unnamed: 2_level_1
A,10,0.0
B,20,0.25
C,30,0.5


In [79]:
# check out how the pattern is the same when
# we create the list first, then pass it to loc
select_these_rows = ["A","B","C"]
print(dummy_data_df.loc[select_these_rows])

         numbers  quarters
letters                   
A             10      0.00
B             20      0.25
C             30      0.50


In [80]:
# the pattern to recognize: a variable holding the list of row labels

#  dataframe
#    |
#    |    open square bracket
#    |           |
#    |           |         selected rows list
#    |           |            |
#    |           |            |              closed square bracket
#    |           |            |               |
dummy_data_df.loc[     select_these_rows      ]

Unnamed: 0_level_0,numbers,quarters
letters,Unnamed: 1_level_1,Unnamed: 2_level_1
A,10,0.0
B,20,0.25
C,30,0.5


In [81]:
# Likewise we can also grab multiple rows AND multiple columns
# (first list = row labels, second list = column labels)
print(dummy_data_df.loc[["A","B","C"],["numbers","quarters"]])

         numbers  quarters
letters                   
A             10      0.00
B             20      0.25
C             30      0.50


In [82]:
# the pattern to recognize: rows list, separator, columns list

#  dataframe
#    |
#    |    open square bracket
#    |           |
#    |           |         selected rows list
#    |           |           |
#    |           |           |           separator
#    |           |      _____|_____      |
#    |           |     |           \     |             selected columns list
#    |           |     |           \     |               |
#    |           |     |           \     |      _________|__________      closed square bracket
#    |           |     |           \     |     |                    \     |
dummy_data_df.loc[     ["A","B","C"]     ,     ["numbers","quarters"]     ]

Unnamed: 0_level_0,numbers,quarters
letters,Unnamed: 1_level_1,Unnamed: 2_level_1
A,10,0.0
B,20,0.25
C,30,0.5


# DataFrames and iLoc

You can use iloc to grab data from dataframes at locations JUST like loc

iloc is different from loc in that:
    iloc pretends that all the rows and columns are numbered indexes
    
so we can access rows and columns AS IF they were numbered

In [89]:
# Position 0 is the first row, so iloc[0] grabs it by number...
print("command: dummy_data_df.iloc[0] --->")
print(dummy_data_df.iloc[0], end="\n\n")  # blank line before the comparison
# ...and the printed data matches what loc gives for the first row's label "A"
print("command: dummy_data_df.loc['A'] --->")
print(dummy_data_df.loc['A'])

command: dummy_data_df.iloc[0] --->
numbers     10.0
quarters     0.0
Name: A, dtype: float64

command: dummy_data_df.loc['A'] --->
numbers     10.0
quarters     0.0
Name: A, dtype: float64


In [88]:
# every loc pattern shown above also works with iloc, using positions instead of labels
# one row position and one column position -> a single value
print("command: dummy_data_df.iloc[0,0] --->")
print(dummy_data_df.iloc[0,0], end="\n\n")
# a list of row positions -> several rows
print("command: dummy_data_df.iloc[[0,1]] --->")
print(dummy_data_df.iloc[[0,1]], end="\n\n")
# a list of row positions and a list of column positions
print("command: dummy_data_df.iloc[[0,1],[0,1]] --->")
print(dummy_data_df.iloc[[0,1],[0,1]])

command: dummy_data_df.iloc[0,0] --->
10

command: dummy_data_df.iloc[[0,1]] --->
         numbers  quarters
letters                   
A             10      0.00
B             20      0.25

command: dummy_data_df.iloc[[0,1],[0,1]] --->
         numbers  quarters
letters                   
A             10      0.00
B             20      0.25


In [93]:
# iloc is really most useful with ranges, because positions are just numbers
# check this out!
# range(4) -> row positions 0, 1, 2, 3
print("command: dummy_data_df.iloc[range(4)] --->")
print(dummy_data_df.iloc[range(4)], end="\n\n")
# range(2) for the rows and range(2) for the columns
print("command: dummy_data_df.iloc[range(2),range(2)] --->")
print(dummy_data_df.iloc[range(2),range(2)])

command: dummy_data_df.iloc[range(4)] --->
         numbers  quarters
letters                   
A             10      0.00
B             20      0.25
C             30      0.50
D             40      0.75

command: dummy_data_df.iloc[range(2),range(2)] --->
         numbers  quarters
letters                   
A             10      0.00
B             20      0.25


In [101]:
# Lastly, we can also ASSIGN with loc and iloc
# first, look at the values in this dataframe before anything is changed
dummy_data_df

Unnamed: 0_level_0,numbers,quarters
letters,Unnamed: 1_level_1,Unnamed: 2_level_1
A,10,0.0
B,20,0.25
C,30,0.5
D,40,0.75
E,50,1.0
F,60,1.25
G,70,1.5
H,80,1.75
I,90,2.0
J,100,2.25


In [102]:
# we can change the "numbers" value of row "A" (the first row) by assigning through loc
dummy_data_df.loc["A","numbers"] = 2000
# look at the values in this dataframe: row A now holds 2000
dummy_data_df

Unnamed: 0_level_0,numbers,quarters
letters,Unnamed: 1_level_1,Unnamed: 2_level_1
A,2000,0.0
B,20,0.25
C,30,0.5
D,40,0.75
E,50,1.0
F,60,1.25
G,70,1.5
H,80,1.75
I,90,2.0
J,100,2.25


In [103]:
# we can also change the fourth row's number with iloc
# (row position 3 and column position 0 -- positions start counting at 0)
dummy_data_df.iloc[3,0] = 9000
# look at the values in this dataframe: row D now holds 9000
dummy_data_df

Unnamed: 0_level_0,numbers,quarters
letters,Unnamed: 1_level_1,Unnamed: 2_level_1
A,2000,0.0
B,20,0.25
C,30,0.5
D,9000,0.75
E,50,1.0
F,60,1.25
G,70,1.5
H,80,1.75
I,90,2.0
J,100,2.25
