In [1]:
import pandas as pd

# Pandas Notes

In [12]:
# Build a small ten-row example frame that the rest of the notes select from.
# The "letters" column is promoted to the row index in the same expression.
dummy_data_df = pd.DataFrame(
    {
        "letters": list("ABCDEFGHIJ"),
        "numbers": list(range(10, 101, 10)),
        "double numbers": list(range(20, 201, 20)),
        "quarters": [0.25 * step for step in range(10)],
        "thirds": [round(0.333 * step, 2) for step in range(10)],
    }
).set_index("letters")
# display the finished frame
dummy_data_df

Unnamed: 0_level_0,numbers,double numbers,quarters,thirds
letters,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
A,10,20,0.0,0.0
B,20,40,0.25,0.33
C,30,60,0.5,0.67
D,40,80,0.75,1.0
E,50,100,1.0,1.33
F,60,120,1.25,1.67
G,70,140,1.5,2.0
H,80,160,1.75,2.33
I,90,180,2.0,2.66
J,100,200,2.25,3.0


# DataFrame and Square Brackets

We can use square brackets on a DataFrame to select one or more of its columns.

In [13]:
# A single column name inside the square brackets selects that column;
# here we grab the "numbers" column.
dummy_data_df["numbers"]

letters
A     10
B     20
C     30
D     40
E     50
F     60
G     70
H     80
I     90
J    100
Name: numbers, dtype: int64

In [14]:
# Likewise, passing "quarters" selects the quarters column.
dummy_data_df["quarters"]

letters
A    0.00
B    0.25
C    0.50
D    0.75
E    1.00
F    1.25
G    1.50
H    1.75
I    2.00
J    2.25
Name: quarters, dtype: float64

In [15]:
# The pattern to recognize: one column name inside the square brackets

#  dataframe
#    |
#    |       open square bracket
#    |       |
#    |       |     selected column
#    |       |         |
#    |       |         |        closed square bracket
#    |       |         |         |
dummy_data_df[     "numbers"     ]

letters
A     10
B     20
C     30
D     40
E     50
F     60
G     70
H     80
I     90
J    100
Name: numbers, dtype: int64

In [17]:
# We can also grab several columns at once by passing a list of column names
# (note the second, inner pair of square brackets that forms the list).
dummy_data_df[["numbers","quarters"]]

Unnamed: 0_level_0,numbers,quarters
letters,Unnamed: 1_level_1,Unnamed: 2_level_1
A,10,0.0
B,20,0.25
C,30,0.5
D,40,0.75
E,50,1.0
F,60,1.25
G,70,1.5
H,80,1.75
I,90,2.0
J,100,2.25


In [19]:
# The columns come back in the order given in the list, not the order in
# which they were defined when the DataFrame was created.
dummy_data_df[["thirds","double numbers"]]

Unnamed: 0_level_0,thirds,double numbers
letters,Unnamed: 1_level_1,Unnamed: 2_level_1
A,0.0,20
B,0.33,40
C,0.67,60
D,1.0,80
E,1.33,100
F,1.67,120
G,2.0,140
H,2.33,160
I,2.66,180
J,3.0,200


In [20]:
# The pattern to recognize: a list of column names inside the square brackets

#  dataframe
#    |
#    |    open square bracket
#    |       |
#    |       |       list of selected columns
#    |       |               |
#    |       |      _________|_________
#    |       |    |                    |    closed square bracket
#    |       |    |                    |     |
dummy_data_df[    ["numbers","quarters"]     ]

Unnamed: 0_level_0,numbers,quarters
letters,Unnamed: 1_level_1,Unnamed: 2_level_1
A,10,0.0
B,20,0.25
C,30,0.5
D,40,0.75
E,50,1.0
F,60,1.25
G,70,1.5
H,80,1.75
I,90,2.0
J,100,2.25


# Filtering Selection with Square Brackets

We can also filter our data by placing a conditional inside the square brackets; only the rows that satisfy the condition are kept.

In [23]:
# Select only the row(s) whose "numbers" value equals 50: the inner
# expression compares the "numbers" column to 50, and the outer square
# brackets keep the rows where that comparison holds.
dummy_data_df[dummy_data_df["numbers"] == 50]

Unnamed: 0_level_0,numbers,double numbers,quarters,thirds
letters,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
E,50,100,1.0,1.33


In [26]:
# A broader condition can match more than one row: here every row whose
# "quarters" value is greater than 1.25 is kept.
dummy_data_df[dummy_data_df["quarters"] > 1.25]

Unnamed: 0_level_0,numbers,double numbers,quarters,thirds
letters,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
G,70,140,1.5,2.0
H,80,160,1.75,2.33
I,90,180,2.0,2.66
J,100,200,2.25,3.0


In [6]:
# The pattern to recognize: a filter query inside the square brackets

#  dataframe
#    |
#    |    open square bracket
#    |       |
#    |       |             filter query
#    |       |                   |
#    |       |     ______________|_______________
#    |       |    |                              |    closed square bracket
#    |       |    |                              |     |
dummy_data_df[    dummy_data_df["numbers"]  ==  50     ]

In [27]:
# The pattern to recognize WITHIN the filter query:
# selected column, then a comparison operator, then a value to compare against

#
#                    selected column to filter on
#                              |
#                  ____________|__________
#                 |                       |      value to compare against
#                 |                       |       |
dummy_data_df[    dummy_data_df["numbers"]   ==  50       ]
#                                            |
#                                            |
#                                   comparison operator

Unnamed: 0_level_0,numbers,double numbers,quarters,thirds
letters,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
E,50,100,1.0,1.33


In [28]:
# The same pattern WITHIN the filter query, this time with a different
# column ("quarters"), comparison operator (>) and value (1.25)

#
#                    selected column to filter on
#                              |
#                  ____________|__________
#                 |                       |      value to compare against
#                 |                       |        |
dummy_data_df[    dummy_data_df["quarters"]   >   1.25     ]
#                                             |
#                                             |
#                                    comparison operator

Unnamed: 0_level_0,numbers,double numbers,quarters,thirds
letters,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
G,70,140,1.5,2.0
H,80,160,1.75,2.33
I,90,180,2.0,2.66
J,100,200,2.25,3.0
