## Selecting Series Data

In [1]:
import os

import numpy as np
import pandas as pd
# Limit rendered output to 4 columns and 10 rows. The fully qualified
# "display." names are used because the bare patterns 'max_columns' /
# 'max_rows' also match the styler.render.* options in newer pandas
# releases, which makes set_option raise OptionError (ambiguous pattern).
pd.set_option("display.max_columns", 4)
pd.set_option("display.max_rows", 10)

In [2]:
# Load the college dataset and use the institution name (INSTNM) as the index.
# The CSV location can be overridden through the COLLEGE_CSV environment
# variable so the notebook also runs on other machines; when it is not set,
# the original local path is used unchanged.
college = pd.read_csv(
    os.environ.get(
        "COLLEGE_CSV",
        "C:/Users/justine.o_kobo360/Desktop/Pandas Workbook/Pandas CookBook 1.x/Data files/college.csv",
    ),
    index_col="INSTNM",
)

In [3]:
# Select the CITY column as a Series; it keeps the INSTNM index of `college`.
city = college["CITY"]

In [4]:
# Display the Series: institution names form the index, city names the values.
city


INSTNM
Alabama A & M University                                            Normal
University of Alabama at Birmingham                             Birmingham
Amridge University                                              Montgomery
University of Alabama in Huntsville                             Huntsville
Alabama State University                                        Montgomery
                                                                ...       
SAE Institute of Technology  San Francisco                      Emeryville
Rasmussen College - Overland Park                            Overland Park
National Personal Training Institute of Cleveland         Highland Heights
Bay Area Medical Academy - San Jose Satellite Location            San Jose
Excel Learning Center-San Antonio South                        San Antonio
Name: CITY, Length: 7535, dtype: object

In [5]:
# Pull out a scalar value from the Series directly: indexing
# with a single index label returns the value stored under it.
city["Alabama A & M University"]

'Normal'

In [6]:
# Pull out the same scalar value with the .loc attribute,
# which always selects by index label (name):
city.loc["Alabama A & M University"]

'Normal'

In [7]:
# Pull out a scalar value with the .iloc attribute, which
# selects by integer position (0 is the first row):
city.iloc[0]

'Normal'

In [7]:
# Pull out several values by indexing. Note that if
# we pass in a list to the index operation, pandas 
# will now return a Series instead of a scalar:
city[
    [
        "Alabama A & M University",
        "Alabama A & M University"
    ]
    
]

INSTNM
Alabama A & M University    Normal
Alabama A & M University    Normal
Name: CITY, dtype: object

In [9]:
# Repeat the above using .loc
city.loc[
    [
        "Alabama A & M University",
        "Alabama A & M University"
    ]
]

INSTNM
Alabama A & M University    Normal
Alabama A & M University    Normal
Name: CITY, dtype: object

In [8]:
# Pull out several values by position with .iloc: a list of
# integer positions (here the 1st and 5th rows) returns a Series.
city.iloc[[0, 4]]

INSTNM
Alabama A & M University        Normal
Alabama State University    Montgomery
Name: CITY, dtype: object

In [9]:
# Use a label slice to pull out many values. With labels,
# both the start and the stop label are included in the result:
city[
    "Alabama A & M University": "Alabama State University"
]

INSTNM
Alabama A & M University                   Normal
University of Alabama at Birmingham    Birmingham
Amridge University                     Montgomery
University of Alabama in Huntsville    Huntsville
Alabama State University               Montgomery
Name: CITY, dtype: object

In [10]:
# Use an integer slice to pull out many values by position.
# As with Python lists, the stop position (5) is excluded,
# so this returns the rows at positions 0 through 4:
city[0:5]

INSTNM
Alabama A & M University                   Normal
University of Alabama at Birmingham    Birmingham
Amridge University                     Montgomery
University of Alabama in Huntsville    Huntsville
Alabama State University               Montgomery
Name: CITY, dtype: object

In [11]:
# Use a label slice with .loc to pull out many values;
# the stop label is included in the result:
city.loc[
    "Alabama A & M University": "Alabama State University"
]

INSTNM
Alabama A & M University                   Normal
University of Alabama at Birmingham    Birmingham
Amridge University                     Montgomery
University of Alabama in Huntsville    Huntsville
Alabama State University               Montgomery
Name: CITY, dtype: object

In [12]:
# Use a positional slice with .iloc to pull out many values;
# the stop position is excluded (rows 0 through 4):
city.iloc[0:5]

INSTNM
Alabama A & M University                   Normal
University of Alabama at Birmingham    Birmingham
Amridge University                     Montgomery
University of Alabama in Huntsville    Huntsville
Alabama State University               Montgomery
Name: CITY, dtype: object

In [13]:
# Use a Boolean array to pull out certain values.
# isin() returns a Boolean Series that is True where the city
# name is one of the listed values.
# NOTE(review): the mask tests the city name only, not the state, so
# despite its name it also matches same-named cities outside Alabama
# (e.g. the sample further down shows a Montgomery in WV).
alabama_mask = city.isin(['Birmingham', 'Montgomery'])

In [14]:
# Indexing with the Boolean Series keeps only the rows where it is True.
city[alabama_mask]

INSTNM
University of Alabama at Birmingham    Birmingham
Amridge University                     Montgomery
Alabama State University               Montgomery
Auburn University at Montgomery        Montgomery
Birmingham Southern College            Birmingham
                                          ...    
Fortis Institute-Birmingham            Birmingham
Hair Academy                           Montgomery
Brown Mackie College-Birmingham        Birmingham
Nunation School of Cosmetology         Birmingham
Troy University-Montgomery Campus      Montgomery
Name: CITY, Length: 26, dtype: object

In [15]:
# A small Series whose index labels are themselves integers
# (deliberately not in positional order), built from a
# label -> value mapping:
s = pd.Series({5: 10, 2: 20, 3: 35, 1: 28})

In [20]:
# Display the toy Series: the left column is the integer index label.
s

5    10
2    20
3    35
1    28
dtype: int64

In [17]:
# An integer slice with [] is positional even on an integer index:
# this returns positions 0-3 (all four rows), not labels 0 through 4.
s[0:4]

5    10
2    20
3    35
1    28
dtype: int64

In [18]:
# A single integer with [] is looked up as a label on an integer index:
# label 5 holds 10 (there is no position 5 in this 4-row Series).
s[5]

10

In [19]:
# Again a label lookup: label 1 holds 28, whereas position 1 holds 20.
# Prefer .loc / .iloc to make the intent explicit.
s[1]

28

In [21]:
# Everything shown above for the Series can also be done
# directly on the original DataFrame with .loc or .iloc by
# passing a row selector and a column selector separated by a
# comma (a tuple written without parentheses): labels for
# .loc, integer positions for .iloc.
college.loc["Alabama A & M University", 'CITY']

'Normal'

In [22]:
# Scalar lookup by row label and column label.
college.loc["Amridge University", "STABBR"]

'AL'

In [23]:
# Scalar lookup by position: first row, first column (CITY).
college.iloc[0,0]

'Normal'

In [24]:
# A list of row labels plus a single column label
# returns a Series of that column for the listed rows.
college.loc[
    [
        "Alabama A & M University",
        "Alabama State University",
    ], "CITY"
]

INSTNM
Alabama A & M University        Normal
Alabama State University    Montgomery
Name: CITY, dtype: object

In [25]:
# Positional equivalent: rows at positions 0 and 4, column at position 0.
college.iloc[[0, 4], 0]

INSTNM
Alabama A & M University        Normal
Alabama State University    Montgomery
Name: CITY, dtype: object

In [26]:
# Label slice of rows (stop label included) for the CITY column.
college.loc[
   "Alabama A & M University":"Alabama State University", "CITY"
]

INSTNM
Alabama A & M University                   Normal
University of Alabama at Birmingham    Birmingham
Amridge University                     Montgomery
University of Alabama in Huntsville    Huntsville
Alabama State University               Montgomery
Name: CITY, dtype: object

In [27]:
# Positional slice of rows 0-4 (stop excluded) for the first column.
college.iloc[0:5, 0]

INSTNM
Alabama A & M University                   Normal
University of Alabama at Birmingham    Birmingham
Amridge University                     Montgomery
University of Alabama in Huntsville    Huntsville
Alabama State University               Montgomery
Name: CITY, dtype: object

In [28]:
# Care needs to be taken when slicing with .loc.
# If the start label appears after the stop label in the
# index, an empty Series is returned without an exception:
city.loc[
    "Reid State Technical College": "Alabama State University"
]

Series([], Name: CITY, dtype: object)

In [30]:
# With the two labels given in index order, the slice returns
# every row from the start label through the stop label:
city.loc[
     "Alabama State University" : "Reid State Technical College"
]

INSTNM
Alabama State University                   Montgomery
The University of Alabama                  Tuscaloosa
Central Alabama Community College      Alexander City
Athens State University                        Athens
Auburn University at Montgomery            Montgomery
                                            ...      
Northeast Alabama Community College        Rainsville
Oakwood University                         Huntsville
Alabama Southern Community College        Monroeville
Prince Institute-Southeast                   Elmhurst
Reid State Technical College                Evergreen
Name: CITY, Length: 41, dtype: object

## Selecting DataFrame rows

In [29]:
# Read in the college dataset, and set the index as
# the institution name. The CSV location can be overridden
# through the COLLEGE_CSV environment variable so the notebook
# also runs on other machines; when it is not set, the original
# local path is used unchanged.
college = pd.read_csv(
    os.environ.get(
        "COLLEGE_CSV",
        "C:/Users/justine.o_kobo360/Desktop/Pandas Workbook/Pandas CookBook 1.x/Data files/college.csv",
    ),
    index_col="INSTNM",
)

In [30]:
# Preview five randomly chosen rows; random_state fixes the seed
# so the same rows are drawn on every run.
college.sample(5, random_state=42)

Unnamed: 0_level_0,CITY,STABBR,...,MD_EARN_WNE_P10,GRAD_DEBT_MDN_SUPP
INSTNM,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
Career Point College,San Antonio,TX,...,20700,14977
Ner Israel Rabbinical College,Baltimore,MD,...,PrivacySuppressed,PrivacySuppressed
Reflections Academy of Beauty,Decatur,IL,...,,PrivacySuppressed
Capital Area Technical College,Baton Rouge,LA,...,26400,PrivacySuppressed
West Virginia University Institute of Technology,Montgomery,WV,...,43400,23969


In [31]:
# To select an entire row by position, pass a single
# integer to .iloc:

college.iloc[60]
# Because positions are zero-based, this is actually the
# 61st row. Note that pandas returns the row as a Series
# whose index holds the column names.

CITY                  Anchorage
STABBR                       AK
HBCU                        0.0
MENONLY                     0.0
WOMENONLY                   0.0
                        ...    
PCTPELL                  0.2385
PCTFLOAN                 0.2647
UG25ABV                  0.4386
MD_EARN_WNE_P10           42500
GRAD_DEBT_MDN_SUPP      19449.5
Name: University of Alaska Anchorage, Length: 26, dtype: object

In [32]:
# To get the same row by name instead of by position,
# pass its index label to .loc:
college.loc["University of Alaska Anchorage"]

CITY                  Anchorage
STABBR                       AK
HBCU                        0.0
MENONLY                     0.0
WOMENONLY                   0.0
                        ...    
PCTPELL                  0.2385
PCTFLOAN                 0.2647
UG25ABV                  0.4386
MD_EARN_WNE_P10           42500
GRAD_DEBT_MDN_SUPP      19449.5
Name: University of Alaska Anchorage, Length: 26, dtype: object

In [33]:
# To select a disjointed sets of rows as a DataFrame,
# pass a list of integersto .iloc 
college.iloc[[60, 93, 3]]

Unnamed: 0_level_0,CITY,STABBR,...,MD_EARN_WNE_P10,GRAD_DEBT_MDN_SUPP
INSTNM,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
University of Alaska Anchorage,Anchorage,AK,...,42500,19449.5
Avalon School of Cosmetology-Mesa,Mesa,AZ,...,19600,11458.0
University of Alabama in Huntsville,Huntsville,AL,...,45500,24097.0


In [34]:
# Select a disjoint set of rows by name: pass .loc a list of
# institution names. These are the labels found at positions
# 60, 99 and 3 (see the .index.to_list() check further down).
labels = [
    "University of Alaska Anchorage",
    "International Academy of Hair Design",
    "University of Alabama in Huntsville",
]

college.loc[labels]

Unnamed: 0_level_0,CITY,STABBR,...,MD_EARN_WNE_P10,GRAD_DEBT_MDN_SUPP
INSTNM,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
University of Alaska Anchorage,Anchorage,AK,...,42500,19449.5
International Academy of Hair Design,Tempe,AZ,...,22200,10556.0
University of Alabama in Huntsville,Huntsville,AL,...,45500,24097.0


In [35]:
# Use slice notation with .iloc to select contiguous
# rows of the data; the stop position (102) is excluded,
# so this returns the rows at positions 99, 100 and 101:
college.iloc[99: 102]

Unnamed: 0_level_0,CITY,STABBR,...,MD_EARN_WNE_P10,GRAD_DEBT_MDN_SUPP
INSTNM,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
International Academy of Hair Design,Tempe,AZ,...,22200,10556
GateWay Community College,Phoenix,AZ,...,29800,7283
Mesa Community College,Mesa,AZ,...,35200,8000


In [47]:
# .loc accepts slice notation as well. Unlike positional
# slicing, a label slice is a closed interval: the row for
# the start label and the row for the stop label are both
# part of the result.
start, stop = (
    "International Academy of Hair Design",
    "Mesa Community College",
)
college.loc[start:stop]

Unnamed: 0_level_0,CITY,STABBR,...,MD_EARN_WNE_P10,GRAD_DEBT_MDN_SUPP
INSTNM,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
International Academy of Hair Design,Tempe,AZ,...,22200,10556
GateWay Community College,Phoenix,AZ,...,29800,7283
Mesa Community College,Mesa,AZ,...,35200,8000


In [52]:
# Look up the index labels that sit at positions 60, 99 and 3,
# returned as a plain Python list.
college.iloc[[60, 99, 3]].index.to_list()

['University of Alaska Anchorage',
 'International Academy of Hair Design',
 'University of Alabama in Huntsville']

### Selecting DataFrame rows and columns simultaneously

In [None]:
# The generic form to select rows and columns will
# look like the following code. It is pseudo-code: `df`,
# `row_idxs`, `column_idxs`, `row_names` and `column_names`
# are placeholders, so the lines are kept as comments to
# avoid a NameError when the notebook is run top to bottom.
#
#     df.iloc[row_idxs, column_idxs]
#     df.loc[row_names, column_names]

In [53]:
# Select the first three rows and the first four
# columns with positional slice notation (stops excluded):
college.iloc[:3, :4]

Unnamed: 0_level_0,CITY,STABBR,HBCU,MENONLY
INSTNM,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Alabama A & M University,Normal,AL,1.0,0.0
University of Alabama at Birmingham,Birmingham,AL,0.0,0.0
Amridge University,Montgomery,AL,0.0,0.0


In [54]:
# Label-based equivalent: rows up to and including 'Amridge University',
# columns up to and including 'MENONLY'.
college.loc[:'Amridge University', : 'MENONLY']

Unnamed: 0_level_0,CITY,STABBR,HBCU,MENONLY
INSTNM,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Alabama A & M University,Normal,AL,1.0,0.0
University of Alabama at Birmingham,Birmingham,AL,0.0,0.0
Amridge University,Montgomery,AL,0.0,0.0


In [61]:
# Select all rows (":") of two non-adjacent columns by
# position; .head() limits the display to the first five rows:
college.iloc[:, [4, 6]].head()

Unnamed: 0_level_0,WOMENONLY,SATVRMID
INSTNM,Unnamed: 1_level_1,Unnamed: 2_level_1
Alabama A & M University,0.0,424.0
University of Alabama at Birmingham,0.0,570.0
Amridge University,0.0,
University of Alabama in Huntsville,0.0,595.0
Alabama State University,0.0,425.0


In [63]:
# Label-based equivalent: all rows of the two named columns.
college.loc[:, ["WOMENONLY", "SATVRMID"]].head()

Unnamed: 0_level_0,WOMENONLY,SATVRMID
INSTNM,Unnamed: 1_level_1,Unnamed: 2_level_1
Alabama A & M University,0.0,424.0
University of Alabama at Birmingham,0.0,570.0
Amridge University,0.0,
University of Alabama in Huntsville,0.0,595.0
Alabama State University,0.0,425.0


In [64]:
# Select disjoint rows and columns by position:
# a list of row positions and a list of column positions.
college.iloc[[100, 200], [7, 15]]

Unnamed: 0_level_0,SATMTMID,UGDS_NHPI
INSTNM,Unnamed: 1_level_1,Unnamed: 2_level_1
GateWay Community College,,0.0029
American Baptist Seminary of the West,,


In [66]:
# Label-based selection of two non-adjacent rows and two
# non-adjacent columns: pass .loc a list of row names and
# a list of column names.
rows = ["GateWay Community College", "American Baptist Seminary of the West"]
columns = ["SATMTMID", "UGDS_NHPI"]
college.loc[rows, columns]

Unnamed: 0_level_0,SATMTMID,UGDS_NHPI
INSTNM,Unnamed: 1_level_1,Unnamed: 2_level_1
GateWay Community College,,0.0029
American Baptist Seminary of the West,,


In [70]:
# Slice the rows and select a single column by position.
# The negative step walks backwards from position 90 towards
# 80 (stop excluded), taking every second row; column
# position 5 is RELAFFIL.
college.iloc[90:80:-2, 5]

INSTNM
Empire Beauty School-Flagstaff     0
Charles of Italy Beauty College    0
Central Arizona College            0
University of Arizona              0
Arizona State University-Tempe     0
Name: RELAFFIL, dtype: int64

In [37]:
# Label-based version of the backwards, every-second-row
# slice: walk from the start label to the stop label with a
# step of -2 and keep only the RELAFFIL column. With labels
# the stop row is part of the result.
start, stop = (
    "Empire Beauty School-Flagstaff",
    "Arizona State University-Tempe",
)
college.loc[start:stop:-2, "RELAFFIL"]

INSTNM
Empire Beauty School-Flagstaff     0
Charles of Italy Beauty College    0
Central Arizona College            0
University of Arizona              0
Arizona State University-Tempe     0
Name: RELAFFIL, dtype: int64

In [72]:
# Sort the rows by institution name in descending order.
# NOTE: this rebinds `college`, so the row positions used by the
# .iloc examples in earlier cells no longer refer to the same rows.
college = college.sort_index(ascending=False)

In [76]:
# Confirm that the index is now sorted in descending order.
college.index.is_monotonic_decreasing

True

In [78]:
# "E" and "B" are not institution names; on the descending-sorted index
# the label slice returns the rows whose names sort between the two strings.
college.loc["E":"B"]

Unnamed: 0_level_0,CITY,STABBR,...,MD_EARN_WNE_P10,GRAD_DEBT_MDN_SUPP
INSTNM,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
Dyersburg State Community College,Dyersburg,TN,...,26800,7475
Dutchess Community College,Poughkeepsie,NY,...,32500,10250
Dutchess BOCES-Practical Nursing Program,Poughkeepsie,NY,...,36500,9500
Durham Technical Community College,Durham,NC,...,27200,11069.5
Durham Beauty Academy,Durham,NC,...,PrivacySuppressed,15332
...,...,...,...,...,...
Bacone College,Muskogee,OK,...,29700,26350
Babson College,Wellesley,MA,...,86700,27000
BJ's Beauty & Barber College,Auburn,WA,...,,PrivacySuppressed
BIR Training Center,Chicago,IL,...,PrivacySuppressed,15394
