# Chapter 4: データから部分抽出

## レシピ
* [Seriesデータの選択](#レシピ26-Seriesデータの選択)
* [DataFrameの行の選択](#レシピ27-DataFrameの行の選択)
* [DataFrameの行とカラムの同時選択](#レシピ28-DataFrameの行とカラムの同時選択)
* [整数とラベルの両方でデータ選択](#レシピ29-整数とラベルの両方でデータ選択)
* [スカラー選択の高速化](#レシピ30-スカラー選択の高速化)
* [行の手軽なスライシング](#レシピ31-行の手軽なスライシング)
* [文字順にスライシング](#レシピ32-文字順にスライシング)

In [1]:
import pandas as pd
import numpy as np
from IPython.display import display
# Render up to 50 columns so the wide college DataFrame is not truncated.
pd.set_option('display.max_columns', 50)

# レシピ26 Seriesデータの選択

In [2]:
# Load the college dataset, using the institution name column (INSTNM) as the row index.
college = pd.read_csv('data/college.csv', index_col='INSTNM')
# Keep the CITY column as a Series for the Series-selection examples that follow.
city = college['CITY']
city.head()

INSTNM
Alabama A & M University                   Normal
University of Alabama at Birmingham    Birmingham
Amridge University                     Montgomery
University of Alabama in Huntsville    Huntsville
Alabama State University               Montgomery
Name: CITY, dtype: object

In [3]:
# iloc selects by integer position; a single integer returns the value stored there.
city.iloc[3]

'Huntsville'

In [4]:
# Hand iloc a list of integers to pick several positions at once.
city.iloc[[10, 20, 30]]

INSTNM
Birmingham Southern College                            Birmingham
George C Wallace State Community College-Hanceville    Hanceville
Judson College                                             Marion
Name: CITY, dtype: object

In [5]:
# Slice notation (start:stop:step) selects evenly spaced positions:
# every 10th entry starting at position 4 and stopping before position 50.
city.iloc[4:50:10]

INSTNM
Alabama State University              Montgomery
Enterprise State Community College    Enterprise
Heritage Christian University           Florence
Marion Military Institute                 Marion
Reid State Technical College           Evergreen
Name: CITY, dtype: object

In [6]:
# loc selects by index label; a single label returns the matching value.
city.loc['Heritage Christian University']

'Florence'

In [7]:
# Several labels are selected by passing loc a list. Build such a list by
# drawing four index labels at random; the fixed seed keeps the draw repeatable.
np.random.seed(1)
labels = list(np.random.choice(city.index, size=4))
labels

['Northwest HVAC/R Training Center',
 'California State University-Dominguez Hills',
 'Lower Columbia College',
 'Southwest Acupuncture College-Boulder']

In [8]:
# Select the entries for the randomly drawn labels by passing the list to loc.
city.loc[labels]

INSTNM
Northwest HVAC/R Training Center                Spokane
California State University-Dominguez Hills      Carson
Lower Columbia College                         Longview
Southwest Acupuncture College-Boulder           Boulder
Name: CITY, dtype: object

In [9]:
# A label slice with a step also picks evenly spaced entries; the recorded
# result includes the stop label itself.
city.loc['Alabama State University':'Reid State Technical College':10]

INSTNM
Alabama State University              Montgomery
Enterprise State Community College    Enterprise
Heritage Christian University           Florence
Marion Military Institute                 Marion
Reid State Technical College           Evergreen
Name: CITY, dtype: object

# レシピ27 DataFrameの行の選択

In [10]:
# Preview the first rows of the full DataFrame before selecting rows from it.
college.head()

Unnamed: 0_level_0,CITY,STABBR,HBCU,MENONLY,WOMENONLY,RELAFFIL,SATVRMID,SATMTMID,DISTANCEONLY,UGDS,UGDS_WHITE,UGDS_BLACK,UGDS_HISP,UGDS_ASIAN,UGDS_AIAN,UGDS_NHPI,UGDS_2MOR,UGDS_NRA,UGDS_UNKN,PPTUG_EF,CURROPER,PCTPELL,PCTFLOAN,UG25ABV,MD_EARN_WNE_P10,GRAD_DEBT_MDN_SUPP
INSTNM,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1
Alabama A & M University,Normal,AL,1.0,0.0,0.0,0,424.0,420.0,0.0,4206.0,0.0333,0.9353,0.0055,0.0019,0.0024,0.0019,0.0,0.0059,0.0138,0.0656,1,0.7356,0.8284,0.1049,30300,33888.0
University of Alabama at Birmingham,Birmingham,AL,0.0,0.0,0.0,0,570.0,565.0,0.0,11383.0,0.5922,0.26,0.0283,0.0518,0.0022,0.0007,0.0368,0.0179,0.01,0.2607,1,0.346,0.5214,0.2422,39700,21941.5
Amridge University,Montgomery,AL,0.0,0.0,0.0,1,,,1.0,291.0,0.299,0.4192,0.0069,0.0034,0.0,0.0,0.0,0.0,0.2715,0.4536,1,0.6801,0.7795,0.854,40100,23370.0
University of Alabama in Huntsville,Huntsville,AL,0.0,0.0,0.0,0,595.0,590.0,0.0,5451.0,0.6988,0.1255,0.0382,0.0376,0.0143,0.0002,0.0172,0.0332,0.035,0.2146,1,0.3072,0.4596,0.264,45500,24097.0
Alabama State University,Montgomery,AL,1.0,0.0,0.0,0,425.0,430.0,0.0,4811.0,0.0158,0.9208,0.0121,0.0019,0.001,0.0006,0.0098,0.0243,0.0137,0.0892,1,0.7347,0.7554,0.127,26600,33118.5


In [11]:
# Passing one integer to iloc returns the entire row at that position as a Series.
college.iloc[60]

CITY                  Anchorage
STABBR                       AK
HBCU                          0
MENONLY                       0
WOMENONLY                     0
RELAFFIL                      0
SATVRMID                    NaN
SATMTMID                    NaN
DISTANCEONLY                  0
UGDS                      12865
UGDS_WHITE               0.5747
UGDS_BLACK               0.0358
UGDS_HISP                0.0761
UGDS_ASIAN               0.0778
UGDS_AIAN                0.0653
UGDS_NHPI                0.0086
UGDS_2MOR                 0.098
UGDS_NRA                 0.0181
UGDS_UNKN                0.0457
PPTUG_EF                 0.4539
CURROPER                      1
PCTPELL                  0.2385
PCTFLOAN                 0.2647
UG25ABV                  0.4386
MD_EARN_WNE_P10           42500
GRAD_DEBT_MDN_SUPP      19449.5
Name: University of Alaska Anchorage, dtype: object

In [12]:
# Passing an index label to loc returns the same row, looked up by label instead of position.
college.loc['University of Alaska Anchorage']

CITY                  Anchorage
STABBR                       AK
HBCU                          0
MENONLY                       0
WOMENONLY                     0
RELAFFIL                      0
SATVRMID                    NaN
SATMTMID                    NaN
DISTANCEONLY                  0
UGDS                      12865
UGDS_WHITE               0.5747
UGDS_BLACK               0.0358
UGDS_HISP                0.0761
UGDS_ASIAN               0.0778
UGDS_AIAN                0.0653
UGDS_NHPI                0.0086
UGDS_2MOR                 0.098
UGDS_NRA                 0.0181
UGDS_UNKN                0.0457
PPTUG_EF                 0.4539
CURROPER                      1
PCTPELL                  0.2385
PCTFLOAN                 0.2647
UG25ABV                  0.4386
MD_EARN_WNE_P10           42500
GRAD_DEBT_MDN_SUPP      19449.5
Name: University of Alaska Anchorage, dtype: object

In [13]:
# Pass iloc a list of integers to select several rows; they come back in the order listed.
college.iloc[[60, 99, 3]]

Unnamed: 0_level_0,CITY,STABBR,HBCU,MENONLY,WOMENONLY,RELAFFIL,SATVRMID,SATMTMID,DISTANCEONLY,UGDS,UGDS_WHITE,UGDS_BLACK,UGDS_HISP,UGDS_ASIAN,UGDS_AIAN,UGDS_NHPI,UGDS_2MOR,UGDS_NRA,UGDS_UNKN,PPTUG_EF,CURROPER,PCTPELL,PCTFLOAN,UG25ABV,MD_EARN_WNE_P10,GRAD_DEBT_MDN_SUPP
INSTNM,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1
University of Alaska Anchorage,Anchorage,AK,0.0,0.0,0.0,0,,,0.0,12865.0,0.5747,0.0358,0.0761,0.0778,0.0653,0.0086,0.098,0.0181,0.0457,0.4539,1,0.2385,0.2647,0.4386,42500,19449.5
International Academy of Hair Design,Tempe,AZ,0.0,0.0,0.0,0,,,0.0,188.0,0.2713,0.25,0.367,0.016,0.016,0.0,0.016,0.0,0.0638,0.0,0,0.7185,0.7346,0.3905,22200,10556.0
University of Alabama in Huntsville,Huntsville,AL,0.0,0.0,0.0,0,595.0,590.0,0.0,5451.0,0.6988,0.1255,0.0382,0.0376,0.0143,0.0002,0.0172,0.0332,0.035,0.2146,1,0.3072,0.4596,0.264,45500,24097.0


In [14]:
# The label-based counterpart: give loc a list of index labels to select several rows.
labels = [
    'University of Alaska Anchorage',
    'International Academy of Hair Design',
    'University of Alabama in Huntsville',
]
college.loc[labels]

Unnamed: 0_level_0,CITY,STABBR,HBCU,MENONLY,WOMENONLY,RELAFFIL,SATVRMID,SATMTMID,DISTANCEONLY,UGDS,UGDS_WHITE,UGDS_BLACK,UGDS_HISP,UGDS_ASIAN,UGDS_AIAN,UGDS_NHPI,UGDS_2MOR,UGDS_NRA,UGDS_UNKN,PPTUG_EF,CURROPER,PCTPELL,PCTFLOAN,UG25ABV,MD_EARN_WNE_P10,GRAD_DEBT_MDN_SUPP
INSTNM,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1
University of Alaska Anchorage,Anchorage,AK,0.0,0.0,0.0,0,,,0.0,12865.0,0.5747,0.0358,0.0761,0.0778,0.0653,0.0086,0.098,0.0181,0.0457,0.4539,1,0.2385,0.2647,0.4386,42500,19449.5
International Academy of Hair Design,Tempe,AZ,0.0,0.0,0.0,0,,,0.0,188.0,0.2713,0.25,0.367,0.016,0.016,0.0,0.016,0.0,0.0638,0.0,0,0.7185,0.7346,0.3905,22200,10556.0
University of Alabama in Huntsville,Huntsville,AL,0.0,0.0,0.0,0,595.0,590.0,0.0,5451.0,0.6988,0.1255,0.0382,0.0376,0.0143,0.0002,0.0172,0.0332,0.035,0.2146,1,0.3072,0.4596,0.264,45500,24097.0


In [15]:
# The label list need not be typed or pasted by hand: it can be taken
# directly from the index of an iloc selection.
college.iloc[[60, 99, 3]].index.tolist()

['University of Alaska Anchorage',
 'International Academy of Hair Design',
 'University of Alabama in Huntsville']

In [16]:
# Slice notation with iloc selects a run of consecutive rows by position
# (the stop position itself is excluded).
college.iloc[99:102]

Unnamed: 0_level_0,CITY,STABBR,HBCU,MENONLY,WOMENONLY,RELAFFIL,SATVRMID,SATMTMID,DISTANCEONLY,UGDS,UGDS_WHITE,UGDS_BLACK,UGDS_HISP,UGDS_ASIAN,UGDS_AIAN,UGDS_NHPI,UGDS_2MOR,UGDS_NRA,UGDS_UNKN,PPTUG_EF,CURROPER,PCTPELL,PCTFLOAN,UG25ABV,MD_EARN_WNE_P10,GRAD_DEBT_MDN_SUPP
INSTNM,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1
International Academy of Hair Design,Tempe,AZ,0.0,0.0,0.0,0,,,0.0,188.0,0.2713,0.25,0.367,0.016,0.016,0.0,0.016,0.0,0.0638,0.0,0,0.7185,0.7346,0.3905,22200,10556
GateWay Community College,Phoenix,AZ,0.0,0.0,0.0,0,,,0.0,5211.0,0.3585,0.1201,0.3389,0.0355,0.0451,0.0029,0.0127,0.0161,0.0702,0.7465,1,0.327,0.2189,0.5832,29800,7283
Mesa Community College,Mesa,AZ,0.0,0.0,0.0,0,,,0.0,19055.0,0.5002,0.0661,0.2354,0.039,0.0403,0.0046,0.0205,0.0257,0.0682,0.6457,1,0.3423,0.2207,0.401,35200,8000


In [17]:
# Slice notation with loc: the same rows, bounded by their index labels.
start, stop = 'International Academy of Hair Design', 'Mesa Community College'
college.loc[start:stop]

Unnamed: 0_level_0,CITY,STABBR,HBCU,MENONLY,WOMENONLY,RELAFFIL,SATVRMID,SATMTMID,DISTANCEONLY,UGDS,UGDS_WHITE,UGDS_BLACK,UGDS_HISP,UGDS_ASIAN,UGDS_AIAN,UGDS_NHPI,UGDS_2MOR,UGDS_NRA,UGDS_UNKN,PPTUG_EF,CURROPER,PCTPELL,PCTFLOAN,UG25ABV,MD_EARN_WNE_P10,GRAD_DEBT_MDN_SUPP
INSTNM,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1
International Academy of Hair Design,Tempe,AZ,0.0,0.0,0.0,0,,,0.0,188.0,0.2713,0.25,0.367,0.016,0.016,0.0,0.016,0.0,0.0638,0.0,0,0.7185,0.7346,0.3905,22200,10556
GateWay Community College,Phoenix,AZ,0.0,0.0,0.0,0,,,0.0,5211.0,0.3585,0.1201,0.3389,0.0355,0.0451,0.0029,0.0127,0.0161,0.0702,0.7465,1,0.327,0.2189,0.5832,29800,7283
Mesa Community College,Mesa,AZ,0.0,0.0,0.0,0,,,0.0,19055.0,0.5002,0.0661,0.2354,0.039,0.0403,0.0046,0.0205,0.0257,0.0682,0.6457,1,0.3423,0.2207,0.401,35200,8000


# レシピ28 DataFrameの行とカラムの同時選択

### 行とカラムを同時に選択する最も一般的な形式は次のようなコードになる
- df.iloc[rows, columns]
- df.loc[rows, columns]

In [18]:
# Slice both axes: the first three rows and the first four columns.
college.iloc[:3, :4]

Unnamed: 0_level_0,CITY,STABBR,HBCU,MENONLY
INSTNM,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Alabama A & M University,Normal,AL,1.0,0.0
University of Alabama at Birmingham,Birmingham,AL,0.0,0.0
Amridge University,Montgomery,AL,0.0,0.0


In [19]:
# Every row of two separate columns, chosen by integer position.
college.iloc[:, [4, 6]].head()

Unnamed: 0_level_0,WOMENONLY,SATVRMID
INSTNM,Unnamed: 1_level_1,Unnamed: 2_level_1
Alabama A & M University,0.0,424.0
University of Alabama at Birmingham,0.0,570.0
Amridge University,0.0,
University of Alabama in Huntsville,0.0,595.0
Alabama State University,0.0,425.0


In [20]:
# The label-based equivalent: every row of the same two columns, chosen by name.
college.loc[:, ['WOMENONLY', 'SATVRMID']].head()

Unnamed: 0_level_0,WOMENONLY,SATVRMID
INSTNM,Unnamed: 1_level_1,Unnamed: 2_level_1
Alabama A & M University,0.0,424.0
University of Alabama at Birmingham,0.0,570.0
Amridge University,0.0,
University of Alabama in Huntsville,0.0,595.0
Alabama State University,0.0,425.0


In [21]:
# Select non-adjacent rows and non-adjacent columns by giving a list for each axis.
college.iloc[[100, 200], [7, 15]]

Unnamed: 0_level_0,SATMTMID,UGDS_NHPI
INSTNM,Unnamed: 1_level_1,Unnamed: 2_level_1
GateWay Community College,,0.0029
American Baptist Seminary of the West,,


In [22]:
# The same selection by label: one list of row labels, one list of column names.
rows = ['GateWay Community College', 'American Baptist Seminary of the West']
columns = ['SATMTMID', 'UGDS_NHPI']
college.loc[rows, columns]

Unnamed: 0_level_0,SATMTMID,UGDS_NHPI
INSTNM,Unnamed: 1_level_1,Unnamed: 2_level_1
GateWay Community College,,0.0029
American Baptist Seminary of the West,,


In [23]:
# Select a single scalar: row position 5, fourth column from the end.
college.iloc[5, -4]

0.401

In [24]:
# The same scalar, addressed by row label and column name.
college.loc['The University of Alabama', 'PCTFLOAN']

0.401

In [25]:
# Slice the rows (backwards, every second row) while selecting a single column.
college.iloc[90:80:-2, 5]

INSTNM
Empire Beauty School-Flagstaff     0
Charles of Italy Beauty College    0
Central Arizona College            0
University of Arizona              0
Arizona State University-Tempe     0
Name: RELAFFIL, dtype: int64

In [26]:
# The label-based equivalent: step backwards from the start label to the stop label.
start, stop = 'Empire Beauty School-Flagstaff', 'Arizona State University-Tempe'
college.loc[start:stop:-2, 'RELAFFIL']

INSTNM
Empire Beauty School-Flagstaff     0
Charles of Italy Beauty College    0
Central Arizona College            0
University of Arizona              0
Arizona State University-Tempe     0
Name: RELAFFIL, dtype: int64

# レシピ29 整数とラベルの両方でデータ選択

#### カラムの整数位置を見つけ、ilocを使って選択を完了する

In [27]:
# Index.get_loc turns a column name into its integer position. One is added
# to the end position because an iloc slice excludes its stop.
col_start, col_end = (
    college.columns.get_loc('UGDS_WHITE'),
    college.columns.get_loc('UGDS_UNKN') + 1,
)
col_start, col_end

(10, 19)

In [28]:
# Use the integer positions col_start and col_end as an iloc column slice
# (first five rows only).
college.iloc[:5, col_start:col_end]

Unnamed: 0_level_0,UGDS_WHITE,UGDS_BLACK,UGDS_HISP,UGDS_ASIAN,UGDS_AIAN,UGDS_NHPI,UGDS_2MOR,UGDS_NRA,UGDS_UNKN
INSTNM,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
Alabama A & M University,0.0333,0.9353,0.0055,0.0019,0.0024,0.0019,0.0,0.0059,0.0138
University of Alabama at Birmingham,0.5922,0.26,0.0283,0.0518,0.0022,0.0007,0.0368,0.0179,0.01
Amridge University,0.299,0.4192,0.0069,0.0034,0.0,0.0,0.0,0.0,0.2715
University of Alabama in Huntsville,0.6988,0.1255,0.0382,0.0376,0.0143,0.0002,0.0172,0.0332,0.035
Alabama State University,0.0158,0.9208,0.0121,0.0019,0.001,0.0006,0.0098,0.0243,0.0137


In [29]:
# loc can do the same job: read the row labels off the index by position,
# then slice both axes by label.
row_start, row_end = college.index[10], college.index[15]
college.loc[row_start:row_end, 'UGDS_WHITE':'UGDS_UNKN']

Unnamed: 0_level_0,UGDS_WHITE,UGDS_BLACK,UGDS_HISP,UGDS_ASIAN,UGDS_AIAN,UGDS_NHPI,UGDS_2MOR,UGDS_NRA,UGDS_UNKN
INSTNM,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
Birmingham Southern College,0.7983,0.1102,0.0195,0.0517,0.0102,0.0,0.0051,0.0,0.0051
Chattahoochee Valley Community College,0.4661,0.4372,0.0492,0.0127,0.0023,0.0035,0.0151,0.0,0.0139
Concordia College Alabama,0.028,0.8758,0.0373,0.0093,0.0,0.0,0.0031,0.0466,0.0
South University-Montgomery,0.3046,0.6054,0.0153,0.0153,0.0153,0.0096,0.0,0.0019,0.0326
Enterprise State Community College,0.6408,0.2435,0.0509,0.0202,0.0081,0.0029,0.0254,0.0012,0.0069
James H Faulkner State Community College,0.6979,0.2259,0.032,0.0084,0.0177,0.0014,0.0152,0.0007,0.0009


# レシピ30 スカラー選択の高速化

#### スカラー値の選択を高速に行うには、.iat及び.atを使う

In [30]:
# Select a scalar with loc by passing the institution name and the column name.
cn = 'Texas A & M University-College Station'
college.loc[cn, 'UGDS_WHITE']

0.6609999999999999

In [31]:
# .at returns the same scalar.
college.at[cn, 'UGDS_WHITE']

0.6609999999999999

In [32]:
# Use the %timeit magic to measure the loc lookup, for comparison with .at below.
%timeit college.loc[cn, 'UGDS_WHITE']

5.72 µs ± 344 ns per loop (mean ± std. dev. of 7 runs, 100000 loops each)


In [33]:
# Time the equivalent .at lookup.
%timeit college.at[cn, 'UGDS_WHITE']

3.59 µs ± 151 ns per loop (mean ± std. dev. of 7 runs, 100000 loops each)


In [34]:
# To compare iloc against iat, first look up the integer row and column positions.
row_num, col_num = (
    college.index.get_loc(cn),
    college.columns.get_loc('UGDS_WHITE'),
)
row_num, col_num

(3765, 10)

In [35]:
# Time the scalar lookup by integer position with iloc.
%timeit college.iloc[row_num, col_num]

6.29 µs ± 298 ns per loop (mean ± std. dev. of 7 runs, 100000 loops each)


In [36]:
# Time the same positional lookup with iat.
%timeit college.iat[row_num, col_num]

3.75 µs ± 35.6 ns per loop (mean ± std. dev. of 7 runs, 100000 loops each)


# レシピ31 行の手軽なスライシング

#### SeriesとDataFrame両方のインデックス演算子にスライスオブジェクトを渡す

In [37]:
# A slice passed straight to the indexing operator selects rows:
# positions 10 up to (but not including) 20, taking every second row.
college[10:20:2]

Unnamed: 0_level_0,CITY,STABBR,HBCU,MENONLY,WOMENONLY,RELAFFIL,SATVRMID,SATMTMID,DISTANCEONLY,UGDS,UGDS_WHITE,UGDS_BLACK,UGDS_HISP,UGDS_ASIAN,UGDS_AIAN,UGDS_NHPI,UGDS_2MOR,UGDS_NRA,UGDS_UNKN,PPTUG_EF,CURROPER,PCTPELL,PCTFLOAN,UG25ABV,MD_EARN_WNE_P10,GRAD_DEBT_MDN_SUPP
INSTNM,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1
Birmingham Southern College,Birmingham,AL,0.0,0.0,0.0,1,560.0,560.0,0.0,1180.0,0.7983,0.1102,0.0195,0.0517,0.0102,0.0,0.0051,0.0,0.0051,0.0017,1,0.192,0.4809,0.0152,44200.0,27000
Concordia College Alabama,Selma,AL,1.0,0.0,0.0,1,420.0,400.0,0.0,322.0,0.028,0.8758,0.0373,0.0093,0.0,0.0,0.0031,0.0466,0.0,0.1056,1,0.8667,0.9333,0.2367,19900.0,PrivacySuppressed
Enterprise State Community College,Enterprise,AL,0.0,0.0,0.0,0,,,0.0,1729.0,0.6408,0.2435,0.0509,0.0202,0.0081,0.0029,0.0254,0.0012,0.0069,0.3823,1,0.4895,0.2263,0.3399,24600.0,8273
Faulkner University,Montgomery,AL,0.0,0.0,0.0,1,,,0.0,2367.0,0.3874,0.5137,0.0258,0.0042,0.0063,0.0013,0.0173,0.0182,0.0258,0.2302,1,0.5812,0.7253,0.4589,37200.0,22000
New Beginning College of Cosmetology,Albertville,AL,0.0,0.0,0.0,0,,,0.0,115.0,0.8957,0.0348,0.0696,0.0,0.0,0.0,0.0,0.0,0.0,0.0783,1,0.8224,0.8553,0.3933,,5500


In [38]:
# The same slice applied to a Series.
city = college['CITY']
city[10:20:2]

INSTNM
Birmingham Southern College              Birmingham
Concordia College Alabama                     Selma
Enterprise State Community College       Enterprise
Faulkner University                      Montgomery
New Beginning College of Cosmetology    Albertville
Name: CITY, dtype: object

In [39]:
# The indexing operator of both Series and DataFrame also accepts label slices
# (here with a step of 1500 rows).
start, stop = 'Mesa Community College', 'Spokane Community College'
college[start:stop:1500]

Unnamed: 0_level_0,CITY,STABBR,HBCU,MENONLY,WOMENONLY,RELAFFIL,SATVRMID,SATMTMID,DISTANCEONLY,UGDS,UGDS_WHITE,UGDS_BLACK,UGDS_HISP,UGDS_ASIAN,UGDS_AIAN,UGDS_NHPI,UGDS_2MOR,UGDS_NRA,UGDS_UNKN,PPTUG_EF,CURROPER,PCTPELL,PCTFLOAN,UG25ABV,MD_EARN_WNE_P10,GRAD_DEBT_MDN_SUPP
INSTNM,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1
Mesa Community College,Mesa,AZ,0.0,0.0,0.0,0,,,0.0,19055.0,0.5002,0.0661,0.2354,0.039,0.0403,0.0046,0.0205,0.0257,0.0682,0.6457,1,0.3423,0.2207,0.401,35200.0,8000
Hair Academy Inc-New Carrollton,New Carrollton,MD,0.0,0.0,0.0,0,,,0.0,504.0,0.0,0.994,0.004,0.0,0.002,0.0,0.0,0.0,0.0,0.4683,1,0.9756,1.0,0.5882,15200.0,9666
National College of Natural Medicine,Portland,OR,0.0,0.0,0.0,0,,,0.0,,,,,,,,,,,,1,,,,,PrivacySuppressed


In [40]:
# Apply the same label slice to the Series.
city[start:stop:1500]

INSTNM
Mesa Community College                            Mesa
Hair Academy Inc-New Carrollton         New Carrollton
National College of Natural Medicine          Portland
Name: CITY, dtype: object

# レシピ32 文字順にスライシング

#### インデックスでソートし、locでスライス表記を用いて２つの文字列の間の全ての行を選ぶ

In [41]:
# Try to select every college whose name falls alphabetically between
# 'Sp' and 'Su'. The index has not been sorted yet and neither bound is an
# actual label, so pandas raises KeyError. The error is the point of this
# cell, so it is caught and printed rather than left to abort a
# "Restart & Run All" of the notebook.
try:
    college.loc['Sp':'Su']
except KeyError as err:
    print(f'KeyError: {err}')

KeyError: 'Sp'

In [42]:
# Sort the index. Note that this rebinds `college`, so every later cell
# sees the sorted frame.
college = college.sort_index()
college

Unnamed: 0_level_0,CITY,STABBR,HBCU,MENONLY,WOMENONLY,RELAFFIL,SATVRMID,SATMTMID,DISTANCEONLY,UGDS,UGDS_WHITE,UGDS_BLACK,UGDS_HISP,UGDS_ASIAN,UGDS_AIAN,UGDS_NHPI,UGDS_2MOR,UGDS_NRA,UGDS_UNKN,PPTUG_EF,CURROPER,PCTPELL,PCTFLOAN,UG25ABV,MD_EARN_WNE_P10,GRAD_DEBT_MDN_SUPP
INSTNM,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1
A & W Healthcare Educators,New Orleans,LA,0.0,0.0,0.0,0,,,0.0,40.0,0.0000,0.9750,0.0250,0.0000,0.0000,0.0000,0.0000,0.0000,0.0000,0.1250,1,0.7018,0.8596,0.6667,,19022.5
A T Still University of Health Sciences,Kirksville,MO,0.0,0.0,0.0,0,,,0.0,,,,,,,,,,,,1,,,,219800,PrivacySuppressed
ABC Beauty Academy,Garland,TX,0.0,0.0,0.0,0,,,0.0,30.0,0.0000,0.0333,0.0333,0.9333,0.0000,0.0000,0.0000,0.0000,0.0000,0.0000,0,0.7857,0.0000,0.8286,,PrivacySuppressed
ABC Beauty College Inc,Arkadelphia,AR,0.0,0.0,0.0,0,,,0.0,38.0,0.2895,0.6579,0.0526,0.0000,0.0000,0.0000,0.0000,0.0000,0.0000,0.2105,1,0.9815,1.0000,0.4688,PrivacySuppressed,16500
AI Miami International University of Art and Design,Miami,FL,0.0,0.0,0.0,0,,,0.0,2778.0,0.0324,0.0198,0.4773,0.0018,0.0000,0.0000,0.0018,0.0025,0.4644,0.2185,1,0.5507,0.6966,0.3262,29900,31000
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
Yukon Beauty College Inc,Yukon,OK,0.0,0.0,0.0,0,,,0.0,25.0,0.8000,0.0400,0.0000,0.0000,0.1200,0.0400,0.0000,0.0000,0.0000,0.0000,1,0.9259,0.8148,0.4706,PrivacySuppressed,PrivacySuppressed
Z Hair Academy,Lawrence,KS,0.0,0.0,0.0,0,,,0.0,95.0,0.9368,0.0000,0.0211,0.0000,0.0000,0.0105,0.0211,0.0000,0.0105,0.0000,1,0.7286,0.6571,0.1525,,10500
Zane State College,Zanesville,OH,0.0,0.0,0.0,0,,,0.0,2063.0,0.6995,0.0296,0.0029,0.0029,0.0029,0.0005,0.0218,0.0000,0.2399,0.5730,1,0.3645,0.3434,0.3185,23800,13960.5
duCret School of Arts,Plainfield,NJ,0.0,0.0,0.0,0,,,0.0,41.0,0.4634,0.1951,0.1463,0.0732,0.0000,0.0000,0.0976,0.0000,0.0244,0.4146,1,0.4375,0.5000,0.1250,PrivacySuppressed,PrivacySuppressed


In [43]:
# Once the index is sorted alphabetically, loc accepts a slice between two
# strings even though neither is an actual label.
college.loc['Sp':'Su']

Unnamed: 0_level_0,CITY,STABBR,HBCU,MENONLY,WOMENONLY,RELAFFIL,SATVRMID,SATMTMID,DISTANCEONLY,UGDS,UGDS_WHITE,UGDS_BLACK,UGDS_HISP,UGDS_ASIAN,UGDS_AIAN,UGDS_NHPI,UGDS_2MOR,UGDS_NRA,UGDS_UNKN,PPTUG_EF,CURROPER,PCTPELL,PCTFLOAN,UG25ABV,MD_EARN_WNE_P10,GRAD_DEBT_MDN_SUPP
INSTNM,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1
Spa Tech Institute-Ipswich,Ipswich,MA,0.0,0.0,0.0,0,,,0.0,37.0,0.9459,0.0000,0.0000,0.0000,0.0000,0.0000,0.0000,0.0000,0.0541,0.4054,1,0.2656,0.3906,0.7907,21500,6333
Spa Tech Institute-Plymouth,Plymouth,MA,0.0,0.0,0.0,0,,,0.0,153.0,0.7124,0.0131,0.0196,0.0065,0.0000,0.0000,0.0000,0.0000,0.2484,0.3399,1,0.3716,0.4266,0.6250,21500,6333
Spa Tech Institute-Westboro,Westboro,MA,0.0,0.0,0.0,0,,,0.0,90.0,0.8222,0.0333,0.1000,0.0222,0.0000,0.0000,0.0000,0.0000,0.0222,0.5778,1,0.3409,0.4545,0.6882,21500,6333
Spa Tech Institute-Westbrook,Westbrook,ME,0.0,0.0,0.0,0,,,0.0,240.0,0.9417,0.0333,0.0042,0.0167,0.0000,0.0000,0.0000,0.0000,0.0042,0.2542,1,0.4350,0.5093,0.5224,21500,6333
Spalding University,Louisville,KY,0.0,0.0,0.0,1,490.0,440.0,0.0,1227.0,0.6650,0.2127,0.0416,0.0114,0.0024,0.0024,0.0302,0.0016,0.0326,0.2502,1,0.4442,0.6725,0.3764,41700,25000
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
Studio Academy of Beauty,Chandler,AZ,0.0,0.0,0.0,0,,,0.0,332.0,0.4669,0.1145,0.3283,0.0090,0.0301,0.0030,0.0392,0.0000,0.0090,0.0000,1,0.5855,0.6218,0.5675,,6333
Studio Jewelers,New York,NY,0.0,0.0,0.0,0,,,0.0,55.0,0.2545,0.1091,0.2727,0.3273,0.0000,0.0000,0.0000,0.0364,0.0000,0.6000,1,0.0451,0.0902,0.8525,PrivacySuppressed,PrivacySuppressed
Stylemaster College of Hair Design,Longview,WA,0.0,0.0,0.0,0,,,0.0,77.0,0.9481,0.0130,0.0260,0.0000,0.0000,0.0000,0.0130,0.0000,0.0000,0.0000,1,0.8036,0.7024,0.4510,17000,13320
Styles and Profiles Beauty College,Selmer,TN,0.0,0.0,0.0,0,,,0.0,31.0,0.8710,0.1290,0.0000,0.0000,0.0000,0.0000,0.0000,0.0000,0.0000,0.0000,1,0.8182,0.7955,0.2400,PrivacySuppressed,PrivacySuppressed


In [44]:
# Slicing also works when the index is sorted in descending order.
college = college.sort_index(ascending=False)
# is_monotonic_increasing / is_monotonic_decreasing report the sort direction.
college.index.is_monotonic_decreasing

True

In [45]:
# On the descending index the slice bounds are given in descending order too ('E' before 'B').
college.loc['E':'B']

Unnamed: 0_level_0,CITY,STABBR,HBCU,MENONLY,WOMENONLY,RELAFFIL,SATVRMID,SATMTMID,DISTANCEONLY,UGDS,UGDS_WHITE,UGDS_BLACK,UGDS_HISP,UGDS_ASIAN,UGDS_AIAN,UGDS_NHPI,UGDS_2MOR,UGDS_NRA,UGDS_UNKN,PPTUG_EF,CURROPER,PCTPELL,PCTFLOAN,UG25ABV,MD_EARN_WNE_P10,GRAD_DEBT_MDN_SUPP
INSTNM,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1
Dyersburg State Community College,Dyersburg,TN,0.0,0.0,0.0,0,,,0.0,2001.0,0.7266,0.2139,0.0240,0.0040,0.0035,0.0000,0.0185,0.0010,0.0085,0.4423,1,0.4921,0.2493,0.3097,26800,7475
Dutchess Community College,Poughkeepsie,NY,0.0,0.0,0.0,0,,,0.0,6885.0,0.6003,0.1361,0.1801,0.0182,0.0022,0.0007,0.0446,0.0129,0.0049,0.3312,1,0.2464,0.1936,0.1806,32500,10250
Dutchess BOCES-Practical Nursing Program,Poughkeepsie,NY,0.0,0.0,0.0,0,,,0.0,155.0,0.4903,0.3419,0.0774,0.0258,0.0000,0.0065,0.0581,0.0000,0.0000,0.7548,1,0.5294,0.6275,0.5430,36500,9500
Durham Technical Community College,Durham,NC,0.0,0.0,0.0,0,,,0.0,4769.0,0.3080,0.4611,0.1126,0.0451,0.0048,0.0019,0.0182,0.0025,0.0457,0.6905,1,0.4495,0.1796,0.5961,27200,11069.5
Durham Beauty Academy,Durham,NC,0.0,0.0,0.0,0,,,0.0,78.0,0.0385,0.9231,0.0000,0.0128,0.0128,0.0000,0.0000,0.0000,0.0128,0.0000,1,0.5746,0.8134,0.4000,PrivacySuppressed,15332
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
Bacone College,Muskogee,OK,0.0,0.0,0.0,1,398.0,428.0,0.0,939.0,0.1917,0.3099,0.0895,0.0288,0.2556,0.0053,0.0298,0.0000,0.0895,0.1140,1,0.9392,0.8920,0.1648,29700,26350
Babson College,Wellesley,MA,0.0,0.0,0.0,0,615.0,660.0,0.0,2107.0,0.3863,0.0479,0.0973,0.1144,0.0019,0.0005,0.0233,0.2682,0.0603,0.0000,1,0.1709,0.3727,0.0090,86700,27000
BJ's Beauty & Barber College,Auburn,WA,0.0,0.0,0.0,0,,,0.0,28.0,0.5000,0.0000,0.1429,0.2143,0.0000,0.0000,0.0714,0.0000,0.0714,0.0000,1,0.5192,0.6154,0.2917,,PrivacySuppressed
BIR Training Center,Chicago,IL,0.0,0.0,0.0,0,,,0.0,2132.0,0.1201,0.8452,0.0220,0.0127,0.0000,0.0000,0.0000,0.0000,0.0000,0.1806,0,0.6700,0.6998,0.6741,PrivacySuppressed,15394
