# Indexing and Iteratung In Pandas
1. iat
2. at
3. loc
4. iloc
5. items
6. pop
7. xs

In [1]:
import pandas as pd
import numpy as np
#https://www.w3resource.com/pandas/series/series-at.php
#https://www.w3resource.com/pandas/series/series.php

# iat[] Function
#Use iat if you only need to get or set a single value in a DataFrame or Series.

In [2]:
df = pd.DataFrame([[2, 3, 4], [0, 5, 6], [20, 30, 40]],
                  columns=['P', 'Q', 'R'])
df

Unnamed: 0,P,Q,R
0,2,3,4
1,0,5,6
2,20,30,40


In [5]:
#Get value at specified row/column pair
df.iat[2, 2]

40

In [6]:
#Set value at specified row/column pair
df.iat[2, 1]

30

In [7]:
#Get value within a series
df.iat[0, 0]

2

In [8]:
df.loc[0].iat[2]

4

In [9]:
df.iloc[0].iat[2]

4

-----------------------

# at[] Function
The at() function is used to access a single value for a row/column label pair

In [10]:
df = pd.DataFrame([[0, 4, 5], [0, 6, 7], [20, 30, 40]],
                  index=[1, 2, 3], columns=['P', 'Q', 'R'])
df

Unnamed: 0,P,Q,R
1,0,4,5
2,0,6,7
3,20,30,40


In [11]:
#Get value at specified row/column pair
df.at[2, 'Q']

6

In [12]:
#Set value at specified row/column pair
df.at[2, 'Q'] = 20

In [13]:
df.at[2, 'Q']

20

In [14]:
df

Unnamed: 0,P,Q,R
1,0,4,5
2,0,20,7
3,20,30,40


In [16]:
#Get value within a Series
df.loc[1].at['R']

5

### Access a group of rows and columns in Pandas

# loc[] Function

The loc() function is used to access a group of rows and columns by label(s) or a boolean array.

.loc[] is primarily label based, but may also be used with a boolean array. 

In [2]:
df = pd.DataFrame([[2, 3], [6, 5], [9, 8]],
     index=['cobra', 'viper', 'sidewinder'],
     columns=['max_speed', 'shield'])
df

Unnamed: 0,max_speed,shield
cobra,2,3
viper,6,5
sidewinder,9,8


In [3]:
df.loc[['viper', 'sidewinder']]

Unnamed: 0,max_speed,shield
viper,6,5
sidewinder,9,8


In [4]:
df.loc['cobra', 'shield']

3

In [5]:
df.loc['cobra':'viper', 'max_speed']

cobra    2
viper    6
Name: max_speed, dtype: int64

In [7]:
df.loc[[False, False, True]]

Unnamed: 0,max_speed,shield
sidewinder,9,8


In [8]:
#Conditional that returns a boolean Series
df.loc[df['shield'] > 6]

Unnamed: 0,max_speed,shield
sidewinder,9,8


In [9]:
#Conditional that returns a boolean Series with column labels specified
df.loc[df['shield'] > 6, ['max_speed']]

Unnamed: 0,max_speed
sidewinder,9


In [10]:
#Callable that returns a boolean Series
df.loc[lambda df: df['shield'] == 8]

Unnamed: 0,max_speed,shield
sidewinder,9,8


In [11]:
#Setting values ,Set value for all items matching the list of labels
df.loc[['viper', 'sidewinder'], ['shield']] = 50
df

Unnamed: 0,max_speed,shield
cobra,2,3
viper,6,50
sidewinder,9,50


In [12]:
#Set value for an entire row
df.loc['cobra'] = 10
df

Unnamed: 0,max_speed,shield
cobra,10,10
viper,6,50
sidewinder,9,50


In [18]:
#Set value for an entire column
df.loc[:, 'max_speed'] = 40
df

Unnamed: 0,max_speed,shield
cobra,40,10.0
viper,40,50.0
sidewinder,40,50.0
0,40,


In [19]:
#Set value for rows matching callable condition
df.loc[df['shield'] > 25] = 0
df

Unnamed: 0,max_speed,shield
cobra,40,10.0
viper,0,0.0
sidewinder,0,0.0
0,40,


In [20]:
# Getting values on a DataFrame with an index that has integer labels.Another example using integers for the index
df = pd.DataFrame([[2, 3], [6, 5], [9, 8]],
     index=[3, 4, 5], columns=['max_speed', 'shield'])
df

Unnamed: 0,max_speed,shield
3,2,3
4,6,5
5,9,8


In [22]:
df.loc[3:4]

Unnamed: 0,max_speed,shield
3,2,3
4,6,5


In [24]:
df.iloc[0:2]

Unnamed: 0,max_speed,shield
3,2,3
4,6,5


# MultiIndex in loc function

In [26]:
#Getting values with a MultiIndex. A number of examples using a DataFrame with a MultiIndex

tuples = [
   ('cobra', 's1'), ('cobra', 's2'),
   ('sidewinder', 's1'), ('sidewinder', 's2'),
   ('viper', 's2'), ('viper', 's3')
]

index = pd.MultiIndex.from_tuples(tuples)

values = [[6, 2], [0, 4], [20, 30],
         [1, 4], [5, 1], [36, 56]]

df = pd.DataFrame(values, columns=['max_speed', 'shield'], index=index)
df

Unnamed: 0,Unnamed: 1,max_speed,shield
cobra,s1,6,2
cobra,s2,0,4
sidewinder,s1,20,30
sidewinder,s2,1,4
viper,s2,5,1
viper,s3,36,56


In [27]:
df.loc['cobra']

Unnamed: 0,max_speed,shield
s1,6,2
s2,0,4


In [29]:
#Single index tuple. Note this returns a Series.
df.loc[('viper', 's2')]

max_speed    5
shield       1
Name: (viper, s2), dtype: int64

In [30]:
#Single tuple. Note using [[]] returns a DataFrame.
df.loc[[('cobra', 's2')]]

Unnamed: 0,Unnamed: 1,max_speed,shield
cobra,s2,0,4


In [31]:
#Single tuple for the index with a single label for the column
df.loc[('cobra', 's1'), 'shield']

2

In [32]:
#Slice from index tuple to single label
df.loc[('cobra', 's1'):'viper']

Unnamed: 0,Unnamed: 1,max_speed,shield
cobra,s1,6,2
cobra,s2,0,4
sidewinder,s1,20,30
sidewinder,s2,1,4
viper,s2,5,1
viper,s3,36,56


In [34]:
#Slice from index tuple to index tuple
df.loc[('cobra', 's1'):('viper', 's1')]

Unnamed: 0,Unnamed: 1,max_speed,shield
cobra,s1,6,2
cobra,s2,0,4
sidewinder,s1,20,30
sidewinder,s2,1,4


# Pandas Series: iloc() function

The iloc() function is used to access a group of rows and columns by label(s) or a boolean array.

.iloc[] is primarily integer position based (from 0 to length-1 of the axis), but may also be used with a boolean array.

In [35]:
mydict = [{'p': 2, 'q': 3, 'r': 4, 's': 5},
           {'p': 20, 'q': 30, 'r': 40, 's': 50},
           {'p': 200, 'q': 300, 'r': 400, 's': 500 }]

df = pd.DataFrame(mydict)
df

Unnamed: 0,p,q,r,s
0,2,3,4,5
1,20,30,40,50
2,200,300,400,500


In [41]:
type(df.iloc[0])
df.iloc[1,1]

30

In [42]:
#With a list of integers.
df.iloc[[0]]

Unnamed: 0,p,q,r,s
0,2,3,4,5


In [45]:
type(df.iloc[[0]])

pandas.core.frame.DataFrame

In [44]:
#it is in series
df.iloc[0]

p    2
q    3
r    4
s    5
Name: 0, dtype: int64

In [46]:
type(df.iloc[0])

pandas.core.series.Series

In [47]:
df.iloc[[0, 2]]

Unnamed: 0,p,q,r,s
0,2,3,4,5
2,200,300,400,500


In [52]:
#With a slice object.
df.iloc[0:3,2]

0      4
1     40
2    400
Name: r, dtype: int64

In [53]:
#With a boolean mask the same length as the index.
df.iloc[[True, False, True]]

Unnamed: 0,p,q,r,s
0,2,3,4,5
2,200,300,400,500


In [54]:
#With a callable, useful in method chains. The x passed to the lambda is the DataFrame being sliced.This selects the rows whose index label even.
df.iloc[lambda x: x.index % 2 == 0]

Unnamed: 0,p,q,r,s
0,2,3,4,5
2,200,300,400,500


In [55]:
#Indexing both axes.You can mix the indexer types for the index and columns. Use : to select the entire axis.With scalar integers.
df.iloc[0, 2]

4

In [56]:
#With lists of integers.
df.iloc[[0, 2], [1, 3]]

Unnamed: 0,q,s
0,3,5
2,300,500


In [58]:
#With slice objects.
df.iloc[1:3, 0:3]

Unnamed: 0,p,q,r
1,20,30,40
2,200,300,400


In [59]:
#With a boolean array whose length matches the columns.
df.iloc[:, [True, False, True, False]]

Unnamed: 0,p,r
0,2,4
1,20,40
2,200,400


In [60]:
#With a callable function that expects the Series or DataFrame.
df.iloc[:, lambda df: [0, 2]]

Unnamed: 0,p,r
0,2,4
1,20,40
2,200,400


# Pandas Series: items() function

The items() function is used to lazily iterate over (index, value) tuples.

This method returns an iterable tuple (index, value). This is convenient if you want to create a lazy iterator.

Returns: iterable
***Iterable of tuples containing the (index, value) pairs from a Series.***

In [64]:
s = pd.Series(['P', 'Q', 'R'])
s

0    P
1    Q
2    R
dtype: object

In [65]:
for index, value in s.items():
    print("Index : {}, Value : {}".format(index, value))

Index : 0, Value : P
Index : 1, Value : Q
Index : 2, Value : R


# Pandas Series: pop() function

The pop() function is used to get item and drop from frame. Raise KeyError if not found.

In [66]:
df = pd.DataFrame([('eagle', 'bird', 320.0),
                   ('emu', 'bird', 48.0),
                   ('tiger', 'mammal', 120.5),
                   ('wolf','mammal', np.nan)],
                  columns=('name', 'class', 'max_speed'))
df

Unnamed: 0,name,class,max_speed
0,eagle,bird,320.0
1,emu,bird,48.0
2,tiger,mammal,120.5
3,wolf,mammal,


In [71]:
#drop and pop do the same work
df.pop('max_speed')

0    320.0
1     48.0
2    120.5
3      NaN
Name: max_speed, dtype: float64

In [72]:
df

Unnamed: 0,name
0,eagle
1,emu
2,tiger
3,wolf


# Pandas Series: xs() function

The xs() function is used to get cross-section from the Series/DataFrame.

This method takes a key argument to select data at a particular level of a MultiIndex.

xs can not be used to set values.

MultiIndex Slicers is a generic way to get/set values on any level or levels.

***Syntax : Series.xs(self, key, axis=0, level=None, drop_level=True)***

In [73]:
d = {'num_legs': [4, 4, 4, 2, 2],
     'num_wings': [0, 0, 0, 2, 2],
     'class': ['mammal', 'mammal', 'mammal', 'bird', 'bird'],
     'animal': ['tiger', 'lion', 'fox', 'eagle', 'penguin'],
     'locomotion': ['walks', 'walks', 'walks', 'flies', 'walks']}


df = pd.DataFrame(data=d)
df

Unnamed: 0,num_legs,num_wings,class,animal,locomotion
0,4,0,mammal,tiger,walks
1,4,0,mammal,lion,walks
2,4,0,mammal,fox,walks
3,2,2,bird,eagle,flies
4,2,2,bird,penguin,walks


In [74]:
df = df.set_index(['class', 'animal', 'locomotion'])
df

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,num_legs,num_wings
class,animal,locomotion,Unnamed: 3_level_1,Unnamed: 4_level_1
mammal,tiger,walks,4,0
mammal,lion,walks,4,0
mammal,fox,walks,4,0
bird,eagle,flies,2,2
bird,penguin,walks,2,2


In [75]:
#Get values at specified index
df.xs('mammal')

Unnamed: 0_level_0,Unnamed: 1_level_0,num_legs,num_wings
animal,locomotion,Unnamed: 2_level_1,Unnamed: 3_level_1
tiger,walks,4,0
lion,walks,4,0
fox,walks,4,0


In [76]:
#Get values at several indexes
df.xs(('mammal', 'fox'))

  return runner(coro)


Unnamed: 0_level_0,num_legs,num_wings
locomotion,Unnamed: 1_level_1,Unnamed: 2_level_1
walks,4,0


In [77]:
#Get values at specified index and level
df.xs('lion', level=1)

Unnamed: 0_level_0,Unnamed: 1_level_0,num_legs,num_wings
class,locomotion,Unnamed: 2_level_1,Unnamed: 3_level_1
mammal,walks,4,0


In [78]:
#Get values at several indexes and levels
df.xs(('bird', 'walks'),
      level=[0, 'locomotion'])

Unnamed: 0_level_0,num_legs,num_wings
animal,Unnamed: 1_level_1,Unnamed: 2_level_1
penguin,2,2


In [80]:
#Get values at specified column and axis
df.xs(['num_wings'], axis=1)

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,num_wings
class,animal,locomotion,Unnamed: 3_level_1
mammal,tiger,walks,0
mammal,lion,walks,0
mammal,fox,walks,0
bird,eagle,flies,2
bird,penguin,walks,2


In [81]:
df.xs('num_wings', axis=1)

class   animal   locomotion
mammal  tiger    walks         0
        lion     walks         0
        fox      walks         0
bird    eagle    flies         2
        penguin  walks         2
Name: num_wings, dtype: int64

# .ix() method
Besides pure label based and integer based, Pandas provides a hybrid method for selections and subsetting the object using the .ix() operator.

In [95]:
df = pd.DataFrame(np.random.randn(8, 4), columns = ['A', 'B', 'C', 'D'])

# Integer slicing
print(df.ix[:4])

AttributeError: 'DataFrame' object has no attribute 'ix'