### 1. Indexing DFs

파이썬의 딕셔너리와 같은 원리로, 키(레이블)를 이용해 값에 접근할 수 있다.

#### 1) Index as immutable array

Indices are immutable; that is, they cannot be modified via the normal means (item assignment raises a `TypeError`):

This immutability makes it safer to share indices between multiple DataFrames and arrays.

In [None]:
# An Index does not support item assignment: the attempt below raises a
# TypeError. The error is caught and printed so that the demonstration does
# not stop a top-to-bottom run of the notebook.
ind = pd.Index([2, 3, 5, 7, 11])
try:
    ind[1] = 0
except TypeError as err:
    print("TypeError:", err)

#### 2) Index as ordered set

Pandas objects are designed to facilitate operations such as joins across datasets, which depend on many aspects of set arithmetic.

The Index object follows many of the conventions used by Python’s built-in set data structure.


In [56]:
indA = pd.Index([1, 3, 5, 7, 9])
indB = pd.Index([2, 3, 5, 7, 11])

# Use the named set methods rather than the &, |, ^ operators: in current
# pandas those operators act element-wise on an Index instead of performing
# set operations, so they would print different values here.
ind_common = indA.intersection(indB)
ind_union = indA.union(indB)
ind_symdiff = indA.symmetric_difference(indB)

print(ind_common)   # intersection: [3, 5, 7]
print(ind_union)    # union: [1, 2, 3, 5, 7, 9, 11]
print(ind_symdiff)  # symmetric difference: [1, 2, 9, 11]

Int64Index([3, 5, 7], dtype='int64')
Int64Index([1, 2, 3, 5, 7, 9, 11], dtype='int64')
Int64Index([1, 2, 9, 11], dtype='int64')


### 2. Data Selection in Series

In [63]:
# A Series can be used like a dictionary that maps index labels to values.
data = pd.Series({'a': 0.25, 'b': 0.5, 'c': 0.75, 'd': 1.0})

print(data['b'])      # look a value up by its label
print('a' in data)    # membership is checked against the labels
print(data.keys())    # keys() gives back the index

# Assigning to a label that is not present yet adds a new entry.
data['e'] = 1.25

list(data.items())    # (label, value) pairs, including the new 'e'

0.5
True
Index(['a', 'b', 'c', 'd'], dtype='object')


[('a', 0.25), ('b', 0.5), ('c', 0.75), ('d', 1.0), ('e', 1.25)]

In [67]:
# Series as one-dimensional array
# The explicit indexers state whether a slice is by label (.loc) or by
# position (.iloc), instead of leaving it to the type of the slice bounds.
print(data.loc['a':'c'], "\n")                  # label slice: the final label 'c' IS included
print(data.iloc[0:2], "\n")                     # positional slice: the end position 2 is excluded
print(data[(data > 0.3) & (data < 0.8)], "\n")  # masking
print(data[['a', 'e']], "\n")                   # fancy indexing

a    0.25
b    0.50
c    0.75
dtype: float64 

a    0.25
b    0.50
dtype: float64 

b    0.50
c    0.75
dtype: float64 

a    0.25
e    1.25
dtype: float64 



### 3. Data Selection in DataFrame

In [13]:
# Build the frame column by column. Every column is a Series keyed by row
# label; 'fare' has no entry for 'c', which shows up as NaN in that row.
d = {}
d['name'] = pandas.Series(
    ['Braund', 'Cummings', 'Heikkinen', 'Allen'],
    index=list('abcd'),
)
d['age'] = pandas.Series([22, 38, 26, 35], index=list('abcd'))
d['fare'] = pandas.Series([7.25, 71.83, 8.05], index=list('abd'))
d['survived?'] = pandas.Series(
    [False, True, True, False],
    index=list('abcd'),
)

df = pandas.DataFrame(data=d)

In [14]:
# 1. Column indexing: selecting a single column by its label.

df['name'] 
    # Attribute access (df.name) selects the same column.

a       Braund
b     Cummings
c    Heikkinen
d        Allen
Name: name, dtype: object

In [15]:
# Passing a list of column labels selects several columns at once.
df[['name', 'age']]

Unnamed: 0,name,age
a,Braund,22
b,Cummings,38
c,Heikkinen,26
d,Allen,35


In [16]:
# 2. Row indexing
# Rows are selected through the explicit indexers so the intent is visible:
# .iloc works on integer positions, .loc works on index labels.

print(df.iloc[1:4])   # rows at positions 1, 2, 3 (end position excluded)
print("")
print(df.loc['a'])    # the single row labelled 'a', returned as a Series


   age   fare       name  survived?
b   38  71.83   Cummings       True
c   26    NaN  Heikkinen       True
d   35   8.05      Allen      False

age              22
fare           7.25
name         Braund
survived?     False
Name: a, dtype: object


In [17]:
# Show every column for the rows where one column meets a condition.

df.loc[df['age'] >= 30]

Unnamed: 0,age,fare,name,survived?
b,38,71.83,Cummings,True
d,35,8.05,Allen,False


In [18]:
# Show every column for the rows that meet several column conditions at once.
# 'survived?' is already a boolean column, so it is used as a mask directly
# rather than being compared with True.

df[(df['age'] >= 30) & df['survived?']]

Unnamed: 0,age,fare,name,survived?
b,38,71.83,Cummings,True


In [19]:
# Show only one specific column for the rows that meet a column condition.
# A single .loc[rows, column] call does the row filter and the column
# selection together, avoiding chained indexing (df[col][mask]).

df.loc[df['age'] >= 30, 'survived?']

b     True
d    False
Name: survived?, dtype: bool

In [20]:
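# numpy.roll(array, shift)    -> shifts the elements circularly: values pushed off one end re-enter at the other
# DataFrame.shift(periods)    -> shifts the rows by `periods`: the vacated positions are filled with NaN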