<a href="https://colab.research.google.com/github/yprogbest/udemy_pandas/blob/main/pandasNote02.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import pandas as pd

In [None]:
# Sample numeric frame reused later in the notebook: the columns hold
# n, n**2 and n**3 for n = 1..4, and the rows are labelled d1..d4.
df2 = pd.DataFrame(
    {
        'linear': [1.0, 2.0, 3.0, 4.0],
        'square': [1.0, 4.0, 9.0, 16.0],
        'cubic': [1.0, 8.0, 27.0, 64.0],
    },
    index=['d1', 'd2', 'd3', 'd4'],
)

df2

Unnamed: 0,linear,square,cubic
d1,1.0,1.0,1.0
d2,2.0,4.0,8.0
d3,3.0,9.0,27.0
d4,4.0,16.0,64.0


# DataFrameの連結

## 行方向に単純に連結する

In [None]:
# Two frames with the same columns ('a', 'b') but disjoint row labels,
# used as inputs for the concatenation examples.
dfA = pd.DataFrame(
    {'a': ['1a', '2a'], 'b': ['1b', '2b']},
    index=['d1', 'd2'],
)
dfB = pd.DataFrame(
    {'a': ['3a', '4a'], 'b': ['3b', '4b']},
    index=['d3', 'd4'],
)

In [None]:
# Render both input frames, one after the other, from a single cell.
display(dfA, dfB)

Unnamed: 0,a,b
d1,1a,1b
d2,2a,2b


Unnamed: 0,a,b
d3,3a,3b
d4,4a,4b


In [None]:
# Row-wise concatenation: dfB's rows are appended below dfA's,
# and each frame keeps its own index labels.
dfAB = pd.concat([dfA, dfB], axis='index')
dfAB

Unnamed: 0,a,b
d1,1a,1b
d2,2a,2b
d3,3a,3b
d4,4a,4b


In [None]:
# Same inputs in the opposite order: rows appear in the order the frames
# are listed (d3, d4, d1, d2); the index is not sorted.
dfBA = pd.concat([dfB, dfA], axis='index')
dfBA

Unnamed: 0,a,b
d3,3a,3b
d4,4a,4b
d1,1a,1b
d2,2a,2b


### 共通しないカラムがある場合

In [None]:
# Frame whose columns ('c', 'd') share nothing with dfA's ('a', 'b').
dfC = pd.DataFrame(
    {'c': ['3c', '4c'], 'd': ['3d', '4d']},
    index=['d3', 'd4'],
)
dfC

Unnamed: 0,c,d
d3,3c,3d
d4,4c,4d


In [None]:
# Row-wise concat of frames with disjoint columns: the result carries the
# union of the columns (a, b, c, d), and cells a frame has no data for
# are left missing.
dfAC = pd.concat([dfA, dfC], axis='index')
dfAC

Unnamed: 0,a,b,c,d
d1,1a,1b,,
d2,2a,2b,,
d3,,,3c,3d
d4,,,4c,4d


## 横方向の連結

In [None]:
# Three-row frame (d1..d3) with new columns 'e' and 'f', used for the
# side-by-side concatenation example.
dfD = pd.DataFrame(
    {'e': ['1e', '2e', '3e'], 'f': ['1f', '2f', '3f']},
    index=['d1', 'd2', 'd3'],
)
dfD

Unnamed: 0,e,f
d1,1e,1f
d2,2e,2f
d3,3e,3f


In [None]:
# axis='columns' (equivalent to axis=1) joins the frames side by side,
# matching rows by index label; dfA has no 'd3' row, so 'a' and 'b'
# are missing there.
dfAD = pd.concat([dfA, dfD], axis='columns')
dfAD

Unnamed: 0,a,b,e,f
d1,1a,1b,1e,1f
d2,2a,2b,2e,2f
d3,,,3e,3f


In [None]:
# Re-display dfA for comparison with dfAD: it only has rows d1 and d2,
# which is why columns 'a' and 'b' are empty in dfAD's d3 row.
dfA

Unnamed: 0,a,b
d1,1a,1b
d2,2a,2b


### 結論）共通するインデックスが対応する

# 条件指定によるデータの抽出

In [None]:
# Small people table (default integer index 0..3) used for the
# boolean-mask filtering examples.
df3 = pd.DataFrame(
    {
        'name': ['taro', 'hanako', 'jiro', 'junko'],
        'gender': ['male', 'female', 'male', 'female'],
        'age': [35, 31, 23, 21],
    }
)

df3

Unnamed: 0,name,gender,age
0,taro,male,35
1,hanako,female,31
2,jiro,male,23
3,junko,female,21


## マスクの用意

In [None]:
# Hand-written boolean mask: one flag per row, True marks a row to keep.
cond = [
    False,  # position 0
    True,   # position 1 (the only row selected)
    False,  # position 2
    False,  # position 3
]
cond

[False, True, False, False]

## スライスに真理値のマスクを与える

In [None]:
# Boolean-mask row selection: only the rows whose flag in `cond` is True
# are kept, and they retain their original index labels.
df31 = df3.loc[cond]
df31

Unnamed: 0,name,gender,age
1,hanako,female,31


In [None]:
# Show df3 again: selecting with the mask produced a separate frame (df31);
# df3 itself still has all four rows.
df3

Unnamed: 0,name,gender,age
0,taro,male,35
1,hanako,female,31
2,jiro,male,23
3,junko,female,21


In [None]:
# Build the mask from a comparison instead of writing it by hand:
# keep the rows where gender equals 'female'.
df3.loc[df3['gender'] == 'female']

Unnamed: 0,name,gender,age
1,hanako,female,31
3,junko,female,21


In [None]:
# The comparison on its own evaluates to one True/False value per row;
# this is the mask that row selection consumes.
df3['gender']=='female'

Unnamed: 0,gender
0,False
1,True
2,False
3,True


## 複雑な条件式の使用

In [None]:
# Combine conditions with `&` (element-wise AND). Each comparison needs its
# own parentheses because `&` binds more tightly than `==` and `<`.
df3.loc[(df3['gender'] == 'female') & (df3['age'] < 30)]

Unnamed: 0,name,gender,age
3,junko,female,21


In [None]:
# `~` inverts a boolean mask (element-wise NOT): keep the rows whose
# gender is not 'male'.
df3.loc[~(df3['gender'] == 'male')]

Unnamed: 0,name,gender,age
1,hanako,female,31
3,junko,female,21


# DataFrameに関する情報の取得

## 要約統計量

In [None]:
# Recall the numeric frame df2 (linear / square / cubic) before summarizing it.
df2

Unnamed: 0,linear,square,cubic
d1,1.0,1.0,1.0
d2,2.0,4.0,8.0
d3,3.0,9.0,27.0
d4,4.0,16.0,64.0


In [None]:
# Summary statistics for each column: count, mean, std, min,
# the 25% / 50% / 75% quantiles, and max.
df2.describe()

Unnamed: 0,linear,square,cubic
count,4.0,4.0,4.0
mean,2.5,7.5,25.0
std,1.290994,6.557439,28.225284
min,1.0,1.0,1.0
25%,1.75,3.25,6.25
50%,2.5,6.5,17.5
75%,3.25,10.75,36.25
max,4.0,16.0,64.0


## データ構造に関する情報の取得

In [None]:
# Print a structural overview: index range, each column's non-null count
# and dtype, and the frame's memory usage.
df2.info()

<class 'pandas.core.frame.DataFrame'>
Index: 4 entries, d1 to d4
Data columns (total 3 columns):
 #   Column  Non-Null Count  Dtype  
---  ------  --------------  -----  
 0   linear  4 non-null      float64
 1   square  4 non-null      float64
 2   cubic   4 non-null      float64
dtypes: float64(3)
memory usage: 300.0+ bytes


# 開始部分、終了部分の取り出し

In [None]:
# Show the full frame for reference before taking its first and last rows.
df2

Unnamed: 0,linear,square,cubic
d1,1.0,1.0,1.0
d2,2.0,4.0,8.0
d3,3.0,9.0,27.0
d4,4.0,16.0,64.0


In [None]:
# First two rows of df2 (d1, d2).
df2.head(n=2)

Unnamed: 0,linear,square,cubic
d1,1.0,1.0,1.0
d2,2.0,4.0,8.0


In [None]:
# Last two rows of df2 (d3, d4).
df2.tail(n=2)

Unnamed: 0,linear,square,cubic
d3,3.0,9.0,27.0
d4,4.0,16.0,64.0
