In [2]:
import pandas as pd
import numpy as np

# Creating a DataFrame
# From a two-dimensional array: row and column labels default to 0..n-1
a = pd.DataFrame(np.arange(12).reshape(3, 4))
a

Unnamed: 0,0,1,2,3
0,0,1,2,3
1,4,5,6,7
2,8,9,10,11


In [4]:
# Supply custom labels for both the rows (index) and the columns
b = pd.DataFrame(
    np.arange(12).reshape(3, 4),
    index=['a', 'b', 'c'],
    columns=['a', 'b', 'c', 'd'],
)
b

Unnamed: 0,a,b,c,d
a,0,1,2,3
b,4,5,6,7
c,8,9,10,11


In [5]:
# From a dict of equal-length lists: each key becomes a column
data = {
    'state': ['Ohio', 'Ohio', 'Ohio', 'Nevada', 'Nevada', 'Nevada'],
    'year': [2000, 2001, 2002, 2001, 2002, 2003],
    'pop': [1.5, 1.7, 3.6, 2.4, 2.9, 3.2],
}
frame = pd.DataFrame(data=data)
frame

Unnamed: 0,state,year,pop
0,Ohio,2000,1.5
1,Ohio,2001,1.7
2,Ohio,2002,3.6
3,Nevada,2001,2.4
4,Nevada,2002,2.9
5,Nevada,2003,3.2


In [6]:
# head() returns the first five rows (n=5 is the default)
frame.head(n=5)

Unnamed: 0,state,year,pop
0,Ohio,2000,1.5
1,Ohio,2001,1.7
2,Ohio,2002,3.6
3,Nevada,2001,2.4
4,Nevada,2002,2.9


In [7]:
# Passing `columns` fixes the order in which the dict's columns are laid out
pd.DataFrame(
    data=data,
    columns=['year', 'state', 'pop'],
)

Unnamed: 0,year,state,pop
0,2000,Ohio,1.5
1,2001,Ohio,1.7
2,2002,Ohio,3.6
3,2001,Nevada,2.4
4,2002,Nevada,2.9
5,2003,Nevada,3.2


In [8]:
# A requested column that is not a key of the dict ('debt' here)
# is created and filled with missing values
frame2 = pd.DataFrame(
    data,
    columns=['state', 'year', 'pop', 'debt'],
    index=['one', 'two', 'three', 'four', 'five', 'six'],
)
frame2

Unnamed: 0,state,year,pop,debt
one,Ohio,2000,1.5,
two,Ohio,2001,1.7,
three,Ohio,2002,3.6,
four,Nevada,2001,2.4,
five,Nevada,2002,2.9,
six,Nevada,2003,3.2,


In [9]:
frame2.columns  # inspect the columns attribute: the column labels (the dict keys plus the requested 'debt')

Index(['state', 'year', 'pop', 'debt'], dtype='object')

In [10]:
# Select a column by its key
frame2['state']  # returns a Series

one        Ohio
two        Ohio
three      Ohio
four     Nevada
five     Nevada
six      Nevada
Name: state, dtype: object

In [13]:
# Select a row by its index label with the loc attribute
frame2.loc['three']

state    Ohio
year     2002
pop       3.6
debt      NaN
Name: three, dtype: object

In [14]:
# ----------------------------------------------------------------
# Modify a column's values
frame2['debt'] = 16.5  # a scalar is assigned to every row of the column
frame2

Unnamed: 0,state,year,pop,debt
one,Ohio,2000,1.5,16.5
two,Ohio,2001,1.7,16.5
three,Ohio,2002,3.6,16.5
four,Nevada,2001,2.4,16.5
five,Nevada,2002,2.9,16.5
six,Nevada,2003,3.2,16.5


In [15]:
# Assign a sequence with one value per row (the frame has 6 rows)
frame2['debt'] = range(6)
frame2

Unnamed: 0,state,year,pop,debt
one,Ohio,2000,1.5,0
two,Ohio,2001,1.7,1
three,Ohio,2002,3.6,2
four,Nevada,2001,2.4,3
five,Nevada,2002,2.9,4
six,Nevada,2003,3.2,5


In [16]:
# Assigning a Series to a column aligns it on the DataFrame's index;
# rows whose label is absent from the Series receive a missing value
val = pd.Series({'two': -1.2, 'four': -1.5, 'five': -1.7})
frame2['debt'] = val
frame2

Unnamed: 0,state,year,pop,debt
one,Ohio,2000,1.5,
two,Ohio,2001,1.7,-1.2
three,Ohio,2002,3.6,
four,Nevada,2001,2.4,-1.5
five,Nevada,2002,2.9,-1.7
six,Nevada,2003,3.2,


In [23]:
# Assigning to a column name that does not exist yet creates a new column
frame2['eastern'] = frame2['state'].eq('Ohio')
frame2

Unnamed: 0,state,year,pop,debt,eastern
one,Ohio,2000,1.5,,True
two,Ohio,2001,1.7,-1.2,True
three,Ohio,2002,3.6,,True
four,Nevada,2001,2.4,-1.5,False
five,Nevada,2002,2.9,-1.7,False
six,Nevada,2003,3.2,,False


In [24]:
# Delete a column with the del keyword
del frame2['eastern']
frame2

Unnamed: 0,state,year,pop,debt
one,Ohio,2000,1.5,
two,Ohio,2001,1.7,-1.2
three,Ohio,2002,3.6,
four,Nevada,2001,2.4,-1.5
five,Nevada,2002,2.9,-1.7
six,Nevada,2003,3.2,


In [25]:
# --------------------------------------------------------
# From a nested dict (a dict of dicts)
pop = {
    'Nevada': {2001: 2.4, 2002: 2.9},
    'Ohio': {2000: 1.5, 2001: 1.7, 2002: 3.6},
}
pop

{'Nevada': {2001: 2.4, 2002: 2.9}, 'Ohio': {2000: 1.5, 2001: 1.7, 2002: 3.6}}

In [26]:
# The outer keys become the columns and the inner keys become the row index.
# Nevada has no entry for 2000, so that cell is a missing value.
frame3 = pd.DataFrame(pop)
frame3

Unnamed: 0,Nevada,Ohio
2001,2.4,1.7
2002,2.9,3.6
2000,,1.5


In [31]:
# Sort the rows by index label (year) in ascending order.
# sort_index returns a new, sorted DataFrame and leaves the original untouched
# unless inplace=True is passed. Rebinding the result keeps the sorted frame
# for the following cells without mutating the frame displayed above.
frame3 = frame3.sort_index(axis=0, ascending=True)
frame3


Unnamed: 0,Nevada,Ohio
2000,,1.5
2001,2.4,1.7
2002,2.9,3.6


In [32]:
# Transpose: swap the rows and the columns
frame3.T

Unnamed: 0,2000,2001,2002
Nevada,,2.4,2.9
Ohio,1.5,1.7,3.6


In [33]:
# The row index and the column index each have a name attribute
frame3.index.name = 'year'
frame3.columns.name = 'state'
frame3

state,Nevada,Ohio
year,Unnamed: 1_level_1,Unnamed: 2_level_1
2000,,1.5
2001,2.4,1.7
2002,2.9,3.6


In [34]:
# The values attribute returns the data as a two-dimensional array
frame3.values

array([[nan, 1.5],
       [2.4, 1.7],
       [2.9, 3.6]])