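# pandas

A short tour of the two core pandas data structures, `Series` and `DataFrame`: construction, indexing, and conversion. The recorded outputs were produced with pandas 0.23.4.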

In [1]:
import numpy as np
import pandas as pd

# Display the installed pandas version string.
pd.__version__

'0.23.4'

## Series

In [2]:
# Build a Series from a plain list; with no index given, pandas labels the
# elements 0..n-1.
ser = pd.Series(
    data=['张三', '李四', '王五'],
)
ser

0    张三
1    李四
2    王五
dtype: object

In [3]:
# The second positional argument of pd.Series is the index: labels 1..3 here.
ser = pd.Series(
    ['张三', '李四', '王五'],
    list(range(1, 4)),
)
ser

1    张三
2    李四
3    王五
dtype: object

In [4]:
# Same Series as before, with the index passed by keyword for readability.
ser = pd.Series(
    ['张三', '李四', '王五'],
    index=list(range(1, 4)),
)
ser

1    张三
2    李四
3    王五
dtype: object

In [5]:
# Label-based lookup: the index is 1..3, so label 2 is the second element.
# .loc makes it explicit that 2 is an index label and not a position.
ser.loc[2]

'李四'

In [6]:
# Overwrite the element whose index label is 2; .loc makes the label-based
# assignment explicit.
ser.loc[2] = 'Jack'

In [7]:
# Display the Series again to confirm the element at label 2 was replaced.
ser

1      张三
2    Jack
3      王五
dtype: object

In [8]:
# Check the Python type of the object.
type(ser)

pandas.core.series.Series

In [9]:
# .values exposes the underlying data as a NumPy array (index labels are not included).
ser.values

array(['张三', 'Jack', '王五'], dtype=object)

In [10]:
# Confirm the type of the object returned by .values.
type(ser.values)

numpy.ndarray

In [11]:
# A numeric Series; a range object is accepted directly as the index.
ser2 = pd.Series(
    [19, 18, 20],
    index=range(1, 4),
)
ser2

1    19
2    18
3    20
dtype: int64

In [12]:
# Arithmetic with a scalar is applied element-wise; the index is preserved.
ser2 + 1

1    20
2    19
3    21
dtype: int64

In [13]:
# Boolean-mask selection: keep only the elements whose value is even.
ser2[ser2 % 2 == 0]

2    18
3    20
dtype: int64

In [14]:
# City name -> number mapping used to build the next Series.
data = {
    'beijing': 12000,
    'shanghai': 11500,
    'guangzhou': 10800,
}
data

{'beijing': 12000, 'guangzhou': 10800, 'shanghai': 11500}

In [15]:
# Build a Series from the dict: keys become index labels, values become the data.
ser3 = pd.Series(data)

In [16]:
# Display the Series built from the dict.
ser3

beijing      12000
shanghai     11500
guangzhou    10800
dtype: int64

In [17]:
# Look up a single value by its index label.
ser3['beijing']

12000

In [18]:
# `in` checks whether the label is present in the Series index.
'shanghai' in ser3

True

In [19]:
# An explicit index selects and orders entries from the dict. A label with no
# matching key ('chongqing') becomes NaN, so the result dtype is float64.
ser4 = pd.Series(
    data,
    index=['shanghai', 'beijing', 'chongqing'],
)
ser4

shanghai     11500.0
beijing      12000.0
chongqing        NaN
dtype: float64

In [22]:
# Convert to a plain dict mapping index label -> value.
ser4.to_dict()

{'beijing': 12000.0, 'chongqing': nan, 'shanghai': 11500.0}

In [23]:
# Convert the values to a plain Python list (index labels are dropped).
ser4.tolist()

[11500.0, 12000.0, nan]

In [24]:
# Serialize to a JSON string; the missing value is written as null.
ser4.to_json()

'{"shanghai":11500.0,"beijing":12000.0,"chongqing":null}'

In [25]:
# Convert to a single-column DataFrame; the unnamed Series becomes column 0.
ser4.to_frame()

Unnamed: 0,0
shanghai,11500.0
beijing,12000.0
chongqing,


## DataFrame

In [27]:
# NumPy (np) is imported in the first cell together with pandas.
# 3x3 integer array holding 100..108; -1 lets NumPy infer the column count.
data = np.arange(100, 109).reshape(3, -1)
data

array([[100, 101, 102],
       [103, 104, 105],
       [106, 107, 108]])

In [28]:
# Wrap the 2-D array in a DataFrame; row and column labels default to 0..n-1.
df = pd.DataFrame(data)

In [29]:
# Display the DataFrame.
df

Unnamed: 0,0,1,2
0,100,101,102
1,103,104,105
2,106,107,108


In [31]:
# Deliberately malformed input: 'age' has 4 entries while the other columns
# have 3, so the DataFrame constructor rejects it.
data = {
    'name': ['jack','marry','lily'],
    'age': [19, 18, 20, 33],
    'height': [167, 160, 164]
}
# The error is the point of this cell. Catch and print it so that
# "Restart & Run All" keeps going instead of stopping on the traceback.
try:
    df = pd.DataFrame(data)
except ValueError as err:
    print('ValueError:', err)

ValueError: arrays must all be same length

In [32]:
# Equal-length columns: every list has three entries, so construction succeeds.
# Each dict key becomes a column label.
data = {
    'name': ['jack', 'marry', 'lily'],
    'age': [19, 18, 20],
    'height': [167, 160, 164],
}
df = pd.DataFrame(data=data)
df

Unnamed: 0,name,age,height
0,jack,19,167
1,marry,18,160
2,lily,20,164


In [34]:
# Inspect the column labels.
df.columns

Index(['name', 'age', 'height'], dtype='object')

In [35]:
# Replace all column labels at once by assigning a new list to .columns.
# This modifies df in place; only the first label ('name' -> 'username') changes here.
df.columns = ['username', 'age', 'height']
df

Unnamed: 0,username,age,height
0,jack,19,167
1,marry,18,160
2,lily,20,164


In [37]:
# columns= selects and orders the dict keys; a name with no matching key
# ('email') yields a column of missing values.
df = pd.DataFrame(
    data,
    columns=['name', 'age', 'height', 'email'],
)
df

Unnamed: 0,name,age,height,email
0,jack,19,167,
1,marry,18,160,
2,lily,20,164,


In [38]:
# Inspect the row index (a default RangeIndex here).
df.index

RangeIndex(start=0, stop=3, step=1)

In [39]:
# Same columns as before, with index= supplying row labels 1..3 in place of
# the default 0..2.
df = pd.DataFrame(
    data,
    columns=['name', 'age', 'height', 'email'],
    index=range(1, 4),
)
df

Unnamed: 0,name,age,height,email
1,jack,19,167,
2,marry,18,160,
3,lily,20,164,
