In [1]:
import numpy as np

In [2]:
import pandas as pd

# Object creation

In [3]:
# A Series is a one-dimensional labelled array. No index is passed, so pandas
# supplies the default integer labels; the NaN marks a missing value.
s = pd.Series(
    data=[1, 3, 5, np.nan, 6, 8],
)

In [4]:
# Echo the Series: the NaN entry is why every value is stored as float64.
s

0    1.0
1    3.0
2    5.0
3    NaN
4    6.0
5    8.0
dtype: float64

In [5]:
# Six consecutive calendar days starting on 2013-01-01; daily frequency is
# what date_range uses by default and is spelled out here for the reader.
dates = pd.date_range(start="20130101", periods=6, freq="D")

In [6]:
# Show the index: six consecutive calendar days starting 2013-01-01 (freq='D').
dates

DatetimeIndex(['2013-01-01', '2013-01-02', '2013-01-03', '2013-01-04',
               '2013-01-05', '2013-01-06'],
              dtype='datetime64[ns]', freq='D')

In [7]:
# 6x4 frame of standard-normal draws: one row per date, columns A-D.
# The generator is seeded so that Restart & Run All reproduces the same
# numbers every time. The outputs recorded in this notebook came from an
# earlier unseeded run, so re-run the cells to refresh them.
df = pd.DataFrame(
    np.random.default_rng(seed=42).standard_normal((6, 4)),
    index=dates,
    columns=list("ABCD"),
)

In [8]:
# Display the 6x4 frame: one row per date, columns A-D of random normal draws.
df

Unnamed: 0,A,B,C,D
2013-01-01,0.898896,-0.632261,1.024244,-2.323634
2013-01-02,0.629062,-0.324345,-0.911324,0.114132
2013-01-03,-1.336046,-0.53313,0.397859,0.730258
2013-01-04,-0.390638,-0.335332,0.538846,0.043267
2013-01-05,1.323935,0.561652,-0.12266,0.392429
2013-01-06,0.318702,0.239736,1.370378,1.494211


In [9]:
# Build a frame from a mapping of column name -> column-like object.
# The scalar entries (A, B, F) are broadcast to the length of the array-like
# columns (four rows here), and every column keeps the dtype it was given.
df2 = pd.DataFrame(
    dict(
        A=1.0,
        B=pd.Timestamp("20130102"),
        C=pd.Series(1, index=list(range(4)), dtype="float32"),
        D=np.full(4, 3, dtype="int32"),
        E=pd.Categorical(["test", "train"] * 2),
        F="foo",
    )
)

In [10]:
# Display df2: the scalar entries ("A", "B", "F") were broadcast to all four rows.
df2

Unnamed: 0,A,B,C,D,E,F
0,1.0,2013-01-02,1.0,3,test,foo
1,1.0,2013-01-02,1.0,3,train,foo
2,1.0,2013-01-02,1.0,3,test,foo
3,1.0,2013-01-02,1.0,3,train,foo


In [11]:
# Each column keeps its own dtype, so one frame can mix floats, datetimes,
# ints, categoricals and strings.
df2.dtypes

A           float64
B    datetime64[ns]
C           float32
D             int32
E          category
F            object
dtype: object

# Viewing data

In [12]:
# Preview the first five rows (five is head's default row count, spelled out).
df.head(n=5)

Unnamed: 0,A,B,C,D
2013-01-01,0.898896,-0.632261,1.024244,-2.323634
2013-01-02,0.629062,-0.324345,-0.911324,0.114132
2013-01-03,-1.336046,-0.53313,0.397859,0.730258
2013-01-04,-0.390638,-0.335332,0.538846,0.043267
2013-01-05,1.323935,0.561652,-0.12266,0.392429


In [13]:
# Preview only the last three rows.
df.tail(n=3)

Unnamed: 0,A,B,C,D
2013-01-04,-0.390638,-0.335332,0.538846,0.043267
2013-01-05,1.323935,0.561652,-0.12266,0.392429
2013-01-06,0.318702,0.239736,1.370378,1.494211


In [14]:
# Row labels of the frame: the DatetimeIndex that was passed as index= at construction.
df.index

DatetimeIndex(['2013-01-01', '2013-01-02', '2013-01-03', '2013-01-04',
               '2013-01-05', '2013-01-06'],
              dtype='datetime64[ns]', freq='D')

In [15]:
# Column labels of the frame ('A' through 'D').
df.columns

Index(['A', 'B', 'C', 'D'], dtype='object')

In [16]:
# Convert to a NumPy array. All four columns are floats, so the result is a
# plain 2-D float array; the index and column labels are not carried over.
df.to_numpy()

array([[ 0.8988956 , -0.63226127,  1.02424407, -2.32363405],
       [ 0.62906153, -0.32434487, -0.91132377,  0.11413248],
       [-1.33604589, -0.53312968,  0.39785928,  0.73025787],
       [-0.39063814, -0.33533172,  0.5388457 ,  0.0432669 ],
       [ 1.32393545,  0.56165238, -0.12266025,  0.3924294 ],
       [ 0.31870236,  0.23973623,  1.37037829,  1.49421087]])

In [17]:
# df2 mixes dtypes, and a NumPy array has exactly one dtype, so the result
# falls back to dtype=object with every cell held as a Python object.
df2.to_numpy()

array([[1.0, Timestamp('2013-01-02 00:00:00'), 1.0, 3, 'test', 'foo'],
       [1.0, Timestamp('2013-01-02 00:00:00'), 1.0, 3, 'train', 'foo'],
       [1.0, Timestamp('2013-01-02 00:00:00'), 1.0, 3, 'test', 'foo'],
       [1.0, Timestamp('2013-01-02 00:00:00'), 1.0, 3, 'train', 'foo']],
      dtype=object)

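NumPy 数组整体只有一个 dtype，而 pandas 的 DataFrame 每一列各有自己的 dtype；因此对包含多种 dtype 的 `df2` 调用 `to_numpy()` 时，结果只能退化为 `dtype=object` 的数组。另外，转换后的数组不包含索引（index）和列标签。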

In [18]:
# Per-column summary statistics: count, mean, std, min, quartiles and max.
df.describe()

Unnamed: 0,A,B,C,D
count,6.0,6.0,6.0,6.0
mean,0.240652,-0.170613,0.382891,0.075111
std,0.964183,0.469021,0.817507,1.288431
min,-1.336046,-0.632261,-0.911324,-2.323634
25%,-0.213303,-0.48368,0.00747,0.060983
50%,0.473882,-0.329838,0.468352,0.253281
75%,0.831437,0.098716,0.902894,0.645801
max,1.323935,0.561652,1.370378,1.494211


In [19]:
# Transpose the frame: the dates become the columns and A-D become the row labels.
df.transpose()

Unnamed: 0,2013-01-01,2013-01-02,2013-01-03,2013-01-04,2013-01-05,2013-01-06
A,0.898896,0.629062,-1.336046,-0.390638,1.323935,0.318702
B,-0.632261,-0.324345,-0.53313,-0.335332,0.561652,0.239736
C,1.024244,-0.911324,0.397859,0.538846,-0.12266,1.370378
D,-2.323634,0.114132,0.730258,0.043267,0.392429,1.494211


In [20]:
# Sort by column label rather than row label, in descending order (D, C, B, A).
df.sort_index(axis="columns", ascending=False)

Unnamed: 0,D,C,B,A
2013-01-01,-2.323634,1.024244,-0.632261,0.898896
2013-01-02,0.114132,-0.911324,-0.324345,0.629062
2013-01-03,0.730258,0.397859,-0.53313,-1.336046
2013-01-04,0.043267,0.538846,-0.335332,-0.390638
2013-01-05,0.392429,-0.12266,0.561652,1.323935
2013-01-06,1.494211,1.370378,0.239736,0.318702


In [21]:
# Reorder the rows by the values in column "B", smallest first
# (ascending is the default order, spelled out here).
df.sort_values(by="B", ascending=True)

Unnamed: 0,A,B,C,D
2013-01-01,0.898896,-0.632261,1.024244,-2.323634
2013-01-03,-1.336046,-0.53313,0.397859,0.730258
2013-01-04,-0.390638,-0.335332,0.538846,0.043267
2013-01-02,0.629062,-0.324345,-0.911324,0.114132
2013-01-06,0.318702,0.239736,1.370378,1.494211
2013-01-05,1.323935,0.561652,-0.12266,0.392429
