In [10]:
from IPython.display import display, HTML

def pprintdfs(*dfs):
    """Render the given DataFrames side by side in a Jupyter output cell.

    Each object is wrapped in its own ``<div>`` inside a flex container, so
    the tables are laid out in a single row.

    Args:
        *dfs: Objects to show, normally ``pandas.DataFrame`` instances. An
            object without a usable ``_repr_html_`` (the method is missing or
            returns ``None``) is shown as its HTML-escaped ``repr()`` inside
            a ``<pre>`` block instead of raising or printing "None".

    Returns:
        None. The markup is emitted through ``display(HTML(...))``.
    """
    from html import escape  # local import: only the fallback path needs it

    def _as_html(obj):
        # Prefer the rich HTML repr; fall back to plain text when there is none.
        renderer = getattr(obj, "_repr_html_", None)
        rendered = renderer() if callable(renderer) else None
        if rendered is None:
            rendered = f"<pre>{escape(repr(obj))}</pre>"
        return rendered

    dfs_html = "\n".join(
        f'<div style="margin-right: 30px;">{_as_html(df)}</div>' for df in dfs
    )
    side_by_side_html = f"""
    <div style="display: flex;">
        {dfs_html}
    </div>
    """
    display(HTML(side_by_side_html))  # render in Jupyter

## DataFrame

DataFrame 是一个二维带标签的数据结构，每一列可能包含不同类型的数据。

`pd.DataFrame(data[, index, columns, dtype])`, where `data` can be any of:

- dict of ndarrays, lists, dicts, or Series
- Structured or record ndarray
- list of dict, tuple
- A Series
- Another DataFrame
- 2D ndarray

In [1]:
import numpy as np
import pandas as pd

### 1 From dict of Series

In [17]:
# Two Series with different indexes: "two" has no value for label "d".
data1 = {
    "one": pd.Series([1, 2, 3, 4], index=["a", "b", "c", "d"]),
    "two": pd.Series([5, 6, 7], index=["a", "b", "c"]),
}

# No extra arguments: both Series become columns.
df1_1 = pd.DataFrame(data1)
# `index` selects and orders the rows to keep.
df1_2 = pd.DataFrame(data1, index=["d", "b", "a"])
# `columns` selects columns; "three" is not a key of data1.
df1_3 = pd.DataFrame(data1, columns=["two", "three"])

pprintdfs(df1_1, df1_2, df1_3)

Unnamed: 0,one,two
a,1,5.0
b,2,6.0
c,3,7.0
d,4,

Unnamed: 0,one,two
d,4,
b,2,6.0
a,1,5.0

Unnamed: 0,two,three
a,5,
b,6,
c,7,


### 2 From dict of lists

In [18]:
# All lists must be of the same length.
data2 = dict(
    one=[1.0, 2.0, 3.0, 4.0],
    two=[4.0, 3.0, 2.0, 1.0],
)

# Without `index` the rows are labelled 0..3; with it, the given labels are used.
df2_1 = pd.DataFrame(data2)
df2_2 = pd.DataFrame(data2, index=["a", "b", "c", "d"])

pprintdfs(df2_1, df2_2)

Unnamed: 0,one,two
0,1.0,4.0
1,2.0,3.0
2,3.0,2.0
3,4.0,1.0

Unnamed: 0,one,two
a,1.0,4.0
b,2.0,3.0
c,3.0,2.0
d,4.0,1.0


### 3 From structured or record array

In [23]:
# Structured array with two records and three named fields (A, B, C).
data3 = np.zeros(
    2,
    dtype=[("A", "i4"), ("B", "f4"), ("C", "S10")],
)
data3[:] = [
    (1, 2.0, "hello"),
    (2, 3.0, "world"),
]

# The field names become the columns; `columns` can reorder them.
df3_1 = pd.DataFrame(data3)
df3_2 = pd.DataFrame(data3, index=["first", "second"])
df3_3 = pd.DataFrame(data3, columns=["B", "C", "A"], index=["first", "second"])

pprintdfs(df3_1, df3_2, df3_3)

Unnamed: 0,A,B,C
0,1,2.0,b'hello'
1,2,3.0,b'world'

Unnamed: 0,A,B,C
first,1,2.0,b'hello'
second,2,3.0,b'world'

Unnamed: 0,B,C,A
first,2.0,b'hello',1
second,3.0,b'world',2


### 4 From a list of dicts

In [29]:
# One dict per row; the second row has no "c" key.
data4 = [
    {"a": 1, "b": 2, "c": 3},
    {"a": 5, "b": 7},
]

# `columns` picks and orders the columns; "d" appears in none of the dicts.
df4_1 = pd.DataFrame(data4)
df4_2 = pd.DataFrame(data4, columns=["b", "c", "a", "d"])
df4_3 = pd.DataFrame(
    data4,
    columns=["b", "c", "a", "d"],
    index=["first", "second"],
)

pprintdfs(df4_1, df4_2, df4_3)

Unnamed: 0,a,b,c
0,1,2,3.0
1,5,7,

Unnamed: 0,b,c,a,d
0,2,3.0,1,
1,7,,5,

Unnamed: 0,b,c,a,d
first,2,3.0,1,
second,7,,5,


### 5 From a dict of tuples

In [30]:
# Nested dict keyed by tuples: each outer key is a two-part column label and
# each inner key is a two-part row label, giving two-level labels on both axes.
# (row, column) pairs that are absent, e.g. ("A", "D") for every column except
# ("b", "b"), are left empty in the resulting frame.
data5 = {
    ("a", "b"): {("A", "B"): 1, ("A", "C"): 2},
    ("a", "a"): {("A", "C"): 3, ("A", "B"): 4},
    ("a", "c"): {("A", "B"): 5, ("A", "C"): 6},
    ("b", "a"): {("A", "C"): 7, ("A", "B"): 8},
    ("b", "b"): {("A", "D"): 9, ("A", "B"): 10},
}
df5 = pd.DataFrame(data5)
pprintdfs(df5)

Unnamed: 0_level_0,Unnamed: 1_level_0,a,a,a,b,b
Unnamed: 0_level_1,Unnamed: 1_level_1,b,a,c,a,b
A,B,1.0,4.0,5.0,8.0,10.0
A,C,2.0,3.0,6.0,7.0,
A,D,,,,,9.0


### 6 From a Series

In [34]:
# The Series name becomes the single column label; its index is kept as-is.
s6 = pd.Series(
    [1.0, 2.0, 3.0],
    index=["a", "b", "c"],
    name="series example",
)
df6 = pd.DataFrame(s6)

pprintdfs(df6)

Unnamed: 0,series example
a,1.0
b,2.0
c,3.0
