In [1]:
pwd

'/content'

In [1]:
%pushd /content
import numpy as np
import pandas as pd
pd.options.display.max_rows = 10
pd.options.display.max_colwidth = 60
pd.options.display.max_columns = 10
np.random.seed(12345)
import matplotlib.pyplot as plt
plt.rc("figure", figsize=(10, 6))
np.set_printoptions(precision=4, suppress=True)

/content


In [3]:
data = pd.Series(np.random.uniform(size=9),
                 index=[["a", "a", "a", "b", "b", "c", "c", "d", "d"],
                        [1, 2, 3, 1, 3, 1, 2, 2, 3]])
data

Unnamed: 0,Unnamed: 1,0
a,1,0.929616
a,2,0.316376
a,3,0.183919
b,1,0.20456
b,3,0.567725
c,1,0.595545
c,2,0.964515
d,2,0.653177
d,3,0.748907


In [4]:
data.index

MultiIndex([('a', 1),
            ('a', 2),
            ('a', 3),
            ('b', 1),
            ('b', 3),
            ('c', 1),
            ('c', 2),
            ('d', 2),
            ('d', 3)],
           )

In [5]:
data["b"]

Unnamed: 0,0
1,0.20456
3,0.567725


In [6]:
data["b":"c"]

Unnamed: 0,Unnamed: 1,0
b,1,0.20456
b,3,0.567725
c,1,0.595545
c,2,0.964515


- Metoda `.loc[]` jest używana do dostępu do grup wierszy i kolumn na podstawie etykiet. W tym przypadku przekazujesz listę etykiet `"b", "d"`, co oznacza, że chcesz wybrać wiersze o etykietach indeksu `'b'` i `'d'`.

In [7]:
data.loc[["b", "d"]]

Unnamed: 0,Unnamed: 1,0
b,1,0.20456
b,3,0.567725
d,2,0.653177
d,3,0.748907


In [8]:
data

Unnamed: 0,Unnamed: 1,0
a,1,0.929616
a,2,0.316376
a,3,0.183919
b,1,0.20456
b,3,0.567725
c,1,0.595545
c,2,0.964515
d,2,0.653177
d,3,0.748907


In [9]:
data.loc[:, 2]

Unnamed: 0,0
a,0.316376
c,0.964515
d,0.653177


- Metoda .unstack() w Pandas jest używana do “rozpakowania” danych wierszy w formie kolumn, co pozwala zmienić kształt tabeli z formatów hierarchicznych lub wielopoziomowych indeksów na bardziej płaskie, łatwe do interpretacji układy.

In [10]:
data.unstack()

Unnamed: 0,1,2,3
a,0.929616,0.316376,0.183919
b,0.20456,,0.567725
c,0.595545,0.964515,
d,,0.653177,0.748907


In [11]:
data = pd.Series(
    np.random.rand(6),
    index=[['A', 'A', 'B', 'B', 'C', 'C'], [1, 2, 1, 2, 1, 2]]
)

print("Oryginalny obiekt:")
print(data)

# Przekształcenie za pomocą unstack
unstacked = data.unstack()
print("\nPo zastosowaniu unstack():")
print(unstacked)

Oryginalny obiekt:
A  1    0.653570
   2    0.747715
B  1    0.961307
   2    0.008388
C  1    0.106444
   2    0.298704
dtype: float64

Po zastosowaniu unstack():
          1         2
A  0.653570  0.747715
B  0.961307  0.008388
C  0.106444  0.298704


In [15]:
data = pd.Series(
    np.random.rand(6),
    index=[['A', 'A', 'B', 'B', 'C', 'C'], ['x', 'y', 'x', 'y', 'x', 'y'], [1, 2, 1, 2, 1, 2]]
)

print("Oryginalny obiekt:")
print(data)

# Rozpakowanie drugiego poziomu (x, y)
unstacked = data.unstack(level=1)
print("\nPo zastosowaniu unstack(level=1):")
print(unstacked)

Oryginalny obiekt:
A  x  1    0.728266
   y  2    0.818350
B  x  1    0.500223
   y  2    0.810189
C  x  1    0.095969
   y  2    0.218950
dtype: float64

Po zastosowaniu unstack(level=1):
            x         y
A 1  0.728266       NaN
  2       NaN  0.818350
B 1  0.500223       NaN
  2       NaN  0.810189
C 1  0.095969       NaN
  2       NaN  0.218950


- .unstack() Zamienia jeden poziom indeksu wierszy na kolumny. Używane głównie do „rozpakowania” danych w bardziej szeroki format.

- .stack() Odwrotność .unstack(). Przenosi poziomy kolumn do indeksu wierszy, tworząc MultiIndex, jeśli to możliwe.

In [16]:
data.unstack().stack()

Unnamed: 0,Unnamed: 1,Unnamed: 2,0
A,x,1,0.728266
A,y,2,0.81835
B,x,1,0.500223
B,y,2,0.810189
C,x,1,0.095969
C,y,2,0.21895


In [17]:
frame = pd.DataFrame(np.arange(12).reshape((4, 3)),
                     index=[["a", "a", "b", "b"], [1, 2, 1, 2]],
                     columns=[["Ohio", "Ohio", "Colorado"],
                              ["Green", "Red", "Green"]])
frame

Unnamed: 0_level_0,Unnamed: 1_level_0,Ohio,Ohio,Colorado
Unnamed: 0_level_1,Unnamed: 1_level_1,Green,Red,Green
a,1,0,1,2
a,2,3,4,5
b,1,6,7,8
b,2,9,10,11


In [18]:
frame.index.names = ["key1", "key2"]
frame.columns.names = ["state", "color"]
frame

Unnamed: 0_level_0,state,Ohio,Ohio,Colorado
Unnamed: 0_level_1,color,Green,Red,Green
key1,key2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2
a,1,0,1,2
a,2,3,4,5
b,1,6,7,8
b,2,9,10,11


In [19]:
frame.index.nlevels

2

In [21]:
frame["Ohio"]

Unnamed: 0_level_0,color,Green,Red
key1,key2,Unnamed: 2_level_1,Unnamed: 3_level_1
a,1,0,1
a,2,3,4
b,1,6,7
b,2,9,10


- Metoda .swaplevel() w Pandas służy do zamiany kolejności poziomów w wielopoziomowym indeksie (MultiIndex) obiektu DataFrame lub Series. Jest szczególnie przydatna w sytuacjach, gdy chcesz zmienić sposób organizacji danych w indeksie bez modyfikowania ich wartości.

In [22]:
frame.swaplevel("key1", "key2")

Unnamed: 0_level_0,state,Ohio,Ohio,Colorado
Unnamed: 0_level_1,color,Green,Red,Green
key2,key1,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2
1,a,0,1,2
2,a,3,4,5
1,b,6,7,8
2,b,9,10,11


In [23]:
frame.sort_index(level=1)

Unnamed: 0_level_0,state,Ohio,Ohio,Colorado
Unnamed: 0_level_1,color,Green,Red,Green
key1,key2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2
a,1,0,1,2
b,1,6,7,8
a,2,3,4,5
b,2,9,10,11


- Metoda swaplevel w połączeniu z sort_index zmienia kolejność poziomów w wielopoziomowym indeksie (MultiIndex) i sortuje dane według wskazanego poziomu indeksu.

In [24]:
frame.swaplevel(0, 1).sort_index(level=0)

Unnamed: 0_level_0,state,Ohio,Ohio,Colorado
Unnamed: 0_level_1,color,Green,Red,Green
key2,key1,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2
1,a,0,1,2
1,b,6,7,8
2,a,3,4,5
2,b,9,10,11


In [25]:
frame.groupby(level="key2").sum()

state,Ohio,Ohio,Colorado
color,Green,Red,Green
key2,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2
1,6,8,10
2,12,14,16


In [26]:
frame.groupby(level="color", axis="columns").sum()

  frame.groupby(level="color", axis="columns").sum()


Unnamed: 0_level_0,color,Green,Red
key1,key2,Unnamed: 2_level_1,Unnamed: 3_level_1
a,1,2,1
a,2,8,4
b,1,14,7
b,2,20,10


In [27]:
frame = pd.DataFrame({"a": range(7), "b": range(7, 0, -1),
                      "c": ["one", "one", "one", "two", "two",
                            "two", "two"],
                      "d": [0, 1, 2, 0, 1, 2, 3]})
frame

Unnamed: 0,a,b,c,d
0,0,7,one,0
1,1,6,one,1
2,2,5,one,2
3,3,4,two,0
4,4,3,two,1
5,5,2,two,2
6,6,1,two,3


In [28]:
frame2 = frame.set_index(["c", "d"])
frame2

Unnamed: 0_level_0,Unnamed: 1_level_0,a,b
c,d,Unnamed: 2_level_1,Unnamed: 3_level_1
one,0,0,7
one,1,1,6
one,2,2,5
two,0,3,4
two,1,4,3
two,2,5,2
two,3,6,1


- Metoda set_index z argumentem drop=False pozwala ustawić jedną lub więcej kolumn jako indeks, ale jednocześnie kolumny te pozostają w danych, a nie są usuwane, jak to ma miejsce domyślnie.

In [29]:
frame.set_index(["c", "d"], drop=False)

Unnamed: 0_level_0,Unnamed: 1_level_0,a,b,c,d
c,d,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
one,0,0,7,one,0
one,1,1,6,one,1
one,2,2,5,one,2
two,0,3,4,two,0
two,1,4,3,two,1
two,2,5,2,two,2
two,3,6,1,two,3


In [30]:
frame2.reset_index()

Unnamed: 0,c,d,a,b
0,one,0,0,7
1,one,1,1,6
2,one,2,2,5
3,two,0,3,4
4,two,1,4,3
5,two,2,5,2
6,two,3,6,1


In [2]:
df1 = pd.DataFrame({"key": ["b", "b", "a", "c", "a", "a", "b"],
                    "data1": pd.Series(range(7), dtype="Int64")})
df2 = pd.DataFrame({"key": ["a", "b", "d"],
                    "data2": pd.Series(range(3), dtype="Int64")})

print("df1:")
print(df1)
print("\ndf2:")
print(df2)

df1:
  key  data1
0   b      0
1   b      1
2   a      2
3   c      3
4   a      4
5   a      5
6   b      6

df2:
  key  data2
0   a      0
1   b      1
2   d      2


In [3]:
pd.merge(df1, df2)

Unnamed: 0,key,data1,data2
0,b,0,1
1,b,1,1
2,a,2,0
3,a,4,0
4,a,5,0
5,b,6,1


In [4]:
pd.merge(df1, df2, on="key")

Unnamed: 0,key,data1,data2
0,b,0,1
1,b,1,1
2,a,2,0
3,a,4,0
4,a,5,0
5,b,6,1


In [5]:
df3 = pd.DataFrame({"lkey": ["b", "b", "a", "c", "a", "a", "b"],
                    "data1": pd.Series(range(7), dtype="Int64")})
df4 = pd.DataFrame({"rkey": ["a", "b", "d"],
                    "data2": pd.Series(range(3), dtype="Int64")})

pd.merge(df3, df4, left_on="lkey", right_on="rkey")

Unnamed: 0,lkey,data1,rkey,data2
0,b,0,b,1
1,b,1,b,1
2,a,2,a,0
3,a,4,a,0
4,a,5,a,0
5,b,6,b,1


- Używając `how="outer"`, łączysz dwa DataFrame, zachowując wszystkie wiersze z obu DataFrame. Jeśli wiersze nie mają dopasowania w drugim DataFrame, wartości w kolumnach, które nie mają odpowiednich danych, będą wypełnione wartością `NaN`.

In [6]:
pd.merge(df1, df2, how="outer")

Unnamed: 0,key,data1,data2
0,a,2.0,0.0
1,a,4.0,0.0
2,a,5.0,0.0
3,b,0.0,1.0
4,b,1.0,1.0
5,b,6.0,1.0
6,c,3.0,
7,d,,2.0


In [7]:
pd.merge(df3, df4, left_on="lkey", right_on="rkey", how="outer")

Unnamed: 0,lkey,data1,rkey,data2
0,a,2.0,a,0.0
1,a,4.0,a,0.0
2,a,5.0,a,0.0
3,b,0.0,b,1.0
4,b,1.0,b,1.0
5,b,6.0,b,1.0
6,c,3.0,,
7,,,d,2.0


In [8]:
df1 = pd.DataFrame({"key": ["b", "b", "a", "c", "a", "b"],
                    "data1": pd.Series(range(6), dtype="Int64")})
df2 = pd.DataFrame({"key": ["a", "b", "a", "b", "d"],
                    "data2": pd.Series(range(5), dtype="Int64")})
df1
df2
pd.merge(df1, df2, on="key", how="left")

Unnamed: 0,key,data1,data2
0,b,0,1
1,b,0,3
2,b,1,1
3,b,1,3
4,a,2,0
...,...,...,...
6,c,3,
7,a,4,0
8,a,4,2
9,b,5,1


In [9]:
pd.merge(df1, df2, how="inner")

Unnamed: 0,key,data1,data2
0,b,0,1
1,b,0,3
2,b,1,1
3,b,1,3
4,a,2,0
5,a,2,2
6,a,4,0
7,a,4,2
8,b,5,1
9,b,5,3


- Funkcja `pd.merge()` łączy oba DataFrame na podstawie kolumn `key1` i `key2`. Używając parametru `how="outer"`, wykonujesz pełne złączenie zewnętrzne (full outer join), co oznacza, że wszystkie wiersze z obu DataFrame będą zachowane.

In [10]:
left = pd.DataFrame({"key1": ["foo", "foo", "bar"],
                     "key2": ["one", "two", "one"],
                     "lval": pd.Series([1, 2, 3], dtype='Int64')})

right = pd.DataFrame({"key1": ["foo", "foo", "bar", "bar"],
                      "key2": ["one", "one", "one", "two"],
                      "rval": pd.Series([4, 5, 6, 7], dtype='Int64')})

pd.merge(left, right, on=["key1", "key2"], how="outer")

Unnamed: 0,key1,key2,lval,rval
0,bar,one,3.0,6.0
1,bar,two,,7.0
2,foo,one,1.0,4.0
3,foo,one,1.0,5.0
4,foo,two,2.0,


In [11]:
pd.merge(left, right, on="key1")

Unnamed: 0,key1,key2_x,lval,key2_y,rval
0,foo,one,1,one,4
1,foo,one,1,one,5
2,foo,two,2,one,4
3,foo,two,2,one,5
4,bar,one,3,one,6
5,bar,one,3,two,7


In [12]:
pd.merge(left, right, on="key1", suffixes=("_left", "_right"))

Unnamed: 0,key1,key2_left,lval,key2_right,rval
0,foo,one,1,one,4
1,foo,one,1,one,5
2,foo,two,2,one,4
3,foo,two,2,one,5
4,bar,one,3,one,6
5,bar,one,3,two,7


In [13]:
left1 = pd.DataFrame({"key": ["a", "b", "a", "a", "b", "c"],
                      "value": pd.Series(range(6), dtype="Int64")})
right1 = pd.DataFrame({"group_val": [3.5, 7]}, index=["a", "b"])

print(left1)
print(right1)

pd.merge(left1, right1, left_on="key", right_index=True)

  key  value
0   a      0
1   b      1
2   a      2
3   a      3
4   b      4
5   c      5
   group_val
a        3.5
b        7.0


Unnamed: 0,key,value,group_val
0,a,0,3.5
1,b,1,7.0
2,a,2,3.5
3,a,3,3.5
4,b,4,7.0


In [14]:
pd.merge(left1, right1, left_on="key", right_index=True, how="outer")

Unnamed: 0,key,value,group_val
0,a,0,3.5
2,a,2,3.5
3,a,3,3.5
1,b,1,7.0
4,b,4,7.0
5,c,5,


In [None]:
lefth = pd.DataFrame({"key1": ["Ohio", "Ohio", "Ohio",
                               "Nevada", "Nevada"],
                      "key2": [2000, 2001, 2002, 2001, 2002],
                      "data": pd.Series(range(5), dtype="Int64")})

righth_index = pd.MultiIndex.from_arrays(
    [
        ["Nevada", "Nevada", "Ohio", "Ohio", "Ohio", "Ohio"],
        [2001, 2000, 2000, 2000, 2001, 2002]
    ]
)
righth = pd.DataFrame({"event1": pd.Series([0, 2, 4, 6, 8, 10], dtype="Int64",
                                           index=righth_index),
                       "event2": pd.Series([1, 3, 5, 7, 9, 11], dtype="Int64",
                                           index=righth_index)})
print(lefth)
print(righth)

In [None]:
#! ipython id=b39ab44459844fccaf2b9d9efcfc4f39
pd.merge(lefth, righth, left_on=["key1", "key2"], right_index=True)
pd.merge(lefth, righth, left_on=["key1", "key2"],
         right_index=True, how="outer")

In [None]:
#! ipython id=03abe17d800a4c2d87ed26e3734d3170
left2 = pd.DataFrame([[1., 2.], [3., 4.], [5., 6.]],
                     index=["a", "c", "e"],
                     columns=["Ohio", "Nevada"]).astype("Int64")
right2 = pd.DataFrame([[7., 8.], [9., 10.], [11., 12.], [13, 14]],
                      index=["b", "c", "d", "e"],
                      columns=["Missouri", "Alabama"]).astype("Int64")
left2
right2
pd.merge(left2, right2, how="outer", left_index=True, right_index=True)

In [None]:
#! ipython id=852ad741bf6a4678833ce0a65ae27e38
left2.join(right2, how="outer")

In [None]:
#! ipython id=ef7c8ddca1a14f0e9b2d08e30d2ced8f
left1.join(right1, on="key")

In [None]:
#! ipython id=4de3092737b54b6db024df459ad1c77b
another = pd.DataFrame([[7., 8.], [9., 10.], [11., 12.], [16., 17.]],
                       index=["a", "c", "e", "f"],
                       columns=["New York", "Oregon"])
another
left2.join([right2, another])
left2.join([right2, another], how="outer")

In [None]:
#! ipython id=83e9fddfd1654d50bf17752bb93fbbfa
arr = np.arange(12).reshape((3, 4))
arr
np.concatenate([arr, arr], axis=1)

In [None]:
#! ipython id=e62e34f6a56e4d3f8d688ba9f226733b
s1 = pd.Series([0, 1], index=["a", "b"], dtype="Int64")
s2 = pd.Series([2, 3, 4], index=["c", "d", "e"], dtype="Int64")
s3 = pd.Series([5, 6], index=["f", "g"], dtype="Int64")

In [None]:
#! ipython id=0e8d10f6da2746ada70388da4ce93c91
s1
s2
s3
pd.concat([s1, s2, s3])

In [None]:
#! ipython id=8f872f81400043afb3c1bf48942de5a4
pd.concat([s1, s2, s3], axis="columns")

In [None]:
#! ipython id=51ed91ff4a6d45bcb8a563b6b9752eab
s4 = pd.concat([s1, s3])
s4
pd.concat([s1, s4], axis="columns")
pd.concat([s1, s4], axis="columns", join="inner")

In [None]:
#! ipython id=10b6e25547cf49868dfd9f769490d495
result = pd.concat([s1, s1, s3], keys=["one", "two", "three"])
result
result.unstack()

In [None]:
#! ipython id=b7239f0a4b364eb4802a6a72d49d541b
pd.concat([s1, s2, s3], axis="columns", keys=["one", "two", "three"])

In [None]:
#! ipython id=67e6126a29bf499b84243fe1121d8288
df1 = pd.DataFrame(np.arange(6).reshape(3, 2), index=["a", "b", "c"],
                   columns=["one", "two"])
df2 = pd.DataFrame(5 + np.arange(4).reshape(2, 2), index=["a", "c"],
                   columns=["three", "four"])
df1
df2
pd.concat([df1, df2], axis="columns", keys=["level1", "level2"])

In [None]:
#! ipython id=6209f511f4bd408c823f3601438554b9
pd.concat({"level1": df1, "level2": df2}, axis="columns")

In [None]:
#! ipython id=942c4ea1830145879af5d47d5c4d9248
pd.concat([df1, df2], axis="columns", keys=["level1", "level2"],
          names=["upper", "lower"])

In [None]:
#! ipython id=81cb00cc63184f10aba558c214e4244c
df1 = pd.DataFrame(np.random.standard_normal((3, 4)),
                   columns=["a", "b", "c", "d"])
df2 = pd.DataFrame(np.random.standard_normal((2, 3)),
                   columns=["b", "d", "a"])
df1
df2

In [None]:
#! ipython id=b663f7c1ded3455db4abd472a8154ada
pd.concat([df1, df2], ignore_index=True)

In [None]:
#! ipython id=e4379b1ba6c74b9fbf6174fb4f2be35b
a = pd.Series([np.nan, 2.5, 0.0, 3.5, 4.5, np.nan],
              index=["f", "e", "d", "c", "b", "a"])
b = pd.Series([0., np.nan, 2., np.nan, np.nan, 5.],
              index=["a", "b", "c", "d", "e", "f"])
a
b
np.where(pd.isna(a), b, a)

In [None]:
#! ipython id=f222a83337184ce4bf0efa05e07dc6d3
a.combine_first(b)

In [None]:
#! ipython id=15c3d24d99974e409a9255e8c01222b5
df1 = pd.DataFrame({"a": [1., np.nan, 5., np.nan],
                    "b": [np.nan, 2., np.nan, 6.],
                    "c": range(2, 18, 4)})
df2 = pd.DataFrame({"a": [5., 4., np.nan, 3., 7.],
                    "b": [np.nan, 3., 4., 6., 8.]})
df1
df2
df1.combine_first(df2)

In [None]:
#! ipython id=647c8bf32f9f4f68818781a117aec2a6
data = pd.DataFrame(np.arange(6).reshape((2, 3)),
                    index=pd.Index(["Ohio", "Colorado"], name="state"),
                    columns=pd.Index(["one", "two", "three"],
                    name="number"))
data

In [None]:
#! ipython id=93b2cac6e12c4c4f84f63acea8ae9997
result = data.stack()
result

In [None]:
#! ipython id=a4b865ed812a43c39ba96e004b7f58f0
result.unstack()

In [None]:
#! ipython id=1381976b93554a30bca3f97ff21cef4a
result.unstack(level=0)
result.unstack(level="state")

In [None]:
#! ipython id=c26a7396ad494449b09e718e2d9ed336
s1 = pd.Series([0, 1, 2, 3], index=["a", "b", "c", "d"], dtype="Int64")
s2 = pd.Series([4, 5, 6], index=["c", "d", "e"], dtype="Int64")
data2 = pd.concat([s1, s2], keys=["one", "two"])
data2

In [None]:
#! ipython id=8d7eaaf573f3469a82a555f0dd73665c
data2.unstack()
data2.unstack().stack()
data2.unstack().stack(dropna=False)

In [None]:
#! ipython id=f3e7f3eace4e497097fb20f56ec70436
df = pd.DataFrame({"left": result, "right": result + 5},
                  columns=pd.Index(["left", "right"], name="side"))
df
df.unstack(level="state")

In [None]:
#! ipython id=62e63632842c40e7ab3913dffdb17734
df.unstack(level="state").stack(level="side")

In [None]:
#! ipython id=7691f5f737a44b39a67861744c98d2ae
data = pd.read_csv("przykłady/macrodata.csv")
data = data.loc[:, ["year", "quarter", "realgdp", "infl", "unemp"]]
data.head()

In [None]:
#! ipython id=cb853f8845dc4808b3bb2cd2697cbb14
periods = pd.PeriodIndex(year=data.pop("year"),
                         quarter=data.pop("quarter"),
                         name="date")
periods
data.index = periods.to_timestamp("D")
data.head()

In [None]:
#! ipython id=a50d1ca51a9c4a8591f2ab4bb824d84b
data = data.reindex(columns=["realgdp", "infl", "unemp"])
data.columns.name = "item"
data.head()

In [None]:
#! ipython id=1ccfe1d1bd2c4d3face047a0659bf635
long_data = (data.stack()
             .reset_index()
             .rename(columns={0: "value"}))

In [None]:
#! ipython id=4005e426f21b47bead4aea340933266a
long_data[:10]

In [None]:
#! ipython id=877e33a3f6114aa0966c3d68a139701c
pivoted = long_data.pivot(index="date", columns="item",
                          values="value")
pivoted.head()

In [None]:
#! ipython id=b88155754cf540f7be5662ce714b0d12
long_data["value2"] = np.random.standard_normal(len(long_data))
long_data[:10]

In [None]:
#! ipython id=112caaf289fa40ed9f88aba26faedfbe
pivoted = long_data.pivot(index="date", columns="item")
pivoted.head()
pivoted["value"].head()

In [None]:
#! ipython id=2b53be975f41406f86b9a19cc1a04c9c
unstacked = long_data.set_index(["date", "item"]).unstack(level="item")
unstacked.head()

In [None]:
#! ipython suppress id=f14d4b5d2fb44feb92f41233b5f019bc
%popd

In [None]:
#! ipython id=f95fff0c51f149dc88462804af6b5038
df = pd.DataFrame({"key": ["foo", "bar", "baz"],
                   "A": [1, 2, 3],
                   "B": [4, 5, 6],
                   "C": [7, 8, 9]})
df

In [None]:
#! ipython id=30b180a4722e4db9af5953c471f6c603
melted = pd.melt(df, id_vars="key")
melted

In [None]:
#! ipython id=7969c1c5a676429b82ccbe31c997fef4
reshaped = melted.pivot(index="key", columns="variable",
                        values="value")
reshaped

In [None]:
#! ipython id=53f7023267ac43f399635dabee6b430f
reshaped.reset_index()

In [None]:
#! ipython id=0d5675ca7ba94da3bc9168d3d9b7a47e
pd.melt(df, id_vars="key", value_vars=["A", "B"])

In [None]:
#! ipython id=a39fe88030144c79a2ce8c7e7ec566d8
pd.melt(df, value_vars=["A", "B", "C"])
pd.melt(df, value_vars=["key", "A", "B"])