# Intro to Data Structures
https://pandas.pydata.org/pandas-docs/stable/dsintro.html

In [1]:
import numpy as np
import pandas as pd

## Series

Series is a one-dimensional labeled array capable of holding any data type (integers, strings, floating point numbers, Python objects, etc.). The axis labels are collectively referred to as the index. 

In [2]:
s = pd.Series(np.random.randn(5), index=['a', 'b', 'c', 'd', 'e'])
s

a    0.337062
b    1.038739
c   -0.931932
d   -0.291651
e   -0.210281
dtype: float64

In [3]:
s.index

Index(['a', 'b', 'c', 'd', 'e'], dtype='object')

In [4]:
pd.Series(np.random.randn(5))

0   -0.820016
1    1.042221
2   -0.050387
3   -0.239851
4   -0.779401
dtype: float64

In [5]:
#From a dict

d = {'a' : 0., 'b' : 1., 'c' : 2.}
pd.Series(d)

a    0.0
b    1.0
c    2.0
dtype: float64

In [6]:
pd.Series(d, index=['b', 'c', 'd', 'a'])

b    1.0
c    2.0
d    NaN
a    0.0
dtype: float64

In [7]:
#From scalar value 
pd.Series(5., index=['a', 'b', 'c', 'd', 'e'])

a    5.0
b    5.0
c    5.0
d    5.0
e    5.0
dtype: float64

In [8]:
#Series is ndarray-like
s[0]

0.3370619135210377

In [9]:
s[:3]

a    0.337062
b    1.038739
c   -0.931932
dtype: float64

In [10]:
s[s > s.median()]

a    0.337062
b    1.038739
dtype: float64

In [11]:
s[[4, 3, 1]]

e   -0.210281
d   -0.291651
b    1.038739
dtype: float64

In [12]:
np.exp(s)

a    1.400826
b    2.825651
c    0.393792
d    0.747029
e    0.810357
dtype: float64

In [13]:
#Series is dict-like
s['a']

0.3370619135210377

In [14]:
s['e'] = 12.
s

a     0.337062
b     1.038739
c    -0.931932
d    -0.291651
e    12.000000
dtype: float64

In [15]:
'e' in s

True

In [16]:
'f' in s

False

In [17]:
s.get('f', np.nan)

nan

In [18]:
#Vectorized operations and label alignment with Series
s = pd.Series(np.random.randn(5))
s 

0   -0.153118
1   -0.158712
2   -0.249644
3    0.406865
4   -0.069518
dtype: float64

In [19]:
s + s

0   -0.306235
1   -0.317424
2   -0.499287
3    0.813729
4   -0.139037
dtype: float64

In [20]:
s * 2

0   -0.306235
1   -0.317424
2   -0.499287
3    0.813729
4   -0.139037
dtype: float64

In [21]:
np.exp(s)

0    0.858029
1    0.853242
2    0.779078
3    1.502101
4    0.932843
dtype: float64

In [22]:
#Operations between Series automatically align the data based on label (Not for position)
# If one label doesn't have value in each Serie, the operation returns NaN
s[1:] + s[:-1]

0         NaN
1   -0.317424
2   -0.499287
3    0.813729
4         NaN
dtype: float64

In [23]:
#Name attribute
s = pd.Series(np.random.randn(5), name='something')
s

0    0.628898
1    0.422323
2    0.164728
3    0.896829
4    0.265410
Name: something, dtype: float64

In [24]:
s.name

'something'

In [25]:
#Rename serie
s2 = s.rename("different")
s2

0    0.628898
1    0.422323
2    0.164728
3    0.896829
4    0.265410
Name: different, dtype: float64

## DataFrame
DataFrame is a 2-dimensional labeled data structure with columns of potentially different types. You can think of it like a spreadsheet or SQL table, or a dict of Series objects

Along with the data, you can optionally pass index (row labels) and columns (column labels) arguments.

In [26]:
#From dict of Series or dicts
d = {'one' : pd.Series([1., 2., 3.], index=['a', 'b', 'c']),
    'two' : pd.Series([1., 2., 3., 4.], index=['a', 'b', 'c', 'd'])}
d

{'one': a    1.0
 b    2.0
 c    3.0
 dtype: float64, 'two': a    1.0
 b    2.0
 c    3.0
 d    4.0
 dtype: float64}

In [27]:
df = pd.DataFrame(d)
df

Unnamed: 0,one,two
a,1.0,1.0
b,2.0,2.0
c,3.0,3.0
d,,4.0


In [28]:
pd.DataFrame(d, index=['d', 'b', 'a'])

Unnamed: 0,one,two
d,,4.0
b,2.0,2.0
a,1.0,1.0


In [29]:
pd.DataFrame(d, index=['d', 'b', 'a'], columns=['two', 'three'])

Unnamed: 0,two,three
d,4.0,
b,2.0,
a,1.0,


In [30]:
df.index

Index(['a', 'b', 'c', 'd'], dtype='object')

In [31]:
df.columns

Index(['one', 'two'], dtype='object')

In [32]:
#From dict of ndarrays / lists
d = {'one' : [1., 2., 3., 4.],
     'two' : [4., 3., 2., 1.]}
d

{'one': [1.0, 2.0, 3.0, 4.0], 'two': [4.0, 3.0, 2.0, 1.0]}

In [33]:
pd.DataFrame(d)

Unnamed: 0,one,two
0,1.0,4.0
1,2.0,3.0
2,3.0,2.0
3,4.0,1.0


In [34]:
pd.DataFrame(d, index=['a', 'b', 'c', 'd'])

Unnamed: 0,one,two
a,1.0,4.0
b,2.0,3.0
c,3.0,2.0
d,4.0,1.0


In [35]:
#From structured or record array
data = np.zeros((2,), dtype=[('A', 'i4'),('B', 'f4'),('C', 'a10')])
data

array([(0, 0.0, b''), (0, 0.0, b'')], 
      dtype=[('A', '<i4'), ('B', '<f4'), ('C', 'S10')])

In [36]:
data[:] = [(1,2.,'Hello'), (2,3.,"World")]
data

array([(1, 2.0, b'Hello'), (2, 3.0, b'World')], 
      dtype=[('A', '<i4'), ('B', '<f4'), ('C', 'S10')])

In [37]:
pd.DataFrame(data)

Unnamed: 0,A,B,C
0,1,2.0,b'Hello'
1,2,3.0,b'World'


In [38]:
pd.DataFrame(data, index=['first', 'second'])

Unnamed: 0,A,B,C
first,1,2.0,b'Hello'
second,2,3.0,b'World'


In [39]:
pd.DataFrame(data, columns=['C', 'A', 'B'])

Unnamed: 0,C,A,B
0,b'Hello',1,2.0
1,b'World',2,3.0


In [40]:
#From a list of dicts
data2 = [{'a': 1, 'b': 2}, {'a': 5, 'b': 10, 'c': 20}]
data2

[{'a': 1, 'b': 2}, {'a': 5, 'b': 10, 'c': 20}]

In [41]:
pd.DataFrame(data2)

Unnamed: 0,a,b,c
0,1,2,
1,5,10,20.0


In [42]:
pd.DataFrame(data2, index=['first', 'second'])

Unnamed: 0,a,b,c
first,1,2,
second,5,10,20.0


In [43]:
pd.DataFrame(data2, columns=['a', 'b'])

Unnamed: 0,a,b
0,1,2
1,5,10


In [44]:
#From a dict of tuples
pd.DataFrame({('a', 'b'): {('A', 'B'): 1, ('A', 'C'): 2},
              ('a', 'a'): {('A', 'C'): 3, ('A', 'B'): 4},
              ('a', 'c'): {('A', 'B'): 5, ('A', 'C'): 6},
              ('b', 'a'): {('A', 'C'): 7, ('A', 'B'): 8},
              ('b', 'b'): {('A', 'D'): 9, ('A', 'B'): 10}})

Unnamed: 0_level_0,Unnamed: 1_level_0,a,a,a,b,b
Unnamed: 0_level_1,Unnamed: 1_level_1,a,b,c,a,b
A,B,4.0,1.0,5.0,8.0,10.0
A,C,3.0,2.0,6.0,7.0,
A,D,,,,,9.0


In [45]:
#Alternate Constructors
data

array([(1, 2.0, b'Hello'), (2, 3.0, b'World')], 
      dtype=[('A', '<i4'), ('B', '<f4'), ('C', 'S10')])

In [46]:
pd.DataFrame.from_records(data, index='C')

Unnamed: 0_level_0,A,B
C,Unnamed: 1_level_1,Unnamed: 2_level_1
b'Hello',1,2.0
b'World',2,3.0


In [47]:
pd.DataFrame.from_items([('A', [1, 2, 3]), ('B', [4, 5, 6])])

Unnamed: 0,A,B
0,1,4
1,2,5
2,3,6


In [48]:
pd.DataFrame.from_items([('A', [1, 2, 3]), ('B', [4, 5, 6])],orient='index', columns=['one', 'two', 'three'])

Unnamed: 0,one,two,three
A,1,2,3
B,4,5,6


In [51]:
#Column selection, addition, deletion
df['flag'] = df['one'] > 2
df

Unnamed: 0,one,two,flag
a,1.0,1.0,False
b,2.0,2.0,False
c,3.0,3.0,True
d,,4.0,False


In [53]:
df['one']

a    1.0
b    2.0
c    3.0
d    NaN
Name: one, dtype: float64

In [54]:
df['three'] = df['one'] * df['two']
df

Unnamed: 0,one,two,flag,three
a,1.0,1.0,False,1.0
b,2.0,2.0,False,4.0
c,3.0,3.0,True,9.0
d,,4.0,False,


In [55]:
df['flag'] = df['one'] > 2
df

Unnamed: 0,one,two,flag,three
a,1.0,1.0,False,1.0
b,2.0,2.0,False,4.0
c,3.0,3.0,True,9.0
d,,4.0,False,


In [56]:
#Delete column
del df['two']
df

Unnamed: 0,one,flag,three
a,1.0,False,1.0
b,2.0,False,4.0
c,3.0,True,9.0
d,,False,


In [57]:
three = df.pop('three')
three

a    1.0
b    4.0
c    9.0
d    NaN
Name: three, dtype: float64

In [58]:
df

Unnamed: 0,one,flag
a,1.0,False
b,2.0,False
c,3.0,True
d,,False


In [59]:
df['foo'] = 'bar'
df

Unnamed: 0,one,flag,foo
a,1.0,False,bar
b,2.0,False,bar
c,3.0,True,bar
d,,False,bar


In [60]:
df['one_trunc'] = df['one'][:2]
df

Unnamed: 0,one,flag,foo,one_trunc
a,1.0,False,bar,1.0
b,2.0,False,bar,2.0
c,3.0,True,bar,
d,,False,bar,


In [61]:
df.insert(1, 'bar', df['one'])
df

Unnamed: 0,one,bar,flag,foo,one_trunc
a,1.0,1.0,False,bar,1.0
b,2.0,2.0,False,bar,2.0
c,3.0,3.0,True,bar,
d,,,False,bar,


In [65]:
#Assigning New Columns in Method Chains
iris = pd.read_csv('../files/iris.data')
iris.head()

Unnamed: 0,SepalLength,SepalWidth,PetalLength,PetalWidth,Name
0,5.1,3.5,1.4,0.2,Iris-setosa
1,4.9,3.0,1.4,0.2,Iris-setosa
2,4.7,3.2,1.3,0.2,Iris-setosa
3,4.6,3.1,1.5,0.2,Iris-setosa
4,5.0,3.6,1.4,0.2,Iris-setosa


In [66]:
(iris.assign(sepal_ratio = iris['SepalWidth'] / iris['SepalLength']).head())

Unnamed: 0,SepalLength,SepalWidth,PetalLength,PetalWidth,Name,sepal_ratio
0,5.1,3.5,1.4,0.2,Iris-setosa,0.686275
1,4.9,3.0,1.4,0.2,Iris-setosa,0.612245
2,4.7,3.2,1.3,0.2,Iris-setosa,0.680851
3,4.6,3.1,1.5,0.2,Iris-setosa,0.673913
4,5.0,3.6,1.4,0.2,Iris-setosa,0.72


In [67]:
iris.assign(sepal_ratio = lambda x: (x['SepalWidth'] / x['SepalLength'])).head()

Unnamed: 0,SepalLength,SepalWidth,PetalLength,PetalWidth,Name,sepal_ratio
0,5.1,3.5,1.4,0.2,Iris-setosa,0.686275
1,4.9,3.0,1.4,0.2,Iris-setosa,0.612245
2,4.7,3.2,1.3,0.2,Iris-setosa,0.680851
3,4.6,3.1,1.5,0.2,Iris-setosa,0.673913
4,5.0,3.6,1.4,0.2,Iris-setosa,0.72


In [69]:
(iris.query('SepalLength > 5')
.assign(SepalRatio = lambda x: x.SepalWidth / x.SepalLength
        ,PetalRatio = lambda x: x.PetalWidth / x.PetalLength)
 .plot(kind='scatter', x='SepalRatio', y='PetalRatio'))

<matplotlib.axes._subplots.AxesSubplot at 0x7f7d4a296320>

In [70]:
#Indexing / Selection
df

Unnamed: 0,one,bar,flag,foo,one_trunc
a,1.0,1.0,False,bar,1.0
b,2.0,2.0,False,bar,2.0
c,3.0,3.0,True,bar,
d,,,False,bar,


In [72]:
# Select column: df[col] ==> Series
df['one']

a    1.0
b    2.0
c    3.0
d    NaN
Name: one, dtype: float64

In [74]:
#Select row by label: df.loc[label] ==> Series
df.loc['b']

one              2
bar              2
flag         False
foo            bar
one_trunc        2
Name: b, dtype: object

In [75]:
#Select row by integer location: df.iloc[loc] ==> Series
df.iloc[2]

one             3
bar             3
flag         True
foo           bar
one_trunc     NaN
Name: c, dtype: object

In [76]:
#Slice rows: df[5:10] ==> DataFrame
df[1:3]

Unnamed: 0,one,bar,flag,foo,one_trunc
b,2.0,2.0,False,bar,2.0
c,3.0,3.0,True,bar,


In [79]:
#Select rows by boolean vector: df[bool_vec] ==> DataFrame
df[[False,True,True,False]]

Unnamed: 0,one,bar,flag,foo,one_trunc
b,2.0,2.0,False,bar,2.0
c,3.0,3.0,True,bar,


In [81]:
#Data alignment and arithmetic
df = pd.DataFrame(np.random.randn(10, 4), columns=['A', 'B', 'C', 'D'])
df

Unnamed: 0,A,B,C,D
0,1.3883,-0.55739,0.939028,0.373704
1,1.135673,-0.08362,-0.715492,1.104049
2,0.660569,-1.181487,1.31183,-1.078282
3,0.022639,-0.090022,-1.463242,0.632892
4,-1.756491,1.01552,1.115693,-2.667645
5,-0.856511,1.415929,1.177264,-0.883002
6,-0.225961,-1.556934,0.142676,-1.643993
7,0.574071,0.912964,1.543834,-0.986787
8,0.890934,0.693463,-2.479745,1.042842
9,-1.219861,1.039418,0.69035,0.651039


In [82]:
df2 = pd.DataFrame(np.random.randn(7, 3), columns=['A', 'B', 'C'])
df2

Unnamed: 0,A,B,C
0,-0.136428,0.501009,1.681462
1,-0.744342,1.845359,-0.73266
2,-0.622178,-1.075561,-0.623121
3,0.845775,-0.589538,0.246859
4,2.428367,-0.576309,1.656607
5,-0.643044,-2.129173,-0.573792
6,0.616512,0.0355,-0.660079


In [83]:
df + df2

Unnamed: 0,A,B,C,D
0,1.251873,-0.056381,2.620491,
1,0.391331,1.761738,-1.448152,
2,0.038392,-2.257048,0.688709,
3,0.868414,-0.67956,-1.216383,
4,0.671875,0.439211,2.7723,
5,-1.499555,-0.713244,0.603473,
6,0.390551,-1.521434,-0.517404,
7,,,,
8,,,,
9,,,,


In [84]:
df - df.iloc[0]

Unnamed: 0,A,B,C,D
0,0.0,0.0,0.0,0.0
1,-0.252628,0.47377,-1.65452,0.730345
2,-0.727731,-0.624096,0.372801,-1.451986
3,-1.365661,0.467368,-2.40227,0.259187
4,-3.144792,1.57291,0.176664,-3.041349
5,-2.244812,1.973319,0.238236,-1.256706
6,-1.614261,-0.999544,-0.796353,-2.017697
7,-0.814229,1.470355,0.604806,-1.360491
8,-0.497366,1.250853,-3.418774,0.669138
9,-2.608161,1.596808,-0.248678,0.277335


In [85]:
index = pd.date_range('1/1/2000', periods=8)
index

DatetimeIndex(['2000-01-01', '2000-01-02', '2000-01-03', '2000-01-04',
               '2000-01-05', '2000-01-06', '2000-01-07', '2000-01-08'],
              dtype='datetime64[ns]', freq='D')

In [86]:
df = pd.DataFrame(np.random.randn(8, 3), index=index, columns=list('ABC'))
df

Unnamed: 0,A,B,C
2000-01-01,0.155477,1.194408,-0.136439
2000-01-02,-1.809752,1.340046,1.101628
2000-01-03,-1.585783,-0.033436,1.284236
2000-01-04,0.018606,0.143577,-1.278076
2000-01-05,-0.408728,-0.782694,0.380166
2000-01-06,0.140995,1.012655,-1.021252
2000-01-07,-1.584333,-1.508318,0.441465
2000-01-08,1.229434,-0.251935,-0.39926


In [87]:
type(df['A'])

pandas.core.series.Series

In [89]:
df.sub(df['A'], axis=0) 

Unnamed: 0,A,B,C
2000-01-01,0.0,1.038931,-0.291916
2000-01-02,0.0,3.149797,2.91138
2000-01-03,0.0,1.552347,2.870019
2000-01-04,0.0,0.124971,-1.296682
2000-01-05,0.0,-0.373966,0.788894
2000-01-06,0.0,0.87166,-1.162247
2000-01-07,0.0,0.076014,2.025798
2000-01-08,0.0,-1.481369,-1.628694


In [90]:
#Operations with scalars 
df * 5 + 2

Unnamed: 0,A,B,C
2000-01-01,2.777384,7.97204,1.317803
2000-01-02,-7.04876,8.700228,7.508138
2000-01-03,-5.928917,1.832818,8.421179
2000-01-04,2.09303,2.717883,-4.390378
2000-01-05,-0.043642,-1.91347,3.90083
2000-01-06,2.704977,7.063276,-3.10626
2000-01-07,-5.921664,-5.541592,4.207325
2000-01-08,8.147172,0.740325,0.003701


In [91]:
1 / df

Unnamed: 0,A,B,C
2000-01-01,6.431832,0.837235,-7.329257
2000-01-02,-0.552562,0.746243,0.907748
2000-01-03,-0.630603,-29.907446,0.778673
2000-01-04,53.746198,6.964923,-0.782426
2000-01-05,-2.446612,-1.277638,2.630429
2000-01-06,7.092432,0.987503,-0.97919
2000-01-07,-0.631181,-0.66299,2.265185
2000-01-08,0.813382,-3.969277,-2.504635


In [92]:
df ** 4

Unnamed: 0,A,B,C
2000-01-01,0.0005843342,2.035217,0.000347
2000-01-02,10.72695,3.224617,1.472785
2000-01-03,6.323761,1e-06,2.720064
2000-01-04,1.19842e-07,0.000425,2.668248
2000-01-05,0.02790869,0.375291,0.020888
2000-01-06,0.0003952021,1.05159,1.087757
2000-01-07,6.300654,5.175736,0.037983
2000-01-08,2.284659,0.004029,0.025411


In [93]:
#Boolean operators
df1 = pd.DataFrame({'a' : [1, 0, 1], 'b' : [0, 1, 1] }, dtype=bool)
df1

Unnamed: 0,a,b
0,True,False
1,False,True
2,True,True


In [94]:
df2 = pd.DataFrame({'a' : [0, 1, 1], 'b' : [1, 1, 0] }, dtype=bool)
df2

Unnamed: 0,a,b
0,False,True
1,True,True
2,True,False


In [95]:
df1 & df2

Unnamed: 0,a,b
0,False,False
1,False,True
2,True,False


In [96]:
df1 | df2

Unnamed: 0,a,b
0,True,True
1,True,True
2,True,True


In [97]:
df1 ^ df2

Unnamed: 0,a,b
0,True,True
1,True,False
2,False,True


In [98]:
-df1

Unnamed: 0,a,b
0,False,True
1,True,False
2,False,False


In [99]:
#Transposing
# only show the first 5 rows
df[:5].T

Unnamed: 0,2000-01-01 00:00:00,2000-01-02 00:00:00,2000-01-03 00:00:00,2000-01-04 00:00:00,2000-01-05 00:00:00
A,0.155477,-1.809752,-1.585783,0.018606,-0.408728
B,1.194408,1.340046,-0.033436,0.143577,-0.782694
C,-0.136439,1.101628,1.284236,-1.278076,0.380166


In [100]:
#DataFrame interoperability with NumPy functions
np.exp(df)

Unnamed: 0,A,B,C
2000-01-01,1.168215,3.301603,0.872459
2000-01-02,0.163695,3.819217,3.00906
2000-01-03,0.204787,0.967116,3.611907
2000-01-04,1.01878,1.154395,0.278573
2000-01-05,0.664495,0.457173,1.462527
2000-01-06,1.151419,2.752901,0.360144
2000-01-07,0.205085,0.221282,1.554984
2000-01-08,3.419295,0.777295,0.670816


In [101]:
np.asarray(df)

array([[ 0.15547671,  1.19440797, -0.13643948],
       [-1.80975193,  1.34004551,  1.10162761],
       [-1.58578336, -0.03343649,  1.28423577],
       [ 0.01860597,  0.14357661, -1.27807559],
       [-0.40872843, -0.78269403,  0.38016606],
       [ 0.14099536,  1.01265523, -1.02125206],
       [-1.58433278, -1.50831838,  0.4414651 ],
       [ 1.22943437, -0.25193503, -0.3992598 ]])

In [102]:
#The dot method on DataFrame implements matrix multiplication:
df.T.dot(df)

Unnamed: 0,A,B,C
A,10.022988,0.358877,-5.564853
B,0.358877,7.220641,-0.810188
C,-5.564853,-0.810188,6.05672


In [103]:
s1 = pd.Series(np.arange(5,10))
s1

0    5
1    6
2    7
3    8
4    9
dtype: int64

In [104]:
s1.dot(s1)

255

In [106]:
#Console display
baseball = pd.read_csv('../files/game_logs.csv')
print(baseball)

  interactivity=interactivity, compiler=compiler, result=result)


            date  number_of_game day_of_week v_name v_league  v_game_number  \
0       18710504               0         Thu    CL1       na              1   
1       18710505               0         Fri    BS1       na              1   
2       18710506               0         Sat    CL1       na              2   
3       18710508               0         Mon    CL1       na              3   
4       18710509               0         Tue    BS1       na              2   
5       18710511               0         Thu    CH1       na              2   
6       18710513               0         Sat    WS3       na              2   
7       18710513               0         Sat    CH1       na              3   
8       18710515               0         Mon    WS3       na              3   
9       18710516               0         Tue    TRO       na              2   
10      18710516               0         Tue    WS3       na              4   
11      18710517               0         Wed    WS3 

In [107]:
baseball.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 171907 entries, 0 to 171906
Columns: 161 entries, date to acquisition_info
dtypes: float64(77), int64(6), object(78)
memory usage: 211.2+ MB


In [108]:
print(baseball.iloc[-20:, :12].to_string())

            date  number_of_game day_of_week v_name v_league  v_game_number h_name h_league  h_game_number  v_score  h_score  length_outs
171887  20161001               0         Sat    MIL       NL            161    COL       NL            161        4        3         60.0
171888  20161001               0         Sat    NYN       NL            161    PHI       NL            161        5        3         54.0
171889  20161001               0         Sat    LAN       NL            161    SFN       NL            161        0        3         51.0
171890  20161001               0         Sat    PIT       NL            161    SLN       NL            161        3        4         51.0
171891  20161001               0         Sat    MIA       NL            160    WAS       NL            161        1        2         51.0
171892  20161002               0         Sun    HOU       AL            162    ANA       AL            162        1        8         51.0
171893  20161002               0  

In [109]:
pd.DataFrame(np.random.randn(3, 12))

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,10,11
0,0.116211,0.247393,0.462065,0.331835,0.906233,0.405651,1.611538,0.068275,0.377707,1.314001,1.198042,-0.087064
1,1.056646,1.300047,-0.081029,0.713962,-1.578055,1.053586,-1.716723,0.084718,0.181338,0.211748,0.168981,-0.676513
2,-0.150781,-1.715333,0.686989,0.519197,-0.43259,0.070421,-1.573035,0.656502,0.104428,-0.597963,-0.093206,1.329506


In [112]:
pd.set_option('display.width', 40) # default is 80
pd.DataFrame(np.random.randn(3, 12))

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,10,11
0,2.26086,2.084625,-0.530009,-1.427457,-1.806215,0.961151,0.029171,1.346059,-0.816722,-0.316666,-0.389906,-2.140342
1,-1.554464,-0.688023,0.475525,-0.230576,-2.831427,0.138499,0.797007,0.102345,-0.222169,0.189605,0.648083,0.53735
2,-0.022427,1.098532,0.847095,-1.413214,-0.417706,-0.01608,0.056465,0.186735,-0.11914,2.111516,0.364742,0.066273


## Panel
Panel is a somewhat less-used, but still important container for 3-dimensional data.

Warning In 0.20.0, Panel is deprecated and will be removed in a future version. See the section Deprecate Panel.

Over the last few years, pandas has increased in both breadth and depth, with new features, datatype support, and manipulation routines. As a result, supporting efficient indexing and functional routines for Series, DataFrame and Panel has contributed to an increasingly fragmented and difficult-to-understand codebase.

The 3-D structure of a Panel is much less common for many types of data analysis, than the 1-D of the Series or the 2-D of the DataFrame. Going forward it makes sense for pandas to focus on these areas exclusively.

Oftentimes, one can simply use a MultiIndex DataFrame for easily working with higher dimensional data.