## Pandas Basics

In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

In [2]:
# Five random draws, labelled 'a' through 'e'.
s = pd.Series(data=np.random.randn(5), index=list('abcde'))

In [3]:
# No index is passed, so the labels are the default integers 0..4.
s1 = pd.Series(data=np.random.randn(5))
s1

0    0.860872
1    0.478581
2    0.907158
3    0.529182
4   -0.907725
dtype: float64

In [4]:
# Display s; each value is shown beside its label.
s

a   -0.718008
b   -0.260626
c   -0.299116
d    0.843922
e    0.054848
dtype: float64

In [5]:
# The Index object that carries the labels of s.
s.index

Index(['a', 'b', 'c', 'd', 'e'], dtype='object')

In [6]:
# Without an index argument the labels default to 0, 1, 2, ...
pd.Series(np.random.randn(5))

0   -0.456371
1    1.414752
2   -0.083262
3    0.027912
4    0.951943
dtype: float64

In [7]:
# Small label -> float mapping used to build Series in the next cells.
d = dict(a=0.0, b=1.0, c=2.0)

In [8]:
# Build a Series from a dict: the keys become the index labels.
pd.Series(d)

a    0.0
b    1.0
c    2.0
dtype: float64

In [9]:
# An explicit index picks and orders the dict entries; 'd' is not a key of
# the dict, so its value comes out as NaN.
pd.Series(d, index=['b', 'c', 'd', 'a'])

b    1.0
c    2.0
d    NaN
a    0.0
dtype: float64

In [10]:
# A scalar value is repeated for every label in the index.
pd.Series(5., index=['a', 'b', 'c', 'd', 'e'])

a    5.0
b    5.0
c    5.0
d    5.0
e    5.0
dtype: float64

In [11]:
# Positional access to the first element. `.iloc` is used because s is
# labelled with strings: plain `s[0]` relies on the deprecated fallback that
# treats an integer key as a position (FutureWarning since pandas 2.1).
s.iloc[0]

-0.7180078456769777

In [12]:
# First three elements, selected by position.
s.iloc[:3]

a   -0.718008
b   -0.260626
c   -0.299116
dtype: float64

In [13]:
# Label-based lookup of the element stored under 'a'.
s.loc['a']

-0.7180078456769777

In [14]:
# Overwrite the value stored under the existing label 'e'.
s.loc['e'] = 12.0

In [15]:
# Display s again: the value at 'e' is now 12.0.
s

a    -0.718008
b    -0.260626
c    -0.299116
d     0.843922
e    12.000000
dtype: float64

In [16]:
# Dict-style lookup: .get returns the value stored under label 'a'.
s.get('a')

-0.7180078456769777

In [17]:
# Two independent Series of five random draws each (ts1 is drawn first),
# both on the default integer index.
ts1, ts2 = (pd.Series(np.random.randn(5)) for _ in range(2))

In [18]:
# Map column names to the two Series; the dict is echoed to show its contents.
d = dict(col1=ts1, col2=ts2)
d

{'col1': 0   -0.417107
 1   -0.060480
 2   -0.272207
 3    0.661507
 4   -1.187352
 dtype: float64, 'col2': 0    0.257100
 1   -0.548194
 2    1.258666
 3    0.328082
 4   -0.681201
 dtype: float64}

In [19]:
# Each dict key becomes a column of the frame.
df1 = pd.DataFrame(d)
df1

Unnamed: 0,col1,col2
0,-0.417107,0.2571
1,-0.06048,-0.548194
2,-0.272207,1.258666
3,0.661507,0.328082
4,-1.187352,-0.681201


In [20]:
# 10x5 frame of random draws; rows and columns both get default integer labels.
df2 = pd.DataFrame(data=np.random.randn(10, 5))
df2

Unnamed: 0,0,1,2,3,4
0,-0.191865,-0.324519,0.795026,0.637741,0.857753
1,0.159406,-0.543069,-0.236143,-0.296446,0.780676
2,1.485205,0.33708,1.283326,-1.326186,0.969282
3,0.879561,-1.122273,0.218994,-0.308466,0.716052
4,-1.808452,0.817358,-1.617937,-1.360541,0.762652
5,-2.32166,0.542401,-0.163108,0.115136,0.960252
6,-0.85485,-0.705876,0.149927,2.380247,-0.150206
7,0.282883,0.681398,0.44177,-0.282656,-0.432957
8,0.584745,-0.747112,0.033845,-0.206283,-1.622045
9,-0.580999,-0.207494,-0.289945,1.152434,1.111346


In [21]:
# Same shape as before, but with the columns named 'a' through 'e'.
df3 = pd.DataFrame(data=np.random.randn(10, 5), columns=list('abcde'))
df3

Unnamed: 0,a,b,c,d,e
0,-0.684447,0.993011,-1.366328,2.408397,-0.854576
1,-0.833109,-0.219437,1.80837,-0.504458,-1.703668
2,-0.060759,0.678363,-1.24138,-0.334058,0.227526
3,0.475439,0.199203,-0.60047,-0.654786,2.338409
4,-0.37952,-0.864932,-0.097195,1.375281,-0.702526
5,1.354651,0.904611,-2.415289,-0.350243,-0.633695
6,0.644709,-0.413527,0.71527,0.062987,-1.091057
7,0.324023,0.685614,0.81898,0.504572,0.758902
8,-1.144429,-0.510812,-0.770607,-0.589228,1.958803
9,-1.42265,0.225761,1.24872,1.267281,0.305116


In [22]:
# Two Series with different label sets: 'one' has no entry for 'd', so the
# frame built from them holds a missing value in that cell.
d = {
    'one': pd.Series([1.0, 2.0, 3.0], index=list('abc')),
    'two': pd.Series([1.0, 2.0, 3.0, 4.0], index=list('abcd')),
}
df = pd.DataFrame(data=d)
df

Unnamed: 0,one,two
a,1.0,1.0
b,2.0,2.0
c,3.0,3.0
d,,4.0


In [23]:
# Passing index= keeps only the requested labels, in the given order
# (row 'c' is left out).
pd.DataFrame(d, index=['d', 'b', 'a'])

Unnamed: 0,one,two
d,,4.0
b,2.0,2.0
a,1.0,1.0


In [24]:
# Row labels of df: the union of the labels of 'one' and 'two'.
df.index

Index(['a', 'b', 'c', 'd'], dtype='object')

In [25]:
# Column labels of df, taken from the dict keys.
df.columns

Index(['one', 'two'], dtype='object')

In [26]:
# hasnans checks the index labels themselves for NaN, not the cell values:
# it is False here even though df contains a missing cell.
df.index.hasnans

False

In [27]:
# Load the CSV into a DataFrame and display it.
dfc = pd.read_csv(filepath_or_buffer='data1.csv')
dfc

Unnamed: 0,date,variable,value
0,2000-01-03,A,0.469112
1,2000-01-04,A,-0.282863
2,2000-01-05,A,-1.509059
3,2000-01-03,B,-1.135632
4,2000-01-04,B,1.212112
5,2000-01-05,B,-0.173215
6,2000-01-03,C,0.119209
7,2000-01-04,C,-1.044236
8,2000-01-05,C,-0.861849
9,2000-01-03,D,-2.104569


In [28]:
# Boolean-mask selection: keep only the rows whose 'variable' equals 'A'.
dfc.loc[dfc['variable'] == 'A']

Unnamed: 0,date,variable,value
0,2000-01-03,A,0.469112
1,2000-01-04,A,-0.282863
2,2000-01-05,A,-1.509059


In [29]:
# Reshape from long to wide: one row per date, one column per variable,
# cells filled from 'value'.
dfc.pivot(index='date', columns='variable', values='value')

variable,A,B,C,D
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2000-01-03,0.469112,-1.135632,0.119209,-2.104569
2000-01-04,-0.282863,1.212112,-1.044236,-0.494929
2000-01-05,-1.509059,-0.173215,-0.861849,1.071804


In [30]:
# Summary statistics (count, mean, std, min, quartiles, max) for the
# numeric 'value' column.
dfc.describe()

Unnamed: 0,value
count,12.0
mean,-0.39451
std,1.007649
min,-2.104569
25%,-1.067085
50%,-0.388896
75%,0.206685
max,1.212112
