In [1]:
import numpy as np
import pandas as pd

In [2]:
# Build a Series from a plain list; with no index given, pandas labels the
# values 0..n-1.
pd.Series([10, 20, 30])

0    10
1    20
2    30
dtype: int64

In [3]:
# Attach explicit labels so each value is addressed by 'a'/'b'/'c' rather
# than by its position.
pd.Series(index=['a', 'b', 'c'], data=[10, 20, 30])

a    10
b    20
c    30
dtype: int64

In [4]:
# Same Series as the previous cell, written positionally: the first argument
# is the data and the second is the index.
pd.Series([10, 20, 30], ['a', 'b', 'c'])

a    10
b    20
c    30
dtype: int64

In [5]:
# A mapping works too: its keys become the index and its values the data.
pd.Series(dict(a=1, b=2, c=3))

a    1
b    2
c    3
dtype: int64

In [6]:
# A Series can hold arbitrary Python objects (here, built-in functions);
# the resulting dtype is `object`.
pd.Series([sum, print, len])

0      <built-in function sum>
1    <built-in function print>
2      <built-in function len>
dtype: object

In [7]:
# 5x5 frame of standard-normal draws with labelled rows (A-E) and columns.
# A seeded generator makes Restart & Run All reproduce the same numbers; the
# outputs saved in this notebook came from an earlier unseeded run and will be
# replaced by the seeded values on the next execution.
df = pd.DataFrame(
    data=np.random.default_rng(42).standard_normal((5, 5)),
    index=['A', 'B', 'C', 'D', 'E'],
    columns=['W', 'X', 'Y', 'Z', 'Q'],
)
df

Unnamed: 0,W,X,Y,Z,Q
A,0.770514,0.155173,-0.456494,-0.132056,0.149175
B,0.428586,-1.392645,-0.54774,0.284534,0.173642
C,-0.380068,0.471139,0.251608,1.310424,-1.897645
D,0.260093,-1.065963,0.008679,-2.445964,-2.450252
E,-0.144256,0.317745,-0.48783,1.346161,-0.929177


In [8]:
# Remove column 'Q'. Rebinding the result (instead of inplace=True) avoids
# mutating the frame object that the previous cell displayed, and `columns=`
# states the axis by name rather than by number.
df = df.drop(columns='Q')

In [9]:
# Label-based lookup: row 'A' across every column, returned as a Series.
df.loc['A', :]

W    0.770514
X    0.155173
Y   -0.456494
Z   -0.132056
Name: A, dtype: float64

In [10]:
# Position-based lookup: df.iloc[0] alone would return the whole first row;
# adding the column slice 1:4 keeps positions 1, 2 and 3 (the stop is exclusive).
df.iloc[0, 1:4]

X    0.155173
Y   -0.456494
Z   -0.132056
Name: A, dtype: float64

In [11]:
# Element-wise comparison: yields a same-shaped frame of True/False flags.
df > 0

Unnamed: 0,W,X,Y,Z
A,True,True,False,False
B,True,False,False,True
C,False,True,True,True
D,True,False,True,False
E,False,True,False,True


In [12]:
# Indexing with the boolean frame keeps values where the flag is True and
# shows NaN everywhere else.
df[df > 0]

Unnamed: 0,W,X,Y,Z
A,0.770514,0.155173,,
B,0.428586,,,0.284534
C,,0.471139,0.251608,1.310424
D,0.260093,,0.008679,
E,,0.317745,,1.346161


In [13]:
# Move the row labels into an ordinary column named 'index' and fall back to
# the default 0..n-1 integer index.
df = df.reset_index(drop=False)
df

Unnamed: 0,index,W,X,Y,Z
0,A,0.770514,0.155173,-0.456494,-0.132056
1,B,0.428586,-1.392645,-0.54774,0.284534
2,C,-0.380068,0.471139,0.251608,1.310424
3,D,0.260093,-1.065963,0.008679,-2.445964
4,E,-0.144256,0.317745,-0.48783,1.346161


In [14]:
# Add a new column in place. The entries are strings, not integers; the
# next cell promotes this column to the row index.
df['Nums'] = ['11', '22', '33', '44', '55']
df

Unnamed: 0,index,W,X,Y,Z,Nums
0,A,0.770514,0.155173,-0.456494,-0.132056,11
1,B,0.428586,-1.392645,-0.54774,0.284534,22
2,C,-0.380068,0.471139,0.251608,1.310424,33
3,D,0.260093,-1.065963,0.008679,-2.445964,44
4,E,-0.144256,0.317745,-0.48783,1.346161,55


In [15]:
# Promote the 'Nums' column to be the row index; the column is removed from
# the data and its name becomes the index name.
df = df.set_index(keys='Nums')
df

Unnamed: 0_level_0,index,W,X,Y,Z
Nums,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
11,A,0.770514,0.155173,-0.456494,-0.132056
22,B,0.428586,-1.392645,-0.54774,0.284534
33,C,-0.380068,0.471139,0.251608,1.310424
44,D,0.260093,-1.065963,0.008679,-2.445964
55,E,-0.144256,0.317745,-0.48783,1.346161


In [16]:
# Two-level row index built from (number, group) pairs. The pairs are built
# inline so `hier_index` only ever refers to the MultiIndex, never to an
# intermediate list.
hier_index = pd.MultiIndex.from_tuples(
    list(zip([1, 2, 3, 1, 2, 3], ['G1', 'G1', 'G1', 'G2', 'G2', 'G2']))
)
# Seeded generator so the 6x2 standard-normal data is reproducible on
# Restart & Run All; the outputs saved in this notebook predate the seed and
# will be replaced by the seeded values on the next execution.
df = pd.DataFrame(
    data=np.random.default_rng(0).standard_normal((6, 2)),
    index=hier_index,
    columns=['A', 'B'],
)
df

Unnamed: 0,Unnamed: 1,A,B
1,G1,1.189482,-0.244461
2,G1,1.76378,1.088537
3,G1,-0.350407,-0.147355
1,G2,-0.595488,1.129887
2,G2,-1.176547,0.780369
3,G2,-0.673083,0.664145


In [17]:
# Label the two index levels (outer, inner) so later cells can refer to them
# by name, e.g. level='Nums'. This assigns the names on the existing index.
df.index.names = ['Nums', 'Groups']
df

Unnamed: 0_level_0,Unnamed: 1_level_0,A,B
Nums,Groups,Unnamed: 2_level_1,Unnamed: 3_level_1
1,G1,1.189482,-0.244461
2,G1,1.76378,1.088537
3,G1,-0.350407,-0.147355
1,G2,-0.595488,1.129887
2,G2,-1.176547,0.780369
3,G2,-0.673083,0.664145


In [18]:
# NOTE: no parentheses here, so the cell displays the bound method's repr
# rather than a cross-section; the actual call is in the next cell.
df.xs # cross section

<bound method NDFrame.xs of                     A         B
Nums Groups                    
1    G1      1.189482 -0.244461
2    G1      1.763780  1.088537
3    G1     -0.350407 -0.147355
1    G2     -0.595488  1.129887
2    G2     -1.176547  0.780369
3    G2     -0.673083  0.664145>

In [19]:
# Cross-section: every row whose 'Nums' level equals 1; the matched level is
# dropped from the result, leaving 'Groups' as the index.
df.xs(key=1, level='Nums')

Unnamed: 0_level_0,A,B
Groups,Unnamed: 1_level_1,Unnamed: 2_level_1
G1,1.189482,-0.244461
G2,-0.595488,1.129887


In [20]:
# Small frame with missing values: column A has one NaN, column B has two.
d = dict(A=[1, 2, np.nan], B=[5, np.nan, np.nan])
df = pd.DataFrame(data=d)
df

Unnamed: 0,A,B
0,1.0,5.0
1,2.0,
2,,


In [21]:
# Drop every row that contains at least one missing value; returns a new
# frame and leaves `df` unchanged.
df.dropna(axis='index')

Unnamed: 0,A,B
0,1.0,5.0


In [22]:
# Keep only rows that have at least `thresh` (here 1) non-missing values, so
# only the all-NaN row is dropped.
df.dropna(axis='index', thresh=1)

Unnamed: 0,A,B
0,1.0,5.0
1,2.0,


In [23]:
# Replace each missing value with the placeholder string; returns a new frame.
# Mixing a string into numeric columns means they no longer hold only numbers.
df.fillna('FILL')

Unnamed: 0,A,B
0,1.0,5.0
1,2.0,FILL
2,FILL,FILL


In [24]:
# Toy sales table: two people per company, one sales figure per person.
data = dict(
    Company=['GOOG', 'GOOG', 'MSFT', 'MSFT'],
    Person=['Sam', 'Charlie', 'Amy', 'Vanessa'],
    Sales=[200, 120, 140, 340],
)
df = pd.DataFrame(data=data)
df

Unnamed: 0,Company,Person,Sales
0,GOOG,Sam,200
1,GOOG,Charlie,120
2,MSFT,Amy,140
3,MSFT,Vanessa,340


In [25]:
# Group the rows by company once; the later cells reuse this grouped object.
byCompany = df.groupby(by='Company')
# Average sales per company.
byCompany['Sales'].mean()

Company
GOOG    160.0
MSFT    240.0
Name: Sales, dtype: float64

In [26]:
# Total sales per company. Appending .loc['MSFT'] to this expression would
# pull out the total for a single company.
byCompany['Sales'].sum()

Company
GOOG    320
MSFT    480
Name: Sales, dtype: int64

In [27]:
# Sample standard deviation of sales per company (ddof=1 is the default,
# spelled out here).
byCompany['Sales'].std(ddof=1)

Company
GOOG     56.568542
MSFT    141.421356
Name: Sales, dtype: float64

In [28]:
# Number of non-missing entries in each remaining column, per company.
byCompany.count()

Unnamed: 0_level_0,Person,Sales
Company,Unnamed: 1_level_1,Unnamed: 2_level_1
GOOG,2,2
MSFT,2,2


In [29]:
# Column-wise minimum within each company. Each column is reduced
# independently, so the Person and Sales shown on one row need not belong to
# the same original record; the string column is compared alphabetically.
byCompany.min()

Unnamed: 0_level_0,Person,Sales
Company,Unnamed: 1_level_1,Unnamed: 2_level_1
GOOG,Charlie,120
MSFT,Amy,140


In [30]:
# Column-wise maximum within each company (each column reduced
# independently; strings compared alphabetically).
byCompany.max()

Unnamed: 0_level_0,Person,Sales
Company,Unnamed: 1_level_1,Unnamed: 2_level_1
GOOG,Sam,200
MSFT,Vanessa,340


In [31]:
# Summary statistics (count, mean, std, min, quartiles, max) of the numeric
# Sales column for each company.
byCompany.describe()

Unnamed: 0_level_0,Sales,Sales,Sales,Sales,Sales,Sales,Sales,Sales
Unnamed: 0_level_1,count,mean,std,min,25%,50%,75%,max
Company,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2
GOOG,2.0,160.0,56.568542,120.0,140.0,160.0,180.0,200.0
MSFT,2.0,240.0,141.421356,140.0,190.0,240.0,290.0,340.0


In [32]:
# Redisplay the ungrouped sales table; the groupby cells above did not modify it.
df

Unnamed: 0,Company,Person,Sales
0,GOOG,Sam,200
1,GOOG,Charlie,120
2,MSFT,Amy,140
3,MSFT,Vanessa,340


In [33]:
# Distinct values of the Sales column, returned as a NumPy array.
df['Sales'].unique()

array([200, 120, 140, 340])

In [34]:
# How many distinct values the Sales column contains.
df['Sales'].nunique()

4

In [35]:
# Frequency of each distinct Sales value (every value occurs once here).
df['Sales'].value_counts()

Sales
200    1
120    1
140    1
340    1
Name: count, dtype: int64

In [36]:
# .apply calls the function once per element. For simple arithmetic like
# this, the vectorized form `df['Sales'] * 2` gives the same result.
df['Sales'].apply(lambda sale: sale * 2)

0    400
1    240
2    280
3    680
Name: Sales, dtype: int64

In [37]:
# Column labels of the frame, as an Index object.
df.columns

Index(['Company', 'Person', 'Sales'], dtype='object')

In [38]:
# Row labels of the frame; here the default RangeIndex created by the constructor.
df.index

RangeIndex(start=0, stop=4, step=1)

In [39]:
# Rows ordered by Sales, smallest first; original row labels are kept and
# `df` itself is not modified.
df.sort_values(by='Sales', ascending=True)

Unnamed: 0,Company,Person,Sales
1,GOOG,Charlie,120
2,MSFT,Amy,140
0,GOOG,Sam,200
3,MSFT,Vanessa,340


In [40]:
# Same-shaped boolean frame marking missing entries; all False here because
# the sales table has no missing values.
df.isnull()

Unnamed: 0,Company,Person,Sales
0,False,False,False
1,False,False,False
2,False,False,False
3,False,False,False
