In [1]:
import numpy as np
import pandas as pd

from numpy.random import randn
np.random.seed(101)

## Series

In [2]:
# Series from lists
my_data = [10,20,20,40]
pd.Series(my_data)

0    10
1    20
2    20
3    40
dtype: int64

In [3]:
labels = ['a','b','c','d']
pd.Series(my_data, labels)

a    10
b    20
c    20
d    40
dtype: int64

In [4]:
pd.Series(data=my_data, index=labels)

a    10
b    20
c    20
d    40
dtype: int64

In [5]:
# Series from Numpy arrays

ar = np.array(my_data)
ar

array([10, 20, 20, 40])

In [6]:
pd.Series(ar, labels)

a    10
b    20
c    20
d    40
dtype: int64

In [7]:
# Series from dicts

d={'a':10, 'b':20, 'c':30, 'd':40}
d

{'a': 10, 'b': 20, 'c': 30, 'd': 40}

In [8]:
pd.Series(d)

a    10
b    20
c    30
d    40
dtype: int64

In [9]:
my_list = ['a','b','c']
pd.Series(my_list)

0    a
1    b
2    c
dtype: object

In [10]:
# Grab info from a series

sr1 = pd.Series([1,2,3,4], ['R','G','B','Y'])
sr1

R    1
G    2
B    3
Y    4
dtype: int64

In [11]:
sr2 = pd.Series([1,2,5,4], ['R','G','W','Y'])
sr2

R    1
G    2
W    5
Y    4
dtype: int64

In [12]:
sr1['R']

1

In [13]:
sr2['W']

5

In [14]:
sr3 = pd.Series(['a','b','c'])
sr3

0    a
1    b
2    c
dtype: object

In [15]:
sr3[1]

'b'

In [16]:
# Addition on Series: values are aligned by index label; labels present in only one Series give NaN
sr1 + sr2

B    NaN
G    4.0
R    2.0
W    NaN
Y    8.0
dtype: float64

## DataFrames

In [17]:
# Build a 5x4 frame of random normals with explicit row labels and column names.
# NOTE(review): the row label 'c' is listed twice (possibly a typo for 'd'), so the
# row index is not unique and the frame shows two 'c' rows — confirm this is intended.
df = pd.DataFrame(randn(5,4), ['a','b','c','c','e'], ['w','x','y','z'])
df

Unnamed: 0,w,x,y,z
a,2.70685,0.628133,0.907969,0.503826
b,0.651118,-0.319318,-0.848077,0.605965
c,-2.018168,0.740122,0.528813,-0.589001
c,0.188695,-0.758872,-0.933237,0.955057
e,0.190794,1.978757,2.605967,0.683509


In [18]:
# Indexing & Selection

df['w']

a    2.706850
b    0.651118
c   -2.018168
c    0.188695
e    0.190794
Name: w, dtype: float64

In [19]:
type(df['w'])

pandas.core.series.Series

In [20]:
df[['x','y']]

Unnamed: 0,x,y
a,0.628133,0.907969
b,-0.319318,-0.848077
c,0.740122,0.528813
c,-0.758872,-0.933237
e,1.978757,2.605967


In [21]:
type(df[['x','y']])

pandas.core.frame.DataFrame

In [22]:
# Add new column

df['new'] = df['x'] + df['z']
df

Unnamed: 0,w,x,y,z,new
a,2.70685,0.628133,0.907969,0.503826,1.131958
b,0.651118,-0.319318,-0.848077,0.605965,0.286647
c,-2.018168,0.740122,0.528813,-0.589001,0.151122
c,0.188695,-0.758872,-0.933237,0.955057,0.196184
e,0.190794,1.978757,2.605967,0.683509,2.662266


In [23]:
# Delete a column

df.drop('new', axis=1)

Unnamed: 0,w,x,y,z
a,2.70685,0.628133,0.907969,0.503826
b,0.651118,-0.319318,-0.848077,0.605965
c,-2.018168,0.740122,0.528813,-0.589001
c,0.188695,-0.758872,-0.933237,0.955057
e,0.190794,1.978757,2.605967,0.683509


In [24]:
df

Unnamed: 0,w,x,y,z,new
a,2.70685,0.628133,0.907969,0.503826,1.131958
b,0.651118,-0.319318,-0.848077,0.605965,0.286647
c,-2.018168,0.740122,0.528813,-0.589001,0.151122
c,0.188695,-0.758872,-0.933237,0.955057,0.196184
e,0.190794,1.978757,2.605967,0.683509,2.662266


In [25]:
# Delete the column in place (inplace=True modifies df itself instead of returning a new frame)
df.drop('new', axis=1, inplace=True)

In [26]:
df

Unnamed: 0,w,x,y,z
a,2.70685,0.628133,0.907969,0.503826
b,0.651118,-0.319318,-0.848077,0.605965
c,-2.018168,0.740122,0.528813,-0.589001
c,0.188695,-0.758872,-0.933237,0.955057
e,0.190794,1.978757,2.605967,0.683509


In [31]:
# Selecting rows by label (.loc)

df.loc['e']

w    0.190794
x    1.978757
y    2.605967
z    0.683509
Name: e, dtype: float64

In [29]:
type(df.loc['e'])

pandas.core.series.Series

In [32]:
# Selecting rows by integer position (.iloc)
df.iloc[1]

w    0.651118
x   -0.319318
y   -0.848077
z    0.605965
Name: b, dtype: float64

In [34]:
# Selecting subsets of rows and columns

df.loc['b','w']

0.6511179479432686

In [36]:
# 'c' is not a column of df. Passing a missing label to .loc raises KeyError on
# pandas >= 1.0, and a bare .reindex() is a no-op. Select the rows with .loc and
# let reindex(columns=...) create the absent column 'c' filled with NaN.
df.loc[['a','b']].reindex(columns=['y','c'])

Unnamed: 0,y,c
a,0.907969,
b,-0.848077,


In [37]:
# Conditional selection

df

Unnamed: 0,w,x,y,z
a,2.70685,0.628133,0.907969,0.503826
b,0.651118,-0.319318,-0.848077,0.605965
c,-2.018168,0.740122,0.528813,-0.589001
c,0.188695,-0.758872,-0.933237,0.955057
e,0.190794,1.978757,2.605967,0.683509


In [38]:
df>0

Unnamed: 0,w,x,y,z
a,True,True,True,True
b,True,False,False,True
c,False,True,True,False
c,True,False,False,True
e,True,True,True,True


In [40]:
b = df>0
b

Unnamed: 0,w,x,y,z
a,True,True,True,True
b,True,False,False,True
c,False,True,True,False
c,True,False,False,True
e,True,True,True,True


In [41]:
df[b]

Unnamed: 0,w,x,y,z
a,2.70685,0.628133,0.907969,0.503826
b,0.651118,,,0.605965
c,,0.740122,0.528813,
c,0.188695,,,0.955057
e,0.190794,1.978757,2.605967,0.683509


In [42]:
df[df>0]

Unnamed: 0,w,x,y,z
a,2.70685,0.628133,0.907969,0.503826
b,0.651118,,,0.605965
c,,0.740122,0.528813,
c,0.188695,,,0.955057
e,0.190794,1.978757,2.605967,0.683509


In [43]:
df['w']>0

a     True
b     True
c    False
c     True
e     True
Name: w, dtype: bool

In [44]:
df[df['w']>0]

Unnamed: 0,w,x,y,z
a,2.70685,0.628133,0.907969,0.503826
b,0.651118,-0.319318,-0.848077,0.605965
c,0.188695,-0.758872,-0.933237,0.955057
e,0.190794,1.978757,2.605967,0.683509


In [46]:
df[df['x']<0]

Unnamed: 0,w,x,y,z
b,0.651118,-0.319318,-0.848077,0.605965
c,0.188695,-0.758872,-0.933237,0.955057


In [51]:
r = df['x']<0

In [52]:
df[r]

Unnamed: 0,w,x,y,z
b,0.651118,-0.319318,-0.848077,0.605965
c,0.188695,-0.758872,-0.933237,0.955057


In [55]:
df[df['w']>0][['x','y']]

Unnamed: 0,x,y
a,0.628133,0.907969
b,-0.319318,-0.848077
c,-0.758872,-0.933237
e,1.978757,2.605967


In [58]:
# Use multiple conditions

df[(df['w']>0) & (df['y']>1)]

Unnamed: 0,w,x,y,z
e,0.190794,1.978757,2.605967,0.683509


In [59]:
True and True

True

In [60]:
False & True

False

In [61]:
df[(df['w']>0) | (df['y']>1)]

Unnamed: 0,w,x,y,z
a,2.70685,0.628133,0.907969,0.503826
b,0.651118,-0.319318,-0.848077,0.605965
c,0.188695,-0.758872,-0.933237,0.955057
e,0.190794,1.978757,2.605967,0.683509


In [62]:
# Index in detail: reset_index() and set_index()

df.reset_index()

Unnamed: 0,index,w,x,y,z
0,a,2.70685,0.628133,0.907969,0.503826
1,b,0.651118,-0.319318,-0.848077,0.605965
2,c,-2.018168,0.740122,0.528813,-0.589001
3,c,0.188695,-0.758872,-0.933237,0.955057
4,e,0.190794,1.978757,2.605967,0.683509


In [63]:
df

Unnamed: 0,w,x,y,z
a,2.70685,0.628133,0.907969,0.503826
b,0.651118,-0.319318,-0.848077,0.605965
c,-2.018168,0.740122,0.528813,-0.589001
c,0.188695,-0.758872,-0.933237,0.955057
e,0.190794,1.978757,2.605967,0.683509


In [64]:
new_index = 'R G B Y O'.split()
new_index

['R', 'G', 'B', 'Y', 'O']

In [65]:
df['colors'] = new_index
df

Unnamed: 0,w,x,y,z,colors
a,2.70685,0.628133,0.907969,0.503826,R
b,0.651118,-0.319318,-0.848077,0.605965,G
c,-2.018168,0.740122,0.528813,-0.589001,B
c,0.188695,-0.758872,-0.933237,0.955057,Y
e,0.190794,1.978757,2.605967,0.683509,O


In [68]:
df.set_index('colors')

Unnamed: 0_level_0,w,x,y,z
colors,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
R,2.70685,0.628133,0.907969,0.503826
G,0.651118,-0.319318,-0.848077,0.605965
B,-2.018168,0.740122,0.528813,-0.589001
Y,0.188695,-0.758872,-0.933237,0.955057
O,0.190794,1.978757,2.605967,0.683509


In [69]:
df

Unnamed: 0,w,x,y,z,colors
a,2.70685,0.628133,0.907969,0.503826,R
b,0.651118,-0.319318,-0.848077,0.605965,G
c,-2.018168,0.740122,0.528813,-0.589001,B
c,0.188695,-0.758872,-0.933237,0.955057,Y
e,0.190794,1.978757,2.605967,0.683509,O


## Multi-index & Index Hierarchy

In [70]:
# Index Levels

outside = ['G1','G1','G1','G2','G2','G2']
inside = [1,2,3,1,2,3]
higher_index = list(zip(outside, inside))
higher_index

[('G1', 1), ('G1', 2), ('G1', 3), ('G2', 1), ('G2', 2), ('G2', 3)]

In [71]:
higher_index = pd.MultiIndex.from_tuples(higher_index)
higher_index

MultiIndex([('G1', 1),
            ('G1', 2),
            ('G1', 3),
            ('G2', 1),
            ('G2', 2),
            ('G2', 3)],
           )

In [72]:
# Multi-index dataframe

df = pd.DataFrame(randn(6,2), higher_index, ['A','B'])
df

Unnamed: 0,Unnamed: 1,A,B
G1,1,0.302665,1.693723
G1,2,-1.706086,-1.159119
G1,3,-0.134841,0.390528
G2,1,0.166905,0.184502
G2,2,0.807706,0.07296
G2,3,0.638787,0.329646


In [74]:
# Single .loc lookup with a (level-0, level-1) tuple plus the column label,
# instead of three chained indexing steps that each build an intermediate object.
df.loc[('G1', 1), 'A']

0.3026654485851825

In [75]:
# Single .loc lookup with a (level-0, level-1) tuple plus the column label,
# instead of three chained indexing steps that each build an intermediate object.
df.loc[('G2', 2), 'B']

0.07295967531703869

In [76]:
df.index.names

FrozenList([None, None])

In [78]:
df.index.names = ['Groups', 'Nums']
df

Unnamed: 0_level_0,Unnamed: 1_level_0,A,B
Groups,Nums,Unnamed: 2_level_1,Unnamed: 3_level_1
G1,1,0.302665,1.693723
G1,2,-1.706086,-1.159119
G1,3,-0.134841,0.390528
G2,1,0.166905,0.184502
G2,2,0.807706,0.07296
G2,3,0.638787,0.329646


In [80]:
# Cross section

df

Unnamed: 0_level_0,Unnamed: 1_level_0,A,B
Groups,Nums,Unnamed: 2_level_1,Unnamed: 3_level_1
G1,1,0.302665,1.693723
G1,2,-1.706086,-1.159119
G1,3,-0.134841,0.390528
G2,1,0.166905,0.184502
G2,2,0.807706,0.07296
G2,3,0.638787,0.329646


In [81]:
df.loc['G1']

Unnamed: 0_level_0,A,B
Nums,Unnamed: 1_level_1,Unnamed: 2_level_1
1,0.302665,1.693723
2,-1.706086,-1.159119
3,-0.134841,0.390528


In [82]:
df.xs('G1')

Unnamed: 0_level_0,A,B
Nums,Unnamed: 1_level_1,Unnamed: 2_level_1
1,0.302665,1.693723
2,-1.706086,-1.159119
3,-0.134841,0.390528


In [83]:
df.xs(1, level='Nums')

Unnamed: 0_level_0,A,B
Groups,Unnamed: 1_level_1,Unnamed: 2_level_1
G1,0.302665,1.693723
G2,0.166905,0.184502


## Working with missing data

In [84]:
d = {'A':[1,2,np.nan], 'B':[3,np.nan,np.nan], 'C':[4,5,6]}
d

{'A': [1, 2, nan], 'B': [3, nan, nan], 'C': [4, 5, 6]}

In [85]:
df = pd.DataFrame(d)
df

Unnamed: 0,A,B,C
0,1.0,3.0,4
1,2.0,,5
2,,,6


In [86]:
# dropna method

df.dropna()

Unnamed: 0,A,B,C
0,1.0,3.0,4


In [87]:
df.dropna(axis=1)

Unnamed: 0,C
0,4
1,5
2,6


In [88]:
df.dropna(thresh=2)

Unnamed: 0,A,B,C
0,1.0,3.0,4
1,2.0,,5


In [89]:
df.dropna(axis=1, thresh=2)

Unnamed: 0,A,C
0,1.0,4
1,2.0,5
2,,6


In [90]:
# fillna method

df.fillna(value='replacement')

Unnamed: 0,A,B,C
0,1,3,4
1,2,replacement,5
2,replacement,replacement,6


In [91]:
df['A'].fillna(value=df['A'].mean())

0    1.0
1    2.0
2    1.5
Name: A, dtype: float64

In [92]:
df

Unnamed: 0,A,B,C
0,1.0,3.0,4
1,2.0,,5
2,,,6


## Groupby Function

In [93]:
team_data={'Company':['Apple','Apple','Google','Google','FB','FB'],
           'Person':['Mark','Tom','John','Sara','Mia','Emma'],
           'Sales':[200,150,350,125,260,180]}
team_data

{'Company': ['Apple', 'Apple', 'Google', 'Google', 'FB', 'FB'],
 'Person': ['Mark', 'Tom', 'John', 'Sara', 'Mia', 'Emma'],
 'Sales': [200, 150, 350, 125, 260, 180]}

In [95]:
df = pd.DataFrame(team_data)
df

Unnamed: 0,Company,Person,Sales
0,Apple,Mark,200
1,Apple,Tom,150
2,Google,John,350
3,Google,Sara,125
4,FB,Mia,260
5,FB,Emma,180


In [98]:
by_company = df.groupby('Company')
print(by_company)

<pandas.core.groupby.generic.DataFrameGroupBy object at 0x11b021bd0>


In [99]:
# Select the numeric 'Sales' column explicitly: since pandas 2.0 the string
# column 'Person' is no longer dropped silently and averaging it raises TypeError.
by_company[['Sales']].mean()

Unnamed: 0_level_0,Sales
Company,Unnamed: 1_level_1
Apple,175.0
FB,220.0
Google,237.5


In [100]:
# Select the numeric 'Sales' column explicitly: since pandas 2.0 summing the
# whole group also concatenates the strings in 'Person' into an extra column.
by_company[['Sales']].sum()

Unnamed: 0_level_0,Sales
Company,Unnamed: 1_level_1
Apple,350
FB,440
Google,475


In [101]:
# Select the numeric 'Sales' column explicitly: since pandas 2.0 the string
# column 'Person' is no longer dropped silently and std() fails on it.
by_company[['Sales']].std()

Unnamed: 0_level_0,Sales
Company,Unnamed: 1_level_1
Apple,35.355339
FB,56.568542
Google,159.099026


In [102]:
# Restrict to the numeric 'Sales' column before std() (required on pandas >= 2.0,
# where the string column 'Person' is no longer dropped), then pick the 'Apple' row.
by_company[['Sales']].std().loc['Apple']

Sales    35.355339
Name: Apple, dtype: float64

In [104]:
df.groupby('Company').count()

Unnamed: 0_level_0,Person,Sales
Company,Unnamed: 1_level_1,Unnamed: 2_level_1
Apple,2,2
FB,2,2
Google,2,2


In [105]:
df.groupby('Company').max()

Unnamed: 0_level_0,Person,Sales
Company,Unnamed: 1_level_1,Unnamed: 2_level_1
Apple,Tom,200
FB,Mia,260
Google,Sara,350


In [106]:
df.groupby('Company').min()

Unnamed: 0_level_0,Person,Sales
Company,Unnamed: 1_level_1,Unnamed: 2_level_1
Apple,Mark,150
FB,Emma,180
Google,John,125


In [107]:
# groupby method with describe method

df.groupby('Company').describe()

Unnamed: 0_level_0,Sales,Sales,Sales,Sales,Sales,Sales,Sales,Sales
Unnamed: 0_level_1,count,mean,std,min,25%,50%,75%,max
Company,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2
Apple,2.0,175.0,35.355339,150.0,162.5,175.0,187.5,200.0
FB,2.0,220.0,56.568542,180.0,200.0,220.0,240.0,260.0
Google,2.0,237.5,159.099026,125.0,181.25,237.5,293.75,350.0


In [108]:
df.groupby('Company').describe().transpose()

Unnamed: 0,Company,Apple,FB,Google
Sales,count,2.0,2.0,2.0
Sales,mean,175.0,220.0,237.5
Sales,std,35.355339,56.568542,159.099026
Sales,min,150.0,180.0,125.0
Sales,25%,162.5,200.0,181.25
Sales,50%,175.0,220.0,237.5
Sales,75%,187.5,240.0,293.75
Sales,max,200.0,260.0,350.0


In [109]:
df.groupby('Company').describe().transpose()['FB']

Sales  count      2.000000
       mean     220.000000
       std       56.568542
       min      180.000000
       25%      200.000000
       50%      220.000000
       75%      240.000000
       max      260.000000
Name: FB, dtype: float64

## Merging, Joining and Concatenating DataFrames

In [110]:
df1=pd.DataFrame({'A':['A0','A1','A2','A3'],
                  'B':['B0','B1','B2','B3'],
                  'C':['C0','C1','C2','C3'],
                  'D': ['D0','D1','D2','D3']},
                   index = [0, 1, 2, 3])
df1

Unnamed: 0,A,B,C,D
0,A0,B0,C0,D0
1,A1,B1,C1,D1
2,A2,B2,C2,D2
3,A3,B3,C3,D3


In [111]:
df2=pd.DataFrame({'A':['A4','A5','A6','A7'],
                  'B':['B4','B5','B6','B7'],
                  'C':['C4','C5','C6','C7'],
                  'D':['D4','D5','D6','D7']},
                   index = [4, 5, 6, 7])
df2

Unnamed: 0,A,B,C,D
4,A4,B4,C4,D4
5,A5,B5,C5,D5
6,A6,B6,C6,D6
7,A7,B7,C7,D7


In [112]:
df3=pd.DataFrame({'A':['A8','A9','A10','A11'],
                  'B':['B8','B9','B10','B11'],
                  'C':['C8','C9','C10','C11'],
                  'D':['D8','D9','D10','D11']},
                   index = [8, 9, 10, 11])
df3

Unnamed: 0,A,B,C,D
8,A8,B8,C8,D8
9,A9,B9,C9,D9
10,A10,B10,C10,D10
11,A11,B11,C11,D11


In [113]:
# Concatenation

pd.concat([df1,df2,df3])

Unnamed: 0,A,B,C,D
0,A0,B0,C0,D0
1,A1,B1,C1,D1
2,A2,B2,C2,D2
3,A3,B3,C3,D3
4,A4,B4,C4,D4
5,A5,B5,C5,D5
6,A6,B6,C6,D6
7,A7,B7,C7,D7
8,A8,B8,C8,D8
9,A9,B9,C9,D9


In [115]:
pd.concat([df1,df2,df3],axis=1)

Unnamed: 0,A,B,C,D,A.1,B.1,C.1,D.1,A.2,B.2,C.2,D.2
0,A0,B0,C0,D0,,,,,,,,
1,A1,B1,C1,D1,,,,,,,,
2,A2,B2,C2,D2,,,,,,,,
3,A3,B3,C3,D3,,,,,,,,
4,,,,,A4,B4,C4,D4,,,,
5,,,,,A5,B5,C5,D5,,,,
6,,,,,A6,B6,C6,D6,,,,
7,,,,,A7,B7,C7,D7,,,,
8,,,,,,,,,A8,B8,C8,D8
9,,,,,,,,,A9,B9,C9,D9


In [117]:
# Merging

df4=pd.DataFrame({'Key':['K0','K1','K2','K3'],
                   'A':['A0','A1','A2','A3'],
                   'B':['B0','B1','B2','B3']})

df5=pd.DataFrame({'Key':['K0','K1','K2','K3'],
                    'C':['C0','C1','C2','C3'],
                    'D':['D0','D1','D2','D3']})

df4

Unnamed: 0,Key,A,B
0,K0,A0,B0
1,K1,A1,B1
2,K2,A2,B2
3,K3,A3,B3


In [118]:
df5

Unnamed: 0,Key,C,D
0,K0,C0,D0
1,K1,C1,D1
2,K2,C2,D2
3,K3,C3,D3


In [119]:
pd.merge(df4,df5,how='inner',on='Key')

Unnamed: 0,Key,A,B,C,D
0,K0,A0,B0,C0,D0
1,K1,A1,B1,C1,D1
2,K2,A2,B2,C2,D2
3,K3,A3,B3,C3,D3


In [120]:
# Join

df6=pd.DataFrame({'A':['A0','A1','A2','A3'],
                  'B':['B0','B1','B2','B3']},
                    index=['K0','K1','K2','K3'])

df7=pd.DataFrame({'C':['C0','C1','C2','C3'],
                  'D':['D0','D1','D2','D3']},
                    index=['K0','K1','K2','K3'])

df6

Unnamed: 0,A,B
K0,A0,B0
K1,A1,B1
K2,A2,B2
K3,A3,B3


In [121]:
df7

Unnamed: 0,C,D
K0,C0,D0
K1,C1,D1
K2,C2,D2
K3,C3,D3


In [122]:
df6.join(df7)

Unnamed: 0,A,B,C,D
K0,A0,B0,C0,D0
K1,A1,B1,C1,D1
K2,A2,B2,C2,D2
K3,A3,B3,C3,D3


## Pandas Operations

In [123]:
df=pd.DataFrame({'col1':[1,2,3,4],
                 'col2':[45,55,65,45],
                 'col3':['asd','jkl','qwe','xyz']})
df

Unnamed: 0,col1,col2,col3
0,1,45,asd
1,2,55,jkl
2,3,65,qwe
3,4,45,xyz


In [125]:
df['col2'].unique()

array([45, 55, 65])

In [126]:
df['col2'].nunique()

3

In [127]:
df['col2'].value_counts()

45    2
55    1
65    1
Name: col2, dtype: int64

In [128]:
# Apply method

def multiply2(x):
    """Return ``x`` multiplied by 2.

    Works for any operand that supports ``* 2`` (numbers are doubled,
    sequences such as strings or lists are repeated twice).
    """
    doubled = x * 2
    return doubled

In [129]:
df['col1'].sum()

10

In [130]:
df['col1'].apply(multiply2)

0    2
1    4
2    6
3    8
Name: col1, dtype: int64

In [131]:
df['col3'].apply(len)

0    3
1    3
2    3
3    3
Name: col3, dtype: int64

In [132]:
df.columns

Index(['col1', 'col2', 'col3'], dtype='object')

In [133]:
df.index

RangeIndex(start=0, stop=4, step=1)

In [135]:
df.sort_values('col2')

Unnamed: 0,col1,col2,col3
0,1,45,asd
3,4,45,xyz
1,2,55,jkl
2,3,65,qwe


In [136]:
df.isnull()

Unnamed: 0,col1,col2,col3
0,False,False,False
1,False,False,False
2,False,False,False
3,False,False,False


In [137]:
# Pivot table method in data frames

d={'A':['Red','Red','Red','Green','Green','Green'],
   'B':['One','One','two','two','one','one'],
   'C':['x','y','x','y','x','y'],
   'D':[1,3,5,2,4,1]}
d

{'A': ['Red', 'Red', 'Red', 'Green', 'Green', 'Green'],
 'B': ['One', 'One', 'two', 'two', 'one', 'one'],
 'C': ['x', 'y', 'x', 'y', 'x', 'y'],
 'D': [1, 3, 5, 2, 4, 1]}

In [138]:
df=pd.DataFrame(d)
df

Unnamed: 0,A,B,C,D
0,Red,One,x,1
1,Red,One,y,3
2,Red,two,x,5
3,Green,two,y,2
4,Green,one,x,4
5,Green,one,y,1


In [139]:
df.pivot_table(values='D',index=['A','B'],columns='C')

Unnamed: 0_level_0,C,x,y
A,B,Unnamed: 2_level_1,Unnamed: 3_level_1
Green,one,4.0,1.0
Green,two,,2.0
Red,One,1.0,3.0
Red,two,5.0,


## Reading & Writing Files

In [147]:
# Reading and writing CSV Files

b = pd.read_csv('test.csv')
b

Unnamed: 0,a,b,c,d
0,0,1,2,3
1,4,5,6,7
2,8,9,10,11
3,12,13,14,15


In [148]:
d = {'name':['Alice','Bella','Sara','Emily'],
     'number':[18,20,22,24],
     'score':[85,87,83,89]}

d

{'name': ['Alice', 'Bella', 'Sara', 'Emily'],
 'number': [18, 20, 22, 24],
 'score': [85, 87, 83, 89]}

In [149]:
df=pd.DataFrame(d)
df

Unnamed: 0,name,number,score
0,Alice,18,85
1,Bella,20,87
2,Sara,22,83
3,Emily,24,89


In [150]:
df.to_csv('test2.csv',index=False)

In [151]:
pd.read_csv('test2.csv')

Unnamed: 0,name,number,score
0,Alice,18,85
1,Bella,20,87
2,Sara,22,83
3,Emily,24,89


In [153]:
# Reading and writing Excel Files

pd.read_excel('test.xlsx', sheet_name='Sheet1')

Unnamed: 0,a,b,c,d
0,0,1,2,3
1,4,5,6,7
2,8,9,10,11
3,12,13,14,15


In [154]:
pd.read_excel('test.xlsx')

Unnamed: 0,a,b,c,d
0,0,1,2,3
1,4,5,6,7
2,8,9,10,11
3,12,13,14,15


In [155]:
d = {'name':['Alice','Bella','Sara','Emily'],
   'number':[18,20,22,24],
   'score':[85,87,83,89]}

d

{'name': ['Alice', 'Bella', 'Sara', 'Emily'],
 'number': [18, 20, 22, 24],
 'score': [85, 87, 83, 89]}

In [156]:
df=pd.DataFrame(d)
df

Unnamed: 0,name,number,score
0,Alice,18,85
1,Bella,20,87
2,Sara,22,83
3,Emily,24,89


In [157]:
# Write the frame to sheet 'NewSheet' of test2.xlsx.
# NOTE: unlike the to_csv call earlier, index=False is not passed here, so the row
# index is written as well and shows up as an extra 'Unnamed: 0' column on read-back.
df.to_excel('test2.xlsx',sheet_name='NewSheet')

In [160]:
pd.read_excel('test2.xlsx',sheet_name='NewSheet')

Unnamed: 0.1,Unnamed: 0,name,number,score
0,0,Alice,18,85
1,1,Bella,20,87
2,2,Sara,22,83
3,3,Emily,24,89
