In [1]:
import numpy as np
import pandas as pd
from pandas import Series, DataFrame

In [10]:
# Build a Series from a plain list; no index is given, so the labels are 0..10.
firstSeries = Series(data=[1, 3, 6, 7, 89, 4, 2, 99, 6, 4, 1])
# Show the underlying values as a NumPy array
# (firstSeries alone shows labels and values, firstSeries.index only the labels).
firstSeries.values

array([ 1,  3,  6,  7, 89,  4,  2, 99,  6,  4,  1], dtype=int64)

In [74]:
# Creating a Series whose index holds coin names instead of integer positions
coins = Series(
    data=[0.01, 0.05, 0.10, 0.25],
    index=['penny', 'nickle', 'dime', 'quarter'],
)
print(coins)                    # the whole Series
print(coins.loc['dime'])        # one value looked up by its label
print(coins.loc[coins > 0.05])  # boolean mask: only the values above 0.05

penny      0.01
nickle     0.05
dime       0.10
quarter    0.25
dtype: float64
0.1
dime       0.10
quarter    0.25
dtype: float64


In [75]:
# Converting a Series to a Python dictionary:
# the index labels become the keys and the Series values become the dict values.
coinDict = coins.to_dict()
coinDict

{'penny': 0.01, 'nickle': 0.05, 'dime': 0.1, 'quarter': 0.25}

In [76]:
# Converting back to a Series: the dict keys become the index labels again
coinsSecond = Series(coinDict)
coinsSecond

penny      0.01
nickle     0.05
dime       0.10
quarter    0.25
dtype: float64

In [77]:
# Build a Series from the dict against an explicit label list.
# Labels present in coinDict keep their value; 'vj' has no entry, so it is NaN.
coinLabels = ['penny', 'nickle', 'dime', 'quarter', 'vj']
coinsThird = Series(data=coinDict, index=coinLabels)
coinsThird

penny      0.01
nickle     0.05
dime       0.10
quarter    0.25
vj          NaN
dtype: float64

In [30]:
# Check whether the 'vj' entry is a missing value (it has no entry in coinDict)
pd.isnull(coinsThird['vj'])

True

In [31]:
# Adding two Series together: values are paired by index label, not by position.
# firstSeries is labelled 0..10 and coinsThird by coin name, so no label is shared
# and every entry of the result is NaN.
firstSeries + coinsThird

0         NaN
1         NaN
2         NaN
3         NaN
4         NaN
5         NaN
6         NaN
7         NaN
8         NaN
9         NaN
10        NaN
dime      NaN
nickle    NaN
penny     NaN
quarter   NaN
vj        NaN
dtype: float64

In [34]:
# Labeling Series Indexes: give the index itself a name; it is shown as a header
# line above the labels when the Series is displayed.
# NOTE: this assignment modifies coinsThird in place.
coinsThird.index.name = 'Coins'
coinsThird

Coins
penny      0.01
nickle     0.05
dime       0.10
quarter    0.25
vj          NaN
dtype: float64

In [37]:
# Checking for Unique Values and their Counts
# One entry per character of 'abacab': a, b, a, c, a, b
ser1 = Series(['a', 'b', 'a', 'c', 'a', 'b'])
print(ser1.unique())        # distinct values, in order of first appearance
print(ser1.value_counts())  # how many times each distinct value occurs

['a' 'b' 'c']
a    3
b    2
c    1
dtype: int64


In [4]:
# Rank and Sort
# Values 0, 1, 2 stored under the deliberately unordered labels C, A, B.
ser1 = Series(range(3), index=list('CAB'))

print(ser1.sort_index())   # ordered by label: A, B, C
print(ser1.sort_values())  # ordered by value: 0, 1, 2
print(ser1.rank())         # rank of each value (1.0 = smallest), original order kept

A    1
B    2
C    0
dtype: int64
C    0
A    1
B    2
dtype: int64
C    1.0
A    2.0
B    3.0
dtype: float64


In [14]:
# DATAFRAMES
# A 4x3 frame holding 0..11; rows and columns both get default integer labels.
firstDataframe = DataFrame(data=np.arange(12).reshape((4, 3)))
firstDataframe

Unnamed: 0,0,1,2
0,0,1,2
1,3,4,5
2,6,7,8
3,9,10,11


In [19]:
# Constructing a DataFrame from a Dictionary:
# each key becomes a column name and each list supplies that column's values.
firstDict = {
    'city': ['SF', 'LA', 'NYC'],
    'population': [32222, 32111, 2213456],
}
convertedDF = DataFrame(data=firstDict)
convertedDF

Unnamed: 0,city,population
0,SF,32222
1,LA,32111
2,NYC,2213456


In [23]:
# Adding a Series to an existing DataFrame as a new column.
# The assignment aligns on the index: newSeries only has labels 3 and 1, so
# rows 0 and 2 of the new 'color' column are left as missing values.
# NOTE: this modifies firstDataframe in place.
newSeries = Series(["red","blue"],index=[3,1])
firstDataframe['color'] = newSeries
firstDataframe

Unnamed: 0,0,1,2,color
0,0,1,2,
1,3,4,5,blue
2,6,7,8,
3,9,10,11,red


In [45]:
# Reading a DataFrame from a webpage via the clipboard, using the pandas
# read_clipboard() function.
# Manual step: the table has to be selected and copied (edit/copy) in the
# browser; read_clipboard() parses whatever text is on the clipboard.
# NOTE(review): read_clipboard() is called right after webbrowser.open() with
# no pause, so on a fresh "Run All" the clipboard may not hold the table yet --
# re-run this cell after copying.
import webbrowser
website = 'http://en.wikipedia.org/wiki/NFL_win-loss_records'
webbrowser.open(website)
# sep='\t+' is a regular expression (one or more tab characters) used as the
# column separator; engine='python' is requested explicitly alongside it.
nflDF = pd.read_clipboard(engine='python', sep='\t+')
nflDF

Unnamed: 0,Rank,Team,GP,Won,Lost,Tied,Pct.,First NFL season,Division
0,1,Green Bay Packers,1401,782,581,38,0.572,1921,NFC North
1,2,Dallas Cowboys,947,538,403,6,0.571,1960,NFC East
2,3,New England Patriots,949,529,411,9,0.562,1960,AFC East
3,4,Chicago Bears,1435,783,610,42,0.56,1920,NFC North
4,5,Baltimore Ravens,417,233,183,1,0.56,1996,AFC North
5,6,Miami Dolphins,865,476,385,4,0.553,1966,AFC East
6,7,Minnesota Vikings,935,503,421,11,0.544,1961,NFC North
7,8,Kansas City Chiefs,949,507,430,12,0.541,1960,AFC West
8,9,Pittsburgh Steelers,1237,652,563,22,0.536,1933,AFC North
9,10,San Francisco 49ers,1067,561,492,14,0.532,1950,NFC West


In [54]:
# Grab column names (returned as an Index object holding the column labels)
nflDF.columns

Index(['Rank', 'Team', 'GP', 'Won', 'Lost', 'Tied', 'Pct.', 'First NFL season',
       'Division'],
      dtype='object')

In [55]:
# Two ways to select a single column:
print(nflDF.Team) # attribute access: only usable when the column name is a single word (a valid Python identifier)
print(nflDF['Team']) # bracket access: works for any column name, including ones with spaces such as 'First NFL season'

0         Green Bay Packers
1            Dallas Cowboys
2      New England Patriots
3             Chicago Bears
4          Baltimore Ravens
5            Miami Dolphins
6         Minnesota Vikings
7        Kansas City Chiefs
8       Pittsburgh Steelers
9       San Francisco 49ers
10    Indianapolis Colts[d]
11          New York Giants
12           Denver Broncos
13        Las Vegas Raiders
14         Seattle Seahawks
15         Los Angeles Rams
16     Los Angeles Chargers
17    Washington Commanders
18         Tennessee Titans
19      Cleveland Browns[e]
20      Philadelphia Eagles
21            Buffalo Bills
22        Carolina Panthers
23       New Orleans Saints
24            Detroit Lions
25       Cincinnati Bengals
26            New York Jets
27          Atlanta Falcons
28           Houston Texans
29        Arizona Cardinals
30     Jacksonville Jaguars
31     Tampa Bay Buccaneers
Name: Team, dtype: object
0         Green Bay Packers
1            Dallas Cowboys
2      New England Pat

In [56]:
# Display specific data columns: build a new DataFrame from nflDF that keeps
# only the listed columns, in the listed order.
DataFrame(nflDF,columns=['Rank','Team','GP'])

Unnamed: 0,Rank,Team,GP
0,1,Green Bay Packers,1401
1,2,Dallas Cowboys,947
2,3,New England Patriots,949
3,4,Chicago Bears,1435
4,5,Baltimore Ravens,417
5,6,Miami Dolphins,865
6,7,Minnesota Vikings,935
7,8,Kansas City Chiefs,949
8,9,Pittsburgh Steelers,1237
9,10,San Francisco 49ers,1067


In [57]:
# Display the top 5 rows (head() called with no argument)
nflDF.head()

Unnamed: 0,Rank,Team,GP,Won,Lost,Tied,Pct.,First NFL season,Division
0,1,Green Bay Packers,1401,782,581,38,0.572,1921,NFC North
1,2,Dallas Cowboys,947,538,403,6,0.571,1960,NFC East
2,3,New England Patriots,949,529,411,9,0.562,1960,AFC East
3,4,Chicago Bears,1435,783,610,42,0.56,1920,NFC North
4,5,Baltimore Ravens,417,233,183,1,0.56,1996,AFC North


In [58]:
# Display a specific number of rows from the top (here the first 3)
nflDF.head(3)

Unnamed: 0,Rank,Team,GP,Won,Lost,Tied,Pct.,First NFL season,Division
0,1,Green Bay Packers,1401,782,581,38,0.572,1921,NFC North
1,2,Dallas Cowboys,947,538,403,6,0.571,1960,NFC East
2,3,New England Patriots,949,529,411,9,0.562,1960,AFC East


In [59]:
# Display the last 5 rows
nflDF.tail(5)

Unnamed: 0,Rank,Team,GP,Won,Lost,Tied,Pct.,First NFL season,Division
27,28,Atlanta Falcons,865,376,483,6,0.438,1966,NFC South
28,29,Houston Texans,321,139,182,0,0.433,2002,AFC South
29,30,Arizona Cardinals,1395,577,777,41,0.428,1920,NFC West
30,31,Jacksonville Jaguars,433,180,253,0,0.416,1995,AFC South
31,32,Tampa Bay Buccaneers,725,291,433,1,0.402,1976,NFC South


In [66]:
# Rename index and columns (dict method): each dict maps old label -> new label.
# Only the listed labels change; column 2 is not in the mapping and keeps its name.
# rename() returns a new DataFrame, so the name is rebound to the result instead
# of using inplace=True (no hidden in-place mutation, and the call stays chainable).
firstDataframe = firstDataframe.rename(
    index={0: 'a', 1: 'b', 2: 'c', 3: 'd'},
    columns={0: 'column1', 1: 'column2'},
)
firstDataframe

Unnamed: 0,column1,column2,2,color
a,0,1,2,
b,3,4,5,blue
c,6,7,8,
d,9,10,11,red


In [70]:
# Rename a specific column; every other column label is left untouched.
# rename() returns a new DataFrame, so the name is rebound to the result instead
# of using inplace=True (no hidden in-place mutation, and the call stays chainable).
nflDF = nflDF.rename(columns={'First NFL season': 'First Season'})
nflDF.columns

Index(['Rank', 'Team', 'GP', 'Won', 'Lost', 'Tied', 'Pct.', 'First Season',
       'Division'],
      dtype='object')

In [79]:
# Index Objects
# Keep a Series' index as a standalone object (an Index holding the labels)
coin_index = coins.index
coin_index

Index(['penny', 'nickle', 'dime', 'quarter'], dtype='object')

In [93]:
# Reindexing
# reindex() returns a new Series laid out on the requested labels. 'E' and 'F'
# do not exist in ser1, so they come out as NaN, and the NaN turns the dtype
# from int64 into float64 (ser2['C'] is 3.0, not 3).
ser1 = Series([1, 2, 3, 4], index=list('ABCD'))
ser2 = ser1.reindex(index=list('ABCDEF'))
ser2

A    1.0
B    2.0
C    3.0
D    4.0
E    NaN
F    NaN
dtype: float64

In [94]:
# Adds a new label 'G' with a value of 0 via fill_value. 'E' and 'F' are still null:
# fill_value is only used for labels that are missing from ser2's index.
# NOTE: ser2 is float64 (reindexing the int64 ser1 introduced NaN), so the fill shows as 0.0.
# The result is not assigned, so ser2 itself is unchanged.
ser2.reindex(['A','B','C','D','E','F','G'],fill_value=0)

A    1.0
B    2.0
C    3.0
D    4.0
E    NaN
F    NaN
G    0.0
dtype: float64

In [95]:
# Changes the order of the index:value pairs (values stay attached to their labels): B:2 is now ahead of A:1.
# 'G' is NaN here because ser2 has no 'G' label and no fill_value is given.
ser2.reindex(['B','A','C','D','E','F','G'])

B    2.0
A    1.0
C    3.0
D    4.0
E    NaN
F    NaN
G    NaN
dtype: float64

In [96]:
# Reindexing with a subset of the labels keeps only those labels: A and B are left out of the result.
ser2.reindex(['C','D','E','F'])

C    3.0
D    4.0
E    NaN
F    NaN
dtype: float64

In [97]:
# A:1 and B:2 are present again because reindex never modifies ser2 -- every call
# returns a new Series built from ser2. 'G' is not a label of ser2, so it is NaN rather than 0.
ser2.reindex(['A','B','C','D','E','F','G'])

A    1.0
B    2.0
C    3.0
D    4.0
E    NaN
F    NaN
G    NaN
dtype: float64

In [5]:
# Filling values between index labels (forward fill):
# each new label without a value takes the value of the closest original label before it.
ser3 = Series(data=['USA', 'Mexico', 'Canada'], index=[0, 5, 10])
ser3.reindex(index=range(15), method='ffill')

0        USA
1        USA
2        USA
3        USA
4        USA
5     Mexico
6     Mexico
7     Mexico
8     Mexico
9     Mexico
10    Canada
11    Canada
12    Canada
13    Canada
14    Canada
dtype: object

In [11]:
# Reindexing the rows of a DataFrame
from numpy.random import randn

# Seed the global NumPy random state so the random values (and the table shown
# below) are identical on every Restart & Run All.
np.random.seed(42)
dframe = DataFrame(
    randn(25).reshape(5, 5),
    index=['A', 'B', 'D', 'E', 'F'],
    columns=['col1', 'col2', 'col3', 'col4', 'col5'],
)
# Row label 'C' does not exist in dframe, so reindex inserts it as an all-NaN row.
dframe2 = dframe.reindex(['A', 'B', 'C', 'D', 'E', 'F'])
dframe2

Unnamed: 0,col1,col2,col3,col4,col5
A,-0.303391,-0.20263,1.795576,-0.771724,-1.83474
B,1.118061,-0.302933,-0.494248,0.910106,0.439971
C,,,,,
D,-0.468245,1.609098,-1.007292,1.800812,1.399996
E,-0.716623,0.926632,-1.179988,-1.634252,0.46256
F,0.551906,-1.093634,0.913135,-0.096347,0.794542


In [12]:
# Reindexing DataFrame columns: 'col6' is not an existing column, so it is added with all values missing
dframe2.reindex(columns=['col1','col2','col3','col4','col5','col6'])

Unnamed: 0,col1,col2,col3,col4,col5,col6
A,-0.303391,-0.20263,1.795576,-0.771724,-1.83474,
B,1.118061,-0.302933,-0.494248,0.910106,0.439971,
C,,,,,,
D,-0.468245,1.609098,-1.007292,1.800812,1.399996,
E,-0.716623,0.926632,-1.179988,-1.634252,0.46256,
F,0.551906,-1.093634,0.913135,-0.096347,0.794542,


In [25]:
# Drop Entry – Rows & columns
# drop() returns a new object each time; ser1 and dframe themselves are not changed.
# Only the last expression of the cell is displayed, so the results of the
# first two drop() calls are discarded.
ser1 = Series(np.arange(3),index=['A','B','C'])
ser1.drop('B') # drop one Series entry by its label

dframe.drop('col4',axis=1) #drop column
# OR
dframe.drop('B',axis=0) #drop a row

Unnamed: 0,col1,col2,col3,col4,col5
A,-0.303391,-0.20263,1.795576,-0.771724,-1.83474
D,-0.468245,1.609098,-1.007292,1.800812,1.399996
E,-0.716623,0.926632,-1.179988,-1.634252,0.46256
F,0.551906,-1.093634,0.913135,-0.096347,0.794542


In [29]:
# Selecting Entries in a Series
# np.arange(5) doubled gives 0, 2, 4, 6, 8, labelled 'A'..'E'.
series1 = Series(np.arange(5) * 2, index=list('ABCDE'))
series1

A    0
B    2
C    4
D    6
E    8
dtype: int32

In [45]:
# Other ways to select entries (left commented out):
# series1['A']
# series1[0]
# series1[1:3]
series1[series1>4] # boolean mask: entries greater than 4 (result is not displayed)
series1[series1>4] = 10 #changes D and E (the only values greater than 4); modifies series1 in place
series1
# series1[['A','B']]

A     0
B     2
C     4
D    10
E    10
dtype: int32

In [67]:
# Selecting Entries in a DataFrame
# Only the last expression of the cell is displayed; the ones before it just
# show the different selection forms.
dframe = DataFrame(
    np.arange(25).reshape(5, 5),
    index=['NYC', 'LA', 'SF', 'DC', 'CHI'],
    columns=list('ABCDE'),
)
dframe[dframe > 10]           # boolean-frame mask over the whole frame
dframe.loc['LA', 'B']         # single cell: row 'LA', column 'B'
dframe.loc[:, ['A', 'B']]     # a subset of the columns
dframe.loc[dframe['C'] > 10]  # rows whose 'C' value is greater than 10
dframe > 10                   # element-wise comparison -> frame of booleans

Unnamed: 0,A,B,C,D,E
NYC,False,False,False,False,False
LA,False,False,False,False,False
SF,False,True,True,True,True
DC,True,True,True,True,True
CHI,True,True,True,True,True


In [77]:
# Data Alignment
# Arithmetic pairs values by label. 'E' exists only in seriesSecond, so the
# sum has NaN there, and the NaN makes the result float64.
seriesFirst = Series(range(4), index=list('ABCD'))
seriesSecond = Series(range(4, 9), index=list('ABCDE'))
seriesFirst + seriesSecond

A     4.0
B     6.0
C     8.0
D    10.0
E     NaN
dtype: float64

In [81]:
# fill_value=0 stands in for a label that is missing from one of the two Series,
# so 'E' becomes 8 + 0 = 8.0 instead of NaN.
seriesSecond.add(seriesFirst,fill_value=0)

A     4.0
B     6.0
C     8.0
D    10.0
E     8.0
dtype: float64

In [None]:
# Operations Between a Series and a DataFrame
