# Week 3 DataFrames

### Merging DataFrames

In [1]:
import pandas as pd

df1=pd.DataFrame([{'Name':'Kelly','Role':'Directorof HR'},
                  {'Name':'Sally','Role':'Course Liasion'},
                  {'Name':'James','Role':'Grader'}])
df1=df1.set_index('Name')

df2=pd.DataFrame([{'Name':'James','School':'Business'},
                  {'Name':'Mike','School':'Law'},
                  {'Name':'Sally','School':'Engineering'}])
df2=df2.set_index('Name')

print(df1.head())
print(df2.head())

                 Role
Name                 
Kelly   Directorof HR
Sally  Course Liasion
James          Grader
            School
Name              
James     Business
Mike           Law
Sally  Engineering


In [2]:
df1

Unnamed: 0_level_0,Role
Name,Unnamed: 1_level_1
Kelly,Directorof HR
Sally,Course Liasion
James,Grader


In [3]:
df2

Unnamed: 0_level_0,School
Name,Unnamed: 1_level_1
James,Business
Mike,Law
Sally,Engineering


In [4]:
# Using merge lets get the all data in one table

pd.merge(df1,df2,how='outer',left_index=True,right_index=True)

Unnamed: 0_level_0,Role,School
Name,Unnamed: 1_level_1,Unnamed: 2_level_1
James,Grader,Business
Kelly,Directorof HR,
Mike,,Law
Sally,Course Liasion,Engineering


In [5]:
# Use merge and find the common of both i.e. intersection

pd.merge(df1,df2,how='inner',left_index=True,right_index=True)

Unnamed: 0_level_0,Role,School
Name,Unnamed: 1_level_1,Unnamed: 2_level_1
Sally,Course Liasion,Engineering
James,Grader,Business


In [6]:
# Keep first df as it is and concate the second one

pd.merge(df1,df2,how='left',left_index=True,right_index=True)

Unnamed: 0_level_0,Role,School
Name,Unnamed: 1_level_1,Unnamed: 2_level_1
Kelly,Directorof HR,
Sally,Course Liasion,Engineering
James,Grader,Business


In [7]:
# Keep second df as it is and concate the first one

pd.merge(df1,df2,how='right',left_index=True,right_index=True)

Unnamed: 0_level_0,Role,School
Name,Unnamed: 1_level_1,Unnamed: 2_level_1
James,Grader,Business
Mike,,Law
Sally,Course Liasion,Engineering


In [8]:
# Remove index from both and merge using on parameter

df1=df1.reset_index()
df2=df2.reset_index()

pd.merge(df1,df2,how='right', on='Name')

Unnamed: 0,Name,Role,School
0,James,Grader,Business
1,Mike,,Law
2,Sally,Course Liasion,Engineering


In [9]:
# Conflict between two dataframes,

df1=pd.DataFrame([{'Name':'Kelly','Role':'Directorof HR','Location':'State Street'},
                  {'Name':'Sally','Role':'Course Liasion','Location':'Washington Avenue'},
                  {'Name':'James','Role':'Grader','Location':'Washington Avenue'}])
df1=df1.set_index('Name')

df2=pd.DataFrame([{'Name':'James','School':'Business','Location':'1024 Billiord Avenue'},
                  {'Name':'Mike','School':'Law','Location':'Fraternity House #22'},
                  {'Name':'Sally','School':'Engineering','Location':'512 Wilson Crescent'}])

pd.merge(df1,df2, how='left',on='Name')        # it will resolve the conflict using _x and _y

Unnamed: 0,Name,Role,Location_x,School,Location_y
0,Kelly,Directorof HR,State Street,,
1,Sally,Course Liasion,Washington Avenue,Engineering,512 Wilson Crescent
2,James,Grader,Washington Avenue,Business,1024 Billiord Avenue


In [10]:
df1=pd.DataFrame([{'First Name':'Kelly','Last Name': 'Desjardins','Role':'Directorof HR'},
                  {'First Name':'Sally','Last Name': 'Brooks','Role':'Course Liasion'},
                  {'First Name':'James','Last Name': 'Wilde','Role':'Grader'}])

df2=pd.DataFrame([{'First Name':'James','Last Name': 'Hammond','School':'Business'},
                  {'First Name':'Mike','Last Name': 'Smith','School':'Law'},
                  {'First Name':'Sally','Last Name': 'Brooks','School':'Engineering'}])

pd.merge(df1,df2,how='inner',on=['First Name','Last Name'])   # Common in both only

Unnamed: 0,First Name,Last Name,Role,School
0,Sally,Brooks,Course Liasion,Engineering


### Idiomatic Pandas - Make Code Pandorable

In [1]:
import pandas as pd
import numpy as np
import timeit

In [2]:
df=pd.read_csv('census.csv')
df.head()

Unnamed: 0,SUMLEV,REGION,DIVISION,STATE,COUNTY,STNAME,CTYNAME,CENSUS2010POP,ESTIMATESBASE2010,POPESTIMATE2010,...,RDOMESTICMIG2011,RDOMESTICMIG2012,RDOMESTICMIG2013,RDOMESTICMIG2014,RDOMESTICMIG2015,RNETMIG2011,RNETMIG2012,RNETMIG2013,RNETMIG2014,RNETMIG2015
0,40,3,6,1,0,Alabama,Alabama,4779736,4780127,4785161,...,0.002295,-0.193196,0.381066,0.582002,-0.467369,1.030015,0.826644,1.383282,1.724718,0.712594
1,50,3,6,1,1,Alabama,Autauga County,54571,54571,54660,...,7.242091,-2.915927,-3.012349,2.265971,-2.530799,7.606016,-2.626146,-2.722002,2.59227,-2.187333
2,50,3,6,1,3,Alabama,Baldwin County,182265,182265,183193,...,14.83296,17.647293,21.845705,19.243286,17.197872,15.844176,18.559627,22.727626,20.317142,18.293499
3,50,3,6,1,5,Alabama,Barbour County,27457,27457,27341,...,-4.728132,-2.50069,-7.056824,-3.904217,-10.543299,-4.874741,-2.758113,-7.167664,-3.978583,-10.543299
4,50,3,6,1,7,Alabama,Bibb County,22915,22919,22861,...,-5.527043,-5.068871,-6.201001,-0.177537,0.177258,-5.088389,-4.363636,-5.403729,0.754533,1.107861


In [4]:
(df.where(df['SUMLEV']==50)
     .dropna()
     .set_index(['STNAME','CTYNAME'])
     .rename(columns={'ESTIMATESBASE2010': 'Estimates Base 2010'}))

Unnamed: 0_level_0,Unnamed: 1_level_0,SUMLEV,REGION,DIVISION,STATE,COUNTY,CENSUS2010POP,Estimates Base 2010,POPESTIMATE2010,POPESTIMATE2011,POPESTIMATE2012,...,RDOMESTICMIG2011,RDOMESTICMIG2012,RDOMESTICMIG2013,RDOMESTICMIG2014,RDOMESTICMIG2015,RNETMIG2011,RNETMIG2012,RNETMIG2013,RNETMIG2014,RNETMIG2015
STNAME,CTYNAME,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1
Alabama,Autauga County,50.0,3.0,6.0,1.0,1.0,54571.0,54571.0,54660.0,55253.0,55175.0,...,7.242091,-2.915927,-3.012349,2.265971,-2.530799,7.606016,-2.626146,-2.722002,2.592270,-2.187333
Alabama,Baldwin County,50.0,3.0,6.0,1.0,3.0,182265.0,182265.0,183193.0,186659.0,190396.0,...,14.832960,17.647293,21.845705,19.243286,17.197872,15.844176,18.559627,22.727626,20.317142,18.293499
Alabama,Barbour County,50.0,3.0,6.0,1.0,5.0,27457.0,27457.0,27341.0,27226.0,27159.0,...,-4.728132,-2.500690,-7.056824,-3.904217,-10.543299,-4.874741,-2.758113,-7.167664,-3.978583,-10.543299
Alabama,Bibb County,50.0,3.0,6.0,1.0,7.0,22915.0,22919.0,22861.0,22733.0,22642.0,...,-5.527043,-5.068871,-6.201001,-0.177537,0.177258,-5.088389,-4.363636,-5.403729,0.754533,1.107861
Alabama,Blount County,50.0,3.0,6.0,1.0,9.0,57322.0,57322.0,57373.0,57711.0,57776.0,...,1.807375,-1.177622,-1.748766,-2.062535,-1.369970,1.859511,-0.848580,-1.402476,-1.577232,-0.884411
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
Wyoming,Sweetwater County,50.0,4.0,8.0,56.0,37.0,43806.0,43806.0,43593.0,44041.0,45104.0,...,1.072643,16.243199,-5.339774,-14.252889,-14.248864,1.255221,16.243199,-5.295460,-14.075283,-14.070195
Wyoming,Teton County,50.0,4.0,8.0,56.0,39.0,21294.0,21294.0,21297.0,21482.0,21697.0,...,-1.589565,0.972695,19.525929,14.143021,-0.564849,0.654527,2.408578,21.160658,16.308671,1.520747
Wyoming,Uinta County,50.0,4.0,8.0,56.0,41.0,21118.0,21118.0,21102.0,20912.0,20989.0,...,-17.755986,-4.916350,-6.902954,-14.215862,-12.127022,-18.136812,-5.536861,-7.521840,-14.740608,-12.606351
Wyoming,Washakie County,50.0,4.0,8.0,56.0,43.0,8533.0,8533.0,8545.0,8469.0,8443.0,...,-11.637475,-0.827815,-2.013502,-17.781491,1.682288,-11.990126,-1.182592,-2.250385,-18.020168,1.441961


In [6]:
df.head()

Unnamed: 0,SUMLEV,REGION,DIVISION,STATE,COUNTY,STNAME,CTYNAME,CENSUS2010POP,ESTIMATESBASE2010,POPESTIMATE2010,...,RDOMESTICMIG2011,RDOMESTICMIG2012,RDOMESTICMIG2013,RDOMESTICMIG2014,RDOMESTICMIG2015,RNETMIG2011,RNETMIG2012,RNETMIG2013,RNETMIG2014,RNETMIG2015
0,40,3,6,1,0,Alabama,Alabama,4779736,4780127,4785161,...,0.002295,-0.193196,0.381066,0.582002,-0.467369,1.030015,0.826644,1.383282,1.724718,0.712594
1,50,3,6,1,1,Alabama,Autauga County,54571,54571,54660,...,7.242091,-2.915927,-3.012349,2.265971,-2.530799,7.606016,-2.626146,-2.722002,2.59227,-2.187333
2,50,3,6,1,3,Alabama,Baldwin County,182265,182265,183193,...,14.83296,17.647293,21.845705,19.243286,17.197872,15.844176,18.559627,22.727626,20.317142,18.293499
3,50,3,6,1,5,Alabama,Barbour County,27457,27457,27341,...,-4.728132,-2.50069,-7.056824,-3.904217,-10.543299,-4.874741,-2.758113,-7.167664,-3.978583,-10.543299
4,50,3,6,1,7,Alabama,Bibb County,22915,22919,22861,...,-5.527043,-5.068871,-6.201001,-0.177537,0.177258,-5.088389,-4.363636,-5.403729,0.754533,1.107861


In [9]:
df=df[df['SUMLEV']==50]
df.set_index(['STNAME','CTYNAME'],inplace=True)
df.rename(columns={'ESTIMATESBASE2010': 'Estimates Base 2010'})

Unnamed: 0_level_0,Unnamed: 1_level_0,SUMLEV,REGION,DIVISION,STATE,COUNTY,CENSUS2010POP,Estimates Base 2010,POPESTIMATE2010,POPESTIMATE2011,POPESTIMATE2012,...,RDOMESTICMIG2011,RDOMESTICMIG2012,RDOMESTICMIG2013,RDOMESTICMIG2014,RDOMESTICMIG2015,RNETMIG2011,RNETMIG2012,RNETMIG2013,RNETMIG2014,RNETMIG2015
STNAME,CTYNAME,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1
Alabama,Autauga County,50,3,6,1,1,54571,54571,54660,55253,55175,...,7.242091,-2.915927,-3.012349,2.265971,-2.530799,7.606016,-2.626146,-2.722002,2.592270,-2.187333
Alabama,Baldwin County,50,3,6,1,3,182265,182265,183193,186659,190396,...,14.832960,17.647293,21.845705,19.243286,17.197872,15.844176,18.559627,22.727626,20.317142,18.293499
Alabama,Barbour County,50,3,6,1,5,27457,27457,27341,27226,27159,...,-4.728132,-2.500690,-7.056824,-3.904217,-10.543299,-4.874741,-2.758113,-7.167664,-3.978583,-10.543299
Alabama,Bibb County,50,3,6,1,7,22915,22919,22861,22733,22642,...,-5.527043,-5.068871,-6.201001,-0.177537,0.177258,-5.088389,-4.363636,-5.403729,0.754533,1.107861
Alabama,Blount County,50,3,6,1,9,57322,57322,57373,57711,57776,...,1.807375,-1.177622,-1.748766,-2.062535,-1.369970,1.859511,-0.848580,-1.402476,-1.577232,-0.884411
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
Wyoming,Sweetwater County,50,4,8,56,37,43806,43806,43593,44041,45104,...,1.072643,16.243199,-5.339774,-14.252889,-14.248864,1.255221,16.243199,-5.295460,-14.075283,-14.070195
Wyoming,Teton County,50,4,8,56,39,21294,21294,21297,21482,21697,...,-1.589565,0.972695,19.525929,14.143021,-0.564849,0.654527,2.408578,21.160658,16.308671,1.520747
Wyoming,Uinta County,50,4,8,56,41,21118,21118,21102,20912,20989,...,-17.755986,-4.916350,-6.902954,-14.215862,-12.127022,-18.136812,-5.536861,-7.521840,-14.740608,-12.606351
Wyoming,Washakie County,50,4,8,56,43,8533,8533,8545,8469,8443,...,-11.637475,-0.827815,-2.013502,-17.781491,1.682288,-11.990126,-1.182592,-2.250385,-18.020168,1.441961


In [10]:
# Time test

def first_approach():
    global df
    return ((df.where(df['SUMLEV']==50)
             .dropna()
             .set_index(['STNAME','CTYNAME'])
             .rename(columns={'ESTIMATESBASE2010': 'Estimates Base 2010'})))
df=pd.read_csv('census.csv')
timeit.timeit(first_approach,number=10)

0.24073650000013913

In [15]:
# Time test

def second_approach():
    global df
    new_df=df[df['SUMLEV']==50]
    new_df.set_index(['STNAME','CTYNAME'],inplace=True)
    return new_df.rename(columns={'ESTIMATESBASE2010': 'Estimates Base 2010'})
df=pd.read_csv('census.csv')
timeit.timeit(second_approach,number=10)

0.07230719999984103

In [16]:
def min_max(row):
    data = row[['POPESTIMATE2010',
                'POPESTIMATE2011',
                'POPESTIMATE2012',
                'POPESTIMATE2013',
                'POPESTIMATE2014',
                'POPESTIMATE2015']]
    return pd.Series({'min': np.min(data), 'max': np.max(data)})

In [17]:
df.apply(min_max, axis=1)

Unnamed: 0,min,max
0,4785161,4858979
1,54660,55347
2,183193,203709
3,26489,27341
4,22512,22861
...,...,...
3188,43593,45162
3189,21297,23125
3190,20822,21102
3191,8316,8545


In [18]:
df.apply(min_max, axis=1).head()

Unnamed: 0,min,max
0,4785161,4858979
1,54660,55347
2,183193,203709
3,26489,27341
4,22512,22861


In [19]:
def min_max(row):
    data = row[['POPESTIMATE2010',
                'POPESTIMATE2011',
                'POPESTIMATE2012',
                'POPESTIMATE2013',
                'POPESTIMATE2014',
                'POPESTIMATE2015']]
    row['max'] = np.max(data)
    row['min'] = np.min(data)
    return row
df.apply(min_max, axis=1)

Unnamed: 0,SUMLEV,REGION,DIVISION,STATE,COUNTY,STNAME,CTYNAME,CENSUS2010POP,ESTIMATESBASE2010,POPESTIMATE2010,...,RDOMESTICMIG2013,RDOMESTICMIG2014,RDOMESTICMIG2015,RNETMIG2011,RNETMIG2012,RNETMIG2013,RNETMIG2014,RNETMIG2015,max,min
0,40,3,6,1,0,Alabama,Alabama,4779736,4780127,4785161,...,0.381066,0.582002,-0.467369,1.030015,0.826644,1.383282,1.724718,0.712594,4858979,4785161
1,50,3,6,1,1,Alabama,Autauga County,54571,54571,54660,...,-3.012349,2.265971,-2.530799,7.606016,-2.626146,-2.722002,2.592270,-2.187333,55347,54660
2,50,3,6,1,3,Alabama,Baldwin County,182265,182265,183193,...,21.845705,19.243286,17.197872,15.844176,18.559627,22.727626,20.317142,18.293499,203709,183193
3,50,3,6,1,5,Alabama,Barbour County,27457,27457,27341,...,-7.056824,-3.904217,-10.543299,-4.874741,-2.758113,-7.167664,-3.978583,-10.543299,27341,26489
4,50,3,6,1,7,Alabama,Bibb County,22915,22919,22861,...,-6.201001,-0.177537,0.177258,-5.088389,-4.363636,-5.403729,0.754533,1.107861,22861,22512
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3188,50,4,8,56,37,Wyoming,Sweetwater County,43806,43806,43593,...,-5.339774,-14.252889,-14.248864,1.255221,16.243199,-5.295460,-14.075283,-14.070195,45162,43593
3189,50,4,8,56,39,Wyoming,Teton County,21294,21294,21297,...,19.525929,14.143021,-0.564849,0.654527,2.408578,21.160658,16.308671,1.520747,23125,21297
3190,50,4,8,56,41,Wyoming,Uinta County,21118,21118,21102,...,-6.902954,-14.215862,-12.127022,-18.136812,-5.536861,-7.521840,-14.740608,-12.606351,21102,20822
3191,50,4,8,56,43,Wyoming,Washakie County,8533,8533,8545,...,-2.013502,-17.781491,1.682288,-11.990126,-1.182592,-2.250385,-18.020168,1.441961,8545,8316


In [20]:
def min_max(row):
    data = row[['POPESTIMATE2010',
                'POPESTIMATE2011',
                'POPESTIMATE2012',
                'POPESTIMATE2013',
                'POPESTIMATE2014',
                'POPESTIMATE2015']]
    row['max'] = np.max(data)
    row['min'] = np.min(data)
    return row
df.apply(min_max, axis=1).head()

Unnamed: 0,SUMLEV,REGION,DIVISION,STATE,COUNTY,STNAME,CTYNAME,CENSUS2010POP,ESTIMATESBASE2010,POPESTIMATE2010,...,RDOMESTICMIG2013,RDOMESTICMIG2014,RDOMESTICMIG2015,RNETMIG2011,RNETMIG2012,RNETMIG2013,RNETMIG2014,RNETMIG2015,max,min
0,40,3,6,1,0,Alabama,Alabama,4779736,4780127,4785161,...,0.381066,0.582002,-0.467369,1.030015,0.826644,1.383282,1.724718,0.712594,4858979,4785161
1,50,3,6,1,1,Alabama,Autauga County,54571,54571,54660,...,-3.012349,2.265971,-2.530799,7.606016,-2.626146,-2.722002,2.59227,-2.187333,55347,54660
2,50,3,6,1,3,Alabama,Baldwin County,182265,182265,183193,...,21.845705,19.243286,17.197872,15.844176,18.559627,22.727626,20.317142,18.293499,203709,183193
3,50,3,6,1,5,Alabama,Barbour County,27457,27457,27341,...,-7.056824,-3.904217,-10.543299,-4.874741,-2.758113,-7.167664,-3.978583,-10.543299,27341,26489
4,50,3,6,1,7,Alabama,Bibb County,22915,22919,22861,...,-6.201001,-0.177537,0.177258,-5.088389,-4.363636,-5.403729,0.754533,1.107861,22861,22512


In [24]:
rows=['POPESTIMATE2010','POPESTIMATE2011','POPESTIMATE2012','POPESTIMATE2013','POPESTIMATE2014',
        'POPESTIMATE2015']
df.apply(lambda x:np.max(x[rows]), axis=1).head()

0    4858979
1      55347
2     203709
3      27341
4      22861
dtype: int64

# Group by

### Spliting 

In [25]:
import pandas as pd
import numpy as np

In [26]:
df=pd.read_csv('census.csv')

df=df[df['SUMLEV']==50]
df.head()

Unnamed: 0,SUMLEV,REGION,DIVISION,STATE,COUNTY,STNAME,CTYNAME,CENSUS2010POP,ESTIMATESBASE2010,POPESTIMATE2010,...,RDOMESTICMIG2011,RDOMESTICMIG2012,RDOMESTICMIG2013,RDOMESTICMIG2014,RDOMESTICMIG2015,RNETMIG2011,RNETMIG2012,RNETMIG2013,RNETMIG2014,RNETMIG2015
1,50,3,6,1,1,Alabama,Autauga County,54571,54571,54660,...,7.242091,-2.915927,-3.012349,2.265971,-2.530799,7.606016,-2.626146,-2.722002,2.59227,-2.187333
2,50,3,6,1,3,Alabama,Baldwin County,182265,182265,183193,...,14.83296,17.647293,21.845705,19.243286,17.197872,15.844176,18.559627,22.727626,20.317142,18.293499
3,50,3,6,1,5,Alabama,Barbour County,27457,27457,27341,...,-4.728132,-2.50069,-7.056824,-3.904217,-10.543299,-4.874741,-2.758113,-7.167664,-3.978583,-10.543299
4,50,3,6,1,7,Alabama,Bibb County,22915,22919,22861,...,-5.527043,-5.068871,-6.201001,-0.177537,0.177258,-5.088389,-4.363636,-5.403729,0.754533,1.107861
5,50,3,6,1,9,Alabama,Blount County,57322,57322,57373,...,1.807375,-1.177622,-1.748766,-2.062535,-1.36997,1.859511,-0.84858,-1.402476,-1.577232,-0.884411


In [27]:
df.shape

(3142, 100)

In [28]:
%%timeit -n 3
for state in df['STNAME'].unique():
    avg=np.average(df.where(df['STNAME']==state).dropna()['CENSUS2010POP'])
    print('Countries in state'+state+'have an average population of'+str(avg))

Countries in stateAlabamahave an average population of71339.34328358209
Countries in stateAlaskahave an average population of24490.724137931036
Countries in stateArizonahave an average population of426134.4666666667
Countries in stateArkansashave an average population of38878.90666666667
Countries in stateCaliforniahave an average population of642309.5862068966
Countries in stateColoradohave an average population of78581.1875
Countries in stateConnecticuthave an average population of446762.125
Countries in stateDelawarehave an average population of299311.3333333333
Countries in stateDistrict of Columbiahave an average population of601723.0
Countries in stateFloridahave an average population of280616.5671641791
Countries in stateGeorgiahave an average population of60928.63522012578
Countries in stateHawaiihave an average population of272060.2
Countries in stateIdahohave an average population of35626.86363636364
Countries in stateIllinoishave an average population of125790.50980392157
Co

Countries in stateIllinoishave an average population of125790.50980392157
Countries in stateIndianahave an average population of70476.10869565218
Countries in stateIowahave an average population of30771.262626262625
Countries in stateKansashave an average population of27172.55238095238
Countries in stateKentuckyhave an average population of36161.39166666667
Countries in stateLouisianahave an average population of70833.9375
Countries in stateMainehave an average population of83022.5625
Countries in stateMarylandhave an average population of240564.66666666666
Countries in stateMassachusettshave an average population of467687.78571428574
Countries in stateMichiganhave an average population of119080.0
Countries in stateMinnesotahave an average population of60964.65517241379
Countries in stateMississippihave an average population of36186.54878048781
Countries in stateMissourihave an average population of52077.62608695652
Countries in stateMontanahave an average population of17668.125
Countr

Countries in stateNew Jerseyhave an average population of418661.61904761905
Countries in stateNew Mexicohave an average population of62399.36363636364
Countries in stateNew Yorkhave an average population of312550.03225806454
Countries in stateNorth Carolinahave an average population of95354.83
Countries in stateNorth Dakotahave an average population of12690.396226415094
Countries in stateOhiohave an average population of131096.63636363635
Countries in stateOklahomahave an average population of48718.844155844155
Countries in stateOregonhave an average population of106418.72222222222
Countries in statePennsylvaniahave an average population of189587.74626865672
Countries in stateRhode Islandhave an average population of210513.4
Countries in stateSouth Carolinahave an average population of100551.39130434782
Countries in stateSouth Dakotahave an average population of12336.060606060606
Countries in stateTennesseehave an average population of66801.1052631579
Countries in stateTexashave an ave

Countries in stateUtahhave an average population of95306.37931034483
Countries in stateVermonthave an average population of44695.78571428572
Countries in stateVirginiahave an average population of60111.29323308271
Countries in stateWashingtonhave an average population of172424.10256410256
Countries in stateWest Virginiahave an average population of33690.8
Countries in stateWisconsinhave an average population of78985.91666666667
Countries in stateWyominghave an average population of24505.478260869564
Countries in stateAlabamahave an average population of71339.34328358209
Countries in stateAlaskahave an average population of24490.724137931036
Countries in stateArizonahave an average population of426134.4666666667
Countries in stateArkansashave an average population of38878.90666666667
Countries in stateCaliforniahave an average population of642309.5862068966
Countries in stateColoradohave an average population of78581.1875
Countries in stateConnecticuthave an average population of446762.

Countries in stateLouisianahave an average population of70833.9375
Countries in stateMainehave an average population of83022.5625
Countries in stateMarylandhave an average population of240564.66666666666
Countries in stateMassachusettshave an average population of467687.78571428574
Countries in stateMichiganhave an average population of119080.0
Countries in stateMinnesotahave an average population of60964.65517241379
Countries in stateMississippihave an average population of36186.54878048781
Countries in stateMissourihave an average population of52077.62608695652
Countries in stateMontanahave an average population of17668.125
Countries in stateNebraskahave an average population of19638.075268817203
Countries in stateNevadahave an average population of158855.9411764706
Countries in stateNew Hampshirehave an average population of131647.0
Countries in stateNew Jerseyhave an average population of418661.61904761905
Countries in stateNew Mexicohave an average population of62399.36363636364
C

Countries in stateNorth Dakotahave an average population of12690.396226415094
Countries in stateOhiohave an average population of131096.63636363635
Countries in stateOklahomahave an average population of48718.844155844155
Countries in stateOregonhave an average population of106418.72222222222
Countries in statePennsylvaniahave an average population of189587.74626865672
Countries in stateRhode Islandhave an average population of210513.4
Countries in stateSouth Carolinahave an average population of100551.39130434782
Countries in stateSouth Dakotahave an average population of12336.060606060606
Countries in stateTennesseehave an average population of66801.1052631579
Countries in stateTexashave an average population of98998.27165354331
Countries in stateUtahhave an average population of95306.37931034483
Countries in stateVermonthave an average population of44695.78571428572
Countries in stateVirginiahave an average population of60111.29323308271
Countries in stateWashingtonhave an average p

Countries in stateWyominghave an average population of24505.478260869564
Countries in stateAlabamahave an average population of71339.34328358209
Countries in stateAlaskahave an average population of24490.724137931036
Countries in stateArizonahave an average population of426134.4666666667
Countries in stateArkansashave an average population of38878.90666666667
Countries in stateCaliforniahave an average population of642309.5862068966
Countries in stateColoradohave an average population of78581.1875
Countries in stateConnecticuthave an average population of446762.125
Countries in stateDelawarehave an average population of299311.3333333333
Countries in stateDistrict of Columbiahave an average population of601723.0
Countries in stateFloridahave an average population of280616.5671641791
Countries in stateGeorgiahave an average population of60928.63522012578
Countries in stateHawaiihave an average population of272060.2
Countries in stateIdahohave an average population of35626.86363636364
Cou

Countries in stateIllinoishave an average population of125790.50980392157
Countries in stateIndianahave an average population of70476.10869565218
Countries in stateIowahave an average population of30771.262626262625
Countries in stateKansashave an average population of27172.55238095238
Countries in stateKentuckyhave an average population of36161.39166666667
Countries in stateLouisianahave an average population of70833.9375
Countries in stateMainehave an average population of83022.5625
Countries in stateMarylandhave an average population of240564.66666666666
Countries in stateMassachusettshave an average population of467687.78571428574
Countries in stateMichiganhave an average population of119080.0
Countries in stateMinnesotahave an average population of60964.65517241379
Countries in stateMississippihave an average population of36186.54878048781
Countries in stateMissourihave an average population of52077.62608695652
Countries in stateMontanahave an average population of17668.125
Countr

Countries in stateNew Hampshirehave an average population of131647.0
Countries in stateNew Jerseyhave an average population of418661.61904761905
Countries in stateNew Mexicohave an average population of62399.36363636364
Countries in stateNew Yorkhave an average population of312550.03225806454
Countries in stateNorth Carolinahave an average population of95354.83
Countries in stateNorth Dakotahave an average population of12690.396226415094
Countries in stateOhiohave an average population of131096.63636363635
Countries in stateOklahomahave an average population of48718.844155844155
Countries in stateOregonhave an average population of106418.72222222222
Countries in statePennsylvaniahave an average population of189587.74626865672
Countries in stateRhode Islandhave an average population of210513.4
Countries in stateSouth Carolinahave an average population of100551.39130434782
Countries in stateSouth Dakotahave an average population of12336.060606060606
Countries in stateTennesseehave an ave

Countries in stateTexashave an average population of98998.27165354331
Countries in stateUtahhave an average population of95306.37931034483
Countries in stateVermonthave an average population of44695.78571428572
Countries in stateVirginiahave an average population of60111.29323308271
Countries in stateWashingtonhave an average population of172424.10256410256
Countries in stateWest Virginiahave an average population of33690.8
Countries in stateWisconsinhave an average population of78985.91666666667
Countries in stateWyominghave an average population of24505.478260869564
816 ms ± 5.62 ms per loop (mean ± std. dev. of 7 runs, 3 loops each)


In [32]:
%%timeit -n 3
for group, frame in df.groupby('STNAME'):
    avg = np.average(frame['CENSUS2010POP'])
    print('Counties in state ' + group + ' have an average population of ' + str(avg))

Counties in state Alabama have an average population of 71339.34328358209
Counties in state Alaska have an average population of 24490.724137931036
Counties in state Arizona have an average population of 426134.4666666667
Counties in state Arkansas have an average population of 38878.90666666667
Counties in state California have an average population of 642309.5862068966
Counties in state Colorado have an average population of 78581.1875
Counties in state Connecticut have an average population of 446762.125
Counties in state Delaware have an average population of 299311.3333333333
Counties in state District of Columbia have an average population of 601723.0
Counties in state Florida have an average population of 280616.5671641791
Counties in state Georgia have an average population of 60928.63522012578
Counties in state Hawaii have an average population of 272060.2
Counties in state Idaho have an average population of 35626.86363636364
Counties in state Illinois have an average populat

Counties in state Tennessee have an average population of 66801.1052631579
Counties in state Texas have an average population of 98998.27165354331
Counties in state Utah have an average population of 95306.37931034483
Counties in state Vermont have an average population of 44695.78571428572
Counties in state Virginia have an average population of 60111.29323308271
Counties in state Washington have an average population of 172424.10256410256
Counties in state West Virginia have an average population of 33690.8
Counties in state Wisconsin have an average population of 78985.91666666667
Counties in state Wyoming have an average population of 24505.478260869564
Counties in state Alabama have an average population of 71339.34328358209
Counties in state Alaska have an average population of 24490.724137931036
Counties in state Arizona have an average population of 426134.4666666667
Counties in state Arkansas have an average population of 38878.90666666667
Counties in state California have an 

In [33]:
df.head()

Unnamed: 0,SUMLEV,REGION,DIVISION,STATE,COUNTY,STNAME,CTYNAME,CENSUS2010POP,ESTIMATESBASE2010,POPESTIMATE2010,...,RDOMESTICMIG2011,RDOMESTICMIG2012,RDOMESTICMIG2013,RDOMESTICMIG2014,RDOMESTICMIG2015,RNETMIG2011,RNETMIG2012,RNETMIG2013,RNETMIG2014,RNETMIG2015
1,50,3,6,1,1,Alabama,Autauga County,54571,54571,54660,...,7.242091,-2.915927,-3.012349,2.265971,-2.530799,7.606016,-2.626146,-2.722002,2.59227,-2.187333
2,50,3,6,1,3,Alabama,Baldwin County,182265,182265,183193,...,14.83296,17.647293,21.845705,19.243286,17.197872,15.844176,18.559627,22.727626,20.317142,18.293499
3,50,3,6,1,5,Alabama,Barbour County,27457,27457,27341,...,-4.728132,-2.50069,-7.056824,-3.904217,-10.543299,-4.874741,-2.758113,-7.167664,-3.978583,-10.543299
4,50,3,6,1,7,Alabama,Bibb County,22915,22919,22861,...,-5.527043,-5.068871,-6.201001,-0.177537,0.177258,-5.088389,-4.363636,-5.403729,0.754533,1.107861
5,50,3,6,1,9,Alabama,Blount County,57322,57322,57373,...,1.807375,-1.177622,-1.748766,-2.062535,-1.36997,1.859511,-0.84858,-1.402476,-1.577232,-0.884411


In [34]:
df = df.set_index('STNAME')

def fun(item):
    if item[0]<'M':
        return 0
    if item[0]<'Q':
        return 1
    return 2

for group, frame in df.groupby(fun):
    print('There are ' + str(len(frame)) + ' records in group ' + str(group) + ' for processing.')

There are 1177 records in group 0 for processing.
There are 1134 records in group 1 for processing.
There are 831 records in group 2 for processing.


In [35]:
df = pd.read_csv('census.csv')
df = df[df['SUMLEV']==50]

In [36]:
df.groupby('STNAME').agg({'CENSUS2010POP': np.average})

Unnamed: 0_level_0,CENSUS2010POP
STNAME,Unnamed: 1_level_1
Alabama,71339.343284
Alaska,24490.724138
Arizona,426134.466667
Arkansas,38878.906667
California,642309.586207
Colorado,78581.1875
Connecticut,446762.125
Delaware,299311.333333
District of Columbia,601723.0
Florida,280616.567164


In [37]:
df.groupby('STNAME').agg({'CENSUS2010POP': np.average}).head()

Unnamed: 0_level_0,CENSUS2010POP
STNAME,Unnamed: 1_level_1
Alabama,71339.343284
Alaska,24490.724138
Arizona,426134.466667
Arkansas,38878.906667
California,642309.586207


In [38]:
print(type(df.groupby(level=0)['POPESTIMATE2010','POPESTIMATE2011']))
print(type(df.groupby(level=0)['POPESTIMATE2010']))

<class 'pandas.core.groupby.generic.DataFrameGroupBy'>
<class 'pandas.core.groupby.generic.SeriesGroupBy'>


  print(type(df.groupby(level=0)['POPESTIMATE2010','POPESTIMATE2011']))


In [40]:
(df.set_index('STNAME').groupby(level=0)['CENSUS2010POP'].agg({'avg': np.average, 'sum': np.sum}))

SpecificationError: nested renamer is not supported

In [1]:
import pandas as pd

df=pd.read_csv('Admission_Predict.csv',index_col=0)
df.head()

Unnamed: 0_level_0,GRE Score,TOEFL Score,University Rating,SOP,LOR,CGPA,Research,Chance of Admit
Serial No.,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
1,337,118,4,4.5,4.5,9.65,1,0.92
2,324,107,4,4.0,4.5,8.87,1,0.76
3,316,104,3,3.0,3.5,8.0,1,0.72
4,322,110,3,3.5,2.5,8.67,1,0.8
5,314,103,2,2.0,3.0,8.21,0,0.65


In [4]:
df[df['TOEFL Score'].gt(105)&df['TOEFL Score'].lt(115)]

Unnamed: 0_level_0,GRE Score,TOEFL Score,University Rating,SOP,LOR,CGPA,Research,Chance of Admit
Serial No.,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
2,324,107,4,4.0,4.5,8.87,1,0.76
4,322,110,3,3.5,2.5,8.67,1,0.80
7,321,109,3,3.0,4.0,8.20,1,0.75
10,323,108,3,3.5,3.0,8.60,0,0.45
11,325,106,3,3.5,4.0,8.40,1,0.52
...,...,...,...,...,...,...,...,...
392,318,106,3,2.0,3.0,8.65,0,0.71
393,326,112,4,4.0,3.5,9.12,1,0.84
395,329,111,4,4.5,4.0,9.23,1,0.89
396,324,110,3,3.5,3.5,9.04,1,0.82


In [5]:
(df['TOEFL Score']>105)&(df['TOEFL Score']<115)

Serial No.
1      False
2       True
3      False
4       True
5      False
       ...  
396     True
397     True
398    False
399    False
400    False
Name: TOEFL Score, Length: 400, dtype: bool

In [6]:
df[(df['TOEFL Score']isin(range(106,115)))]

SyntaxError: invalid syntax (<ipython-input-6-0f25ac7c569e>, line 1)

In [7]:
import pandas as pd

df=pd.read_csv('Admission_Predict.csv')
df.head()

Unnamed: 0,Serial No.,GRE Score,TOEFL Score,University Rating,SOP,LOR,CGPA,Research,Chance of Admit
0,1,337,118,4,4.5,4.5,9.65,1,0.92
1,2,324,107,4,4.0,4.5,8.87,1,0.76
2,3,316,104,3,3.0,3.5,8.0,1,0.72
3,4,322,110,3,3.5,2.5,8.67,1,0.8
4,5,314,103,2,2.0,3.0,8.21,0,0.65


In [8]:
df=df.rename(mapper=lambda x:x.upper(),axis='column')

ValueError: No axis named column for object type DataFrame

In [9]:
df=df.rename(mapper=lambda x:x.upper(),axis=1)