In [29]:
people = {
    "first": ["Corey", 'Jane', 'John', 'Yi'], 
    "last": ["Schafer", 'Doe', 'Doe', 'Zhang'], 
    "email": ["CoreyMSchafer@gmail.com", 'JaneDoe@email.com', 'JohnDoe@email.com', 'YiZhang@email.com']
}
import pandas as pd
df = pd.DataFrame(people)
df

Unnamed: 0,first,last,email
0,Corey,Schafer,CoreyMSchafer@gmail.com
1,Jane,Doe,JaneDoe@email.com
2,John,Doe,JohnDoe@email.com
3,Yi,Zhang,YiZhang@email.com


In [30]:
df.columns

Index(['first', 'last', 'email'], dtype='object')

### A way to reset column names, but not a good way. Because you need to provide all the column names even you don't want to change all of them

In [31]:
df.columns = ['first_name', 'last_name', 'email']

In [32]:
df

Unnamed: 0,first_name,last_name,email
0,Corey,Schafer,CoreyMSchafer@gmail.com
1,Jane,Doe,JaneDoe@email.com
2,John,Doe,JohnDoe@email.com
3,Yi,Zhang,YiZhang@email.com


In [33]:
df.columns = [x.upper() for x in df.columns]
df

Unnamed: 0,FIRST_NAME,LAST_NAME,EMAIL
0,Corey,Schafer,CoreyMSchafer@gmail.com
1,Jane,Doe,JaneDoe@email.com
2,John,Doe,JohnDoe@email.com
3,Yi,Zhang,YiZhang@email.com


In [34]:
df.columns = df.columns.str.replace('_', ' ')
df.columns = df.columns.str.replace(' ', '_')
df.columns = [x.lower() for x in df.columns]
df

Unnamed: 0,first_name,last_name,email
0,Corey,Schafer,CoreyMSchafer@gmail.com
1,Jane,Doe,JaneDoe@email.com
2,John,Doe,JohnDoe@email.com
3,Yi,Zhang,YiZhang@email.com


In [35]:
df.rename(columns={'first_name': 'first', 'last_name': 'last'}, inplace=True)
df

Unnamed: 0,first,last,email
0,Corey,Schafer,CoreyMSchafer@gmail.com
1,Jane,Doe,JaneDoe@email.com
2,John,Doe,JohnDoe@email.com
3,Yi,Zhang,YiZhang@email.com


In [36]:
filt = (df['first'] == 'John') & (df['last'] == 'Doe')

### df[filt] returns a slice view from a DataFrame. Set value on a copy will not change the value in df 

In [37]:
df[filt]['last'] ='Smith'

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  """Entry point for launching an IPython kernel.


### It's better to use `loc` or `at` to get and set a value on DataFrame

In [38]:
df.loc[filt, 'last'] = 'Smith'
df

Unnamed: 0,first,last,email
0,Corey,Schafer,CoreyMSchafer@gmail.com
1,Jane,Doe,JaneDoe@email.com
2,John,Smith,JohnDoe@email.com
3,Yi,Zhang,YiZhang@email.com


###  `at` is used to get/set a single value, `loc` can be used to set single/multiple values. There is no much difference between at and loc. Maybe there is some optimize in at when getting/setting a single value

In [39]:
df.loc[2] = ['John', 'Smith', 'JohnSmith@email.com']

In [40]:
df.loc[2].at['last']

'Smith'

In [41]:
df['email'] = df['email'].str.lower()
df

Unnamed: 0,first,last,email
0,Corey,Schafer,coreymschafer@gmail.com
1,Jane,Doe,janedoe@email.com
2,John,Smith,johnsmith@email.com
3,Yi,Zhang,yizhang@email.com


In [42]:
apply
map
applymap
replace

NameError: name 'apply' is not defined

### Example of `apply`.
`apply` can be used on DateFrame or Series

In [43]:
df['email'].apply(len)

0    23
1    17
2    19
3    17
Name: email, dtype: int64

In [44]:
def update_email(email):
    return email.upper()

In [45]:
df['email'] = df['email'].apply(update_email)

In [46]:
df['email'] = df['email'].apply(lambda x: x.lower())

In [47]:
df

Unnamed: 0,first,last,email
0,Corey,Schafer,coreymschafer@gmail.com
1,Jane,Doe,janedoe@email.com
2,John,Smith,johnsmith@email.com
3,Yi,Zhang,yizhang@email.com


In [49]:
df.apply(len, axis = 'rows')

first    4
last     4
email    4
dtype: int64

In [50]:
df.apply(len, axis = 'columns')

0    3
1    3
2    3
3    3
dtype: int64

In [54]:
df.apply(pd.Series.min, axis='columns')

0    Corey
1      Doe
2     John
3       Yi
dtype: object

In [55]:
df.apply(lambda x: x.min())

first                      Corey
last                         Doe
email    coreymschafer@gmail.com
dtype: object

### Example of `applymap`. 
`applymap` can only used on DataFrame

In [56]:
df.applymap(len)

Unnamed: 0,first,last,email
0,5,7,23
1,4,3,17
2,4,5,19
3,2,5,17


In [57]:
df.applymap(lambda x: x.upper())

Unnamed: 0,first,last,email
0,COREY,SCHAFER,COREYMSCHAFER@GMAIL.COM
1,JANE,DOE,JANEDOE@EMAIL.COM
2,JOHN,SMITH,JOHNSMITH@EMAIL.COM
3,YI,ZHANG,YIZHANG@EMAIL.COM


In [58]:
df.applymap(str.lower)

Unnamed: 0,first,last,email
0,corey,schafer,coreymschafer@gmail.com
1,jane,doe,janedoe@email.com
2,john,smith,johnsmith@email.com
3,yi,zhang,yizhang@email.com


### Example of `map`. 
`map` can only used on Series. If the values in series do not get substitued, a `NaN` will be set.
If you don't want `NaN` set to the un-substitued, use `replace` instead.

In [59]:
df['first'].map({'Corey':'Chris', 'Jane':'Mary'})

0    Chris
1     Mary
2      NaN
3      NaN
Name: first, dtype: object

In [60]:
df['first'].replace({'Corey':'Chris', 'Jane':'Mary'})

0    Chris
1     Mary
2     John
3       Yi
Name: first, dtype: object

In [61]:
import pandas as pd
df = pd.read_csv('../data/survey_results_public.csv')
schema_df = pd.read_csv('../data/survey_results_schema.csv')
# Set max display rows and columns
pd.set_option('display.max_rows', 10)
pd.set_option('display.max_columns', 10)
pd.set_option('display.width', None)
df.head()

Unnamed: 0,Respondent,MainBranch,Hobbyist,OpenSourcer,OpenSource,...,Sexuality,Ethnicity,Dependents,SurveyLength,SurveyEase
0,1,I am a student who is learning to code,Yes,Never,The quality of OSS and closed source software ...,...,Straight / Heterosexual,,No,Appropriate in length,Neither easy nor difficult
1,2,I am a student who is learning to code,No,Less than once per year,The quality of OSS and closed source software ...,...,Straight / Heterosexual,,No,Appropriate in length,Neither easy nor difficult
2,3,"I am not primarily a developer, but I write co...",Yes,Never,The quality of OSS and closed source software ...,...,Straight / Heterosexual,,Yes,Appropriate in length,Neither easy nor difficult
3,4,I am a developer by profession,No,Never,The quality of OSS and closed source software ...,...,Straight / Heterosexual,White or of European descent,No,Appropriate in length,Easy
4,5,I am a developer by profession,Yes,Once a month or more often,"OSS is, on average, of HIGHER quality than pro...",...,Straight / Heterosexual,White or of European descent;Multiracial,No,Appropriate in length,Easy


In [63]:
df.rename(columns={'ConvertedComp': 'SalaryUSD'}, inplace=True)
df['SalaryUSD']

0            NaN
1            NaN
2         8820.0
3        61000.0
4            NaN
          ...   
88878        NaN
88879        NaN
88880        NaN
88881        NaN
88882        NaN
Name: SalaryUSD, Length: 88883, dtype: float64

In [66]:
df['Hobbyist'].map({'Yes':True, 'No': False})

0         True
1        False
2         True
3        False
4         True
         ...  
88878     True
88879    False
88880    False
88881    False
88882     True
Name: Hobbyist, Length: 88883, dtype: bool

In [67]:
df['Hobbyist'] = df['Hobbyist'].map({'Yes':True, 'No': False})
df

Unnamed: 0,Respondent,MainBranch,Hobbyist,OpenSourcer,OpenSource,...,Sexuality,Ethnicity,Dependents,SurveyLength,SurveyEase
0,1,I am a student who is learning to code,True,Never,The quality of OSS and closed source software ...,...,Straight / Heterosexual,,No,Appropriate in length,Neither easy nor difficult
1,2,I am a student who is learning to code,False,Less than once per year,The quality of OSS and closed source software ...,...,Straight / Heterosexual,,No,Appropriate in length,Neither easy nor difficult
2,3,"I am not primarily a developer, but I write co...",True,Never,The quality of OSS and closed source software ...,...,Straight / Heterosexual,,Yes,Appropriate in length,Neither easy nor difficult
3,4,I am a developer by profession,False,Never,The quality of OSS and closed source software ...,...,Straight / Heterosexual,White or of European descent,No,Appropriate in length,Easy
4,5,I am a developer by profession,True,Once a month or more often,"OSS is, on average, of HIGHER quality than pro...",...,Straight / Heterosexual,White or of European descent;Multiracial,No,Appropriate in length,Easy
...,...,...,...,...,...,...,...,...,...,...,...
88878,88377,,True,Less than once a month but more than once per ...,The quality of OSS and closed source software ...,...,,,No,Appropriate in length,Easy
88879,88601,,False,Never,The quality of OSS and closed source software ...,...,,,,,
88880,88802,,False,Never,,...,,,,,
88881,88816,,False,Never,"OSS is, on average, of HIGHER quality than pro...",...,,,,,
