In [1]:
import pandas as pd
# Column-oriented sample records: every keyword becomes one DataFrame column
# and the three lists are aligned by position (one entry per person).
people = dict(
    first=["Corey", "Jane", "John"],
    last=["Schafer", "Doe", "Doe"],
    email=[
        "CoreyMSchafer@gmail.com",
        "JaneDoe@email.com",
        "JohnDoe@email.com",
    ],
)

# Build the frame (default 0..n-1 integer index) and display it.
df = pd.DataFrame(people)
df

Unnamed: 0,first,last,email
0,Corey,Schafer,CoreyMSchafer@gmail.com
1,Jane,Doe,JaneDoe@email.com
2,John,Doe,JohnDoe@email.com


In [2]:
# Inspect the current column labels; pandas returns them as an Index.
df.columns

Index(['first', 'last', 'email'], dtype='object')

In [3]:
# Overwrite all column labels in one assignment; the list supplies one new
# label per existing column, in positional order.
df.columns = ['first_name','last_name','email']

In [5]:
# Re-check the labels after they were reassigned.
df.columns

Index(['first_name', 'last_name', 'email'], dtype='object')

In [7]:
# Display the frame: only the header changed, the row data is untouched.
df

Unnamed: 0,first_name,last_name,email
0,Corey,Schafer,CoreyMSchafer@gmail.com
1,Jane,Doe,JaneDoe@email.com
2,John,Doe,JohnDoe@email.com


In [8]:
# Upper-case every column label through the vectorised Index.str accessor,
# then display the frame to show the new header.
df.columns = df.columns.str.upper()
df

Unnamed: 0,FIRST_NAME,LAST_NAME,EMAIL
0,Corey,Schafer,CoreyMSchafer@gmail.com
1,Jane,Doe,JaneDoe@email.com
2,John,Doe,JohnDoe@email.com


In [9]:
# Swap each underscore in the column labels for a space using the
# vectorised Index.str accessor.
df.columns = df.columns.str.replace('_',' ')

In [10]:
# Display the frame with the space-separated labels.
df

Unnamed: 0,FIRST NAME,LAST NAME,EMAIL
0,Corey,Schafer,CoreyMSchafer@gmail.com
1,Jane,Doe,JaneDoe@email.com
2,John,Doe,JohnDoe@email.com


In [12]:
# Normalise the labels in one chained expression: lower-case each label,
# then turn the spaces back into underscores. Display the result.
df.columns = df.columns.str.lower().str.replace(' ', '_')
df

Unnamed: 0,first_name,last_name,email
0,Corey,Schafer,CoreyMSchafer@gmail.com
1,Jane,Doe,JaneDoe@email.com
2,John,Doe,JohnDoe@email.com


In [16]:
# Rename only the columns listed in the mapping; labels that are not keys
# (here 'email') are left as they are.
# The returned frame is rebound to `df` instead of passing inplace=True, so
# the object previously bound to the name is not mutated and the call stays
# usable in a method chain.
df = df.rename(columns={'first_name': 'first', 'last_name': 'last'})

In [17]:
# Display the frame to confirm the two columns were renamed.
df

Unnamed: 0,first,last,email
0,Corey,Schafer,CoreyMSchafer@gmail.com
1,Jane,Doe,JaneDoe@email.com
2,John,Doe,JohnDoe@email.com


In [18]:
# Select the row whose index label is 2; a single row comes back as a
# Series keyed by the column labels.
df.loc[2]

first                 John
last                   Doe
email    JohnDoe@email.com
Name: 2, dtype: object

In [19]:
# Overwrite every cell of row 2; the values are given in column order
# (first, last, email).
df.loc[2] = ['John','Smith','JohnSmith@email.com']

In [20]:
# Display the frame to confirm row 2 was replaced.
df

Unnamed: 0,first,last,email
0,Corey,Schafer,CoreyMSchafer@gmail.com
1,Jane,Doe,JaneDoe@email.com
2,John,Smith,JohnSmith@email.com


In [22]:
# Update only the 'last' and 'email' cells of row 2; 'first' is not in the
# column list and keeps its value.
df.loc[2,['last','email']] = ['Doe','JohnDoe@gmail.com']
df

Unnamed: 0,first,last,email
0,Corey,Schafer,CoreyMSchafer@gmail.com
1,Jane,Doe,JaneDoe@email.com
2,John,Doe,JohnDoe@gmail.com


In [23]:
# Set a single cell, addressed by row label and column label, through .loc.
df.loc[2,'last'] = 'Smith'
df

Unnamed: 0,first,last,email
0,Corey,Schafer,CoreyMSchafer@gmail.com
1,Jane,Doe,JaneDoe@email.com
2,John,Smith,JohnDoe@gmail.com


In [24]:
# Same kind of single-cell assignment, this time through .at, which
# addresses exactly one row-label / column-label pair.
df.at[2,'last'] = 'Doe'

In [25]:
# Display the frame to confirm the .at assignment.
df

Unnamed: 0,first,last,email
0,Corey,Schafer,CoreyMSchafer@gmail.com
1,Jane,Doe,JaneDoe@email.com
2,John,Doe,JohnDoe@gmail.com


In [27]:
# Boolean mask that is True for rows whose email equals the given address
# exactly (the comparison is case-sensitive).
filt = df['email'].eq('JohnDoe@gmail.com')
# Row selection with the mask: only rows where the mask is True are shown.
df.loc[filt]

Unnamed: 0,first,last,email
2,John,Doe,JohnDoe@gmail.com


In [28]:
# Read the 'last' value(s) of the masked rows with a single .loc call.
# The chained form df[filt]['last'] first materialises an intermediate
# filtered frame and is not safe to reuse for assignment; a single .loc
# lookup yields the same Series and matches the .loc-based assignment used
# elsewhere in this notebook.
df.loc[filt, 'last']

2    Doe
Name: last, dtype: object

In [31]:
# Assign through .loc with the boolean mask so that the 'last' cell of the
# matching row(s) in df itself is updated, then display the frame.
df.loc[filt,'last'] = 'Smith'
df

Unnamed: 0,first,last,email
0,Corey,Schafer,CoreyMSchafer@gmail.com
1,Jane,Doe,JaneDoe@email.com
2,John,Smith,JohnDoe@gmail.com


In [32]:
# Preview the lower-cased emails. Nothing is assigned here, so df keeps
# its mixed-case values until the result is stored back.
df['email'].str.lower()

0    coreymschafer@gmail.com
1          janedoe@email.com
2          johndoe@gmail.com
Name: email, dtype: object

In [34]:
# Store the lower-cased Series back into the 'email' column, then display.
df['email'] = df['email'].str.lower()
df

Unnamed: 0,first,last,email
0,Corey,Schafer,coreymschafer@gmail.com
1,Jane,Doe,janedoe@email.com
2,John,Smith,johndoe@gmail.com


In [35]:
# Series.apply calls the given function (here the built-in len) once per
# value, producing the character count of each email.
df['email'].apply(len)

0    23
1    17
2    17
Name: email, dtype: int64

In [36]:
def update_email(email):
    """Return ``email`` converted to upper case.

    The argument is expected to provide an ``upper()`` method (e.g. ``str``);
    the result of that call is returned unchanged.
    """
    uppercased = email.upper()
    return uppercased

In [37]:
# Apply the user-defined update_email to each value; the function object is
# passed without parentheses so that pandas performs the calls.
df['email'].apply(update_email)

0    COREYMSCHAFER@GMAIL.COM
1          JANEDOE@EMAIL.COM
2          JOHNDOE@GMAIL.COM
Name: email, dtype: object

In [38]:
# Persist the upper-cased emails by assigning the applied result back to
# the column, then display the frame.
df['email'] = df['email'].apply(update_email)
df

Unnamed: 0,first,last,email
0,Corey,Schafer,COREYMSCHAFER@GMAIL.COM
1,Jane,Doe,JANEDOE@EMAIL.COM
2,John,Smith,JOHNDOE@GMAIL.COM


In [40]:
# Lower-case each email again by handing the unbound str.lower method to
# apply (no wrapper lambda needed), store the result, and display the frame.
df['email'] = df['email'].apply(str.lower)
df

Unnamed: 0,first,last,email
0,Corey,Schafer,coreymschafer@gmail.com
1,Jane,Doe,janedoe@email.com
2,John,Smith,johndoe@gmail.com


In [42]:
# On a Series, apply works value by value: this is the length of each email.
df['email'].apply(len)

0    23
1    17
2    17
Name: email, dtype: int64

In [43]:
# On a DataFrame, apply passes each column (a Series) to the function, so
# len reports the number of rows per column, not per-value string lengths.
df.apply(len)

first    3
last     3
email    3
dtype: int64

In [44]:
# The built-in len of one column is its row count, the same number that
# df.apply(len) reports for every column.
len(df['email'])

3

In [45]:
# With axis='columns' the function receives each row instead, so len now
# reports the number of columns in every row.
df.apply(len,axis='columns')

0    3
1    3
2    3
dtype: int64

In [46]:
 # Per-column minimum: each column Series is passed to pd.Series.min; for
 # these string columns the minimum is the lexicographically smallest value.
 # NOTE(review): this cell starts with a stray leading space; IPython
 # appears to tolerate it, but a plain .py script would reject it - confirm
 # and remove.
 df.apply(pd.Series.min)

first                      Corey
last                         Doe
email    coreymschafer@gmail.com
dtype: object

In [47]:
# Same per-column minimum written with a lambda; x is one column Series
# per call.
df.apply(lambda x: x.min())

first                      Corey
last                         Doe
email    coreymschafer@gmail.com
dtype: object

In [48]:
# applymap calls the function on every individual cell of the frame, giving
# the string length of each value.
# NOTE(review): DataFrame.applymap is deprecated in pandas >= 2.1 in favour
# of DataFrame.map - confirm the pandas version in use before switching.
df.applymap(len)

Unnamed: 0,first,last,email
0,5,7,23
1,4,3,17
2,4,5,17


In [49]:
# Element-wise lower-casing of every cell; this works because every value
# in the frame is a string. The result is only displayed, not stored.
df.applymap(str.lower)

Unnamed: 0,first,last,email
0,corey,schafer,coreymschafer@gmail.com
1,jane,doe,janedoe@email.com
2,john,smith,johndoe@gmail.com


In [50]:
# Series.map substitutes each value by dictionary lookup; a value with no
# matching key ('John' here) becomes NaN rather than being kept.
df['first'].map({'Corey':'Chris','Jane':'Mary'})

0    Chris
1     Mary
2      NaN
Name: first, dtype: object

In [51]:
# Series.replace also substitutes by dictionary, but values without a
# matching key ('John') are left unchanged instead of becoming NaN.
df['first'].replace({'Corey':'Chris','Jane':'Mary'})

0    Chris
1     Mary
2     John
Name: first, dtype: object

In [52]:
# Store the replaced names back into the 'first' column and display the
# frame; unmatched names keep their original value.
df['first'] = df['first'].replace({'Corey':'Chris','Jane':'Mary'})
df

Unnamed: 0,first,last,email
0,Chris,Schafer,coreymschafer@gmail.com
1,Mary,Doe,janedoe@email.com
2,John,Smith,johndoe@gmail.com


In [55]:
# Load the survey responses, using the 'Respondent' column as the index.
# NOTE: this rebinds `df`; the small people frame built earlier is no
# longer reachable through that name.
df = pd.read_csv('survey_results_public.csv',index_col='Respondent')
# Load the schema file, indexed by its 'Column' column.
schema_df = pd.read_csv('survey_results_schema.csv',index_col='Column')

In [56]:
# Cap rendered output at 10 rows and 10 columns so that displaying the
# survey frame prints a truncated preview instead of the whole table.
pd.set_option("display.max_rows", 10)
pd.set_option("display.max_columns", 10)

In [57]:
# Preview the first five survey rows (head's default).
df.head()

Unnamed: 0_level_0,Hobby,OpenSource,Country,Student,Employment,...,Age,Dependents,MilitaryUS,SurveyTooLong,SurveyEasy
Respondent,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
1,Yes,No,Kenya,No,Employed part-time,...,25 - 34 years old,Yes,,The survey was an appropriate length,Very easy
3,Yes,Yes,United Kingdom,No,Employed full-time,...,35 - 44 years old,Yes,,The survey was an appropriate length,Somewhat easy
4,Yes,Yes,United States,No,Employed full-time,...,,,,,
5,No,No,United States,No,Employed full-time,...,35 - 44 years old,No,No,The survey was an appropriate length,Somewhat easy
7,Yes,No,South Africa,"Yes, part-time",Employed full-time,...,18 - 24 years old,Yes,,The survey was an appropriate length,Somewhat easy


In [58]:
# Preview the rename: without inplace=True or reassignment, rename returns
# a new frame and df itself still has the 'Student' column.
df.rename(columns={'Student':'People'})

Unnamed: 0_level_0,Hobby,OpenSource,Country,People,Employment,...,Age,Dependents,MilitaryUS,SurveyTooLong,SurveyEasy
Respondent,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
1,Yes,No,Kenya,No,Employed part-time,...,25 - 34 years old,Yes,,The survey was an appropriate length,Very easy
3,Yes,Yes,United Kingdom,No,Employed full-time,...,35 - 44 years old,Yes,,The survey was an appropriate length,Somewhat easy
4,Yes,Yes,United States,No,Employed full-time,...,,,,,
5,No,No,United States,No,Employed full-time,...,35 - 44 years old,No,No,The survey was an appropriate length,Somewhat easy
7,Yes,No,South Africa,"Yes, part-time",Employed full-time,...,18 - 24 years old,Yes,,The survey was an appropriate length,Somewhat easy
...,...,...,...,...,...,...,...,...,...,...,...
101513,Yes,Yes,United States,,,...,,,,,
101531,No,Yes,Spain,"Yes, full-time","Not employed, but looking for work",...,,,,,
101541,Yes,Yes,India,"Yes, full-time",Employed full-time,...,,,,,
101544,Yes,No,Russian Federation,No,"Independent contractor, freelancer, or self-em...",...,,,,,


In [60]:
# Make the 'Student' -> 'People' rename permanent by rebinding the returned
# frame rather than using inplace=True, so the previously bound object is
# not mutated and the call stays chain-friendly. Columns not named in the
# mapping are unchanged.
df = df.rename(columns={'Student': 'People'})
df

Unnamed: 0_level_0,Hobby,OpenSource,Country,People,Employment,...,Age,Dependents,MilitaryUS,SurveyTooLong,SurveyEasy
Respondent,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
1,Yes,No,Kenya,No,Employed part-time,...,25 - 34 years old,Yes,,The survey was an appropriate length,Very easy
3,Yes,Yes,United Kingdom,No,Employed full-time,...,35 - 44 years old,Yes,,The survey was an appropriate length,Somewhat easy
4,Yes,Yes,United States,No,Employed full-time,...,,,,,
5,No,No,United States,No,Employed full-time,...,35 - 44 years old,No,No,The survey was an appropriate length,Somewhat easy
7,Yes,No,South Africa,"Yes, part-time",Employed full-time,...,18 - 24 years old,Yes,,The survey was an appropriate length,Somewhat easy
...,...,...,...,...,...,...,...,...,...,...,...
101513,Yes,Yes,United States,,,...,,,,,
101531,No,Yes,Spain,"Yes, full-time","Not employed, but looking for work",...,,,,,
101541,Yes,Yes,India,"Yes, full-time",Employed full-time,...,,,,,
101544,Yes,No,Russian Federation,No,"Independent contractor, freelancer, or self-em...",...,,,,,


In [61]:
# The renamed column is now reachable under its new label.
df['People']

Respondent
1                     No
3                     No
4                     No
5                     No
7         Yes, part-time
               ...      
101513               NaN
101531    Yes, full-time
101541    Yes, full-time
101544                No
101548               NaN
Name: People, Length: 98855, dtype: object

In [63]:
# Inspect the 'Hobby' column, which holds 'Yes'/'No' strings at this point.
df['Hobby']

Respondent
1         Yes
3         Yes
4         Yes
5          No
7         Yes
         ... 
101513    Yes
101531     No
101541    Yes
101544    Yes
101548    Yes
Name: Hobby, Length: 98855, dtype: object

In [64]:
# Preview the conversion of 'Yes'/'No' to booleans via dictionary lookup.
# Any value that is not a key of the mapping would come back as NaN.
df['Hobby'].map({'Yes':True,'No':False})

Respondent
1          True
3          True
4          True
5         False
7          True
          ...  
101513     True
101531    False
101541     True
101544     True
101548     True
Name: Hobby, Length: 98855, dtype: bool

In [65]:
# Store the boolean version back into the 'Hobby' column.
# NOTE(review): this cell is not idempotent - once the column holds
# True/False, running it again would find no 'Yes'/'No' keys and map every
# value to NaN. Re-load the CSV before re-running.
df['Hobby'] = df['Hobby'].map({'Yes':True,'No':False})

In [66]:
# Display the frame (truncated by the display options) to confirm 'Hobby'
# now holds booleans.
df

Unnamed: 0_level_0,Hobby,OpenSource,Country,People,Employment,...,Age,Dependents,MilitaryUS,SurveyTooLong,SurveyEasy
Respondent,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
1,True,No,Kenya,No,Employed part-time,...,25 - 34 years old,Yes,,The survey was an appropriate length,Very easy
3,True,Yes,United Kingdom,No,Employed full-time,...,35 - 44 years old,Yes,,The survey was an appropriate length,Somewhat easy
4,True,Yes,United States,No,Employed full-time,...,,,,,
5,False,No,United States,No,Employed full-time,...,35 - 44 years old,No,No,The survey was an appropriate length,Somewhat easy
7,True,No,South Africa,"Yes, part-time",Employed full-time,...,18 - 24 years old,Yes,,The survey was an appropriate length,Somewhat easy
...,...,...,...,...,...,...,...,...,...,...,...
101513,True,Yes,United States,,,...,,,,,
101531,False,Yes,Spain,"Yes, full-time","Not employed, but looking for work",...,,,,,
101541,True,Yes,India,"Yes, full-time",Employed full-time,...,,,,,
101544,True,No,Russian Federation,No,"Independent contractor, freelancer, or self-em...",...,,,,,
