In [9]:
# Here we create a list dictionary of values for our data frame
people = {
    "FirstName" : ['John', 'Jacob', 'Jennifer', 'Jordan', 'Jaqueline'],
    "LastName": ['Jingle', 'Heimer', 'Schmidt', 'Daniel', 'Fishwall'],
    "Email": ['johnjingle33@fakeemail.com', 'jacobheimer@fakeemail.com', 'Jennifer32@email.com', 'jdaniel@email.com', 'jaqlfish@email.com']
}

In [10]:
# Here we import the pandas module
import pandas as pd

In [11]:
# Here we create the initial dataframe.
df = pd.DataFrame(people)
df

Unnamed: 0,FirstName,LastName,Email
0,John,Jingle,johnjingle33@fakeemail.com
1,Jacob,Heimer,jacobheimer@fakeemail.com
2,Jennifer,Schmidt,Jennifer32@email.com
3,Jordan,Daniel,jdaniel@email.com
4,Jaqueline,Fishwall,jaqlfish@email.com


In [12]:
# Here we use iloc to grab the first row of the dataframe.
df.iloc[0]

FirstName                          John
LastName                         Jingle
Email        johnjingle33@fakeemail.com
Name: 0, dtype: object

In [13]:
# Here we name the columns of the dataframe.
df.columns = ['FirstName', 'LastName', 'Email']

In [14]:
# Here we use a list comprehension to change the column
# names to lower case strings.
df.columns = [x.lower() for x in df.columns]
df.columns = df.columns.str.replace()
df

TypeError: replace() missing 2 required positional arguments: 'pat' and 'repl'

In [15]:
# Here we rename the columns using a python dictionary and use the 
# inplace=True kwarg to make the changes to the dataframe
df.rename(columns={'firstname': 'FirstName', 'lastname': 'LastName', 'email': 'Email'}, inplace=True)
df

Unnamed: 0,FirstName,LastName,Email
0,John,Jingle,johnjingle33@fakeemail.com
1,Jacob,Heimer,jacobheimer@fakeemail.com
2,Jennifer,Schmidt,Jennifer32@email.com
3,Jordan,Daniel,jdaniel@email.com
4,Jaqueline,Fishwall,jaqlfish@email.com


In [15]:
# Here we use the loc method to set the second row values equal to a list
# of chosen values by position!
df.loc[2] = ['Bobby', 'Jones', 'bobbyboyj@fakeemail.com']
df

Unnamed: 0,FirstName,LastName,Email
0,John,Jingle,johnjingle33@fakeemail.com
1,Jacob,Heimer,jacobheimer@fakeemail.com
2,Bobby,Jones,bobbyboyj@fakeemail.com
3,Jordan,Daniel,jdaniel@email.com
4,Jaqueline,Fishwall,jaqlfish@email.com


In [16]:
# Here we use the row search function iloc to
# look at the second row, LastName and Email column and set the 
# value for those positions = to new values.
df.loc[2, ['LastName', 'Email']] = ['Doe', 'BobbyDoe@gmail.com']
df

Unnamed: 0,FirstName,LastName,Email
0,John,Jingle,johnjingle33@fakeemail.com
1,Jacob,Heimer,jacobheimer@fakeemail.com
2,Jennifer,Doe,BobbyDoe@gmail.com
3,Jordan,Daniel,jdaniel@email.com
4,Jaqueline,Fishwall,jaqlfish@email.com


In [17]:
# Here create a filter variable to store the conditional check to
# find the desired email in the Email column of the dataframe.
# in the second line we pass the filt variable and the strint of the name
# of the column we want to change with the string value.
filt = (df['Email'] == 'BobbyDoe@gmail.com')
df.loc[filt, 'LastName'] = 'Smith'
df

Unnamed: 0,FirstName,LastName,Email
0,John,Jingle,johnjingle33@fakeemail.com
1,Jacob,Heimer,jacobheimer@fakeemail.com
2,Jennifer,Smith,BobbyDoe@gmail.com
3,Jordan,Daniel,jdaniel@email.com
4,Jaqueline,Fishwall,jaqlfish@email.com


In [18]:
# Here we set the emails to lower case using the str method.
# Note this wouldn't work if there were int, float, or other
# data types within the dataframe!
df['Email']= df['Email'].str.lower()
df

Unnamed: 0,FirstName,LastName,Email
0,John,Jingle,johnjingle33@fakeemail.com
1,Jacob,Heimer,jacobheimer@fakeemail.com
2,Jennifer,Smith,bobbydoe@gmail.com
3,Jordan,Daniel,jdaniel@email.com
4,Jaqueline,Fishwall,jaqlfish@email.com


In [19]:
# Here we see that apply works on series data only and shows us the len
# of each email in the email column.
df['Email'].apply(len)

0    26
1    25
2    18
3    17
4    18
Name: Email, dtype: int64

In [20]:
def update_email(email):
    return email.upper()

In [21]:
# Here we apply a function defined above to the email column
# of the dataframe
df['Email'] = df['Email'].apply(update_email)
df

Unnamed: 0,FirstName,LastName,Email
0,John,Jingle,JOHNJINGLE33@FAKEEMAIL.COM
1,Jacob,Heimer,JACOBHEIMER@FAKEEMAIL.COM
2,Jennifer,Smith,BOBBYDOE@GMAIL.COM
3,Jordan,Daniel,JDANIEL@EMAIL.COM
4,Jaqueline,Fishwall,JAQLFISH@EMAIL.COM


In [22]:
# Here we apply a lambda function using the apply method
# which changes the emails to lower case!
df['Email'] = df['Email'].apply(lambda x: x.lower())
df

Unnamed: 0,FirstName,LastName,Email
0,John,Jingle,johnjingle33@fakeemail.com
1,Jacob,Heimer,jacobheimer@fakeemail.com
2,Jennifer,Smith,bobbydoe@gmail.com
3,Jordan,Daniel,jdaniel@email.com
4,Jaqueline,Fishwall,jaqlfish@email.com


In [23]:
# Here we pass a second argument to the apply method.
# this means that we want to invert the axis to look at rows.
# The default for axis='rows' so if axis='columns' we are
# using rows as our axis and 
# df.apply(len)
df.apply(len, axis='columns')

0    3
1    3
2    3
3    3
4    3
dtype: int64

In [24]:
# Here we use apply to apply the min series method on the dataframe as a series, so column by column. 
# Apply works on rows or columns not individual elements!
df.apply(pd.Series.min)

FirstName                 Jacob
LastName                 Daniel
Email        bobbydoe@gmail.com
dtype: object

In [25]:
# Use apply method and pass a lambda function as the argument to find
# the min values for all columns! Apply works on rows or columns!
df.apply(lambda x: x.min())

FirstName                 Jacob
LastName                 Daniel
Email        bobbydoe@gmail.com
dtype: object

In [26]:
# Here we see that applymap works on all elements of the dataframe using the len method.
# Note: We dont want the return value of the method len() we want len or func without () !
df.applymap(len)

Unnamed: 0,FirstName,LastName,Email
0,4,6,26
1,5,6,25
2,8,5,18
3,6,6,17
4,9,8,18


In [27]:
# The applymap method works on every element of the dataframe.
# In the below example the lower method is applied to all elements
# in the datafram individually.
df.applymap(str.lower)

Unnamed: 0,FirstName,LastName,Email
0,john,jingle,johnjingle33@fakeemail.com
1,jacob,heimer,jacobheimer@fakeemail.com
2,jennifer,smith,bobbydoe@gmail.com
3,jordan,daniel,jdaniel@email.com
4,jaqueline,fishwall,jaqlfish@email.com


In [28]:
# Updating column values using map, it will produce NAN where no match is found!
df['FirstName'].map({'John': ' chris', 'Bobby': 'walter'})

0     chris
1       NaN
2       NaN
3       NaN
4       NaN
Name: FirstName, dtype: object

In [29]:
# Updating certain column values based on key value match with dict obj.
# Replace does not produce NAN when a match is not found!
df['FirstName'].replace({'John': ' chris', 'Bobby': 'walter'})

0        chris
1        Jacob
2     Jennifer
3       Jordan
4    Jaqueline
Name: FirstName, dtype: object

In [30]:
# Add a new column called FullName by concatenating values from
# the FirstName and LastName column on a empty space.
# To add to the data frame set the column name df['colname'] = operation
df['FullName'] = df['FirstName'] + ' ' + df['LastName']
df

Unnamed: 0,FirstName,LastName,Email,FullName
0,John,Jingle,johnjingle33@fakeemail.com,John Jingle
1,Jacob,Heimer,jacobheimer@fakeemail.com,Jacob Heimer
2,Jennifer,Smith,bobbydoe@gmail.com,Jennifer Smith
3,Jordan,Daniel,jdaniel@email.com,Jordan Daniel
4,Jaqueline,Fishwall,jaqlfish@email.com,Jaqueline Fishwall


In [31]:
# Apply lowercase series lambda function to all elements in the
# email column, finalize op by setting equal to current df column.
df['Email'] = df['Email'].apply(lambda x: x.lower())
df

Unnamed: 0,FirstName,LastName,Email,FullName
0,John,Jingle,johnjingle33@fakeemail.com,John Jingle
1,Jacob,Heimer,jacobheimer@fakeemail.com,Jacob Heimer
2,Jennifer,Smith,bobbydoe@gmail.com,Jennifer Smith
3,Jordan,Daniel,jdaniel@email.com,Jordan Daniel
4,Jaqueline,Fishwall,jaqlfish@email.com,Jaqueline Fishwall


In [32]:
# How to delete columns from a dataframe, use inplace=True to finalize the operation.
df.drop(columns=['FirstName', 'LastName'], inplace=True)

In [33]:
df

Unnamed: 0,Email,FullName
0,johnjingle33@fakeemail.com,John Jingle
1,jacobheimer@fakeemail.com,Jacob Heimer
2,bobbydoe@gmail.com,Jennifer Smith
3,jdaniel@email.com,Jordan Daniel
4,jaqlfish@email.com,Jaqueline Fishwall


In [34]:
# df['FullName'].str.split(' ')
# expand=True makes it so that the values are split into to pandas columns
# expand is passed here within the method split, which searches for the explicit
# space ' ' to split the string values on!
df['FullName'].str.split(' ', expand=True)

Unnamed: 0,0,1
0,John,Jingle
1,Jacob,Heimer
2,Jennifer,Smith
3,Jordan,Daniel
4,Jaqueline,Fishwall


In [35]:
# Adding the split columns to the dataframe, set df col names = to
# previous command
df[['FirstName', 'LastName']] = df['FullName'].str.split(' ', expand=True)

In [40]:
df

Unnamed: 0,Email,FullName,FirstName,LastName
0,johnjingle33@fakeemail.com,John Jingle,John,Jingle
1,jacobheimer@fakeemail.com,Jacob Heimer,Jacob,Heimer
2,bobbydoe@gmail.com,Jennifer Smith,Jennifer,Smith
3,jdaniel@email.com,Jordan Daniel,Jordan,Daniel
4,jaqlfish@email.com,Jaqueline Fishwall,Jaqueline,Fishwall


In [44]:
# Use a conditional to filter the data
# 

filt = (df['LastName'] == 'Jingle') | (df['FirstName'] == 'Jacob') | (df['LastName'] == 'Daniel') | (df['LastName'] == 'Fishwall')
df.loc[~filt, 'Email']


2    bobbydoe@gmail.com
Name: Email, dtype: object