In [1]:
import pandas as pd
import numpy as np

# 1) Data Loading

In [2]:
# Sample records for the walkthrough: one list per column, aligned by position.
people = dict(
    first=["yonathan", "efrem", "daniel"],
    last=["cherkos", "muse", "cherkos"],
    email=[
        "yonycherkos@gmail.com",
        "efremmuse@gmail.com",
        "danielcherkos@gmail.com",
    ],
)

In [3]:
# Each key of `people` becomes a column; rows get the default 0..n-1 index.
df = pd.DataFrame(data=people)

In [4]:
df

Unnamed: 0,first,last,email
0,yonathan,cherkos,yonycherkos@gmail.com
1,efrem,muse,efremmuse@gmail.com
2,daniel,cherkos,danielcherkos@gmail.com


# 2) Selecting Rows and Columns

In [14]:
df.email

0      yonycherkos@gmail.com
1        efremmuse@gmail.com
2    danielcherkos@gmail.com
Name: email, dtype: object

In [15]:
df["email"]

0      yonycherkos@gmail.com
1        efremmuse@gmail.com
2    danielcherkos@gmail.com
Name: email, dtype: object

In [16]:
df.iloc[0]

first                 yonathan
last                   cherkos
email    yonycherkos@gmail.com
Name: 0, dtype: object

In [17]:
df.loc[0]

first                 yonathan
last                   cherkos
email    yonycherkos@gmail.com
Name: 0, dtype: object

In [19]:
df.loc[1, ["last", "email"]]

last                    muse
email    efremmuse@gmail.com
Name: 1, dtype: object

# 3) Set, Reset, and Use Indexes

In [20]:
df.set_index("email")

Unnamed: 0_level_0,first,last
email,Unnamed: 1_level_1,Unnamed: 2_level_1
yonycherkos@gmail.com,yonathan,cherkos
efremmuse@gmail.com,efrem,muse
danielcherkos@gmail.com,daniel,cherkos


In [21]:
df

Unnamed: 0,first,last,email
0,yonathan,cherkos,yonycherkos@gmail.com
1,efrem,muse,efremmuse@gmail.com
2,daniel,cherkos,danielcherkos@gmail.com


In [23]:
# set_index returns a new frame and leaves df untouched (as the previous two
# cells show), so rebind df to keep the change. Rebinding is preferred over
# inplace=True: it chains and makes the data flow explicit.
df = df.set_index("email")

In [24]:
df

Unnamed: 0_level_0,first,last
email,Unnamed: 1_level_1,Unnamed: 2_level_1
yonycherkos@gmail.com,yonathan,cherkos
efremmuse@gmail.com,efrem,muse
danielcherkos@gmail.com,daniel,cherkos


In [25]:
# Move "email" back out of the index into a regular column and restore the
# default integer index. Rebind instead of using inplace=True.
df = df.reset_index()
df

Unnamed: 0,email,first,last
0,yonycherkos@gmail.com,yonathan,cherkos
1,efremmuse@gmail.com,efrem,muse
2,danielcherkos@gmail.com,daniel,cherkos


# 4) Using Conditionals to Filter Rows and Columns

In [26]:
# Boolean mask: True for every row whose last name is exactly "cherkos".
filt = df["last"].eq("cherkos")

In [27]:
df[filt]

Unnamed: 0,email,first,last
0,yonycherkos@gmail.com,yonathan,cherkos
2,danielcherkos@gmail.com,daniel,cherkos


In [28]:
df.loc[filt, "email"]

0      yonycherkos@gmail.com
2    danielcherkos@gmail.com
Name: email, dtype: object

In [29]:
df.loc[~filt]

Unnamed: 0,email,first,last
1,efremmuse@gmail.com,efrem,muse


# 5) Updating Rows and Columns - Modifying Data Within DataFrames

In [5]:
df.columns

Index(['first', 'last', 'email'], dtype='object')

In [6]:
# Rename the name columns; columns not listed in the mapping keep their names.
# Rebind instead of using inplace=True.
df = df.rename(columns={'first': 'firstname', 'last': 'lastname'})

In [7]:
df

Unnamed: 0,firstname,lastname,email
0,yonathan,cherkos,yonycherkos@gmail.com
1,efrem,muse,efremmuse@gmail.com
2,daniel,cherkos,danielcherkos@gmail.com


In [8]:
# Select the "cherkos" rows with a boolean mask, then overwrite their lastname
# in a single .loc[rows, column] assignment so the write targets df directly.
filt = df["lastname"].eq("cherkos")
df.loc[filt, "lastname"] = "w/cherkos"

In [9]:
df

Unnamed: 0,firstname,lastname,email
0,yonathan,w/cherkos,yonycherkos@gmail.com
1,efrem,muse,efremmuse@gmail.com
2,daniel,w/cherkos,danielcherkos@gmail.com


In [10]:
def update_firstname(firstname: object) -> object:
    """Return ``firstname`` with its first character upper-cased and the rest lower-cased.

    Written for element-wise use with ``Series.apply``. Values that are not
    strings (for example ``NaN`` or ``None`` standing in for a missing name)
    are returned unchanged, so applying this to a column that contains
    missing data does not raise ``AttributeError``.
    """
    if not isinstance(firstname, str):
        return firstname
    return firstname.capitalize()

In [11]:
# Run update_firstname on every value of the column and store the result back.
df["firstname"] = df["firstname"].map(update_firstname)
df

Unnamed: 0,firstname,lastname,email
0,Yonathan,w/cherkos,yonycherkos@gmail.com
1,Efrem,muse,efremmuse@gmail.com
2,Daniel,w/cherkos,danielcherkos@gmail.com


In [12]:
# Assign the result back instead of calling replace(..., inplace=True) on the
# df["lastname"] selection: that chained in-place call acts on an intermediate
# Series and is not guaranteed to update df (it does not under Copy-on-Write).
df["lastname"] = df["lastname"].replace({"w/cherkos": "cherkos"})

In [13]:
df

Unnamed: 0,firstname,lastname,email
0,Yonathan,cherkos,yonycherkos@gmail.com
1,Efrem,muse,efremmuse@gmail.com
2,Daniel,cherkos,danielcherkos@gmail.com


# 6) Add/Remove Rows and Columns From DataFrames

In [17]:
# Section 5 renamed "first"/"last" in place, so on a fresh top-to-bottom run
# those columns no longer exist at this point. Rebuild the frame from `people`
# so this section starts from the original three columns.
df = pd.DataFrame(people)
df["first"] + " " + df["last"]

0    yonathan cherkos
1          efrem muse
2      daniel cherkos
dtype: object

In [18]:
# Append a "fullname" column built from the first and last name columns.
df = df.assign(fullname=df["first"] + " " + df["last"])

In [19]:
df

Unnamed: 0,first,last,email,fullname
0,yonathan,cherkos,yonycherkos@gmail.com,yonathan cherkos
1,efrem,muse,efremmuse@gmail.com,efrem muse
2,daniel,cherkos,danielcherkos@gmail.com,daniel cherkos


In [20]:
# Remove the two source columns now that "fullname" carries the same data.
# Rebind instead of using inplace=True.
df = df.drop(columns=["first", "last"])

In [21]:
df

Unnamed: 0,email,fullname
0,yonycherkos@gmail.com,yonathan cherkos
1,efremmuse@gmail.com,efrem muse
2,danielcherkos@gmail.com,daniel cherkos


In [22]:
df["fullname"].str.split(" ", expand=True)

Unnamed: 0,0,1
0,yonathan,cherkos
1,efrem,muse
2,daniel,cherkos


In [23]:
# Split on the first space only (n=1) so the result always has exactly two
# columns; without the limit, a name containing more than one space would
# produce extra columns and the two-column assignment would fail.
df[["first", "last"]] = df["fullname"].str.split(" ", n=1, expand=True)

In [24]:
df

Unnamed: 0,email,fullname,first,last
0,yonycherkos@gmail.com,yonathan cherkos,yonathan,cherkos
1,efremmuse@gmail.com,efrem muse,efrem,muse
2,danielcherkos@gmail.com,daniel cherkos,daniel,cherkos


In [29]:
# DataFrame.append was removed in pandas 2.0; pd.concat is the replacement.
# Columns absent from the new record ("email", "fullname", "last") become NaN,
# and ignore_index=True renumbers the rows 0..n-1.
df = pd.concat([df, pd.DataFrame([{"first": "eleny"}])], ignore_index=True)

In [30]:
df

Unnamed: 0,email,fullname,first,last
0,yonycherkos@gmail.com,yonathan cherkos,yonathan,cherkos
1,efremmuse@gmail.com,efrem muse,efrem,muse
2,danielcherkos@gmail.com,daniel cherkos,daniel,cherkos
3,,,eleny,


In [33]:
# Remove the row labelled 3 (the record added above).
df = df.drop(labels=3, axis="index")

In [34]:
df

Unnamed: 0,email,fullname,first,last
0,yonycherkos@gmail.com,yonathan cherkos,yonathan,cherkos
1,efremmuse@gmail.com,efrem muse,efrem,muse
2,danielcherkos@gmail.com,daniel cherkos,daniel,cherkos


# 7) Sorting Data

In [36]:
df.sort_values(by="last", ascending=True)

Unnamed: 0,email,fullname,first,last
0,yonycherkos@gmail.com,yonathan cherkos,yonathan,cherkos
2,danielcherkos@gmail.com,daniel cherkos,daniel,cherkos
1,efremmuse@gmail.com,efrem muse,efrem,muse


In [39]:
# Sort by last name ascending, breaking ties by first name descending.
# Rebind instead of using inplace=True.
df = df.sort_values(by=["last", "first"], ascending=[True, False])

In [40]:
df

Unnamed: 0,email,fullname,first,last
0,yonycherkos@gmail.com,yonathan cherkos,yonathan,cherkos
2,danielcherkos@gmail.com,daniel cherkos,daniel,cherkos
1,efremmuse@gmail.com,efrem muse,efrem,muse


# 8) Grouping and Aggregating

*TODO: this section is still empty — grouping and aggregation examples have not been written yet.*

# 9) Cleaning Data - Casting Datatypes and Handling Missing Values

In [5]:
# Same three columns as before, now with gaps: real missing markers
# (np.nan / None) mixed with the placeholder strings "NA" and "Missing".
people = dict(
    first=["yonathan", "efrem", "daniel", np.nan, np.nan, "NA"],
    last=["cherkos", "muse", "cherkos", None, np.nan, "Missing"],
    email=[
        "yonycherkos@gmail.com",
        "efremmuse@gmail.com",
        None,
        None,
        "danielcherkos@gmail.com",
        "Missing",
    ],
)

In [6]:
# Rebuild df from the `people` mapping that now contains missing values.
df = pd.DataFrame(data=people)
df

Unnamed: 0,first,last,email
0,yonathan,cherkos,yonycherkos@gmail.com
1,efrem,muse,efremmuse@gmail.com
2,daniel,cherkos,
3,,,
4,,,danielcherkos@gmail.com
5,,Missing,Missing


In [7]:
df.dropna()

Unnamed: 0,first,last,email
0,yonathan,cherkos,yonycherkos@gmail.com
1,efrem,muse,efremmuse@gmail.com
5,,Missing,Missing


In [12]:
df.dropna(axis="index", how="all")

Unnamed: 0,first,last,email
0,yonathan,cherkos,yonycherkos@gmail.com
1,efrem,muse,efremmuse@gmail.com
2,daniel,cherkos,
4,,,danielcherkos@gmail.com
5,,Missing,Missing


In [23]:
df.dropna(axis="index", how="all", subset=["email"])

Unnamed: 0,first,last,email
0,yonathan,cherkos,yonycherkos@gmail.com
1,efrem,muse,efremmuse@gmail.com
4,,,danielcherkos@gmail.com
5,,Missing,Missing


In [28]:
# Map the placeholder strings "NA" and "Missing" (and None) to NaN so that
# dropna treats them as missing. Rebind instead of using inplace=True.
df = df.replace({"NA": np.nan, "Missing": np.nan, None: np.nan})
df

Unnamed: 0,first,last,email
0,yonathan,cherkos,yonycherkos@gmail.com
1,efrem,muse,efremmuse@gmail.com
2,daniel,cherkos,
3,,,
4,,,danielcherkos@gmail.com
5,,,


In [30]:
df.dropna(axis="index", how="all")

Unnamed: 0,first,last,email
0,yonathan,cherkos,yonycherkos@gmail.com
1,efrem,muse,efremmuse@gmail.com
2,daniel,cherkos,
4,,,danielcherkos@gmail.com


In [33]:
# Append an integer "age" column, one value per row in row order.
df = df.assign(age=[24, 22, 25, 22, 21, 30])
df

Unnamed: 0,first,last,email,age
0,yonathan,cherkos,yonycherkos@gmail.com,24
1,efrem,muse,efremmuse@gmail.com,22
2,daniel,cherkos,,25
3,,,,22
4,,,danielcherkos@gmail.com,21
5,,,,30


In [37]:
df["age"].mean()

24.0