# Pandas Data Frames

A pandas DataFrame is a two-dimensional labeled data structure capable of holding data of various types—integers, floats, strings, Python objects, and more.

## Create Data Frames

In [3]:
import pandas as pd

# Column-oriented sample data: each key becomes a column label and each
# list supplies that column's values in row order.
data_dict = dict(
    Name=["Alice", "Bob", "Chris"],
    Age=[21, 26, 34],
    Address=["Washington, DC", "Newyork City, NY", "Richmond, VA"],
)

# Build the DataFrame from the dictionary; rows get a default 0..n-1 index.
data_frame = pd.DataFrame(data=data_dict)

print(data_frame)

    Name  Age           Address
0  Alice   21    Washington, DC
1    Bob   26  Newyork City, NY
2  Chris   34      Richmond, VA


## Data Frame Methods and Attributes

In [4]:
# head(n) returns the first n rows; with no argument n defaults to 5,
# so on this 3-row frame the second call shows every row.
print(data_frame.head(2), data_frame.head(), sep="\n")
# tail(n) is the counterpart for the last n rows, with the same default of 5.
print(data_frame.tail(2), data_frame.tail(), sep="\n")
# shape is a (rows, columns) tuple giving the size of each dimension.
print(data_frame.shape)
# columns holds the column labels of the DataFrame.
print(data_frame.columns)
# dtypes lists the data type of each column.
print(data_frame.dtypes)

    Name  Age           Address
0  Alice   21    Washington, DC
1    Bob   26  Newyork City, NY
    Name  Age           Address
0  Alice   21    Washington, DC
1    Bob   26  Newyork City, NY
2  Chris   34      Richmond, VA
    Name  Age           Address
1    Bob   26  Newyork City, NY
2  Chris   34      Richmond, VA
    Name  Age           Address
0  Alice   21    Washington, DC
1    Bob   26  Newyork City, NY
2  Chris   34      Richmond, VA
(3, 3)
Index(['Name', 'Age', 'Address'], dtype='object')
Name       object
Age         int64
Address    object
dtype: object


## λ (Lambda) Function for DataFrame Manipulation

In [5]:
# Age from which a person counts as an adult (18, the usual age of majority).
ADULT_AGE = 18

# Apply a lambda to every value in "Age" to derive a "Y"/"N" flag column.
# Assigning to a new column label adds that column to data_frame in place.
data_frame["isAdult"] = data_frame["Age"].apply(
    lambda age: "Y" if age >= ADULT_AGE else "N"
)
print(data_frame)

    Name  Age           Address isAdult
0  Alice   21    Washington, DC       Y
1    Bob   26  Newyork City, NY       Y
2  Chris   34      Richmond, VA       Y


## Concatenation of Two Data Frames

In [6]:
# A one-row frame with the same columns as data_frame, built from a single record.
data_frame2 = pd.DataFrame(
    [{"Name": "David", "Age": 14, "Address": "Chicago, IL", "isAdult": "N"}]
)

# Stack the two frames row-wise; ignore_index=True renumbers the rows from 0
# instead of keeping each frame's own row labels.
frames_to_stack = [data_frame, data_frame2]
data_frame_concatenated = pd.concat(frames_to_stack, ignore_index=True)

print(data_frame_concatenated)

    Name  Age           Address isAdult
0  Alice   21    Washington, DC       Y
1    Bob   26  Newyork City, NY       Y
2  Chris   34      Richmond, VA       Y
3  David   14       Chicago, IL       N


## Find Elements

In [7]:
# Select a single column by its label; the result is a Series
name_column = data_frame["Name"]
print(name_column)
# Select several columns by passing a list of labels; the result is a DataFrame
wanted_columns = ["Name", "Age"]
print(data_frame[wanted_columns])

0    Alice
1      Bob
2    Chris
Name: Name, dtype: object
    Name  Age
0  Alice   21
1    Bob   26
2  Chris   34


In [8]:
# Positional (integer-location) lookup with iloc; positions are zero-based
# Value in the third row and third column
row_pos, col_pos = 2, 2
print(data_frame.iloc[row_pos, col_pos])
# First two rows and first two columns
top_left = data_frame.iloc[0:2, 0:2]
print(top_left)

Richmond, VA
    Name  Age
0  Alice   21
1    Bob   26


## Descriptive Statistics

### Central Tendency

In [9]:
# Measures of central tendency (mean, median, mode) for the Age column
age_subset = data_frame["Age"]
print(data_frame)

# Compute the three statistics first, then report them in order.
mean_age = age_subset.mean()
median_age = age_subset.median()
# mode() returns every most-frequent value in ascending order; .iloc[0] keeps
# only the first. Every age here is unique, so all values tie and 21 is shown.
first_mode_age = age_subset.mode().iloc[0]

print(f"Mean of the age: {mean_age}")
print(f"Median of the age: {median_age}")
print(f"Mode of the age: {first_mode_age}")

    Name  Age           Address isAdult
0  Alice   21    Washington, DC       Y
1    Bob   26  Newyork City, NY       Y
2  Chris   34      Richmond, VA       Y
Mean of the age: 27.0
Median of the age: 26.0
Mode of the age: 21


## Data Filtering

In [10]:
# Keep only the rows for people younger than 30

# Boolean mask: True for each row whose Age is below 30
is_under_30 = data_frame["Age"] < 30
youthful = data_frame.loc[is_under_30]

print(f"Persons below 30 are {youthful}")

Persons below 30 are     Name  Age           Address isAdult
0  Alice   21    Washington, DC       Y
1    Bob   26  Newyork City, NY       Y
