## Inner, Left, Right, Outer Joins

In [1]:
import pandas as pd

# Example DataFrames
data1 = {'ID': [1, 2, 3], 'Name': ['Alice', 'Bob', 'Charlie']}
data2 = {'ID': [2, 3, 4], 'Age': [24, 27, 22]}

df1 = pd.DataFrame(data1)
df2 = pd.DataFrame(data2)

# Inner Join
inner_join = pd.merge(df1, df2, on='ID', how='inner')
print("Inner Join:\n", inner_join)

# Left Join
left_join = pd.merge(df1, df2, on='ID', how='left')
print("Left Join:\n", left_join)

# Outer Join
outer_join = pd.merge(df1, df2, on='ID', how='outer')
print("Outer Join:\n", outer_join)

Inner Join:
    ID     Name  Age
0   2      Bob   24
1   3  Charlie   27
Left Join:
    ID     Name   Age
0   1    Alice   NaN
1   2      Bob  24.0
2   3  Charlie  27.0
Outer Join:
    ID     Name   Age
0   1    Alice   NaN
1   2      Bob  24.0
2   3  Charlie  27.0
3   4      NaN  22.0


## Synthetic Variables

In [2]:
# Example DataFrame
data = {'Name': ['Alice', 'Bob', 'Charlie'],
        'Price': [100, 150, 200],
        'Quantity': [2, 3, 4]}

df = pd.DataFrame(data)

# Creating a derived variable for total cost
df['Total Cost'] = df['Price'] * df['Quantity']
print(df)

      Name  Price  Quantity  Total Cost
0    Alice    100         2         200
1      Bob    150         3         450
2  Charlie    200         4         800


In [3]:
# Example DataFrame with ages
data = {'Name': ['Alice', 'Bob', 'Charlie'], 'Age': [25, 45, 65]}

df = pd.DataFrame(data)

# Binning Age into categories
bins = [0, 30, 60, 100]
labels = ['Young', 'Middle-aged', 'Senior']
df['Age Group'] = pd.cut(df['Age'], bins=bins, labels=labels)
print(df)

      Name  Age    Age Group
0    Alice   25        Young
1      Bob   45  Middle-aged
2  Charlie   65       Senior
