In [1]:
import pandas as pd

In [2]:
# Read the practice dataset; the path is relative to the notebook's working directory.
df = pd.read_csv(
    "pandas_practice_dataset.csv",
)
# Preview the first rows to check that the columns loaded as expected.
df.head()

Unnamed: 0,id,name,age,department,salary
0,1,Ali,25,IT,5000
1,2,Sara,30,HR,6000
2,3,John,22,IT,4500
3,4,Mona,28,Finance,7000
4,5,Omar,35,IT,8000


In [4]:
# Keep only the rows whose department is exactly "IT".
is_it = df["department"].eq("IT")
it_employees = df.loc[is_it]
it_employees

Unnamed: 0,id,name,age,department,salary
0,1,Ali,25,IT,5000
2,3,John,22,IT,4500
4,5,Omar,35,IT,8000
9,10,Dina,26,IT,5300
15,16,Nada,32,IT,6200
18,19,Samir,34,IT,6100


In [5]:
# Employees earning strictly more than 6000 (a salary of exactly 6000 is excluded).
high_salary = df.loc[df["salary"].gt(6000)]
high_salary

Unnamed: 0,id,name,age,department,salary
3,4,Mona,28,Finance,7000
4,5,Omar,35,IT,8000
7,8,Nour,29,Finance,7200
10,11,Hassan,40,Management,9000
12,13,Tarek,33,Finance,6800
14,15,Mostafa,38,Management,8500
15,16,Nada,32,IT,6200
18,19,Samir,34,IT,6100
19,20,Rania,28,Finance,7500


In [8]:
# Derived columns, written onto df in place:
#   bonus    -> 10% of the current salary
#   age_5yrs -> the employee's age five years from now
df["bonus"] = df["salary"].mul(0.1)
df["age_5yrs"] = df["age"].add(5)

# Preview the frame with the two new columns.
df.head()

Unnamed: 0,id,name,age,department,salary,bonus,age_5yrs
0,1,Ali,25,IT,5000,500.0,30
1,2,Sara,30,HR,6000,600.0,35
2,3,John,22,IT,4500,450.0,27
3,4,Mona,28,Finance,7000,700.0,33
4,5,Omar,35,IT,8000,800.0,40


In [9]:
# Mean salary for each department, returned as a Series indexed by department.
salary_by_dept = df.groupby("department")["salary"]
avg_salary = salary_by_dept.mean()
avg_salary

department
Finance       7125.0
HR            5425.0
IT            5850.0
Management    8750.0
Sales         5125.0
Name: salary, dtype: float64

In [10]:
# Head count per department: counts the non-null "id" values within each group.
count_emp = df.groupby("department")["id"].agg("count")
count_emp

department
Finance       4
HR            4
IT            6
Management    2
Sales         4
Name: id, dtype: int64

In [11]:
# Per-department salary summary: one column for the mean, one for the maximum.
salary_stats = ["mean", "max"]
df.groupby("department")["salary"].agg(salary_stats)

Unnamed: 0_level_0,mean,max
department,Unnamed: 1_level_1,Unnamed: 2_level_1
Finance,7125.0,7500
HR,5425.0,6000
IT,5850.0,8000
Management,8750.0,9000
Sales,5125.0,5900


In [12]:
# Show the table ordered from the highest salary to the lowest.
# sort_values returns a new frame; df itself keeps its original row order.
df.sort_values(by="salary", ascending=False)

Unnamed: 0,id,name,age,department,salary,bonus,age_5yrs
10,11,Hassan,40,Management,9000,900.0,45
14,15,Mostafa,38,Management,8500,850.0,43
4,5,Omar,35,IT,8000,800.0,40
19,20,Rania,28,Finance,7500,750.0,33
7,8,Nour,29,Finance,7200,720.0,34
3,4,Mona,28,Finance,7000,700.0,33
12,13,Tarek,33,Finance,6800,680.0,38
15,16,Nada,32,IT,6200,620.0,37
18,19,Samir,34,IT,6100,610.0,39
1,2,Sara,30,HR,6000,600.0,35


In [14]:
# Rank employees by salary, with rank 1 going to the highest salary.
# method="average" is the pandas default: tied salaries share the mean of their
# ranks, and the resulting ranks are floats.
df["salary_rank"] = df["salary"].rank(method="average", ascending=False)
df.head()

Unnamed: 0,id,name,age,department,salary,bonus,age_5yrs,salary_rank
0,1,Ali,25,IT,5000,500.0,30,16.0
1,2,Sara,30,HR,6000,600.0,35,10.0
2,3,John,22,IT,4500,450.0,27,20.0
3,4,Mona,28,Finance,7000,700.0,33,6.0
4,5,Omar,35,IT,8000,800.0,40,3.0


In [15]:
# A second DataFrame holding extra bonus amounts, defined here for ids 1-3 only.
bonus_df = pd.DataFrame({
    "id": [1, 2, 3],
    "extra_bonus": [100, 200, 150],
})

# Left-merge on employee id so every row of df is kept; employees with no entry
# in bonus_df get NaN in "extra_bonus".
# - Dropping any existing "extra_bonus" column first keeps this cell re-runnable:
#   merging a second time would otherwise yield extra_bonus_x / extra_bonus_y.
# - validate="many_to_one" raises MergeError if bonus_df ever contains duplicate
#   ids, which would otherwise silently duplicate employee rows.
df = pd.merge(
    df.drop(columns="extra_bonus", errors="ignore"),
    bonus_df,
    on="id",
    how="left",
    validate="many_to_one",
)
df.head()

Unnamed: 0,id,name,age,department,salary,bonus,age_5yrs,salary_rank,extra_bonus
0,1,Ali,25,IT,5000,500.0,30,16.0,100.0
1,2,Sara,30,HR,6000,600.0,35,10.0,200.0
2,3,John,22,IT,4500,450.0,27,20.0,150.0
3,4,Mona,28,Finance,7000,700.0,33,6.0,
4,5,Omar,35,IT,8000,800.0,40,3.0,


In [16]:
# Mean salary per department, expressed as a pivot table (one row per department).
df.pivot_table(
    index="department",
    values="salary",
    aggfunc="mean",
)

Unnamed: 0_level_0,salary
department,Unnamed: 1_level_1
Finance,7125.0
HR,5425.0
IT,5850.0
Management,8750.0
Sales,5125.0


In [17]:
# Element-wise apply on a single column: add a flat 500 to every salary.
# Note: despite the column name, the "bonus" column is not used in this sum.
df["salary_plus_bonus"] = df["salary"].apply(lambda amount: amount + 500)

# Column-wise apply on a sub-frame: scale both salary and bonus by 1.05.
# This overwrites the two columns in place, so re-running the cell compounds
# the increase.
raise_cols = ["salary", "bonus"]
df[raise_cols] = df[raise_cols].apply(lambda column: column * 1.05)

In [18]:
# Preview the first five rows of df in its current state.
df.head(n=5)

Unnamed: 0,id,name,age,department,salary,bonus,age_5yrs,salary_rank,extra_bonus,salary_plus_bonus
0,1,Ali,25,IT,5250.0,525.0,30,16.0,100.0,5500
1,2,Sara,30,HR,6300.0,630.0,35,10.0,200.0,6500
2,3,John,22,IT,4725.0,472.5,27,20.0,150.0,5000
3,4,Mona,28,Finance,7350.0,735.0,33,6.0,,7500
4,5,Omar,35,IT,8400.0,840.0,40,3.0,,8500
