In [7]:
import pandas as pd

# Column-oriented employee records; each key becomes a DataFrame column.
data = {
    'Name': ['Alice', 'Bob', 'Charlie', 'Diana', 'Eve'],
    'Age': [24, 27, 22, 32, 29],
    'Department': ['HR', 'Finance', 'IT', 'Marketing', 'HR'],
    'Salary': [45000, 54000, 50000, 62000, 47000]
}
df = pd.DataFrame(data)

# a. Preview the top of the table (head() returns five rows by default)
print("First five rows:")
print(df.head(5))

# b. Descriptive statistics restricted to the two numeric columns of interest
numeric_columns = ['Age', 'Salary']
print("\nSummary statistics for Age and Salary:")
print(df.loc[:, numeric_columns].describe())

# c. Mean salary over the rows whose Department is 'HR'
#    (a single .loc lookup selects rows and column in one step)
is_hr = df['Department'] == 'HR'
avg_hr_salary = df.loc[is_hr, 'Salary'].mean()
print("\nAverage salary in HR department:", avg_hr_salary)


First five rows:
      Name  Age Department  Salary
0    Alice   24         HR   45000
1      Bob   27    Finance   54000
2  Charlie   22         IT   50000
3    Diana   32  Marketing   62000
4      Eve   29         HR   47000

Summary statistics for Age and Salary:
             Age        Salary
count   5.000000      5.000000
mean   26.800000  51600.000000
std     3.962323   6730.527468
min    22.000000  45000.000000
25%    24.000000  47000.000000
50%    27.000000  50000.000000
75%    29.000000  54000.000000
max    32.000000  62000.000000

Average salary in HR department: 46000.0


In [8]:
# Derive a 'Bonus' column equal to 10% of each employee's salary.
# The column is written onto df itself, so cells that run afterwards see it.
bonus_rate = 0.10
df['Bonus'] = df['Salary'].mul(bonus_rate)
print("DataFrame with Bonus column:")
print(df)


DataFrame with Bonus column:
      Name  Age Department  Salary   Bonus
0    Alice   24         HR   45000  4500.0
1      Bob   27    Finance   54000  5400.0
2  Charlie   22         IT   50000  5000.0
3    Diana   32  Marketing   62000  6200.0
4      Eve   29         HR   47000  4700.0


In [9]:
# Keep only employees whose age lies in the closed range [25, 30].
# Series.between is inclusive on both ends by default, matching >= 25 and <= 30.
age_in_range = df['Age'].between(25, 30)
filtered_df = df[age_in_range]
print("Employees aged between 25 and 30:")
print(filtered_df)


Employees aged between 25 and 30:
  Name  Age Department  Salary   Bonus
1  Bob   27    Finance   54000  5400.0
4  Eve   29         HR   47000  4700.0


In [11]:
# Mean salary per department: bucket the 'Salary' column by 'Department',
# then reduce each bucket to its average.
salary_by_dept = df.groupby('Department')['Salary']
avg_salary_by_dept = salary_by_dept.agg('mean')
print("Average salary for each department:")
print(avg_salary_by_dept)


Average salary for each department:
Department
Finance      54000.0
HR           46000.0
IT           50000.0
Marketing    62000.0
Name: Salary, dtype: float64


In [12]:
# Order the rows from lowest to highest salary (sort_values sorts ascending
# by default and returns a new frame), then persist the result as CSV
# without the index column.
sorted_df = df.sort_values('Salary')
sorted_df.to_csv('sorted_by_salary.csv', index=False)
print("DataFrame sorted by Salary (ascending):")
print(sorted_df)
print("\nSaved to 'sorted_by_salary.csv'.")


DataFrame sorted by Salary (ascending):
      Name  Age Department  Salary   Bonus
0    Alice   24         HR   45000  4500.0
4      Eve   29         HR   47000  4700.0
2  Charlie   22         IT   50000  5000.0
1      Bob   27    Finance   54000  5400.0
3    Diana   32  Marketing   62000  6200.0

Saved to 'sorted_by_salary.csv'.
