# In [None]:
# 1. Import Pandas library
import pandas as pd  # Pandas is a powerful data manipulation library

# 2. Create a DataFrame with the provided data
# Each key becomes a column; each list holds that column's values in row order.
data = {
    'Name': ['John', 'Alice', 'Bob', 'Charlie', 'David'],
    'Age': [23, 30, 25, 35, 28],
    'City': ['New York', 'Los Angeles', 'Chicago', 'San Francisco', 'New York'],
    'Salary': [50000, 70000, 45000, 80000, 55000],
}

# Convert the dictionary into a DataFrame (tabular structure)
df = pd.DataFrame(data)

# 3. Print the DataFrame to confirm creation
print("Full DataFrame:")
print(df)

# 4a. Display the "Name" and "Salary" columns only
# A list of labels inside [] selects several columns and returns a DataFrame.
print("\nName and Salary columns:")
print(df[['Name', 'Salary']])

# 4b. Show the first 3 rows of the DataFrame
print("\nFirst 3 rows:")
print(df.head(3))  # head(n) returns the first n rows

# 4c. Find the average salary
average_salary = df['Salary'].mean()
print(f"\nAverage Salary: ${average_salary:.2f}")

# 4d. Filter and display rows where Age >= 30
# The comparison yields a boolean Series that is used as a row mask.
print("\nPeople aged 30 and above:")
print(df[df['Age'] >= 30])

# 5a. Sort by Salary in descending order
# sort_values returns a new DataFrame; df itself is left untouched.
print("\nSorted by Salary (Descending):")
print(df.sort_values(by='Salary', ascending=False))

# 5b. Sort by City then Age (both ascending)
print("\nSorted by City and Age:")
print(df.sort_values(by=['City', 'Age']))

# 6. Introduce missing values
# Salary was built from integers, and an integer column cannot store NaN.
# Cast to float explicitly instead of relying on pandas silently upcasting the
# column during assignment (that implicit upcast is deprecated).
df['Salary'] = df['Salary'].astype('float64')
df.loc[2, 'Salary'] = float('nan')  # Set Bob's Salary to NaN
df.loc[4, 'City'] = None            # Set David's City to a missing value

# 6a. Display DataFrame with missing values
print("\nDataFrame with missing values:")
print(df)

# 6b. Fill missing Salary with the average of existing salaries
avg_salary = df['Salary'].mean()  # mean() skips NaN by default
# Assign the filled column back to df. Calling fillna(..., inplace=True) on
# df['Salary'] acts on an intermediate object, which pandas warns about and
# which does not update df when Copy-on-Write is active.
df['Salary'] = df['Salary'].fillna(avg_salary)

# 6c. Fill missing City values with "Unknown"
df['City'] = df['City'].fillna('Unknown')

# Display DataFrame after filling missing values
print("\nAfter filling missing values:")
print(df)

# 6d. Drop rows with any remaining missing values (none should exist now)
df_cleaned = df.dropna()

# Show cleaned DataFrame
print("\nCleaned DataFrame (no missing values):")
print(df_cleaned)

# 7. Save the cleaned DataFrame to a CSV file
df_cleaned.to_csv('Cl
