In [None]:
# 1. Import Pandas
import pandas as pd

# Walk-through of basic pandas operations on a small employee table:
# build the DataFrame, inspect and sort it, inject and repair missing
# values, save the cleaned result to CSV, and aggregate salary by city.

# 2. Create the DataFrame
data = {
    'Name': ['John', 'Alice', 'Bob', 'Charlie', 'David'],
    'Age': [23, 30, 25, 35, 28],
    'City': ['New York', 'Los Angeles', 'Chicago', 'San Francisco', 'New York'],
    'Salary': [50000, 70000, 45000, 80000, 55000]
}
df = pd.DataFrame(data)

# 3. Print the DataFrame
print("Original DataFrame:")
print(df)

# 4. Basic Operations
# a. Display the "Name" and "Salary" columns only
print("\nName and Salary columns:")
print(df[['Name', 'Salary']])

# b. Show the first 3 rows
print("\nFirst 3 rows:")
print(df.head(3))

# c. Find the average salary
average_salary = df['Salary'].mean()
print(f"\nAverage Salary: ${average_salary:.2f}")

# d. Filter rows where Age >= 30
print("\nRows where Age >= 30:")
print(df[df['Age'] >= 30])

# 5. Sorting
# sort_values returns a new DataFrame, so `df` keeps its original row order.
# a. Sort by "Salary" in descending order
print("\nSorted by Salary (descending):")
print(df.sort_values(by='Salary', ascending=False))

# b. Sort by "City" then by "Age" (both ascending)
print("\nSorted by City and Age:")
print(df.sort_values(by=['City', 'Age']))

# 6. Missing Data
# An int64 column cannot hold NaN. Cast to float64 explicitly instead of
# relying on pandas' silent upcast when assigning a missing value, which is
# deprecated in recent pandas versions.
df['Salary'] = df['Salary'].astype('float64')

# Add missing values
df.loc[2, 'Salary'] = None  # Bob's salary is now missing
df.loc[4, 'City'] = None    # David's city is now missing

# a. Display DataFrame with missing values
print("\nDataFrame with missing values:")
print(df)

# b. Fill missing salary with the average (mean() skips NaN by default)
# Assign the result back to the column: calling fillna(..., inplace=True) on
# `df['Salary']` operates on an intermediate object and, with Copy-on-Write,
# leaves `df` unchanged.
avg_salary = df['Salary'].mean()
df['Salary'] = df['Salary'].fillna(avg_salary)

# c. Fill missing city with "Unknown"
df['City'] = df['City'].fillna("Unknown")

# d. Drop any rows where there is a missing value (should be none left after filling)
cleaned_df = df.dropna()

print("\nDataFrame after filling missing values:")
print(df)

print("\nDataFrame after dropping any remaining missing values:")
print(cleaned_df)

# 7. Save the cleaned data to a CSV file
# index=False keeps the row index out of the written file.
cleaned_df.to_csv("Cleaned_data.csv", index=False)
print("\nData saved to Cleaned_data.csv")

# Bonus Challenge: Group by City and find average salary
# David's row is grouped under "Unknown" because his city was filled above.
print("\nAverage Salary by City:")
grouped_salary = df.groupby('City')['Salary'].mean()
print(grouped_salary)
