# Data Manipulation with Pandas

**Creating Pandas Data Structures**

Creating a Pandas Series

In [None]:
import pandas as pd

# Build a one-dimensional Series from a plain Python list.
# No index is supplied, so pandas labels the values 0..4.
series = pd.Series(data=[10, 20, 30, 40, 50])
print(series)


Creating a DataFrame from a Dictionary

In [None]:
# Each dictionary key becomes a column; each list supplies that column's values.
data = {
    'Name': ['Alice', 'Bob', 'Charlie'],
    'Age': [25, 30, 35],
    'City': ['New York', 'Los Angeles', 'Chicago'],
}
df = pd.DataFrame(data=data)
print(df)


**Viewing and Inspecting Data**

Viewing the First Few Rows

In [None]:
# head() returns the first rows (pandas' default is 5); this frame has only
# three rows, so all of them are shown.
df.head()

Getting DataFrame Information

In [None]:
# Print a concise summary of the frame: index range, column names,
# non-null counts, dtypes and memory usage.
df.info()

Describing Statistical Information

In [None]:
# describe() summarizes only numeric columns by default, so of
# Name/Age/City only 'Age' is reported here.
df.describe()

**Selecting and Filtering Data**

Selecting Columns

In [None]:
# Bracket indexing with a single column label returns that column as a Series.
names = df['Name']
print(names)

Filtering Rows Based on Condition

In [None]:
# Build the boolean mask first, then use it to keep only the matching rows.
is_over_30 = df['Age'] > 30
filtered_df = df[is_over_30]
print(filtered_df)


Selecting Specific Rows and Columns

In [None]:
# .loc slices by label and includes both endpoints, so 0:1 selects rows 0 and 1.
wanted_columns = ['Name', 'City']
selected_data = df.loc[0:1, wanted_columns]
print(selected_data)


**Modifying Data**


Adding a New Column

In [None]:
# Assigning to a new column label adds the column in place; the list must
# supply one value per existing row.
salaries = [70000, 80000, 90000]
df['Salary'] = salaries
print(df)


Updating Column Values

In [None]:
# Vectorized update: every age is increased by one in a single operation.
# NOTE: this cell is not idempotent; each re-run adds another year.
incremented_age = df['Age'] + 1
df['Age'] = incremented_age
print(df)


Dropping Columns

In [None]:
# Remove the 'City' column and rebind `df` to the result.
# `errors='ignore'` keeps this cell safe to re-run: without it, a second
# execution raises KeyError because 'City' has already been dropped.
df = df.drop(columns='City', errors='ignore')
print(df)


**Handling Missing Data**

Detecting Missing Values

In [None]:
# None entries become missing values (NaN) when the frame is built.
values_with_gaps = {
    'A': [1, 2, None],
    'B': [4, None, 6],
}
df_with_nan = pd.DataFrame(values_with_gaps)

# isnull() returns a same-shaped boolean frame: True where a value is missing.
missing_mask = df_with_nan.isnull()
print(missing_mask)

Filling Missing Values

In [None]:
# fillna returns a new frame with every missing value replaced by 0;
# df_with_nan itself is left unchanged.
filled_df = df_with_nan.fillna(value=0)
print(filled_df)


Dropping Rows with Missing Values

In [None]:
# Drop every row that contains at least one missing value. With the sample
# data only row 0 is complete, so it is the only row kept.
cleaned_df = df_with_nan.dropna(axis='index')
print(cleaned_df)

**Grouping and Aggregation**


Grouping Data and Calculating Mean

In [None]:
# NOTE: this rebinds `df`, replacing the Name/Age/Salary frame used by the
# earlier cells with a new Department/Salary frame.
department_salaries = {
    'Department': ['HR', 'IT', 'HR', 'IT'],
    'Salary': [50000, 60000, 45000, 80000],
}
df = pd.DataFrame(department_salaries)

# Split the rows by department, then average the remaining column per group.
by_department = df.groupby('Department')
grouped = by_department.mean()
print(grouped)


Applying Custom Functions with apply()

In [None]:
def double_salary(x):
    """Return ``x`` multiplied by two.

    Used with ``Series.apply``, which calls this once per element.
    """
    doubled = x * 2
    return doubled

# apply() calls double_salary on each salary and collects the results into a
# new Series, which is then stored as the 'Double Salary' column.
doubled_salaries = df['Salary'].apply(double_salary)
df['Double Salary'] = doubled_salaries
print(df)

# Practical Examples

In [None]:
import pandas as pd

# Load the CSV file into a Pandas DataFrame.
# The path is relative, so 'employees.csv' is looked up in the kernel's
# current working directory.
# NOTE(review): later cells read the 'Department', 'Salary', 'Age' and
# 'Joining_Date' columns, so the file is expected to provide them — confirm.
df = pd.read_csv('employees.csv')

Viewing the First Few Rows of the DataFrame

In [None]:
# Print a label, then let the notebook render the frame as the cell's last
# expression. The row count is spelled out to match the label.
print("First 5 Rows:\n")
df.head(5)

Displaying Basic Information About the DataFrame

In [None]:
# Print a label, then the frame's summary: column names, non-null counts,
# dtypes and memory usage. info() prints directly and returns None.
print("DataFrame Information:\n")
df.info()

Calculating Basic Statistics for Numerical Columns

In [None]:
# Print a label, then display summary statistics (count, mean, std, min,
# quartiles, max). By default pandas reports numeric columns only.
print("Statistical Summary:\n")
df.describe()

Filtering Employees by Department

In [None]:
# Build a boolean mask for the IT department, then keep only those rows.
in_it_department = df['Department'] == 'IT'
it_employees = df[in_it_department]
print("IT Department Employees:\n")
it_employees.head()

Calculating the Average Salary by Department

In [None]:
# Group the salaries by department, then take the mean of each group.
salary_by_dept = df.groupby('Department')['Salary']
average_salary_by_dept = salary_by_dept.mean()
print("Average Salary by Department:\n")
# head() displays at most the first five departments.
average_salary_by_dept.head()

Adding a New Column for Salary After a 5% Raise

In [None]:
# A 5% raise is the current salary multiplied by 1.05.
raise_multiplier = 1.05
df['Salary_After_Raise'] = df['Salary'] * raise_multiplier
print("DataFrame with Salary After Raise:\n")
df.head()

Sorting Employees by Age in Descending Order

In [None]:
# sort_values returns a new frame ordered oldest-first; `df` keeps its
# original row order.
sorted_by_age = df.sort_values('Age', ascending=False)
print("Employees Sorted by Age (Descending):\n")
sorted_by_age.head()

Calculating the Number of Employees in Each Department

In [None]:
# value_counts() tallies how often each department appears, most frequent first.
department_column = df['Department']
employee_count_by_dept = department_column.value_counts()
print("Number of Employees by Department:\n")
# head() displays at most the five largest departments.
employee_count_by_dept.head()


Finding Employees Who Joined After 2020

In [None]:
# Parse the joining dates so they can be compared as dates rather than strings.
df['Joining_Date'] = pd.to_datetime(df['Joining_Date'])

# "After 2020" means a joining year of 2021 or later. The previous filter
# (`> '2020-01-01'`) kept almost everyone who joined during 2020 and excluded
# only 1 January 2020, which did not match the heading or the printed label.
# Rows whose date could not be parsed (NaT) are excluded, as before.
joined_after_2020 = df['Joining_Date'].dt.year > 2020
recent_joins = df[joined_after_2020]
print("Employees Joined After 2020:\n")
recent_joins.head()

Saving the Filtered DataFrame to a New CSV File

In [None]:
# Write the filtered rows to disk; index=False leaves the row index out of
# the file.
output_path = 'recent_joins.csv'
recent_joins.to_csv(output_path, index=False)
print("Filtered data saved to recent_joins.csv.")