In [1]:
import pandas as pd

# Creating a DataFrame

In [2]:
# Build the example table from a column-oriented dict: each key becomes a
# column label and each list supplies that column's values in row order.
data = {
    'Name': ['John', 'Jane', 'Mike', 'Emily'],
    'Age': [25, 30, 28, 32],
    'City': ['New York', 'London', 'Paris', 'Sydney'],
}
df = pd.DataFrame(data)

# Caption first, then the plain-text rendering of the frame.
print("DataFrame:", df, sep="\n")

DataFrame:
    Name  Age      City
0   John   25  New York
1   Jane   30    London
2   Mike   28     Paris
3  Emily   32    Sydney


# Selecting columns

In [3]:
# Pull out one column by its label. Indexing with a single label yields a
# pandas Series (note the "Name: ..., dtype: ..." footer when printed).
name_column = df['Name']
print("\nName Column:", name_column, sep="\n")


Name Column:
0     John
1     Jane
2     Mike
3    Emily
Name: Name, dtype: object


In [4]:
# Indexing with a list of labels keeps exactly those columns, in the listed
# order, and the result is still a DataFrame.
wanted_columns = ['Name', 'Age']
name_age_columns = df[wanted_columns]
print("\nName and Age Columns:", name_age_columns, sep="\n")


Name and Age Columns:
    Name  Age
0   John   25
1   Jane   30
2   Mike   28
3  Emily   32


# Filtering rows

In [5]:
# Keep only the rows whose Age is strictly greater than 25. The comparison
# produces a boolean Series, which is then used as a row mask.
older_than_25 = df['Age'] > 25
filtered_df = df[older_than_25]
print("\nFiltered DataFrame:", filtered_df, sep="\n")


Filtered DataFrame:
    Name  Age    City
1   Jane   30  London
2   Mike   28   Paris
3  Emily   32  Sydney


# Sorting a DataFrame

In [6]:
# Order the rows by Age, smallest first. sort_values returns a new frame;
# each row keeps its original index label, so the index is no longer 0..n-1.
sorted_df = df.sort_values(by='Age')
print("\nSorted DataFrame:", sorted_df, sep="\n")


Sorted DataFrame:
    Name  Age      City
0   John   25  New York
2   Mike   28     Paris
1   Jane   30    London
3  Emily   32    Sydney


# Adding and removing columns


In [7]:
# Attach a 'Gender' column to df. The list supplies one value per existing
# row, matched by position.
df['Gender'] = [
    'Male',
    'Female',
    'Male',
    'Female',
]
print("\nDataFrame with new column:", df, sep="\n")


DataFrame with new column:
    Name  Age      City  Gender
0   John   25  New York    Male
1   Jane   30    London  Female
2   Mike   28     Paris    Male
3  Emily   32    Sydney  Female


In [8]:
# Remove the 'City' column.
# drop() returns a new frame, which is rebound to `df`. Because `df` is
# overwritten, running this cell a second time would look for a 'City'
# column that no longer exists and raise KeyError; errors='ignore' makes the
# cell safe to re-run while producing the same result on the first run.
df = df.drop(columns='City', errors='ignore')
print("\nDataFrame with column removed:")
print(df)


DataFrame with column removed:
    Name  Age  Gender
0   John   25    Male
1   Jane   30  Female
2   Mike   28    Male
3  Emily   32  Female


# Grouping and aggregation

# Handling missing values

In [10]:
# Rebuild the example table, this time with gaps: one Age and one City are
# given as None so the cells that follow have missing values to work with.
data = {
    'Name': ['John', 'Jane', 'Mike', 'Emily'],
    'Age': [25, None, 28, 32],
    'City': ['New York', 'London', None, 'Sydney'],
}
df = pd.DataFrame(data)

# In the printed frame the missing Age shows up as NaN and the column is
# rendered as floats (25.0, 28.0, ...).
print("\nDataFrame with missing values:", df, sep="\n")


DataFrame with missing values:
    Name   Age      City
0   John  25.0  New York
1   Jane   NaN    London
2   Mike  28.0      None
3  Emily  32.0    Sydney


In [11]:
# Count the missing entries in each column: isnull() flags every missing
# cell as True and sum() adds those flags up column by column.
missing_per_column = df.isnull().sum()
print("\nMissing Values:", missing_per_column, sep="\n")


Missing Values:
Name    0
Age     1
City    1
dtype: int64


In [12]:
# Replace every missing entry with one fixed value. fillna returns a new
# frame, so `df` itself keeps its gaps.
# NOTE: a scalar fill value is applied to all columns, so the missing City
# is also replaced by 0; pass a dict of per-column values to fillna if each
# column needs its own placeholder.
fill_value = 0
df_filled = df.fillna(fill_value)
print("\nDataFrame with filled missing values:", df_filled, sep="\n")


DataFrame with filled missing values:
    Name   Age      City
0   John  25.0  New York
1   Jane   0.0    London
2   Mike  28.0         0
3  Emily  32.0    Sydney


# Merging DataFrames

In [13]:
# Two small tables that share the 'Name' column: the first carries ages,
# the second carries cities.
data1 = {
    'Name': ['John', 'Jane', 'Mike', 'Emily'],
    'Age': [25, 30, 28, 32],
}
data2 = {
    'Name': ['John', 'Jane', 'Mike', 'Emily'],
    'City': ['New York', 'London', 'Paris', 'Sydney'],
}
df1, df2 = pd.DataFrame(data1), pd.DataFrame(data2)

In [14]:
# Join the two tables on their shared 'Name' column, so each person's age
# and city end up on a single row.
merged_df = df1.merge(df2, on='Name')
print("\nMerged DataFrame:", merged_df, sep="\n")


Merged DataFrame:
    Name  Age      City
0   John   25  New York
1   Jane   30    London
2   Mike   28     Paris
3  Emily   32    Sydney


# Pivot Tables


In [15]:
# Long-format score table: one row per (student, subject) pair. The four
# students appear once for Math and once more for Science.
data = {
    'Name': ['John', 'Jane', 'Mike', 'Emily'] * 2,
    'Subject': ['Math'] * 4 + ['Science'] * 4,
    'Score': [85, 90, 92, 88, 78, 82, 80, 85],
}
df = pd.DataFrame(data)

In [16]:
# Reshape the long score table to wide form: one row per Name, one column
# per Subject, each cell holding the mean Score for that pair.
pivot_table = df.pivot_table(
    values='Score',
    index='Name',
    columns='Subject',
    aggfunc='mean',
)
print("\nPivot Table:", pivot_table, sep="\n")


Pivot Table:
Subject  Math  Science
Name                  
Emily      88       85
Jane       90       82
John       85       78
Mike       92       80
