In [None]:
import pandas as pd
import numpy as np

# Small numeric frame with gaps in every column, used to compare the
# missing-data strategies below. Each strategy returns a new frame; `df`
# itself is never modified.
df = pd.DataFrame({
    'A': [1, 2, np.nan, 4],
    'B': [np.nan, 2, 3, 4],
    'C': [1, np.nan, np.nan, 4]
})

# Replace every NaN with the constant 0.
df_filled = df.fillna(0)

# Forward fill: copy the last valid value downward into each gap.
# DataFrame.ffill() is used instead of fillna(method='ffill') because the
# `method` argument of fillna is deprecated (pandas >= 2.1).
# Column B starts with NaN and has nothing above it, so that cell stays NaN.
df_ffill = df.ffill()

# Backward fill: copy the next valid value upward into each gap.
# DataFrame.bfill() replaces the deprecated fillna(method='bfill').
df_bfill = df.bfill()

# Keep only the rows that have no NaN in any column (here: the last row).
df_dropped = df.dropna()

# Fill gaps by interpolation using the method's defaults; the leading NaN in
# column B has no earlier point to interpolate from and is left as NaN.
df_interpolated = df.interpolate()


In [None]:
# Three categories (A, B, C) with two integer observations each.
data = {
    'Category': list('AABBCC'),
    'Values': list(range(1, 7)),
}
df = pd.DataFrame(data)

# Build the grouping once and reuse it for every aggregation below.
by_category = df.groupby('Category')

# Per-category total of every remaining column.
grouped_sum = by_category.sum()

# Several reducers on 'Values' at once; the result has two-level columns
# of the form ('Values', <reducer name>).
grouped_agg = by_category.agg({'Values': ['sum', 'mean', 'max']})


def _half_of_sum(values):
    """Return half of the sum of one group's values."""
    return values.sum() / 2


# Custom reducer applied to each group.
grouped_custom = by_category.agg(_half_of_sum)


In [None]:
# Long-format sample data: A and B become the row keys of the pivot,
# C supplies the column headers and D holds the values to aggregate.
data = {
    'A': ['foo', 'foo', 'foo', 'bar', 'bar', 'bar'],
    'B': ['one', 'one', 'two', 'two', 'one', 'one'],
    'C': ['small', 'large', 'large', 'small', 'small', 'large'],
    'D': [1, 2, 2, 3, 3, 4]
}
df = pd.DataFrame(data)

# Sum D for every (A, B) row key and C column.
# The reducer is given as the string 'sum' rather than np.sum: recent pandas
# emits a FutureWarning for NumPy callables because it silently maps them to
# its own groupby reducer, and the string form keeps that behaviour explicitly.
# (A, B, C) combinations absent from the data come out as NaN.
pivot = df.pivot_table(values='D', index=['A', 'B'], columns='C', aggfunc='sum')


In [None]:
# Two frames that share the keys 'B' and 'D'; 'A' and 'C' exist only in df1,
# 'E' only in df2.
df1 = pd.DataFrame({
    'key': ['A', 'B', 'C', 'D'],
    'value': [1, 2, 3, 4],
})
df2 = pd.DataFrame({
    'key': ['B', 'D', 'E'],
    'value': [5, 6, 7],
})

# Join on 'key' with each join type. Both frames carry a 'value' column, so
# the outputs hold 'value_x' (from df1) and 'value_y' (from df2).
merged_inner = df1.merge(df2, how='inner', on='key')  # keys present in both frames
merged_left = df1.merge(df2, how='left', on='key')    # every key of df1
merged_right = df1.merge(df2, how='right', on='key')  # every key of df2
merged_outer = df1.merge(df2, how='outer', on='key')  # union of the keys


In [None]:
# Build a frame whose rows are addressed by a two-level index
# ('first', 'second'): three outer labels, each with inner labels 'one'/'two'.
arrays = [
    ['bar', 'bar', 'baz', 'baz', 'foo', 'foo'],
    ['one', 'two', 'one', 'two', 'one', 'two']
]
index = pd.MultiIndex.from_arrays(arrays, names=('first', 'second'))
df = pd.DataFrame({'A': range(6), 'B': range(6, 12)}, index=index)

# Each lookup is bound to a name: a notebook cell only renders its final
# expression, so bare lookups earlier in the cell would be computed and
# silently thrown away.

# Partial key: all rows whose 'first' level is 'bar' (that level is dropped
# from the result's index).
bar_rows = df.loc['bar']

# Full key: the single row ('baz', 'one'). The tuple plus an explicit column
# slice makes it unambiguous that both labels are row keys, not a
# (row, column) pair.
baz_one_row = df.loc[('baz', 'one'), :]

# Cross-section on the inner level: every row whose 'second' level is 'one'.
second_one_rows = df.xs('one', level='second')

# Last expression of the cell, so the cross-section is what gets displayed.
second_one_rows


In [None]:
# Six consecutive daily timestamps starting 2021-01-01.
dates = pd.date_range('20210101', periods=6)

# Draw the sample values from a locally seeded generator so the cell produces
# the same frame on every run (Restart & Run All) without touching NumPy's
# global random state.
SEED = 42
rng = np.random.default_rng(SEED)
df = pd.DataFrame(rng.standard_normal((6, 4)), index=dates, columns=list('ABCD'))

# Downsample to month-end bins and average each bin. The offset object is
# passed instead of the 'M' string alias, which is deprecated in recent
# pandas (>= 2.2) in favour of 'ME'; the object form avoids depending on
# either spelling.
# All six days fall in January 2021, so the result is a single row.
df_resampled = df.resample(pd.offsets.MonthEnd()).mean()

# Mean over a sliding window of 3 rows; the first two rows have fewer than
# 3 observations available and are therefore NaN.
df_rolling = df.rolling(window=3).mean()

# Move every value down by one row (index unchanged); the first row becomes NaN.
df_shifted = df.shift(1)
