In [13]:
# Build a small sample DataFrame in which one score is missing
import pandas as pd
import numpy as np

names = ['Alice', 'Bob', 'Charlie', 'David']
scores = [88, 92, np.nan, 95]  # np.nan marks the missing entry for 'Charlie'
data = {'Name': names, 'Score': scores}

df = pd.DataFrame(data)
print(df)

      Name  Score
0    Alice   88.0
1      Bob   92.0
2  Charlie    NaN
3    David   95.0


In [14]:
# Drop every row that contains at least one missing value.
# dropna() returns a new DataFrame and leaves `df` untouched, so there is no
# need to copy `df` first and then mutate the copy in place.
df1 = df.dropna()
print(df1)

    Name  Score
0  Alice   88.0
1    Bob   92.0
3  David   95.0


In [15]:
# Replace every missing value with 0.
# fillna() returns a new DataFrame and leaves `df` untouched, so the result is
# bound directly to df2 instead of copying `df` and filling the copy in place.
df2 = df.fillna(0)
print(df2)

      Name  Score
0    Alice   88.0
1      Bob   92.0
2  Charlie    0.0
3    David   95.0


In [16]:
# Build a sample DataFrame whose last row repeats the first one exactly
names = ['Alice', 'Bob', 'Charlie', 'Alice']
scores = [88, 92, 95, 88]
data = {'Name': names, 'Score': scores}
df = pd.DataFrame(data)
print(df)

      Name  Score
0    Alice     88
1      Bob     92
2  Charlie     95
3    Alice     88


In [17]:
# Flag each row that repeats an earlier row; the first occurrence stays False
duplicates = df.duplicated(keep='first')
print(duplicates)

0    False
1    False
2    False
3     True
dtype: bool


In [18]:
# Keep the first occurrence of each row and discard any later repeats
df_no_duplicates = df.drop_duplicates(keep='first')
print(df_no_duplicates)

      Name  Score
0    Alice     88
1      Bob     92
2  Charlie     95


In [19]:
# Rebuild the sample frame so this cell does not rely on earlier cells
data = {
  'Name': ['Alice', 'Bob', 'Charlie', 'Alice'],
  'Score': [88, 92, 95, 88]
}
df = pd.DataFrame(data)

# Sort by 'Score' in ascending order.
# The two 'Alice' rows tie on 88. The default sort algorithm (quicksort) is
# not guaranteed to be stable, so a stable sort is requested explicitly to
# keep tied rows in their original order (index 0 before index 3).
df_sorted = df.sort_values(by='Score', kind='stable')
print(df_sorted)

      Name  Score
0    Alice     88
3    Alice     88
1      Bob     92
2  Charlie     95


In [20]:
# Two small frames sharing a 'key' column; only keys 'B' and 'C' occur in both
df1 = pd.DataFrame({'key': ['A', 'B', 'C'], 'value1': [1, 2, 3]})
df2 = pd.DataFrame({'key': ['B', 'C', 'D'], 'value2': [4, 5, 6]})
# Left join on 'key': every row of df1 is kept, and value2 is NaN
# wherever df2 has no row with the same key
merged_df = df1.merge(df2, how='left', on='key')
print(merged_df)

  key  value1  value2
0   A       1     NaN
1   B       2     4.0
2   C       3     5.0


In [21]:
# Two small frames sharing a 'key' column; only keys 'B' and 'C' occur in both
df1 = pd.DataFrame({'key': ['A', 'B', 'C'], 'value1': [1, 2, 3]})
df2 = pd.DataFrame({'key': ['B', 'C', 'D'], 'value2': [4, 5, 6]})
# Right join on 'key': every row of df2 is kept, and value1 is NaN
# wherever df1 has no row with the same key
merged_df = df1.merge(df2, how='right', on='key')
print(merged_df)

  key  value1  value2
0   B     2.0       4
1   C     3.0       5
2   D     NaN       6


In [22]:
# Two small frames sharing a 'key' column; only keys 'B' and 'C' occur in both
df1 = pd.DataFrame({'key': ['A', 'B', 'C'], 'value1': [1, 2, 3]})
df2 = pd.DataFrame({'key': ['B', 'C', 'D'], 'value2': [4, 5, 6]})
# Inner join on 'key': only keys present in both df1 and df2 survive
merged_df = df1.merge(df2, how='inner', on='key')
print(merged_df)

  key  value1  value2
0   B       2       4
1   C       3       5


In [23]:
# Two small frames sharing a 'key' column; only keys 'B' and 'C' occur in both
df1 = pd.DataFrame({'key': ['A', 'B', 'C'], 'value1': [1, 2, 3]})
df2 = pd.DataFrame({'key': ['B', 'C', 'D'], 'value2': [4, 5, 6]})
# Outer join on 'key': rows from both frames are kept, with NaN filling
# the value column of whichever side has no matching key
merged_df = df1.merge(df2, how='outer', on='key')
print(merged_df)

  key  value1  value2
0   A     1.0     NaN
1   B     2.0     4.0
2   C     3.0     5.0
3   D     NaN     6.0
