# **DATA WRANGLING**

In [1]:
import pandas as pd
import numpy as np
# Two small frames that share a 'key' column. Keys 'A' and 'B' appear in both;
# 'C' exists only in df1 and 'D' only in df2.
df1 = pd.DataFrame(
    [('A', 1), ('B', 2), ('C', 3)],
    columns=['key', 'value1'],
)
df2 = pd.DataFrame(
    [('A', 4), ('B', 5), ('D', 6)],
    columns=['key', 'value2'],
)

## **Join based on the 'key' column**

In [2]:
print("Joined DataFrame:")
# join() aligns on the index, so 'key' is moved into the index on both sides.
# how='inner' keeps only the keys found in both frames.
joined_df = (
    df1.set_index('key')
    .join(df2.set_index('key'), how='inner')
)
joined_df


Joined DataFrame:


Unnamed: 0_level_0,value1,value2
key,Unnamed: 1_level_1,Unnamed: 2_level_1
A,1,4
B,2,5


## **Combine**

In [3]:
# df1 is missing a value in column 'A' (row 1); df2 is missing one in 'B' (row 0).
df1 = pd.DataFrame(dict(A=[1, np.nan], B=[3, 4]))
df2 = pd.DataFrame(dict(A=[5, 6], B=[np.nan, 8]))

print("\nCombined DataFrame:")
# df1's values take priority; only its missing entries are filled from the
# matching position in df2.
combined_df = df1.combine_first(other=df2)
combined_df


Combined DataFrame:


Unnamed: 0,A,B
0,1.0,3.0
1,6.0,4.0


## **Merge**

In [4]:
# Rebuild the keyed frames: the previous cell reassigned df1 and df2.
df1 = pd.DataFrame({
    'key': ['A', 'B', 'C'],
    'value1': [1, 2, 3],
})
df2 = pd.DataFrame({
    'key': ['A', 'B', 'D'],
    'value2': [4, 5, 6],
})

print("\nMerged DataFrame:")
# Inner merge on the shared 'key' column; 'key' stays a regular column.
merged_df = df1.merge(df2, on='key', how='inner')
merged_df



Merged DataFrame:


Unnamed: 0,key,value1,value2
0,A,1,4
1,B,2,5


## **Melt**

In [5]:
df = pd.DataFrame({
    'A': [1, 2],
    'B': [3, 4],
    'C': [5, 6],
})
print("\nMelted DataFrame:")
# Wide -> long: 'A' identifies each row, while columns 'B' and 'C' are
# unpivoted into 'variable' / 'value' pairs.
melted_df = df.melt(id_vars=['A'], value_vars=['B', 'C'])
melted_df



Melted DataFrame:


Unnamed: 0,A,variable,value
0,1,B,3
1,2,B,4
2,1,C,5
3,2,C,6


## **Replace**

In [6]:
df = pd.DataFrame({
    'A': [1, 2, 3],
    'B': [4, 5, 4],
})
print("\nReplaced Values DataFrame:")
# Every cell equal to 4 becomes 10; a new frame is returned and df is unchanged.
replaced_df = df.replace(to_replace=4, value=10)
replaced_df



Replaced Values DataFrame:


Unnamed: 0,A,B
0,1,10
1,2,5
2,3,10


## **Filter**

In [7]:
df = pd.DataFrame({
    'A': [1, 2, 3],
    'B': [4, 5, 6],
})
print("\nFiltered DataFrame:")
# Boolean mask: keep only the rows whose 'A' value is strictly greater than 1.
# The surviving rows keep their original index labels.
a_above_one = df['A'].gt(1)
filtered_df = df.loc[a_above_one]
filtered_df



Filtered DataFrame:


Unnamed: 0,A,B
1,2,5
2,3,6


## **Drop**

In [8]:
df = pd.DataFrame({
    'A': [1, 2, 3],
    'B': [4, 5, 6],
})
print("\nDropped Column DataFrame:")
# Remove column 'B'; drop() returns a new frame, so df still has both columns.
dropped_df = df.drop(labels=['B'], axis='columns')
dropped_df



Dropped Column DataFrame:


Unnamed: 0,A
0,1
1,2
2,3


## **Concat**

In [9]:
df1 = pd.DataFrame({
    'A': [1, 2],
    'B': [3, 4],
})
df2 = pd.DataFrame({
    'A': [5, 6],
    'B': [7, 8],
})

print("\nConcatenated DataFrame:")
# Stack df2's rows underneath df1's. Each frame keeps its own row labels,
# so the labels 0 and 1 both appear twice in the result.
concat_df = pd.concat(objs=[df1, df2], axis='index')
concat_df



Concatenated DataFrame:


Unnamed: 0,A,B
0,1,3
1,2,4
0,5,7
1,6,8


## **GroupBy**

In [10]:
df = pd.DataFrame({
    'key': ['A', 'B', 'A', 'B'],
    'value': [1, 2, 3, 4],
})
print("\nGrouped DataFrame:")
# Sum 'value' within each distinct 'key'; the group keys become the index.
rows_by_key = df.groupby(by='key')
grouped_df = rows_by_key.sum()
grouped_df



Grouped DataFrame:


Unnamed: 0_level_0,value
key,Unnamed: 1_level_1
A,4
B,6


## **Duplicate Handling**

In [11]:
df = pd.DataFrame({
    'A': [1, 2, 2, 3],
    'B': [3, 4, 4, 5],
})
print("\nDuplicates:")
# Flag rows that repeat an earlier row in full; the first occurrence of each
# row is left unmarked (keep='first' is the default, spelled out here).
duplicates = df.duplicated(keep='first')
duplicates



Duplicates:


0    False
1    False
2     True
3    False
dtype: bool

## **Dropping Duplicates**

In [12]:
print("\nDropped Duplicates DataFrame:")
# Keep the first occurrence of each distinct row and discard later repeats.
# The surviving rows keep their original index labels.
dropped_duplicates_df = df.drop_duplicates(keep='first')
dropped_duplicates_df


Dropped Duplicates DataFrame:


Unnamed: 0,A,B
0,1,3
1,2,4
3,3,5
