## Pandas Practice Guide — Part 1: Basics

### 1. Introduction
- Pandas is a powerful Python library for data manipulation and analysis.

In [1]:
import pandas as pd
import warnings 
# NOTE(review): this silences every warning for the rest of the session, including
# pandas deprecation / FutureWarning notices; consider narrowing or removing it.
warnings.filterwarnings('ignore')
# Show which pandas version the notebook is running against
print("Pandas version:", pd.__version__)

Pandas version: 2.2.2


### 2. Creating DataFrames

In [2]:
# Build a DataFrame from a dictionary: each key becomes a column name and
# each list holds that column's values (all lists have the same length)
data = dict(
    Name=['Amit', 'Neha', 'Raj', 'Sara', 'Vikas', 'Danish', 'Ahmed'],
    Age=[25, 30, 22, 28, 24, 23, 20],
    City=['Delhi', 'Mumbai', 'Chennai', 'Kolkata', 'Pune', 'Chennai', 'Mumbai'],
    Sales=[25000, 40000, 23000, 37000, 28000, 32000, 44000],
)

df = pd.DataFrame(data=data)
df

Unnamed: 0,Name,Age,City,Sales
0,Amit,25,Delhi,25000
1,Neha,30,Mumbai,40000
2,Raj,22,Chennai,23000
3,Sara,28,Kolkata,37000
4,Vikas,24,Pune,28000
5,Danish,23,Chennai,32000
6,Ahmed,20,Mumbai,44000


In [3]:
# Build a DataFrame from a list of lists: every inner list is one row,
# and the column labels are supplied separately through `columns`
data2 = [['Ravi', 26, 'Delhi'], ['Simran', 29, 'Mumbai']]
df2 = pd.DataFrame(data=data2, columns=['Name', 'Age', 'City'])
df2

Unnamed: 0,Name,Age,City
0,Ravi,26,Delhi
1,Simran,29,Mumbai


### 3. Basic Information

In [4]:
# Display the first rows of df (head() shows 5 rows when called without an argument)
df.head()

Unnamed: 0,Name,Age,City,Sales
0,Amit,25,Delhi,25000
1,Neha,30,Mumbai,40000
2,Raj,22,Chennai,23000
3,Sara,28,Kolkata,37000
4,Vikas,24,Pune,28000


In [5]:
# Display the last rows of df (tail() shows 5 rows when called without an argument)
df.tail()

Unnamed: 0,Name,Age,City,Sales
2,Raj,22,Chennai,23000
3,Sara,28,Kolkata,37000
4,Vikas,24,Pune,28000
5,Danish,23,Chennai,32000
6,Ahmed,20,Mumbai,44000


In [6]:
# Print a summary of df: index range, column names, non-null counts, dtypes and memory usage
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 7 entries, 0 to 6
Data columns (total 4 columns):
 #   Column  Non-Null Count  Dtype 
---  ------  --------------  ----- 
 0   Name    7 non-null      object
 1   Age     7 non-null      int64 
 2   City    7 non-null      object
 3   Sales   7 non-null      int64 
dtypes: int64(2), object(2)
memory usage: 356.0+ bytes


In [7]:
# Summary statistics (count, mean, std, min, quartiles, max) for the numeric columns;
# the text columns Name and City are left out by default
df.describe()

Unnamed: 0,Age,Sales
count,7.0,7.0
mean,24.571429,32714.285714
std,3.457222,7910.2104
min,20.0,23000.0
25%,22.5,26500.0
50%,24.0,32000.0
75%,26.5,38500.0
max,30.0,44000.0


In [8]:
# Shape as a (rows, columns) tuple; it is an attribute, so no parentheses
df.shape

(7, 4)

In [9]:
# Column labels, returned as a pandas Index
df.columns

Index(['Name', 'Age', 'City', 'Sales'], dtype='object')

In [10]:
# Data type of each column (the text columns are reported as object)
df.dtypes

Name     object
Age       int64
City     object
Sales     int64
dtype: object

### 4. Accessing Columns & Rows

In [11]:
# Select a single column by name; the result is a Series
df['Name']

0      Amit
1      Neha
2       Raj
3      Sara
4     Vikas
5    Danish
6     Ahmed
Name: Name, dtype: object

In [12]:
# Select several columns by passing a list of names; the result is a DataFrame
df[['Name', 'Sales']]

Unnamed: 0,Name,Sales
0,Amit,25000
1,Neha,40000
2,Raj,23000
3,Sara,37000
4,Vikas,28000
5,Danish,32000
6,Ahmed,44000


In [13]:
# Select one row by its index label with loc (label-based); the row comes back as a Series
df.loc[0]

Name      Amit
Age         25
City     Delhi
Sales    25000
Name: 0, dtype: object

In [14]:
# Select a range of rows by label; unlike a regular Python slice, a loc slice
# includes the end label, so this returns rows 0, 1 and 2
df.loc[0:2]

Unnamed: 0,Name,Age,City,Sales
0,Amit,25,Delhi,25000
1,Neha,30,Mumbai,40000
2,Raj,22,Chennai,23000


In [15]:
# Select one row by integer position with iloc (position-based; 0 is the first row)
df.iloc[1]

Name       Neha
Age          30
City     Mumbai
Sales     40000
Name: 1, dtype: object

In [16]:
# Read a single cell: row label 2, column 'City'
df.loc[2, 'City']

'Chennai'

### 5. Conditional Selection (Filtering)

In [17]:
# Keep rows where Sales is strictly greater than 25000 (a row with exactly 25000 is excluded)
df[df['Sales'] > 25000]

Unnamed: 0,Name,Age,City,Sales
1,Neha,30,Mumbai,40000
3,Sara,28,Kolkata,37000
4,Vikas,24,Pune,28000
5,Danish,23,Chennai,32000
6,Ahmed,20,Mumbai,44000


In [18]:
# Keep rows whose City equals 'Delhi' (exact, case-sensitive string comparison)
df[df['City'] == 'Delhi']

Unnamed: 0,Name,Age,City,Sales
0,Amit,25,Delhi,25000


In [19]:
# Combine conditions with & (element-wise AND); each comparison needs its own
# parentheses because & binds more tightly than > and <
df[(df['Sales'] > 25000) & (df['Age'] < 28)]

Unnamed: 0,Name,Age,City,Sales
4,Vikas,24,Pune,28000
5,Danish,23,Chennai,32000
6,Ahmed,20,Mumbai,44000


### 6. Adding and Removing Columns

In [20]:
# Add a new column computed from an existing one: Bonus is 10% of Sales
df['Bonus'] = df['Sales'] * 0.10
df

Unnamed: 0,Name,Age,City,Sales,Bonus
0,Amit,25,Delhi,25000,2500.0
1,Neha,30,Mumbai,40000,4000.0
2,Raj,22,Chennai,23000,2300.0
3,Sara,28,Kolkata,37000,3700.0
4,Vikas,24,Pune,28000,2800.0
5,Danish,23,Chennai,32000,3200.0
6,Ahmed,20,Mumbai,44000,4400.0


In [21]:
# Delete column
# drop() returns a new frame, which is assigned back to df instead of using inplace=True.
# errors='ignore' keeps the cell re-runnable: without it a second run raises KeyError
# because 'Bonus' has already been removed.
df = df.drop(columns='Bonus', errors='ignore')
df

Unnamed: 0,Name,Age,City,Sales
0,Amit,25,Delhi,25000
1,Neha,30,Mumbai,40000
2,Raj,22,Chennai,23000
3,Sara,28,Kolkata,37000
4,Vikas,24,Pune,28000
5,Danish,23,Chennai,32000
6,Ahmed,20,Mumbai,44000


In [22]:
# Rename columns
# rename() returns a new frame with 'Sales' relabelled; assign it back to df
# rather than mutating the existing frame with inplace=True
df = df.rename(columns={'Sales': 'Monthly_Sales'})
df

Unnamed: 0,Name,Age,City,Monthly_Sales
0,Amit,25,Delhi,25000
1,Neha,30,Mumbai,40000
2,Raj,22,Chennai,23000
3,Sara,28,Kolkata,37000
4,Vikas,24,Pune,28000
5,Danish,23,Chennai,32000
6,Ahmed,20,Mumbai,44000


### 7. Handling Missing Values

In [23]:
# Build a small DataFrame in which every column has exactly one missing entry
# (None), to practise detecting, dropping and filling missing values
data3 = dict(
    Name=['Amit', 'Neha', None, 'Sara', 'Vikas'],
    Age=[25, None, 22, 28, 24],
    Sales=[25000, 40000, None, 37000, 28000],
)
df3 = pd.DataFrame(data=data3)
df3

Unnamed: 0,Name,Age,Sales
0,Amit,25.0,25000.0
1,Neha,,40000.0
2,,22.0,
3,Sara,28.0,37000.0
4,Vikas,24.0,28000.0


In [24]:
# Count missing values per column (isnull() flags each missing cell, sum() totals the flags)
df3.isnull().sum()

Name     1
Age      1
Sales    1
dtype: int64

In [25]:
# Drop every row that contains at least one missing value.
# The result is only displayed, not assigned, so df3 itself keeps its missing values.
df3.dropna()

Unnamed: 0,Name,Age,Sales
0,Amit,25.0,25000.0
3,Sara,28.0,37000.0
4,Vikas,24.0,28000.0


In [26]:
# Fill missing values
# One fillna() call with a column -> replacement mapping, assigned back to df3:
#   Age   -> mean of the known ages
#   Sales -> 0
#   Name  -> 'Unknown'
# Calling fillna(inplace=True) on a selected column (df3['Age'].fillna(..., inplace=True))
# is chained assignment: pandas 2.x emits a FutureWarning for it, and with
# Copy-on-Write (the default from pandas 3.0) it no longer updates df3 at all.
df3 = df3.fillna({'Age': df3['Age'].mean(), 'Sales': 0, 'Name': 'Unknown'})
df3

Unnamed: 0,Name,Age,Sales
0,Amit,25.0,25000.0
1,Neha,24.75,40000.0
2,Unknown,22.0,0.0
3,Sara,28.0,37000.0
4,Vikas,24.0,28000.0


### 8. Removing Duplicates

In [27]:
# Build a frame with one exact duplicate: rows 0 and 2 are both (Amit, 25)
df_dup = pd.DataFrame(
    data=dict(
        Name=['Amit', 'Neha', 'Amit', 'Sara'],
        Age=[25, 30, 25, 28],
    )
)

In [28]:
# Flag rows that exactly repeat an earlier row; the first occurrence stays False
df_dup.duplicated()

0    False
1    False
2     True
3    False
dtype: bool

In [29]:
# Remove duplicates
# drop_duplicates() keeps the first occurrence of each repeated row and returns a new
# frame (the surviving rows keep their original index labels); assign it back to
# df_dup rather than mutating the existing frame with inplace=True
df_dup = df_dup.drop_duplicates()
df_dup

Unnamed: 0,Name,Age
0,Amit,25
1,Neha,30
3,Sara,28


### 9. Sorting Data

In [30]:
# Sort rows by one column (ascending by default); the sorted frame is only displayed,
# so df keeps its original row order
df.sort_values(by='Monthly_Sales')

Unnamed: 0,Name,Age,City,Monthly_Sales
2,Raj,22,Chennai,23000
0,Amit,25,Delhi,25000
4,Vikas,24,Pune,28000
5,Danish,23,Chennai,32000
3,Sara,28,Kolkata,37000
1,Neha,30,Mumbai,40000
6,Ahmed,20,Mumbai,44000


In [31]:
# Sort by Age from largest to smallest (ascending=False reverses the default order)
df.sort_values(by='Age', ascending=False)

Unnamed: 0,Name,Age,City,Monthly_Sales
1,Neha,30,Mumbai,40000
3,Sara,28,Kolkata,37000
0,Amit,25,Delhi,25000
4,Vikas,24,Pune,28000
5,Danish,23,Chennai,32000
2,Raj,22,Chennai,23000
6,Ahmed,20,Mumbai,44000


In [32]:
# Sort by City first, then by Age within each city (both ascending)
df.sort_values(by=['City', 'Age'])

Unnamed: 0,Name,Age,City,Monthly_Sales
2,Raj,22,Chennai,23000
5,Danish,23,Chennai,32000
0,Amit,25,Delhi,25000
3,Sara,28,Kolkata,37000
6,Ahmed,20,Mumbai,44000
1,Neha,30,Mumbai,40000
4,Vikas,24,Pune,28000


### 10. Changing Data Types

In [33]:
# Convert the 'Age' column to float and store the converted column back on df
df['Age'] = df['Age'].astype(float)

In [34]:
# Convert 'Age' to string
# NOTE: Age was cast to float in the previous cell, so the resulting strings
# look like '25.0' rather than '25'
df['Age'] = df['Age'].astype(str)

### 11. Exporting Data

In [35]:
# Both export calls below are commented out, so running this cell writes no files;
# uncomment a line to perform that export.

# Save DataFrame to CSV file (index=False leaves the row index out of the file)
# df.to_csv('pandas_basics.csv', index=False)

# Save DataFrame to Excel (index=False leaves the row index out of the file)
# df.to_excel('pandas_basics.xlsx', index=False)
