# 📘 Pandas Basics in Python
This notebook covers the essential and most useful Pandas functions for data analysis.

## 1. Importing Pandas

In [1]:
import pandas as pd

## 2. Creating DataFrames and Series

In [2]:
# Build a DataFrame from a dict: each key becomes a column label and each
# list supplies that column's values, one entry per row.
data = {'Name': ['Alice', 'Bob', 'Charlie'], 'Age': [25, 30, 35]}
df = pd.DataFrame(data)
print(df)

# A Series is a one-dimensional labelled array; with no index given,
# the values are labelled 0, 1, 2.
s = pd.Series([10, 20, 30])

# Print the label on its own line. print("Series:\n", s) would insert
# print()'s separator space in front of the first row and push it out of
# alignment with the rows below.
print("Series:")
print(s)

      Name  Age
0    Alice   25
1      Bob   30
2  Charlie   35
Series:
 0    10
1    20
2    30
dtype: int64


## 3. Reading and Writing Files

In [3]:
# File I/O examples, left commented out so the notebook runs without
# touching the disk. Uncomment a write/read pair only when you have actual
# files to work with.
#
# CSV round trip (index=False is passed so the row index is not written):
# df.to_csv('data.csv', index=False)
# df = pd.read_csv('data.csv')
#
# Excel round trip:
# df.to_excel('data.xlsx')
# df = pd.read_excel('data.xlsx')
#
# NOTE(review): to_excel is called without index=False, unlike to_csv above,
# so the index would presumably be written as well and come back as an extra
# column on read — confirm, and add index=False if a clean round trip is wanted.

## 4. Data Inspection

In [4]:
# First and last rows. head()/tail() show up to 5 rows, so this 3-row frame
# is printed in full both times.
print(df.head())
print(df.tail())

# info() prints its own report and returns None, so it is called bare:
# print(df.info()) would add a stray "None" line after the report.
df.info()

# Summary statistics for the numeric columns.
print(df.describe())

# Quick reference:
# df.head()       # Show first 5 rows
# df.tail()       # Last 5 rows
# df.shape        # (rows, columns)
# df.columns      # Column names
# df.info()       # Summary (data types, nulls)
# df.describe()   # Stats on numeric columns

      Name  Age
0    Alice   25
1      Bob   30
2  Charlie   35
      Name  Age
0    Alice   25
1      Bob   30
2  Charlie   35
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 3 entries, 0 to 2
Data columns (total 2 columns):
 #   Column  Non-Null Count  Dtype 
---  ------  --------------  ----- 
 0   Name    3 non-null      object
 1   Age     3 non-null      int64 
dtypes: int64(1), object(1)
memory usage: 180.0+ bytes
None
        Age
count   3.0
mean   30.0
std     5.0
min    25.0
25%    27.5
50%    30.0
75%    32.5
max    35.0


## 5. Selecting Columns and Rows

In [5]:
# Single brackets with one label select that column as a Series.
name_column = df['Name']
print(name_column)

# A list of labels selects those columns as a DataFrame.
name_and_age = df[['Name', 'Age']]
print(name_and_age)

# .loc looks a row up by its index label; .iloc looks it up by integer position.
row_by_label = df.loc[0]
row_by_position = df.iloc[1]
print(row_by_label)
print(row_by_position)

0      Alice
1        Bob
2    Charlie
Name: Name, dtype: object
      Name  Age
0    Alice   25
1      Bob   30
2  Charlie   35
Name    Alice
Age        25
Name: 0, dtype: object
Name    Bob
Age      30
Name: 1, dtype: object


## 6. Filtering and Boolean Indexing

In [6]:
# Comparing a column with a scalar yields one True/False per row;
# indexing the frame with that mask keeps only the True rows.
older_than_25 = df['Age'] > 25
print(df[older_than_25])

      Name  Age
1      Bob   30
2  Charlie   35


## 7. Adding and Modifying Columns

In [7]:
# Assigning to a column label that does not exist yet adds that column.
# A list supplies one value per row, in row order.
country_by_row = ['USA', 'UK', 'Canada']
df['Country'] = country_by_row

# A new column can also be derived from an existing one.
df['Age Plus 5'] = 5 + df['Age']
print(df)

      Name  Age Country  Age Plus 5
0    Alice   25     USA          30
1      Bob   30      UK          35
2  Charlie   35  Canada          40


## 8. Deleting Columns or Rows

In [8]:
# drop() returns a new frame, so the result is assigned back to df.
# columns= removes the 'Age Plus 5' column and index= removes the row
# labelled 1, naming each axis explicitly instead of using axis=1 / axis=0.
# errors='ignore' skips labels that are already gone: because this cell
# overwrites df, re-running it would otherwise raise KeyError.
df = df.drop(columns='Age Plus 5', index=1, errors='ignore')
print(df)

      Name  Age Country
0    Alice   25     USA
2  Charlie   35  Canada


## 9. Sorting

In [9]:
# sort_values returns a new frame ordered by the Age column; it is printed
# here without being assigned back to df.
age_sorted = df.sort_values('Age')
print(age_sorted)

      Name  Age Country
0    Alice   25     USA
2  Charlie   35  Canada


## 10. Handling Missing Data

In [10]:
# Introduce a missing value: blank out Age for the row labelled 2.
# It shows up as NaN, and the Age column is displayed as floats afterwards.
df.loc[2, 'Age'] = None
print(df)

# Option 1: replace every missing value with 0.
df_filled = df.fillna(value=0)
print(df_filled)

# Option 2: discard any row that contains a missing value.
df_dropped = df.dropna(axis=0)
print(df_dropped)

      Name   Age Country
0    Alice  25.0     USA
2  Charlie   NaN  Canada
      Name   Age Country
0    Alice  25.0     USA
2  Charlie   0.0  Canada
    Name   Age Country
0  Alice  25.0     USA


## 11. Grouping and Aggregation

In [11]:
# Employee salary data for the grouping example.
# Note: this rebinds `df`, replacing the Name/Age frame used in earlier sections.
employee_records = {
    'Department': ['IT', 'HR', 'IT', 'HR'],
    'Employee': ['Alice', 'Bob', 'Charlie', 'David'],
    'Salary': [60000, 55000, 65000, 52000],
}
df = pd.DataFrame(employee_records)

# Group the rows by department, then average Salary within each group.
salary_by_department = df.groupby('Department')['Salary']
print(salary_by_department.mean())

Department
HR    53500.0
IT    62500.0
Name: Salary, dtype: float64


## 12. Merging and Joining DataFrames

In [12]:
# Two small frames that share an 'ID' key column.
names_by_id = {'ID': [1, 2], 'Name': ['Alice', 'Bob']}
salaries_by_id = {'ID': [1, 2], 'Salary': [60000, 55000]}
df1 = pd.DataFrame(names_by_id)
df2 = pd.DataFrame(salaries_by_id)

# Join the frames on 'ID' so each row carries both Name and Salary.
merged = pd.merge(left=df1, right=df2, on='ID')
print(merged)

   ID   Name  Salary
0   1  Alice   60000
1   2    Bob   55000


## 13. Pivot Tables

In [13]:
# Mean Salary per Department, returned as a table indexed by Department.
pivot_df = df.pivot_table(
    index='Department',
    values='Salary',
    aggfunc='mean',
)
print(pivot_df)

             Salary
Department         
HR          53500.0
IT          62500.0


## 14. Working with Dates

In [14]:
# Five consecutive dates starting at 2024-01-01, paired with sample values.
dates = pd.date_range('2024-01-01', periods=5)
values = [10, 20, 30, 40, 50]
date_df = pd.DataFrame({'Date': dates, 'Value': values})
print(date_df)

# The .dt accessor exposes parts of a datetime column; here, the month number.
month_numbers = date_df['Date'].dt.month
print(month_numbers)

        Date  Value
0 2024-01-01     10
1 2024-01-02     20
2 2024-01-03     30
3 2024-01-04     40
4 2024-01-05     50
0    1
1    1
2    1
3    1
4    1
Name: Date, dtype: int32


## 15. Useful Utilities

In [15]:
# Structural metadata of the current frame, printed one item after another:
# column labels, row index, (rows, columns) shape, and per-column dtypes.
for frame_attribute in (df.columns, df.index, df.shape, df.dtypes):
    print(frame_attribute)

Index(['Department', 'Employee', 'Salary'], dtype='object')
RangeIndex(start=0, stop=4, step=1)
(4, 3)
Department    object
Employee      object
Salary         int64
dtype: object
