In [1]:
import pandas as pd

In [2]:
import pandas as pd
import numpy as np

print("=== Creating Series and DataFrames ===")

# A Series is a one-dimensional labelled array; here the labels are 'a'..'d'.
s = pd.Series(data=[10, 20, 30, 40], index=list('abcd'))
print("Series:", s, sep="\n ")

# A dict of equal-length lists becomes a DataFrame: each key is a column name.
data = dict(
    Name=['Alice', 'Bob', 'Charlie'],
    Age=[25, 30, 35],
    City=['Delhi', 'Mumbai', 'Bangalore'],
)
df = pd.DataFrame(data)
print("\nDataFrame:", df, sep="\n ")

# A 2-D NumPy array becomes a DataFrame: one name per array column.
arr = np.array([[1, 2], [3, 4], [5, 6]])
df2 = pd.DataFrame(arr, columns=['X', 'Y'])
print("\nDataFrame from array:", df2, sep="\n ")


=== Creating Series and DataFrames ===
Series:
 a    10
b    20
c    30
d    40
dtype: int64

DataFrame:
       Name  Age       City
0    Alice   25      Delhi
1      Bob   30     Mumbai
2  Charlie   35  Bangalore

DataFrame from array:
    X  Y
0  1  2
1  3  4
2  5  6


# **DataFrame Basics**

In [3]:
print("=== DataFrame Basic Info ===")

# Structural attributes are short enough to sit on the same line as their label.
for label, value in (
    ("Shape:", df.shape),
    ("Columns:", df.columns),
    ("Index:", df.index),
):
    print(label, value)

# Multi-line reprs are printed starting on the line after their label.
for label, value in (
    ("Data Types:", df.dtypes),
    ("Head:", df.head(2)),          # first two rows
    ("Tail:", df.tail(2)),          # last two rows
    ("Describe:", df.describe()),   # summary statistics
):
    print(label, value, sep="\n ")


=== DataFrame Basic Info ===
Shape: (3, 3)
Columns: Index(['Name', 'Age', 'City'], dtype='object')
Index: RangeIndex(start=0, stop=3, step=1)
Data Types:
 Name    object
Age      int64
City    object
dtype: object
Head:
     Name  Age    City
0  Alice   25   Delhi
1    Bob   30  Mumbai
Tail:
       Name  Age       City
1      Bob   30     Mumbai
2  Charlie   35  Bangalore
Describe:
         Age
count   3.0
mean   30.0
std     5.0
min    25.0
25%    27.5
50%    30.0
75%    32.5
max    35.0


# **Indexing, Selecting, Filtering**

In [4]:
print("=== Indexing and Filtering ===")

# Column access: square brackets with a column name.
name_column = df['Name']
print("Name column:", name_column, sep="\n ")

# Row access: .loc looks up by index label, .iloc by integer position.
first_by_label = df.loc[0]
first_by_position = df.iloc[0]
print("First row:", first_by_label, sep="\n ")
print("First row (iloc):", first_by_position, sep="\n ")

# Boolean filtering: build each mask once, then index the frame with it.
older_than_25 = df['Age'] > 25
lives_in_mumbai = df['City'] == 'Mumbai'
print("Age > 25:", df[older_than_25], sep="\n ")

# Conditional & combined filters: masks are combined element-wise with `&`.
print("Age > 25 and City is Mumbai:", df[older_than_25 & lives_in_mumbai], sep="\n ")


=== Indexing and Filtering ===
Name column:
 0      Alice
1        Bob
2    Charlie
Name: Name, dtype: object
First row:
 Name    Alice
Age        25
City    Delhi
Name: 0, dtype: object
First row (iloc):
 Name    Alice
Age        25
City    Delhi
Name: 0, dtype: object
Age > 25:
       Name  Age       City
1      Bob   30     Mumbai
2  Charlie   35  Bangalore
Age > 25 and City is Mumbai:
   Name  Age    City
1  Bob   30  Mumbai


# **Modifying Data**

In [5]:
print("=== Modifying Data ===")

# Add a column by assigning a list with one value per row.
df['Salary'] = [50000, 60000, 70000]
print("After adding Salary:\n", df)

# Update a single cell, addressed by row label and column name.
df.at[1, 'Age'] = 31
print("After modifying Age of index 1:\n", df)

# Remove a column. The result is rebound to `df` instead of using
# inplace=True, and errors='ignore' keeps this cell re-runnable: on a second
# run 'City' is already gone and a plain drop would raise KeyError.
df = df.drop(columns='City', errors='ignore')
print("After dropping City:\n", df)

# Rename a column; labels missing from the mapping are left unchanged.
df = df.rename(columns={'Name': 'Full Name'})
print("After renaming column:\n", df)


=== Modifying Data ===
After adding Salary:
       Name  Age       City  Salary
0    Alice   25      Delhi   50000
1      Bob   30     Mumbai   60000
2  Charlie   35  Bangalore   70000
After modifying Age of index 1:
       Name  Age       City  Salary
0    Alice   25      Delhi   50000
1      Bob   31     Mumbai   60000
2  Charlie   35  Bangalore   70000
After dropping City:
       Name  Age  Salary
0    Alice   25   50000
1      Bob   31   60000
2  Charlie   35   70000
After renaming column:
   Full Name  Age  Salary
0     Alice   25   50000
1       Bob   31   60000
2   Charlie   35   70000


# **Aggregation and Statistics**

In [6]:
print("=== Aggregation and Statistics ===")

# Pull out the two numeric columns once and reuse them below.
age_col = df['Age']
salary_col = df['Salary']

# Scalar reductions.
print("Mean Age:", age_col.mean())
print("Max Salary:", salary_col.max())

# Frequency of each distinct Age value.
print("Value Counts (Age):", age_col.value_counts(), sep="\n ")

# Pairwise correlation, restricted to numeric columns.
print("Correlation Matrix:", df.corr(numeric_only=True), sep="\n ")


=== Aggregation and Statistics ===
Mean Age: 30.333333333333332
Max Salary: 70000
Value Counts (Age):
 Age
25    1
31    1
35    1
Name: count, dtype: int64
Correlation Matrix:
              Age    Salary
Age     1.000000  0.993399
Salary  0.993399  1.000000


# **GroupBy Operations**

In [7]:
print("=== GroupBy Example ===")

# Add a categorical column so there is something to group on.
df['Dept'] = ['HR', 'IT', 'HR']

# One GroupBy object can feed several aggregations.
grouped = df.groupby(by='Dept')

dept_means = grouped.mean(numeric_only=True)   # per-department mean of numeric columns
dept_counts = grouped.count()                  # per-department non-null count of each column

print("GroupBy mean:", dept_means, sep="\n ")
print("GroupBy count:", dept_counts, sep="\n ")


=== GroupBy Example ===
GroupBy mean:
        Age   Salary
Dept               
HR    30.0  60000.0
IT    31.0  60000.0
GroupBy count:
       Full Name  Age  Salary
Dept                        
HR            2    2       2
IT            1    1       1


# **Missing Data Handling**

In [8]:
print("=== Missing Data ===")

# Small frame with gaps: column A has one NaN, B has two, C has none.
df_missing = pd.DataFrame(
    {'A': [1, 2, np.nan], 'B': [5, np.nan, np.nan], 'C': [10, 20, 30]}
)
print("With missing values:", df_missing, sep="\n ")

# Each call below returns a new object; df_missing itself is not modified.
zero_filled = df_missing.fillna(0)       # replace every NaN with 0
complete_rows = df_missing.dropna()      # keep only rows that have no NaN
null_mask = df_missing.isnull()          # True wherever a value is missing

print("Fill NaN with 0:", zero_filled, sep="\n ")
print("Drop rows with NaN:", complete_rows, sep="\n ")
print("Isnull:", null_mask, sep="\n ")


=== Missing Data ===
With missing values:
      A    B   C
0  1.0  5.0  10
1  2.0  NaN  20
2  NaN  NaN  30
Fill NaN with 0:
      A    B   C
0  1.0  5.0  10
1  2.0  0.0  20
2  0.0  0.0  30
Drop rows with NaN:
      A    B   C
0  1.0  5.0  10
Isnull:
        A      B      C
0  False  False  False
1  False   True  False
2   True   True  False


# **Sorting and Duplicates**

In [9]:
print("=== Sorting and Duplicates ===")

# sort_values returns a sorted copy; df keeps its original row order.
print("Sorted by Age:", df.sort_values(by='Age'), sep="\n ")

# Duplicate example: the rows at index 1 and 2 are identical.
df_dup = pd.DataFrame({'A': [1, 2, 2, 3], 'B': [4, 5, 5, 6]})

# duplicated() flags the second and later occurrences of a row, not the first.
repeat_mask = df_dup.duplicated()
print("Duplicates:", df_dup[repeat_mask], sep="\n ")
print("After dropping duplicates:", df_dup.drop_duplicates(), sep="\n ")


=== Sorting and Duplicates ===
Sorted by Age:
   Full Name  Age  Salary Dept
0     Alice   25   50000   HR
1       Bob   31   60000   IT
2   Charlie   35   70000   HR
Duplicates:
    A  B
2  2  5
After dropping duplicates:
    A  B
0  1  4
1  2  5
3  3  6


# **Merging, Joining, Concatenating**

In [10]:
print("=== Merging and Joining ===")

# Two frames that share the key column 'ID'.
left = pd.DataFrame({'ID': [1, 2], 'Name': ['A', 'B']})
right = pd.DataFrame({'ID': [1, 2], 'Score': [85, 90]})

# Join on the shared key (inner join by default).
merged = left.merge(right, on='ID')
print("Merged DataFrame:", merged, sep="\n ")

# Stack two frames vertically. Each keeps its own row labels, so the
# result's index repeats (0, 1, 0, 1).
df1 = pd.DataFrame({'A': [1, 2]})
df2 = pd.DataFrame({'A': [3, 4]})
frames_to_stack = [df1, df2]
concat_df = pd.concat(frames_to_stack)
print("Concatenated:", concat_df, sep="\n ")


=== Merging and Joining ===
Merged DataFrame:
    ID Name  Score
0   1    A     85
1   2    B     90
Concatenated:
    A
0  1
1  2
0  3
1  4


# **File I/O**

In [11]:
print("=== File I/O ===")

# CSV round trip, left commented out. Uncomment the three lines below to
# execute; doing so writes "data.csv" relative to the current working directory.
# df.to_csv("data.csv", index=False)   # index=False: do not write the row index as a column
# loaded_df = pd.read_csv("data.csv")  # read the file back into a new DataFrame
# print("Loaded CSV:\n", loaded_df)


=== File I/O ===
