In [1]:
# Lists - ordered, mutable collections of elements.
my_list = [1, 2, 3, 4, 5]
# Show the whole list, then the element at index 2 (indexing is zero-based).
print(my_list, my_list[2], sep="\n")

[1, 2, 3, 4, 5]
3


In [2]:
# Add a new element to the end of the list, in place
# (same effect as my_list.append(6)).
my_list += [6]
print(my_list)

[1, 2, 3, 4, 5, 6]


In [3]:
# Tuples - ordered, immutable collections of elements.

my_tuple = 10, 20, 30, 40  # the commas build the tuple; parentheses are optional here
print(my_tuple)

(10, 20, 30, 40)


In [4]:
# Tuples are indexed like lists: position 1 is the second element.
second_item = my_tuple[1]
print(second_item)

20


In [8]:
# Rebind my_tuple to a new tuple that mixes integers and a string.
my_tuple = tuple([10, "Rakesh", 30, 40])
print(my_tuple)

(10, 'Rakesh', 30, 40)


In [5]:
# Tuples are immutable, so item assignment is not supported. The statement
# below is left commented out because running it raises
# "TypeError: 'tuple' object does not support item assignment".
# my_tuple[1] = 25

In [6]:
# Dictionary - a mutable collection of key-value pairs with unique keys.
# Dictionaries are NOT unordered: since Python 3.7 they are guaranteed to keep
# their keys in insertion order.

my_dict = {'name': 'Alice', 'age': 25, 'city': 'New York'}
print(my_dict['name'])  # look up a value by its key

Alice


In [7]:
# Overwrite the value stored under the existing 'age' key.
my_dict.update(age=26)
print(my_dict)

{'name': 'Alice', 'age': 26, 'city': 'New York'}


In [9]:
# Arrays - collections of elements of the same type, optimized for numerical computations.

import numpy as np

my_array = np.asarray([1, 2, 3, 4, 5])
third_value = my_array[2]  # zero-based indexing, as with lists
print(third_value)  # Output: 3

3


In [10]:
# Arithmetic mean of all elements in the array.
print(np.mean(my_array))

3.0


In [11]:
# Series - a one-dimensional labeled array in pandas.

import pandas as pd

# Each value is paired with the label at the same position in `index`.
my_series = pd.Series(data=[10, 20, 30, 40], index=list('ABCD'))
print(my_series)

A    10
B    20
C    30
D    40
dtype: int64


In [12]:
# Label-based lookup: fetch the value stored under index label 'B'.
print(my_series.loc['B'])

20


In [13]:
# DataFrame - a two-dimensional labeled data structure, similar to a table.

import pandas as pd

# Each key becomes a column name; each list holds that column's values.
data = dict(
    ID=[1, 2, 3],
    Name=['Alice', 'Bob', 'Charlie'],
    Age=[25, 30, 35],
)
df = pd.DataFrame(data)
print(df)

   ID     Name  Age
0   1    Alice   25
1   2      Bob   30
2   3  Charlie   35


## **Creating DataFrames in Python**

In [14]:
# Build a DataFrame from a list of lists: each inner list is one row,
# and the column names are supplied separately.

import pandas as pd

data = [
    [1, 'Alice', 25],
    [2, 'Bob', 30],
    [3, 'Charlie', 35],
]
df = pd.DataFrame(data=data, columns=['ID', 'Name', 'Age'])
print(df)

   ID     Name  Age
0   1    Alice   25
1   2      Bob   30
2   3  Charlie   35


In [15]:
# Build a DataFrame from a dictionary: keys become column names and the
# lists become the column values.

data = {
    'ID': [1, 2, 3],
    'Name': ['Alice', 'Bob', 'Charlie'],
    'Age': [25, 30, 35],
}
df = pd.DataFrame.from_dict(data)  # default orient='columns'
print(df)


   ID     Name  Age
0   1    Alice   25
1   2      Bob   30
2   3  Charlie   35


## Data Operations on DataFrames

In [16]:
# Selecting columns:
#   df['Name']           -> a single column, returned as a Series
#   df[['Name', 'Age']]  -> several columns, returned as a DataFrame

print(df['Name'], df[['Name', 'Age']], sep='\n')

0      Alice
1        Bob
2    Charlie
Name: Name, dtype: object
      Name  Age
0    Alice   25
1      Bob   30
2  Charlie   35


In [17]:
# Selecting rows by position and by condition

second_row = df.iloc[1]  # positional lookup: the row at position 1
print(second_row)
print(df.loc[df['Age'].gt(25)])  # boolean mask keeps the rows where Age > 25

ID        2
Name    Bob
Age      30
Name: 1, dtype: object
   ID     Name  Age
1   2      Bob   30
2   3  Charlie   35


In [18]:
# Modifying data in DataFrames: adding a new column.
# Assigning to a column name that does not exist yet creates the column.

df['Salary'] = pd.Series([50000, 60000, 70000], index=df.index)
print(df)


   ID     Name  Age  Salary
0   1    Alice   25   50000
1   2      Bob   30   60000
2   3  Charlie   35   70000


In [19]:
# Update a single cell: the row labeled 1, column 'Age'.
df.loc[1, 'Age'] = 32
print(df)

   ID     Name  Age  Salary
0   1    Alice   25   50000
1   2      Bob   32   60000
2   3  Charlie   35   70000


In [20]:
# Delete the 'Salary' column from df in place.

df.drop('Salary', axis='columns', inplace=True)
print(df)

   ID     Name  Age
0   1    Alice   25
1   2      Bob   32
2   3  Charlie   35


In [22]:
# Aggregations and summary statistics
#
# describe() reports count, mean, std, min, quartiles and max for the
# numerical columns.

summary_stats = df.describe()
print(summary_stats)


        ID        Age
count  3.0   3.000000
mean   2.0  30.666667
std    1.0   5.131601
min    1.0  25.000000
25%    1.5  28.500000
50%    2.0  32.000000
75%    2.5  33.500000
max    3.0  35.000000


In [24]:
# Group the rows by Name and compute the mean of the numeric columns (ID, Age)
# within each group. numeric_only=True keeps the call working on pandas >= 2.0
# even if a non-numeric column is added to df later.
df.groupby('Name').mean(numeric_only=True)

Unnamed: 0_level_0,ID,Age
Name,Unnamed: 1_level_1,Unnamed: 2_level_1
Alice,1.0,25.0
Bob,2.0,32.0
Charlie,3.0,35.0


In [25]:
# Merging and Joining DataFrames
# Concatenation: stack df1 and df2 on top of each other (row-wise).

df1 = pd.DataFrame({'ID': [1, 2], 'Name': ['Alice', 'Bob']})
df2 = pd.DataFrame({'ID': [3, 4], 'Name': ['Charlie', 'David']})
# ignore_index=True renumbers the combined rows 0..3. Without it each input
# keeps its own labels and the result has duplicate index values (0, 1, 0, 1),
# which makes label-based lookups such as df_concat.loc[0] return two rows.
df_concat = pd.concat([df1, df2], ignore_index=True)

In [26]:
# Merging on a key column
# Note: this rebinds df1 (previously used in the concatenation example) to a
# new frame of scores.

df1 = pd.DataFrame({'ID': [1, 2], 'Score': [90, 85]})
# merge defaults to an inner join, so only IDs present in both frames are kept.
df_merged = pd.merge(df, df1, on='ID')

In [27]:
# Count the missing (NaN/None) values in each column.
print(df.isna().sum())

ID      0
Name    0
Age     0
dtype: int64


In [28]:
# Replace every NaN in df with 0, modifying df in place.
df.fillna(value=0, inplace=True)

In [29]:
# Remove, in place, every row that contains at least one NaN value.
df.dropna(axis=0, how='any', inplace=True)
