In [76]:
# Lists - an ordered, mutable sequence of elements.
# A single list can hold many items, and the items may be of any data type (strings included).
my_list = list(range(1, 6))
# Show the whole list, then the item at index 2 (indexing is zero-based, so this is the third item)
print(my_list, my_list[2], sep="\n")

[1, 2, 3, 4, 5]
3


In [77]:
my_list.append(6)  # append() adds one element to the end of the list, modifying the list in place
print(my_list)

[1, 2, 3, 4, 5, 6]


In [78]:
# Lists hold text values just as easily as numbers
fruits = [
    "apple",
    "banana",
    "cherry",
    "date",
]
print(fruits)

['apple', 'banana', 'cherry', 'date']


In [79]:
# Tuples - an ordered collection whose elements cannot be changed after creation (immutable).

my_tuple = tuple(range(10, 50, 10))  # 10, 20, 30, 40
print(my_tuple)

(10, 20, 30, 40)


In [80]:
print(my_tuple[1])  # zero-based indexing: index 1 is the second element of the tuple

20


In [81]:
# A tuple may mix data types: here a string sits among integers.
# The parentheses are optional; the commas are what build the tuple.
my_tuple = 10, "Rakesh", 30, 40
print(my_tuple)

(10, 'Rakesh', 30, 40)


In [82]:
# Tuples are immutable: assigning to an element is rejected with a TypeError.
# The error is caught and printed so the demonstration is visible while the
# notebook still runs from top to bottom without stopping.
try:
    my_tuple[1] = 25
except TypeError as err:
    print(f"TypeError: {err}")

In [83]:
# Dictionary - a mutable collection of key-value pairs; values are looked up by key, not by position.
# Since Python 3.7 a dict also remembers the order in which its keys were inserted.

my_dict = {
    'name': 'Alice',
    'age': 25,
    'city': 'New York',
}
print(my_dict['name'])  # look up the value stored under the key 'name'

Alice


In [84]:
# Overwrite the value stored under the existing key 'age'
my_dict.update(age=26)
print(my_dict)

{'name': 'Alice', 'age': 26, 'city': 'New York'}


In [85]:
# Arrays - a NumPy array stores elements of one common type and is optimized for numerical computations.

import numpy as np
my_array = np.arange(1, 6)  # the integers 1 through 5
print(my_array[2])  # third element -> 3

3


### What "optimized for numerical computations" means
- Arrays can hold strings, but their main use case is numerical data.

So, when we say "optimized for numerical computations," we’re usually referring to NumPy arrays, which are designed for fast math operations.

In [86]:
print(np.mean(my_array))  # arithmetic mean of all elements in the array

3.0


In [87]:
import numpy as np
# An array built only from strings gets a fixed-width Unicode dtype
arr = np.array(['apple', 'banana', 'cherry'])
# Show the array contents, then its dtype on the next line
print(arr, arr.dtype, sep='\n')

['apple' 'banana' 'cherry']
<U6


The `<U6` dtype means every element is stored as a Unicode string of at most 6 characters — the length of the longest items here ("banana" and "cherry").

Unicode is a universal standard for representing text in computers. 
It assigns a unique code point (number) to every character, no matter the language, script, or symbol.

In [88]:
# A NumPy array has a single dtype, so the integer 4 is converted to the string '4'
arr = np.asarray(['apple', 'banana', 4])
print(arr)

['apple' 'banana' '4']


**Series**: A one-dimensional labeled array in which every element is paired with an index label. It is one of the core pandas data structures used for data analysis.

In [89]:
import pandas as pd
# Building from a dict: the keys become the index labels, the values become the data
my_series = pd.Series({'A': 10, 'B': 20, 'C': 30, 'D': 40})
print(my_series)

A    10
B    20
C    30
D    40
dtype: int64


In [90]:
print(my_series.loc['B'])  # label-based lookup: the value stored under index label 'B'

20


In [91]:
# DataFrame - a two-dimensional labeled data structure, similar to a table.

import pandas as pd
# Each dict key becomes a column name; each list supplies that column's values
data = {
    'ID': [1, 2, 3],
    'Name': ['Alice', 'Bob', 'Charlie'],
    'Age': [25, 30, 35],
}
df = pd.DataFrame(data=data)
print(df)

   ID     Name  Age
0   1    Alice   25
1   2      Bob   30
2   3  Charlie   35


## **Creating DataFrames in Python**

In [92]:
# Using lists of lists: each inner list is one row, and column names are supplied separately

import pandas as pd
data = [
    [1, 'Alice', 25],
    [2, 'Bob', 30],
    [3, 'Charlie', 35],
]
df = pd.DataFrame(columns=['ID', 'Name', 'Age'], data=data)
print(df)

   ID     Name  Age
0   1    Alice   25
1   2      Bob   30
2   3  Charlie   35


In [93]:
# Using a dictionary: keys are the column names, values are the column contents

data = {
    'ID': [1, 2, 3],
    'Name': ['Alice', 'Bob', 'Charlie'],
    'Age': [25, 30, 35],
}
df = pd.DataFrame.from_dict(data)
print(df)


   ID     Name  Age
0   1    Alice   25
1   2      Bob   30
2   3  Charlie   35


In [94]:
# Using Series: every Series becomes one column of the DataFrame
import pandas as pd

# Two Series that share the default integer index 0, 1, 2
names = pd.Series(['Alice', 'Bob', 'Charlie'])
ages = pd.Series([25, 30, 22])

# Place the Series side by side; `keys` supplies the column names
df = pd.concat([names, ages], axis=1, keys=['Name', 'Age'])

print(df)

      Name  Age
0    Alice   25
1      Bob   30
2  Charlie   22


In [95]:
import pandas as pd
import numpy as np

# Create NumPy array.
# NOTE: a NumPy array has one dtype for all elements, so mixing names and
# numbers stores every entry - the ages included - as a string.
data = np.array([
    ['Alice', 25],
    ['Bob', 30],
    ['Charlie', 22]
])

# Create DataFrame from array and convert Age back to integers straight away,
# so that numeric comparisons and statistics work on the column.
df = pd.DataFrame(data, columns=['Name', 'Age']).astype({'Age': int})

df

Unnamed: 0,Name,Age
0,Alice,25
1,Bob,30
2,Charlie,22


## Data Operations on DataFrames

In [96]:
# Selecting columns

print(df.loc[:, 'Name'])  # a single label selects one column as a Series
print(df.loc[:, ['Name', 'Age']])  # a list of labels selects several columns as a DataFrame

0      Alice
1        Bob
2    Charlie
Name: Name, dtype: object
      Name Age
0    Alice  25
1      Bob  30
2  Charlie  22


In [97]:
# Selecting rows by position and by condition
# Age has to be numeric before it can be compared with a number
df = df.astype({'Age': int})

# With an integer Age column both selections work:
print(df.iloc[1])  # second row, chosen by integer position
print(df[df['Age'] > 25])  # boolean mask keeps only the rows where Age > 25

Name    Bob
Age      30
Name: 1, dtype: object
  Name  Age
1  Bob   30


In [98]:
# Modifying data in DataFrames

# Adding a new column: one salary per existing row, in row order

df = df.assign(Salary=[50000, 60000, 70000])
print(df)

      Name  Age  Salary
0    Alice   25   50000
1      Bob   30   60000
2  Charlie   22   70000


In [99]:
# Updating a single value: the cell at row label 1, column 'Age'
df.loc[1, 'Age'] = 32
print(df)

      Name  Age  Salary
0    Alice   25   50000
1      Bob   32   60000
2  Charlie   22   70000


In [100]:
# Deleting a column
# drop() returns a new frame without 'Salary'; errors='ignore' keeps this cell
# safe to re-run after the column has already been removed.
df = df.drop(columns=['Salary'], errors='ignore')
print(df)

      Name  Age
0    Alice   25
1      Bob   32
2  Charlie   22


In [101]:
# Aggregations and Summary Statistics

# Compute Basic Statistics
# describe() reports count, mean, std, min, the 25%/50%/75% quartiles and max.

print(df.describe())  # Summary statistics for numerical columns; the text column Name is left out


             Age
count   3.000000
mean   26.333333
std     5.131601
min    22.000000
25%    23.500000
50%    25.000000
75%    28.500000
max    32.000000


In [102]:
# Group the rows by Name and compute the mean of each numeric column per group.
# numeric_only=True states explicitly that text columns are skipped rather than averaged.
df.groupby('Name').mean(numeric_only=True)

Unnamed: 0_level_0,Age
Name,Unnamed: 1_level_1
Alice,25.0
Bob,32.0
Charlie,22.0


In [103]:
print(df.isna().sum())  # number of missing (NaN/None) entries in each column

Name    0
Age     0
dtype: int64
