In [2]:
# Task 1: creating Arrays
import numpy as np
import pandas as pd

arr1 = np.arange(1, 11)
arr2 = np.arange(1, 10).reshape(3, 3)
arr3 = np.random.rand(3, 5, 3)

print(f"1D array: shape={arr1.shape}, size={arr1.size}, datatype={arr1.dtype}")
print(f"2D array: shape={arr2.shape}, size={arr2.size}, datatype={arr2.dtype}")
print(f"3D array: shape={arr3.shape}, size={arr3.size}, datatype={arr3.dtype}")

1D array: shape=(10,), size=10, datatype=int32
2D array: shape=(3, 3), size=9, datatype=int32
3D array: shape=(3, 5, 3), size=45, datatype=float64


In [4]:
#Task 2: Array indexing and Slicing
data = [10, 20, 30, 40, 50, 60, 70, 80, 90]
na = np.array([10, 20, 30, 40, 50, 60, 70, 80, 90])

f=na[:3]
a=na[::2]
r=na[::-1]

print("First 3:", f)
print("Alternate:", a)
print("Reversed:", r)

First 3: [10 20 30]
Alternate: [10 30 50 70 90]
Reversed: [90 80 70 60 50 40 30 20 10]


In [5]:
# Task 3: Mathematical Operations
A = np.random.randint(1, 21, 5)
B = np.random.randint(1, 21, 5)

print("A:", A)
print("B:", B)

print("Add:", A + B)
print("Subtract:", A - B)
print("Multiply:", A * B)
print("Divide:", A / B)

print("Dot Product:", np.dot(A, B))

print("Mean of A:", np.mean(A))
print("Median of A:", np.median(A))
print("Std Dev of A:", np.std(A))
print("Variance of A:", np.var(A))

print("Max in B:", np.max(B), "at index", np.argmax(B))
print("Min in B:", np.min(B), "at index", np.argmin(B))

A: [9 7 1 6 9]
B: [11 20 19  5  5]
Add: [20 27 20 11 14]
Subtract: [ -2 -13 -18   1   4]
Multiply: [ 99 140  19  30  45]
Divide: [0.81818182 0.35       0.05263158 1.2        1.8       ]
Dot Product: 333
Mean of A: 6.4
Median of A: 7.0
Std Dev of A: 2.939387691339814
Variance of A: 8.64
Max in B: 20 at index 1
Min in B: 5 at index 3


In [6]:
# Task 4: Reshaping and Transposing
arr=np.arange(1, 13)

r_2D = arr.reshape(4, 3)
r_3D = arr.reshape(2, 2, 3)
t = r_2D.T

print("2D Reshape:c", r_2D)
print("3D Reshape: ", r_3D)
print("Transposed: ", t)
print("Transposed Shape:", t.shape)

2D Reshape:c [[ 1  2  3]
 [ 4  5  6]
 [ 7  8  9]
 [10 11 12]]
3D Reshape:  [[[ 1  2  3]
  [ 4  5  6]]

 [[ 7  8  9]
  [10 11 12]]]
Transposed:  [[ 1  4  7 10]
 [ 2  5  8 11]
 [ 3  6  9 12]]
Transposed Shape: (3, 4)


In [8]:
# Task 5: Boolean Masking and Filtering 
Arr = np.random.randint(10, 51, 15)

print(Arr)
print("Greater than 25:", Arr[Arr > 25])
Arr[Arr < 30] = 0
print("Replaced < 30 with 0:", Arr)
print("Count divisible by 5:", np.sum(Arr % 5 == 0))

[45 49 27 22 12 50 27 44 23 13 16 40 12 15 50]
Greater than 25: [45 49 27 50 27 44 40 50]
Replaced < 30 with 0: [45 49  0  0  0 50  0 44  0  0  0 40  0  0 50]
Count divisible by 5: 13


In [9]:
# Task 6: Working with Built-in Functions 
l = np.linspace(0, 1, 10)
idt = np.eye(4)
random = np.sort(np.random.randint(1, 101, 20))
largest = random[-5:]

print("Equally spaced values:", l)
print("idt matrix: ", idt)
print("Sorted random:", random)
print("Top 5:", largest)

Equally spaced values: [0.         0.11111111 0.22222222 0.33333333 0.44444444 0.55555556
 0.66666667 0.77777778 0.88888889 1.        ]
idt matrix:  [[1. 0. 0. 0.]
 [0. 1. 0. 0.]
 [0. 0. 1. 0.]
 [0. 0. 0. 1.]]
Sorted random: [ 2 19 22 22 33 33 47 48 56 60 61 63 67 68 69 73 89 91 94 99]
Top 5: [73 89 91 94 99]


In [10]:
# Task 7: Generic 
import time
A = np.random.rand(100, 100)
B = np.random.rand(100, 100)

start = time.time()
product = np.dot(A, B)
end = time.time()

print("Matrix Multiplication Time:", end - start)

try:
    det = np.linalg.det(product)
    inv = np.linalg.inv(product)
    print("Determinant:", det)
    print("Inverse: ", inv)
except np.linalg.LinAlgError:
    print("Matrix is not invertible.")

Matrix Multiplication Time: 0.039701223373413086
Determinant: -5.733137794674754e+50
Inverse:  [[ 28.83231846   2.6212711   -9.97747429 ...  17.97374958  -3.98209711
    2.52716274]
 [-19.6931599   -1.74603604   7.30267658 ... -13.0178056    2.64802478
   -0.9241424 ]
 [ -4.08440626  -0.49876876   1.18417265 ...  -2.23499688   0.45343634
   -0.87246485]
 ...
 [-21.35486552  -1.14382495   8.56702098 ... -13.85030639   1.76022226
    0.094413  ]
 [ 19.27683361   1.83289663  -7.92741769 ...  12.75840533  -1.44166216
    0.27310331]
 [-21.53417041  -1.16700025   6.94311314 ... -11.84460593   2.03276896
   -2.34673005]]


In [11]:
# ----------------------------------------------------------------------
# Part II Pandas tasks

In [13]:
# Task 1: Working with Series 
data = [25, 30, 35, 40, 45]
pdf = pd.Series(data, index=['A', 'B', 'C', 'D', 'E'])

print( pdf)
print("First 3 elements:", pdf[:3])
print("Mean:", pdf.mean(), "Median:", pdf.median(), "Std Dev:", pdf.std())

A    25
B    30
C    35
D    40
E    45
dtype: int64
First 3 elements: A    25
B    30
C    35
dtype: int64
Mean: 35.0 Median: 35.0 Std Dev: 7.905694150420948


In [15]:
# Task 2: Creating and Inspecting DataFrames\
df = pd.DataFrame({
    'Name': ['Alice', 'Bob', 'Carol', 'David', 'Eve'],
    'Age': [20, 22, 19, 21, 20],
    'Gender': ['Female', 'Male', 'Female', 'Male', 'Female'],
    'Marks': [85, 78, 92, 74, 88]
})

print("First two rows:\n", df.head(2))
print("Columns:", df.columns)
print("Data types:\n", df.dtypes)
print("Summary stats:\n", df.describe())

df['Passed'] = df['Marks'] >= 80
print("With Passed column:\n", df)


First two rows:
     Name  Age  Gender  Marks
0  Alice   20  Female     85
1    Bob   22    Male     78
Columns: Index(['Name', 'Age', 'Gender', 'Marks'], dtype='object')
Data types:
 Name      object
Age        int64
Gender    object
Marks      int64
dtype: object
Summary stats:
              Age      Marks
count   5.000000   5.000000
mean   20.400000  83.400000
std     1.140175   7.334848
min    19.000000  74.000000
25%    20.000000  78.000000
50%    20.000000  85.000000
75%    21.000000  88.000000
max    22.000000  92.000000
With Passed column:
     Name  Age  Gender  Marks  Passed
0  Alice   20  Female     85    True
1    Bob   22    Male     78   False
2  Carol   19  Female     92    True
3  David   21    Male     74   False
4    Eve   20  Female     88    True


In [17]:
# Task 3: Data Selection and Filtering
print("Name & Marks:\n", df[['Name', 'Marks']])
print("Marks greater than 80:\n", df[df['Marks'] > 80])
print("Highest marks:\n", df[df['Marks'] == df['Marks'].max()])

Name & Marks:
     Name  Marks
0  Alice     85
1    Bob     78
2  Carol     92
3  David     74
4    Eve     88
Marks greater than 80:
     Name  Age  Gender  Marks  Passed
0  Alice   20  Female     85    True
2  Carol   19  Female     92    True
4    Eve   20  Female     88    True
Highest marks:
     Name  Age  Gender  Marks  Passed
2  Carol   19  Female     92    True


In [19]:
# Task 4: Handling Missing Data
df.loc[1, 'Marks'] = None
df.loc[4, 'Age'] = None

print("Missing:\n", df)
print("Missing values:\n", df.isnull())

df['Marks'] = df['Marks'].fillna(df['Marks'].mean())
df.dropna(subset=['Age'], inplace=True)
print("filling and dropping:\n", df)

Missing:
     Name   Age  Gender  Marks Passed
0  Alice  20.0  Female   85.0   True
1    Bob  22.0    Male    NaN  False
2  Carol  19.0  Female   92.0   True
3  David  21.0    Male   74.0  False
4    NaN   NaN     NaN    NaN    NaN
Missing values:
     Name    Age  Gender  Marks  Passed
0  False  False   False  False   False
1  False  False   False   True   False
2  False  False   False  False   False
3  False  False   False  False   False
4   True   True    True   True    True
filling and dropping:
     Name   Age  Gender      Marks Passed
0  Alice  20.0  Female  85.000000   True
1    Bob  22.0    Male  83.666667  False
2  Carol  19.0  Female  92.000000   True
3  David  21.0    Male  74.000000  False


In [20]:
# Task 5: Grouping and Aggregation
grp = df.groupby('Gender').agg({'Age': 'mean', 'Marks': 'mean'})
count = df['Gender'].value_counts()

print("Group mean:\n", grp)
print("Count by gender:\n", count)

Group mean:
          Age      Marks
Gender                 
Female  19.5  88.500000
Male    21.5  78.833333
Count by gender:
 Gender
Female    2
Male      2
Name: count, dtype: int64


In [21]:
# Task 6: Reading and Writing Data
df.to_csv('students_data.csv', index=False)
ndf = pd.read_csv('students_data.csv')
print("Loaded:\n", ndf.head())

Loaded:
     Name   Age  Gender      Marks  Passed
0  Alice  20.0  Female  85.000000    True
1    Bob  22.0    Male  83.666667   False
2  Carol  19.0  Female  92.000000    True
3  David  21.0    Male  74.000000   False
