- NumPy: ndarrays, `np.dot` (dot product), slicing arrays and different ways of addressing (extracting) rows and columns in 2D arrays, `matmul` (matrix multiplication) and `@`, transpose (or `.T`)
- Remember the rules for matrix multiplication (correct dimensions for matrix multiplication, and how to figure out the correct dimensions of the end product of matrix multiplication)
- Regular expressions (brush up your knowledge of the regex notebook)
- Pandas basics, different ways to access rows and columns (simple indexing using square brackets, loc, iloc, at, iat)
- Pandas creating dataframes from series and dictionaries
- Vector and Matrix Norms, how to compute them (1,2 and infinity for vectors. 1, frobenius, and infinity norms for matrices)
- Eigenvalues and Eigenvectors (how to detect if a given value and vector is an eigenvalue and eigenvector for a given matrix)

# Numpy

In [3]:
import numpy as np

# The dot product combines two vectors: multiply matching elements and add up
# the products. A physics example is force x distance = work.

array1 = np.array([1, 2, 3])
array2 = np.array([1, 2, 3])

dot_result = np.dot(array1, array2)
print(dot_result)


14


### Dot product
![Screenshot 2024-10-27 at 12.41.36 PM.png](attachment:db9f0e6c-6aa1-4d72-b88d-e4b4b2546ffe.png)

In [5]:
# Build a random 5x5 matrix: draw 25 integers from 1..19 (the upper bound 20
# is exclusive) and fold the flat result into 5 rows of 5.
array3 = np.random.randint(1, 20, size=25).reshape(5, 5)
array3

array([[ 3,  3, 19, 15, 13],
       [18, 19, 18,  6,  4],
       [ 1, 19,  7,  9, 12],
       [ 9, 16,  3, 10, 14],
       [ 4,  1, 10,  7, 18]])

In [6]:
type(array3)

numpy.ndarray

In [7]:
# Creating 2d array (3x3) manually:
array11 = np.array([[1,2,3],[3,4,5],[6,7,8]])
array11

array([[1, 2, 3],
       [3, 4, 5],
       [6, 7, 8]])

### Addressing and extracting rows and columns via slicing

In [9]:
array3[:,1] # Returns all rows of column 2

array([ 3, 19, 19, 16,  1])

In [10]:
array3[3,:] # Returns all columns of row 4

array([ 9, 16,  3, 10, 14])

In [11]:
# Slice out [7, 9] (row 3, columns 3 and 4)
array3[2,2:4] # Remember, the element in index (2,4) is not included.

array([7, 9])

### Issues with slicing with multiple brackets:

In [13]:
# Numpy does not treat chained square brackets as a [row, column] pair:
# each bracket is applied, in order, to the result of the previous one.
# Notice how the (2) operations below do the same thing.

array3[:][1]

array([18, 19, 18,  6,  4])

In [14]:
array3[1][:] # Returns all of row 2 again?

array([18, 19, 18,  6,  4])

In [15]:
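# Basically, each bracket indexes the FIRST axis (rows) of whatever it is applied to. A lone [:] returns the whole array (or the whole row) unchanged, so only the integer index has any effect, regardless of its position in the chain.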

In [16]:
array3[1][:][:]

array([18, 19, 18,  6,  4])

In [17]:
array3[1]

array([18, 19, 18,  6,  4])

In [18]:
# The lone colon [:] is not ignored: it selects everything, so it simply hands back the same data!

In [83]:
array3[1][1] # This works though for returning a single cell.

19

In [85]:
array3[1][0:2] # This also works: take row 2 first, then slice its first two cells.

array([18, 19])

### Matrix Multiplication 
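- For A x B, the number of columns of A MUST EQUAL the number of rows of B.
- The product takes its rows from A and its columns from B: (m x n) times (n x p) gives (m x p). Example: (5 x 5) times (5 x 7) gives (5 x 7).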
- Use .shape and .size for dimensions.
  - .shape returns (rows,columns)
  - .size returns total number of elements (rows x columns)

In [20]:
array4 = np.random.randint(1,20,35).reshape((5,7))

print(np.matmul(array3,array4),'\n')
# OR this way
print(array3 @ array4)

[[327 624 333 725 648 371 544]
 [377 729 433 743 875 574 679]
 [321 661 434 418 605 473 558]
 [313 718 401 469 696 411 502]
 [286 497 267 451 509 256 392]] 

[[327 624 333 725 648 371 544]
 [377 729 433 743 875 574 679]
 [321 661 434 418 605 473 558]
 [313 718 401 469 696 411 502]
 [286 497 267 451 509 256 392]]


In [87]:
print(array4.shape) # Returns (5, 7)
print(array4.size) # Returns 5x7 = 35

(5, 7)
35


#### Transpose
- Below are all (3) ways to transpose a matrix:

In [23]:
print(array3,'\n')
array3.T

[[ 3  3 19 15 13]
 [18 19 18  6  4]
 [ 1 19  7  9 12]
 [ 9 16  3 10 14]
 [ 4  1 10  7 18]] 



array([[ 3, 18,  1,  9,  4],
       [ 3, 19, 19, 16,  1],
       [19, 18,  7,  3, 10],
       [15,  6,  9, 10,  7],
       [13,  4, 12, 14, 18]])

In [24]:
array3.transpose()

array([[ 3, 18,  1,  9,  4],
       [ 3, 19, 19, 16,  1],
       [19, 18,  7,  3, 10],
       [15,  6,  9, 10,  7],
       [13,  4, 12, 14, 18]])

In [25]:
array3.swapaxes(0,1)

array([[ 3, 18,  1,  9,  4],
       [ 3, 19, 19, 16,  1],
       [19, 18,  7,  3, 10],
       [15,  6,  9, 10,  7],
       [13,  4, 12, 14, 18]])

In [89]:
array3.swapaxes(0,0) # No change.

array([[ 3,  3, 19, 15, 13],
       [18, 19, 18,  6,  4],
       [ 1, 19,  7,  9, 12],
       [ 9, 16,  3, 10, 14],
       [ 4,  1, 10,  7, 18]])

In [91]:
array3.swapaxes(1,0) # Another way to get transpose.

array([[ 3, 18,  1,  9,  4],
       [ 3, 19, 19, 16,  1],
       [19, 18,  7,  3, 10],
       [15,  6,  9, 10,  7],
       [13,  4, 12, 14, 18]])

## Pandas

### Create a Series and Dataframe

In [28]:
import pandas as pd

#### Series 

pay = pd.Series({'Joe':60000,'Jade':65000,'Jonathan':60000})
marital_stat = pd.Series({'Joe':'Y','Jade':'N','Jonathan':'Y'})
years_service = pd.Series({'Joe':5,'Jade':0,'Jonathan':7})

employee_df = pd.DataFrame({'Pay':pay, 'Married?':marital_stat,'YOS':years_service})

# Works because all Series share the same index.  Notice that the role of the dictionary keys flips when going from Series to DataFrame:
# in a Series the keys become the index (row labels), whereas in the DataFrame dictionary the keys become the column names.
employee_df



Unnamed: 0,Pay,Married?,YOS
Joe,60000,Y,5
Jade,65000,N,0
Jonathan,60000,Y,7


In [115]:
# A few other ways to make randomized DataFrames

# Method 1
df_rand = pd.DataFrame(np.random.randn(6,6),index=np.arange(0,6,1),columns=['A','B','C','D','E','F',])
df_rand

Unnamed: 0,A,B,C,D,E,F
0,-0.843306,0.083859,0.41302,1.226958,-0.325418,0.547702
1,0.02664,-1.05474,-0.120291,0.425118,0.247027,0.757155
2,0.680415,-0.879465,1.940978,0.154664,-0.411868,1.168578
3,0.287405,0.518229,-0.073613,1.741705,0.637215,-0.025357
4,0.577604,-0.207255,0.732236,0.282118,-0.10731,-0.055979
5,-1.757881,-1.092061,-0.094516,-0.910834,-0.355541,0.575418


In [117]:
# Method 2

df_rand2 = pd.DataFrame(np.random.randint(0,30,36).reshape(6,6),index=np.arange(0,6,1),columns=['A','B','C','D','E','F',])
df_rand2

Unnamed: 0,A,B,C,D,E,F
0,23,9,29,4,17,1
1,0,24,12,14,13,24
2,0,23,26,8,9,17
3,13,0,2,26,18,29
4,21,26,15,26,18,5
5,1,21,16,25,18,25


### Accessing columns and rows

##### Remember that Pandas focuses on **accessing COLUMNS**:

In [31]:
employee_df['Pay']

Joe         60000
Jade        65000
Jonathan    60000
Name: Pay, dtype: int64

In [32]:
# Each column is also an attribute!
employee_df.Pay

Joe         60000
Jade        65000
Jonathan    60000
Name: Pay, dtype: int64

##### To access rows with plain square brackets, **you MUST use a slice (a colon)**, *even if it is only ONE row!*

In [34]:
print(employee_df['Joe':'Joe'],'\n') 

print(employee_df['Joe':'Jade'])

       Pay Married?  YOS
Joe  60000        Y    5 

        Pay Married?  YOS
Joe   60000        Y    5
Jade  65000        N    0


In [35]:
# There's a traditional [row][columns] format as well.

# But again, it won't work if there is no colon!
print(employee_df['Joe':'Jade']['Pay'])

#print(employee_df['Joe']['Pay']) # THROWS ERROR
print(employee_df['Joe':'Joe']['Pay']) # Again, "same request" but doesn't throw an error.

Joe     60000
Jade    65000
Name: Pay, dtype: int64
Joe    60000
Name: Pay, dtype: int64


In [36]:
# If you want a single cell, try these ways:

# Notice that every one returns the type stored in the cell.

# Column first, then the row LABEL.
print(employee_df['Pay']['Joe'])

# Column first, then the row POSITION. Positional access must go through
# .iloc: on a label-indexed Series a bare integer inside [] is treated as a
# label, and the old positional fallback is deprecated (FutureWarning).
print(employee_df['Pay'].iloc[0])

print(type(employee_df['Pay']['Joe']))

# print(employee_df[0][0]) WILL GIVE ERROR! (there is no column labelled 0)

60000
60000
<class 'numpy.int64'>


  print(employee_df['Pay'][0])


##### .at/.iat/.loc/.iloc
- Think of these indexers as making DataFrames focus on rows first and then columns, the reverse of pandas' default column-first `[]` access.

In [95]:
# With loc/iloc, you can use [row][column] OR [row,column].  Either way ROW comes first.

print(employee_df.loc['Joe'],'\n')
print(employee_df.iloc[0],'\n')

print(employee_df.loc['Joe']['Pay'],'\n')
print(employee_df.iloc[0,0],'\n')

print(type(employee_df.loc['Joe']['Pay'])) # Which type is it before casting? (A numpy 64-bit int)
print(type(int(employee_df.loc['Joe']['Pay']))) # We have successfully pulled a cell out and made it an int!


Pay         60000
Married?        Y
YOS             5
Name: Joe, dtype: object 

Pay         60000
Married?        Y
YOS             5
Name: Joe, dtype: object 

60000 

60000 

<class 'numpy.int64'>
<class 'int'>


In [39]:
# With at/iat, you MUST use [row,column] format as well. 

# THESE are faster than loc methods for accessing a scalar.

print(employee_df.at['Jade','Pay'],'\n')
print(employee_df.iat[1, 0],'\n')

65000 

65000 



### Vectors and Norms
- 1, 2, and infinity norms for vectors
- 1, Frobenius, and infinity norms for matrices

- ![Screenshot 2024-10-28 at 11.31.42 AM.png](attachment:c1cc20b7-2768-45bc-880a-2764183a3302.png)

In [41]:
v1 = array11[:,1]
v1

array([2, 4, 7])

In [42]:
# Norms for vectors
print(np.linalg.norm(v1,1)) # Should be 13
print(np.linalg.norm(v1,2)) # Should be sqrt(69)
print(np.linalg.norm(v1, np.inf)) # Should be 7

13.0
8.306623862918075
7.0


In [121]:
array14 = [[1,2,3],[4,5,6],[7,8,9]]

In [123]:
# Norms for matrices
print(np.linalg.norm(array14,1)) # Should be 18
print(np.linalg.norm(array14,'fro')) # Should be sqrt(285)
print(np.linalg.norm(array14,np.inf)) # Should be 24

18.0
16.881943016134134
24.0


In [141]:
import math

# Cross-check the Frobenius norm by hand: the square root of the sum of the
# squared entries 1..9. Feeding a generator to the builtin sum() avoids
# rebinding the name `sum`, which would break every later sum(...) call.
frobenius_by_hand = math.sqrt(sum(i**2 for i in range(1, 10)))
frobenius_by_hand

16.881943016134134

### Eigenvectors and values
- Eigenvalues and Eigenvectors (how to detect if a given value and vector is an eigenvalue and eigenvector for a given matrix)

In [46]:
# Ax = lambda(x)
# (A-lambdaI)x = 0

test = np.array([[-6,3],[4,5]])
eigenval, eigenvect = np.linalg.eig(test)

print(test)
print(eigenval)
print(eigenvect)


[[-6  3]
 [ 4  5]]
[-7.  6.]
[[-0.9486833  -0.24253563]
 [ 0.31622777 -0.9701425 ]]


In [47]:
# Eigenvectors
e_v1 = eigenvect[:,0]
e_v2 = eigenvect[:,1]

# Test first pair of eigenvalues and eigenvectors
print('First')
print(np.matmul(test,e_v1))
print(eigenval[0]*e_v1)

print('Second')
# Test second pair of eigenvalues and eigenvectors
print(np.matmul(test,e_v2))
print(eigenval[1]*e_v2)

First
[ 6.64078309 -2.21359436]
[ 6.64078309 -2.21359436]
Second
[-1.45521375 -5.820855  ]
[-1.45521375 -5.820855  ]


In [155]:
# Alternatively:
# See if (A-lambdaI)x = 0

print((test-(eigenval[1]*np.identity(2)))@e_v2) # It works!

array([0., 0.])

array([[1., 0.],
       [0., 1.]])