In [1]:
#Numpy
#Numpy is what is happening in the background pandas
#Built on C++
#We could potentially do everything we are about to do ourselves
#But that would be incredibly inefficient and take a lot of time

In [None]:
#Numpy 50-100X faster and efficient than python
#Arrays, C++ Arrays = Contiguous memory, optimized searches
#Lists in python, can contain multiple types, and are not "Arrays"
# Numpy Arrays are all one data type, and it is stores in an nd-array

In [2]:
#Let's start using it
import numpy as np
print(np.__version__)

2.0.0


In [4]:
# Creating an array

arr = np.array([1, 2, 3, 4, 5])
list = [1, 2, 3, 4, 5]
print(list)
#arr - prints arr
print(arr)

[1 2 3 4 5]


In [10]:
# Dimensions - n-dimensional array
#0-3D arrays
# 0D Array AKA a Scalar
zero_arr = np.array(42)
zero_arr

#ID Array AKA a Array
one_arr = np.array([1, 2, 3, 4, 5])
one_arr

#2D array aka a matrix
two_arr = np.array([[1, 2, 3], [4, 5, 6]])
two_arr

#3D array AKA a Tensor
three_arr = np.array([[[1, 2, 3],[4, 5, 6]],[[7, 8, 9],[10, 11, 12]]])
#rows are innermost arrays
#outer arrays are columns
three_arr

print(zero_arr.ndim, one_arr.ndim, two_arr.ndim, three_arr.ndim)

0 1 2 3


In [13]:
#Indexing into our nd arrays
#Python indexing - do not do this
print(two_arr[0][1])

#Numpy indexing
print(two_arr[0, 1]) #using c lookups instead of python lookups

print(three_arr[0, 1, 2])

2
2
6


In [18]:
#Numpy arrays are a single data type
#Numpy has its own data types
# i - integers up int64
# b - boolean
# u - unsigned ints
# f - float - float128
# c - complex
# m - timedelta
# M - datetime
# O - object
# S - string
# U - unicode strings
# V - void type -- fixed chunk of memory that is reserved

print(arr.dtype)

arr = np.array([1, 2, 3, 4, 5], dtype='f') #enforced type saftey
print(arr.dtype)
new_arr = arr.astype('S')
new_arr

float32
float32


array([b'1.0', b'2.0', b'3.0', b'4.0', b'5.0'], dtype='|S32')

In [20]:
# Checking our matrixes shape

print(arr.shape)
print(two_arr.shape) # 2 rows, 3 columns
print(three_arr.shape) # 2 rows, 2 columns, 3 depth

(5,)
(2, 3)


In [25]:
#Reshape arrays

arr = np.array([1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12])
print(arr)
new_arrr = arr.reshape(4, 3)
print(new_arrr)

[ 1  2  3  4  5  6  7  8  9 10 11 12]
[[ 1  2  3]
 [ 4  5  6]
 [ 7  8  9]
 [10 11 12]]


In [30]:
#If we do not know our full dimensions we can substitute in a -1

new_arrr = arr.reshape(-1, 3)
print(new_arrr)

[[ 1  2  3]
 [ 4  5  6]
 [ 7  8  9]
 [10 11 12]]


In [31]:
# to flatten our arrays

arr = new_arrr.reshape(-1)
print(arr)

[ 1  2  3  4  5  6  7  8  9 10 11 12]


In [33]:
#Iterating through arrays

for x in three_arr:
    print(x)
    
for x in np.nditer(three_arr):
    print(x)

[[1 2 3]
 [4 5 6]]
[[ 7  8  9]
 [10 11 12]]
1
2
3
4
5
6
7
8
9
10
11
12


In [37]:
#Joining arrays
# join based on axis
# axis = 0 is based on rows, axis = 1 is based on columns

arr1 = np.array([[1,2], [3,4]])
arr2 = np.array([[5,6], [7,8]])
arr = np.concatenate((arr1, arr2), axis = 0)
print(arr)

[[1 2]
 [3 4]
 [5 6]
 [7 8]]


In [36]:
# Join based off of columns

arr = np.concatenate((arr1, arr2), axis = 1)
print(arr)

[[1 2 5 6]
 [3 4 7 8]]


In [None]:
#stacking
# Stack(), hstack(), dstack()

arr1 = np.array([1,2, 3,4])
arr2 = np.array([5,6, 7,8])

arr = np.Stack(arr1, arr2)
arr = np.hstack(arr1, arr2) #horizontally
arr = np.dstack(arr1, arr2) #depth


In [38]:
# split array into n number of parts

arr = np.array([1, 2,3 , 4, 5, 6])
new_arr = np.array_split(arr, 3)
print(new_arr)

[array([1, 2]), array([3, 4]), array([5, 6])]


In [None]:
# Searching our arrays is done with the where() method

arr = np.array([1, 2, 3, 4, 5, 6])
x = np.where(arr == 4)
print(x)

In [39]:
 #Searchsorted() --binary search that returns index 
 #only works if array is sorted
 
arr = np.array([1, 2, 3, 4, 5, 6])
x = np.searchsorted(arr, 6)
print(x)

5


In [40]:
# sorting arrays
# can still use the python sort() method
# np.sort()

arr = np.array([[5,2,1], [6,3,9]])
print(np.sort(arr))

[[1 2 5]
 [3 6 9]]


In [41]:
#filtering our arrays
#filter based off a condition
#or we can use a boolean array

arr = np.array([40, 41, 42, 430])
x = [False, True, False, True]

filtered_arr = arr[x]
print(filtered_arr)

[ 41 430]


In [43]:
# copies vs views
# when we work with Big data
# 4 GB -> 8 GB
# 11 GB -> 22 GB
# use a view

#copies a reference to the object

arr = np.array([1, 2, 3, 4])
print(arr)
view = arr.view()
copy = arr.copy()
arr = np.array([1, 2, 3, 5])
print(view)
arr[0]= 15
print(copy)
print(view.base)
print(copy.base)


[1 2 3 4]
[1 2 3 4]
[1 2 3 4]
[1 2 3 4]
None


In [45]:
import time

In [46]:
# use python list

start = time.time()
list = [i for i in range(1_000_000)]
squared = [x**2 for x in list]
stop = time.time()

print(stop-start)

0.19635677337646484


In [47]:
start = time.time()
arr = np.arange(1_000_000)
squared = arr**2
stop = time.time()

print(stop-start)

0.008728504180908203
