In [None]:
#CHAPTER 4 NumPy Basics: Arrays and Vectorized Computation

In [None]:
""" Here are some of the things you’ll find in NumPy:
• ndarray, an efficient multidimensional array providing fast array-oriented arithmetic
operations and flexible broadcasting capabilities.
• Mathematical functions for fast operations on entire arrays of data without having
to write loops.
• Tools for reading/writing array data to disk and working with memory-mapped
files.
• Linear algebra, random number generation, and Fourier transform capabilities.
• A C API for connecting NumPy with libraries written in C, C++, or FORTRAN.  """

In [None]:
""" For most data analysis applications, the main areas of functionality I’ll focus on are:
• Fast vectorized array operations for data munging and cleaning, subsetting and
filtering, transformation, and any other kinds of computations
• Common array algorithms like sorting, unique, and set operations
• Efficient descriptive statistics and aggregating/summarizing data
• Data alignment and relational data manipulations for merging and joining
together heterogeneous datasets
• Expressing conditional logic as array expressions instead of loops with if-elifelse
branches
• Group-wise data manipulations (aggregation, transformation, function application)
While NumPy provides a computational foundation for general numerical data processing,
many readers will want to use pandas as the basis for most kinds of statistics
or analytics, especially on tabular data. pandas also provides some more domainspecific
functionality like time series manipulation, which is not present in NumPy.  """

In [None]:
""" One of the reasons NumPy is so important for numerical computations in Python is
because it is designed for efficiency on large arrays of data. There are a number of
reasons for this:
• NumPy internally stores data in a contiguous block of memory, independent of
other built-in Python objects. NumPy’s library of algorithms written in the C language
can operate on this memory without any type checking or other overhead.
NumPy arrays also use much less memory than built-in Python sequences.
• NumPy operations perform complex computations on entire arrays without the
need for Python for loops. """

In [2]:
""" To give you an idea of the performance difference, consider a NumPy array of one
million integers, and the equivalent Python list:multiply each sequence by 2:

NumPy-based algorithms are generally 10 to 100 times faster (or more) than their
pure Python counterparts and use significantly less memory. """

import numpy as np
my_arr = np.arange(1000000)
my_list = list(range(1000000))
%time for _ in range(10): my_arr2 = my_arr * 2

Wall time: 31.2 ms


In [3]:
%time for _ in range(10): my_list2 = [x * 2 for x in my_list]

Wall time: 1.15 s


In [1]:
""" 4.1 The NumPy ndarray: A Multidimensional Array Object
One of the key features of NumPy is its N-dimensional array object, or ndarray,
which is a fast, flexible container for large datasets in Python. Arrays enable you to
perform mathematical operations on whole blocks of data using similar syntax to the
equivalent operations between scalar elements."""
import numpy as np
# Generate some random data
data = np.random.randn(2, 3)
data

array([[-0.23113294, -0.65022989,  0.78979056],
       [ 0.94186738, -0.47548191,  0.4044284 ]])

In [5]:
# write mathematical operations with data:
data * 10

array([[18.1835523 ,  3.24565246, -1.28960377],
       [ 6.00635797, 10.99309792,  1.19778635]])

In [2]:
data

array([[ 1.81835523,  0.32456525, -0.12896038],
       [ 0.6006358 ,  1.09930979,  0.11977863]])

In [3]:
In [16]: data + data

array([[ 3.63671046,  0.64913049, -0.25792075],
       [ 1.20127159,  2.19861958,  0.23955727]])

In [4]:
data

array([[ 1.81835523,  0.32456525, -0.12896038],
       [ 0.6006358 ,  1.09930979,  0.11977863]])

In [6]:
""" An ndarray is a generic multidimensional container for homogeneous data; that is, all
of the elements must be the same type. Every array has a shape, a tuple indicating the
size of each dimension, and a dtype, an object describing the data type of the array:"""
data.shape

(2, 3)

In [7]:
data.dtype

dtype('float64')

In [10]:
#Creating ndarrays. 1. from a list data1
data1 = [6, 7.5, 8, 0, 1]
type(data1)

list

In [None]:
arr1 = np.array(data1)

In [11]:
type(arr1)

numpy.ndarray

In [12]:
arr1

array([6. , 7.5, 8. , 0. , 1. ])

In [13]:
#Nested sequences, like a list of equal-length lists, will be converted into a multidimensional array:
data2 = [[1, 2, 3, 4], [5, 6, 7, 8]]
arr2 = np.array(data2)
arr2

array([[1, 2, 3, 4],
       [5, 6, 7, 8]])

In [4]:
np.zeros((3,4))

array([[0., 0., 0., 0.],
       [0., 0., 0., 0.],
       [0., 0., 0., 0.]])

In [5]:
np.ones((3,4))

array([[1., 1., 1., 1.],
       [1., 1., 1., 1.],
       [1., 1., 1., 1.]])

In [6]:
np.arange(15)

array([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14])

In [7]:
np.eye(2,3)

array([[1., 0., 0.],
       [0., 1., 0.]])

In [8]:
#You can explicitly convert or cast an array from one dtype to another using ndarray’s astype method:
arr = np.array([1, 2, 3, 4, 5])
print(arr.dtype)
float_arr = arr.astype(np.float64)
print(float_arr.dtype)

int32
float64


In [9]:
#Arithmetic with NumPy Arrays
arr = np.array([[1., 2., 3.], [4., 5., 6.]])
arr * arr

array([[ 1.,  4.,  9.],
       [16., 25., 36.]])

In [10]:
arr - arr

array([[0., 0., 0.],
       [0., 0., 0.]])

In [11]:
arr2 = np.array([[0., 4., 1.], [7., 2., 12.]])
arr2 > arr

array([[False,  True, False],
       [ True, False,  True]])

In [20]:
#Basic Indexing and Slicing
arr = np.arange(10.0)
print(arr[5])
print(arr[5:8])

5.0
[5. 6. 7.]


In [21]:
arr[5:8] = 12.0

In [22]:
arr

array([ 0.,  1.,  2.,  3.,  4., 12., 12., 12.,  8.,  9.])

In [None]:
""" An important first distinction from Python’s built-in lists is that array slices are views on the original array.
This means that the data is not copied, and any modifications to the view will be reflected in the source array. """

In [4]:
import numpy as np
pp = np.arange(15)
pp

array([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14])

In [6]:
ajk = pp
ajk

array([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14])

In [7]:
ajk[0] = 999

In [8]:
print(pp)
print(ajk)

[999   1   2   3   4   5   6   7   8   9  10  11  12  13  14]
[999   1   2   3   4   5   6   7   8   9  10  11  12  13  14]


In [23]:
# whereas copy ops as below
arr2 =arr.copy()

In [24]:
arr2

array([ 0.,  1.,  2.,  3.,  4., 12., 12., 12.,  8.,  9.])

In [27]:
arr2[5:8] = 10.0

In [28]:
arr2

array([ 0.,  1.,  2.,  3.,  4., 10., 10., 10.,  8.,  9.])

In [29]:
arr

array([ 0.,  1.,  2.,  3.,  4., 12., 12., 12.,  8.,  9.])

In [4]:
import numpy as np
arr2d = np.array([[1, 2, 3], [4, 5, 6], [7, 8, 9]])

In [5]:
arr2d

array([[1, 2, 3],
       [4, 5, 6],
       [7, 8, 9]])

In [32]:
arr2d[2]

array([7, 8, 9])

In [33]:
arr2d[0][2]

3

In [16]:
# axis 0 as the “rows” of the array and axis 1 as the “columns.”
arr3d = np.array([[[1, 2, 3], [4, 5, 6]], [[7, 8, 9], [10, 11, 12]]])
arr3d

array([[[ 1,  2,  3],
        [ 4,  5,  6]],

       [[ 7,  8,  9],
        [10, 11, 12]]])

In [19]:
arr3d[0]

array([[42, 42, 42],
       [42, 42, 42]])

In [18]:
arr3d[0] = 42

In [20]:
arr3d[0]

array([[42, 42, 42],
       [42, 42, 42]])

In [21]:
arr3d

array([[[42, 42, 42],
        [42, 42, 42]],

       [[ 7,  8,  9],
        [10, 11, 12]]])

In [49]:
#Similarly, arr3d[1, 0] gives you all of the values whose indices start with (1, 0),forming a 1-dimensional array:
arr3d[0, 0]

array([1, 2, 3])

In [50]:
arr3d[1, 0]

array([7, 8, 9])

In [51]:
arr3d[0, 1]

array([4, 5, 6])

In [53]:
#This expression is the same as though we had indexed in two steps:
x = arr3d[0]
x

array([[1, 2, 3],
       [4, 5, 6]])

In [54]:
arr3d[0, 1]

array([4, 5, 6])

In [55]:
#Indexing with slices: Like one-dimensional objects such as Python lists, ndarrays can be sliced :
arr

array([ 0.,  1.,  2.,  3.,  4., 12., 12., 12.,  8.,  9.])

In [56]:
 arr[1:6]

array([ 1.,  2.,  3.,  4., 12.])

In [23]:
#2dim
arr2d = np.array([[1, 2, 3], [4, 5, 6], [7, 8, 9]])
arr2d

array([[1, 2, 3],
       [4, 5, 6],
       [7, 8, 9]])

In [24]:
arr2d[:2] # select first 2 rows

array([[1, 2, 3],
       [4, 5, 6]])

In [25]:
arr2d[2:] # rows after 2nd

array([[7, 8, 9]])

In [26]:
arr2d[:1]

array([[1, 2, 3]])

In [27]:
arr2d[1:]

array([[4, 5, 6],
       [7, 8, 9]])

In [62]:
arr2d[:2]

array([[1, 2, 3],
       [4, 5, 6]])

In [None]:
""" As you can see, it has sliced along axis 0, the first axis. A slice, therefore, selects a
range of elements along an axis. It can be helpful to read the expression arr2d[:2] as
“select the first two rows of arr2d.” """

In [6]:
arr2d

array([[1, 2, 3],
       [4, 5, 6],
       [7, 8, 9]])

In [8]:
#You can pass multiple slices just like you can pass multiple indexes:
arr2d[:2, 1:]

array([[2, 3],
       [5, 6]])

In [None]:
#select the second row but only the first two columns
arr2d[1, :2]

In [9]:
arr2d[0,:3]

array([1, 2, 3])

In [11]:
#select the third column but only the first two rows:
arr2d[:2, 2]

array([3, 6])

In [12]:
#Note that a colon by itself means to take the entire axis, so you can slice only higher dimensional axes by doing:
arr2d[:, :1]

array([[1],
       [4],
       [7]])

In [28]:
#assigning to a slice expression assigns to the whole selection:
arr2d[:2, 1:] = 0

In [29]:
arr2d

array([[1, 0, 0],
       [4, 0, 0],
       [7, 8, 9]])

In [34]:
#Boolean Indexing
""" we have some data in an array and an array of names with duplicates. Using here the randn function in numpy.random to generate
some random normally distributed data:   """
names = np.array(['Bob', 'Joe', 'Will', 'Bob', 'Will', 'Joe', 'Joe'])
data = np.random.randn(7, 4)
data

array([[ 1.69694378,  1.75579383,  0.07920237,  1.37432167],
       [-1.6038185 ,  0.18063703,  0.66556388,  0.01364613],
       [ 1.25601999,  0.87059149, -0.18442245,  1.25977139],
       [ 0.18332857,  1.62777291, -0.2386422 , -0.19518667],
       [ 0.68162704,  0.37271762, -0.66131857, -0.96468714],
       [ 0.07897415, -1.16398338,  0.43925481,  0.29306506],
       [ 0.34198216,  0.9317303 ,  0.87488477, -1.2605849 ]])

In [35]:
#select all the rows with corresponding name 'Bob'. Like arithmetic operations, comparisons
#(such as ==) with arrays are also vectorized. Thus, comparing names with the string 'Bob' yields a boolean array:
names == 'Bob'

array([ True, False, False,  True, False, False, False])

In [36]:
#This boolean array can be passed when indexing the array:
data[names == 'Bob']

array([[ 1.69694378,  1.75579383,  0.07920237,  1.37432167],
       [ 0.18332857,  1.62777291, -0.2386422 , -0.19518667]])

In [37]:
#NB: The boolean array must be of the SAME length as the array axis it’s indexing. You can
#even mix and match boolean arrays with slices or integers (or sequences of integers
#select from the rows where names == 'Bob' and index the columns to 2,
data[names == 'Bob', 2:]

array([[ 0.07920237,  1.37432167],
       [-0.2386422 , -0.19518667]])

In [27]:
data[names == 'Bob', 3]

array([-1.71102144,  0.95741381])

In [28]:
#To select everything but 'Bob', you can either use != or negate the condition using ~:
names != 'Bob'
data[~(names == 'Bob')]

array([[ 0.6414689 ,  0.25556212, -0.47672115, -2.17976129],
       [ 2.39968261,  0.16391135,  1.37487116,  0.071535  ],
       [ 0.61153111,  0.99757604, -1.04534585, -0.69679727],
       [ 0.19516417, -1.59462338, -1.76949031,  0.31048472],
       [ 1.59151373, -1.60070094, -1.21312433,  0.75103648]])

In [29]:
#The ~ operator can be useful when you want to invert a general condition:
cond = names == 'Bob'
data[~cond]

array([[ 0.6414689 ,  0.25556212, -0.47672115, -2.17976129],
       [ 2.39968261,  0.16391135,  1.37487116,  0.071535  ],
       [ 0.61153111,  0.99757604, -1.04534585, -0.69679727],
       [ 0.19516417, -1.59462338, -1.76949031,  0.31048472],
       [ 1.59151373, -1.60070094, -1.21312433,  0.75103648]])

In [30]:
#Selecting two of the three names to combine multiple boolean conditions, use operators like & (and) and | (or):
mask = (names == 'Bob') | (names == 'Will')
print(mask)
data[mask]

[ True False  True  True  True False False]


array([[-1.15329821, -0.40307842,  0.16023458, -1.71102144],
       [ 2.39968261,  0.16391135,  1.37487116,  0.071535  ],
       [-0.00899295, -0.78370039, -0.09956641,  0.95741381],
       [ 0.61153111,  0.99757604, -1.04534585, -0.69679727]])

In [None]:
#Selecting data from an array by boolean indexing always creates a copy of the data,
#even if the returned array is unchanged.

In [32]:
#Setting values with boolean arrays...To set all of the negative values in data to 0 
data[data < 0] = 0

In [33]:
data

array([[0.        , 0.        , 0.16023458, 0.        ],
       [0.6414689 , 0.25556212, 0.        , 0.        ],
       [2.39968261, 0.16391135, 1.37487116, 0.071535  ],
       [0.        , 0.        , 0.        , 0.95741381],
       [0.61153111, 0.99757604, 0.        , 0.        ],
       [0.19516417, 0.        , 0.        , 0.31048472],
       [1.59151373, 0.        , 0.        , 0.75103648]])

In [34]:
data[names != 'Joe'] = 7

In [35]:
data
#see later, these types of operations on two-dimensional data are convenient to do with pandas.

array([[7.        , 7.        , 7.        , 7.        ],
       [0.6414689 , 0.25556212, 0.        , 0.        ],
       [7.        , 7.        , 7.        , 7.        ],
       [7.        , 7.        , 7.        , 7.        ],
       [7.        , 7.        , 7.        , 7.        ],
       [0.19516417, 0.        , 0.        , 0.31048472],
       [1.59151373, 0.        , 0.        , 0.75103648]])

In [36]:
#Fancy Indexing:Fancy indexing is a term adopted by NumPy to describe indexing using integer arrays.Suppose we had an 8 × 4 array:
arr = np.empty((8, 4))
for i in range(8):
    arr[i] = i
arr

array([[0., 0., 0., 0.],
       [1., 1., 1., 1.],
       [2., 2., 2., 2.],
       [3., 3., 3., 3.],
       [4., 4., 4., 4.],
       [5., 5., 5., 5.],
       [6., 6., 6., 6.],
       [7., 7., 7., 7.]])

In [42]:
#arr[3,2] gives 3.0
arr[[3,2,7]]  # gives the full rows in any order....see the double square brackets

array([[3., 3., 3., 3.],
       [2., 2., 2., 2.],
       [7., 7., 7., 7.]])

In [43]:
arr[[-1,-3]]

array([[7., 7., 7., 7.],
       [5., 5., 5., 5.]])

In [44]:
""" Passing multiple index arrays does something slightly different; it selects a onedimensional
array of elements corresponding to each tuple of indices:"""
arr = np.arange(32).reshape((8, 4))
arr

array([[ 0,  1,  2,  3],
       [ 4,  5,  6,  7],
       [ 8,  9, 10, 11],
       [12, 13, 14, 15],
       [16, 17, 18, 19],
       [20, 21, 22, 23],
       [24, 25, 26, 27],
       [28, 29, 30, 31]])

In [45]:
arr[[1, 5, 7, 2], [0, 3, 1, 2]]
#ie (row1, col0) is 4

array([ 4, 23, 29, 10])

In [46]:
arr[[1,7]]

array([[ 4,  5,  6,  7],
       [28, 29, 30, 31]])

In [None]:
""" Here the elements (1, 0), (5, 3), (7, 1), and (2, 2) were selected. Regardless of
how many dimensions the array has (here, only 2), the result of fancy indexing is
always one-dimensional.
The behavior of fancy indexing in this case is a bit different from what some users
might have expected, which is the rectangular region formed by selecting a subset of the matrix’s rows and columns. 
Here is one way to get that:"""

In [50]:
arr[[1, 5, 7, 2]][:, [0, 3, 1, 2]]

array([[ 4,  7,  5,  6],
       [20, 23, 21, 22],
       [28, 31, 29, 30],
       [ 8, 11,  9, 10]])

In [49]:
arr[:,[1,2]]

array([[ 1,  2],
       [ 5,  6],
       [ 9, 10],
       [13, 14],
       [17, 18],
       [21, 22],
       [25, 26],
       [29, 30]])

In [None]:
#NB: Keep in mind that fancy indexing, unlike slicing, always copies the data into a new array.pp102

In [53]:
#Transposing Arrays and Swapping Axes
""" Transposing is a special form of reshaping that similarly returns a view on the underlying
data without copying anything. Arrays have the transpose method and also the special T attribute:"""
arr = np.arange(15).reshape((3, 5))
arr

array([[ 0,  1,  2,  3,  4],
       [ 5,  6,  7,  8,  9],
       [10, 11, 12, 13, 14]])

In [54]:
arr.T

array([[ 0,  5, 10],
       [ 1,  6, 11],
       [ 2,  7, 12],
       [ 3,  8, 13],
       [ 4,  9, 14]])

In [57]:
#When doing matrix computations, computing the inner matrix product using np.dot:
arr2 = np.random.randn(6, 3)
np.dot(arr2.T, arr2)

array([[ 4.26417643, -3.20495048,  1.01077857],
       [-3.20495048,  8.75624598, -1.95447824],
       [ 1.01077857, -1.95447824,  1.93064022]])

In [66]:
arr = np.arange(12)
arr

array([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11])

In [70]:
pp = arr.reshape(4,3)
pp

array([[ 0,  1,  2],
       [ 3,  4,  5],
       [ 6,  7,  8],
       [ 9, 10, 11]])

In [71]:
pp.transpose()

array([[ 0,  3,  6,  9],
       [ 1,  4,  7, 10],
       [ 2,  5,  8, 11]])

In [None]:
#chk pp104 on transpose 3D

In [79]:
""" 4.2 Universal Functions: Fast Element-Wise Array Functions
A universal function, or ufunc, is a function that performs element-wise operations
on data in ndarrays....think of them as fast vectorized wrappers for simple
functions that take one or more scalar values and produce one or more scalar results.
Many ufuncs are simple element-wise transformations, like sqrt or exp:"""
arr = np.arange(10)
print(arr)
print(np.sqrt(arr))
print(np.exp(arr))

[0 1 2 3 4 5 6 7 8 9]
[0.         1.         1.41421356 1.73205081 2.         2.23606798
 2.44948974 2.64575131 2.82842712 3.        ]
[1.00000000e+00 2.71828183e+00 7.38905610e+00 2.00855369e+01
 5.45981500e+01 1.48413159e+02 4.03428793e+02 1.09663316e+03
 2.98095799e+03 8.10308393e+03]


In [2]:
# These are referred to as unary ufuncs. Others, such as add or maximum, take two arrays(thus, binary ufuncs) and return a single array as the result:
import numpy as np
x = np.random.randn(8)
y = np.random.randn(8)
print(x)
print(y)
np.maximum(x, y) # element wise max

[ 0.94275504 -1.13235346  0.93409    -0.06839314 -1.16684005  1.34864543
  0.62308256  1.51808715]
[ 0.0972633   0.53350994 -1.4747688  -1.76580811 -1.59100437 -1.54346126
 -1.6295496   1.83828352]


array([ 0.94275504,  0.53350994,  0.93409   , -0.06839314, -1.16684005,
        1.34864543,  0.62308256,  1.83828352])

In [3]:
arr = np.random.randn(7) * 5

In [4]:
arr

array([-1.85862659,  1.48184429,  0.72250189,  0.79795147, -5.5184491 ,
        9.70153881,  3.83872068])

In [5]:
#ufunc can return multiple arrays. modf is one example, a vectorized version of the built-in Python divmod; it returns the fractional and integral
#parts of a floating-point array:
remainder, whole_part = np.modf(arr)

In [6]:
remainder

array([-0.85862659,  0.48184429,  0.72250189,  0.79795147, -0.5184491 ,
        0.70153881,  0.83872068])

In [7]:
whole_part

array([-1.,  1.,  0.,  0., -5.,  9.,  3.])

In [None]:
""" Table 4-3. Unary ufuncs
Function Description
abs, fabs Compute the absolute value element-wise for integer, floating-point, or complex values
sqrt Compute the square root of each element (equivalent to arr ** 0.5)
square Compute the square of each element (equivalent to arr ** 2)
exp Compute the exponent ex of each element
log, log10,
log2, log1p
Natural logarithm (base e), log base 10, log base 2, and log(1 + x), respectively
sign Compute the sign of each element: 1 (positive), 0 (zero), or –1 (negative)
ceil Compute the ceiling of each element (i.e., the smallest integer greater than or equal to that
number)
floor Compute the floor of each element (i.e., the largest integer less than or equal to each element)
rint Round elements to the nearest integer, preserving the dtype
modf Return fractional and integral parts of array as a separate array
isnan Return boolean array indicating whether each value is NaN (Not a Number)
isfinite, isinf Return boolean array indicating whether each element is finite (non-inf, non-NaN) or infinite,
respectively
cos, cosh, sin,
sinh, tan, tanh
Regular and hyperbolic trigonometric functions
arccos, arccosh,
arcsin, arcsinh,
arctan, arctanh
Inverse trigonometric functions
logical_not Compute truth value of not x element-wise (equivalent to ~arr)."""

In [None]:
""" Table 4-4. Binary universal functions
Function Description
add Add corresponding elements in arrays
subtract Subtract elements in second array from first array
multiply Multiply array elements
divide, floor_divide Divide or floor divide (truncating the remainder)
power Raise elements in first array to powers indicated in second array
maximum, fmax Element-wise maximum; fmax ignores NaN
minimum, fmin Element-wise minimum; fmin ignores NaN
mod Element-wise modulus (remainder of division)
copysign Copy sign of values in second argument to values in first argument 
greater, greater_equal,
less, less_equal,
equal, not_equal
Perform element-wise comparison, yielding boolean array (equivalent to infix
operators >, >=, <, <=, ==, !=)
logical_and,
logical_or, logical_xor
Compute element-wise truth value of logical operation (equivalent to infix operators & |, ^)

"""

In [None]:
# pp108 ---4.3 Array-Oriented Programming with Arrays

In [None]:
# pp109 Expressing Conditional Logic as Array Operations

In [None]:
# pp111 Mathematical and Statistical Methods

In [None]:
""" A set of mathematical functions that compute statistics about an entire array or about
the data along an axis are accessible as methods of the array class. You can use aggregations
(often called reductions) like sum, mean, and std (standard deviation) either by
calling the array instance method or using the top-level NumPy function. """

In [33]:
random.seed(22)
arr = np.random.randn(5, 4)
arr

array([[ 1.41475007,  0.49754328,  1.21535179, -0.62233131],
       [-0.56874368, -1.10033316, -0.47610251,  1.11423519],
       [ 1.88336424, -0.19736157,  1.58870592,  0.99920906],
       [-2.05520501,  1.31924878,  0.55708403, -0.1754541 ],
       [-1.32172742,  0.91440706, -0.07441925,  0.23801246]])

In [32]:
arr = np.random.randn(5, 4)
arr

array([[-0.29014251,  1.80868314,  0.01512053, -1.13623724],
       [ 0.2733288 , -0.49182155,  0.88841939,  1.39319423],
       [-0.97245062, -1.51393535, -0.50655835, -0.80986936],
       [ 1.03709399, -1.71349153,  0.61422496,  0.63177491],
       [-0.05835753,  0.59666023, -0.4476829 ,  0.0071656 ]])

In [11]:
arr.sum()

-2.164250766577205

In [13]:
arr

array([[ 0.01317899, -0.7289059 , -0.57534444,  0.73148306],
       [-0.74446384, -0.7989202 ,  0.37277891,  1.79198321],
       [ 0.68952524, -0.7881893 , -0.32445255,  0.78839129],
       [-0.69914737, -1.44474989,  1.0156834 ,  0.53516029],
       [ 0.54205526, -0.97941254, -1.73636487,  0.1754605 ]])

In [14]:
#Functions like mean and sum take an optional axis argument that computes the statistic
#over the given axis, resulting in an array with one fewer dimension:
arr.mean(axis=1)
arr.sum(axis=0)

array([-0.19885173, -4.74017783, -1.24769956,  4.02247835])

In [12]:
arr.cumsum()

array([ 0.01317899, -0.71572692, -1.29107136, -0.5595883 , -1.30405215,
       -2.10297234, -1.73019343,  0.06178978,  0.75131502, -0.03687428,
       -0.36132684,  0.42706446, -0.27208291, -1.7168328 , -0.7011494 ,
       -0.16598912,  0.37606614, -0.60334639, -2.33971127, -2.16425077])

In [17]:
arr[0].sum()

-0.5595883030081928

In [26]:
# Few npotes on random numbers
import random
random.seed(12)
nu = random.randint(10,15)
nu

13

In [27]:
nu = random.randint(10,15)
nu

12

In [28]:
import random
random.seed(12)
nu = random.randint(10,15)
nu

13

In [34]:
random.seed(12)
nu = random.randint(10,15)
nu

13

In [37]:
np.random.randint(3, size=10)

array([2, 0, 0, 2, 0, 1, 1, 0, 1, 2])

In [38]:
np.random.randint(5, size=(2, 4))

array([[2, 3, 0, 0],
       [4, 1, 1, 1]])

In [39]:
np.random.randint(5, size=(2, 4))

array([[2, 0, 3, 4],
       [4, 4, 2, 3]])

In [41]:
random.seed(33)
np.random.randint(5, size=(2, 4))

array([[0, 4, 0, 1],
       [3, 2, 3, 2]])

In [42]:
np.random.randint(5, size=(2, 4))

array([[1, 1, 1, 0],
       [4, 0, 1, 3]])

In [43]:
random.seed(33)
np.random.randint(5, size=(2, 4))

array([[4, 1, 0, 0],
       [1, 3, 1, 3]])

In [45]:
np.random.seed(42)
r = np.random.randint(3, size=(2, 3))
r

array([[2, 0, 2],
       [2, 0, 0]])

In [48]:
np.random.seed(42)
r = np.random.randint(5, size=(4, 3))
r

array([[3, 4, 2],
       [4, 4, 1],
       [2, 2, 2],
       [4, 3, 2]])

In [52]:
np.random.seed(42)
r = np.random.randint(5, size=(2, 3))
r

array([[3, 4, 2],
       [4, 4, 1]])

In [53]:
r.mean(axis=1)

array([3., 3.])

In [54]:
r.mean(axis=0)

array([3.5, 4. , 1.5])

In [55]:
r.sum(axis=1)

array([9, 9])

In [56]:
r.sum(axis=0)

array([7, 8, 3])

In [None]:
#arr.mean(1) means “compute mean across the columns” where arr.sum(0) means “compute sum down the rows.”

In [57]:
#Cumulative
arr = np.array([0, 1, 2, 3, 4, 5, 6, 7])
arr.cumsum()

array([ 0,  1,  3,  6, 10, 15, 21, 28], dtype=int32)

In [None]:
""" Table 4-5. Basic array statistical methods
Method Description
sum Sum of all the elements in the array or along an axis; zero-length arrays have sum 0
mean Arithmetic mean; zero-length arrays have NaN mean
std, var Standard deviation and variance, respectively, with optional degrees of freedom adjustment (default
denominator n)
min, max Minimum and maximum
argmin, argmax Indices of minimum and maximum elements, respectively
cumsum Cumulative sum of elements starting from 0
cumprod Cumulative product of elements starting from 1  """

In [62]:
#Sorting
arr = np.random.randn(6)
arr

array([-1.02155219, -0.16175539, -0.5336488 , -0.00552786, -0.22945045,
        0.38934891])

In [64]:
np.sort(arr)

array([-1.02155219, -0.5336488 , -0.22945045, -0.16175539, -0.00552786,
        0.38934891])

In [67]:
arr.sort()
arr

array([-1.02155219, -0.5336488 , -0.22945045, -0.16175539, -0.00552786,
        0.38934891])

In [69]:
"""Unique and Other Set Logic: NumPy has some basic set operations for one-dimensional ndarrays. A commonly
used one is np.unique, which returns the sorted unique values in an array:"""
names = np.array(['Bob', 'Joe', 'Will', 'Bob', 'Will', 'Joe', 'Joe'])
np.unique(names)

array(['Bob', 'Joe', 'Will'], dtype='<U4')