# Statistical functions

NumPy has quite a few useful statistical functions for finding the minimum, maximum, percentile, standard deviation, variance, etc. of the elements in an array.

In [1]:
import numpy as np 

## numpy.amin() and numpy.amax()
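These functions return the minimum and the maximum of the elements in the given array along the specified axis. Passing `keepdims=True` keeps each reduced axis in the result with size one, which is why the outputs below remain two-dimensional.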

In [23]:
# 3x3 integer matrix used by the amin/amax examples that follow.
a = np.array(
    [
        [3, 7, 5],
        [8, 4, 3],
        [2, 4, 9],
    ]
)
a

array([[3, 7, 5],
       [8, 4, 3],
       [2, 4, 9]])

In [24]:
# Minimum of each row (axis=1); keepdims=True keeps the reduced axis, giving a 3x1 column.
np.amin(a, axis=1, keepdims=True) 

array([[3],
       [3],
       [2]])

In [25]:
# Minimum of each column (axis=0); the result is a 1x3 row.
np.amin(a, axis=0, keepdims=True) 

array([[2, 4, 3]])

In [26]:
# No axis given: minimum over every element; keepdims=True makes the result 1x1.
np.amin(a, keepdims=True) 

array([[2]])

In [27]:
# Maximum of each column (axis=0); the result is a 1x3 row.
np.amax(a, axis = 0, keepdims=True)

array([[8, 7, 9]])

In [28]:
# No axis given: maximum over every element, returned as a 1x1 array.
np.amax(a, keepdims=True)

array([[9]])

## numpy.ptp()

The name of the function comes from the acronym for ‘peak to peak’.

The numpy.ptp() function returns the range (maximum-minimum) of values along an axis.



In [39]:
# Display the array used in the ptp examples below.
a

array([[3, 7, 5],
       [8, 4, 3],
       [2, 4, 9]])

In [40]:
# Range over the whole array: largest value 9 minus smallest value 2.
np.ptp(a, keepdims=True) 

array([[7]])

In [41]:
# Range of each row (axis=1): 7-3, 8-3 and 9-2.
np.ptp(a, axis = 1, keepdims=True) 

array([[4],
       [5],
       [7]])

In [42]:
# Range of each column (axis=0): 8-2, 7-4 and 9-3.
np.ptp(a, axis = 0, keepdims=True) 

array([[6, 3, 6]])

## numpy.percentile()
A percentile (or centile) is a measure used in statistics indicating the value below which a given percentage of the observations in a group falls.

The function numpy.percentile() takes the following arguments.

numpy.percentile(a, q, axis)

- `a`: The input array.

- `q`: The percentile to compute; it must be between 0 and 100 inclusive.

- `axis`: The axis along which the percentile is calculated. If it is omitted, the percentile is computed over the flattened array.

Returns the q-th percentile(s) of the array elements.

In [33]:
# Re-create the 3x3 integer array so the percentile examples can run on their own.
a = np.array([[3,7,5], [8,4,3], [2,4,9]]) 
a

array([[3, 7, 5],
       [8, 4, 3],
       [2, 4, 9]])

In [34]:
# 50th percentile of all nine elements, i.e. the median of the flattened array.
np.percentile(a, 50, keepdims=True) 

array([[4.]])

In [36]:
# 50th percentile of each row (axis=1); the result is a 3x1 float column.
np.percentile(a, 50, axis = 1, keepdims=True) 

array([[5.],
       [4.],
       [4.]])

In [38]:
# 50th percentile of each column (axis=0); the result is a 1x3 float row.
np.percentile(a, 50, axis=0, keepdims=True)

array([[3., 4., 5.]])

## numpy.median
`numpy.median(a, axis=None, out=None, overwrite_input=False, keepdims=False)`

Compute the median along the specified axis.

Returns the median of the array elements.

In [46]:
# 3x3 integer matrix used by the median examples that follow.
a = np.array(
    [
        [10, 7, 4],
        [3, 2, 1],
        [1, 2, 3],
    ]
)
a

array([[10,  7,  4],
       [ 3,  2,  1],
       [ 1,  2,  3]])

In [47]:
# Median of all nine elements (flattened array); without keepdims the result is a scalar.
np.median(a)

3.0

In [48]:
# Median of each column (axis=0), kept as a 1x3 row by keepdims=True.
np.median(a, axis=0, keepdims=True)

array([[3., 2., 3.]])

In [49]:
# Median of each row (axis=1), kept as a 3x1 column by keepdims=True.
np.median(a, axis=1, keepdims=True)

array([[7.],
       [2.],
       [2.]])

## numpy.average
numpy.average(a, axis=None, weights=None, returned=False)

Compute the weighted average along the specified axis.

In [10]:
# Three sample values used by the first np.average examples.
data = [1, 2, 3]
data

[1, 2, 3]

In [11]:
# Without weights, np.average is the plain arithmetic mean: (1 + 2 + 3) / 3.
np.average(data)

2.0

In [4]:
# Weighted average: (1*1 + 2*2 + 3*3) / (1 + 2 + 3) = 14 / 6.
np.average(data, weights=[1,2,3])

2.3333333333333335

In [5]:
# Rebind `data` to a two-element list for the two-weight examples that follow.
data = list(range(1, 3))
data

[1, 2]

In [6]:
# Weighted average: (1*1 + 2*3) / (1 + 3) = 7 / 4.
np.average(data, weights=[1, 3])

1.75

In [7]:
# Same 1:3 weight ratio, already normalized to sum to 1, so the result is unchanged.
np.average(data, weights=[1./4, 3./4])

1.75

In [8]:
# Equal weights reduce to the plain mean: (1 + 2) / 2.
np.average(data, weights=[.5,.5])

1.5

In [9]:
# Rebind `data` to a 3x2 integer array holding 0..5 for the axis-wise example below.
# NOTE(review): `data` was a list in the earlier cells; a distinct name would avoid confusion.
data = np.arange(6).reshape((3,2))
data

array([[0, 1],
       [2, 3],
       [4, 5]])

In [85]:
# The two weights apply along axis=1, so each row yields 0.25*row[0] + 0.75*row[1].
np.average(data, axis=1, weights=[1./4, 3./4])

array([0.75, 2.75, 4.75])

In [12]:
# Equal weights: the weighted average equals the plain mean, (10 + 20) / 2.
incorrect1     = np.array([10, 20])
sample_weight1 = [1, 1]

np.average(incorrect1, weights=sample_weight1, axis=0)

15.0

In [13]:
# Scaling both weights by the same factor (1 -> 0.1) leaves the result unchanged,
# because the weighted sum is divided by the sum of the weights.
incorrect1     = np.array([10, 20])
sample_weight1 = [.1, .1]

np.average(incorrect1, weights=sample_weight1, axis=0)

15.0

In [14]:
# Unequal weights: (40*0.4 + 20*0.8) / (0.4 + 0.8) = 32 / 1.2.
incorrect1     = np.array([40, 20])
sample_weight1 = [.4, .8]

np.average(incorrect1, weights=sample_weight1, axis=0)

26.666666666666664

In [15]:
# Mixing a bool with an int: np.array promotes True to 1, so this is
# (1*1 + 20*2) / (1 + 2) = 41 / 3.
incorrect1     = np.array([True, 20])
sample_weight1 = [1, 2]

np.average(incorrect1, weights=sample_weight1, axis=0)

13.666666666666666

In [16]:
# Boolean values count as 1 and 0 in the average: (1*0.4 + 0*0.8) / 1.2 = 1/3.
incorrect1     = np.array([True, False])
sample_weight1 = [.4, .8]

np.average(incorrect1, weights=sample_weight1, axis=0)

0.3333333333333333

In [100]:
# All values are False (0), so the weighted average is 0 whatever the weights are.
incorrect1     = np.array([False, False])
sample_weight1 = [.4, .8]

np.average(incorrect1, weights=sample_weight1, axis=0)

0.0

## numpy.mean
`numpy.mean(a, axis=None, dtype=None, out=None, keepdims=<no value>)`

Compute the arithmetic mean along the specified axis.

Returns the average of the array elements. The average is taken over the flattened array by default, otherwise over the specified axis. float64 intermediate and return values are used for integer inputs.

In [86]:
# 2x2 integer array used by the mean examples that follow.
a = np.array([[1, 2], [3, 4]])
a

array([[1, 2],
       [3, 4]])

In [87]:
# Mean of all four elements (flattened array): (1 + 2 + 3 + 4) / 4.
np.mean(a)

2.5

In [89]:
# Mean of each column (axis=0), kept as a 1x2 row by keepdims=True.
np.mean(a, axis=0, keepdims=True)

array([[2., 3.]])

In [90]:
# `a` holds integers, so np.mean already uses float64 intermediates and this call
# returns the same 2.5 as np.mean(a). Passing dtype=np.float64 explicitly only
# improves accuracy for lower-precision float inputs such as float32.

np.mean(a, dtype=np.float64)

2.5

## numpy.std
`numpy.std(a, axis=None, dtype=None, out=None, ddof=0, keepdims=<no value>)`

Compute the standard deviation along the specified axis.

Returns the standard deviation, a measure of the spread of a distribution, of the array elements. The standard deviation is computed for the flattened array by default, otherwise over the specified axis.

In [91]:
# 2x2 integer array used by the standard-deviation examples that follow.
a = np.array([[1, 2], [3, 4]])
a

array([[1, 2],
       [3, 4]])

In [92]:
# Standard deviation of all four elements with the default ddof=0:
# the mean is 2.5, the mean squared deviation is 1.25, and sqrt(1.25) ~= 1.118.
np.std(a)

1.118033988749895

In [93]:
# Standard deviation of each column (axis=0): [1, 3] and [2, 4] each deviate by 1 from their mean.
np.std(a, axis=0)

array([1., 1.])

In [94]:
# Standard deviation of each row (axis=1): [1, 2] and [3, 4] each deviate by 0.5 from their mean.
np.std(a, axis=1)

array([0.5, 0.5])