In [1]:
## basic tutorial on z score
## tutorial url:
## https://www.statology.org/z-score-python/

In statistics, a z-score tells us how many standard deviations away a value is from the mean. We use the following formula to calculate a z-score:

z = (X – μ) / σ

where:

X is a single raw data value

μ is the population mean

σ is the population standard deviation

In [2]:
import pandas as pd
import numpy as np
import scipy.stats as stats

#### 1D array

In [3]:
data = np.array([6, 7, 7, 12, 13, 13, 15, 16, 19, 22])

In [5]:
## calculate the z-scores for each value in the array
## the first value of “6” in the array is 1.394 standard deviations below the mean
stats.zscore(data)

array([-1.39443338, -1.19522861, -1.19522861, -0.19920477,  0.        ,
        0.        ,  0.39840954,  0.5976143 ,  1.19522861,  1.79284291])

#### Multi-dimensional arrays

In [6]:
data = np.array([[5, 6, 7, 7, 8],
                 [8, 8, 8, 9, 9],
                 [2, 2, 4, 4, 5]])

In [8]:
## The first value of “5” is 1.159 standard deviations below the mean of its array.
stats.zscore(data, axis=1)

array([[-1.56892908, -0.58834841,  0.39223227,  0.39223227,  1.37281295],
       [-0.81649658, -0.81649658, -0.81649658,  1.22474487,  1.22474487],
       [-1.16666667, -1.16666667,  0.5       ,  0.5       ,  1.33333333]])

#### Dataframe

In [9]:
data = pd.DataFrame(np.random.randint(0, 10, size=(5, 3)), columns=['A', 'B', 'C'])
data

Unnamed: 0,A,B,C
0,6,0,9
1,1,4,2
2,4,6,0
3,0,6,7
4,4,6,5


In [10]:
## z scores are column based
data.apply(stats.zscore)

Unnamed: 0,A,B,C
0,1.369306,-1.886484,1.348907
1,-0.912871,-0.171499,-0.797081
2,0.456435,0.685994,-1.41022
3,-1.369306,0.685994,0.735767
4,0.456435,0.685994,0.122628


#### Calculate a z-score From a Mean + Standard Deviation

In [22]:
# Calculate a z-score from a provided mean and standard deviation
import statistics

mean = 7
standard_deviation = 1.3

zscore = statistics.NormalDist(mean, standard_deviation).zscore(5)
print(zscore)

AttributeError: module 'statistics' has no attribute 'NormalDist'

In [23]:
import sys
print (sys.version)

3.7.3 (default, Mar 27 2019, 17:13:21) [MSC v.1915 64 bit (AMD64)]


In [None]:
## need to update Python to 3.8+ for NormalDist; in progress