In [1]:
import math
import statistics
import numpy as np
import scipy.stats
import pandas as pd

In [3]:
# Base sample, plus a copy with a NaN spliced in after the third value so that
# NaN handling can be compared across the libraries used below.
x = [8.0, 1, 2.5, 4, 28.0]
x_with_nan = x[:3] + [math.nan] + x[3:]
print(x, x_with_nan, sep="\n")

[8.0, 1, 2.5, 4, 28.0]
[8.0, 1, 2.5, nan, 4, 28.0]


In [5]:
# NumPy array and pandas Series versions of both samples (without and with NaN).
y = np.array(x)
y_with_nan = np.array(x_with_nan)
z = pd.Series(x)
z_with_nan = pd.Series(x_with_nan)
print(y, y_with_nan, z_with_nan, sep="\n")

[ 8.   1.   2.5  4.  28. ]
[ 8.   1.   2.5  nan  4.  28. ]
0     8.0
1     1.0
2     2.5
3     NaN
4     4.0
5    28.0
dtype: float64


In [6]:
# Arithmetic mean computed by hand: total of the values divided by their count.
mean_ = sum(x)/len(x)
mean_

8.7

In [7]:
# Same mean through the standard library's statistics module.
mean_ = statistics.mean(x)
print(mean_)

8.7


In [8]:
# statistics.mean on the NaN-containing list: the NaN carries through to the
# result (the cell prints nan).
mean_ = statistics.mean(x_with_nan)
print(mean_)

nan


In [9]:
# Mean of the NumPy array, function form.
mean_ = np.mean(y)
mean_

8.7

In [10]:
# Mean of the NumPy array, method form.
mean_ = y.mean()
mean_

8.7

In [11]:
# Both NumPy spellings return nan when the array contains a NaN.
print(np.mean(y_with_nan))
print(y_with_nan.mean())

nan
nan


In [12]:
# np.nanmean gives the same 8.7 as the NaN-free sample, i.e. the NaN is left out.
np.nanmean(y_with_nan)

8.7

In [13]:
# Mean of the pandas Series.
mean_ = z.mean()
mean_

8.7

In [14]:
# With no extra arguments the Series mean matches the NaN-free result (8.7),
# so the NaN entry is not counted here.
z_with_nan.mean()

8.7

In [15]:
# Weighted mean of x with weights w, computed twice in pure Python.
x = [8.0, 1, 2.5, 4, 28.0]
w = [0.1, 0.2, 0.3, 0.25, 0.15]

# Variant 1: walk the weights by position and look up the matching value.
wmean = sum(weight * x[pos] for pos, weight in enumerate(w)) / sum(w)
print(wmean)

# Variant 2: pair each value with its weight directly.
wmean = sum(value * weight for value, weight in zip(x, w)) / sum(w)
print(wmean)


6.95
6.95


In [16]:
# Same weighted mean via NumPy. The weights are converted to an array once and
# np.average is applied first to the ndarray, then to the Series.
y = np.array(x)
z = pd.Series(x)
w = np.array(w)

wmean = np.average(y, weights=w)  # ndarray input
print(wmean)

wmean = np.average(z, weights=w)  # Series input
print(wmean)


6.95
6.95


In [17]:
# Harmonic mean by hand: number of values divided by the sum of their reciprocals.
hmean = len(x)/sum(1/item for item in x)
hmean

2.7613412228796843

In [18]:
# Harmonic mean through the standard library.
hmean = statistics.harmonic_mean(x)
hmean

2.7613412228796843

In [19]:
# Harmonic mean through SciPy, called on the plain list.
scipy.stats.hmean(x)

2.7613412228796843

In [20]:
# Geometric mean by hand: the n-th root of the product of all n values.
gmean = math.prod(x) ** (1 / len(x))
gmean


4.677885674856041

In [21]:
# Geometric mean through SciPy, on the NumPy array.
scipy.stats.gmean(y)

4.67788567485604

In [22]:
# Geometric mean through SciPy, on the pandas Series.
scipy.stats.gmean(z)

4.67788567485604

In [23]:
# Median by hand: for an even count, average the two values around the middle
# of the sorted data; for an odd count, take the single middle value.
n = len(x)
if n % 2 == 0:
    x_ord = sorted(x)
    index = n // 2
    median_ = 0.5 * (x_ord[index - 1] + x_ord[index])
else:
    median_ = sorted(x)[(n - 1) // 2]

median_


4

In [24]:
# Drop the last value to get an even count; median_low then returns the lower
# of the two middle values.
statistics.median_low(x[:-1])

2.5

In [25]:
# Same even-length slice; median_high returns the higher of the two middle values.
statistics.median_high(x[:-1])

4

In [26]:
# Median of the NumPy array.
median_ = np.median(y)
print(median_)


4.0


In [27]:
# u has a single most frequent value; v has two values tied for most frequent.
u = [2, 3, 2, 8, 12]
v = [12, 15, 12, 15, 21, 15, 12]

# Mode by hand: rank each distinct value by (occurrence count, value) and keep
# the top one, so a tie on count is resolved in favour of the larger value.
mode_ = max(set(u), key=lambda value: (u.count(value), value))
mode_


2

In [28]:
# Mode through the standard library.
mode_ = statistics.mode(u)
mode_


2

In [29]:
# Rebind u and v as NumPy arrays, then take the mode with SciPy. The result is
# an object exposing .mode and .count, which the next cells read.
# NOTE(review): the array-wrapped values in the recorded output may depend on
# the installed SciPy version - confirm when re-running.
u, v = np.array(u), np.array(v)

mode_ = scipy.stats.mode(u)
mode_


ModeResult(mode=array([2]), count=array([2]))

In [30]:
# The modal value held by the SciPy result.
mode_.mode

array([2])

In [31]:
# How many times the modal value occurs.
mode_.count

array([2])

In [32]:
# Rebind u and v as pandas Series and point w at a short Series containing a NaN.
# Note: from here on w no longer holds the weights used by the weighted-mean cells.
u, v, w = pd.Series(u), pd.Series(v), pd.Series([2, 2, math.nan])

# Series.mode() returns a Series, so tied modes are all reported.

# Single mode.
print(u.mode())

# Two values tied for most frequent: both are listed.
print(v.mode())

# The NaN entry does not show up in the printed result.
print(w.mode())


0    2
dtype: int32
0    12
1    15
dtype: int32
0    2.0
dtype: float64


In [33]:
# Sample variance by hand: squared deviations from the mean, summed and divided
# by n - 1.
n = len(x)
mean_ = sum(x) / n
var_ = sum((value - mean_) ** 2 for value in x) / (n - 1)
var_

123.19999999999999

In [34]:
# Sample variance through the standard library.
var_ = statistics.variance(x)
var_

123.2

In [35]:
# Sample variance with NumPy, function form; ddof=1 selects the n - 1 divisor.
var_ = np.var(y, ddof=1)
var_

123.19999999999999

In [36]:
# Sample variance with NumPy, method form.
var_ = y.var(ddof=1)
var_

123.19999999999999

In [37]:
# Sample variance of the pandas Series.
z.var(ddof=1)

123.19999999999999

In [38]:
# Sample variance of the NaN-containing Series with skipna=True; the result
# equals the NaN-free value.
z_with_nan.var(ddof=1, skipna=True)

123.19999999999999

In [39]:
# Population variance by hand: same squared deviations as the sample variance,
# but divided by n instead of n - 1.
n = len(x)
mean_ = sum(x) / n
var_ = sum((value - mean_) ** 2 for value in x) / n
var_


98.55999999999999

In [40]:
# Population variance through the standard library.
var_ = statistics.pvariance(x)
var_

98.56

In [41]:
# Population variance with NumPy; ddof=0 selects the n divisor.
var_ = np.var(y, ddof=0)
var_


98.55999999999999

In [42]:
# Population variance of the pandas Series.
z.var(ddof=0)

98.55999999999999

In [45]:
# Standard deviation as the square root of whatever var_ currently holds.
# NOTE(review): the last visible assignment to var_ is the ddof=0 (population)
# variance, and the recorded result equals sqrt(98.56), so this is the
# population standard deviation - not the sample value the statistics/NumPy
# cells after it report. Confirm which one is intended.
std_ = var_ ** 0.5
std_

9.927738916792684

In [46]:
# Sample standard deviation through the standard library.
std_ = statistics.stdev(x)
std_


11.099549540409287

In [47]:
# Sample standard deviation with NumPy (ddof=1 selects the n - 1 divisor).
std_ = np.std(y, ddof=1)
# The method form y.std(ddof=1) is the alternative spelling.
std_


11.099549540409285

In [48]:
# Sample skewness by hand. The sample is redefined here so the cell does not
# depend on earlier state.
x = [8.0, 1, 2.5, 4, 28.0]
n = len(x)

# Mean and sample standard deviation (n - 1 divisor).
mean_ = sum(x) / n
var_ = sum((value - mean_) ** 2 for value in x) / (n - 1)
std_ = var_ ** 0.5

# Sum of cubed deviations, scaled by n / ((n - 1)(n - 2) s^3).
cubed_dev_sum = sum((value - mean_) ** 3 for value in x)
skew_ = cubed_dev_sum * n / ((n - 1) * (n - 2) * std_ ** 3)
skew_


1.9470432273905929

In [49]:
# Rebuild the NumPy arrays from the current x and from x_with_nan, then compute
# the skewness of the NaN-free array with SciPy. With bias=False the result
# agrees with the hand-computed value up to floating-point rounding.
y, y_with_nan = np.array(x), np.array(x_with_nan)

skew_ = scipy.stats.skew(y, bias=False)
skew_



1.9470432273905927

In [50]:
# nan_policy='omit': the NaN is left out, giving the same skewness as the
# NaN-free array (shown wrapped in a masked array in the recorded output).
skew_ = scipy.stats.skew(y_with_nan, bias=False, nan_policy='omit')
skew_


masked_array(data=1.94704323,
             mask=False,
       fill_value=1e+20)

In [51]:
# nan_policy='raise': SciPy rejects NaN input with a ValueError. The exception
# is the point of this cell, so it is caught and printed rather than left to
# abort a Restart & Run All. When it fires, skew_ keeps its previous value.
try:
    skew_ = scipy.stats.skew(y_with_nan, bias=False, nan_policy='raise')
except ValueError as err:
    print(f"ValueError: {err}")
else:
    # Only reached if the input turns out to contain no NaN.
    print(skew_)


ValueError: The input contains nan values

In [52]:
# nan_policy='propagate': the NaN flows through and the result is nan.
skew_ = scipy.stats.skew(y_with_nan, bias=False, nan_policy='propagate')
skew_


nan

In [53]:
# New nine-value sample used by the quantile and range cells that follow.
x = [-5.0, -1.1, 0.1, 2.0, 8.0, 12.8, 21.0, 25.8, 41.0]
# n=2 asks for a single cut point splitting the data into two halves.
statistics.quantiles(x, n=2)

[8.0]

In [55]:
# Rebuild y from the nine-value sample, then take its 5th percentile.
y = np.array(x)
np.percentile(y, 5)


-3.44

In [56]:
# 95th percentile of the same array.
np.percentile(y, 95)

34.919999999999995

In [57]:
# np.quantile takes fractions in [0, 1] where np.percentile takes percentages.
print(np.quantile(y, 0.05))
print(np.quantile(y, 0.95))
print(np.quantile(y, [0.25, 0.5, 0.75]))
# NOTE(review): y_with_nan was last built from x_with_nan (the earlier
# five-value sample plus a NaN), not from the nine-value y above, so this line
# reports quartiles of a different dataset - confirm that is intended.
print(np.nanquantile(y_with_nan, [0.25, 0.5, 0.75]))


-3.44
34.919999999999995
[ 0.1  8.  21. ]
[2.5 4.  8. ]


In [58]:
# Same quantiles through pandas. z_with_nan is rebuilt here as well but is not
# used again in the cells shown.
z, z_with_nan = pd.Series(y), pd.Series(y_with_nan)
print(z.quantile(0.05))
print(z.quantile(0.95))
# A list of levels returns a Series indexed by those levels.
print(z.quantile([0.25, 0.5]))


-3.44
34.919999999999995
0.25    0.1
0.50    8.0
dtype: float64


In [61]:
# Range of the data (largest minus smallest value) using the Python built-ins.
max(y) - min(y)

46.0

In [60]:
# Same range in a single NumPy call (ptp = "peak to peak").
np.ptp(y)

46.0

In [62]:
# Interquartile range with NumPy: third quartile minus first quartile.
quartiles = np.quantile(y, [0.25, 0.75])
print(quartiles)
quartiles[1] - quartiles[0]


[ 0.1 21. ]


20.9

In [65]:
# Interquartile range with pandas. The result is a Series indexed by the
# quantile levels, so the quartiles are looked up by 0.25 and 0.75.
quartiles = z.quantile([0.25, 0.75])
print(quartiles)
quartiles[0.75] - quartiles[0.25]


0.25     0.1
0.75    21.0
dtype: float64


20.9

In [66]:
# One-call summary from SciPy: observation count, min/max, mean, variance,
# skewness and kurtosis, requested with ddof=1 and bias=False.
result = scipy.stats.describe(y, ddof=1, bias=False)
result


DescribeResult(nobs=9, minmax=(-5.0, 41.0), mean=11.622222222222222, variance=228.75194444444446, skewness=0.9249043136685094, kurtosis=0.14770623629658886)

In [67]:
# One-call summary from pandas: count, mean, std, min, quartiles and max.
result = z.describe()
result


count     9.000000
mean     11.622222
std      15.124548
min      -5.000000
25%       0.100000
50%       8.000000
75%      21.000000
max      41.000000
dtype: float64