https://towardsdev.com/how-to-calculate-correlation-in-python-part-ii-f108c6a6269

In [1]:
import numpy as np
import pandas as pd
import scipy.stats

# Sample data shared by the NumPy/SciPy cells below.
# x: the ten consecutive integers 10..19.
x = np.arange(10, 20, 1)
# y: ten observations paired position-by-position with x.
y = np.asarray([2, 1, 4, 5, 8, 12, 18, 25, 96, 48])

In [2]:
# Linear least-squares fit of y on x; the individual result fields are printed
# one per line in the order slope, intercept, rvalue, pvalue, stderr.
liner = scipy.stats.linregress(x, y)
print(
    liner.slope,
    liner.intercept,
    liner.rvalue,
    liner.pvalue,
    liner.stderr,
    sep="\n",
)

7.4363636363636365
-85.92727272727274
0.7586402890911869
0.010964341301680825
2.257878767543913


In [3]:
# Both variables stacked into one 2 x 10 array: row 0 holds the x values,
# row 1 holds the y values.
xy = np.array([range(10, 20),
               [2, 1, 4, 5, 8, 12, 18, 25, 96, 48]])

# Pass the two rows as separate arguments. Handing linregress a single 2-D
# array (with y omitted) is deprecated in recent SciPy releases; the explicit
# two-argument form yields the same fit and works on every version.
print(scipy.stats.linregress(xy[0], xy[1]))

LinregressResult(slope=7.4363636363636365, intercept=-85.92727272727274, rvalue=0.7586402890911869, pvalue=0.010964341301680825, stderr=2.257878767543913, intercept_stderr=33.37538930964323)


In [4]:
# xy.T is the 10 x 2 layout (one column per variable). Select the two columns
# explicitly instead of letting linregress infer them from a single 2-D
# argument, which is deprecated in recent SciPy releases.
scipy.stats.linregress(xy.T[:, 0], xy.T[:, 1])

LinregressResult(slope=7.4363636363636365, intercept=-85.92727272727274, rvalue=0.7586402890911869, pvalue=0.010964341301680825, stderr=2.257878767543913, intercept_stderr=33.37538930964323)

In [5]:
# A NaN in the input propagates: every field of the returned result is nan.
scipy.stats.linregress(x=np.arange(3),
                       y=np.array([2.0, np.nan, 5.0]))

LinregressResult(slope=nan, intercept=nan, rvalue=nan, pvalue=nan, stderr=nan, intercept_stderr=nan)

In [6]:
# SciPy: Pearson correlation coefficient and its p-value, one per line.
r, p = scipy.stats.pearsonr(x, y)
print(r, p, sep="\n")

# NumPy: the 2 x 2 correlation matrix for the same pair of variables.
np.corrcoef(x, y)

0.7586402890911869
0.010964341301680832


array([[1.        , 0.75864029],
       [0.75864029, 1.        ]])

In [7]:
# pearsonr on input containing NaN. In the recorded run this raised
# "ValueError: array must not contain infs or NaNs"; the error is caught and
# printed so the demonstration stays visible without aborting
# "Restart Kernel -> Run All". If the installed SciPy returns a result
# instead of raising, that result is printed.
try:
    print(scipy.stats.pearsonr(np.arange(3),
                               np.array([2, np.nan, 5])))
except ValueError as exc:
    print(f"ValueError: {exc}")

ValueError: array must not contain infs or NaNs

In [8]:
# Each row of xy is one variable (rowvar=True is the default, spelled out here).
np.corrcoef(xy, rowvar=True)

array([[1.        , 0.75864029],
       [0.75864029, 1.        ]])

In [9]:
# Three variables (rows) with four observations each; the last row has a
# missing value.
arr_nan = np.array(
    [
        [0, 1, 2, 3],
        [2, 4, 1, 8],
        [2, 5, np.nan, 2],
    ]
)

# Every coefficient that involves the row containing NaN comes out as nan.
np.corrcoef(arr_nan, rowvar=True)

array([[1.        , 0.62554324,        nan],
       [0.62554324, 1.        ,        nan],
       [       nan,        nan,        nan]])

In [11]:
# Three variables stacked row-wise (3 x 10): the x values, the y values and a
# third series z.
xyz = np.array(
    [
        list(range(10, 20)),
        [2, 1, 4, 5, 8, 12, 18, 25, 96, 48],
        [5, 3, 2, 1, 0, -2, -8, -11, -15, -16],
    ]
)

# 3 x 3 matrix of pairwise correlation coefficients between the rows.
np.corrcoef(xyz, rowvar=True)

array([[ 1.        ,  0.75864029, -0.96807242],
       [ 0.75864029,  1.        , -0.83407922],
       [-0.96807242, -0.83407922,  1.        ]])

In [12]:
# Same data laid out with one variable per column; rowvar=False tells
# corrcoef to treat columns (not rows) as the variables.
np.corrcoef(np.transpose(xyz), rowvar=False)

array([[ 1.        ,  0.75864029, -0.96807242],
       [ 0.75864029,  1.        , -0.83407922],
       [-0.96807242, -0.83407922,  1.        ]])

In [13]:
# Rebuild the 3 x 4 example whose last row contains a NaN, one row at a time.
arr_nan = np.vstack((
    [0, 1, 2, 3],
    [2, 4, 1, 8],
    [2, 5, np.nan, 2],
))

# Coefficients involving the NaN row are nan; the others are unaffected.
np.corrcoef(arr_nan, rowvar=True)

array([[1.        , 0.62554324,        nan],
       [0.62554324, 1.        ,        nan],
       [       nan,        nan,        nan]])

In [14]:
# Column-as-variable correlation matrix of the transposed xyz array.
np.corrcoef(xyz.transpose(), rowvar=False)

array([[ 1.        ,  0.75864029, -0.96807242],
       [ 0.75864029,  1.        , -0.83407922],
       [-0.96807242, -0.83407922,  1.        ]])

In [15]:
# pandas versions of the sample data (pandas is imported in the first cell
# together with the other libraries).
# NOTE: x, y, xy and xyz were NumPy arrays in the cells above; from this cell
# onward those names refer to pandas objects.
x = pd.Series(range(10, 20))
y = pd.Series([2, 1, 4, 5, 8, 12, 18, 25, 96, 48])
z = pd.Series([5, 3, 2, 1, 0, -2, -8, -11, -15, -16])

# Column-labelled frames assembled from the Series above.
xy = pd.DataFrame({'x': x, 'y': y})
xyz = pd.DataFrame({'x': x, 'y': y, 'z': z})

In [16]:
# Correlation between the two Series (Pearson is the default method,
# spelled out here).
x.corr(y, method="pearson")

0.7586402890911867

In [17]:
# Two pairs of Series: (u, v) is complete, while u_nan has a missing value at
# the position where w holds 154.
u = pd.Series([1, 2, 3])
u_nan = pd.Series([1, 2, np.nan, 3])
v = pd.Series([1, 4, 8])
w = pd.Series([1, 4, 154, 8])

# The pair containing the missing value is left out of the calculation, so
# both lines print the same coefficient.
print(u.corr(v), u_nan.corr(w), sep="\n")

0.9966158955401239
0.9966158955401239


In [18]:
# Pairwise correlation matrix of the frame's columns (default method made explicit).
xy.corr(method="pearson")

Unnamed: 0,x,y
x,1.0,0.75864
y,0.75864,1.0


In [19]:
# Pairwise correlation matrix across the x, y and z columns
# (default method made explicit).
xyz.corr(method="pearson")

Unnamed: 0,x,y,z
x,1.0,0.75864,-0.968072
y,0.75864,1.0,-0.834079
z,-0.968072,-0.834079,1.0


In [20]:
# Correlate each column of xy with the Series z (axis=0 is the default,
# spelled out here).
xy.corrwith(z, axis=0)

x   -0.968072
y   -0.834079
dtype: float64