In [1]:
import numpy as np
from scipy.stats import norm

In [2]:
import pandas as pd

In [3]:
# Toy input: contains one tie (labels b and c) and one missing value (label d).
s = pd.Series(
    data=[2.0, 1.0, 1.0, np.nan, 4.0, 3.0],
    index=list("abcdef"),
)
s

a    2.0
b    1.0
c    1.0
d    NaN
e    4.0
f    3.0
dtype: float64

In [4]:
import scipy.stats as ss

In [5]:
def rank_INT(series, c=3.0/8, stochastic=True):
    """Rank-based inverse normal transformation (INT) of a pandas Series.

    Non-missing values are ranked, each rank ``r`` is mapped to the quantile
    ``(r - c) / (n - 2*c + 1)`` (``n`` = number of non-missing values), and
    the quantile is passed through the standard normal inverse CDF. This is
    the same formula as ``rank_to_normal``, applied to the whole rank array
    in a single vectorised call.

    Args:
        series (pandas.Series): Values to transform. Missing entries are
            dropped and do not appear in the result.
        c (int or float, optional): Offset constant in the quantile formula.
            Defaults to 3/8 (Blom's constant).
        stochastic (bool, optional): If True, tied values receive distinct
            ranks in a random order. The order is drawn from a private
            generator seeded with 123, so repeated calls give the same
            result. If False, tied values share their average rank and
            therefore the same transformed value.

    Returns:
        pandas.Series: Transformed float values, indexed by the labels of
        the non-missing inputs in their original order.

    Raises:
        AssertionError: If an argument has an unsupported type.
    """
    # Check input
    assert isinstance(series, pd.Series), "series must be a pandas.Series"
    assert (
        isinstance(c, (int, float, np.integer, np.floating))
        and not isinstance(c, bool)
    ), "c must be a real number"
    assert isinstance(stochastic, bool), "stochastic must be a bool"

    # Drop NaNs; everything below works positionally on the remaining values
    # so that duplicate index labels cannot duplicate or reorder rows.
    series = series.dropna()
    values = np.asarray(series)
    n = len(values)

    # Get ranks
    if stochastic:
        # Private generator: the fixed seed keeps tie-breaking repeatable
        # without reseeding NumPy's global RNG behind the caller's back.
        rng = np.random.RandomState(123)
        order = rng.permutation(n)
        # "ordinal" breaks ties by position, so ranking the shuffled values
        # randomises which tied element gets the lower rank. Writing through
        # `order` puts every rank back at its original position.
        rank = np.empty(n, dtype=float)
        rank[order] = ss.rankdata(values[order], method="ordinal")
    else:
        # Ties are averaged
        rank = ss.rankdata(values, method="average")

    # Convert ranks to standard normal quantiles in one vectorised call
    transformed = ss.norm.ppf((rank - c) / (n - 2 * c + 1))
    return pd.Series(transformed, index=series.index)

In [6]:
def rank_to_normal(rank, c, n):
    """Map a rank to its standard normal quantile.

    Args:
        rank: Rank (or array of ranks) to convert.
        c: Offset constant used in the quantile formula.
        n: Total number of ranked observations.

    Returns:
        The standard normal inverse CDF evaluated at
        ``(rank - c) / (n - 2*c + 1)``.
    """
    denominator = n - 2 * c + 1
    quantile = (rank - c) / denominator
    return ss.norm.ppf(quantile)

In [7]:
# Transform the example series, breaking ties in a random (seeded) order.
rank_INT(series=s, stochastic=True)

a    0.000000
b   -1.179761
c   -0.497201
e    1.179761
f    0.497201
dtype: float64