In [57]:
%load_ext Cython

The Cython extension is already loaded. To reload it, use:
  %reload_ext Cython


#1.Stupid version

In [59]:
def target_mean_v1(data, y_name, x_name):
    """Leave-one-out target mean, deliberately naive reference implementation.

    For every row, all *other* rows are regrouped by ``x_name`` and the mean
    of ``y_name`` in the row's own group is looked up.  The groupby is redone
    for each row, so the cost grows quadratically with the number of rows.

    Parameters
    ----------
    data : pandas.DataFrame
        Frame containing both columns.
    y_name : str
        Name of the target column that is averaged.
    x_name : str
        Name of the categorical column that defines the groups.

    Returns
    -------
    numpy.ndarray
        One float per row; 0.0 where the row is the only member of its group.
    """
    n_rows = data.shape[0]
    result = np.zeros(n_rows)
    positions = np.arange(n_rows)

    for i in range(n_rows):
        # Mask and lookup are positional, so the function does not depend on
        # the frame carrying a default 0..n-1 index.
        others = data[positions != i]
        # Selecting the target column before aggregating yields a plain Series
        # indexed by group key, independent of how pandas lays out
        # multi-function aggregation results.
        group_means = others.groupby(x_name)[y_name].mean()
        key = data[x_name].iloc[i]
        if key in group_means.index:
            # Scalar lookup; the slot stays 0.0 when no other row shares the key.
            result[i] = group_means.loc[key]

    return result

In [60]:
# NOTE(review): in the visible notebook `data` is only constructed in the
# "#9.Test correctness" section further down, so on a fresh kernel that section
# has to run before this cell.  The name `restul` is not read anywhere else in
# the visible cells.
restul = target_mean_v1(data,'y','x')

#2.Improved version, inspired by teacher Wang's code

In [61]:

def target_mean_v2(data, y_name, x_name):
    """Leave-one-out target mean using two passes over the rows.

    The first pass accumulates the sum of ``y_name`` and the row count per
    distinct ``x_name`` value.  The second pass subtracts each row's own
    target from its group sum and divides by the remaining row count.

    Parameters
    ----------
    data : pandas.DataFrame
        Frame containing both columns.
    y_name : str
        Name of the target column.
    x_name : str
        Name of the grouping column.

    Returns
    -------
    numpy.ndarray
        One float per row; 0.0 where the row is the only member of its group.
    """
    n_rows = data.shape[0]
    result = np.zeros(n_rows)
    # Rows are addressed by position (not by index label) so the function also
    # works on frames whose index is not the default 0..n-1 range.
    x_col = data.columns.get_loc(x_name)
    y_col = data.columns.get_loc(y_name)

    sum_dict = {}
    count_dict = {}
    for i in range(n_rows):
        x_value = data.iat[i, x_col]
        y_value = data.iat[i, y_col]
        sum_dict[x_value] = sum_dict.get(x_value, 0) + y_value
        count_dict[x_value] = count_dict.get(x_value, 0) + 1

    for i in range(n_rows):
        x_value = data.iat[i, x_col]
        y_value = data.iat[i, y_col]
        group_sum = sum_dict[x_value]
        group_count = count_dict[x_value]
        # A single-member group has no other rows to average; leave 0.0.
        if group_count > 1:
            result[i] = (group_sum - y_value) / (group_count - 1)
    return result

#3.Pythonic version, inspired by classmate A君

In [62]:
from collections import defaultdict
def target_mean_v3(data, y_name, x_name):
    """Leave-one-out target mean computed over the raw column arrays.

    Group sums and counts are accumulated in ``defaultdict`` objects while
    iterating the two columns in lockstep, then each row's own target is
    removed from its group sum.

    Parameters
    ----------
    data : pandas.DataFrame
        Frame containing both columns.
    y_name : str
        Name of the target column.
    x_name : str
        Name of the grouping column.

    Returns
    -------
    list
        One value per row, in row order; 0.0 where the row is the only member
        of its group.
    """
    X = data[x_name].values
    Y = data[y_name].values
    sum_dict = defaultdict(int)
    count_dict = defaultdict(int)
    for x, y in zip(X, Y):
        sum_dict[x] += y
        count_dict[x] += 1
    # A row that is alone in its group has no other rows to average.  Emit 0.0,
    # matching target_mean_v2, instead of dividing by zero.
    result = [
        (sum_dict[x] - y) / (count_dict[x] - 1) if count_dict[x] > 1 else 0.0
        for x, y in zip(X, Y)
    ]
    return result

#4.Cython v1, pure Python code compiled with Cython, cimporting NumPy's C-level declarations

In [77]:
%%cython -a
import cython
cimport numpy as cnp
import numpy as np
@cython.boundscheck(False)
@cython.wraparound(False)
cpdef target_mean_cy_v1(cnp.ndarray[long,ndim=2] data):
    """Leave-one-out target mean on a 2-D integer array, using Python dicts.

    Parameters
    ----------
    data : 2-D ndarray of C ``long``
        Column 0 is read as the target (y), column 1 as the group key (x).

    Returns
    -------
    numpy.ndarray
        One float per row; 0.0 where the row is the only member of its group.
    """
    cdef cnp.ndarray result = np.zeros(data.shape[0])
    sum_dict = {}
    count_dict = {}
    # The column layout is fixed by this function: y first, x second.
    y_index = 0
    x_index = 1

    # Pass 1: accumulate the per-group sum of y and the per-group row count.
    for i in range(data.shape[0]):
        x_value = data[i, x_index]
        y_value = data[i, y_index]
        sum_dict[x_value] = sum_dict.get(x_value, 0) + y_value
        count_dict[x_value] = count_dict.get(x_value, 0) + 1

    # Pass 2: remove the row's own contribution and average what is left.
    for i in range(data.shape[0]):
        x_value = data[i, x_index]
        y_value = data[i, y_index]
        group_sum = sum_dict[x_value]
        group_count = count_dict[x_value]
        # A single-member group has no other rows; keep the 0.0 default
        # instead of dividing by zero.
        if group_count > 1:
            result[i] = (group_sum - y_value) / (group_count - 1)
    return result

#5.Cython v2, using Cython data types, much faster

In [64]:
%%cython -a
# distutils: language=c++
import cython
cimport numpy as cnp
import numpy as np
from libcpp.map cimport map

@cython.boundscheck(False)
@cython.wraparound(False)
cpdef target_mean_cy_v2(cnp.ndarray[long,ndim=2] data,y_index_s, x_index_s):
    """Leave-one-out target mean on a 2-D integer array, using C++ maps.

    Parameters
    ----------
    data : 2-D ndarray of C ``long``
        Column 0 is read as the target (y), column 1 as the group key (x).
    y_index_s, x_index_s
        Accepted but not used; the column positions are fixed inside the
        function.

    Returns
    -------
    numpy.ndarray
        One float64 per row; 0.0 where the row is the only member of its group.
    """
    cdef long nrow = data.shape[0]
    # Typed buffer so that the element writes below compile to direct stores.
    cdef cnp.ndarray[double, ndim=1] result = np.zeros(nrow)
    cdef map[long, long] sum_dict, count_dict
    cdef long i, x_value, count
    # Column 0 of `data` holds y, column 1 holds x.
    cdef long x_index = 1, y_index = 0

    for i in range(nrow):
        x_value = data[i, x_index]
        # operator[] inserts a zero-initialised entry for a key it has not
        # seen, so no explicit find() branch is required.
        sum_dict[x_value] += data[i, y_index]
        count_dict[x_value] += 1

    for i in range(nrow):
        x_value = data[i, x_index]
        count = count_dict[x_value]
        # A single-member group has no other rows; keep the 0.0 default
        # instead of dividing by zero.
        if count > 1:
            # The cast forces floating-point division whatever language_level
            # the module is compiled with.
            result[i] = <double>(sum_dict[x_value] - data[i, y_index]) / (count - 1)
    return result

#6.Cython v3, using C++ unordered_map. Open question: how to parallelise this? I got stuck on the C++ map.

In [65]:
%%cython -a
# distutils: language=c++
import cython
cimport numpy as cnp
import numpy as np
from libcpp.unordered_map cimport unordered_map as map

@cython.boundscheck(False)
@cython.wraparound(False)
cpdef target_mean_cy_v3(cnp.ndarray[long,ndim=2] data, y_name_s, x_name_s):
    """Leave-one-out target mean on a 2-D integer array, using C++ hash maps.

    ``map`` in this cell is ``libcpp.unordered_map`` imported under that name.

    Parameters
    ----------
    data : 2-D ndarray of C ``long``
        Column 0 is read as the target (y), column 1 as the group key (x).
    y_name_s, x_name_s
        Accepted but not used; the column positions are fixed inside the
        function.

    Returns
    -------
    numpy.ndarray
        One float64 per row; 0.0 where the row is the only member of its group.
    """
    cdef long nrow = data.shape[0]
    # Typed buffer so that the element writes below compile to direct stores.
    cdef cnp.ndarray[double, ndim=1] result = np.zeros(nrow)
    cdef map[long, long] sum_dict, count_dict
    cdef long i, x_value, count
    # Column 0 of `data` holds y, column 1 holds x.
    cdef long x_index = 1, y_index = 0

    for i in range(nrow):
        x_value = data[i, x_index]
        # operator[] inserts a zero-initialised entry for a key it has not
        # seen, so no explicit find() branch is required.
        sum_dict[x_value] += data[i, y_index]
        count_dict[x_value] += 1

    for i in range(nrow):
        x_value = data[i, x_index]
        count = count_dict[x_value]
        # A single-member group has no other rows; keep the 0.0 default
        # instead of dividing by zero.
        if count > 1:
            # The cast forces floating-point division whatever language_level
            # the module is compiled with.
            result[i] = <double>(sum_dict[x_value] - data[i, y_index]) / (count - 1)
    return result

#7.Cython v4, using memoryview arrays to store the per-group sums and counts

In [109]:
%%cython -a
# distutils: language=c++
import cython
cimport numpy as cnp
import numpy as np


@cython.boundscheck(False)
@cython.wraparound(False)
cpdef target_mean_cy_v4(data,y_name,x_name):
    """Leave-one-out target mean using array lookup tables indexed by x.

    The x values are used directly as indices into two tables (per-group sum
    and per-group count), so they must be non-negative integers.  The tables
    are sized ``max(x) + 1``, which suits small, dense category codes.

    Parameters
    ----------
    data : pandas.DataFrame
        Frame containing both columns; their ``.values`` must be usable as
        ``long[:]`` memoryviews.
    y_name : str
        Name of the target column.
    x_name : str
        Name of the grouping column.

    Returns
    -------
    double[:] memoryview
        One value per row; 0.0 where the row is the only member of its group.

    Raises
    ------
    ValueError
        If any x value is negative.
    """
    cdef long[:] X = data[x_name].values
    cdef long[:] Y = data[y_name].values

    cdef long nrow = data.shape[0]
    cdef double[:] result = np.zeros(nrow,dtype=np.double)
    cdef long[:] sum_dict
    cdef long[:] count_dict
    cdef long i, x_value, count, n_slots = 0

    # Bounds checking and wraparound are disabled for this function, so the
    # tables must be sized from the data and negative keys rejected up front;
    # otherwise an out-of-range key would touch memory outside the tables.
    for i in range(nrow):
        x_value = X[i]
        if x_value < 0:
            raise ValueError("x values must be non-negative integers")
        if x_value >= n_slots:
            n_slots = x_value + 1

    # "l" is NumPy's type code for C long, matching the long[:] declarations
    # (the np.long alias was removed in NumPy 1.24).
    sum_dict = np.zeros(n_slots, dtype="l")
    count_dict = np.zeros(n_slots, dtype="l")

    for i in range(nrow):
        x_value = X[i]
        sum_dict[x_value] += Y[i]
        count_dict[x_value] += 1

    for i in range(nrow):
        x_value = X[i]
        count = count_dict[x_value]
        # A single-member group has no other rows; keep the 0.0 default
        # instead of dividing by zero.
        if count > 1:
            # The cast forces floating-point division whatever language_level
            # the module is compiled with.
            result[i] = <double>(sum_dict[x_value] - Y[i]) / (count - 1)
    return result

#8.Cython v5 ,parallel speed-up within nogil

In [133]:
%%cython -a
# distutils: language=c++
import cython
cimport numpy as cnp
import numpy as np
from cython.parallel import prange

@cython.boundscheck(False)
@cython.wraparound(False)
@cython.cdivision(True)
cpdef target_mean_cy_v5(data, y_name, x_name):
    """Leave-one-out target mean with array lookup tables and a parallel final pass.

    The x values are used directly as indices into two tables (per-group sum
    and per-group count), so they must be non-negative integers.  The tables
    are sized ``max(x) + 1``, which suits small, dense category codes.

    Parameters
    ----------
    data : pandas.DataFrame
        Frame containing both columns; their ``.values`` must be usable as
        ``long[:]`` memoryviews.
    y_name : str
        Name of the target column.
    x_name : str
        Name of the grouping column.

    Returns
    -------
    double[:] memoryview
        One value per row; 0.0 where the row is the only member of its group.

    Raises
    ------
    ValueError
        If any x value is negative.
    """
    cdef long[:] X = data[x_name].values
    cdef long[:] Y = data[y_name].values

    cdef long nrow = data.shape[0]
    cdef double[:] result = np.zeros(nrow)
    cdef long[:] sum_dict
    cdef long[:] count_dict
    cdef long i, count, n_slots = 0

    # Bounds checking and wraparound are disabled for this function, so the
    # tables must be sized from the data and negative keys rejected up front;
    # otherwise an out-of-range key would touch memory outside the tables.
    for i in range(nrow):
        if X[i] < 0:
            raise ValueError("x values must be non-negative integers")
        if X[i] >= n_slots:
            n_slots = X[i] + 1

    # "l" is NumPy's type code for C long, matching the long[:] declarations
    # (the np.long alias was removed in NumPy 1.24).
    sum_dict = np.zeros(n_slots, dtype="l")
    count_dict = np.zeros(n_slots, dtype="l")

    # The accumulation stays sequential: many rows update the same table slot,
    # and an unsynchronised `+=` on a shared element from several threads
    # would be a data race.
    for i in range(nrow):
        sum_dict[X[i]] += Y[i]
        count_dict[X[i]] += 1

    # Each iteration only reads the finished tables and writes its own
    # result[i], so this pass can run in parallel without the GIL.
    for i in prange(nrow, nogil=True):
        count = count_dict[X[i]]
        # The guard keeps the divisor non-zero, which is what makes
        # cdivision(True) on this function safe.
        if count > 1:
            result[i] = <double>(sum_dict[X[i]] - Y[i]) / (count - 1)
    return result

#9.Test correctness

In [137]:
import numpy as np
import pandas as pd

# Fixed seed so the correctness checks and timings run on the same data after
# every kernel restart.
np.random.seed(0)
# 5000 rows: a binary target y and a grouping key x drawn from 0..9.
y = np.random.randint(2, size=(5000, 1))
x = np.random.randint(10, size=(5000, 1))
# Column order matters: the array-based Cython versions read y from column 0
# and x from column 1.
data = pd.DataFrame(np.concatenate([y, x], axis=1), columns=['y', 'x'])

In [138]:
# Run the naive per-row groupby version and the two-pass dictionary version on
# the same frame; `base_line` is the reference the later cells compare against.
result1   = target_mean_v1(data,'y','x')
base_line = target_mean_v2(data,'y','x')

In [139]:

# Euclidean norm of the element-wise difference; 0.0 means v1 and v2 agree exactly.
print(np.linalg.norm(result1 - base_line))

0.0


In [140]:

# Cython v1-v3 take the raw 2-D array (y in column 0, x in column 1), hence
# data.to_numpy(); v3 (Python) and Cython v4/v5 take the DataFrame plus column names.
result3   = target_mean_v3(data,'y','x')
result4   = target_mean_cy_v1(data.to_numpy())
result5   = target_mean_cy_v2(data.to_numpy(),'y','x')
result6   = target_mean_cy_v3(data.to_numpy(),'y','x')
result7   = target_mean_cy_v4(data,'y','x')
result8   = target_mean_cy_v5(data,'y','x')


In [141]:

# Distance of every alternative implementation from the baseline, one line per
# implementation in the order v3, cy_v1, cy_v2, cy_v3, cy_v4, cy_v5.
# A printed 0.0 means that implementation reproduces `base_line` exactly.
for candidate in (result3, result4, result5, result6, result7, result8):
    print(np.linalg.norm(candidate - base_line))

0.0
0.0
0.0
0.0
0.0
0.0


#10.Test performance

In [142]:

# -n fixes the number of loops per timing run; the faster implementations get
# more loops.  For Cython v1-v3 the data.to_numpy() conversion is part of the
# timed statement.
%timeit -n 10 target_mean_v2(data,'y','x')
%timeit -n 10 target_mean_v3(data,'y','x')
%timeit -n 100 target_mean_cy_v1(data.to_numpy())
%timeit -n 100 target_mean_cy_v2(data.to_numpy(),'y','x')
%timeit -n 100 target_mean_cy_v3(data.to_numpy(),'y','x')
%timeit -n 10000 target_mean_cy_v4(data,'y','x')
%timeit -n 10000 target_mean_cy_v5(data,'y','x')

10 loops, best of 3: 154 ms per loop
10 loops, best of 3: 6.77 ms per loop
100 loops, best of 3: 1.1 ms per loop
100 loops, best of 3: 577 µs per loop
100 loops, best of 3: 607 µs per loop
10000 loops, best of 3: 36.7 µs per loop
10000 loops, best of 3: 35.6 µs per loop
