## 这是老师的V2版本，也就是我们优化的目标

In [1]:
import numpy as np
import pandas as pd

def target_mean_v2(data, y_name, x_name):
    """Leave-one-out target mean encoding (reference / baseline version).

    For every row, returns the mean of ``y_name`` over all *other* rows that
    share the same ``x_name`` value.

    This is the intentionally simple baseline that the later versions are
    benchmarked against, so it still reads the frame one cell at a time.

    Parameters
    ----------
    data : pandas.DataFrame
        Frame holding both columns. Rows are addressed by position, so any
        index is accepted (not only the default RangeIndex).
    y_name : str
        Name of the target column.
    x_name : str
        Name of the categorical column to encode. Column labels are assumed
        to be unique, so that ``get_loc`` yields a single integer position.

    Returns
    -------
    numpy.ndarray
        1-D float array with one entry per row. Rows whose category occurs
        only once have no "other" rows and get ``nan``.
    """
    n_rows = data.shape[0]
    result = np.zeros(n_rows)
    # Resolve the column positions once; rows are then read positionally so
    # the loop counter is never mistaken for an index label.
    x_col = data.columns.get_loc(x_name)
    y_col = data.columns.get_loc(y_name)
    value_dict = dict()  # category -> sum of targets
    count_dict = dict()  # category -> number of rows
    for i in range(n_rows):
        key = data.iloc[i, x_col]
        target = data.iloc[i, y_col]
        if key not in value_dict:
            value_dict[key] = target
            count_dict[key] = 1
        else:
            value_dict[key] += target
            count_dict[key] += 1
    for i in range(n_rows):
        key = data.iloc[i, x_col]
        others = count_dict[key] - 1
        if others == 0:
            # Singleton category: the leave-one-out mean is undefined.
            result[i] = np.nan
        else:
            result[i] = (value_dict[key] - data.iloc[i, y_col]) / others
    return result

In [2]:
# Seed the global NumPy generator so that every version is timed and checked
# against the same synthetic data on each "Restart & Run All".
np.random.seed(0)
y = np.random.randint(2, size=(5000, 1))   # binary target
x = np.random.randint(10, size=(5000, 1))  # categorical feature with 10 levels
data = pd.DataFrame(np.concatenate([y, x], axis=1), columns=['y', 'x'])

In [3]:
%%timeit
target_mean_v2(data, 'y', 'x')

1 loop, best of 3: 255 ms per loop


## 首先对V2版本的代码本身进行优化，得到V3版本，由255ms降低到9.15ms，速度提升26.8倍左右 

In [4]:
from collections import defaultdict

def target_mean_v3(data, y_name, x_name):
    """Leave-one-out target mean encoding (pure-Python, array-based version).

    For every row, returns the mean of ``y_name`` over all *other* rows that
    share the same ``x_name`` value. The two columns are pulled out as NumPy
    arrays once, so the loops avoid per-cell DataFrame lookups.

    Parameters
    ----------
    data : pandas.DataFrame
        Frame holding both columns; rows are processed by position.
    y_name : str
        Name of the target column.
    x_name : str
        Name of the categorical column to encode.

    Returns
    -------
    numpy.ndarray
        1-D float array with one entry per row. Rows whose category occurs
        only once have no "other" rows and get ``nan``.
    """
    x = data[x_name].values
    y = data[y_name].values
    n_rows = x.shape[0]
    result = np.zeros(n_rows)
    value_dict = defaultdict(int)  # category -> sum of targets
    count_dict = defaultdict(int)  # category -> number of rows
    for i in range(n_rows):
        value_dict[x[i]] += y[i]
        count_dict[x[i]] += 1
    for i in range(n_rows):
        others = count_dict[x[i]] - 1
        if others == 0:
            # Singleton category: avoid the 0/0 division (and its warning)
            # and report the undefined leave-one-out mean as NaN.
            result[i] = np.nan
        else:
            result[i] = (value_dict[x[i]] - y[i]) / others
    return result

In [5]:
%%timeit
target_mean_v3(data, 'y', 'x')

100 loops, best of 3: 9.15 ms per loop


In [6]:
# Correctness check: print the Euclidean norm of the difference between the
# V2 (reference) and V3 results; 0.0 means both versions agree exactly.
result_2 = target_mean_v2(data, 'y', 'x')
result_3 = target_mean_v3(data,'y','x')
diff = np.linalg.norm(result_2 - result_3)
print(diff)

0.0


## 使用 Cython 进行优化，得到V4版本，由9.15ms降低到1.05ms，速度提升7.7倍左右

In [7]:
%load_ext Cython

In [8]:
%%cython

import numpy as np
from collections import defaultdict
cimport numpy as np

cpdef np.ndarray[double] target_mean_v4(data, y_name, x_name):
    """Leave-one-out target mean encoding (Cython, Python-dict accumulators).

    For every row, returns the mean of ``y_name`` over all *other* rows that
    share the same ``x_name`` value.

    Parameters
    ----------
    data : pandas.DataFrame
        Frame holding both columns; rows are processed by position.
    y_name : str
        Name of the target column. Its values must match the C ``long``
        buffer dtype declared below.
    x_name : str
        Name of the categorical column. Same dtype requirement as ``y_name``.

    Returns
    -------
    numpy.ndarray
        1-D float64 array with one entry per row. Rows whose category occurs
        only once have no "other" rows and get ``nan``.
    """
    cdef np.ndarray[double] result = np.zeros(data.shape[0])

    value_dict = defaultdict(int)  # category -> sum of targets
    count_dict = defaultdict(int)  # category -> number of rows
    cdef np.ndarray[long] x = data[x_name].values
    cdef np.ndarray[long] y = data[y_name].values
    # A C-typed index is what allows Cython to use direct buffer access for
    # x[i], y[i] and result[i] instead of generic Python indexing.
    cdef Py_ssize_t i
    cdef Py_ssize_t n_rows = x.shape[0]
    for i in range(n_rows):
        value_dict[x[i]] += y[i]
        count_dict[x[i]] += 1
    for i in range(n_rows):
        others = count_dict[x[i]] - 1
        if others == 0:
            # Singleton category: the leave-one-out mean is undefined.
            result[i] = np.nan
        else:
            # Dividing by a float forces true division whatever the
            # language_level of the Cython compiler is.
            result[i] = (value_dict[x[i]] - y[i]) / float(others)
    return result

In [9]:
%%timeit
target_mean_v4(data,'y','x')

1000 loops, best of 3: 1.05 ms per loop


In [10]:
# Correctness check: print the Euclidean norm of the difference between the
# V2 reference result (result_2, computed in an earlier cell) and the V4
# result; 0.0 means both versions agree exactly.
result_4 = target_mean_v4(data,'y','x')
diff = np.linalg.norm(result_2 - result_4)
print(diff)

0.0


## 使用 Cython 的 C++ 模式（`--cplus`）进行优化，用 `unordered_map` 代替 Python 字典，得到V5版本，从1.05ms降低到0.414ms，速度提升了1.5倍左右

In [11]:
%%cython --cplus

import numpy as np
cimport numpy as np

from libcpp.unordered_map cimport unordered_map

cpdef np.ndarray[double] target_mean_v5(data, y_name, x_name):
    """Leave-one-out target mean encoding (Cython, C++ unordered_map accumulators).

    For every row, returns the mean of ``y_name`` over all *other* rows that
    share the same ``x_name`` value.

    Parameters
    ----------
    data : pandas.DataFrame
        Frame holding both columns; rows are processed by position.
    y_name : str
        Name of the target column. Its values must match the C ``long``
        buffer dtype declared below.
    x_name : str
        Name of the categorical column. Same dtype requirement as ``y_name``.

    Returns
    -------
    numpy.ndarray
        1-D float64 array with one entry per row. Rows whose category occurs
        only once have no "other" rows and get ``nan``.
    """
    cdef np.ndarray[double] result = np.zeros(data.shape[0])
    # Key/value types match the `long` element type of the input buffers, so
    # category codes and running sums are not squeezed into a narrower `int`.
    cdef unordered_map[long, long] value_dict  # category -> sum of targets
    cdef unordered_map[long, long] count_dict  # category -> number of rows

    cdef np.ndarray[long] x = data[x_name].values
    cdef np.ndarray[long] y = data[y_name].values
    # A C-typed index keeps the loops free of Python-object indexing.
    cdef Py_ssize_t i
    cdef Py_ssize_t n_rows = x.shape[0]
    cdef long others
    for i in range(n_rows):
        value_dict[x[i]] += y[i]
        count_dict[x[i]] += 1
    for i in range(n_rows):
        others = count_dict[x[i]] - 1
        if others == 0:
            # Singleton category: the leave-one-out mean is undefined.
            result[i] = np.nan
        else:
            # Cast before dividing: with all-C integer operands `/` could
            # otherwise be compiled as integer division.
            result[i] = <double>(value_dict[x[i]] - y[i]) / others
    return result

In [12]:
%%timeit
target_mean_v5(data,'y','x')

1000 loops, best of 3: 414 µs per loop


In [13]:
# Correctness check: print the Euclidean norm of the difference between the
# V2 reference result (result_2, computed in an earlier cell) and the V5
# result; 0.0 means both versions agree exactly.
result_5 = target_mean_v5(data,'y','x')
diff = np.linalg.norm(result_2 - result_5)
print(diff)

0.0
