# 说明
1、使用Dict类型存入每个X值(0-9为Key)，累加（不含当前Index）其和与个数，再求相除求平均值

2、将X值转为字符型

3、用cython加速，cpdef export function

4、colab安装 !pip install git+https://github.com/rkern/line_profiler.git


# **#Cyton版本**

In [83]:
%load_ext Cython

The Cython extension is already loaded. To reload it, use:
  %reload_ext Cython


In [84]:
%%cython -a

import numpy as np
cimport numpy as cnp
import pandas as pd

cpdef target_mean_test(x,y):
  dl = x.shape[0]
  result = np.zeros(dl)
  # 定义字典类型，用于第个X的和与个数
  value_elements = dict()
  count_elements = dict()
  for i in range(dl):
    x_value = str(x[i, 0])
    y_value = y[i, 0]
    # 如果X是第一次出现，就直接存入，否则就累加
    if (x_value not in value_elements):
      value_elements[x_value] = y_value
      count_elements[x_value] = 1
    else:
      value_elements[x_value] += y_value
      count_elements[x_value] += 1
  for i in range(dl):
    x_value = str(x[i, 0])
    y_value = y[i, 0]
    # 去掉当前所在Index的Y值
    result[i] = (value_elements[x_value] - y_value) / (count_elements[x_value] - 1)

  return result



In [85]:
%%timeit
x = np.random.randint(10, size=(5000, 2))
y = np.random.randint(2, size=(5000, 2))

result = target_mean_test(x, y)
print(result)

[0.49678801 0.4989899  0.46899225 ... 0.52587992 0.52587992 0.46705426]
[0.51744186 0.49306931 0.48670757 ... 0.51937984 0.48461538 0.51937984]
[0.49427481 0.50293542 0.48722986 ... 0.50293542 0.48987854 0.50097466]
[0.47644928 0.45676275 0.45454545 ... 0.52219873 0.5243129  0.45454545]
[0.4704797  0.48514851 0.47708333 ... 0.53438114 0.475      0.52688172]
[0.4844358  0.51361868 0.52941176 ... 0.50646552 0.52123552 0.50646552]
[0.52131783 0.46218487 0.49484536 ... 0.51503759 0.49508841 0.51503759]
[0.50759219 0.47807933 0.50107066 ... 0.46572581 0.47807933 0.51509054]
[0.51190476 0.50298211 0.51334702 ... 0.52208835 0.42913386 0.49610895]
[0.5105973  0.45703125 0.51851852 ... 0.51851852 0.50867052 0.52057613]
[0.50423729 0.51646091 0.5077821  ... 0.51646091 0.51646091 0.51606426]
[0.54248366 0.54248366 0.5010142  ... 0.51757812 0.50485437 0.454     ]
[0.52620545 0.51901566 0.48405253 ... 0.52066116 0.45564516 0.51901566]
[0.52016129 0.52471483 0.4989605  ... 0.45528455 0.48927875 0.52

# **当前Pyhton版本**

In [81]:
# coding = 'utf-8'
import numpy as np
import pandas as pd
from line_profiler import LineProfiler

def target_mean_test(data,x_name,y_name):
  result = np.zeros(data.shape[0])
  # 定义字典类型，用于第个X的和与个数
  value_elements = dict()
  count_elements = dict()
  for i in range(data.shape[0]): 
    x_value = data.loc[i, x_name]
    y_value = data.loc[i, y_name]
    if (x_value not in value_elements.keys()):
      value_elements[x_value] = y_value
      count_elements[x_value] = 1
    else:
      value_elements[x_value] += y_value
      count_elements[x_value] += 1
  for i in range(data.shape[0]):
    x_value = data.loc[i, x_name]
    y_value = data.loc[i, y_name]
    # 去掉当前所在Index的Y值
    result[i] = (value_elements[x_value] - y_value) / (count_elements[x_value] - 1)

  return result 

x = np.random.randint(10, size=(50000, 1))
y = np.random.randint(2, size=(50000, 1))
data = pd.DataFrame(np.concatenate([y, x], axis=1), columns=['y', 'x'])

from line_profiler import LineProfiler
lp2 = LineProfiler()
lp2_wrapper = lp2(target_mean_test)
lp2_wrapper(data, 'y', 'x')
lp2.print_stats()

result = target_mean_test(data, 'x', 'y')
print(result)

Timer unit: 1e-06 s

Total time: 4.76276 s
File: <ipython-input-81-3402437e9a6e>
Function: target_mean_test at line 6

Line #      Hits         Time  Per Hit   % Time  Line Contents
     6                                           def target_mean_test(data,x_name,y_name):
     7         1        122.0    122.0      0.0    result = np.zeros(data.shape[0])
     8                                             # 定义字典类型，用于第个X的和与个数
     9         1          1.0      1.0      0.0    value_elements = dict()
    10         1          1.0      1.0      0.0    count_elements = dict()
    11     50001      27278.0      0.5      0.6    for i in range(data.shape[0]): 
    12     50000    1145806.0     22.9     24.1      x_value = data.loc[i, x_name]
    13     50000    1126743.0     22.5     23.7      y_value = data.loc[i, y_name]
    14     50000      45369.0      0.9      1.0      if (x_value not in value_elements.keys()):
    15         2          1.0      0.5      0.0        value_elements[x_value