# 说明
1、使用Dict类型存入每个X值(0-9为Key)，累加（不含当前Index）其和与个数，再求相除求平均值

2、将X值转为字符型

3、用cython加速，cpdef export function

4、colab安装 !pip install git+https://github.com/rkern/line_profiler.git


# **#Cython版本**

In [89]:
%load_ext Cython

The Cython extension is already loaded. To reload it, use:
  %reload_ext Cython


In [90]:
%%cython -a

import numpy as np
cimport numpy as cnp
import pandas as pd

cpdef target_mean_test(x,y):
  dl = x.shape[0]
  result = np.zeros(dl)
  # 定义字典类型，用于第个X的和与个数
  value_elements = dict()
  count_elements = dict()
  for i in range(dl):
    x_value = str(x[i, 0])
    y_value = y[i, 0]
    # 如果X是第一次出现，就直接存入，否则就累加
    if (x_value not in value_elements):
      value_elements[x_value] = y_value
      count_elements[x_value] = 1
    else:
      value_elements[x_value] += y_value
      count_elements[x_value] += 1
  for i in range(dl):
    x_value = str(x[i, 0])
    y_value = y[i, 0]
    # 去掉当前所在Index的Y值
    result[i] = (value_elements[x_value] - y_value) / (count_elements[x_value] - 1)

  return result



In [91]:
%%timeit
x = np.random.randint(10, size=(50000, 2))
y = np.random.randint(2, size=(50000, 2))

result = target_mean_test(x, y)
print(result)

[0.49940852 0.5048834  0.50322321 ... 0.50446973 0.50508272 0.50909462]
[0.50581862 0.50610236 0.49615915 ... 0.50099602 0.50099602 0.50099602]
[0.49309309 0.49118852 0.49309309 ... 0.49309309 0.50039888 0.49309309]
[0.50264335 0.49234093 0.50119808 ... 0.49163138 0.49234093 0.50359138]
[0.50020178 0.4988189  0.5081156  ... 0.49576355 0.50009883 0.5081156 ]
[0.50664251 0.50741117 0.49990179 ... 0.49990179 0.49990179 0.51008991]
[0.5071599  0.49193223 0.48991815 ... 0.4905057  0.50696102 0.49327798]
[0.50358423 0.50351335 0.50358423 ... 0.49755716 0.49020795 0.49979928]
[0.49497386 0.49477282 0.49629926 ... 0.50742376 0.49826707 0.4964993 ]
[0.49590164 0.50120773 0.49718173 ... 0.50874466 0.49902382 0.50514037]
[0.49919517 0.49939638 0.49959612 ... 0.49277427 0.51168033 0.49795441]
[0.51124498 0.50680272 0.48218196 ... 0.50660264 0.49010989 0.49453552]
[0.49899719 0.49889536 0.51221482 ... 0.50458167 0.49899719 0.51221482]
[0.50370074 0.49792777 0.49063518 ... 0.49968963 0.50247476 0.50

# **当前Python版本**

In [87]:
# coding = 'utf-8'
import numpy as np
import pandas as pd
from line_profiler import LineProfiler

def target_mean_test(data,x_name,y_name):
  result = np.zeros(data.shape[0])
  # 定义字典类型，用于第个X的和与个数
  value_elements = dict()
  count_elements = dict()
  for i in range(data.shape[0]): 
    x_value = data.loc[i, x_name]
    y_value = data.loc[i, y_name]
    if (x_value not in value_elements.keys()):
      value_elements[x_value] = y_value
      count_elements[x_value] = 1
    else:
      value_elements[x_value] += y_value
      count_elements[x_value] += 1
  for i in range(data.shape[0]):
    x_value = data.loc[i, x_name]
    y_value = data.loc[i, y_name]
    # 去掉当前所在Index的Y值
    result[i] = (value_elements[x_value] - y_value) / (count_elements[x_value] - 1)

  return result 

x = np.random.randint(10, size=(50000, 1))
y = np.random.randint(2, size=(50000, 1))
data = pd.DataFrame(np.concatenate([y, x], axis=1), columns=['y', 'x'])

from line_profiler import LineProfiler
lp2 = LineProfiler()
lp2_wrapper = lp2(target_mean_test)
lp2_wrapper(data, 'y', 'x')
lp2.print_stats()

result = target_mean_test(data, 'x', 'y')
print(result)

Timer unit: 1e-06 s

Total time: 4.59285 s
File: <ipython-input-87-3402437e9a6e>
Function: target_mean_test at line 6

Line #      Hits         Time  Per Hit   % Time  Line Contents
     6                                           def target_mean_test(data,x_name,y_name):
     7         1         70.0     70.0      0.0    result = np.zeros(data.shape[0])
     8                                             # 定义字典类型，用于第个X的和与个数
     9         1          3.0      3.0      0.0    value_elements = dict()
    10         1          1.0      1.0      0.0    count_elements = dict()
    11     50001      26813.0      0.5      0.6    for i in range(data.shape[0]): 
    12     50000    1111370.0     22.2     24.2      x_value = data.loc[i, x_name]
    13     50000    1097797.0     22.0     23.9      y_value = data.loc[i, y_name]
    14     50000      44073.0      0.9      1.0      if (x_value not in value_elements.keys()):
    15         2          1.0      0.5      0.0        value_elements[x_value