In [1]:
import pandas as pd
import numpy as np
import time
import datetime
import pickle
import random
import os
os.environ["CUDA_VISIBLE_DEVICES"] = "2, 3, 4"
import logging
LOG_FORMAT = "%(asctime)s - %(levelname)s - %(message)s"
# logging.basicConfig(level=logging.INFO, filename='log_Model', format=LOG_FORMAT)
logging.basicConfig(level=logging.INFO, format=LOG_FORMAT)

from IPython.display import display, HTML
def show_df(df):
    """Display ``df`` in the notebook output using its ``to_html()`` rendering."""
    table_markup = df.to_html()
    display(HTML(table_markup))
    
VAL_POINT = 65623

from tools.pos_encoder import *
from tools.time_encoder import *

# build

## function

In [2]:

# mp_info_dict_train = {
#     'action' : mp_action_train,
#     'distance' : mp_distance_train_detail,
#     'order' : mp_order_train,
#     'couriers' : mp_couriers_features_train
# }

# data_info_dict_train = {
#     'konw_lens' : df_train_info['know_lens'].values,
#     'know_lens_list' : df_train_info['know_lens_list'].values,
#     'full_lens' : df_train_info['lens'].values,
#     'impossible_idxs' : df_train_info['impossible_idxs'].values,
#     'impossible_idxs_list' : df_train_info['impossible_idxs_list'].values
# }

# mp_info_dict_testA = {
#     'action' : mp_action_testA,
#     'distance' : mp_distance_testA_detail,
#     'order' : mp_order_testA,
#     'couriers' : mp_couriers_features_testA
# }

# data_info_dict_testA = {
#     'konw_lens' : df_testA_info['know_lens'].values,
#     'full_lens' : df_testA_info['lens'].values,
#     'impossible_idxs' : df_testA_info['impossible_idxs'].values
# }

# Integer codes for the categorical inputs used by the static action features.
mp_action_type = {'PICKUP' : 0, 'DELIVERY' : 1}
# Weather grades as they appear in the order data (normal / slightly severe / severe / extremely severe).
mp_weather = {'正常天气' : 0, '轻微恶劣天气' : 1, '恶劣天气' : 2, '极恶劣天气' : 3}
def get_static_action_feature_dict(prefix, se_a_action, df_a_order, df_a_distance):
    """Build the features that depend on a single action only.

    Parameters
    ----------
    prefix : str
        Prepended (with an underscore) to every produced feature name.
    se_a_action : row-like
        Must expose ``tracking_id`` and ``action_type``.
    df_a_order : DataFrame
        Order table looked up with ``.loc[tracking_id]``; must provide
        ``weather_grade`` and the four pick/deliver coordinate columns.
    df_a_distance : mapping
        ``df_a_distance[a][b]`` is the distance table between orders ``a`` and ``b``.

    Returns
    -------
    dict
        ``<prefix>_action_type``, ``<prefix>_weather``, the four prefixed
        coordinates and ``<prefix>_self_p_d_distance``.

    Raises
    ------
    KeyError
        If the action type or weather grade has no integer code.
    TypeError
        If the order's own distance table does not hold exactly one
        PICKUP -> DELIVERY row.
    """
    tracking_id = se_a_action.tracking_id
    # Look the order row up once instead of once per feature.
    order_row = df_a_order.loc[tracking_id]

    feature_dict = {}
    feature_dict[prefix + '_action_type'] = mp_action_type[se_a_action.action_type]
    feature_dict[prefix + '_weather'] = mp_weather[order_row['weather_grade']]

    se_lng_lat = order_row[['pick_lng', 'pick_lat', 'deliver_lng', 'deliver_lat']]
    feature_dict.update({prefix + '_' + name: value for name, value in se_lng_lat.to_dict().items()})

    self_row = df_a_distance[tracking_id][tracking_id].query('source_type == "PICKUP" & target_type == "DELIVERY"')
    # float(Series) is deprecated in recent pandas; extract the scalar explicitly while
    # keeping the "exactly one matching row, otherwise TypeError" behaviour.
    if self_row.shape[0] != 1:
        raise TypeError('expected exactly one PICKUP -> DELIVERY distance row for order %r, found %d'
                        % (tracking_id, self_row.shape[0]))
    feature_dict[prefix + '_self_p_d_distance'] = float(self_row['grid_distance'].iloc[0])
    return feature_dict

# Rank of each action type in the (source_type, target_type) sort order of a distance table.
mp_row = {'ASSIGN' : 0, 'DELIVERY' : 1, 'PICKUP' : 2}

def _estimate_time_bucket_features(kind, time_diff):
    """Return the raw slack ``time_diff`` plus its time-bucket flags for ``kind`` ('pickup' / 'delivery').

    Bucket bounds are written as ``60 * minutes``, i.e. the slack is treated as seconds.
    """
    return {
        "estimate_time_diff_" + kind: time_diff,
        kind + "_estimate_time_in_0min": time_diff < 0,
        kind + "_estimate_time_in_5min": 0 <= time_diff < 60 * 5,
        kind + "_estimate_time_in_15min": 60 * 5 <= time_diff < 60 * 15,
        kind + "_estimate_time_in_45min": 60 * 15 <= time_diff < 60 * 45,
        kind + "_estimate_time_in_120min": 60 * 45 <= time_diff < 60 * 120,
        kind + "_estimate_time_exceed_120min": time_diff >= 120 * 60,
    }

def get_cross_action_feature_dict(last_action, cur_action, df_a_order, df_a_distance, speed):
    """Build the features that relate the last known action to a candidate next action.

    Parameters
    ----------
    last_action, cur_action : row-like
        Must expose ``tracking_id`` and ``action_type``; ``last_action`` also
        provides ``expect_time``.
    df_a_order : DataFrame
        Order table looked up with ``.loc[tracking_id]``.
    df_a_distance : mapping
        ``df_a_distance[a][b]`` is the distance table from order ``a`` to order ``b``.
        After sorting by (source_type, target_type) it is assumed to hold one row per
        ordered type pair: 9 rows for two different orders, 6 rows for the same order
        (no same-type pairs) -- TODO confirm against the code that builds the table.
    speed : number
        Courier speed used to turn the grid distance into an estimated travel time.

    Returns
    -------
    dict
        Time-gap features, the pairwise distances ``distance_<src>_<tgt>``, the
        distance / coordinates of the actual last -> cur move, the position encoding
        returned by the project helper ``mutual2pos``, and the pickup / delivery
        slack buckets.

    Raises
    ------
    ValueError
        If both actions belong to the same order and have the same action type;
        the distance table has no row for that pair.
    """
    last_time = last_action['expect_time']
    # Fetch each order row once instead of once per feature.
    cur_order = df_a_order.loc[cur_action.tracking_id]
    last_order = df_a_order.loc[last_action.tracking_id]

    features_dict = {}
    #action
    same_order = (last_action.tracking_id == cur_action.tracking_id)
    features_dict['same_tracking_id'] = same_order
    #order
    features_dict['cur_pd_sub_last_time'] = cur_order['promise_deliver_time'] - last_time
    features_dict['cur_ep_sub_last_time'] = cur_order['estimate_pick_time'] - last_time
    features_dict['cur_assigned_sub_last_time'] = last_time - cur_order['assigned_time']
    features_dict['last_assigned_sub_last_time'] = last_time - last_order['assigned_time']

    features_dict['cur_create_sub_last_time'] = last_time - cur_order['create_time']
    features_dict['last_create_sub_last_time'] = last_time - last_order['create_time']
    features_dict['cur_confirm_sub_last_time'] = last_time - cur_order['confirm_time']
    features_dict['last_confirm_sub_last_time'] = last_time - last_order['confirm_time']

    #distance
    df_a_distance_relation = df_a_distance[last_action.tracking_id][cur_action.tracking_id]
    df_a_distance_relation = df_a_distance_relation.sort_values(by = ['source_type', 'target_type'])

    src_rank = mp_row[last_action.action_type]
    tgt_rank = mp_row[cur_action.action_type]
    if same_order:
        if src_rank == tgt_rank:
            raise ValueError('no distance row for action type %r to itself within order %r'
                             % (cur_action.action_type, cur_action.tracking_id))
        # The same-order table skips the diagonal, so each source type owns two rows and
        # targets ranked after the source shift down by one. The previous
        # ``src * 2 + tgt`` ignored that shift and selected the wrong row whenever
        # tgt_rank > src_rank (e.g. DELIVERY -> PICKUP picked row 4 instead of 3).
        idx = src_rank * 2 + tgt_rank - (1 if tgt_rank > src_rank else 0)
    else:
        idx = src_rank * 3 + tgt_rank

    # Walk the sorted rows in (source, target) order; same-order diagonal pairs have
    # no row and get the constant 1.
    type_codes = ('a', 'd', 'p')
    row_pos = 0
    for src_code in type_codes:
        for tgt_code in type_codes:
            key = 'distance_%s_%s' % (src_code, tgt_code)
            if same_order and src_code == tgt_code:
                features_dict[key] = 1.
            else:
                features_dict[key] = df_a_distance_relation.iloc[row_pos]['grid_distance']
                row_pos += 1

    a_distance_row = df_a_distance_relation.iloc[idx]
    features_dict['grid_distance'] = a_distance_row['grid_distance']

    pos_dict = a_distance_row[['source_lng', 'source_lat', 'target_lng', 'target_lat']].to_dict()
    pos_mutual_dict = mutual2pos(pos_dict['source_lat'], pos_dict['source_lng'], pos_dict['target_lat'], pos_dict['target_lng'], 'last_cur_position')
    features_dict.update(pos_dict)
    features_dict.update(pos_mutual_dict)

    #estimate time
    features_dict['cur_action_estimate_time'] = features_dict['grid_distance'] / speed
    # Slack left before the estimated pick time / promised deliver time once the
    # estimated travel time is spent; both are emitted regardless of the action type.
    time_diff_pickup = features_dict['cur_ep_sub_last_time'] - features_dict['cur_action_estimate_time']
    time_diff_delivery = features_dict['cur_pd_sub_last_time'] - features_dict['cur_action_estimate_time']

    features_dict.update(_estimate_time_bucket_features('pickup', time_diff_pickup))
    features_dict.update(_estimate_time_bucket_features('delivery', time_diff_delivery))

    return features_dict

def softmax_np(x):
    """Softmax of ``x`` along axis 0.

    The per-column maximum is subtracted before exponentiating. This leaves the
    result mathematically unchanged but prevents ``np.exp`` from overflowing to
    ``inf`` (and the quotient from becoming NaN) for large inputs.

    Parameters
    ----------
    x : array-like
        Values to normalise; lists, ndarrays and pandas objects are accepted.

    Returns
    -------
    Same container kind that ``np.exp(x)`` yields, with entries summing to 1
    along axis 0. An empty input yields an empty result.
    """
    if np.size(x) == 0:
        # np.max has no identity for empty input; there is nothing to normalise anyway.
        return np.exp(x)
    shifted = np.subtract(x, np.max(x, axis=0))
    exp_shifted = np.exp(shifted)
    return exp_shifted / np.sum(exp_shifted, axis=0)

def get_agg_action_feature_dict(tracking_ids, action_types, cur_action, last_time, \
                                  df_a_action, df_a_order, df_a_distance, prefix):
    """Aggregate (mean / min / max) the relation between ``cur_action`` and a set of other actions.

    Parameters
    ----------
    tracking_ids, action_types : sequences
        Parallel sequences describing the other actions (order id and action type).
    cur_action : row-like
        Candidate action; must expose ``tracking_id`` and ``action_type``.
    last_time : number
        Expected time of the last known action, used as the time reference.
    df_a_action : DataFrame
        Not used by this function; kept for interface compatibility.
    df_a_order : DataFrame
        Order table looked up with ``.loc[tracking_id]``.
    df_a_distance : mapping
        ``df_a_distance[a][b]`` is the distance table from order ``a`` to order ``b``.
    prefix : str
        Prefix of every produced feature name.

    Returns
    -------
    dict
        ``<feature>_mean``, ``<feature>_min`` and ``<feature>_max`` for each
        per-action feature. Empty when no (cur -> other) distance row exists for
        any of the given actions.
    """
    cur_tracking_id = cur_action.tracking_id
    cur_action_type = cur_action.action_type

    agg_feature_list = []
    for tracking_id, next_action_type in zip(tracking_ids, action_types):
        df_a_distance_relation = df_a_distance[cur_tracking_id][tracking_id]
        query_row = df_a_distance_relation.query('source_type == @cur_action_type & target_type == @next_action_type')
        if query_row.shape[0] == 0:
            # No distance row for this type pair (e.g. the action paired with itself).
            continue
        se_query_row = query_row.iloc[0]
        # Fetch the order row once instead of once per time feature.
        order_row = df_a_order.loc[tracking_id]

        agg_feature_dict = {}
        pos_dict = se_query_row[['source_lng', 'source_lat', 'target_lng', 'target_lat']].to_dict()
        pos_mutual_dict = mutual2pos(pos_dict['source_lat'], pos_dict['source_lng'], pos_dict['target_lat'], pos_dict['target_lng'], prefix)
        agg_feature_dict.update(pos_mutual_dict)
        agg_feature_dict[prefix + '_grid_distance'] = se_query_row['grid_distance']
        agg_feature_dict[prefix + '_create_time_sub_last'] = last_time - order_row.create_time
        agg_feature_dict[prefix + '_confirm_time_sub_last'] = last_time - order_row.confirm_time
        agg_feature_dict[prefix + '_assigned_time_sub_last'] = last_time - order_row.assigned_time
        agg_feature_dict[prefix + '_promise_deliver_time_sub_last'] = order_row.promise_deliver_time - last_time
        agg_feature_dict[prefix + '_estimate_pick_time_sub_last'] = order_row.estimate_pick_time - last_time

        agg_feature_list.append(agg_feature_dict)

    if not agg_feature_list:
        # Nothing to aggregate: return no features explicitly rather than relying on how
        # an empty DataFrame's column index reacts to string concatenation.
        return {}

    df_agg_feature = pd.DataFrame(agg_feature_list)

    feature_dict = {}
    aggregates = (
        ('_mean', df_agg_feature.mean()),
        ('_min', df_agg_feature.min()),
        ('_max', df_agg_feature.max()),
    )
    for suffix, se_stat in aggregates:
        se_stat.index = se_stat.index + suffix
        feature_dict.update(se_stat.to_dict())

#     se_dist_weight = df_agg_feature.mul(softmax_np(1./df_agg_feature[prefix + '_haversine']), axis=0).sum()
#     se_dist_weight.index = se_dist_weight.index + '_haversine_weight'
#     feature_dict.update(se_dist_weight.to_dict())

#     se_time_weight = df_agg_feature.mul(softmax_np(1./df_agg_feature[prefix + '_promise_deliver_time_sub_last']), axis=0).sum()
#     se_time_weight.index = se_time_weight.index + '_pd_time_weight'
#     feature_dict.update(se_time_weight.to_dict())

    return feature_dict

def a_feature_dict(i, j, df_a_action, start_action_expect_time, cur_action, last_action, know_len, full_len, \
                   se_a_couier, df_a_order, df_a_distance, last_load, unknow_tracking_ids, unknow_action_types,\
                  know_tracking_ids, know_action_types):
    """Assemble the full feature row for candidate action ``j`` of sample ``i``.

    The row combines bookkeeping fields, time encoding of the last known action,
    courier attributes, static features of the last and candidate actions, load
    features, cross features between the two actions, and aggregates over the
    not-yet-known, already-known and all actions of the wave.

    Note: the ``cur_action`` argument is ignored; the candidate is always re-read
    from ``df_a_action.iloc[j]``.

    Returns
    -------
    dict
        Feature name -> value.
    """
    cur_action = df_a_action.iloc[j]
    last_expect_time = last_action.expect_time

    features_dict = {}

    # Bookkeeping fields (not meant to be used as model inputs).
    features_dict['origin_i'] = i
    features_dict['target_position'] = j
    features_dict['start_action_expect_time'] = start_action_expect_time
    features_dict['last_action_expect_time'] = last_expect_time
    features_dict['cur_action_expect_time'] = cur_action.expect_time

    # Time encoding of the last known action.
    features_dict.update(time_vector(last_expect_time, 'last_action_time'))

    # Features fixed for the whole sample i.
    features_dict['know_lens'] = know_len
    features_dict['full_lens'] = full_len
    courier_columns = ['level', 'speed', 'max_load'] + courier_delay_features
    features_dict.update(se_a_couier[courier_columns])

    # Last known action.
    features_dict.update(get_static_action_feature_dict('last', last_action, df_a_order, df_a_distance))
    features_dict['last_load'] = last_load

    # Candidate action: a pickup adds one item to the load, anything else removes one.
    features_dict.update(get_static_action_feature_dict('cur', cur_action, df_a_order, df_a_distance))
    load_delta = 1 if cur_action.action_type == "PICKUP" else -1
    features_dict['cur_load'] = last_load + load_delta
    features_dict['over_load'] = features_dict['cur_load'] > features_dict['max_load']

    # Relation between the last known action and the candidate.
    features_dict.update(
        get_cross_action_feature_dict(last_action, cur_action, df_a_order, df_a_distance, features_dict['speed'])
    )

    # Aggregates over the unknown (future), known (past) and all actions, in that order.
    agg_specs = (
        (unknow_tracking_ids, unknow_action_types, 'future_agg'),
        (know_tracking_ids, know_action_types, 'pass_agg'),
        (list(df_a_action.tracking_id), list(df_a_action.action_type), 'all_agg'),
    )
    for agg_tracking_ids, agg_action_types, agg_prefix in agg_specs:
        features_dict.update(
            get_agg_action_feature_dict(agg_tracking_ids, agg_action_types, cur_action, last_expect_time,
                                        df_a_action, df_a_order, df_a_distance, agg_prefix)
        )

    return features_dict

# df_features_train = generate_gbdt_df(key_list_train[:10], mp_info_dict_train.copy(), data_info_dict_train.copy())
# show_df(df_features_train[:10])

## data augmentation & multi-process

In [7]:
import multiprocessing
from multiprocessing import Manager,Process,Lock    
import gc

# Courier columns that a_feature_dict copies into every feature row,
# alongside 'level', 'speed' and 'max_load'.
courier_delay_features = [
    'pickup_delay_rate',
    'delivery_delay_rate',
    'pickup_delay_time_avg',
    'delivery_delay_time_avg',
    'delivery_delay_count',
    'pickup_delay_count',
]

def load_data(start, end):
    """Load the three pickled inputs of the training part covering keys ``[start, end)``.

    Returns
    -------
    tuple
        ``(key_list, mp_info_dict, data_info_dict)`` read, in that order, from the
        ``keylist``, ``mp_info_dict`` and ``data_info_dict`` pickle files of the part.

    Note: ``pickle.load`` executes arbitrary code from the file; only use this on
    files produced by this project.
    """
    path_templates = (
        '../user_data/part_train_mp/keylist_%d_%d.pickle',
        '../user_data/part_train_mp/mp_info_dict_%d_%d.pickle',
        '../user_data/part_train_mp/data_info_dict_%d_%d.pickle',
    )

    loaded_objects = []
    for template in path_templates:
        with open(template % (start, end), 'rb') as f:
            loaded_objects.append(pickle.load(f))

    key_list, mp_info_dict, data_info_dict = loaded_objects
    return key_list, mp_info_dict, data_info_dict

def generate_gbdt_df_multiprocess_part_data_arugment(key_list, mp_info_dict, data_info_dict, n_process):
    """Build the augmented feature table for one data part using worker processes.

    For every sample ``i`` and every known-prefix length in
    ``data_info_dict['know_lens_list'][i]``, one feature row is produced per
    candidate next action ``j`` that is not listed as impossible.

    Parameters
    ----------
    key_list : sequence of (date, courier, wave_idx)
        Keys of the samples in this part.
    mp_info_dict : dict
        Nested lookups under the keys 'action', 'distance', 'order' and 'couriers'.
    data_info_dict : dict
        Per-sample arrays under 'know_lens_list', 'full_lens' and 'impossible_idxs_list'.
    n_process : int
        Target number of worker processes; the samples are split into contiguous
        chunks of ``len(key_list) // n_process`` keys (one extra worker handles a
        remainder).

    Returns
    -------
    DataFrame
        One row per generated candidate, sorted by ('origin_i', 'target_position')
        with a fresh 0..n-1 index. Empty if no candidate was generated.

    Raises
    ------
    RuntimeError
        If any worker process exits with a non-zero exit code, so that a partial
        result is never mistaken for a complete one.

    Notes
    -----
    The worker is a nested function, so this relies on the 'fork' start method
    (the nested target cannot be pickled for 'spawn').
    """
    lock = Lock()
    cnt = multiprocessing.Value("d", 0.0)

    # Use the manager as a context manager so its server process is shut down on exit.
    with Manager() as manager:
        features_dict_list_process = manager.list()

        def a_process(key_list, mp_info_dict, data_info_dict, start, end):
            """Generate the feature rows for samples ``start`` .. ``end - 1``."""
            mp_action, mp_distance = mp_info_dict['action'], mp_info_dict['distance']
            mp_order, mp_couriers = mp_info_dict['order'], mp_info_dict['couriers']
            know_lens_list = data_info_dict['know_lens_list']
            full_lens = data_info_dict['full_lens']
            impossible_idxs_list = data_info_dict['impossible_idxs_list']

            for i in range(start, end):
                with lock:
                    cnt.value += 1
                    if cnt.value % 1000 == 0:
                        logging.info('finish %d samples' % cnt.value)

                if len(know_lens_list[i]) == 0:
                    continue

                # Everything below depends only on the sample, not on the prefix length.
                date, courier, wave_idx = key_list[i]
                df_a_action = mp_action[date][courier][wave_idx]
                df_a_distance = mp_distance[date][courier][wave_idx]
                df_a_order = mp_order[date][courier][wave_idx]
                df_a_order.index = df_a_order.tracking_id
                se_a_couier = mp_couriers[courier][date]
                full_len = full_lens[i]
                start_action_expect_time = df_a_action.iloc[0].expect_time

                for know_len in know_lens_list[i]:
                    last_action = df_a_action.iloc[know_len - 1]

                    df_know_action = df_a_action.iloc[: know_len]
                    know_tracking_ids, know_action_types = list(df_know_action.tracking_id), list(df_know_action.action_type)
                    df_unknow_action = df_a_action.iloc[know_len : full_len]
                    unknow_tracking_ids, unknow_action_types = list(df_unknow_action.tracking_id), list(df_unknow_action.action_type)

                    # Load after the known prefix = pickups - deliveries within that prefix.
                    # Pickups must be counted on the prefix only; counting them over the
                    # whole wave made delivery_num (and therefore last_load) wrong.
                    pickup_num = df_know_action.query('action_type == "PICKUP"').shape[0]
                    delivery_num = know_len - pickup_num
                    last_load = pickup_num - delivery_num

                    for j in range(know_len, full_len):
                        if j - know_len in impossible_idxs_list[i][know_len]:
                            continue

                        cur_action = df_a_action.iloc[j]
                        features_dict = a_feature_dict(i, j, df_a_action, start_action_expect_time, cur_action, last_action, know_len, \
                                                       full_len, se_a_couier, df_a_order, df_a_distance, last_load, unknow_tracking_ids, \
                                                       unknow_action_types, know_tracking_ids, know_action_types)
                        with lock:
                            features_dict_list_process.append(features_dict)

        len_key_list = len(key_list)
        chunk_size = int(len_key_list / n_process)
        if chunk_size > 0:
            bounds = [(chunk_start, min(chunk_start + chunk_size, len_key_list))
                      for chunk_start in range(0, len_key_list, chunk_size)]
        else:
            # Fewer samples than workers: a single worker handles everything.
            bounds = [(0, len_key_list)]

        process_list = []
        for chunk_start, chunk_end in bounds:
            process = Process(target=a_process, args=(key_list, mp_info_dict, data_info_dict, chunk_start, chunk_end))
            process_list.append(process)
            process.start()

        for process in process_list:
            process.join()

        failed = [process.name for process in process_list if process.exitcode != 0]
        if failed:
            raise RuntimeError('%d worker process(es) failed: %s' % (len(failed), ', '.join(failed)))

        # Copy the rows out of the proxy before the manager shuts down.
        features_dict_list = list(features_dict_list_process)

    df_features = pd.DataFrame(features_dict_list)
    if not features_dict_list:
        # No candidate rows: there are no columns to sort by.
        return df_features
    df_features = df_features.sort_values(by = ['origin_i', 'target_position'])
    df_features.index = range(df_features.shape[0])
    return df_features

def generate_gbdt_df_data_arugment_multiprocess(part=32, len_key_list=82533, n_process=32):
    """Build and pickle the augmented training features part by part.

    The key range ``[0, len_key_list)`` is split into chunks of
    ``int(len_key_list / part)`` keys. The chunk boundaries must match the file
    names that ``load_data`` reads. Parts whose output pickle already exists are
    skipped, so an interrupted run can be resumed by calling the function again.

    Parameters
    ----------
    part : int, default 32
        Divisor used to derive the chunk size.
    len_key_list : int, default 82533
        Total number of keys across all parts.
    n_process : int, default 32
        Worker processes used for each part.

    Returns
    -------
    None
        The features are written to
        ``../user_data/generate_train_data_arugment_sample/`` only.
    """
    part_size = int(len_key_list / part)
    bounds = [(start, min(start + part_size, len_key_list))
              for start in range(0, len_key_list, part_size)]

    output_dir = '../user_data/generate_train_data_arugment_sample'
    # Create the output directory up front so a missing folder is not discovered
    # only after a part has been computed.
    os.makedirs(output_dir, exist_ok=True)

    logging.info('start build part features')
    for (s, e) in bounds:
        logging.info('start s:%d, e:%d' % (s, e))
        pickle_path = '../user_data/generate_train_data_arugment_sample/df_feature_train_%d_%d.pickle' % (s, e)
        if os.path.exists(pickle_path):
            continue
        key_list, mp_info_dict, data_info_dict = load_data(s, e)
        logging.info('finish reading s:%d, e:%d' % (s, e))
        df_feature_train_part = generate_gbdt_df_multiprocess_part_data_arugment(key_list, mp_info_dict, data_info_dict, n_process)
        logging.info('finish s:%d, e:%d' % (s, e))

        # Write to a temporary file and rename it into place: an interrupted dump must
        # not leave a truncated pickle that the exists() check above would treat as done.
        tmp_path = pickle_path + '.tmp'
        with open(tmp_path, 'wb') as f:
            pickle.dump(df_feature_train_part, f)
        os.replace(tmp_path, pickle_path)

        del key_list, mp_info_dict, data_info_dict, df_feature_train_part
        gc.collect()
    


In [8]:
# Build the per-part training feature pickles; parts whose pickle already exists are skipped.
# NOTE(review): generate_gbdt_df_data_arugment_multiprocess() has no return statement, so
# df_features_train is None after this cell -- the features only exist as pickle files on disk.
logging.info('start building df_features')
df_features_train = generate_gbdt_df_data_arugment_multiprocess()
logging.info('finish building df_features')

2020-04-13 20:25:34,471 - INFO - start building df_features
2020-04-13 20:25:34,473 - INFO - start build part features
2020-04-13 20:25:34,474 - INFO - start s:0, e:2579
2020-04-13 20:25:57,038 - INFO - finish reading s:0, e:2579
2020-04-13 20:26:16,207 - INFO - finish 200 samples
2020-04-13 20:26:37,838 - INFO - finish 400 samples
2020-04-13 20:26:57,772 - INFO - finish 600 samples
2020-04-13 20:27:18,297 - INFO - finish 800 samples
2020-04-13 20:27:33,596 - INFO - finish 1000 samples
2020-04-13 20:27:58,304 - INFO - finish 1200 samples
2020-04-13 20:28:22,756 - INFO - finish 1400 samples
2020-04-13 20:28:48,013 - INFO - finish 1600 samples
2020-04-13 20:29:06,517 - INFO - finish 1800 samples
2020-04-13 20:29:23,714 - INFO - finish 2000 samples
2020-04-13 20:29:47,702 - INFO - finish 2200 samples
2020-04-13 20:30:15,075 - INFO - finish 2400 samples
2020-04-13 20:31:35,894 - INFO - finish s:0, e:2579
2020-04-13 20:31:36,973 - INFO - start s:2579, e:5158
2020-04-13 20:32:02,243 - INFO -

KeyboardInterrupt: 

  File "<ipython-input-7-3e3051fd1a76>", line 70, in a_process
    unknow_action_types, know_tracking_ids, know_action_types)
Traceback (most recent call last):
  File "<ipython-input-2-72748e781038>", line 221, in a_feature_dict
    df_a_action, df_a_order, df_a_distance, 'future_agg')
  File "/home/huangweilin/anaconda3/envs/hyr/lib/python3.6/multiprocessing/process.py", line 258, in _bootstrap
    self.run()
  File "<ipython-input-2-72748e781038>", line 134, in get_agg_action_feature_dict
    query_row = df_a_distance_relation.query('source_type == @cur_action_type & target_type == @next_action_type')
  File "/home/huangweilin/anaconda3/envs/hyr/lib/python3.6/multiprocessing/process.py", line 93, in run
    self._target(*self._args, **self._kwargs)
  File "/home/huangweilin/anaconda3/envs/hyr/lib/python3.6/site-packages/pandas/core/frame.py", line 3199, in query
    res = self.eval(expr, **kwargs)
  File "<ipython-input-7-3e3051fd1a76>", line 70, in a_process
    unknow_action_types

  File "<ipython-input-7-3e3051fd1a76>", line 70, in a_process
    unknow_action_types, know_tracking_ids, know_action_types)
  File "/home/huangweilin/anaconda3/envs/hyr/lib/python3.6/site-packages/pandas/core/internals/managers.py", line 1267, in <listcomp>
    for blk in self.blocks
  File "/home/huangweilin/anaconda3/envs/hyr/lib/python3.6/site-packages/pandas/core/computation/expr.py", line 441, in visit
    return visitor(node, **kwargs)
  File "/home/huangweilin/anaconda3/envs/hyr/lib/python3.6/site-packages/pandas/core/computation/expr.py", line 58, in tokenize_string
    for toknum, tokval, _, _, _ in token_generator:
  File "<ipython-input-2-72748e781038>", line 228, in a_feature_dict
    df_a_action, df_a_order, df_a_distance, 'all_agg')
  File "<ipython-input-2-72748e781038>", line 221, in a_feature_dict
    df_a_action, df_a_order, df_a_distance, 'future_agg')
  File "/home/huangweilin/anaconda3/envs/hyr/lib/python3.6/site-packages/pandas/core/internals/blocks.py", line 13

  File "/home/huangweilin/anaconda3/envs/hyr/lib/python3.6/multiprocessing/process.py", line 258, in _bootstrap
    self.run()
  File "/home/huangweilin/anaconda3/envs/hyr/lib/python3.6/multiprocessing/process.py", line 93, in run
    self._target(*self._args, **self._kwargs)
Traceback (most recent call last):
  File "<ipython-input-7-3e3051fd1a76>", line 70, in a_process
    unknow_action_types, know_tracking_ids, know_action_types)
  File "/home/huangweilin/anaconda3/envs/hyr/lib/python3.6/multiprocessing/process.py", line 258, in _bootstrap
    self.run()
  File "<ipython-input-2-72748e781038>", line 221, in a_feature_dict
    df_a_action, df_a_order, df_a_distance, 'future_agg')
  File "/home/huangweilin/anaconda3/envs/hyr/lib/python3.6/multiprocessing/process.py", line 93, in run
    self._target(*self._args, **self._kwargs)
  File "<ipython-input-2-72748e781038>", line 134, in get_agg_action_feature_dict
    query_row = df_a_distance_relation.query('source_type == @cur_action_typ