In [None]:
import pickle

import numpy as np
import numpy as py  # original alias, kept in case other cells reference `py`
import pandas as pd


# Save an object with pickle
def save_pkl(data, datadir):
    """Serialize `data` with pickle into the file at path `datadir`.

    The file is opened in binary write mode, so existing content at that
    path is overwritten. Returns None.
    """
    with open(datadir, 'wb') as out_file:
        pickle.Pickler(out_file).dump(data)
    
# Load an object with pickle
def read_pkl(datadir):
    """Open the file at path `datadir` in binary mode and return the unpickled object."""
    # NOTE: unpickling can execute arbitrary code; only load files from a trusted source.
    with open(datadir, 'rb') as in_file:
        return pickle.load(in_file)

# Reduce DataFrame memory consumption
def reduce_size(df, verbose=True):
    """Downcast numeric columns of `df` to the smallest dtype that holds their range.

    Only columns whose dtype matches one of the names in `numerics` are
    touched. Integer columns are cast to the first of int8/int16/int32/int64
    whose representable range contains the column's min and max (bounds
    inclusive). Other matched columns are cast to float16 or float32 when the
    range fits, otherwise float64.

    NOTE(review): the float casts only check the value range, so values may be
    rounded when moving to float16/float32 -- confirm that is acceptable for
    downstream use.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame to shrink. Columns are reassigned on this object (in place).
    verbose : bool, default True
        When true, print the resulting memory usage and the percent reduction.

    Returns
    -------
    pandas.DataFrame
        The same `df` object that was passed in.
    """
    numerics = ['int16', 'int32', 'int64', 'float16', 'float32', 'float64']
    int_candidates = (np.int8, np.int16, np.int32, np.int64)
    float_candidates = (np.float16, np.float32)

    start_mem = df.memory_usage().sum() / 1024**2
    for col in df.columns:
        col_type = df[col].dtypes
        if col_type not in numerics:
            continue
        c_min = df[col].min()
        c_max = df[col].max()
        if str(col_type)[:3] == 'int':
            # Smallest integer type whose inclusive range covers the column.
            # If min/max are NaN (empty column) nothing matches and the
            # column is left as it is.
            for candidate in int_candidates:
                limits = np.iinfo(candidate)
                if limits.min <= c_min and c_max <= limits.max:
                    df[col] = df[col].astype(candidate)
                    break
        else:
            # Fall back to float64 when no smaller float type covers the range
            # (this also happens when min/max are NaN).
            target = np.float64
            for candidate in float_candidates:
                limits = np.finfo(candidate)
                if limits.min <= c_min and c_max <= limits.max:
                    target = candidate
                    break
            df[col] = df[col].astype(target)
    end_mem = df.memory_usage().sum() / 1024**2
    if verbose:
        # Guard the percentage against a zero starting size.
        reduction = 100 * (start_mem - end_mem) / start_mem if start_mem else 0.0
        print('Mem. usage decreased to {:5.2f} Mb ({:.1f}% reduction)'.format(end_mem, reduction))
    return df

def get_default_lgb_params():
    """Return a newly built dict of default training parameters.

    A new dict is created on every call, so callers may modify the result
    freely.

    NOTE(review): presumably these are LightGBM parameters (per the function
    name). Some keys look like aliases of each other with different values
    ('feature_fraction' 0.9 vs 'colsample_bytree' 0.8, 'bagging_fraction' 1.0
    vs 'subsample' 0.8) -- confirm which value is intended.
    """
    return {
        # Task and evaluation
        'boosting_type': 'gbdt',
        'objective': 'binary',
        'metric': ['binary_logloss', 'auc'],
        # Tree shape and learning rate
        'num_leaves': 31,
        'learning_rate': 0.1,
        # Row / column sampling
        'feature_fraction': 0.9,
        'bagging_fraction': 1.0,
        'bagging_seed': 0,
        'bagging_freq': 1,
        'subsample': 0.8,
        'colsample_bytree': 0.8,
        'subsample_freq': 1,
        # Regularization
        'reg_alpha': 0,
        'reg_lambda': 0.01,
        # Misc
        'verbose': 1,
        'max_depth': -1,
        'random_state': 2019,
        'min_child_samples': 50,
        # Device selection
        'device': 'gpu',
        'gpu_platform_id': 0,
        'gpu_device_id': 0,
    }

In [10]:

    
import torch.nn as nn
import torch
import math


class PositionalEmbedding(nn.Module):
    """Fixed (non-trainable) sine/cosine position table.

    A table of shape (1, max_len, d_model) is computed once at construction
    and stored as the buffer `pe`. Even feature columns hold
    sin(position * div_term) and odd columns hold cos(position * div_term),
    with div_term = exp(-2i * ln(10000) / d_model).

    Parameters
    ----------
    d_model : int
        Number of feature columns in the table. Odd values are supported: the
        last column is then a sine column with no matching cosine.
    max_len : int, default 512
        Number of positions in the table.
    """

    def __init__(self, d_model, max_len=512):
        super().__init__()

        # Compute the positional encodings once in log space.
        pe = torch.zeros(max_len, d_model).float()
        pe.requires_grad = False

        position = torch.arange(0, max_len).float().unsqueeze(1)
        div_term = (torch.arange(0, d_model, 2).float() * -(math.log(10000.0) / d_model)).exp()

        pe[:, 0::2] = torch.sin(position * div_term)
        # There are d_model // 2 odd columns; for odd d_model that is one fewer
        # than the number of frequencies, so trim div_term to match.
        pe[:, 1::2] = torch.cos(position * div_term[: d_model // 2])

        # Leading axis of size 1 so the table broadcasts over a batch.
        pe = pe.unsqueeze(0)
        # Registered as a buffer rather than a parameter.
        self.register_buffer('pe', pe)

    def forward(self, x):
        """Return the first `x.size(1)` positions of the table.

        Only the size of dimension 1 of `x` is used; its values are ignored.
        The result has shape (1, min(x.size(1), max_len), d_model).
        """
        return self.pe[:, :x.size(1)]

In [33]:
# Small float tensor used by the scratch cells below (three 3x2 blocks).
x = torch.FloatTensor([[[1,2],[2,4],[3,5]], [[1,3],[2,4],[3,4]], [[1,3],[2,4],[3,4]]])

In [34]:
# Inspect the dimensions of x.
x.shape

torch.Size([3, 3, 2])

In [32]:
# Mean over the last dimension, kept as a size-1 axis.
# NOTE(review): the saved output shows only 2 outer blocks while x is defined with 3,
# and this cell's execution count precedes the cell defining x -- the output looks
# stale; re-run on a fresh kernel to confirm.
x.mean(dim=-1, keepdims=True)

tensor([[[1.5000],
         [3.0000],
         [4.0000]],

        [[2.0000],
         [3.0000],
         [3.5000]]])

In [19]:
# Boolean mask of positive entries: insert an axis at position 1, tile it
# x.size(1) times, then insert another axis at position 1.
# NOTE(review): the saved output has shape (1, 1, 3, 3), so x was presumably a 2-D
# (1, 3) tensor when this ran. With the 3-D x defined earlier in this notebook,
# repeat() gets 3 sizes for a 4-D tensor -- verify this cell still runs.
mask = (x > 0).unsqueeze(1).repeat(1, x.size(1), 1).unsqueeze(1)


In [20]:
# Display the mask built in the previous cell.
mask

tensor([[[[True, True, True],
          [True, True, True],
          [True, True, True]]]])