# CPU Porting

## Manual actions:

* check that every `AT_CHECK` call has just 2 args: condition and message
* check the order of height, width in parameters of functions such as: resize
* remove the `aten/src/THNN/generic/*.c` files that are being ported
* remove functions to be ported from:
  * `/aten/src/THNN/CMakeLists.txt`
  * `/aten/src/ATen/native/LegacyNNDefinitions.cpp`
  * `/aten/src/THNN/init.cpp`
  * if you are porting cpu and gpu at the same time, also remove the respective items from
    * `/aten/src/ATen/nn.yaml`
    * `/aten/src/THNN/generic/THNN.h`
* add dispatch for cpu and gpu in `/aten/src/ATen/native/native_functions.yaml`

## Warnings:

* the conversion is not guaranteed to work for nested parentheses

In [2]:
import os
import re
import shutil

In [27]:
# Home directory of the current user; raises KeyError if HOME is not set.
home_path = os.environ['HOME']
# Directory that receives the copied and converted files.
output_path = '/tmp/pytorch/output/cpu'

# Create the output directory (with parents); an existing one is not an error.
os.makedirs(output_path, exist_ok=True)

# Location of the PyTorch checkout, relative to the home directory.
pytorch_path = os.path.join(
    home_path,
    'dev/quansight/pytorch-project/pytorch'
)
# THNN source tree inside the checkout; files are read from its `generic`
# subdirectory by create_aten_cpu_files.
thnn_path = os.path.join(pytorch_path, 'aten/src/THNN')
# ATen native directory inside the checkout.
at_cpu_path = os.path.join(pytorch_path, 'aten/src/ATen/native')

# Names of the THNN files to port.
thnn_files = [
    'Im2Col.c',
]
# NOTE(review): not referenced by any other cell visible here; presumably
# reserved for header files to port -- confirm.
thnn_h_files = []

In [60]:
# Copy the checkout's .clang-format file into the output directory
# (presumably so that clang-format, run later on the generated files, picks up
# the PyTorch style -- confirm).
!cp {pytorch_path}/.clang-format {output_path}

In [14]:
def _remove_ext(v):
    """Return ``v`` without its final ``.ext`` suffix.

    Only the text after the last dot counts as the extension, matching
    ``_get_ext``: ``'a.b.c'`` becomes ``'a.b'``. A value without any dot is
    returned unchanged.
    """
    if '.' in v:
        # Split once from the right so that dots earlier in the name survive.
        return v.rsplit('.', 1)[0]
    return v

def _get_ext(v):
    """Return the text after the last dot of ``v``, prefixed with the dot.

    An empty string is returned when ``v`` contains no dot.
    """
    _, dot, ext = v.rpartition('.')
    return dot + ext if dot else ''
    
def _dimension_rule(prefix, suffix):
    """Build a rule that swaps a leading ``prefix`` for a trailing ``suffix``.

    The returned callable leaves values that do not start with ``prefix``
    untouched. Otherwise it removes every occurrence of ``prefix`` from the
    extension-less value, appends ``suffix`` and re-attaches the extension.
    """
    def rule(v, w=prefix):
        if not v.startswith(w):
            return v
        return _remove_ext(v).replace(w, '') + suffix + _get_ext(v)
    return rule


# Name rules: Temporal* -> *1d, Spatial* -> *2d, Volumetric* -> *3d.
RULES_NAME = [
    _dimension_rule(prefix, suffix)
    for prefix, suffix in (
        ('Temporal', '1d'),
        ('Spatial', '2d'),
        ('Volumetric', '3d'),
    )
]

# Name rules plus the 'Sampling' -> 'Sample' rename.
RULES_NAME_EXTRA = RULES_NAME + [
    lambda v: v.replace('Sampling', 'Sample'),
]

# Default rule set used by convert_filenames: a (currently empty) list of
# leading rules followed by RULES_NAME_EXTRA.
RULES = [] + RULES_NAME_EXTRA


def apply_rules(rules, text):
    """Feed ``text`` through every callable in ``rules``, in order.

    Each rule receives the result of the previous one; the final result is
    returned. With no rules, ``text`` is returned as is.
    """
    for rule in rules:
        text = rule(text)
    return text


def convert_filenames(filenames, extra_rules: list = None):
    """Return the converted name for every entry of ``filenames``.

    Each name is passed through ``RULES`` followed by ``extra_rules`` using
    ``apply_rules``.

    Parameters
    ----------
    filenames
        Iterable of file names to convert.
    extra_rules
        Optional list of additional one-argument callables applied after
        ``RULES``. ``None`` (the default) means no additional rules.

    Returns
    -------
    list
        The converted names, in the order of ``filenames``.
    """
    # A None sentinel replaces the former mutable default argument.
    if extra_rules is None:
        extra_rules = []
    rules = RULES + extra_rules

    return [apply_rules(rules, fn) for fn in filenames]


# test
# Derive the ATen file names from the THNN file names with the default rules.
# NOTE(review): the recorded output below shows 'Im2Col.cpp', but none of the
# rules visible in this cell changes the '.c' extension -- re-run to confirm.
at_cpu_files = convert_filenames(thnn_files)
# Bare expression so that the notebook displays the resulting list.
at_cpu_files

['Im2Col.cpp']

In [30]:
def create_aten_cpu_files(
    output_path: str,
    thnn_path: str,
    at_cpu_path: str,
    th_at_filenames: list
):
    """Copy files from ``<thnn_path>/generic`` into ``output_path``.

    ``th_at_filenames`` yields ``(source_name, target_name)`` pairs: the first
    is looked up inside ``<thnn_path>/generic`` and the second is the name the
    copy receives inside ``output_path``. ``shutil.copy2`` is used, so file
    metadata is copied along with the contents.

    ``at_cpu_path`` is accepted but not used by this function.
    """
    for source_name, target_name in th_at_filenames:
        shutil.copy2(
            os.path.join(thnn_path, 'generic', source_name),
            os.path.join(output_path, target_name),
        )

In [31]:
# test
# Copy every THNN file into `output_path` under its converted name.
create_aten_cpu_files(
    output_path, 
    thnn_path,
    at_cpu_path,
    zip(thnn_files, at_cpu_files)
)

# Show the output directory and list its contents for a visual check.
print(output_path)
!ls -lah {output_path}

/tmp/pytorch/output/cpu
total 16K
drwxrwxr-x 2 xmn xmn 4,0K may 15 22:47 .
drwxrwxr-x 4 xmn xmn 4,0K may 15 22:32 ..
-rw-rw-r-- 1 xmn xmn 4,5K abr 17 12:32 Im2Col.cpp


In [58]:
def add_replace_rule(by, to):
    """Return a one-argument callable replacing every ``by`` with ``to``."""
    def _replace(v):
        return v.replace(by, to)
    return _replace


def th2at(text: str, extra_rules: list = None):
    """Rewrite TH/THNN source text towards ATen-style source text.

    The conversion is purely textual and runs in two passes:

    1. Literal pass: every ``(old, new)`` pair of the built-in table, followed
       by the pairs in ``extra_rules``, is applied with ``str.replace`` in
       order. Later pairs therefore see the output of earlier ones.
    2. Regex pass: for every ``(pattern, template)`` pair the matches are
       collected from the text as it is when that pair starts. For each match
       the replacement is built from the captured groups (``str.format`` on
       the template, or just the first group when the template is ``None``),
       passed through ``RULES_NAME_EXTRA``, and substituted for every
       occurrence of the matched text.

    The regex patterns capture arguments with greedy ``.*``, so nested
    parentheses or several calls on one line may be rewritten incorrectly;
    the result is meant to be reviewed by hand.

    Parameters
    ----------
    text
        Source text to convert.
    extra_rules
        Optional list of additional ``(old, new)`` string pairs applied after
        the built-in literal pairs. ``None`` (the default) means none.

    Returns
    -------
    str
        The converted text.
    """
    # A None sentinel replaces the former mutable default argument.
    if extra_rules is None:
        extra_rules = []

    # replace rules
    rules = [
        ('#include <THNN/THNN.h>', 
         '/* TODO: remove duplicated includes */\n'
         '#include <ATen/ATen.h>\n'
         '#include <ATen/AccumulateType.h>\n'
         '#include <ATen/NativeFunctions.h>\n'
         '#include <ATen/TensorUtils.h>\n'
         '#include <ATen/Utils.h>\n'
        ),
        ('getSize(', 'size('),
        ('Acctype', 'accscalar_t'),
        ('Dtype', 'scalar_t'),
        ('ScalarConvert<scalar_t, accscalar_t>::to',
         'static_cast<accscalar_t>'),
        ('ScalarConvert<accscalar_t, scalar_t>::to',
         'static_cast<scalar_t>'),
        # ('THCNumerics<scalar_t>::min()',
        #  'at::numeric_lmits<scalar_t>::lowest()'),
        # ('THCUNN_argCheck', '/* TODO: AT_CHECK just have 2 args*/ AT_CHECK'),
        ('THAssert', 'AT_ASSERT'),
        ('THCTensor ', 'Tensor '),
        ('THCTensor*', 'Tensor*'),
        ('THTensor ', 'Tensor '),
        ('THTensor*', 'Tensor*'),
        ('putDepth', 'put_depth'),
        ('putHeight', 'put_height'),
        ('putWidth', 'put_width'),
        ('putLength', 'put_length'),
        ('putPlane', 'put_plane'),
        ('gradOut', 'grad_out'),
        ('gradIn', 'grad_in'),
        ('nBatch', 'nbatch'),
        ('nChannel', 'nchannel'),
        ('THCState *state,', ''),
        ('THState *state,', ''),
        ('THNNState *state,', ''),
        # ('THCDeviceTensor', 'PackedTensorAccessor'),
        ('state, ', ''),
        # ('THCState_getCurrentStream(state)', 'at::cuda::getCurrentCUDAStream()'),
        ('THArgCheck(', '/* TODO: AT_CHECK just have 2 args: condition and message */\n   AT_CHECK('),
        ('THNN_ARGCHECK(', '/* TODO: AT_CHECK just have 2 args: condition and message */\n  AT_CHECK('),
        # ('THCudaCheck(cudaGetLastError())',
        #  'AT_CUDA_CHECK(cudaGetLastError())'),
        ('NULL,', 'Tensor(),'),
        # ('THCNumerics<scalar_t>::min()', 'at::numeric_limits<scalar_t>::lowest()'),
        ('->dim()', '.dim()'),
        ('->size(', '.size('),
        ('THCeilDiv', 'cuda::ATenCeilDiv'),
        ('nInput', 'n_input'),
        ('nOutput', 'n_output'),
        ('THCTensor_(new)(state)', 'Tensor()'),
        ('THTensor_(new)(state)', 'Tensor()'),
        ('THTensor_(new)()', 'Tensor()'),
        ('batchSize', 'batch_size'),
        ('THError', 'AT_ERROR'),
        ('c10::raw::intrusive_ptr::decref', '// c10::raw::intrusive_ptr::decref'),
        ('updateOutput', 'out_cpu'),
        ('updateGradInput', 'backward_out_cpu')
    ] + extra_rules

    # Literal pass: order matters, each pair sees the previous pair's output.
    for by, to in rules:
        text = text.replace(by, to)

    # regex rules
    # TODO:
    # - toDeviceTensor
    # Templates are str.format strings, so literal braces are doubled.
    regex_rules = (
        # rule, output pattern 
        (r'THNN_\((.*)\)', None),
        (r'TH[C]*Tensor_\(size\)\(\s*([^,]*),\s*(.*)\s*\)', '{}.size({})'),
        (r'TH[C]*Tensor_\(resize([0-9]*)d\)\(\s*([^,]*),\s*(.*)\s*\)', '{1}.resize_({{ {2} }})'),
        (r'TH[C]*Tensor_\(nDimensionLegacyNoScalars\)\(\s*(.*)\s*\)', '{}.ndimension()'),
        (r'TH[C]*Tensor_\(zero\)\(\s*(.*)\s*\)', '{0}.zero_()'),
        (r'TH[C]*Tensor_\(data\)\(\s*(.*)\s*\)', '{0}.data()'),
        (r'[!](.*)->is_empty\(\)', '{}.numel() != 0'),
        (r'(\w)\s*!=\s*NULL', '{}.defined()'),
        (r'THCUNN_assertSameGPU\([0-9]*,\s*(.*)\s*\);', 
         '/* TODO: TensorArg tensorname_arg{{tensorname, "tensorname", 1}}; */\n'
         '/* TODO: checkAllSameGPU should use TensorArg */\n'
         'checkAllSameGPU(\n'
         '  "/* TODO: use the name of the function as description here */",'
         '  {{ {} }});'), 
        (r'(.*)=\s*TH[C]*Tensor_\(newContiguous\)\(\s*(.*)\s*\);', 
         'Tensor {0} = {1}_.contiguous(); /* TODO: add _ to the arg definition above */'),
        (r'accscalar_t\(\s*(.*)\s*\)', 'static_cast<accscalar_t>({})'),
        (r'TH[C]*Numerics\<scalar_t\>::ne\(\s*(.*),\s*(.*)\s*\)\s*', '{} != {}'),
    )

    for rule, output_format in regex_rules:
        # The iterator scans the text as it is right now; rebinding `text`
        # inside the inner loop does not change what is being scanned.
        result = re.finditer(rule, text, re.MULTILINE)
        for r in result:
            _in = r.group(0)
            if output_format is None:
                # No template: keep only the first captured group.
                _out = r.group(1)
            else:
                _out = output_format.format(*r.groups())
            # Replaces every occurrence of the matched text, after passing the
            # replacement through the naming rules.
            text = text.replace(_in, apply_rules(RULES_NAME_EXTRA, _out))

    return text


def cpu_th2at(files_path: list, extra_rules: list = None):
    """Convert every file in ``files_path`` in place using ``th2at``.

    Each file is read completely, converted, and then overwritten with the
    converted text.

    Parameters
    ----------
    files_path
        Iterable of paths of the files to convert.
    extra_rules
        Optional list of additional ``(old, new)`` string pairs forwarded to
        ``th2at``. ``None`` (the default) means none.
    """
    # A None sentinel replaces the former mutable default argument; th2at is
    # always handed a real list.
    if extra_rules is None:
        extra_rules = []

    for f_path in files_path:
        with open(f_path, 'r') as f:
            f_content = th2at(f.read(), extra_rules)

        # Written only after the read handle is closed and conversion succeeded.
        with open(f_path, 'w') as f:
            f.write(f_content)
            

In [61]:
# test
# refresh output files
# Re-copy the unconverted sources so the conversion starts from a clean state.
create_aten_cpu_files(
    output_path, 
    thnn_path,
    at_cpu_path,
    zip(thnn_files, at_cpu_files)
)

# Full paths of the copied files inside the output directory.
at_cpu_files_path = [
    os.path.join(output_path, fn) 
    for fn in at_cpu_files
]

# File-specific literal replacement: point the Im2Col include at ATen/native.
extra_rules = [
    ('#include <THNN/Im2Col.h>', '#include <ATen/native/Im2Col.h>')
]
# Convert the copied files in place.
cpu_th2at(at_cpu_files_path, extra_rules)
# Reformat every converted file in place, then print the first one for review.
# NOTE(review): the recorded run of this cell ended with a FileNotFoundError
# for THNN/generic/Im2Col.c -- the source file must exist in the checkout.
for fn in at_cpu_files:
    !clang-format -i {output_path}/{fn}
!cat {output_path}/{at_cpu_files[0]}

FileNotFoundError: [Errno 2] No such file or directory: '/home/xmn/dev/quansight/pytorch-project/pytorch/aten/src/THNN/generic/Im2Col.c'