# Operations on PyTorch Tensors

In [260]:
import numpy as np
import torch
from torch import nn

# NOTE: this rebinds the name `print` to rich's implementation for the rest of
# the notebook, so every `print(...)` below goes through rich.
from rich import print
from rich.traceback import install
# The value returned by this call is what the notebook echoes as the cell output.
install()

<bound method InteractiveShell.excepthook of <ipykernel.zmqshell.ZMQInteractiveShell object at 0x7f9bd2899d60>>

## `select`

In [20]:
# Show the built-in documentation of `Tensor.select`.
help(torch.Tensor.select)

Help on method_descriptor:

select(...)
    select(dim, index) -> Tensor
    
    Slices the :attr:`self` tensor along the selected dimension at the given index.
    This function returns a view of the original tensor with the given dimension removed.
    
    Args:
        dim (int): the dimension to slice
        index (int): the index to select with
    
    .. note::
    
        :meth:`select` is equivalent to slicing. For example,
        ``tensor.select(0, index)`` is equivalent to ``tensor[index]`` and
        ``tensor.select(2, index)`` is equivalent to ``tensor[:,:,index]``.



The `select` method is equivalent to indexing: both return a view of the original tensor rather than creating a new one.

In [188]:
# Take index 100 along dim 1 of a random (10, 128) tensor in two ways:
# through `select` and through plain slicing.
a = torch.randn(10, 128, requires_grad=True)
b = a.select(1, 100)
c = a[:, 100]

# Do the two results hold the same values?
values_match = torch.eq(b, c).all()
print(values_match)

# Do the two results start at the same memory address?
same_address = c.data_ptr() == b.data_ptr()
print(same_address)

In [189]:
# When `a` is a leaf tensor that requires grad, autograd refuses to modify it
# in place and raises a RuntimeError. Catch it and show the message so the
# notebook keeps running under "Restart & Run All".
try:
    a -= 1
except RuntimeError as err:
    print(err)

In [190]:
# `b` and `c` were both taken from `a`. Recent PyTorch releases reject an
# in-place update on a view of a leaf that requires grad, so the update is done
# with gradient tracking switched off; the underlying memory is still modified.
with torch.no_grad():
    b -= 1
print((c == b).all())  # inplace operation on `b` also affect `c`
print(b.requires_grad)

But `select` is slightly faster.

In [25]:
# Time the two equivalent ways of extracting index 100 along dim 1 of `a`.
%timeit a.select(1, 100)
%timeit a[:, 100]

3.84 µs ± 297 ns per loop (mean ± std. dev. of 7 runs, 100000 loops each)
7.3 µs ± 289 ns per loop (mean ± std. dev. of 7 runs, 100000 loops each)


## `index_select`

In [27]:
# Show the built-in documentation of `torch.index_select`.
help(torch.index_select)

Help on built-in function index_select:

index_select(...)
    index_select(input, dim, index, *, out=None) -> Tensor
    
    Returns a new tensor which indexes the :attr:`input` tensor along dimension
    :attr:`dim` using the entries in :attr:`index` which is a `LongTensor`.
    
    The returned tensor has the same number of dimensions as the original tensor
    (:attr:`input`).  The :attr:`dim`\ th dimension has the same size as the length
    of :attr:`index`; other dimensions have the same size as in the original tensor.
    
    .. note:: The returned tensor does **not** use the same storage as the original
              tensor.  If :attr:`out` has a different shape than expected, we
              silently change it to the correct shape, reallocating the underlying
              storage if necessary.
    
    Args:
        input (Tensor): the input tensor.
        dim (int): the dimension in which we index
        index (LongTensor): the 1-D tensor containing the indices to ind

`index_select` is similar to indexing, but there are three differences:

- `index_select` creates new tensors.
- The index tensor passed to `index_select` must be on the same device as the input tensor.
- `index_select` is slightly faster.

In [281]:
# Use the GPU when one is present, otherwise fall back to the CPU so the cell
# still runs on CPU-only machines.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

a = torch.randn(10, 3, 32, 32, requires_grad=True)
a.grad = torch.randn(10, 3, 32, 32)
a = a.to(device)
# For `index_select`, input, output and indices must be on the same device.
# `dim` is a plain Python int; only `index` has to be a tensor.
b = torch.index_select(a, 2, torch.tensor([2, 4, 6], device=device))
print(b.device)
c = a[:, :, [2, 4, 6]]  # But no such requirement for indexing
print((b == c).all())
print(b.grad is c.grad is None)  # Accessing grad of either will trigger UserWarning.
# Compare the start addresses of the two results.
print(b.data_ptr() == c.data_ptr())



## `index_copy_`

In [85]:
# Show the built-in documentation of `Tensor.index_copy_`.
help(torch.Tensor.index_copy_)

Help on method_descriptor:

index_copy_(...)
    index_copy_(dim, index, tensor) -> Tensor
    
    Copies the elements of :attr:`tensor` into the :attr:`self` tensor by selecting
    the indices in the order given in :attr:`index`. For example, if ``dim == 0``
    and ``index[i] == j``, then the ``i``\ th row of :attr:`tensor` is copied to the
    ``j``\ th row of :attr:`self`.
    
    The :attr:`dim`\ th dimension of :attr:`tensor` must have the same size as the
    length of :attr:`index` (which must be a vector), and all other dimensions must
    match :attr:`self`, or an error will be raised.
    
    Args:
        dim (int): dimension along which to index
        index (LongTensor): indices of :attr:`tensor` to select from
        tensor (Tensor): the tensor containing values to copy
    
    Example::
    
        >>> x = torch.zeros(5, 3)
        >>> t = torch.tensor([[1, 2, 3], [4, 5, 6], [7, 8, 9]], dtype=torch.float)
        >>> index = torch.tensor([0, 4, 2])
        >>> x

In [111]:
a = torch.randn(10, 3, 32, 32, requires_grad=True)
b = torch.randn(10, 3, 2, 32, requires_grad=True)
# `a` is a leaf that requires grad, so autograd rejects the in-place
# `index_copy_` with a RuntimeError. Print the message instead of letting the
# cell abort the notebook; `a` is left untouched.
try:
    a.index_copy_(2, torch.tensor([20, 30]), b)
except RuntimeError as err:
    print(err)

In [248]:
# Check what an indexed assignment (`a[...] = c`) carries over from `c` to `a`:
# the values, the memory address, and `.grad`.
# NOTE(review): `a` is created as a leaf that requires grad; recent PyTorch
# releases are expected to reject the in-place assignment below with a
# RuntimeError — confirm this cell still runs on a fresh kernel.
a = torch.randn(10, 3, 32, 32, requires_grad=True)
a.grad = torch.randn(10, 3, 32, 32)
c = torch.randn(10, 3, 2, 32, requires_grad=True)
c.grad = torch.ones(10, 3, 2, 32)
print(a.is_leaf)
a[:, :, [20, 30]] = c
print(a.is_leaf)
# Snapshot of the values now stored at indices 20 and 30 along dim 2.
data_cache = a[:, :, [20, 30]].clone()
# do this after creating any new tensors, because ptr of `a` will change when creating new tensors
# NOTE(review): indexing with a list presumably returns a fresh copy on every
# call, so this address likely belongs to a temporary, not to `a` — confirm.
ptr_cache = a[:, :, [20, 30]].data_ptr()

print('[yellow]Is data copied?[/]')
print((a[:, :, [20, 30]] == c).all())
print('[yellow]Is ptr copied?[/]')
print(a[:, :, [20, 30]].data_ptr() == c.data_ptr())
print('[yellow]Maybe ptr hasn\'t change?[/]')
print(a[:, :, [20, 30]].data_ptr() == ptr_cache)  # ptr will change after initializing `c`
print('[yellow]Is grad copied?[/]')
print((a.grad[:, :, [20, 30]] == c.grad).all())  # grad is not copied

# print('[yellow]Data remained the same?[/]')
# c[:] -= 1
# print((a[:, :, [20, 30]] == data_cache).all())

# Take one SGD step (lr 0.1) on `c` alone, then compare `a` against the updated
# `c` and against the snapshot taken before the step.
optimizer = torch.optim.SGD([c], 0.1)
optimizer.zero_grad()
(c * 2).mean().backward()
optimizer.step()
print('[yellow]Has data changed?[/]')
print((a[:, :, [20, 30]] == c).all())
print('[yellow]Data remained the same?[/]')
print((a[:, :, [20, 30]] == data_cache).all())  # data in `a` didn't change even if `c` has changed


In [237]:
c = torch.randn(10, 3, 2, 32, requires_grad=True)

print(c.is_leaf)  # True
print((c - 1).is_leaf)  # False

# Writing through `.data` does not go through autograd.
c.data -= 1
print(c.is_leaf)  # True
print(c[:].data_ptr() == c.data_ptr())  # True

# In-place arithmetic directly on a leaf that requires grad is rejected.
# Bind the exception so its message is printed, not the exception class.
try:
    c -= 1
except RuntimeError as err:
    print(err)

# In-place arithmetic through the view `c[:]`. Older PyTorch releases accepted
# this and turned `c` into a non-leaf; recent releases raise a RuntimeError
# instead, so the attempt is guarded to keep the cell runnable on both.
try:
    c[:] -= 1
except RuntimeError as err:
    print(err)
print(c.is_leaf)  # False only if the update above was accepted
print(c[:].data_ptr() == c.data_ptr())  # True

# Accepted once `c` is no longer a leaf; rejected (and reported) while it is.
try:
    c -= 1
except RuntimeError as err:
    print(err)

In [247]:
# Detach `c`, then check how the detached tensor `d` relates to it: autograd
# flag, memory address, and values after an in-place update made through `d`.
c = torch.randn(10, 3, 2, 32, requires_grad=True)
d = c.detach()

print(d.requires_grad)
same_address = c.data_ptr() == d.data_ptr()
print(same_address)

# In-place update applied to the detached tensor only.
d.sub_(1)

print(torch.eq(d, c).all())
print(c.is_leaf)

In [280]:
c = torch.randn(10, 3, 2, 32, requires_grad=True)
print(c.is_leaf)

# Each attempt below tries to modify the leaf `c` in place. The exception is
# bound with `as err` so that the actual message is printed, not the
# exception class.
try:
    c += 1  # expected: leaf Variable used in an in-place operation
except RuntimeError as err:
    print(err)
try:
    torch.add(c, 1, out=c)  # expected: out=... does not support automatic differentiation
except RuntimeError as err:
    print(err)
try:
    c.add_(1)  # expected: leaf Variable used in an in-place operation
except RuntimeError as err:
    print(err)
try:
    c.index_copy_(3, torch.tensor(10), torch.randn(10, 3, 2, 1))  # expected: leaf Variable used in an in-place operation
except RuntimeError as err:
    print(err)

d = c.detach()
d += 1  # no error, and still leaf
print(c.is_leaf)

# In-place update through the view `c[:]`. Older PyTorch releases accepted this
# and made `c` a non-leaf; recent releases raise a RuntimeError instead, so the
# attempt is guarded to keep the cell runnable on both.
try:
    c[:] += 1
except RuntimeError as err:
    print(err)
print(c.is_leaf)
print(c.requires_grad)

In [266]:
# Get a NumPy array from the detached tensor and print the first element of
# both the array and the tensor, before and after an in-place update made
# through the array.
e = c.detach().numpy()
origin = (0, 0, 0, 0)

for holder in (e, c):
    print(holder[origin])

e -= 1

for holder in (e, c):
    print(holder[origin])


### Grad will not be copied (only data will be copied).

In [128]:
# Copy `b` into indices 20 and 30 (dim 2) of `a`, then compare the matching
# slice of `a.grad` against `b.grad` and against its own earlier snapshot.
rows = [20, 30]
a = torch.randn(10, 3, 32, 32, requires_grad=False)
b = torch.randn(10, 3, 2, 32, requires_grad=True)

a.grad = torch.randn(10, 3, 32, 32)
b.grad = torch.ones(10, 3, 2, 32)
grad_cache = a.grad[:, :, rows].clone()  # snapshot taken before the copy

a.index_copy_(2, torch.tensor(rows), b)

print(torch.eq(a.grad[:, :, rows], b.grad).all())
print(torch.eq(a.grad[:, :, rows], grad_cache).all())

In [114]:
# Print `a.grad` before and after copying the grad-carrying tensor `b` into
# indices 20 and 30 along dim 2 of `a`.
a = torch.randn(10, 3, 32, 32, requires_grad=False)
b = torch.randn(10, 3, 2, 32, requires_grad=True)
print(a.grad)

b.grad = torch.randn(10, 3, 2, 32)
target_rows = torch.tensor([20, 30])
a.index_copy_(2, target_rows, b)
print(a.grad)

  print(a.grad)


In [298]:
# `a` is created without requires_grad; overwrite its slice at index 0 of dim 1
# with the detached values of `b`.
a = torch.randn(3, 4, 5)
print(a.grad)
b = torch.randn(3, 5, requires_grad=True)
detached_b = b.detach()
a[:, 0] = detached_b

In [299]:
# Print `a.grad` again, now that the assignment in the previous cell has run.
print(a.grad)