## Tensors (NumPy-like arrays in PyTorch)

In [1]:
import torch

In [None]:
# Tensor quick reference. Every name used below is defined inside this cell,
# so it runs top to bottom on a fresh kernel (only `import torch` is needed).

## setup: small example tensors reused by the snippets below
x = torch.arange(4, dtype=torch.float32)                 # vector [0., 1., 2., 3.]
y = torch.ones(4, dtype=torch.float32)                   # vector of ones
u = torch.tensor([3.0, -4.0])                            # vector whose L2 norm is 5
X = torch.arange(12, dtype=torch.float32).reshape(3, 4)  # 3x4 matrix
Y = torch.randn(3, 4)                                    # 3x4 matrix of random values

x.shape    # torch.Size([4])
x.numel()  # 4, the total number of elements

## creation
torch.arange(12, dtype=torch.float32)
torch.zeros((2, 3, 4))
torch.ones((2, 3, 4))
torch.randn(3, 4)
torch.zeros_like(Y)  # zeros shaped like Y

# operations
torch.exp(x)              # elementwise exponential
torch.cat((X, Y), dim=0)  # concatenate along dim 0, torch.Size([6, 4])

## inplace operation
X += Y        # updates X in place
X[:] = X + Y  # slice assignment writes the result back into X

## reshape
# float32 so that A can be multiplied with the float32 vector `x` below
A = torch.arange(20, dtype=torch.float32).reshape(5, 4)  # matrix
X = torch.arange(24).reshape(2, 3, 4)  # tensor

# Conversion to Other Python Objects
# Separate names are used so the matrix `A` stays a torch.Tensor for the
# sections below (an ndarray has no `.clone()`).
X_np = X.numpy()             # torch.Tensor -> numpy.ndarray
X_back = torch.tensor(X_np)  # numpy.ndarray -> torch.Tensor

# assign new memory
B = A.clone()  # copy of A that does not share A's memory

# reduction
x.sum()  # total sum of all elements, torch.Size([])
A.sum(axis=0)  # sum over an axis, torch.Size([4])
A.sum(axis=0, keepdims=True)  # keep original dimension, torch.Size([1, 4])
A / A.sum(axis=1, keepdims=True)  # divide each row by its sum, so every row sums to 1

# linear algebra
torch.dot(x, y)   # vector dot
torch.mv(A, x)    # matrix vector multiplication, torch.Size([5])
torch.mm(A, B.T)  # matrix matrix multiplication, (5, 4) @ (4, 5) -> torch.Size([5, 5])
torch.norm(u)     # L2 norm

## Pandas

In [2]:
import os

# Create a tiny CSV fixture with missing values (written as `NA`).
data_dir = os.path.join('data')
os.makedirs(data_dir, exist_ok=True)
data_file = os.path.join(data_dir, 'house_tiny.csv')

csv_lines = (
    'NumRooms,Alley,Price\n',  # Column names
    'NA,Pave,127500\n',        # Each row represents a data example
    '2,NA,106000\n',
    '4,NA,178100\n',
    'NA,NA,140000\n',
)
with open(data_file, 'w') as f:
    f.writelines(csv_lines)

In [9]:
import pandas as pd

# Read the CSV at `data_file` and convert it into a numeric tensor.
data = pd.read_csv(data_file)

# Fill missing numeric values with the column mean. `numeric_only=True` skips
# the string column `Alley`; without it pandas >= 2.0 raises a TypeError.
data_filled = data.fillna(data.mean(numeric_only=True))

# One-hot encode the string column; `dummy_na=True` adds an indicator column
# for missing values. `dtype=float` keeps the frame purely numeric (pandas >= 2.0
# would otherwise produce bool columns and an object-dtype array).
inputs = pd.get_dummies(data_filled, dummy_na=True, dtype=float)

# NOTE(review): `Price` is still one of the columns of `inputs`/`X` -- confirm
# that is intended, or split it off as the target.
X = torch.tensor(inputs.to_numpy(dtype=float))

## Automatic Differentiation 

In [9]:
import torch

# Vector [0., 1., 2., 3.] flagged so that autograd records operations on it.
x = torch.arange(4.0).requires_grad_(True)
x.grad  # None until a backward pass has been run

In [10]:
# Scalar y = x . x (the sum of the squared entries of x).
y = x.dot(x)
y

tensor(14., grad_fn=<DotBackward>)

In [11]:
# Backpropagate from the scalar `y`; this fills in `x.grad`.
y.backward()
x.grad  # for y = x . x the displayed gradient equals 2 * x

tensor([0., 2., 4., 6.])

### Backward for Non-Scalar Variables

In [15]:
# `y` is a vector here. Calling `backward` on a non-scalar requires a
# `gradient` argument that specifies the gradient of the differentiated
# function w.r.t. `self`.
y = torch.mul(x, x)

# Clear the values left in `x.grad` by the previous backward pass.
x.grad.zero_()

# We only want the sum of the partial derivatives, so reduce `y` to a scalar
# first. Passing a gradient of ones, `y.backward(torch.ones(len(x)))`, is the
# alternative way to express the same thing.
torch.sum(y).backward()
x.grad

tensor([0., 2., 4., 6.])

### Detaching Computation

For example, say that `y` was calculated as a function of `x`,
and that subsequently `z` was calculated as a function of both `y` and `x`.
Now, imagine that we wanted to calculate
the gradient of `z` with respect to `x`,
but wanted for some reason to treat `y` as a constant,
and only take into account the role
that `x` played after `y` was calculated.

Here, we can detach `y` to return a new variable `u`
that has the same value as `y` but discards any information
about how `y` was computed in the computational graph.
Thus, the following backpropagation function computes
the partial derivative of `z = u * x` with respect to `x` while treating `u` as a constant,
instead of the partial derivative of `z = x * x * x` with respect to `x`.

In [16]:
# Build z = u * x, where `u` carries the values of y = x * x but is detached
# from the graph, so backpropagation treats it as a constant.
y = torch.mul(x, x)
u = y.detach()
z = torch.mul(u, x)

# Clear the values left in `x.grad` by the previous backward pass.
x.grad.zero_()
torch.sum(z).backward()

# With `u` held constant, the gradient of z w.r.t. x is `u` itself.
torch.eq(x.grad, u)

tensor([True, True, True, True])