In [1]:
import torch
import torch.nn.functional as F
import torch.nn as nn
import numpy as np
from PIL import Image
import tensorflow as tf
import tensorflow_probability as tfp

In [2]:
# One 5x5 integer grid, reshaped to (1, 5, 5, 1): a leading axis of length 1
# and a trailing axis of length 1 around the 5x5 values.
x_in = np.array([
    [2, 1, 2, 0, 1],
    [1, 3, 2, 2, 3],
    [1, 1, 3, 3, 0],
    [2, 2, 0, 1, 1],
    [0, 0, 3, 1, 2],
]).reshape(1, 5, 5, 1)

# Four (a, b) pairs laid out row-major on a 2x2 grid, reshaped to (2, 2, 1, 2).
kernel_in = np.array([
    [2, 0.1], [3, 0.2],
    [0, 0.3], [1, 0.4],
]).reshape(2, 2, 1, 2)

In [5]:
# Convert the numpy fixtures to float32 TensorFlow constants.
x_tf = tf.constant(x_in, dtype=tf.float32)
# NOTE(review): kernel_tf is built here but never passed to the layer below,
# so the printed values are not derived from kernel_in; the layer presumably
# uses its own default initial weights — confirm whether that is intended.
kernel_tf = tf.constant(kernel_in, dtype=tf.float32)
#NHWC
# 2x2 kernel, stride 1, padding='same': the printed result keeps the 5x5
# spatial size of the input.
conv_tf = tf.compat.v1.layers.Conv2D(filters=1, kernel_size=2, strides=1, padding='same')
result = conv_tf(x_tf)
print(result.shape)
print(result)

(1, 5, 5, 1)
tf.Tensor(
[[[[ 2.765432  ]
   [ 0.6969422 ]
   [ 1.6825737 ]
   [ 1.7291266 ]
   [-0.7462553 ]]

  [[ 0.41101313]
   [ 2.95953   ]
   [ 1.8994884 ]
   [-0.83900607]
   [ 1.0125687 ]]

  [[ 1.2016262 ]
   [-0.8152696 ]
   [ 1.4473183 ]
   [ 1.5163326 ]
   [-0.3612594 ]]

  [[ 0.3881967 ]
   [ 3.2701159 ]
   [-0.36217946]
   [ 1.5628855 ]
   [-0.38499588]]

  [[ 0.        ]
   [-0.4302737 ]
   [ 0.86914414]
   [ 0.05067378]
   [ 0.67504585]]]], shape=(1, 5, 5, 1), dtype=float32)


In [6]:
# 2x2 kernel, stride 1, explicit padding of 1 on both spatial dims. The
# recorded output is 6x6, i.e. one row/column larger than the 5x5 result of
# the TensorFlow padding='same' cell.
conv_torch = torch.nn.Conv2d(in_channels=1, out_channels=1, kernel_size=2, stride=1, padding=[1,1])

# x_in is stored with its length-1 channel axis last; permute(0, 3, 1, 2)
# moves that axis to position 1 before the tensor is fed to Conv2d.
result = conv_torch(torch.Tensor(x_in).permute(0, 3, 1, 2))
print(result.shape)
print(result)

torch.Size([1, 1, 6, 6])
tensor([[[[-0.4728,  0.5362, -0.0578,  0.7152, -0.2938,  0.3002],
          [ 0.2692,  0.6847,  1.6551,  0.9972,  0.4596,  1.4502],
          [-0.0123,  1.2857,  1.2862,  1.7961,  2.6147,  0.8453],
          [-0.1913,  0.9587,  1.8798,  1.5108,  1.0813,  0.3002],
          [ 0.4483,  1.0883, -0.0118,  1.2326,  0.5437,  1.0352],
          [-0.1147, -0.1147,  0.7298,  1.1268,  0.7683,  0.5253]]]],
       grad_fn=<MkldnnConvolutionBackward>)


In [8]:
# Seed numpy so the random input `x` below is reproducible.
np.random.seed(0)
from typing import List, Tuple
# Create constant-valued weights (every entry 5e-2) and a random input.
# The weight tensor has shape (1, 1, 2, 3), so the kernel is 2 rows x 3 columns.
weights = torch.empty(1, 1, 2, 3)
torch.nn.init.constant_(weights, 5e-2)
x = np.random.randn(1, 1, 5, 5)

# Reorder the weight axes with (2, 3, 1, 0) for the TensorFlow call
# (presumably out/in/kH/kW -> kH/kW/in/out — TODO confirm).
weights_tf = tf.convert_to_tensor(weights.numpy().transpose(2, 3, 1, 0), dtype=tf.float32)
weights_torch = torch.Tensor(weights)

# Tensorflow padding behavior.
# NOTE(review): an earlier version of this comment assumed kH == kW, but the
# kernel defined above is 2 x 3, so that assumption does not hold here.
stride = 1

def get_padding(kernel_size: List[int], input_size: List[int], stride: int) -> List[int]:
    """Compute TensorFlow-style "SAME" padding in the layout ``F.pad`` expects.

    For each spatial dimension the total padding follows TensorFlow's rule::

        total = max(k - stride, 0)               if size % stride == 0
        total = max(k - (size % stride), 0)      otherwise

    and is split as ``before = total // 2``, ``after = total - before`` so
    that any odd leftover goes to the end (bottom / right).

    Each dimension is paired with its own kernel extent: the height padding
    uses ``kernel_size[2]`` with ``input_size[2]`` and the width padding uses
    ``kernel_size[3]`` with ``input_size[3]``. (Mixing the two only happens to
    work when ``H % stride`` and ``W % stride`` select the same branch, e.g.
    for ``stride == 1``.)

    Args:
        kernel_size: 4-element shape indexed as ``[_, _, kH, kW]``.
        input_size: 4-element shape indexed as ``[_, _, H, W]``.
        stride: Stride applied to both spatial dimensions.

    Returns:
        ``[pad_left, pad_right, pad_top, pad_bottom]`` — last dimension first,
        which is the order ``torch.nn.functional.pad`` consumes.
    """

    def _split(kernel_extent: int, input_extent: int) -> List[int]:
        # Total padding along one dimension, then split begin/end.
        remainder = input_extent % stride
        consumed = stride if remainder == 0 else remainder
        total = max(kernel_extent - consumed, 0)
        before = total // 2
        return [before, total - before]

    pad_top_bottom = _split(kernel_size[2], input_size[2])
    pad_left_right = _split(kernel_size[3], input_size[3])

    # F.pad lists the last dimension (width) first, then height.
    return pad_left_right + pad_top_bottom
    
# Explicit padding amounts computed from the weight and input shapes.
padding = get_padding(weights.shape, x.shape, stride)

        
# TensorFlow input: move axis 1 of `x` to the end via transpose (0, 2, 3, 1).
x_tf = tf.convert_to_tensor(x.transpose((0, 2, 3, 1)), dtype=tf.float32)
# PyTorch input: keep the original axis order and zero-pad it up front, so
# the convolution below can run with padding=0.
x_torch = torch.Tensor(x)
x_torch = F.pad(x_torch, padding, "constant", 0)

# TF Conv2D
tf_result = tf.nn.conv2d(x_tf,
                         weights_tf,
                         strides=[1, stride, stride, 1],
                         padding="SAME")

# PyTorch Conv2D
torch_conv2d = F.conv2d(x_torch, weights_torch, padding=0, stride=stride)

# Permute the PyTorch result with (0, 2, 3, 1) so its axis order matches
# tf_result before printing and comparing.
print(torch_conv2d.detach().permute(0, 2, 3, 1).numpy())

# Mean absolute element-wise difference between the two results; the recorded
# run prints 0.0.
diff = np.mean(np.abs(tf_result - torch_conv2d.detach().permute(0, 2, 3, 1).numpy()))
print('Mean of Abs Diff: {0}'.format(diff))

[[[[ 0.10685101]
   [ 0.14822003]
   [ 0.21576503]
   [ 0.26216057]
   [ 0.22079153]]

  [[ 0.07855638]
   [ 0.10904041]
   [ 0.15162493]
   [ 0.07412992]
   [ 0.0436459 ]]

  [[ 0.17130353]
   [ 0.1990975 ]
   [ 0.19694874]
   [ 0.02901948]
   [ 0.00122551]]

  [[-0.00358089]
   [ 0.02938301]
   [ 0.11889391]
   [ 0.08229198]
   [ 0.04932808]]

  [[-0.09496856]
   [-0.05174674]
   [ 0.03879448]
   [ 0.11960129]
   [ 0.07637948]]]]
Mean of Abs Diff: 0.0


In [3]:
# p: 4-class distribution with most of its mass on the two middle classes.
probs_p = [0.1, 0.4, 0.4, 0.1]
p = tfp.distributions.OneHotCategorical(probs=probs_p)

# q: uniform 4-class distribution.
probs_q = [0.25, 0.25, 0.25, 0.25]
q = tfp.distributions.OneHotCategorical(probs=probs_q)

In [4]:
# Arguments are passed as (q, p). The value printed in the next cell
# (0.2231) agrees with the manual sum q * (log q - log p) recorded further
# down in this notebook.
kl = tfp.distributions.kl_divergence(q, p)

In [5]:
# Show the KL value computed in the previous cell.
tf.print(kl)

0.223143578


In [58]:
# torch_q stacks the uniform distribution twice (shape 2 x 4); torch_p is 1-D.
torch_q = torch.tensor([probs_q, probs_q])
torch_p = torch.tensor(probs_p)

# NOTE(review): per the PyTorch docs, F.kl_div(input, target) evaluates
# target * (log(target) - input). With input = log(q) and target = p this is
# KL(p || q), summed over both rows of torch_q, which is why the recorded
# 0.3855 differs from the 0.2231 that the TensorFlow kl_divergence(q, p) cell
# prints. Swap the roles of p and q if the goal is to reproduce the TF value.
F.kl_div(torch_q.log(), torch_p, reduction='sum')

tensor(0.3855)

In [7]:
# Manual KL with q as the weighting distribution: sum of q * (log q - log p).
# NOTE(review): this cell was executed as In [7], i.e. before the In [58] cell
# that defines the two-row torch_q; the recorded 0.2231 presumably comes from
# a single-row torch_q — re-run after Restart & Run All to confirm.
torch.sum(torch_q * (torch.log(torch_q) - torch.log(torch_p)))

tensor(0.2231)

In [49]:
# Two PyTorch distributions built from the same probabilities torch_p: a
# one-hot categorical and an index-valued categorical.
pred = torch.distributions.one_hot_categorical.OneHotCategorical(probs=torch_p)
var_pred =  torch.distributions.categorical.Categorical(probs=torch_p)

In [50]:
# Evaluate the one-hot categorical's log_prob on torch_q. torch_q holds
# probabilities rather than a one-hot sample; the recorded -2.3026 equals
# log(0.1).
pred.log_prob(torch_q)

tensor(-2.3026)

In [51]:
# Evaluate the index-valued categorical's log_prob on torch_q. The recorded
# output has one entry per element of torch_q, each equal to log(0.1).
# NOTE(review): presumably the 0.25 values are being interpreted as class
# index 0 — confirm before relying on this.
var_pred.log_prob(torch_q)

tensor([-2.3026, -2.3026, -2.3026, -2.3026])

In [16]:
# Negated, summed BCE-with-logits using log(p) as the logits and q as targets.
# NOTE(review): log-probabilities are being supplied where logits are
# expected; confirm this is the intended comparison.
- F.binary_cross_entropy_with_logits(torch_p.log(), torch_q, reduction='sum')

tensor(-2.4730)

In [59]:
# Element [1] of max(-1) is the index of the maximum along the last axis.
# Every entry of torch_q is 0.25, so this is a tie; the recorded run returns
# index 3 for both rows.
indices = torch_q.max(-1)[1]
indices

tensor([3, 3])

In [60]:
# Append a trailing axis so each row's index becomes a length-1 column
# (shape 2 x 1 in the recorded output).
target = indices.long().unsqueeze(-1)
target

tensor([[3],
        [3]])

In [61]:
# Broadcast the (2, 1) index column against the 4-element log(p) vector so
# both share a common shape; the recorded `value` is 2 x 4.
value, log_pmf = torch.broadcast_tensors(target, torch_p.log())
value

tensor([[3, 3, 3, 3],
        [3, 3, 3, 3]])

In [62]:
# Keep only the first column of the broadcast indices, returning to one index
# per row.
value = value[..., :1]
value

tensor([[3],
        [3]])

In [63]:
# Pick log(p) at each row's index along the last axis and drop that axis.
# With index 3 in both rows the recorded result is log(0.1) twice.
log_pmf.gather(-1, value).squeeze(-1)

tensor([-2.3026, -2.3026])