# Difference of Convolution Calculation

I'm feeding the same data into a single convolution layer in PyTorch and in TensorFlow.
I have made sure that:
1. the inputs are the same (one randomly initialized array is shared by both frameworks)
2. the kernel weights and biases are the same (randomly initialized once and shared)
3. the padding is forced to be the same

However, the results show a large precision difference between the two frameworks. The difference is bigger than 1e-3, and I would really appreciate it if someone could tell me what is going wrong. Thanks in advance.

Framework Version:
1.	Pytorch 1.10.0a0+git593e8f4
2.	TensorFlow 2.5.0 (but it’s tensorflow.compat.v1 in the notebook)

In [1]:
import numpy as np

import torch
import torch.nn
import torch.nn.functional as F

import tensorflow.compat.v1 as tf
from tensorflow.compat.v1.keras.initializers import he_uniform
from tensorflow.compat.v1.keras.layers import (
    Input,
    Conv2D
)
from tensorflow.compat.v1.keras.models import Model

In [2]:
# Ask TensorFlow to grow GPU memory on demand instead of configuring only the
# first device. Looping over the list is a no-op on CPU-only machines, where
# indexing element 0 of the empty list would raise IndexError.
physical_devices = tf.config.list_physical_devices('GPU')
for gpu_device in physical_devices:
    tf.config.experimental.set_memory_growth(gpu_device, True)

In [3]:
# Convolution problem size used by the cells below.
in_ch, out_ch = 256, 512  # kernel input / output channel counts
input_shape = [16, 32, in_ch]  # shape of one input sample, [h, w, c]

## case 1: random init

In [4]:
# Case 1: random kernel, bias and input shared by both frameworks.
# A fixed seed makes every "Restart & Run All" draw the same numbers, so the
# outputs and differences computed from them can be reproduced.
rng = np.random.default_rng(0)

# Kernel with shape (5, 5, in_ch, out_ch), i.e. size k=5, plus one bias value
# per output channel. Values are float64, uniform in [0, 1).
init_kernel = rng.random((5, 5, in_ch, out_ch))
init_bias = rng.random((out_ch,))

# Same weights with axes permuted to (out_ch, in_ch, 5, 5) for PyTorch.
torch_kernel = np.transpose(init_kernel, axes=(3, 2, 0, 1))

# One input sample of shape [h, w, c]; prepend a batch axis -> (1, h, w, c).
np_input = rng.random(tuple(input_shape))
tf_input = np.expand_dims(np_input, axis=0)

# Same data with the channel axis moved forward -> (1, c, h, w) for PyTorch.
torch_input = np.transpose(tf_input, axes=(0, 3, 1, 2))

## output in tensorflow

In [5]:
# TensorFlow / Keras path.
# Constant initializers load the predefined NumPy kernel and bias into the layer.
kernel_initializer = tf.keras.initializers.constant(init_kernel)
bias_initializer = tf.keras.initializers.constant(init_bias)

# A single 5x5, stride-2 convolution with "same" padding, wrapped in a model.
inp = Input(shape=input_shape)
oup = Conv2D(
    filters=out_ch,
    kernel_size=5,
    strides=(2, 2),
    padding="same",
    kernel_initializer=kernel_initializer,
    bias_initializer=bias_initializer,
)(inp)
model = Model(inputs=inp, outputs=oup)

# Forward pass; show the output shape and the feature map at channel index 1.
y = model.predict(x=tf_input)
print(y.shape)
print(y[0, :, :, 1])

(1, 8, 16, 512)
[[1036.6389  1286.9072  1298.8225  1283.5452  1276.365   1277.3307
  1258.4304  1287.5437  1281.8002  1292.6559  1293.8861  1309.453
  1286.7378  1277.0677  1301.7002   775.3595 ]
 [1289.4779  1627.7542  1612.5903  1609.3292  1602.3656  1583.5837
  1590.3345  1616.9135  1616.2144  1616.5071  1618.2323  1635.531
  1621.8698  1600.3881  1616.7415   959.61127]
 [1295.3243  1640.8416  1627.5337  1605.8823  1600.4493  1610.625
  1611.7426  1625.8309  1637.1473  1613.0815  1625.7319  1628.7733
  1616.8772  1612.8221  1619.5997   972.6801 ]
 [1297.9673  1633.0712  1618.3251  1599.8452  1596.4957  1620.6074
  1648.3531  1618.8705  1632.3997  1611.8512  1615.3243  1611.0486
  1606.9712  1622.3531  1633.6322   962.0845 ]
 [1280.9043  1625.0216  1627.7568  1622.6278  1623.4135  1627.2576
  1629.244   1629.7815  1629.1494  1610.7152  1592.1129  1593.6815
  1603.803   1636.3857  1630.783    965.37335]
 [1282.802   1621.4127  1618.3734  1641.9825  1606.8539  1611.0704
  1604.6492  16

## output in pytorch

In [6]:
# PyTorch path, computed in float64.
# NOTE: set_default_dtype changes the process-wide default for new float tensors.
torch.set_default_dtype(torch.float64)
image = torch.tensor(torch_input, dtype=torch.float64)

# Pad explicitly to mimic TensorFlow's "same" padding. With stride 2, kernel 5
# and spatial sizes 16 and 32, the total padding per axis is 3, split as 1
# before and 2 after. F.pad takes (left, right, top, bottom) for the last two axes.
padding_setting = (1, 2, 1, 2)
image = F.pad(image, padding_setting, "constant", 0)

# Build the layer with kernel_size=5 so its declared kernel size matches the
# 5x5 weights assigned below (it was previously declared as 3), then overwrite
# its weight and bias with the predefined values.
conv_filter = torch.nn.Conv2d(
    in_channels=in_ch,
    out_channels=out_ch,
    kernel_size=5,
    stride=(2, 2),
    padding=0,
)
conv_filter.weight = torch.nn.Parameter(torch.tensor(torch_kernel, dtype=torch.float64))
conv_filter.bias = torch.nn.Parameter(torch.tensor(init_bias, dtype=torch.float64))
conv_filter.eval()

# Forward pass; show the output shape and the feature map at channel index 1.
torch_out = conv_filter(image)
print(torch_out.shape)
print(torch_out[0, 1, ...].detach().numpy())

torch.Size([1, 512, 8, 16])
[[1036.64335161 1286.9226207  1298.83164532 1283.54729802 1276.36572442
  1277.34366467 1258.44408343 1287.55955293 1281.80383857 1292.66285082
  1293.8972526  1309.46106419 1286.74941242 1277.07496218 1301.71036886
   775.36394238]
 [1289.49683434 1627.77819436 1612.60660164 1609.34236299 1602.37233848
  1583.60523012 1590.35812777 1616.94000353 1616.23318025 1616.52659686
  1618.25492932 1635.55053228 1621.88510246 1600.4047178  1616.75682842
   959.62300385]
 [1295.35066291 1640.87627539 1627.55499845 1605.89561287 1600.47077093
  1610.65044331 1611.76687007 1625.86000189 1637.17464345 1613.10641884
  1625.74788237 1628.78212416 1616.89569293 1612.83844215 1619.61461932
   972.68669719]
 [1297.98683996 1633.09091837 1618.34323033 1599.86776552 1596.51702622
  1620.6319902  1648.38013553 1618.88995459 1632.42613947 1611.86861284
  1615.34236042 1611.06178193 1606.98307877 1622.37315644 1633.65035889
   962.09070446]
 [1280.92282653 1625.04752549 1627.77473

## calculate differences

In [7]:
# Reorder the TensorFlow result from (n, h, w, c) to (n, c, h, w) so it lines
# up with the PyTorch output. The result goes into a new name instead of
# overwriting `y`, so re-running this cell does not transpose an
# already-transposed array (which would make the subtraction fail).
y_nchw = np.transpose(y, axes=(0, 3, 1, 2))
t = torch_out.detach().numpy()

# Element-wise absolute difference between the two frameworks.
# NOTE(review): the TensorFlow output prints with float32-like precision while
# PyTorch was switched to float64. At magnitudes around 1e3, float32 spacing is
# roughly 1e-4, so an absolute tolerance of 1e-3 is probably too strict here;
# consider a relative check such as np.allclose(y_nchw, t, rtol=1e-4) - confirm.
diff = np.abs(y_nchw - t)
np.all(diff < 1e-3)

False

In [8]:
# Looser absolute tolerance: does every element agree to within 1e-1?
(diff < 1e-1).all()

True

## another case, simpler, but gives True for np.all(diff < 1e-6)

In [None]:
# Case 2: deterministic data - all-ones kernel and input, all-zeros bias.
# Kernel has shape (5, 5, in_ch, out_ch), i.e. size k=5.
kernel_shape = (5, 5, in_ch, out_ch)
init_kernel = np.ones(kernel_shape)
init_bias = np.zeros(out_ch)

# Same weights with axes permuted to (out_ch, in_ch, 5, 5) for PyTorch.
torch_kernel = init_kernel.transpose(3, 2, 0, 1)

# Shared input sample; a leading batch axis is prepended for TensorFlow.
np_input = np.ones(input_shape)
tf_input = np_input[np.newaxis, ...]

# Same data with the channel axis moved forward for PyTorch.
torch_input = np_input[np.newaxis, ...].transpose(0, 3, 1, 2)