Convert matmuls to quantizable nn.Linear modules (#1304)
Summary:
Pull Request resolved: facebookresearch/fairseq#1304

Pull Request resolved: pytorch/translate#657

Pull Request resolved: facebookresearch/pytext#1065

Pull Request resolved: fairinternal/fairseq-py#889

We are converting matmuls to quantizable nn.Linear modules in this diff. As a first step, let's profile after applying the diff to see how the low-level operations change.
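
For context, here is a minimal sketch (my illustration, not code from this diff; the class name and dimensions are made up) of why separate nn.Linear projections are "quantizable": PyTorch's dynamic quantization swaps nn.Linear modules for quantized kernels, which it cannot do for a raw matmul against a fused parameter.

import torch
import torch.nn as nn

class ToyProjections(nn.Module):
    """Three separate projections, mirroring q_proj/k_proj/v_proj."""
    def __init__(self, embed_dim):
        super().__init__()
        self.q_proj = nn.Linear(embed_dim, embed_dim, bias=True)
        self.k_proj = nn.Linear(embed_dim, embed_dim, bias=True)
        self.v_proj = nn.Linear(embed_dim, embed_dim, bias=True)

    def forward(self, x):
        # Each projection is an nn.Linear call rather than a matmul
        # against a fused in_proj_weight parameter.
        return self.q_proj(x), self.k_proj(x), self.v_proj(x)

model = ToyProjections(embed_dim=16)
# Dynamic quantization replaces every nn.Linear with a quantized version.
qmodel = torch.quantization.quantize_dynamic(model, {nn.Linear}, dtype=torch.qint8)
q, k, v = qmodel(torch.randn(2, 4, 16))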

Reviewed By: jmp84, edunov, lly-zero-one, jhcross

Differential Revision: D17964796

fbshipit-source-id: 3ddd3ff81fa1ea5864dded98e993f4fe3b71fe5e
halilakin authored and yzpang committed Feb 19, 2021
1 parent f2064c5 commit 97b1079
Showing 1 changed file with 6 additions and 1 deletion.
fairseq/modules/multihead_attention.py
@@ -84,7 +84,12 @@ def __init__(
     @property
     def in_proj_weight(self):
         # TODO: Remove this backward compatibility code (in_proj_weight)
-        return torch.cat((self.q_proj_weight, self.k_proj_weight, self.v_proj_weight))
+        return torch.cat((self.q_proj.weight, self.k_proj.weight, self.v_proj.weight))
+
+    @property
+    def in_proj_bias(self):
+        # TODO: Remove this backward compatibility code (in_proj_bias)
+        return torch.cat((self.q_proj.bias, self.k_proj.bias, self.v_proj.bias))
 
     def prepare_for_onnx_export_(self):
         self.onnx_trace = True
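
For reference, a minimal sketch (mine, not part of the diff) checking that concatenating the separate projection weights and biases along dim 0 reproduces what older code expects from a fused in_proj_weight/in_proj_bias:

import torch
import torch.nn.functional as F
from torch import nn

embed_dim = 8
q_proj = nn.Linear(embed_dim, embed_dim)
k_proj = nn.Linear(embed_dim, embed_dim)
v_proj = nn.Linear(embed_dim, embed_dim)

# Same concatenation as the backward-compatibility properties above.
in_proj_weight = torch.cat((q_proj.weight, k_proj.weight, v_proj.weight))  # shape (3E, E)
in_proj_bias = torch.cat((q_proj.bias, k_proj.bias, v_proj.bias))          # shape (3E,)

x = torch.randn(5, embed_dim)
q_fused, k_fused, v_fused = F.linear(x, in_proj_weight, in_proj_bias).chunk(3, dim=-1)

assert torch.allclose(q_fused, q_proj(x), atol=1e-6)
assert torch.allclose(k_fused, k_proj(x), atol=1e-6)
assert torch.allclose(v_fused, v_proj(x), atol=1e-6)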
