diff --git a/PyTorch/built-in/nlp/GPT-2_for_PyTorch/gpt_patch/gpt_patch.py b/PyTorch/built-in/nlp/GPT-2_for_PyTorch/gpt_patch/gpt_patch.py index ef205768dd82787b7b944860d6cb8255950a7e86..1377d7c655606762f20f5168eea35f675c120f97 100644 --- a/PyTorch/built-in/nlp/GPT-2_for_PyTorch/gpt_patch/gpt_patch.py +++ b/PyTorch/built-in/nlp/GPT-2_for_PyTorch/gpt_patch/gpt_patch.py @@ -849,7 +849,7 @@ def FusedScaleMaskSoftmaxForward(self, input, mask, norm_factor): input = input.float() if self.scale is not None: - input = input * (scale * 1.0 / norm_factor) + input = input * (self.scale * 1.0 / norm_factor) if self.attn_mask_type == AttnMaskType.causal: if self.mask_tri is None: