Commit

Merge pull request #114 from gianlucamacri/patch-1
turning exception into warning for flash attention inference
yukang2017 authored Nov 2, 2023
2 parents bb06477 + b5c6809 commit 39866af
Showing 2 changed files with 2 additions and 2 deletions.
2 changes: 1 addition & 1 deletion llama_attn_replace.py
@@ -34,7 +34,7 @@ def forward_flashattn(
     attention_mask: [bsz, q_len]
     """
     if not self.training:
-        raise ValueError("This function is only for training. For inference, please use forward_flashattn_inference.")
+        warnings.warn("This function should be used just for training as it may exhibit reduced inference performance. For inference, please use forward_flashattn_inference.")
 
     if output_attentions:
         warnings.warn(
2 changes: 1 addition & 1 deletion llama_attn_replace_sft.py
@@ -36,7 +36,7 @@ def forward_flashattn(
     attention_mask: [bsz, q_len]
     """
     if not self.training:
-        raise ValueError("This function is only for training. For inference, please use forward_flashattn_inference.")
+        warnings.warn("This function should be used just for training as it may exhibit reduced inference performance. For inference, please use forward_flashattn_inference.")
 
     if output_attentions:
         warnings.warn(
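
In effect, calling the training-oriented forward_flashattn while self.training is False now emits a warning and keeps running instead of raising a ValueError. A minimal sketch of the new behavior follows; the FlashAttnModule class is a hypothetical stand-in for illustration, not the repository's actual attention module.

# Minimal sketch of the behavioral change; class and method names other than
# forward_flashattn's guard logic are illustrative, not the repository's API.
import warnings


class FlashAttnModule:
    def __init__(self, training: bool = True):
        # Mimics the `self.training` flag of a torch.nn.Module.
        self.training = training

    def forward_flashattn(self):
        if not self.training:
            # Before this commit: raise ValueError(...) aborted inference here.
            # After this commit: a warning is emitted and execution continues.
            warnings.warn(
                "This function should be used just for training as it may "
                "exhibit reduced inference performance. For inference, "
                "please use forward_flashattn_inference."
            )
        return "attention output"


module = FlashAttnModule(training=False)  # e.g. after model.eval()
print(module.forward_flashattn())         # warns instead of raising

The practical upshot is that inference through the patched training forward no longer hard-fails, while the warning still points users to forward_flashattn_inference.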
