Commit
Fixing flash attention: dropping attention mask and causal support, adding bias support instead
erfanzar committed May 14, 2024
1 parent 09c5beb commit 2ab0ba9
Showing 13 changed files with 74 additions and 172 deletions.
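The commit message above describes narrowing the flash-attention interface: the explicit attention-mask and causal arguments go away, and callers pass an additive bias instead. As a rough illustration of that pattern only (a hedged sketch; mask_to_bias is a hypothetical helper and not part of fjformer), a boolean mask and a causal constraint can be folded into one bias tensor like this:

import jax.numpy as jnp

def mask_to_bias(attention_mask, causal=True, dtype=jnp.float32):
    # attention_mask: [batch, q_len, kv_len] boolean array, True = may attend.
    _, q_len, kv_len = attention_mask.shape
    allowed = attention_mask.astype(bool)
    if causal:
        # Disallow attending to keys that come after the query position.
        causal_mask = jnp.tril(jnp.ones((q_len, kv_len), dtype=bool))
        allowed = allowed & causal_mask[None, :, :]
    # Allowed positions contribute 0 to the attention logits; blocked positions
    # get a large negative value so they vanish after the softmax.
    return jnp.where(allowed, 0.0, jnp.finfo(dtype).min).astype(dtype)

# Example: a [1, 128, 128] additive bias that encodes a causal mask.
bias = mask_to_bias(jnp.ones((1, 128, 128), dtype=bool))

Flash-attention kernels typically expect the bias to be broadcastable against [batch, heads, q_len, kv_len]; whether fjformer's updated kernel does is not shown in this diff.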
6 changes: 2 additions & 4 deletions .idea/FXUtils.iml

Some generated files are not rendered by default.

14 changes: 14 additions & 0 deletions .idea/deployment.xml

Some generated files are not rendered by default.

2 changes: 1 addition & 1 deletion .idea/misc.xml

Some generated files are not rendered by default.

2 changes: 1 addition & 1 deletion pyproject.toml
@@ -7,7 +7,7 @@ requires-python = ">=3.8"
 
 readme = "README.md"
 
-version = "0.0.56"
+version = "0.0.57"
 
 dependencies = [
     "jax>=0.4.20",
2 changes: 1 addition & 1 deletion src/fjformer/__init__.py
@@ -68,7 +68,7 @@
 from . import optimizers as optimizers
 from . import linen as linen
 
-__version__ = "0.0.56"
+__version__ = "0.0.57"
 
 __all__ = (
     # Loss and extra function
2 changes: 1 addition & 1 deletion src/fjformer/pallas_operations/__init__.py
@@ -6,7 +6,7 @@
 """
 
 from .efficient_attention import efficient_attention as efficient_attention
-from .flash_attention import (
+from .tpu_flash_attention import (
     flash_attention as tpu_flash_attention,
     mha as gpu_flash_attention,
     BlockSizes
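Since the hunk above keeps the re-exported names (tpu_flash_attention, gpu_flash_attention, BlockSizes), downstream imports of the package should be unaffected by the module rename. Assuming only what this hunk shows, such an import looks like:

# Unaffected by the flash_attention -> tpu_flash_attention module rename,
# because the package __init__ re-exports the same public names.
from fjformer.pallas_operations import (
    tpu_flash_attention,
    gpu_flash_attention,
    BlockSizes,
)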