1 file changed: +2 -16 lines changed

@@ -9,7 +9,7 @@
 from torch.nn import CrossEntropyLoss, MSELoss, BCEWithLogitsLoss
 from torch.nn import functional as F

-from transformers.activations import ACT2FN
+from transformers.activations import get_activation
 from transformers.modeling_outputs import (
     BaseModelOutputWithPast,
     CausalLMOutputWithPast,
@@ -33,20 +33,6 @@
 # ]


-@torch.jit.script
-def NewGELU(x):
-    """
-    Compute the NewGELU activation function.
-
-    Args:
-        x (torch.Tensor): Input tensor.
-
-    Returns:
-        torch.Tensor: Output tensor after applying NewGELU activation.
-    """
-    return 0.5 * x * (1.0 + torch.tanh(math.sqrt(2.0 / math.pi) * (x + 0.044715 * torch.pow(x, 3.0))))
-
-
 @torch.jit.script
 def stickbreaking_att(
     q: torch.Tensor,
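The deleted helper is the standard tanh-approximation GELU, which transformers already ships under the key "gelu_new". A minimal sketch checking that the two agree, assuming a transformers version that exposes `get_activation` (the local `new_gelu` below is just a copy of the deleted function, kept for comparison):

```python
import math

import torch
from transformers.activations import get_activation


def new_gelu(x):
    # Local copy of the deleted NewGELU, reproduced only for this comparison.
    return 0.5 * x * (1.0 + torch.tanh(math.sqrt(2.0 / math.pi) * (x + 0.044715 * torch.pow(x, 3.0))))


x = torch.randn(4, 8)
# get_activation("gelu_new") returns an nn.Module implementing the same formula.
assert torch.allclose(new_gelu(x), get_activation("gelu_new")(x))
```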
@@ -230,7 +216,7 @@ def __init__(self, config):
             num_experts=config.n_mlp_experts,
             top_k=config.k_mlp,
             bias=False,
-            activation=NewGELU,
+            activation=get_activation(config.activation_function),
             acc_aux_loss=False,
             gating_dropout=config.moe_pdrop,
             sample_topk=config.sample_topk,
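With this change the MoE expert activation is driven by the model config rather than hard-coded. A hedged sketch of the new code path, using a hypothetical stand-in config (`DummyConfig` is not part of this repo):

```python
from transformers.activations import get_activation


class DummyConfig:
    # Hypothetical stand-in for the model config; activation_function can be
    # any key transformers recognizes, e.g. "gelu_new", "gelu", or "silu".
    activation_function = "gelu_new"


config = DummyConfig()
activation = get_activation(config.activation_function)
print(activation)  # NewGELUActivation() when activation_function == "gelu_new"
```

Setting `activation_function` to another supported key swaps the expert nonlinearity without touching the modeling code.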