keras-team · vishalshar · Jan 10, 2019 · Jan 10, 2019 · Jan 10, 2019 · Jan 10, 2019
diff --git a/keras/activations.py b/keras/activations.py
@@ -158,6 +158,15 @@ def hard_sigmoid(x):
     return K.hard_sigmoid(x)
 
 
+def gelu(x):
+    """Gaussian Error Linear Unit (GELU) activation.
+
+    GELUs are nonconvex and nonmonotonic. The computation is delegated
+    to the active backend's `gelu`.
+    """
+    return K.gelu(x)
+
+
 def exponential(x):
     """Exponential (base e) activation function.
     """
@@ -211,3 +220,6 @@ def get(identifier):
     else:
         raise ValueError('Could not interpret '
                          'activation function identifier:', identifier)
+
+
+
diff --git a/keras/backend/tensorflow_backend.py b/keras/backend/tensorflow_backend.py
@@ -3614,6 +3614,22 @@ def hard_sigmoid(x):
     x = tf.clip_by_value(x, zero, one)
     return x
 
+
+def gelu(x):
+    """Gaussian Error Linear Unit (GELU), tanh approximation.
+
+    GELUs are nonconvex and nonmonotonic.
+
+    # Arguments
+        x: Input tensor.
+
+    # References
+        - [GELUs (Hendrycks et al.)](https://arxiv.org/abs/1606.08415)
+    """
+    # NumPy scalar constant: `tf.sqrt(2 / np.pi)` would be a float32 tensor.
+    inner = np.sqrt(2 / np.pi) * (x + 0.044715 * tf.pow(x, 3))
+    return 0.5 * x * (1 + tf.tanh(inner))
+
 
 def tanh(x):
     """Element-wise tanh.

diff --git a/tests/keras/activations_test.py b/tests/keras/activations_test.py
@@ -171,7 +171,23 @@ def ref_hard_sigmoid(x):
     expected = hard_sigmoid(test_values)
     assert_allclose(result, expected, rtol=1e-05)
 
+
+def test_gelu():
+    """Check `activations.gelu` against a NumPy reference implementation."""
+    def ref_gelu(x):
+        # Tanh approximation of GELU; NumPy ops are already element-wise.
+        inner = np.sqrt(2 / np.pi) * (x + 0.044715 * np.power(x, 3))
+        return 0.5 * x * (1 + np.tanh(inner))
 
+    x = K.placeholder(ndim=2)
+    f = K.function([x], [activations.gelu(x)])
+    test_values = get_standard_values()
+
+    result = f([test_values])[0]
+    expected = ref_gelu(test_values)
+    assert_allclose(result, expected, rtol=1e-05)
+
+
 def test_relu():
     x = K.placeholder(ndim=2)
     f = K.function([x], [activations.relu(x)])