From 2ed3a625979e452d9f2db2f866893e91035cc8b2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Arvydas=20=C5=BDukauskas?= Date: Tue, 29 Aug 2023 17:28:57 +0300 Subject: [PATCH] Update core.py: fix docstring "y = Wx + b" -> "y = xW + b" Matrix multiplication is not commutative so the documentation might be misleading. --- trax/layers/core.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/trax/layers/core.py b/trax/layers/core.py index b7fd6fc31..3169013ee 100644 --- a/trax/layers/core.py +++ b/trax/layers/core.py @@ -37,12 +37,12 @@ class Dense(base.Layer): with trainable weights. Each node in a dense layer computes a weighted sum of all node values from the preceding layer and adds to that sum a node-specific bias term. The full layer computation is expressed compactly in linear - algebra as an affine map `y = Wx + b`, where `W` is a matrix and `y`, `x`, + algebra as an affine map `y = xW + b`, where `W` is a matrix and `y`, `x`, and `b` are vectors. The layer is trained, or "learns", by updating the values in `W` and `b`. Less commonly, a dense layer can omit the bias term and be a pure linear map: - `y = Wx`. + `y = xW`. """ def __init__(self, @@ -64,8 +64,8 @@ def __init__(self, connection weights `W` for the layer. bias_initializer: Function that creates a vector of (random) initial bias weights `b` for the layer. - use_bias: If `True`, compute an affine map `y = Wx + b`; else compute - a linear map `y = Wx`. + use_bias: If `True`, compute an affine map `y = xW + b`; else compute + a linear map `y = xW`. use_bfloat16: If `True`, use bfloat16 weights instead of the default float32; this can save memory but may (rarely) lead to numerical issues. """