From 2ed3a625979e452d9f2db2f866893e91035cc8b2 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Arvydas=20=C5=BDukauskas?= <arvyzukai@pagarbiai.lt>
Date: Tue, 29 Aug 2023 17:28:57 +0300
Subject: [PATCH] Update core.py: fix docstring "y = Wx + b" -> "y = xW + b"

Matrix multiplication is not commutative, so the order of the operands in the docstring formula matters and the previous wording might be misleading.
---
 trax/layers/core.py | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/trax/layers/core.py b/trax/layers/core.py
index b7fd6fc31..3169013ee 100644
--- a/trax/layers/core.py
+++ b/trax/layers/core.py
@@ -37,12 +37,12 @@ class Dense(base.Layer):
   with trainable weights. Each node in a dense layer computes a weighted sum of
   all node values from the preceding layer and adds to that sum a node-specific
   bias term. The full layer computation is expressed compactly in linear
-  algebra as an affine map `y = Wx + b`, where `W` is a matrix and `y`, `x`,
+  algebra as an affine map `y = xW + b`, where `W` is a matrix and `y`, `x`,
   and `b` are vectors. The layer is trained, or "learns", by updating the
   values in `W` and `b`.
 
   Less commonly, a dense layer can omit the bias term and be a pure linear map:
-  `y = Wx`.
+  `y = xW`.
   """
 
   def __init__(self,
@@ -64,8 +64,8 @@ def __init__(self,
           connection weights `W` for the layer.
       bias_initializer: Function that creates a vector of (random) initial
           bias weights `b` for the layer.
-      use_bias: If `True`, compute an affine map `y = Wx + b`; else compute
-          a linear map `y = Wx`.
+      use_bias: If `True`, compute an affine map `y = xW + b`; else compute
+          a linear map `y = xW`.
       use_bfloat16: If `True`, use bfloat16 weights instead of the default
         float32; this can save memory but may (rarely) lead to numerical issues.
     """