Skip to content

Commit addaa4a

Browse files
committed
Add ability to swap weights to MovingAverage.
This patch makes it easier to swap the model weights and the MovingAverage weights before eval and swap them back after eval.
1 parent 0264703 commit addaa4a

File tree

2 files changed

+99
-0
lines changed

2 files changed

+99
-0
lines changed

tensorflow_addons/optimizers/moving_average.py

Lines changed: 54 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -124,3 +124,57 @@ def _create_slots(self, var_list):
124124
) # pylint: disable=protected-access
125125
for var in var_list:
126126
self.add_slot(var, "average", var.read_value())
127+
128+
def shadow_copy(self, model_weights):
    """Create zero-initialized "average" slot variables for `model_weights`.

    After this call, `swap_weights` can exchange each model variable with
    its shadow (moving-average) counterpart.

    Args:
        model_weights: iterable of the model's `tf.Variable`s to shadow.
    """
    # NOTE(review): slots start at zero rather than the variables' current
    # values — presumably the averaging update fills them before any swap;
    # confirm against the averaging step.
    for weight in model_weights:
        self.add_slot(weight, "average", initializer="zeros")
    self._model_weights = model_weights
    self._average_weights = [
        self.get_slot(weight, "average") for weight in model_weights
    ]
134+
135+
@property
def has_shadow_copy(self):
    """Whether `shadow_copy` has been called on this optimizer."""
    shadowed = self._model_weights
    return shadowed is not None
139+
140+
def swap_weights(self):
    """Swap the average and moving weights.

    This is a convenience method to allow one to evaluate the averaged
    weights at test time. Loads the weights stored in the "average" slots
    into the model, keeping a copy of the original model weights. Swapping
    twice returns the original weights.
    """
    # The swap must be driven from the cross-replica context so the
    # per-variable updates can be mirrored to every device by the
    # active distribution strategy.
    if not tf.distribute.in_cross_replica_context():
        raise ValueError(
            "Swapping weights must occur under a " "tf.distribute.Strategy"
        )
    strategy = tf.distribute.get_strategy()
    return strategy.run(self._swap_weights, args=())
155+
156+
@tf.function
def _swap_weights(self):
    """Element-wise in-place swap of the average and model variables.

    Uses the add/subtract swap trick (a += b; b = a - b; a -= b) so no
    temporary variables are needed; each assignment is mirrored to all
    devices via `strategy.extended.update` inside a `merge_call`.
    """
    def fn_0(a, b):
        # a := a + b
        a.assign_add(b)
        return a

    def fn_1(b, a):
        # b := a - b  (b now holds a's original value)
        b.assign(a - b)
        return b

    def fn_2(a, b):
        # a := a - b  (a now holds b's original value)
        a.assign_sub(b)
        return a

    def swap(strategy, a_and_b):
        """Swap `a` and `b` and mirror to all devices."""
        for a, b in a_and_b:
            strategy.extended.update(a, fn_0, args=(b,))  # a = a + b
            strategy.extended.update(b, fn_1, args=(a,))  # b = a - b
            strategy.extended.update(a, fn_2, args=(b,))  # a = a - b

    # Escape the per-replica context once so the updates above run in the
    # cross-replica context and apply consistently on every replica.
    # NOTE(review): `zip(...)` is a one-shot iterator captured at trace
    # time — confirm this is safe if this tf.function is ever retraced.
    ctx = tf.distribute.get_replica_context()
    return ctx.merge_call(
        swap, args=(zip(self._average_weights, self._model_weights),)
    )

tensorflow_addons/optimizers/tests/moving_average_test.py

Lines changed: 45 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -247,3 +247,48 @@ def test_dynamic_decay(sequential_update):
247247
ema_var0 = opt.get_slot(var0, "average")
248248
if sequential_update:
249249
np.testing.assert_allclose(ema_var0.read_value(), [0.64, 1.64])
250+
251+
252+
@pytest.mark.usefixtures("maybe_run_functions_eagerly")
@pytest.mark.parametrize("sequential_update", [True, False])
def test_swap_weights(sequential_update):
    """swap_weights exchanges model/average weights; a second swap restores them.

    BUG FIX: the body previously re-looped `for sequential_update in
    [True, False]:`, shadowing the parametrized argument and running every
    case twice — the `@pytest.mark.parametrize` decorator already supplies
    both values, so the inner loop is removed.
    """
    strategy = tf.distribute.OneDeviceStrategy("device:CPU:0")
    with strategy.scope():
        var = tf.Variable([1.0, 2.0])

        opt = MovingAverage(
            tf.keras.optimizers.SGD(lr=2.0),
            sequential_update=sequential_update,
            average_decay=0.5,
        )

    with strategy.scope():
        grads = tf.constant([0.1, 0.1])

        def apply_gradients():
            opt.apply_gradients([(grads, var)])

        strategy.run(apply_gradients)

        # One SGD step: var = [1, 2] - 2.0 * 0.1 = [0.8, 1.8].
        np.testing.assert_allclose(var.read_value(), [0.8, 1.8])
        ema_var = opt.get_slot(var, "average")
        if sequential_update:
            # EMA with decay 0.5: 0.5 * old + 0.5 * new.
            np.testing.assert_allclose(ema_var.read_value(), [0.9, 1.9])

        opt.shadow_copy([var])

    with strategy.scope():
        opt.swap_weights()

    # After one swap the slot holds the model weights and vice versa.
    np.testing.assert_allclose(ema_var.read_value(), [0.8, 1.8])
    if sequential_update:
        np.testing.assert_allclose(var.read_value(), [0.9, 1.9])

    with strategy.scope():
        opt.swap_weights()

    # Swapping twice restores the original assignment.
    np.testing.assert_allclose(var.read_value(), [0.8, 1.8])
    if sequential_update:
        np.testing.assert_allclose(ema_var.read_value(), [0.9, 1.9])

0 commit comments

Comments
 (0)