
Commit d9629a0

Add ability to swap weights to MovingAverage.
This patch makes it easier to swap the model weights with the MovingAverage weights before evaluation and to swap them back afterwards.
1 parent: 90c190b
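In outline, the workflow the commit message describes mirrors the new unit test: create the shadow variables once, then swap before and after evaluation. A rough sketch of that usage (`model`, `val_dataset`, and `evaluate` are hypothetical placeholders, not part of this commit; as in the test, the swap calls run under a `tf.distribute.Strategy` scope):

```python
import tensorflow as tf
from tensorflow_addons.optimizers import MovingAverage

strategy = tf.distribute.MirroredStrategy()
with strategy.scope():
    # `model` is a hypothetical tf.keras.Model built inside the scope.
    opt = MovingAverage(tf.keras.optimizers.SGD(lr=0.01), average_decay=0.99)

# ... train `model` with `opt` as usual ...

with strategy.scope():
    opt.shadow_copy(model.trainable_variables)  # register the "average" slots
    opt.swap_weights()  # model variables now hold the averaged weights

evaluate(model, val_dataset)  # hypothetical evaluation step

with strategy.scope():
    opt.swap_weights()  # swapping again restores the raw training weights
```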

2 files changed: 91 additions, 0 deletions

tensorflow_addons/optimizers/moving_average.py

Lines changed: 58 additions & 0 deletions
```diff
@@ -124,3 +124,61 @@ def _create_slots(self, var_list):
         )  # pylint: disable=protected-access
         for var in var_list:
             self.add_slot(var, "average", var.read_value())
+
+    def shadow_copy(self, model_weights):
+        """Creates shadow variables for the given model weights."""
+        for var in model_weights:
+            self.add_slot(var, "average", initializer="zeros")
+        self._average_weights = [self.get_slot(var, "average") for var in model_weights]
+        self._model_weights = model_weights
+
+    @property
+    def has_shadow_copy(self):
+        """Whether this optimizer has created shadow variables."""
+        return self._model_weights is not None
+
+    def swap_weights(self):
+        """Swap the average and moving weights.
+
+        This is a convenience method to allow one to evaluate the averaged weights
+        at test time. It loads the weights stored in `self._average_weights` into
+        the model, keeping a copy of the original model weights. Swapping twice
+        will return the original weights.
+        """
+        if tf.distribute.in_cross_replica_context():
+            strategy = tf.distribute.get_strategy()
+            return strategy.run(self._swap_weights, args=())
+        else:
+            raise ValueError(
+                "Swapping weights must occur under a tf.distribute.Strategy"
+            )
+
+    @tf.function
+    def _swap_weights(self):
+        def fn_0(a, b):
+            a.assign_add(b)
+            return a
+
+        def fn_1(b, a):
+            b.assign(a - b)
+            return b
+
+        def fn_2(a, b):
+            a.assign_sub(b)
+            return a
+
+        def swap(strategy, a, b):
+            """Swap `a` and `b` and mirror to all devices."""
+            for a_element, b_element in zip(a, b):
+                strategy.extended.update(
+                    a_element, fn_0, args=(b_element,)
+                )  # a = a + b
+                strategy.extended.update(
+                    b_element, fn_1, args=(a_element,)
+                )  # b = a - b
+                strategy.extended.update(
+                    a_element, fn_2, args=(b_element,)
+                )  # a = a - b
+
+        ctx = tf.distribute.get_replica_context()
+        return ctx.merge_call(swap, args=(self._average_weights, self._model_weights))
```
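The three inner functions implement an in-place arithmetic swap, the additive analogue of the XOR-swap trick, so no temporary tensor is allocated per variable: after `a += b`, computing `a - b` recovers the original `a`, and subtracting that from the new `a` recovers the original `b`. A minimal single-variable sketch of the same identity, independent of any distribution strategy:

```python
import tensorflow as tf

a = tf.Variable(1.0)  # stands in for one "average" slot
b = tf.Variable(4.0)  # stands in for the matching model weight

a.assign_add(b)  # a = a + b           -> a == 5.0
b.assign(a - b)  # b = (a + b) - b     -> b == 1.0, the original a
a.assign_sub(b)  # a = (a + b) - a_old -> a == 4.0, the original b

print(a.numpy(), b.numpy())  # 4.0 1.0
```

One trade-off of this trick is that the intermediate sum can lose floating-point precision when the two values differ greatly in magnitude; in exchange, no temporary copy of the weights is needed.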

tensorflow_addons/optimizers/tests/moving_average_test.py

Lines changed: 33 additions & 0 deletions
```diff
@@ -213,3 +213,36 @@ def test_dynamic_decay():
 
     ema_var0 = opt.get_slot(var0, "average")
     np.testing.assert_allclose(ema_var0.read_value(), [0.64, 1.64])
+
+
+@pytest.mark.usefixtures("maybe_run_functions_eagerly")
+@pytest.mark.with_device([tf.distribute.MirroredStrategy])
+def test_swap_weights(device):
+    with device.scope():
+        var = tf.Variable([1.0, 2.0])
+        grads = tf.constant([0.1, 0.1])
+
+        opt = MovingAverage(tf.keras.optimizers.SGD(lr=2.0), average_decay=0.5)
+
+    @tf.function
+    def apply_gradients():
+        opt.apply_gradients([(grads, var)])
+
+    device.run(apply_gradients)
+
+    np.testing.assert_allclose(var.read_value(), [0.8, 1.8])
+    ema_var = opt.get_slot(var, "average")
+    np.testing.assert_allclose(ema_var.read_value(), [0.85, 1.85])
+
+    with device.scope():
+        opt.shadow_copy([var])
+        opt.swap_weights()
+
+    np.testing.assert_allclose(ema_var.read_value(), [0.8, 1.8])
+    np.testing.assert_allclose(var.read_value(), [0.85, 1.85])
+
+    with device.scope():
+        opt.swap_weights()
+
+    np.testing.assert_allclose(var.read_value(), [0.8, 1.8])
+    np.testing.assert_allclose(ema_var.read_value(), [0.85, 1.85])
```
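`swap_weights` presumes `shadow_copy` has already been called, which is what the new `has_shadow_copy` property lets callers check. A hypothetical helper (not part of this commit) that guards an evaluation pass and restores the weights even if evaluation raises:

```python
def evaluate_with_averaged_weights(opt, model, evaluate_fn):
    """Hypothetical helper: run `evaluate_fn` with the EMA weights swapped in."""
    if not opt.has_shadow_copy:
        opt.shadow_copy(model.trainable_variables)
    opt.swap_weights()  # model now holds the averaged weights
    try:
        return evaluate_fn(model)
    finally:
        opt.swap_weights()  # always restore the raw training weights
```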
