review comments

quantumalaviya · quantumalaviya · commit 3d75b92f92b6 · 2023-03-29T16:15:52.000+05:30
diff --git a/keras_cv/models/object_detection/yolox/binary_crossentropy.py b/keras_cv/models/object_detection/yolox/binary_crossentropy.py
@@ -12,13 +12,18 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
+
+import warnings
+
 import tensorflow as tf
 
 
 class BinaryCrossentropy(tf.keras.losses.Loss):
     """Computes the cross-entropy loss between true labels and predicted labels.
+
     Use this cross-entropy loss for binary (0 or 1) classification applications.
     This loss is updated for YoloX by offering support for no axis to mean over.
+
     Args:
         from_logits: Whether to interpret `y_pred` as a tensor of
             [logit](https://en.wikipedia.org/wiki/Logit) values. By default, we
@@ -29,9 +34,10 @@ class BinaryCrossentropy(tf.keras.losses.Loss):
             version of the true labels, where the smoothing squeezes the labels
             towards 0.5.  Larger values of `label_smoothing` correspond to
             heavier smoothing.
-        axis: the axis along which to mean the ious. Defaults to `None` which implies
+        axis: the axis along which to average the loss values. Defaults to `no_reduction` which implies
             mean across no axes.
-    Sample Usage:
+
+    Usage:
     ```python
     model.compile(
       loss=keras_cv.models.object_detection.yolox.binary_crossentropy.BinaryCrossentropy(from_logits=True)
@@ -62,13 +68,19 @@ def _smooth_labels():
             label_smoothing, _smooth_labels, lambda: y_true
         )
 
-        if self.axis is not None:
+        if self.axis == "no_reduction":
+            warnings.warn(
+                "`axis='no_reduction'` is a temporary API, and the API contract "
+                "will be replaced in the future with a more generic solution "
+                "covering all losses."
+            )
             return tf.reduce_mean(
                 tf.keras.backend.binary_crossentropy(
                     y_true, y_pred, from_logits=self.from_logits
                 ),
                 axis=self.axis,
             )
+
         return tf.keras.backend.binary_crossentropy(
             y_true, y_pred, from_logits=self.from_logits
         )
diff --git a/keras_cv/models/object_detection/yolox/layers/yolox_decoder.py b/keras_cv/models/object_detection/yolox/layers/yolox_decoder.py
@@ -30,19 +30,19 @@ class YoloXPredictionDecoder(keras.layers.Layer):
         bounding_box_format:  The format of bounding boxes of input dataset. Refer
             [to the keras.io docs](https://keras.io/api/keras_cv/bounding_box/formats/)
             for more details on supported bounding box formats.
-        classes: The number of classes to be considered for the classification head.
+        num_classes: The number of classes to be considered for the classification head.
         suppression_layer: A `keras.layers.Layer` that follows the same API
             signature of the `keras_cv.layers.MultiClassNonMaxSuppression` layer.
             This layer should perform a suppression operation such as Non Max Suppression,
             or Soft Non-Max Suppression.
     """
 
     def __init__(
-        self, bounding_box_format, classes, suppression_layer=None, **kwargs
+        self, bounding_box_format, num_classes, suppression_layer=None, **kwargs
     ):
         super().__init__(**kwargs)
         self.bounding_box_format = bounding_box_format
-        self.classes = classes
+        self.num_classes = num_classes
 
         self.suppression_layer = (
             suppression_layer
@@ -76,8 +76,12 @@ def call(self, images, predictions):
         strides = []
 
         shapes = [x.shape[1:3] for x in predictions]
+
+        # The last axis has 5 + self.num_classes entries: the bounding box
+        # (length=4), the objectness score (length=1) and one score per class.
+        # The reshape simply collapses axes 1 and 2 of x into a single dimension.
         predictions = [
-            tf.reshape(x, [batch_size, -1, 5 + self.classes])
+            tf.reshape(x, [batch_size, -1, 5 + self.num_classes])
             for x in predictions
         ]
         predictions = tf.cast(
@@ -107,24 +111,24 @@ def call(self, images, predictions):
             (predictions[..., :2] + grids) * strides / image_shape, axis=-2
         )
         box_xy = tf.broadcast_to(
-            box_xy, [batch_size, predictions_shape[1], self.classes, 2]
+            box_xy, [batch_size, predictions_shape[1], self.num_classes, 2]
         )
         box_wh = tf.expand_dims(
             tf.exp(predictions[..., 2:4]) * strides / image_shape, axis=-2
         )
         box_wh = tf.broadcast_to(
-            box_wh, [batch_size, predictions_shape[1], self.classes, 2]
+            box_wh, [batch_size, predictions_shape[1], self.num_classes, 2]
         )
 
         box_confidence = tf.math.sigmoid(predictions[..., 4:5])
         box_class_probs = tf.math.sigmoid(predictions[..., 5:])
 
         # create and broadcast classes for every box before nms
         box_classes = tf.expand_dims(
-            tf.range(self.classes, dtype=self.compute_dtype), axis=-1
+            tf.range(self.num_classes, dtype=self.compute_dtype), axis=-1
         )
         box_classes = tf.broadcast_to(
-            box_classes, [batch_size, predictions_shape[1], self.classes, 1]
+            box_classes, [batch_size, predictions_shape[1], self.num_classes, 1]
         )
 
         box_scores = tf.expand_dims(box_confidence * box_class_probs, axis=-1)
@@ -146,7 +150,6 @@ def call(self, images, predictions):
             target="xywh",
             images=images,
         )
-
         outputs = bounding_box.convert_format(
             outputs,
             source="rel_xywh",
@@ -156,7 +159,7 @@ def call(self, images, predictions):
 
         # preparing the predictions for TF NMS op
         class_predictions = tf.cast(outputs["classes"], tf.int32)
-        class_predictions = tf.one_hot(class_predictions, self.classes)
+        class_predictions = tf.one_hot(class_predictions, self.num_classes)
 
         scores = (
             tf.expand_dims(outputs["confidence"], axis=-1) * class_predictions
diff --git a/keras_cv/models/object_detection/yolox/layers/yolox_head.py b/keras_cv/models/object_detection/yolox/layers/yolox_head.py
@@ -24,7 +24,7 @@ class YoloXHead(keras.layers.Layer):
     """The YoloX prediction head.
 
     Arguments:
-        classes: The number of classes to be considered for the classification head.
+        num_classes: The number of classes to be considered for the classification head.
         bias_initializer: Bias Initializer for the final convolution layer for the
             classification and regression heads. Defaults to None.
         width_multiplier: A float value used to calculate the base width of the model
@@ -38,7 +38,7 @@ class YoloXHead(keras.layers.Layer):
 
     def __init__(
         self,
-        classes,
+        num_classes,
         bias_initializer=None,
         width_multiplier=1.0,
         num_level=3,
@@ -110,7 +110,7 @@ def __init__(
 
             self.classification_preds.append(
                 keras.layers.Conv2D(
-                    filters=classes,
+                    filters=num_classes,
                     kernel_size=1,
                     strides=1,
                     padding="same",
diff --git a/keras_cv/models/object_detection/yolox/layers/yolox_head_test.py b/keras_cv/models/object_detection/yolox/layers/yolox_head_test.py
@@ -0,0 +1,52 @@
+# Copyright 2023 The KerasCV Authors
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     https://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+
+import tensorflow as tf
+
+from keras_cv.models.object_detection.yolox.layers import YoloXHead
+
+
+class YoloXHeadTest(tf.test.TestCase):
+    """Checks the parameter count and output structure of `YoloXHead`."""
+
+    def test_num_parameters(self):
+        inputs = [
+            tf.keras.Input((80, 80, 256)),
+            tf.keras.Input((40, 40, 512)),
+            tf.keras.Input((20, 20, 1024)),
+        ]
+        output = YoloXHead(num_classes=20)(inputs)
+        model = tf.keras.models.Model(inputs=inputs, outputs=output)
+
+        keras_params = sum(
+            tf.keras.backend.count_params(p) for p in model.trainable_weights
+        )
+        # taken from original implementation
+        original_params = 7563595
+        self.assertEqual(keras_params, original_params)
+
+    def test_output_type_and_shape(self):
+        inputs = [
+            tf.random.uniform((3, 80, 80, 256)),
+            tf.random.uniform((3, 40, 40, 512)),
+            tf.random.uniform((3, 20, 20, 1024)),
+        ]
+        output = YoloXHead(num_classes=20)(inputs)
+        self.assertIsInstance(output, list)
+        self.assertEqual(len(output), 3)
+        self.assertEqual(output[0].shape, [3, 80, 80, 25])
+        self.assertEqual(output[1].shape, [3, 40, 40, 25])
+        self.assertEqual(output[2].shape, [3, 20, 20, 25])
+
diff --git a/keras_cv/models/object_detection/yolox/layers/yolox_label_encoder_test.py b/keras_cv/models/object_detection/yolox/layers/yolox_label_encoder_test.py
@@ -0,0 +1,82 @@
+# Copyright 2023 The KerasCV Authors
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     https://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import tensorflow as tf
+
+from keras_cv.models.object_detection.yolox.layers import YoloXLabelEncoder
+
+
+class YoloXLabelEncoderTest(tf.test.TestCase):
+    """Tests how `YoloXLabelEncoder` handles ragged images and labels."""
+
+    def test_ragged_images_exception(self):
+        # differently sized images can only be stacked as a RaggedTensor
+        shapes = [(10, 11, 3), (9, 14, 3), (7, 12, 3)]
+        images = tf.ragged.stack([tf.random.uniform(s) for s in shapes])
+
+        box_labels = {
+            "boxes": tf.random.uniform((3, 4, 4)),
+            "classes": tf.random.uniform((3, 4), maxval=20, dtype=tf.int32),
+        }
+        layer = YoloXLabelEncoder()
+
+        with self.assertRaisesRegex(
+            ValueError,
+            "method does not support RaggedTensor inputs for the `images` argument.",
+        ):
+            layer(images, box_labels)
+
+    def test_ragged_labels(self):
+        images = tf.random.uniform((3, 12, 12, 3))
+        # three images carrying 11, 14 and 12 boxes respectively
+        box_counts = (11, 14, 12)
+        box_labels = {
+            "boxes": tf.ragged.stack(
+                [tf.random.uniform((n, 4)) for n in box_counts]
+            ),
+            "classes": tf.ragged.stack(
+                [
+                    tf.random.uniform([n], maxval=20, dtype=tf.int32)
+                    for n in box_counts
+                ]
+            ),
+        }
+        layer = YoloXLabelEncoder()
+
+        encoded_boxes, _ = layer(images, box_labels)
+        # the encoded boxes are expected to be sized to the largest count (14)
+        self.assertEqual(encoded_boxes.shape, (3, 14, 4))
+
+    def test_one_hot_classes_exception(self):
+        images = tf.random.uniform((3, 12, 12, 3))
+        box_counts = (11, 14, 12)
+        box_labels = {
+            "boxes": tf.ragged.stack(
+                [tf.random.uniform((n, 4)) for n in box_counts]
+            ),
+            # one-hot encoded classes; the encoder is expected to reject them
+            "classes": tf.ragged.stack(
+                [
+                    tf.one_hot(
+                        tf.random.uniform([n], maxval=20, dtype=tf.int32), 20
+                    )
+                    for n in box_counts
+                ]
+            ),
+        }
+        layer = YoloXLabelEncoder()
+
+        with self.assertRaises(ValueError):
+            layer(images, box_labels)
+        
diff --git a/keras_cv/models/object_detection/yolox/layers/yolox_pafpn.py b/keras_cv/models/object_detection/yolox/layers/yolox_pafpn.py
@@ -33,7 +33,7 @@ class YoloXPAFPN(keras.layers.Layer):
             this changes based on the detection model being used. Defaults to 1.0.
         in_channels: A list representing the number of filters in the FPN output.
             The length of the list will be same as the number of outputs. Defaults to
-            [256, 512, 1024].
+            (256, 512, 1024).
         use_depthwise: a boolean value used to decide whether a depthwise conv block
             should be used over a regular darknet block. Defaults to False.
         activation: the activation applied after the BatchNorm layer. One of "silu",
@@ -44,7 +44,7 @@ def __init__(
         self,
         depth_multiplier=1.0,
         width_multiplier=1.0,
-        in_channels=[256, 512, 1024],
+        in_channels=(256, 512, 1024),
         use_depthwise=False,
         activation="silu",
         **kwargs
diff --git a/keras_cv/models/object_detection/yolox/layers/yolox_pafpn_test.py b/keras_cv/models/object_detection/yolox/layers/yolox_pafpn_test.py
@@ -0,0 +1,53 @@
+# Copyright 2023 The KerasCV Authors
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     https://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+
+import tensorflow as tf
+
+from keras_cv.models.object_detection.yolox.layers import YoloXPAFPN
+
+
+class YoloXPAFPNTest(tf.test.TestCase):
+    """Checks the parameter count and output shapes of `YoloXPAFPN`."""
+
+    def test_num_parameters(self):
+        # the layer takes its feature maps keyed by the integers 3, 4 and 5
+        inputs = {
+            3: tf.keras.Input((80, 80, 256)),
+            4: tf.keras.Input((40, 40, 512)),
+            5: tf.keras.Input((20, 20, 1024)),
+        }
+        output = YoloXPAFPN()(inputs)
+        model = tf.keras.models.Model(
+            inputs=list(inputs.values()), outputs=output
+        )
+
+        keras_params = sum(
+            tf.keras.backend.count_params(p) for p in model.trainable_weights
+        )
+        # taken from original implementation
+        original_params = 19523072
+        self.assertEqual(keras_params, original_params)
+
+    def test_output_shape(self):
+        inputs = {
+            3: tf.random.uniform((3, 80, 80, 256)),
+            4: tf.random.uniform((3, 40, 40, 512)),
+            5: tf.random.uniform((3, 20, 20, 1024)),
+        }
+        output1, output2, output3 = YoloXPAFPN()(inputs)
+        self.assertEqual(output1.shape, [3, 80, 80, 256])
+        self.assertEqual(output2.shape, [3, 40, 40, 512])
+        self.assertEqual(output3.shape, [3, 20, 20, 1024])
+
diff --git a/keras_cv/version_check.py b/keras_cv/version_check.py
@@ -18,7 +18,7 @@
 import tensorflow as tf
 from packaging.version import parse
 
-MIN_VERSION = "2.11.0"
+MIN_VERSION = "2.9.0"
 
 
 def check_tf_version():